├── ANALYSIS.md ├── COREFERENCE.md ├── LICENSE ├── MANIFEST.in ├── MULTIGRAPH.md ├── README.md ├── bin ├── cort-predict-conll ├── cort-predict-raw ├── cort-train ├── cort-visualize └── run-multigraph ├── cort ├── __init__.py ├── analysis │ ├── __init__.py │ ├── data_structures.py │ ├── error_extractors.py │ ├── plotting.py │ ├── spanning_tree_algorithms.py │ ├── visualization.py │ └── visualization │ │ ├── TODO │ │ ├── lib │ │ ├── cort-for-raw.js │ │ ├── cort.js │ │ ├── jquery-2.1.1.min.js │ │ └── jquery.jsPlumb-1.6.4.js │ │ └── style.css ├── config_files │ ├── corenlp.ini │ └── corenlp_with_coref.ini ├── core │ ├── __init__.py │ ├── corpora.py │ ├── documents.py │ ├── external_data.py │ ├── head_finders.py │ ├── mention_extractor.py │ ├── mention_property_computer.py │ ├── mentions.py │ ├── mixins.py │ ├── singletons.py │ ├── spans.py │ └── util.py ├── coreference │ ├── __init__.py │ ├── approaches │ │ ├── __init__.py │ │ ├── antecedent_trees.py │ │ ├── mention_pairs.py │ │ └── mention_ranking.py │ ├── clusterer.py │ ├── cost_functions.py │ ├── experiments.py │ ├── features.py │ ├── instance_extractors.py │ ├── multigraph │ │ ├── __init__.py │ │ ├── decoders.py │ │ ├── features.py │ │ ├── multigraphs.py │ │ └── weighting_functions.py │ └── perceptrons.pyx ├── preprocessing │ ├── __init__.py │ └── pipeline.py ├── reference-coreference-scorers │ └── v8.01 │ │ ├── README.txt │ │ ├── lib │ │ ├── Algorithm │ │ │ ├── Munkres.pm │ │ │ └── README.Munkres │ │ ├── CorScorer.pm │ │ ├── Cwd.pm │ │ ├── Data │ │ │ └── Dumper.pm │ │ └── Math │ │ │ └── Combinatorics.pm │ │ ├── scorer.bat │ │ ├── scorer.pl │ │ └── test │ │ ├── CorefMetricTest.pm │ │ ├── CorefMetricTestConfig.pm │ │ ├── DataFiles │ │ ├── TC-A-1.response │ │ ├── TC-A-10.response │ │ ├── TC-A-11.response │ │ ├── TC-A-12.response │ │ ├── TC-A-13.response │ │ ├── TC-A-2.response │ │ ├── TC-A-3.response │ │ ├── TC-A-4.response │ │ ├── TC-A-5.response │ │ ├── TC-A-6.response │ │ ├── TC-A-7.response │ │ ├── TC-A-8.response │ │ ├── TC-A-9.response │ │ ├── TC-A.key │ │ ├── TC-B-1.response │ │ ├── TC-B.key │ │ ├── TC-C-1.response │ │ ├── TC-C.key │ │ ├── TC-D-1.response │ │ ├── TC-D.key │ │ ├── TC-E-1.response │ │ ├── TC-E.key │ │ ├── TC-F-1.response │ │ ├── TC-F.key │ │ ├── TC-G-1.response │ │ ├── TC-G.key │ │ ├── TC-H-1.response │ │ ├── TC-H.key │ │ ├── TC-I-1.response │ │ ├── TC-I.key │ │ ├── TC-J-1.response │ │ ├── TC-J.key │ │ ├── TC-K-1.response │ │ ├── TC-K.key │ │ ├── TC-L-1.response │ │ ├── TC-L.key │ │ ├── TC-M-1.response │ │ ├── TC-M-2.response │ │ ├── TC-M-3.response │ │ ├── TC-M-4.response │ │ ├── TC-M-5.response │ │ ├── TC-M-6.response │ │ ├── TC-M.key │ │ ├── TC-N-1.response │ │ ├── TC-N-2.response │ │ ├── TC-N-3.response │ │ ├── TC-N-4.response │ │ ├── TC-N-5.response │ │ ├── TC-N-6.response │ │ └── TC-N.key │ │ ├── TestCases.README │ │ └── test.pl ├── resources │ ├── coreferent_pairs.obj │ ├── female.list │ ├── male.list │ ├── neutral.list │ ├── plural.list │ └── singletons_not_cleaned.obj ├── test │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── test_data_structures.py │ │ ├── test_error_extractors.py │ │ └── test_spanning_tree_algorithms.py │ ├── core │ │ ├── __init__.py │ │ ├── resources │ │ │ └── input.conll │ │ ├── test_corpora.py │ │ ├── test_documents.py │ │ ├── test_external_data.py │ │ ├── test_head_finders.py │ │ ├── test_mention_extractor.py │ │ ├── test_mention_property_computer.py │ │ ├── test_mentions.py │ │ ├── test_spans.py │ │ └── test_util.py │ └── multigraph │ │ ├── __init__.py │ │ └── test_features.py 
└── util │ ├── __init__.py │ └── import_helper.py ├── plot.png ├── scripts ├── acl15demo.py ├── naacl15-demo.py └── train-and-predict-all.py ├── setup.py ├── stanford_corenlp_pywrapper ├── __init__.py ├── javasrc │ ├── corenlp │ │ ├── JsonPipeline.java │ │ ├── PipeRunner.java │ │ └── SocketServer.java │ └── util │ │ ├── Arr.java │ │ ├── BasicFileIO.java │ │ ├── JsonUtil.java │ │ ├── U.java │ │ └── misc │ │ ├── Pair.java │ │ └── Triple.java ├── lib │ ├── corenlpwrapper.jar │ ├── guava-13.0.1.jar │ └── jackson-all-1.9.11.jar ├── rcorenlp.r └── sockwrap.py ├── tree.png └── visualization.png /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014-2015 Sebastian Martschat 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include cort/resources/* 2 | include cort/config_files/* 3 | include cort/analysis/visualization/* 4 | include cort/analysis/visualization/lib/* 5 | include cort/coreference/perceptrons.pyx 6 | include stanford_corenlp_pywrapper/rcorenlp.r 7 | include stanford_corenlp_pywrapper/lib/* 8 | include stanford_corenlp_pywrapper/javasrc/corenlp/* 9 | include stanford_corenlp_pywrapper/javasrc/util/misc/* 10 | include stanford_corenlp_pywrapper/javasrc/util/* 11 | include cort/reference-coreference-scorers/v8.01/* 12 | include cort/reference-coreference-scorers/v8.01/lib/* 13 | include cort/reference-coreference-scorers/v8.01/lib/Algorithm/* 14 | include cort/reference-coreference-scorers/v8.01/lib/Data/* 15 | include cort/reference-coreference-scorers/v8.01/lib/Math/* -------------------------------------------------------------------------------- /MULTIGRAPH.md: -------------------------------------------------------------------------------- 1 | # Running cort's multigraph system 2 | 3 | **cort** ships with a deterministic coreference resolution system based on 4 | multigraph clustering. The input must follow [the 5 | format from the CoNLL shared tasks on coreference resolution](http://conll.cemantix.org/2012/data.html). 
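Under the hood, `run-multigraph` reads in the corpus, extracts system mentions, builds a coreference multigraph and decodes it into entities. A minimal Python sketch of the same steps (all names are taken from `bin/run-multigraph` in this repository; the shortened relation lists here are for illustration only):

```python
from cort.core import corpora, mention_extractor
from cort.coreference.multigraph import decoders, features, multigraphs, \
    weighting_functions

# read in a CoNLL corpus and extract system mentions
corpus = corpora.Corpus.from_file("my corpus", open("my_data.data"))
for doc in corpus:
    doc.system_mentions = mention_extractor.extract_system_mentions(doc)

# build the multigraph from positive and negative relations
# (bin/run-multigraph uses longer relation lists)
cmc = multigraphs.CorefMultigraphCreator(
    [features.head_match, features.alias],            # positive relations
    [features.not_compatible, features.not_speaker],  # negative relations
    weighting_functions.for_each_relation_with_distance,
    {})
cmc.relation_weights = {features.head_match: 1, features.alias: 1}

# cluster mentions and write the output in CoNLL format
decoders.MultigraphDecoder(cmc).decode(corpus)
corpus.write_to_file(open("out.data", "w"))
```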
6 | 7 | To run the multigraph system, use 8 | 9 | ```shell 10 | run-multigraph -in my_data.data -out out.data 11 | ``` 12 | 13 | With the optional argument `-ante`, antecedent decisions are also written to a 14 | file: 15 | 16 | ```shell 17 | run-multigraph -in my_data.data -out out.data -ante antecedents_out.data 18 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cort 2 | 3 | __cort__ is a coreference resolution toolkit. It consists 4 | of two parts: the *coreference resolution* component implements a framework for 5 | coreference resolution based on latent variables, which allows you to rapidly 6 | devise approaches to coreference resolution, while the *error analysis* component 7 | provides extensive functionality for analyzing and visualizing errors made by 8 | coreference resolution systems. 9 | 10 | If you have any questions or comments, drop me an e-mail at 11 | [sebastian.martschat@gmail.com](mailto:sebastian.martschat@gmail.com). 12 | 13 | ## Branches/Forks 14 | 15 | * the [kbest branch](https://github.com/smartschat/cort/tree/kbest) contains code for kbest extraction of coreference information, as described in Ji et al. (2017) 16 | * the [v03 branch](https://github.com/smartschat/cort/tree/v03) contains a version of __cort__ with more models and a better train/dev/test workflow. For more details on the models see Martschat (2017). 17 | * [Nafise Moosavi's fork of __cort__](https://github.com/ns-moosavi/cort/tree/singleton_feature) implements search space pruning on top of __cort__, as described in Moosavi and Strube (2016) 18 | 19 | ## Documentation 20 | 21 | * coreference resolution with cort 22 | * error analysis with cort 23 | * running the multigraph system 24 | 25 | ## Installation 26 | 27 | __cort__ is available on PyPi. You can install it via 28 | 29 | ``` 30 | pip install cort 31 | ``` 32 | Dependencies (automatically installed by pip) are 33 | [nltk](http://www.nltk.org/), [numpy](http://www.numpy.org/), 34 | [matplotlib](http://matplotlib.org), 35 | [mmh3](https://pypi.python.org/pypi/mmh3), 36 | [PyStanfordDependencies](https://github.com/dmcc/PyStanfordDependencies), 37 | [cython](http://cython.org/), 38 | [future](https://pypi.python.org/pypi/future), 39 | [jpype](https://pypi.python.org/pypi/jpype1) and 40 | [beautifulsoup](https://pypi.python.org/pypi/beautifulsoup4). It ships with 41 | [stanford_corenlp_pywrapper](https://github.com/brendano/stanford_corenlp_pywrapper) 42 | and [the reference implementation of the CoNLL scorer](https://github.com/conll/reference-coreference-scorers). 43 | 44 | __cort__ is written for use on Linux with Python 3.3+. While __cort__ also runs under 45 | Python 2.7, I strongly recommend running __cort__ with Python 3, since the Python 3 46 | version is much more efficient. 47 | 48 | ## References 49 | 50 | Yangfeng Ji, Chenhao Tan, Sebastian Martschat, Yejin Choi and Noah A. Smith (2017). **Dynamic Entity Representations in Neural Language Models.** To appear in *Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing (EMNLP), Copenhagen, Denmark, 7-11 September 2017*. 51 | [PDF](https://arxiv.org/abs/1708.00781) 52 | 53 | Sebastian Martschat (2017). **Structured Representations for Coreference Resolution.** PhD thesis, Heidelberg University. 54 | [PDF](http://www.ub.uni-heidelberg.de/archiv/23305) 55 | 56 | Nafise Sadat Moosavi and Michael Strube (2016). 
**Search space pruning: A 57 | simple solution for better coreference resolvers**. In *Proceedings of the 2016 58 | Conference of the North American Chapter of the Association for Computational 59 | Linguistics: Human Language Technologies*, San Diego, Cal., 12-17 June 2016, 60 | pages 1005-1011. 61 | [PDF](http://www.aclweb.org/anthology/N16-1115.pdf) 62 | 63 | Sebastian Martschat and Michael Strube (2015). **Latent Structures for 64 | Coreference Resolution**. *Transactions of the Association for 65 | Computational Linguistics*, 3, pages 405-418. 66 | [PDF](http://www.aclweb.org/anthology/Q/Q15/Q15-1029.pdf) 67 | 68 | Sebastian Martschat, Patrick Claus and Michael Strube (2015). **Plug Latent 69 | Structures and Play Coreference Resolution**. In *Proceedings of 70 | the ACL-IJCNLP 2015 System Demonstrations*, Beijing, China, 71 | 26-31 July 2015, pages 61-66. 72 | [PDF](http://www.aclweb.org/anthology/P/P15/P15-4011.pdf) 73 | 74 | Sebastian Martschat, Thierry Göckel and Michael Strube (2015). **Analyzing and 75 | Visualizing Coreference Resolution Errors**. In *Proceedings of the 2015 76 | Conference of the North American Chapter of the Association for Computational 77 | Linguistics: Demonstrations*, Denver, Colorado, USA, 31 May-5 June 2015, 78 | pages 6-10. 79 | [PDF](https://aclweb.org/anthology/N/N15/N15-3002.pdf) 80 | 81 | Sebastian Martschat and Michael Strube (2014). **Recall Error Analysis for 82 | Coreference Resolution**. In *Proceedings of the 2014 Conference on Empirical 83 | Methods in Natural Language Processing (EMNLP)*, Doha, Qatar, 25-29 October 84 | 2014, pages 2070-2081. 85 | [PDF](http://aclweb.org/anthology/D/D14/D14-1221.pdf) 86 | 87 | Sebastian Martschat (2013). **Multigraph Clustering for Unsupervised 88 | Coreference Resolution**. In *Proceedings of the Student Research Workshop 89 | at the 51st Annual Meeting of the Association for Computational Linguistics*, 90 | Sofia, Bulgaria, 5-7 August 2013, pages 81-88. 91 | [PDF](http://aclweb.org/anthology/P/P13/P13-3012.pdf) 92 | 93 | If you use the error analysis component in your research, please cite the 94 | [EMNLP'14 paper](http://aclweb.org/anthology/D/D14/D14-1221.pdf). If you use 95 | the coreference component in your research, please cite the 96 | [TACL paper](http://www.aclweb.org/anthology/Q/Q15/Q15-1029.pdf). If you use 97 | the multigraph system, please cite the 98 | [ACL'13-SRW paper](http://aclweb.org/anthology/P/P13/P13-3012.pdf). 99 | 100 | ## Changelog 101 | 102 | __Wednesday, 4 November 2015__ 103 | Now supports numeric features. Due to a different feature representation the models changed, 104 | hence I have updated the downloadable models. 105 | 106 | __Friday, 9 October 2015__ 107 | Now supports label-dependent cost functions. 108 | 109 | __Tuesday, 15 September 2015__ 110 | Minor bugfixes. 111 | 112 | __Monday, 27 July 2015__ 113 | Can now perform coreference resolution on raw text. 114 | 115 | __Tuesday, 21 July 2015__ 116 | Updated to status of TACL paper. 117 | 118 | __Wednesday, 3 June 2015__ 119 | Improvements to visualization (mention highlighting and scrolling). 120 | 121 | __Monday, 1 June 2015__ 122 | Fixed a bug in mention highlighting for visualization. 123 | 124 | __Sunday, 31 May 2015__ 125 | Updated to status of NAACL'15 demo paper. 126 | 127 | __Wednesday, 13 May 2015__ 128 | Fixed another bug in the documentation regarding format of antecedent data. 129 | 130 | __Tuesday, 3 February 2015__ 131 | Fixed a bug in the documentation: part numbers
in the antecedent file must be written with trailing 0s. 132 | 133 | __Thursday, 30 October 2014__ 134 | Fixed data structure bug in documents.py. The results from the paper are not affected by this bug. 135 | 136 | __Wednesday, 22 October 2014__ 137 | Initial release. 138 | -------------------------------------------------------------------------------- /bin/cort-predict-conll: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import print_function 5 | import argparse 6 | import codecs 7 | import logging 8 | import os 9 | import pickle 10 | import subprocess 11 | import sys 12 | 13 | 14 | import cort 15 | from cort.core import corpora 16 | from cort.core import mention_extractor 17 | from cort.coreference import cost_functions 18 | from cort.coreference import experiments 19 | from cort.coreference import features 20 | from cort.coreference import instance_extractors 21 | from cort.util import import_helper 22 | 23 | 24 | __author__ = 'smartschat' 25 | 26 | logging.basicConfig(level=logging.INFO, 27 | format='%(asctime)s %(levelname)s %(message)s') 28 | 29 | 30 | def parse_args(): 31 | parser = argparse.ArgumentParser(description='Predict coreference ' 32 | 'relations.') 33 | parser.add_argument('-in', 34 | required=True, 35 | dest='input_filename', 36 | help='The input file. Must follow the format of the ' 37 | 'CoNLL shared tasks on coreference resolution ' 38 | '(see http://conll.cemantix.org/2012/data.html).') 39 | parser.add_argument('-model', 40 | required=True, 41 | dest='model', 42 | help='The model learned via cort-train.') 43 | parser.add_argument('-out', 44 | dest='output_filename', 45 | required=True, 46 | help='The output file the predictions will be stored ' 47 | 'in (in the CoNLL format).') 48 | parser.add_argument('-ante', 49 | dest='ante', 50 | help='The file where antecedent predictions will be ' 51 | 'stored to.') 52 | parser.add_argument('-extractor', 53 | dest='extractor', 54 | required=True, 55 | help='The function to extract instances.') 56 | parser.add_argument('-perceptron', 57 | dest='perceptron', 58 | required=True, 59 | help='The perceptron to use.') 60 | parser.add_argument('-clusterer', 61 | dest='clusterer', 62 | required=True, 63 | help='The clusterer to use.') 64 | parser.add_argument('-gold', 65 | dest='gold', 66 | help='Gold data (in the CoNLL format) for evaluation.') 67 | parser.add_argument('-features', 68 | dest='features', 69 | help='The file containing the list of features.
If not ' 70 | 'provided, defaults to a standard set of ' 71 | 'features.') 72 | 73 | return parser.parse_args() 74 | 75 | 76 | def get_scores(output_data, gold_data): 77 | scorer_output = subprocess.check_output([ 78 | "perl", 79 | cort.__path__[0] + "/reference-coreference-scorers/v8.01/scorer.pl", 80 | "all", 81 | gold_data, 82 | os.getcwd() + "/" + output_data, 83 | "none"]).decode() 84 | 85 | metrics = ['muc', 'bcub', 'ceafm', 'ceafe', 'blanc'] 86 | 87 | metrics_results = {} 88 | 89 | metric = None 90 | 91 | results_formatted = "" 92 | 93 | for line in scorer_output.split("\n"): 94 | if not line: 95 | continue 96 | 97 | splitted = line.split() 98 | 99 | if splitted[0] == "METRIC": 100 | metric = line.split()[1][:-1] 101 | 102 | if (metric != 'blanc' and line.startswith("Coreference:")) \ 103 | or (metric == 'blanc' and line.startswith("BLANC:")): 104 | metrics_results[metric] = ( 105 | float(splitted[5][:-1]), 106 | float(splitted[10][:-1]), 107 | float(splitted[12][:-1]), 108 | ) 109 | 110 | results_formatted += "\tR\tP\tF1\n" 111 | 112 | for metric in metrics: 113 | results_formatted += metric + "\t" + \ 114 | "\t".join([str(val) for val in metrics_results[metric]]) + "\n" 115 | results_formatted += "\n" 116 | average = (metrics_results["muc"][2] + metrics_results["bcub"][2] + 117 | metrics_results["ceafe"][2])/3 118 | results_formatted += "conll\t\t\t" + format(average, '.2f') + "\n" 119 | 120 | return results_formatted 121 | 122 | 123 | logging.basicConfig(level=logging.INFO, 124 | format='%(asctime)s %(levelname)s %(message)s') 125 | 126 | if sys.version_info[0] == 2: 127 | logging.warning("You are running cort under Python 2. cort is much more " 128 | "efficient under Python 3.3+.") 129 | args = parse_args() 130 | 131 | if args.features: 132 | mention_features, pairwise_features = import_helper.get_features( 133 | args.features) 134 | else: 135 | mention_features = [ 136 | features.fine_type, 137 | features.gender, 138 | features.number, 139 | features.sem_class, 140 | features.deprel, 141 | features.head_ner, 142 | features.length, 143 | features.head, 144 | features.first, 145 | features.last, 146 | features.preceding_token, 147 | features.next_token, 148 | features.governor, 149 | features.ancestry 150 | ] 151 | 152 | pairwise_features = [ 153 | features.exact_match, 154 | features.head_match, 155 | features.same_speaker, 156 | features.alias, 157 | features.sentence_distance, 158 | features.embedding, 159 | features.modifier, 160 | features.tokens_contained, 161 | features.head_contained, 162 | features.token_distance 163 | ] 164 | 165 | logging.info("Loading model.") 166 | priors, weights = pickle.load(open(args.model, "rb")) 167 | 168 | perceptron = import_helper.import_from_path(args.perceptron)( 169 | priors=priors, 170 | weights=weights, 171 | cost_scaling=0 172 | ) 173 | 174 | extractor = instance_extractors.InstanceExtractor( 175 | import_helper.import_from_path(args.extractor), 176 | mention_features, 177 | pairwise_features, 178 | cost_functions.null_cost, 179 | perceptron.get_labels() 180 | ) 181 | 182 | logging.info("Reading in data.") 183 | testing_corpus = corpora.Corpus.from_file( 184 | "testing", 185 | codecs.open(args.input_filename, "r", "utf-8")) 186 | 187 | logging.info("Extracting system mentions.") 188 | for doc in testing_corpus: 189 | doc.system_mentions = mention_extractor.extract_system_mentions(doc) 190 | 191 | mention_entity_mapping, antecedent_mapping = experiments.predict( 192 | testing_corpus, 193 | extractor, 194 | perceptron, 195 |
import_helper.import_from_path(args.clusterer) 196 | ) 197 | 198 | testing_corpus.read_coref_decisions(mention_entity_mapping, antecedent_mapping) 199 | 200 | 201 | logging.info("Write corpus to file.") 202 | testing_corpus.write_to_file(codecs.open(args.output_filename, "w", "utf-8")) 203 | 204 | if args.ante: 205 | logging.info("Write antecedent decisions to file") 206 | testing_corpus.write_antecedent_decisions_to_file(open(args.ante, "w")) 207 | 208 | if args.gold: 209 | logging.info("Evaluate.") 210 | print(get_scores(args.output_filename, args.gold)) 211 | 212 | logging.info("Done.") 213 | -------------------------------------------------------------------------------- /bin/cort-predict-raw: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import print_function 5 | import argparse 6 | import codecs 7 | import logging 8 | import pickle 9 | import sys 10 | 11 | 12 | from cort.preprocessing import pipeline 13 | from cort.core import mention_extractor 14 | from cort.coreference import cost_functions 15 | from cort.coreference import experiments 16 | from cort.coreference import features 17 | from cort.coreference import instance_extractors 18 | from cort.util import import_helper 19 | 20 | 21 | __author__ = 'smartschat' 22 | 23 | logging.basicConfig(level=logging.INFO, 24 | format='%(asctime)s %(levelname)s %(message)s') 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='Predict coreference ' 29 | 'relations.') 30 | parser.add_argument('-in', 31 | required=True, 32 | dest='input_filename', 33 | help='The raw text input files.', 34 | nargs="*") 35 | parser.add_argument('-model', 36 | required=True, 37 | dest='model', 38 | help='The model learned via cort-train.') 39 | parser.add_argument('-suffix', 40 | dest='suffix', 41 | default="out", 42 | help='Suffix for output files. Defaults to "out".') 43 | parser.add_argument('-extractor', 44 | dest='extractor', 45 | required=True, 46 | help='The function to extract instances.') 47 | parser.add_argument('-perceptron', 48 | dest='perceptron', 49 | required=True, 50 | help='The perceptron to use.') 51 | parser.add_argument('-clusterer', 52 | dest='clusterer', 53 | required=True, 54 | help='The clusterer to use.') 55 | parser.add_argument('-features', 56 | dest='features', 57 | help='The file containing the list of features. If not ' 58 | 'provided, defaults to a standard set of ' 59 | 'features.') 60 | parser.add_argument('-corenlp', 61 | dest='corenlp', 62 | required=True, 63 | help='Location of CoreNLP jars.') 64 | 65 | return parser.parse_args() 66 | 67 | 68 | logging.basicConfig(level=logging.INFO, 69 | format='%(asctime)s %(levelname)s %(message)s') 70 | 71 | if sys.version_info[0] == 2: 72 | logging.warning("You are running cort under Python 2.
cort is much more " 73 | "efficient under Python 3.3+.") 74 | 75 | args = parse_args() 76 | 77 | if args.features: 78 | mention_features, pairwise_features = import_helper.get_features( 79 | args.features) 80 | else: 81 | mention_features = [ 82 | features.fine_type, 83 | features.gender, 84 | features.number, 85 | features.sem_class, 86 | features.deprel, 87 | features.head_ner, 88 | features.length, 89 | features.head, 90 | features.first, 91 | features.last, 92 | features.preceding_token, 93 | features.next_token, 94 | features.governor, 95 | features.ancestry 96 | ] 97 | 98 | pairwise_features = [ 99 | features.exact_match, 100 | features.head_match, 101 | features.same_speaker, 102 | features.alias, 103 | features.sentence_distance, 104 | features.embedding, 105 | features.modifier, 106 | features.tokens_contained, 107 | features.head_contained, 108 | features.token_distance 109 | ] 110 | 111 | 112 | logging.info("Loading model.") 113 | priors, weights = pickle.load(open(args.model, "rb")) 114 | 115 | perceptron = import_helper.import_from_path(args.perceptron)( 116 | priors=priors, 117 | weights=weights, 118 | cost_scaling=0 119 | ) 120 | 121 | extractor = instance_extractors.InstanceExtractor( 122 | import_helper.import_from_path(args.extractor), 123 | mention_features, 124 | pairwise_features, 125 | cost_functions.null_cost, 126 | perceptron.get_labels() 127 | ) 128 | 129 | logging.info("Reading in and preprocessing data.") 130 | p = pipeline.Pipeline(args.corenlp) 131 | 132 | testing_corpus = p.run_on_docs("corpus", args.input_filename) 133 | 134 | logging.info("Extracting system mentions.") 135 | for doc in testing_corpus: 136 | doc.system_mentions = mention_extractor.extract_system_mentions(doc) 137 | 138 | mention_entity_mapping, antecedent_mapping = experiments.predict( 139 | testing_corpus, 140 | extractor, 141 | perceptron, 142 | import_helper.import_from_path(args.clusterer) 143 | ) 144 | 145 | testing_corpus.read_coref_decisions(mention_entity_mapping, antecedent_mapping) 146 | 147 | logging.info("Write output to file.") 148 | 149 | for doc in testing_corpus: 150 | output = doc.to_simple_output() 151 | my_file = codecs.open(doc.identifier + "." + args.suffix, "w", "utf-8") 152 | my_file.write(output) 153 | my_file.close() 154 | 155 | logging.info("Done.") 156 | -------------------------------------------------------------------------------- /bin/cort-train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import codecs 5 | import logging 6 | import pickle 7 | import sys 8 | 9 | 10 | from cort.core import corpora 11 | from cort.core import mention_extractor 12 | from cort.coreference import experiments 13 | from cort.coreference import features 14 | from cort.coreference import instance_extractors 15 | from cort.util import import_helper 16 | 17 | 18 | __author__ = 'smartschat' 19 | 20 | 21 | logging.basicConfig(level=logging.INFO, 22 | format='%(asctime)s %(levelname)s %(''message)s') 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description='Train coreference resolution ' 27 | 'models.') 28 | parser.add_argument('-in', 29 | required=True, 30 | dest='input_filename', 31 | help='The input file. 
Must follow the format of the ' 32 | 'CoNLL shared tasks on coreference resolution ' 33 | '(see http://conll.cemantix.org/2012/data.html).') 34 | parser.add_argument('-out', 35 | dest='output_filename', 36 | required=True, 37 | help='The output file the learned model will be saved ' 38 | 'to.') 39 | parser.add_argument('-extractor', 40 | dest='extractor', 41 | required=True, 42 | help='The function to extract instances.') 43 | parser.add_argument('-perceptron', 44 | dest='perceptron', 45 | required=True, 46 | help='The perceptron to use.') 47 | parser.add_argument('-cost_function', 48 | dest='cost_function', 49 | required=True, 50 | help='The cost function to use.') 51 | parser.add_argument('-n_iter', 52 | dest='n_iter', 53 | default=5, 54 | help='Number of perceptron iterations. Defaults to 5.') 55 | parser.add_argument('-cost_scaling', 56 | dest='cost_scaling', 57 | default=1, 58 | help='Scaling factor of the cost function. Defaults ' 59 | 'to 1.') 60 | parser.add_argument('-random_seed', 61 | dest='seed', 62 | default=23, 63 | help='Random seed for training data shuffling. ' 64 | 'Defaults to 23.') 65 | parser.add_argument('-features', 66 | dest='features', 67 | help='The file containing the list of features. If not ' 68 | 'provided, defaults to a standard set of ' 69 | 'features.') 70 | 71 | return parser.parse_args() 72 | 73 | 74 | if sys.version_info[0] == 2: 75 | logging.warning("You are running cort under Python 2. cort is much more " 76 | "efficient under Python 3.3+.") 77 | 78 | args = parse_args() 79 | 80 | if args.features: 81 | mention_features, pairwise_features = import_helper.get_features( 82 | args.features) 83 | else: 84 | mention_features = [ 85 | features.fine_type, 86 | features.gender, 87 | features.number, 88 | features.sem_class, 89 | features.deprel, 90 | features.head_ner, 91 | features.length, 92 | features.head, 93 | features.first, 94 | features.last, 95 | features.preceding_token, 96 | features.next_token, 97 | features.governor, 98 | features.ancestry 99 | ] 100 | 101 | pairwise_features = [ 102 | features.exact_match, 103 | features.head_match, 104 | features.same_speaker, 105 | features.alias, 106 | features.sentence_distance, 107 | features.embedding, 108 | features.modifier, 109 | features.tokens_contained, 110 | features.head_contained, 111 | features.token_distance 112 | ] 113 | 114 | 115 | perceptron = import_helper.import_from_path(args.perceptron)( 116 | cost_scaling=int(args.cost_scaling), 117 | n_iter=int(args.n_iter), 118 | seed=int(args.seed) 119 | ) 120 | 121 | extractor = instance_extractors.InstanceExtractor( 122 | import_helper.import_from_path(args.extractor), 123 | mention_features, 124 | pairwise_features, 125 | import_helper.import_from_path(args.cost_function), 126 | perceptron.get_labels() 127 | ) 128 | 129 | logging.info("Reading in data.") 130 | training_corpus = corpora.Corpus.from_file("training", 131 | codecs.open(args.input_filename, 132 | "r", "utf-8")) 133 | 134 | logging.info("Extracting system mentions.") 135 | for doc in training_corpus: 136 | doc.system_mentions = mention_extractor.extract_system_mentions(doc) 137 | 138 | model = experiments.learn( 139 | training_corpus, 140 | extractor, 141 | perceptron 142 | ) 143 | 144 | logging.info("Writing model to file.") 145 | pickle.dump(model, open(args.output_filename, "wb"), protocol=2) 146 | 147 | logging.info("Done.") 148 | -------------------------------------------------------------------------------- /bin/cort-visualize:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from __future__ import print_function 5 | import argparse 6 | import codecs 7 | import logging 8 | 9 | 10 | from cort.preprocessing import pipeline 11 | from cort.analysis import visualization, error_extractors, spanning_tree_algorithms 12 | from cort.core import corpora 13 | 14 | 15 | __author__ = 'smartschat' 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser(description='Visualize output.') 20 | parser.add_argument('input_filename', 21 | help='The files to visualize', 22 | nargs='*') 23 | parser.add_argument('-corenlp', 24 | required=True, 25 | dest='corenlp', 26 | help='Location of CoreNLP jars.') 27 | 28 | return parser.parse_args() 29 | 30 | 31 | logging.basicConfig(level=logging.INFO, 32 | format='%(asctime)s %(levelname)s %(message)s') 33 | 34 | args = parse_args() 35 | 36 | p = pipeline.Pipeline(args.corenlp, with_coref=True) 37 | 38 | corpus_to_visualize = p.run_on_docs("corpus", args.input_filename) 39 | 40 | ex = error_extractors.ErrorExtractor(corpus_to_visualize, 41 | spanning_tree_algorithms.recall_accessibility, 42 | spanning_tree_algorithms.precision_system_output) 43 | 44 | ex.add_system(corpus_to_visualize) 45 | 46 | decisions = ex.get_errors() 47 | 48 | visualizer = visualization.Visualizer(decisions, "corpus", 49 | for_raw_input=True) 50 | 51 | visualizer.run() -------------------------------------------------------------------------------- /bin/run-multigraph: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import logging 5 | 6 | from cort.core import corpora 7 | from cort.core import mention_extractor 8 | from cort.coreference.multigraph import multigraphs, features, decoders, \ 9 | weighting_functions 10 | 11 | 12 | logging.basicConfig(level=logging.INFO, 13 | format='%(asctime)s %(levelname)s %(message)s') 14 | 15 | parser = argparse.ArgumentParser(description='Run the multigraph coreference ' 16 | 'resolution system.') 17 | parser.add_argument('-in', 18 | required=True, 19 | dest='input_filename', 20 | help='The input file. Must follow the format of the CoNLL ' 21 | 'shared tasks on coreference resolution (see ' 22 | 'http://conll.cemantix.org/2012/data.html).') 23 | parser.add_argument('-out', 24 | dest='output_filename', 25 | required=True, 26 | help='The output file.') 27 | parser.add_argument('-ante', 28 | dest='antecedents_output_filename', 29 | default=None, 30 | help='The file where antecedent information should be ' 31 | 'written to.
Defaults to None.') 32 | 33 | args = parser.parse_args() 34 | 35 | logging.info("Reading in corpus") 36 | 37 | corpus = corpora.Corpus.from_file("my corpus", 38 | open(args.input_filename)) 39 | 40 | logging.info("Extracting system mentions") 41 | for doc in corpus: 42 | doc.system_mentions = mention_extractor.extract_system_mentions(doc) 43 | 44 | negative_features = [features.not_modifier, 45 | features.not_compatible, 46 | features.not_embedding, 47 | features.not_speaker, 48 | features.not_singleton, 49 | features.not_pronoun_distance, 50 | features.not_anaphoric] 51 | 52 | positive_features = [features.alias, 53 | features.non_pronominal_string_match, 54 | features.head_match, 55 | features.pronoun_same_canonical_form, 56 | features.anaphor_pronoun, 57 | features.speaker, 58 | features.antecedent_is_subject, 59 | features.antecedent_is_object, 60 | features.substring, 61 | features.lexical] 62 | 63 | cmc = multigraphs.CorefMultigraphCreator( 64 | positive_features, 65 | negative_features, 66 | weighting_functions.for_each_relation_with_distance, 67 | {}) 68 | 69 | relation_weights = {} 70 | 71 | for relation in positive_features: 72 | relation_weights[relation] = 1 73 | 74 | relation_weights[features.antecedent_is_object] = 0.5 75 | 76 | cmc.relation_weights = relation_weights 77 | 78 | logging.info("Decoding") 79 | 80 | decoder = decoders.MultigraphDecoder(cmc) 81 | 82 | decoder.decode(corpus) 83 | 84 | logging.info("Writing coreference to file") 85 | 86 | corpus.write_to_file(open(args.output_filename, 'w')) 87 | 88 | if args.antecedents_output_filename: 89 | logging.info("Writing antecedent decisions to file") 90 | corpus.write_antecedent_decisions_to_file( 91 | open(args.antecedents_output_filename, 'w')) 92 | 93 | logging.info("Finished") 94 | -------------------------------------------------------------------------------- /cort/__init__.py: -------------------------------------------------------------------------------- 1 | """ cort - a toolkit for coreference resolution and error analysis. """ 2 | 3 | __author__ = 'martscsn' 4 | -------------------------------------------------------------------------------- /cort/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | """ Classes and functions for coreference resolution error analysis and 2 | visualisation. """ 3 | 4 | __author__ = 'smartschat' 5 | -------------------------------------------------------------------------------- /cort/analysis/error_extractors.py: -------------------------------------------------------------------------------- 1 | """ Extract errors made by systems w.r.t. a reference corpus. """ 2 | 3 | 4 | from cort.analysis import data_structures 5 | 6 | 7 | __author__ = 'smartschat' 8 | 9 | 10 | class ErrorExtractor: 11 | """ Extract, manage and store recall and precision errors. 12 | 13 | Error extraction for recall errors works as follows: 14 | 15 | Go through each document. For each reference entity e in the document, 16 | construct an entity graph g_e for e and compute a partition of g_e by the 17 | system entity graphs. Then compute a spanning tree t_e of g_e and take 18 | every edge in t_e that does not appear in the partition as an error. 19 | 20 | For computing precision errors, switch the roles of reference and system 21 | entities. 22 | 23 | Attributes: 24 | reference_corpus (Corpus): The reference corpus with the gold 25 | information concerning the coreference relation. 
26 | recall_spanning_tree_algorithm (function): A function mapping an 27 | entity graph and one of its partitions to a list of mention pairs, 28 | which represent a spanning tree of the entity graph. This 29 | function is used to compute recall errors. 30 | precision_spanning_tree_algorithm (function): Same as above, but for 31 | precision errors. 32 | errors (dict): A mapping of error descriptions to sets containing the 33 | respective errors. 34 | """ 35 | def __init__(self, 36 | reference_corpus, 37 | recall_spanning_tree_algorithm, 38 | precision_spanning_tree_algorithm, 39 | ): 40 | """ Initialize the error analysis. 41 | 42 | Args: 43 | reference_corpus (Corpus): The reference corpus with the gold 44 | information concerning the coreference relation. 45 | recall_spanning_tree_algorithm (function): A function mapping an 46 | entity graph and one of its partitions to a list of mention pairs, 47 | which represent a spanning tree of the entity graph. This 48 | function is used to compute recall errors. 49 | precision_spanning_tree_algorithm (function): Same as above, but for 50 | precision errors. 51 | """ 52 | 53 | self.reference_corpus = reference_corpus 54 | self.recall_spanning_tree_algorithm = recall_spanning_tree_algorithm 55 | self.precision_spanning_tree_algorithm = \ 56 | precision_spanning_tree_algorithm 57 | self.errors = {} 58 | self.corpora = {} 59 | 60 | def add_system(self, system_corpus, which_mentions="annotated"): 61 | """ Add a system to the error analysis. 62 | 63 | Error extraction for recall errors works as follows: 64 | 65 | Go through each document. For each reference entity e in the document, 66 | construct an entity graph g_e for e and compute a partition of g_e by 67 | the system entity graphs. Then compute a spanning tree t_e of g_e and 68 | take every edge in t_e that does not appear in the partition as an 69 | error. 70 | 71 | For computing precision errors, switch the roles of reference and system 72 | entities. 73 | 74 | Also extracts all pairwise decisions (if available). 75 | 76 | Args: 77 | system_corpus (Corpus): A corpus obtained from system output. 78 | which_mentions (str): Either "annotated" or "extracted", 79 | defaults to "annotated". Specifies from which mentions in 80 | the system corpus coreference information should be 81 | obtained, either annotated mentions or system mentions. 82 | """ 83 | if which_mentions not in ["annotated", "extracted"]: 84 | raise ValueError("which_mentions must be " 85 | "either 'annotated' or 'extracted'.") 86 | 87 | recall_errors, precision_errors = self.__compute_errors(system_corpus, 88 | which_mentions) 89 | 90 | self.errors[system_corpus.description] = { 91 | "recall_errors": {}, 92 | "precision_errors": {}, 93 | "decisions": {} 94 | } 95 | 96 | self.errors[system_corpus.description]["recall_errors"]["all"] = \ 97 | recall_errors 98 | self.errors[ 99 | system_corpus.description]["precision_errors"]["all"] = \ 100 | precision_errors 101 | self.errors[ 102 | system_corpus.description]["decisions"]["all"] = \ 103 | system_corpus.get_antecedent_decisions()[ 104 | system_corpus.description]["decisions"]["all"] 105 | 106 | self.corpora[system_corpus.description] = system_corpus 107 | 108 | def get_errors(self): 109 | """ Get errors for all systems managed by this ErrorExtractor. 110 | 111 | The errors are stored in a ``StructuredCoreferenceAnalysis``, 112 | which can be accessed like a dict.
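        Example (a sketch following the usage in ``bin/cort-visualize``;
        the spanning tree algorithms are those shipped in
        ``cort.analysis.spanning_tree_algorithms``)::

            ex = ErrorExtractor(
                reference_corpus,
                spanning_tree_algorithms.recall_accessibility,
                spanning_tree_algorithms.precision_system_output)
            ex.add_system(system_corpus)
            errors = ex.get_errors()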
113 | 114 | If a corpus with the description 115 | ``ranking`` was added via ``self.add_system``, 116 | ``self.errors["ranking"]["recall_errors"]["all"]`` is an ``EnhancedSet`` 117 | containing all recall errors of the system. Errors of other systems 118 | and precision errors can be accessed analogously. 119 | 120 | Returns: 121 | StructuredCoreferenceAnalysis: A StructuredCoreferenceAnalysis 122 | containing the errors. 123 | """ 124 | return data_structures.StructuredCoreferenceAnalysis( 125 | self.errors, corpora=self.corpora, 126 | reference=self.reference_corpus) 127 | 128 | def __compute_errors(self, system_corpus, which_mentions): 129 | gold_graphs = [data_structures.EntityGraph.from_mentions( 130 | doc.annotated_mentions, "annotated_set_id") 131 | for doc in self.reference_corpus.documents] 132 | 133 | if which_mentions == 'annotated': 134 | system_graphs = [data_structures.EntityGraph.from_mentions( 135 | doc.annotated_mentions, "annotated_set_id") 136 | for doc in system_corpus.documents] 137 | else: 138 | system_graphs = [data_structures.EntityGraph.from_mentions( 139 | doc.system_mentions, "set_id") 140 | for doc in system_corpus.documents] 141 | 142 | recall_errors = [] 143 | precision_errors = [] 144 | 145 | for doc_gold_graphs, doc_system_graphs in zip(gold_graphs, 146 | system_graphs): 147 | recall_errors.extend( 148 | self.__compute_errors_for_doc( 149 | doc_gold_graphs, 150 | doc_system_graphs, 151 | self.recall_spanning_tree_algorithm)) 152 | precision_errors.extend( 153 | self.__compute_errors_for_doc( 154 | doc_system_graphs, 155 | doc_gold_graphs, 156 | self.precision_spanning_tree_algorithm)) 157 | 158 | return (data_structures.EnhancedSet(recall_errors), 159 | data_structures.EnhancedSet(precision_errors)) 160 | 161 | @staticmethod 162 | def __compute_errors_for_doc(base_graphs, 163 | partitioning_graphs, 164 | spanning_tree_algorithm): 165 | errors = [] 166 | 167 | for graph in base_graphs: 168 | errors.extend( 169 | ErrorExtractor.__compute_errors_for_graph( 170 | graph, partitioning_graphs, spanning_tree_algorithm)) 171 | 172 | return errors 173 | 174 | @staticmethod 175 | def __compute_errors_for_graph(graph, 176 | partitioning_graphs, 177 | spanning_tree_algorithm): 178 | partitioned_graph = graph.partition(partitioning_graphs) 179 | spanning_tree = spanning_tree_algorithm(graph, partitioned_graph) 180 | extra_pairs = [ 181 | (anaphor, antecedent) for anaphor, antecedent in spanning_tree 182 | if anaphor not in partitioned_graph.edges or 183 | antecedent not in partitioned_graph.edges[anaphor] 184 | ] 185 | 186 | return [(anaphor, antecedent) for anaphor, antecedent in sorted( 187 | extra_pairs)] 188 | -------------------------------------------------------------------------------- /cort/analysis/plotting.py: -------------------------------------------------------------------------------- 1 | """ Plot error analysis statistics. """ 2 | 3 | from __future__ import division 4 | 5 | 6 | from matplotlib import pyplot 7 | from matplotlib import cm 8 | 9 | import numpy 10 | 11 | from pylab import rcParams 12 | 13 | 14 | __author__ = 'martscsn' 15 | 16 | 17 | def plot(data, 18 | title, 19 | xlabel, 20 | ylabel, 21 | filename=None): 22 | """ Plot error analysis statistics. 23 | 24 | In particular, plot a bar chart for the numbers described in ``data``. 25 | 26 | Args: 27 | data (list(str, list((str,int)))): The data to be plotted. The ith entry 28 | of this list contains the name which will appear in the legend, 29 | and a list of (category, count) pairs.
These are the individual 30 | data points which will be plotted. 31 | title (str): Title of the plot. 32 | xlabel (str): Label of the x axis. 33 | ylabel (str): Label of the y axis. 34 | filename (str, optional): If set, write plot to ``filename``. 35 | 36 | Example:: 37 | pair_errs = errors["pair"]["recall_errors"]["all"] 38 | tree_errs = errors["tree"]["recall_errors"]["all"] 39 | 40 | plot( 41 | [("pair", [(cat, len(pair_errs[cat])) for cat in pair_errs.keys()]), 42 | ("tree", [(cat, len(tree_errs[cat])) for cat in tree_errs.keys()])], 43 | "Recall Errors", 44 | "Type of anaphor", 45 | "Number of Errors") 46 | """ 47 | 48 | rcParams['xtick.major.pad'] = '12' 49 | rcParams['ytick.major.pad'] = '12' 50 | 51 | fig, ax = pyplot.subplots() 52 | 53 | systems = [] 54 | categories = [] 55 | 56 | colors = cm.Accent(numpy.linspace(0, 1, len(data))) 57 | 58 | bars_for_legend = [] 59 | 60 | for i, system_data in enumerate(data): 61 | system_name, categories_and_numbers = system_data 62 | systems.append(system_name) 63 | 64 | for j, cat_and_number in enumerate(categories_and_numbers): 65 | category, number = cat_and_number 66 | 67 | if category not in categories: 68 | categories.append(category) 69 | 70 | bar = ax.bar(2*j + i*(1/len(data)), number, color=colors[i], 71 | width=1/len(data), label=system_name) 72 | 73 | if j == 0: 74 | bars_for_legend.append(bar) 75 | 76 | xticks = [2*k + 0.5 for k in range(0, len(categories))] 77 | 78 | pyplot.title(title, fontsize=28) 79 | pyplot.xlabel(xlabel, fontsize=24) 80 | pyplot.ylabel(ylabel, fontsize=24) 81 | 82 | ax.spines["top"].set_visible(False) 83 | ax.spines["right"].set_visible(False) 84 | 85 | ax.get_xaxis().tick_bottom() 86 | ax.get_yaxis().tick_left() 87 | 88 | ax.set_xticklabels(categories) 89 | ax.set_xticks(xticks) 90 | 91 | pyplot.tick_params(axis='both', which='major', labelsize=20) 92 | 93 | if filename: 94 | legend = ax.legend(bars_for_legend, systems, 95 | loc='upper right', bbox_to_anchor=(1.2, 1.2)) 96 | 97 | fig.savefig(filename, bbox_extra_artists=(legend,), bbox_inches='tight') 98 | else: 99 | legend = ax.legend(bars_for_legend, systems, loc='upper right') 100 | legend.draggable() 101 | 102 | fig.show() 103 | -------------------------------------------------------------------------------- /cort/analysis/spanning_tree_algorithms.py: -------------------------------------------------------------------------------- 1 | """ Algorithms for computing spanning trees of entity graphs. """ 2 | 3 | 4 | __author__ = 'smartschat' 5 | 6 | 7 | def precision_system_output(entity, partitioned_entity): 8 | """ Compute a spanning tree from antecedent information. 9 | 10 | All edges in the spanning tree correspond to anaphor-antecedent pairs. In 11 | order to access this antecedent information, the attribute "antecedent" of 12 | the mentions in the entity must be set. 13 | 14 | Args: 15 | entity (EntityGraph): The EntityGraph for the entity for which the 16 | spanning tree should be computed. 17 | partitioned_entity (EntityGraph): A partition of the entity -- not 18 | used for this algorithm. 19 | 20 | Returns: 21 | list(Mention, Mention): A list of mention pairs, which constitute the 22 | edges of the spanning tree. For a pair (m, n), n appears later in 23 | the text than m. 
24 | """ 25 | edges = [] 26 | for mention in entity.edges: 27 | # just look at system output 28 | if ("antecedent" in mention.attributes 29 | and mention.attributes["antecedent"] in entity.edges[mention]): 30 | edges.append((mention, mention.attributes["antecedent"])) 31 | 32 | return sorted(edges) 33 | 34 | 35 | def recall_closest(entity, partitioned_entity): 36 | """ Compute a spanning tree by always taking the closest mention in the same 37 | entity. 38 | 39 | Args: 40 | entity (EntityGraph): The EntityGraph for the entity for which the 41 | spanning tree should be computed. 42 | partitioned_entity (EntityGraph): A partition of the entity -- not 43 | used for this algorithm. 44 | 45 | Returns: 46 | list(Mention, Mention): A list of mention pairs, which constitute the 47 | edges of the spanning tree. For a pair (m, n), n appears later in 48 | the text than m. 49 | """ 50 | edges = [] 51 | for mention in entity.edges: 52 | # always take closest (except for first mention in entity, which does 53 | # not have any antecedent) 54 | if entity.edges[mention]: 55 | if mention in partitioned_entity.edges: 56 | antecedent = sorted(partitioned_entity.edges[mention], 57 | reverse=True)[0] 58 | else: 59 | antecedent = sorted(entity.edges[mention], reverse=True)[0] 60 | edges.append((mention, antecedent)) 61 | 62 | return sorted(edges) 63 | 64 | 65 | def recall_accessibility(entity, partitioned_entity): 66 | """ Compute a spanning tree by choosing edges according to the accessibility 67 | of the antecedent. 68 | 69 | First, if a mention has an out-degree of at least one in the partitioned 70 | entity, take the edge with the closest mention distance as an edge for 71 | the spanning tree. Otherwise, proceed as follows. 72 | 73 | If a mention m is a proper name or a common noun, choose an antecedent as 74 | follows: 75 | 76 | - if a proper name antecedent exists, take the closest and output this 77 | pair as an edge 78 | - else if a common noun antecedent exists, take the closest and output 79 | this pair as an edge 80 | - else take the closest preceding mention and output this pair as an 81 | edge 82 | 83 | For all other mentions, take the closest preceding mention and output 84 | this pair as an edge. 85 | 86 | Args: 87 | entity (EntityGraph): The EntityGraph for the entity for which the 88 | spanning tree should be computed. 89 | partitioned_entity (EntityGraph): A partition of the entity -- not 90 | used for this algorithm. 91 | 92 | Returns: 93 | list(Mention, Mention): A list of mention pairs, which constitute the 94 | edges of the spanning tree. For a pair (m, n), n appears later in 95 | the text than m. 96 | """ 97 | edges = [] 98 | for mention in entity.edges: 99 | if entity.edges[mention]: 100 | # mention is not the first in subentity? take closest! 101 | if mention in partitioned_entity.edges: 102 | antecedent = sorted(partitioned_entity.edges[mention], 103 | reverse=True)[0] 104 | else: 105 | antecedent = __get_antecedent_by_type(mention, 106 | entity.edges[mention]) 107 | 108 | edges.append((mention, antecedent)) 109 | 110 | return sorted(edges) 111 | 112 | 113 | def __get_antecedent_by_type(mention, candidates): 114 | # make sure... 115 | candidates_reversed = sorted(candidates, reverse=True) 116 | # mention is (demonstrative) pronoun? take closest! 
117 | if (mention.attributes["type"] == "PRO" or 118 | mention.attributes["type"] == "DEM"): 119 | return candidates_reversed[0] 120 | # otherwise choose by type, back off to closest 121 | elif __get_by_pos(candidates_reversed, "NAM"): 122 | return __get_by_pos(candidates_reversed, "NAM") 123 | elif __get_by_pos(candidates_reversed, "NOM"): 124 | return __get_by_pos(candidates_reversed, "NOM") 125 | else: 126 | return candidates_reversed[0] 127 | 128 | 129 | def __get_by_pos(candidates, pos): 130 | for mention in candidates: 131 | if mention.attributes["type"] == pos: 132 | return mention 133 | -------------------------------------------------------------------------------- /cort/analysis/visualization/TODO: -------------------------------------------------------------------------------- 1 | Python: 2 | - use python http server in order to avoid multi-megabyte html blobs (simplehttpserver) 3 | !- use discernible colours: https://github.com/gtaylor/python-colormath 4 | 5 | jQuery/javascript: 6 | - improve mentionhead tooltip behaviour 7 | - Dynamic computation of heights, etc. in scroll() -------------------------------------------------------------------------------- /cort/analysis/visualization/style.css: -------------------------------------------------------------------------------- 1 | html, body { 2 | margin: 0; 3 | font-family: Sans-Serif; 4 | } 5 | 6 | h1 { 7 | padding: 5px; 8 | margin: 0; 9 | text-align: left; 10 | } 11 | 12 | h3 { 13 | margin: 0 0 0 10px; 14 | padding: 0; 15 | font-family: Sans-Serif; 16 | font-size: 1em; 17 | } 18 | 19 | #header { 20 | background-color: rgb(1,70,153); 21 | margin: 0; 22 | padding: 5px; 23 | height: 50px; 24 | width: 100%; 25 | color: white; 26 | position: fixed; 27 | top: 0; 28 | z-index: 25; 29 | } 30 | 31 | #documentsNavi { 32 | margin: 10px 0 0 0; 33 | padding: 0; 34 | width: 225px; /* Must be same as .navcontainer*/ 35 | position: fixed; 36 | top: 60px; 37 | float: left; 38 | } 39 | 40 | #documentsNavi ul { 41 | margin: 5px 0 0 10px; 42 | padding: 5px 0; 43 | list-style-type: none; 44 | height: 100px; 45 | overflow: auto; 46 | font-size: .8em; 47 | background-color: #bbbbbb; 48 | } 49 | 50 | #documentsNavi ul li { 51 | margin: 0; 52 | padding: 5px; 53 | cursor: pointer; 54 | } 55 | 56 | #documentsNavi li:nth-child(even) { 57 | background-color: #bbbbbb; 58 | } 59 | 60 | #documentsNavi li:nth-child(odd) { 61 | background-color: #cccccc; 62 | } 63 | 64 | #documentsNavi ul li:hover, #documentsNavi ul li:active { 65 | background-color: gray; 66 | } 67 | 68 | #documentsNavi ul li.highlight { 69 | font-weight: bolder; 70 | } 71 | 72 | /* Contains navigation bars and the document text itself */ 73 | .document { 74 | margin: 80px 10px 0 0; 75 | padding: 0; 76 | display: none; 77 | min-height: 600px; 78 | } 79 | 80 | #documentsNavi + .document { 81 | display: block; 82 | } 83 | 84 | .navcontainer { 85 | margin: 0; 86 | padding: 0; 87 | position: fixed; 88 | top: 200px; 89 | width: 225px; 90 | } 91 | 92 | .navcontainer > div { 93 | margin-top: 20px; 94 | padding: 0; 95 | } 96 | 97 | .tease { 98 | display: none; 99 | opacity: .8; 100 | margin: 0 0 0 5px; 101 | padding: 0; 102 | font-family: Sans-Serif; 103 | font-size: .8em; 104 | } 105 | 106 | .navcontainer > div h3:hover { 107 | display: inline-block; 108 | cursor: pointer; 109 | } 110 | 111 | .navcontainer > div h3:hover + .tease { 112 | display: inline-block; 113 | } 114 | 115 | /* Gold and system navigation boxes */ 116 | .navcontainer > div > ul { 117 | margin: 5px 0 0 10px; 118 | padding: 5px 0;
119 | list-style-type: none; 120 | overflow-y: auto; 121 | max-height: 80px; 122 | font-size: .8em; 123 | background-color: #bbbbbb; 124 | } 125 | 126 | div.navcontainer div ul li:nth-child(even) { 127 | background-color: #bbbbbb; 128 | } 129 | 130 | div.navcontainer div ul li:nth-child(odd) { 131 | background-color: #cccccc; 132 | } 133 | 134 | .navcontainer > div ul li { 135 | margin: 0; 136 | padding: 2px; 137 | } 138 | 139 | .navcontainer > div ul li:hover { 140 | cursor: pointer; 141 | } 142 | 143 | /* Errors navigation box */ 144 | div.errorsNavi { 145 | 146 | } 147 | 148 | div.errorsNavi h4 { 149 | margin: 0; 150 | padding: 2px; 151 | font-size: .9em; 152 | font-weight: light; 153 | } 154 | 155 | div.errorsNavi h4:hover { 156 | display: inline-block; 157 | cursor: pointer; 158 | } 159 | 160 | div.errorsNavi h4:hover + .tease { 161 | display: inline-block; 162 | } 163 | 164 | div.errorsNavi > div { 165 | margin: 0 0 0 10px; 166 | padding: 5px 0; 167 | background-color: #eeeeee; 168 | } 169 | 170 | .precisionErrors, .recallErrors { 171 | margin: 0; 172 | padding: 5px 0; 173 | list-style-type: none; 174 | font-size: .8em; 175 | height: 80px; 176 | overflow: auto; 177 | } 178 | 179 | ol.text { 180 | margin: 10px 0 0 250px; 181 | padding: 5px; 182 | line-height: 250%; 183 | font-family: Sans-Serif; 184 | font-size: .9em; 185 | background-color: #eeeeee; 186 | } 187 | 188 | ol.text { 189 | counter-reset: li; 190 | display: table; 191 | } 192 | 193 | ol.text li.sentence { 194 | margin: 0; 195 | padding: 0; 196 | } 197 | 198 | ol.text > li { 199 | margin: 0 0 6px 2em; 200 | padding: 4px 8px; 201 | list-style: none; 202 | counter-increment: li; 203 | display: table-row; 204 | } 205 | 206 | ol.text > li:before { 207 | content: counter(li) "."; 208 | font-size: .7em; 209 | color: gray; 210 | display: inline-block; 211 | width: 20px; 212 | text-align: right; 213 | padding-right: 5px; 214 | display: table-cell; 215 | } 216 | 217 | ol.text span.mention { 218 | margin: 0; 219 | display: inline; 220 | border-radius: 0.5em; 221 | } 222 | 223 | ol.text span.mention:hover { 224 | cursor: pointer; 225 | } 226 | 227 | div ol.text .goldBorder { 228 | border: 3px solid gold; 229 | } 230 | 231 | div ol.text .blueBorder { 232 | border: 3px solid blue; 233 | } 234 | 235 | ol.text *[class^='system']{ 236 | border: 1px solid blue; 237 | padding: 5px; 238 | } 239 | 240 | ol.text *[class^='gold']{ 241 | border: 1px solid gold; 242 | padding: 2px; 243 | } 244 | /* 245 | span.transparentBg, .goldNavi ul li.transparentBg, .systemNavi ul li.transparentBg { 246 | background-color: transparent; 247 | border: 3px solid transparent; 248 | } 249 | */ 250 | .label { 251 | line-height: 100%; 252 | background-color: #F1F101; 253 | z-index: 24; 254 | opacity: .9; 255 | box-shadow: 2px 2px 13px #aaa; 256 | } 257 | 258 | .label:hover { 259 | display: block; 260 | } -------------------------------------------------------------------------------- /cort/config_files/corenlp.ini: -------------------------------------------------------------------------------- 1 | annotators = tokenize,ssplit,pos,lemma,parse,ner -------------------------------------------------------------------------------- /cort/config_files/corenlp_with_coref.ini: -------------------------------------------------------------------------------- 1 | annotators = tokenize,ssplit,pos,lemma,parse,ner 2 | tokenize.whitespace = true 3 | ssplit.eolonly = true -------------------------------------------------------------------------------- /cort/core/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ Includes core functionality for managing documents and mentions.""" 2 | 3 | __author__ = 'martscsn' 4 | -------------------------------------------------------------------------------- /cort/core/external_data.py: -------------------------------------------------------------------------------- 1 | """ Read in and access data from external resources such as gender lists.""" 2 | 3 | import os 4 | import pickle 5 | 6 | 7 | import cort 8 | from cort.core import singletons 9 | from cort.core import util 10 | 11 | 12 | __author__ = 'smartschat' 13 | 14 | 15 | @singletons.Singleton 16 | class GenderData: 17 | """ Read in and access data from lists with gender information. 18 | 19 | Attributes: 20 | word_to_gender (dict(str, str)): A mapping from lower-case strings 21 | to one of four genders: 'MALE', 'FEMALE', 'NEUTRAL' and 'PLURAL'. 22 | """ 23 | def __init__(self): 24 | """ Initialize the word-to-gender mapping from gender lists. 25 | """ 26 | self.word_to_gender = {} 27 | 28 | directory = cort.__path__[0] + "/resources/" 29 | 30 | lists = [ 31 | open(directory + "male.list"), 32 | open(directory + "female.list"), 33 | open(directory + "neutral.list"), 34 | open(directory + "plural.list") 35 | ] 36 | 37 | genders = ["MALE", "FEMALE", "NEUTRAL", "PLURAL"] 38 | 39 | for gender, gender_list in zip(genders, lists): 40 | for word in gender_list.readlines(): 41 | self.word_to_gender[word.strip()] = gender 42 | 43 | def look_up(self, attributes): 44 | """ Look up the gender of a mention described by the input attributes. 45 | 46 | Args: 47 | attributes (dict(str,object)): A dict describing attributes of 48 | mentions. Must contain "tokens" and "head", which have lists 49 | of strings as values. 50 | 51 | Returns: 52 | (str): None or one of the four genders 'MALE', 'FEMALE', 53 | 'NEUTRAL' or 'PLURAL'. 54 | """ 55 | # whole string 56 | if " ".join(attributes["tokens"]).lower() in self.word_to_gender: 57 | return self.word_to_gender[" ".join(attributes["tokens"]).lower()] 58 | # head 59 | elif " ".join(attributes["head"]).lower() in self.word_to_gender: 60 | return self.word_to_gender[" ".join(attributes["head"]).lower()] 61 | # head token by token 62 | elif self.__look_up_token_by_token(attributes["head"]): 63 | return self.__look_up_token_by_token(attributes["head"]) 64 | 65 | def __look_up_token_by_token(self, tokens): 66 | for token in tokens: 67 | if token[0].isupper() and token.lower() in self.word_to_gender: 68 | return self.word_to_gender[token.lower()] 69 | 70 | 71 | @singletons.Singleton 72 | class LexicalData: 73 | """ Read in and access data containing pairs of coreferent mention strings. 74 | 75 | Attributes: 76 | pairs (set((str, str))): A set of string pairs, which represent strings 77 | of potentially coreferent mentions. 78 | """ 79 | def __init__(self): 80 | """ Initialize the set of pairs from 81 | package_root/resources/coreferent_pairs.obj. 82 | """ 83 | directory = cort.__path__[0] + "/resources/" 84 | 85 | self.pairs = pickle.load( 86 | open(directory + "coreferent_pairs.obj", "rb")) 87 | 88 | def look_up(self, anaphor, antecedent): 89 | """ Look up strings of the mentions in the pair list. 90 | 91 | Args: 92 | anaphor (Mention): A mention. 93 | antecedent (Mention): Another mention, the candidate antecedent 94 | for anaphor. 
95 |
96 |         Returns:
97 |             True if the pair of strings corresponding to anaphor and
98 |             antecedent, stripped of determiners and possessive 's', can be
99 |             found in the list of pairs.
100 |         """
101 |         # whole string
102 |         anaphor_cleaned = " ".join(
103 |             util.clean_via_pos(anaphor.attributes["tokens"],
104 |                                anaphor.attributes["pos"]))
105 |         antecedent_cleaned = " ".join(
106 |             util.clean_via_pos(antecedent.attributes["tokens"],
107 |                                antecedent.attributes["pos"]))
108 |
109 |         return (
110 |             (anaphor_cleaned, antecedent_cleaned) in self.pairs
111 |             or (antecedent_cleaned, anaphor_cleaned) in self.pairs
112 |         )
113 |
114 |
115 | @singletons.Singleton
116 | class SingletonMentions:
117 |     """ Read in and access strings of potential singleton mentions.
118 |
119 |     Attributes:
120 |         singletons (set(str)): A set of strings, which represent strings
121 |             of potential singleton mentions.
122 |     """
123 |     def __init__(self):
124 |         """ Initialize the set of singleton mention strings from
125 |         package_root/resources/singletons_not_cleaned.obj.
126 |         """
127 |         directory = cort.__path__[0] + "/resources/"
128 |
129 |         self.singletons = pickle.load(
130 |             open(directory + "singletons_not_cleaned.obj", "rb"))
131 |
-------------------------------------------------------------------------------- /cort/core/mixins.py: --------------------------------------------------------------------------------
1 | """ Mixins. """
2 |
3 |
4 | __author__ = 'smartschat'
5 |
6 |
7 | class ComparableMixin:
8 |     """ A mixin for deducing comparison operators from __lt__. """
9 |     def __eq__(self, other):
10 |         if other is None:
11 |             return False
12 |         return not self < other and not other < self
13 |
14 |     def __ne__(self, other):
15 |         return not self.__eq__(other)
16 |
17 |     def __gt__(self, other):
18 |         return other < self
19 |
20 |     def __ge__(self, other):
21 |         return not self < other
22 |
23 |     def __le__(self, other):
24 |         return not other < self
25 |
-------------------------------------------------------------------------------- /cort/core/singletons.py: --------------------------------------------------------------------------------
1 | """ Implements the singleton pattern. """
2 |
3 |
4 | __author__ = 'smartschat'
5 |
6 |
7 | class Singleton:
8 |     """
9 |     A non-thread-safe helper class to ease implementing singletons.
10 |     This should be used as a decorator -- not a metaclass -- to the
11 |     class that should be a singleton.
12 |
13 |     The decorated class can define one `__init__` function that
14 |     takes only the `self` argument. Other than that, there are
15 |     no restrictions that apply to the decorated class.
16 |
17 |     To get the singleton instance, use the `get_instance` method. Trying
18 |     to use `__call__` will result in a `TypeError` being raised.
19 |
20 |     Limitations: The decorated class cannot be inherited from.
21 |
22 |     Source:
23 |     http://stackoverflow.com/questions/42558/python-and-the-singleton-pattern
24 |
25 |     """
26 |
27 |     def __init__(self, decorated):
28 |         self._decorated = decorated
29 |         self._instance = None
30 |
31 |     def get_instance(self):
32 |         """
33 |         Returns the singleton instance. Upon its first call, it creates a
34 |         new instance of the decorated class and calls its `__init__` method.
35 |         On all subsequent calls, the already created instance is returned.
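        Example (illustrative; GenderData in cort.core.external_data is
        decorated with @singletons.Singleton)::

            gender_data = external_data.GenderData.get_instance()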
36 | 37 | """ 38 | if self._instance: 39 | return self._instance 40 | else: 41 | self._instance = self._decorated() 42 | return self._instance 43 | 44 | def __call__(self): 45 | raise TypeError('Singletons must be accessed through ' 46 | '`get_instance()`.') 47 | 48 | def __instancecheck__(self, inst): 49 | return isinstance(inst, self._decorated) 50 | -------------------------------------------------------------------------------- /cort/core/spans.py: -------------------------------------------------------------------------------- 1 | """ Manage spans in documents. """ 2 | 3 | from cort.core import mixins 4 | 5 | 6 | __author__ = 'smartschat' 7 | 8 | 9 | class Span(mixins.ComparableMixin): 10 | """ Manage and compare spans in documents. 11 | 12 | Attributes: 13 | begin (int): The begin of the span. 14 | end (int): The end of the span (inclusive). 15 | """ 16 | def __init__(self, begin, end): 17 | """ Initialize a span from a begin and an end position. 18 | 19 | Args: 20 | begin (int): The begin of the span. 21 | end (int): The end of the span. 22 | """ 23 | self.begin = begin 24 | self.end = end 25 | 26 | def __str__(self): 27 | return "(" + str(self.begin) + ", " + str(self.end) + ")" 28 | 29 | def __repr__(self): 30 | return "(" + str(self.begin) + ", " + str(self.end) + ")" 31 | 32 | def __lt__(self, other): 33 | """ Check whether this span is less than another span. 34 | 35 | (a,b) < (c,d) if and only if a < c or a = c and b < d 36 | 37 | Args: 38 | other (Span): A span. 39 | 40 | Returns: 41 | True if this span is less than other, False otherwise. 42 | """ 43 | if self.begin < other.begin: 44 | return True 45 | elif self.begin > other.begin: 46 | return False 47 | elif self.end < other.end: 48 | return True 49 | else: 50 | return False 51 | 52 | def embeds(self, other): 53 | """ Check whether this span embeds another span. 54 | 55 | Args: 56 | other (Span): A span. 57 | 58 | Returns: 59 | True if this span embeds other, False otherwise. 60 | """ 61 | return self.begin <= other.begin and self.end >= other.end 62 | 63 | def __hash__(self): 64 | return hash((self.begin, self.end)) 65 | 66 | @staticmethod 67 | def parse(span_string): 68 | """ Parse a string specification of a span to a Span object. 69 | 70 | Valid representations are for example "(1, 2)" or "(1,2)". 71 | 72 | Args: 73 | span_string (str): A string representation of a span. 74 | 75 | Returns: 76 | Span: The span corresponding to the string representation. 77 | """ 78 | without_brackets = span_string.strip()[1:-1] 79 | splitted_and_stripped = [token.strip() for token 80 | in without_brackets.split(",")] 81 | return Span( 82 | int(splitted_and_stripped[0]), 83 | int(splitted_and_stripped[1])) 84 | -------------------------------------------------------------------------------- /cort/core/util.py: -------------------------------------------------------------------------------- 1 | """ Utility functions. """ 2 | 3 | __author__ = 'smartschat' 4 | 5 | 6 | def clean_via_pos(tokens, pos): 7 | """ Clean a list of tokens according to their part-of-speech tags. 8 | 9 | In particular, retain only tokens which do not have the part-of-speech tag 10 | DT (determiner) or POS (possessive 's'). 11 | 12 | Args: 13 | tokens (list(str)): A list of tokens. 14 | pos (list(str)): A list of corresponding part-of-speech tags. 15 | 16 | Returns: 17 | list(str): The list of tokens which do not have part-of-speech tag 18 | DT or POS. 
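    Example (illustrative):
        clean_via_pos(["the", "wife", "'s"], ["DT", "NN", "POS"])
        returns ["wife"], since the determiner and the possessive 's'
        are filtered out.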
19 |     """
20 |     return [token for token, pos in zip(tokens, pos)
21 |             if pos not in ["DT", "POS"]]
22 |
-------------------------------------------------------------------------------- /cort/coreference/__init__.py: --------------------------------------------------------------------------------
1 | """ Includes a unified framework for representation and learning of coreference
2 | resolution approaches."""
3 |
4 | __author__ = 'martscsn'
5 |
-------------------------------------------------------------------------------- /cort/coreference/approaches/__init__.py: --------------------------------------------------------------------------------
1 | ''' Contains implementations of various coreference resolution approaches in
2 | the unified framework.
3 | '''
4 |
5 | __author__ = 'martscsn'
6 |
-------------------------------------------------------------------------------- /cort/coreference/approaches/antecedent_trees.py: --------------------------------------------------------------------------------
1 | """ Implements instance extraction and decoding for antecedent trees.
2 |
3 | This module implements antecedent trees (Fernandes et al., 2014) within a
4 | framework that expresses coreference resolution as predicting latent structures,
5 | while performing learning using a latent structured perceptron with
6 | cost-augmented inference.
7 |
8 | Hence, antecedent trees are expressed as predicting a latent graph.
9 | In particular, let m_1, ..., m_n be all mentions in a document. Let m_0 be a
10 | dummy mention for anaphoricity determination. We predict
11 | the graph with nodes m_0, ..., m_n and with arcs (m_j, m_i) which correspond to
12 | antecedent decisions. In particular, for each j there exists exactly one i < j
13 | such that (m_j, m_i) is in the graph. Such a graph is called a *substructure*
14 | (for antecedent trees, substructures and structures coincide).
15 |
16 | To implement antecedent trees, this module contains a function that defines the
17 | search space for the graphs, and a decoder that computes the best-scoring tree
18 | of antecedent decisions, and the best-scoring tree of antecedent decisions
19 | consistent with the gold annotation (i.e. only having pairs of coreferent
20 | mentions as arcs).
21 |
22 | Reference:
23 |
24 |     - Eraldo Fernandes, Cicero dos Santos, and Ruy Milidiu. 2014. Latent trees
25 |       for coreference resolution. *Computational Linguistics*, 40(4):801-835.
26 |       http://www.aclweb.org/anthology/J14-4004
27 | """
28 |
29 | from __future__ import division
30 |
31 |
32 | import array
33 |
34 |
35 | from cort.coreference import perceptrons
36 |
37 |
38 | __author__ = 'martscsn'
39 |
40 |
41 | def extract_substructures(doc):
42 |     """ Extract the search space for the antecedent tree model.
43 |
44 |     The antecedent tree model consists of computing the optimal antecedent for
45 |     each anaphor. These decisions are represented as edges in a tree of
46 |     anaphor-antecedent decisions. This function extracts the search space for
47 |     the tree.
48 |
49 |     The search space is represented as a nested list of mention pairs. The
50 |     mention pairs are candidate arcs in the graph. The nested list contains
51 |     only one list, since antecedent trees have only one substructure for
52 |     each document.
53 |
54 |     The list contains all potential (anaphor, antecedent) pairs in the
55 |     following order: (m_1, m_0), (m_2, m_1), (m_2, m_0), (m_3, m_2), ...,
56 |     where m_j is the jth mention in the document.
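    For example (illustrative), for a document with mentions m_0 (the dummy
    mention), m_1, m_2 and m_3, the full list is (m_1, m_0), (m_2, m_1),
    (m_2, m_0), (m_3, m_2), (m_3, m_1), (m_3, m_0).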
57 |
58 |     Args:
59 |         doc (CoNLLDocument): The document to extract substructures from.
60 |
61 |     Returns:
62 |         (list(list(Mention, Mention))): The nested list of mention pairs
63 |             describing the search space for the substructures.
64 |     """
65 |     substructure = []
66 |
67 |     # iterate over mentions
68 |     for i, ana in enumerate(doc.system_mentions):
69 |
70 |         # iterate in reversed order over candidate antecedents
71 |         for ante in sorted(doc.system_mentions[:i], reverse=True):
72 |             substructure.append((ana, ante))
73 |
74 |     return [substructure]
75 |
76 |
77 | class AntecedentTreePerceptron(perceptrons.Perceptron):
78 |     """ A perceptron for antecedent trees. """
79 |     def argmax(self, substructure, arc_information):
80 |         """ Decoder for antecedent trees.
81 |
82 |         Compute highest-scoring antecedent tree and highest-scoring antecedent
83 |         tree consistent with the gold annotation.
84 |
85 |         Args:
86 |             substructure (list((Mention, Mention))): The list of mention pairs
87 |                 which define the search space for one substructure. For
88 |                 antecedent trees, this list contains all potential
89 |                 anaphor-antecedent pairs in the following order:
90 |                 (m_1, m_0), (m_2, m_1), (m_2, m_0), (m_3, m_2), ...
91 |             arc_information (dict((Mention, Mention),
92 |                 ((array, array, array), list(int), bool))):
93 |                 A mapping of arcs (= mention pairs) to information about these
94 |                 arcs. The information consists of the features, the costs for
95 |                 the arc (for each label), and whether predicting the arc to be
96 |                 coreferent is consistent with the gold annotation. The features
97 |                 are divided into three arrays: the first array contains the non-
98 |                 numeric features, the second array the numeric features, and the
99 |                 third array the values for the numeric features. The features
100 |                 are represented as integers via feature hashing.
101 |
102 |         Returns:
103 |             A 7-tuple describing the highest-scoring antecedent tree, and the
104 |             highest-scoring antecedent tree consistent with the gold
105 |             annotation. The tuple consists of:
106 |
107 |             - **best_arcs** (*list((Mention, Mention))*): the arcs
108 |               constituting the highest-scoring antecedent tree,
109 |             - **best_labels** (*list(str)*): empty, the antecedent tree
110 |               approach does not employ any labels,
111 |             - **best_scores** (*list(float)*): the scores of the
112 |               arcs in the highest-scoring antecedent tree,
113 |             - **best_cons_arcs** (*list((Mention, Mention))*): the arcs
114 |               constituting the highest-scoring antecedent tree consistent
115 |               with the gold annotation,
116 |             - **best_cons_labels** (*list(str)*): empty, the antecedent
117 |               tree approach does not employ any labels,
118 |             - **best_cons_scores** (*list(float)*): the scores of the
119 |               arcs in the highest-scoring antecedent tree consistent with
120 |               the gold annotation,
121 |             - **is_consistent** (*bool*): whether the highest-scoring
122 |               antecedent tree is consistent with the gold annotation.
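        Note (illustrative): since the candidate arcs are listed anaphor by
        anaphor, the arcs for the jth mention m_j occupy positions
        j*(j-1)/2 through j*(j-1)/2 + j - 1 of ``substructure``, which is
        exactly the slice the loop below considers for each anaphor.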
123 |         """
124 |         if not substructure:
125 |             return [], [], [], [], [], [], True
126 |
127 |         number_mentions = len(substructure[0][0].document.system_mentions)
128 |
129 |         arcs = []
130 |         arcs_scores = []
131 |         coref_arcs = []
132 |         coref_arcs_scores = []
133 |
134 |         is_consistent = True
135 |
136 |         for ana_index in range(1, number_mentions):
137 |
138 |             first_arc = ana_index*(ana_index-1)//2
139 |             last_arc = first_arc + ana_index
140 |
141 |             best, max_val, best_cons, max_cons, best_is_consistent = \
142 |                 self.find_best_arcs(substructure[first_arc:last_arc],
143 |                                     arc_information)
144 |
145 |             arcs.append(best)
146 |             arcs_scores.append(max_val)
147 |             coref_arcs.append(best_cons)
148 |             coref_arcs_scores.append(max_cons)
149 |
150 |             is_consistent &= best_is_consistent
151 |
152 |         return (
153 |             arcs,
154 |             [],
155 |             arcs_scores,
156 |             coref_arcs,
157 |             [],
158 |             coref_arcs_scores,
159 |             is_consistent
160 |         )
161 |
-------------------------------------------------------------------------------- /cort/coreference/clusterer.py: --------------------------------------------------------------------------------
1 | """ Extract coreference information from pairwise predictions."""
2 |
3 | __author__ = 'smartschat'
4 |
5 |
6 | def best_first(substructures, labels, scores, coref_labels):
7 |     """ Extract coreference clusters from coreference predictions via best-first
8 |     clustering.
9 |
10 |     In particular, go through a list of anaphor-antecedent pairs, where
11 |     pairs with the same anaphor are consecutive. Then, for each anaphor, the
12 |     best-scoring antecedent is selected (this is also called best-first
13 |     clustering). Ties are broken by position in the list: earlier items are
14 |     preferred.
15 |
16 |     Args:
17 |         substructures (list(list((Mention, Mention)))): A list of substructures.
18 |             For this clusterer, each substructure should contain only one
19 |             (anaphor, antecedent) pair. If two substructures have the same
20 |             anaphor, they should be consecutive.
21 |         labels (list(list(str))): A list of arc labels. This list should
22 |             have the same length as the list of substructures, and each inner
23 |             list should contain only one element (as in ``substructures``).
24 |             Each entry describes the label of an arc.
25 |         scores (list(list(float))): A list of arc scores. This list should
26 |             have the same length as the list of substructures, and each inner
27 |             list should contain only one element (as in ``substructures``).
28 |             Each entry describes the score of an arc.
29 |         coref_labels (set(str)): A set of labels that indicate that mentions
30 |             connected via an arc that has one of these labels are coreferent.
31 |
32 |     Returns:
33 |         A tuple containing two dicts. The components are
34 |
35 |         - **mention_entity_mapping** (*dict(Mention, int)*): A mapping of
36 |           mentions to entity identifiers.
37 |         - **antecedent_mapping** (*dict(Mention, Mention)*): A mapping of
38 |           mentions to their antecedent.
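        Example (illustrative): if the pairs (m_3, m_2) and (m_3, m_1) have
        scores 0.5 and 1.2 respectively, and both arcs carry a label in
        ``coref_labels``, then m_1 is chosen as the antecedent of m_3.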
39 |     """
40 |
41 |     anaphor = None
42 |     best = None
43 |     max_val = float('-inf')
44 |
45 |     mention_entity_mapping = {}
46 |     antecedent_mapping = {}
47 |
48 |     for substructure, substructure_label, substructure_score in zip(
49 |             substructures, labels, scores):
50 |         # each substructure consists of one pair
51 |         pair = substructure[0]
52 |         label = substructure_label[0]
53 |         score = substructure_score[0]
54 |         current_anaphor, current_antecedent = pair
55 |         if current_anaphor != anaphor:
56 |             # change in anaphor: set coreference information based on
57 |             # best-scoring antecedent
58 |             if anaphor and best and not best.is_dummy():
59 |                 antecedent_mapping[anaphor] = best
60 |                 if best not in mention_entity_mapping:
61 |                     mention_entity_mapping[best] = \
62 |                         best.document.system_mentions.index(best)
63 |
64 |                 mention_entity_mapping[anaphor] = \
65 |                     mention_entity_mapping[best]
66 |
67 |             best = None
68 |             max_val = float('-inf')
69 |
70 |         if score > max_val and label in coref_labels:
71 |             max_val = score
72 |             best = current_antecedent
73 |
74 |         anaphor = current_anaphor
75 |
76 |     if anaphor and best and not best.is_dummy():
77 |         antecedent_mapping[anaphor] = best
78 |         if best not in mention_entity_mapping:
79 |             mention_entity_mapping[best] = \
80 |                 best.document.system_mentions.index(best)
81 |
82 |         mention_entity_mapping[anaphor] = \
83 |             mention_entity_mapping[best]
84 |
85 |     return mention_entity_mapping, antecedent_mapping
86 |
87 |
88 | def all_ante(substructures, labels, scores, coref_labels):
89 |     """ Extract coreference clusters from coreference predictions via transitive
90 |     closure.
91 |
92 |     In particular, go through all (anaphor, antecedent) pairs contained in
93 |     ``substructures``, and obtain coreference clusters by transitive closure.
94 |
95 |     Args:
96 |         substructures (list(list((Mention, Mention)))): A list of substructures.
97 |         labels (list(list(str))): Not used by this function.
98 |         scores (list(list(float))): Not used by this function.
99 |         coref_labels (set(str)): Not used by this function.
100 |
101 |     Returns:
102 |         A tuple containing two dicts. The components are
103 |
104 |         - **mention_entity_mapping** (*dict(Mention, int)*): A mapping of
105 |           mentions to entity identifiers.
106 |         - **antecedent_mapping** (*dict(Mention, Mention)*): A mapping of
107 |           mentions to their antecedent.
108 |     """
109 |     mention_entity_mapping = {}
110 |     antecedent_mapping = {}
111 |
112 |     for substructure in substructures:
113 |         for pair in substructure:
114 |             anaphor, antecedent = pair
115 |
116 |             # skip dummy antecedents
117 |             if antecedent.is_dummy():
118 |                 continue
119 |
120 |             antecedent_mapping[anaphor] = antecedent
121 |
122 |             # antecedent is not in the mapping: we initialize a new coreference
123 |             # chain
124 |             if antecedent not in mention_entity_mapping:
125 |                 # chain id: index of antecedent in system mentions
126 |                 mention_entity_mapping[antecedent] = \
127 |                     antecedent.document.system_mentions.index(antecedent)
128 |
129 |             # assign id based on antecedent
130 |             mention_entity_mapping[anaphor] = \
131 |                 mention_entity_mapping[antecedent]
132 |
133 |     return mention_entity_mapping, antecedent_mapping
134 |
-------------------------------------------------------------------------------- /cort/coreference/cost_functions.py: --------------------------------------------------------------------------------
1 | """ Cost functions used during learning of coreference predictors. """
""" 2 | 3 | __author__ = 'martscsn' 4 | 5 | 6 | def cost_based_on_consistency(arc, label="+"): 7 | """ Assign cost to arcs based on consistency of decision and anaphoricity. 8 | 9 | An anaphor-antecedent decision is consistent if either 10 | (a) the mentions are coreferent, or 11 | (b) the antecedent is the dummy mention, and the anaphor does not have 12 | any preceding coreferent mention among all extracted mentions. 13 | 14 | Note that (b) also contains cases where the mention has an antecedent in the 15 | gold data, but we were unable to extract this antecedent due to errors in 16 | mention detection. 17 | 18 | If the anaphor-antecedent decision represented by ``arc``is consistent, it 19 | gets cost 0. If the the decision is not consistent, and the antecedent is 20 | the dummy mention, it gets cost 2. Otherwise, it gets cost 1. 21 | 22 | Args: 23 | arc ((Mention, Mention)): A pair of mentions. 24 | label (str): The label to predict for the arc. Defaults to '+'. 25 | 26 | Return: 27 | (int): The cost of predicting the arc. 28 | """ 29 | ana, ante = arc 30 | 31 | consistent = ana.decision_is_consistent(ante) 32 | 33 | # false new 34 | if not consistent and ante.is_dummy(): 35 | return 2 36 | # wrong link 37 | elif not consistent: 38 | return 1 39 | else: 40 | return 0 41 | 42 | 43 | def null_cost(arc, label="+"): 44 | """ Dummy cost function which always returns 0 (corresponding to not using 45 | a cost function at all). 46 | 47 | Args: 48 | arc ((Mention, Mention)): A pair of mentions. 49 | label (str): The label to predict for the arc. Defaults to '+' 50 | 51 | Return: 52 | 0 53 | """ 54 | return 0 -------------------------------------------------------------------------------- /cort/coreference/experiments.py: -------------------------------------------------------------------------------- 1 | """ Manage learning from training data and making predictions on test data. """ 2 | 3 | 4 | import logging 5 | 6 | 7 | __author__ = 'smartschat' 8 | 9 | 10 | def learn(training_corpus, instance_extractor, perceptron): 11 | """ Learn a model for coreference resolution from training data. 12 | 13 | In particular, apply an instance/feature extractor to a training corpus and 14 | employ a machine learning model to learn a weight vector from these 15 | instances. 16 | 17 | Args: 18 | training_corpus (Corpus): The corpus to learn from. 19 | instance_extractor (InstanceExtracor): The instance extractor that 20 | defines the features and the structure of instances that are 21 | extracted during training. 22 | perceptron (Perceptron): A perceptron (including a decoder) that 23 | learns from the instances extracted by ``instance_extractor``. 24 | 25 | Returns: 26 | A tuple consisting of 27 | - **priors** (*dict(str,float)*): A prior weight for each label 28 | in the graphs representing the instances, 29 | - **weights** (*dict(str, array)*): A mapping of labels to weight 30 | vectors. For each label ``l``, ``weights[l]`` contains weights 31 | for each feature seen during training (for representing the 32 | features we employ *feature hashing*). If the graphs employed are 33 | not labeled, ``l`` is set to "+". 
34 |     """
35 |     logging.info("Learning.")
36 |
37 |     logging.info("\tExtracting instances and features.")
38 |     substructures, arc_information = instance_extractor.extract(
39 |         training_corpus)
40 |
41 |     logging.info("\tFitting model parameters.")
42 |
43 |     perceptron.fit(substructures, arc_information)
44 |
45 |     return perceptron.get_model()
46 |
47 |
48 | def predict(testing_corpus,
49 |             instance_extractor,
50 |             perceptron,
51 |             coref_extractor):
52 |     """ According to a learned model, predict coreference information.
53 |
54 |     Args:
55 |         testing_corpus (Corpus): The corpus to predict coreference on.
56 |         instance_extractor (InstanceExtractor): The instance extractor that
57 |             defines the features and the structure of instances that are
58 |             extracted during testing.
59 |         perceptron (Perceptron): A perceptron learned from training data.
60 |         coref_extractor (function): An extractor for consolidating pairwise
61 |             predictions into coreference clusters.
62 |
63 |     Returns:
64 |         A tuple containing two dicts. The components are
65 |
66 |         - **mention_entity_mapping** (*dict(Mention, int)*): A mapping of
67 |           mentions to entity identifiers.
68 |         - **antecedent_mapping** (*dict(Mention, Mention)*): A mapping of
69 |           mentions to their antecedent (as determined by the
70 |           ``coref_extractor``).
71 |     """
72 |     logging.info("Predicting.")
73 |
74 |     logging.info("\tRemoving coreference annotations from corpus.")
75 |     for doc in testing_corpus:
76 |         doc.antecedent_decisions = {}
77 |         for mention in doc.system_mentions:
78 |             mention.attributes["antecedent"] = None
79 |             mention.attributes["set_id"] = None
80 |
81 |     logging.info("\tExtracting instances and features.")
82 |     substructures, arc_information = instance_extractor.extract(testing_corpus)
83 |
84 |     logging.info("\tDoing predictions.")
85 |     arcs, labels, scores = perceptron.predict(substructures, arc_information)
86 |
87 |     logging.info("\tClustering results.")
88 |
89 |     return coref_extractor(arcs, labels, scores, perceptron.get_coref_labels())
90 |
-------------------------------------------------------------------------------- /cort/coreference/multigraph/__init__.py: --------------------------------------------------------------------------------
1 | __author__ = 'martscsn'
2 |
-------------------------------------------------------------------------------- /cort/coreference/multigraph/decoders.py: --------------------------------------------------------------------------------
1 | __author__ = 'smartschat'
2 |
3 |
4 | class MultigraphDecoder:
5 |     def __init__(self, multigraph_creator):
6 |         self.coref_multigraph_creator = multigraph_creator
7 |
8 |     def decode(self, corpus):
9 |         for doc in corpus:
10 |             for mention in doc.system_mentions:
11 |                 mention.attributes["set_id"] = None
12 |
13 |             # discard dummy mention
14 |             self.decode_for_one_document(doc.system_mentions[1:])
15 |
16 |     def decode_for_one_document(self, mentions):
17 |         multigraph = \
18 |             self.coref_multigraph_creator.construct_graph_from_mentions(
19 |                 mentions)
20 |
21 |         for mention in mentions:
22 |             antecedent = self.compute_antecedent(mention, multigraph)
23 |
24 |             if antecedent is not None:
25 |                 if antecedent.attributes["set_id"] is None:
26 |                     antecedent.attributes["set_id"] = \
27 |                         mentions.index(antecedent)
28 |
29 |                 mention.attributes["set_id"] = antecedent.attributes["set_id"]
30 |                 mention.document.antecedent_decisions[mention.span] = \
31 |                     antecedent.span
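    # A brief note on the decoding scheme (added for clarity): decoding is
    # greedy. For each mention, compute_antecedent below considers only
    # candidates connected without negative relations and picks the one with
    # the highest positive weight; ties are broken by the sort over
    # (weight, antecedent) pairs, which favors later, i.e. closer,
    # antecedents, assuming mentions compare by document position. Mentions
    # with no positively weighted candidate keep set_id None and stay
    # unresolved.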
32 | 33 | @staticmethod 34 | def compute_antecedent(mention, multigraph): 35 | weights = [] 36 | for antecedent in multigraph.edges[mention]: 37 | if not multigraph.edges[mention][antecedent]["negative_relations"]: 38 | weights.append( 39 | (multigraph.get_weight(mention, antecedent), antecedent)) 40 | 41 | # get antecedent with highest positive weight, break ties by distance 42 | if len(weights) > 0 and sorted(weights)[-1][0] > 0: 43 | return sorted(weights)[-1][1] 44 | -------------------------------------------------------------------------------- /cort/coreference/multigraph/multigraphs.py: -------------------------------------------------------------------------------- 1 | __author__ = 'smartschat' 2 | 3 | 4 | class CorefMultigraphCreator: 5 | def __init__(self, 6 | positive_features, 7 | negative_features, 8 | weighting_function, 9 | relation_weights, 10 | construct_when_negative=False): 11 | self.positive_features = positive_features 12 | self.negative_features = negative_features 13 | self.weighting_function = weighting_function 14 | self.relation_weights = relation_weights 15 | self.construct_when_negative = construct_when_negative 16 | 17 | def construct_graph_from_mentions(self, mentions): 18 | nodes = [] 19 | edges = {} 20 | 21 | for i in range(0, len(mentions)): 22 | anaphor = mentions[i] 23 | 24 | nodes.append(anaphor) 25 | 26 | edges[anaphor] = self.construct_for_one_mention(mentions, i) 27 | 28 | return CorefMultigraph(nodes, 29 | edges, 30 | self.weighting_function, 31 | self.relation_weights) 32 | 33 | def construct_for_one_mention(self, mentions, i): 34 | anaphor = mentions[i] 35 | 36 | edges = {} 37 | 38 | # do not include dummy mention 39 | for j in range(i-1, 0, -1): 40 | antecedent = mentions[j] 41 | if self.construct_when_negative: 42 | edges[antecedent] = self.get_edge_relations(anaphor, antecedent) 43 | else: 44 | if not self.has_negative(anaphor, antecedent): 45 | edges[antecedent] = { 46 | "negative_relations": [], 47 | "positive_relations": self.get_positive_relations( 48 | anaphor, antecedent) 49 | } 50 | 51 | return edges 52 | 53 | def get_edge_relations(self, anaphor, antecedent): 54 | relations = { 55 | "negative_relations": 56 | self.get_negative_relations(anaphor, antecedent), 57 | "positive_relations": 58 | self.get_positive_relations(anaphor, antecedent) 59 | } 60 | 61 | return relations 62 | 63 | def has_negative(self, anaphor, antecedent): 64 | for r in self.negative_features: 65 | if r(anaphor, antecedent): 66 | return True 67 | 68 | def get_negative_relations(self, anaphor, antecedent): 69 | negative_relations = [] 70 | 71 | for r in self.negative_features: 72 | if r(anaphor, antecedent): 73 | negative_relations.append(r) 74 | 75 | return negative_relations 76 | 77 | def get_positive_relations(self, anaphor, antecedent): 78 | positive_relations = [] 79 | 80 | for r in self.positive_features: 81 | if r(anaphor, antecedent): 82 | positive_relations.append(r) 83 | 84 | return positive_relations 85 | 86 | 87 | class CorefMultigraph: 88 | def __init__(self, nodes, edges, weighting_function, relation_weights): 89 | self.nodes = nodes 90 | self.edges = edges 91 | self.weighting_function = weighting_function 92 | self.relation_weights = relation_weights 93 | 94 | def get_weight(self, anaphor, antecedent): 95 | return self.weighting_function( 96 | anaphor, 97 | antecedent, 98 | self.edges[anaphor][antecedent], 99 | self.relation_weights) 100 | -------------------------------------------------------------------------------- 
/cort/coreference/multigraph/weighting_functions.py: -------------------------------------------------------------------------------- 1 | __author__ = 'smartschat' 2 | 3 | 4 | def for_each_relation_with_distance(anaphor, 5 | antecedent, 6 | relations, 7 | relation_weights): 8 | weight = 0.0 9 | 10 | if len(relations["negative_relations"]) > 0: 11 | return float("-inf") 12 | 13 | if len(relations["positive_relations"]) == 0: 14 | return 0 15 | 16 | for relation in relations["positive_relations"]: 17 | weight += relation_weights[relation] 18 | 19 | weight /= (anaphor.attributes["sentence_id"] - 20 | antecedent.attributes["sentence_id"] 21 | + 1) 22 | 23 | return weight 24 | -------------------------------------------------------------------------------- /cort/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | -------------------------------------------------------------------------------- /cort/preprocessing/pipeline.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | 3 | import cort 4 | 5 | import codecs 6 | 7 | import stanford_corenlp_pywrapper 8 | 9 | from StanfordDependencies import CoNLL 10 | 11 | from cort.core import corpora, documents, spans 12 | 13 | import bs4 14 | 15 | 16 | class Pipeline(): 17 | def __init__(self, corenlp_location, with_coref=False): 18 | package_dir = cort.__path__[0] 19 | 20 | if with_coref: 21 | self.proc = stanford_corenlp_pywrapper.CoreNLP( 22 | configfile=package_dir + "/config_files/corenlp_with_coref.ini", 23 | corenlp_jars=[corenlp_location + "/*"] 24 | ) 25 | else: 26 | self.proc = stanford_corenlp_pywrapper.CoreNLP( 27 | configfile=package_dir + "/config_files/corenlp.ini", 28 | corenlp_jars=[corenlp_location + "/*"] 29 | ) 30 | 31 | self.with_coref = with_coref 32 | 33 | def run_on_docs(self, identifier, docs): 34 | processed_documents = [] 35 | 36 | for doc in docs: 37 | processed_documents.append(self.run_on_doc( 38 | codecs.open(doc, "r", "utf-8") 39 | )) 40 | 41 | return corpora.Corpus(identifier, processed_documents) 42 | 43 | def run_on_doc(self, doc_file, name=None): 44 | if self.with_coref: 45 | soup = bs4.BeautifulSoup(doc_file.read()) 46 | preprocessed = self.proc.parse_doc(soup.text) 47 | else: 48 | data = doc_file.read() 49 | preprocessed = self.proc.parse_doc(data) 50 | 51 | sentences = [] 52 | 53 | for sentence in preprocessed["sentences"]: 54 | processed_ner = [] 55 | for ner in sentence["ner"]: 56 | if ner == "O" or ner == "MISC": 57 | processed_ner.append("NONE") 58 | else: 59 | processed_ner.append(ner) 60 | 61 | processed_dep = [] 62 | 63 | index_to_dep_info = {} 64 | for dep_info in sentence["deps_basic"]: 65 | label, head, in_sent_index = dep_info 66 | index_to_dep_info[in_sent_index] = label, head 67 | 68 | for i in range(0, len(sentence["tokens"])): 69 | if i in index_to_dep_info.keys(): 70 | label, head = index_to_dep_info[i] 71 | processed_dep.append( 72 | CoNLL.Token( 73 | form=sentence["tokens"][i], 74 | lemma=sentence["lemmas"][i], 75 | pos=sentence["pos"][i], 76 | index=i+1, 77 | head=head+1, 78 | deprel=label, 79 | cpos=None, 80 | feats=None, 81 | phead=None, 82 | pdeprel=None, 83 | extra=None 84 | ) 85 | ) 86 | else: 87 | processed_dep.append( 88 | CoNLL.Token( 89 | form=sentence["tokens"][i], 90 | lemma=sentence["lemmas"][i], 91 | pos=sentence["pos"][i], 92 | index=i+1, 93 | head=0, 94 | deprel="punc", 95 | cpos=None, 96 | feats=None, 97 | phead=None, 98 | 
pdeprel=None, 99 | extra=None 100 | ) 101 | ) 102 | 103 | sentences.append( 104 | (sentence["tokens"], 105 | sentence["pos"], 106 | processed_ner, 107 | ["-"]*len(sentence["tokens"]), 108 | sentence["parse"], 109 | processed_dep, 110 | ) 111 | ) 112 | 113 | if not name: 114 | name = doc_file.name 115 | 116 | if self.with_coref: 117 | antecedent_decisions = {} 118 | coref = {} 119 | 120 | mention_id_to_spans = {} 121 | 122 | max_entity = 0 123 | 124 | for mention in soup.findAll("mention"): 125 | if mention.get("entity"): 126 | max_entity = max(max_entity, int(mention.get("entity"))) 127 | 128 | for mention in soup.findAll("mention"): 129 | mention_id = int(mention.get("id")) 130 | 131 | span = spans.Span(int(mention.get("span_start")), 132 | int(mention.get("span_end"))) 133 | 134 | mention_id_to_spans[mention_id] = span 135 | 136 | if mention.get("entity"): 137 | annotated_set_id = int(mention.get("entity")) 138 | else: 139 | annotated_set_id = max_entity + 1 + mention_id 140 | 141 | coref[span] = annotated_set_id 142 | 143 | if mention.get("antecedent"): 144 | antecedent_decisions[span] = mention_id_to_spans[ 145 | int(mention.get("antecedent")) 146 | ] 147 | 148 | doc = documents.Document( 149 | name, 150 | sentences, 151 | coref) 152 | 153 | spans_to_annotated_mentions = {} 154 | 155 | for mention in doc.annotated_mentions: 156 | spans_to_annotated_mentions[mention.span] = mention 157 | 158 | for span in antecedent_decisions: 159 | ante_span = antecedent_decisions[span] 160 | ana = spans_to_annotated_mentions[span] 161 | ante = spans_to_annotated_mentions[ante_span] 162 | ana.attributes["antecedent"] = ante 163 | else: 164 | doc = documents.Document( 165 | name, 166 | sentences, 167 | {}) 168 | 169 | return doc 170 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/README.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | CorScorer: Perl package for scoring coreference resolution systems 3 | using different metrics. 4 | 5 | 6 | VERSION 7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics. 8 | 9 | 10 | CHANGES SINCE v8.0 11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton 12 | mention was present in the response. 13 | 14 | INSTALLATION 15 | Requirements: 16 | 1. Perl: downloadable from http://perl.org 17 | 2. Algorithm-Munkres: included in this package and downloadable 18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08 19 | 20 | USE 21 | This package is distributed with two scripts to execute the scorer from 22 | the command line. 
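  A typical invocation (illustrative; key and response files are in the
  CoNLL-2011/2012 format described under INPUT below) is:

      perl scorer.pl muc key_file response_file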
23 | 24 | Windows (tm): scorer.bat 25 | Linux: scorer.pl 26 | 27 | 28 | SYNOPSIS 29 | use CorScorer; 30 | 31 | $metric = 'ceafm'; 32 | 33 | # Scores the whole dataset 34 | &CorScorer::Score($metric, $keys_file, $response_file); 35 | 36 | # Scores one file 37 | &CorScorer::Score($metric, $keys_file, $response_file, $name); 38 | 39 | 40 | INPUT 41 | metric: the metric desired to score the results: 42 | muc: MUCScorer (Vilain et al, 1995) 43 | bcub: B-Cubed (Bagga and Baldwin, 1998) 44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 47 | all: uses all the metrics to score 48 | 49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format 50 | 51 | response_file: file with output of coreference system (CoNLL-2011/2012 format) 52 | 53 | name: [optional] the name of the document to score. If name is not 54 | given, all the documents in the dataset will be scored. If given 55 | name is "none" then all the documents are scored but only total 56 | results are shown. 57 | 58 | 59 | OUTPUT 60 | The score subroutine returns an array with four values in this order: 61 | 1) Recall numerator 62 | 2) Recall denominator 63 | 3) Precision numerator 64 | 4) Precision denominator 65 | 66 | Also recall, precision and F1 are printed in the standard output when variable 67 | $VERBOSE is not null. 68 | 69 | Final scores: 70 | Recall = recall_numerator / recall_denominator 71 | Precision = precision_numerator / precision_denominator 72 | F1 = 2 * Recall * Precision / (Recall + Precision) 73 | 74 | Identification of mentions 75 | An scorer for identification of mentions (recall, precision and F1) is also included. 76 | Mentions from system response are compared with key mentions. This version performs 77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks. 78 | 79 | AUTHORS 80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu 82 | Sebastian Martschat, sebastian.martschat h-its.org 83 | Xiaoqiang Luo, xql google.com 84 | 85 | COPYRIGHT AND LICENSE 86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu 88 | 89 | This program is free software; you can redistribute it and/or modify it 90 | under the terms of the GNU General Public License as published by the 91 | Free Software Foundation; either version 2 of the License, or (at your 92 | option) any later version. This program is distributed in the hope that 93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95 | GNU General Public License for more details. 96 | 97 | You should have received a copy of the GNU General Public License along 98 | with this program; if not, write to the Free Software Foundation, Inc., 99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
100 | 101 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/lib/Algorithm/README.Munkres: -------------------------------------------------------------------------------- 1 | NAME 2 | Algorithm-Munkres : Perl extension for Munkres' solution to 3 | classical Assignment problem for square and rectangular matrices 4 | This module extends the solution of Assignment problem for square 5 | matrices to rectangular matrices by padding zeros. Thus a rectangular 6 | matrix is converted to square matrix by padding necessary zeros. 7 | 8 | SYNOPSIS 9 | use Algorithm::Munkres; 10 | 11 | @mat = ( 12 | [2, 4, 7, 9], 13 | [3, 9, 5, 1], 14 | [8, 2, 9, 7], 15 | ); 16 | 17 | assign(\@mat,\@out_mat); 18 | 19 | Then the @out_mat array will have the output as: (0,3,1,2), 20 | where 21 | 0th element indicates that 0th row is assigned 0th column i.e value=2 22 | 1st element indicates that 1st row is assigned 3rd column i.e.value=1 23 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 24 | 3rd element indicates that 3rd row is assigned 2nd column.i.e.value=0 25 | 26 | DESCRIPTION 27 | Assignment Problem: Given N jobs, N workers and the time taken by 28 | each worker to complete a job then how should the assignment of a 29 | Worker to a Job be done, so as to minimize the time taken. 30 | 31 | Thus if we have 3 jobs p,q,r and 3 workers x,y,z such that: 32 | x y z 33 | p 2 4 7 34 | q 3 9 5 35 | r 8 2 9 36 | 37 | where the cell values of the above matrix give the time required 38 | for the worker(given by column name) to complete the job(given by 39 | the row name) 40 | 41 | then possible solutions are: 42 | Total 43 | 1. 2, 9, 9 20 44 | 2. 2, 2, 5 9 45 | 3. 3, 4, 9 16 46 | 4. 3, 2, 7 12 47 | 5. 8, 9, 7 24 48 | 6. 8, 4, 5 17 49 | 50 | Thus (2) is the optimal solution for the above problem. 51 | This kind of brute-force approach of solving Assignment problem 52 | quickly becomes slow and bulky as N grows, because the number of 53 | possible solution are N! and thus the task is to evaluate each 54 | and then find the optimal solution.(If N=10, number of possible 55 | solutions: 3628800 !) 56 | Munkres' gives us a solution to this problem, which is implemented 57 | in this module. 58 | 59 | This module also solves Assignment problem for rectangular matrices 60 | (M x N) by converting them to square matrices by padding zeros. ex: 61 | If input matrix is: 62 | [2, 4, 7, 9], 63 | [3, 9, 5, 1], 64 | [8, 2, 9, 7] 65 | i.e 3 x 4 then we will convert it to 4 x 4 and the modified input 66 | matrix will be: 67 | [2, 4, 7, 9], 68 | [3, 9, 5, 1], 69 | [8, 2, 9, 7], 70 | [0, 0, 0, 0] 71 | 72 | EXPORT 73 | "assign" function by default. 74 | 75 | INPUT 76 | The input matrix should be in a two dimensional array(array of 77 | array) and the 'assign' subroutine expects a reference to this 78 | array and not the complete array. 79 | eg:assign(\@inp_mat, \@out_mat); 80 | The second argument to the assign subroutine is the reference 81 | to the output array. 82 | 83 | OUTPUT 84 | The assign subroutine expects references to two arrays as its 85 | input paramenters. The second parameter is the reference to the 86 | output array. This array is populated by assign subroutine. This 87 | array is single dimensional Nx1 matrix. 
88 | For above example the output array returned will be: 89 | (0, 90 | 2, 91 | 1) 92 | 93 | where 94 | 0th element indicates that 0th row is assigned 0th column i.e value=2 95 | 1st element indicates that 1st row is assigned 2nd column i.e.value=5 96 | 2nd element indicates that 2nd row is assigned 1st column.i.e.value=2 97 | 98 | SEE ALSO 99 | 1. http://216.249.163.93/bob.pilgrim/445/munkres.html 100 | 101 | 2. Munkres, J. Algorithms for the assignment and transportation 102 | Problems. J. Siam 5 (Mar. 1957), 32-38 103 | 104 | 3. François Bourgeois and Jean-Claude Lassalle. 1971. 105 | An extension of the Munkres algorithm for the assignment 106 | problem to rectangular matrices. 107 | Communication ACM, 14(12):802-804 108 | 109 | AUTHOR 110 | Anagha Kulkarni, University of Minnesota Duluth 111 | kulka020 d.umn.edu 112 | 113 | Ted Pedersen, University of Minnesota Duluth 114 | tpederse d.umn.edu 115 | 116 | COPYRIGHT AND LICENSE 117 | Copyright (C) 2007-2008, Ted Pedersen and Anagha Kulkarni 118 | 119 | This program is free software; you can redistribute it and/or modify it 120 | under the terms of the GNU General Public License as published by the 121 | Free Software Foundation; either version 2 of the License, or (at your 122 | option) any later version. This program is distributed in the hope that 123 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 124 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 125 | GNU General Public License for more details. 126 | 127 | You should have received a copy of the GNU General Public License along 128 | with this program; if not, write to the Free Software Foundation, Inc., 129 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 130 | 131 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored. If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 
44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown. 38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC. 
21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 
105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-B.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | 
nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | 
nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | 
nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | 
nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | 
nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-H.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 
12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | 
nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | 
nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /cort/reference-coreference-scorers/v8.01/test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . "/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . $test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 
32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /cort/resources/coreferent_pairs.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/cort/resources/coreferent_pairs.obj -------------------------------------------------------------------------------- /cort/resources/singletons_not_cleaned.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/cort/resources/singletons_not_cleaned.obj -------------------------------------------------------------------------------- /cort/test/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | -------------------------------------------------------------------------------- /cort/test/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | -------------------------------------------------------------------------------- /cort/test/analysis/test_data_structures.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import unittest 3 | 4 | from cort.analysis import data_structures 5 | from cort.core import documents 6 | from cort.core import mentions 7 | from cort.core import spans 8 | 9 | 10 | __author__ = 'smartschat' 11 | 12 | 13 | class TestCorefStructures(unittest.TestCase): 14 | def setUp(self): 15 | self.complicated_mention_example = """#begin document (test2); part 000 16 | test2 0 0 This NN (NP* - - - - - (0) 17 | test2 0 1 is NN * - - - - - - 18 | test2 0 2 just NN * - - - - - - 19 | test2 0 3 a NN * - - - - - (0|(1) 20 | test2 0 4 test NN * - - - - - 0) 21 | test2 0 5 . NN *) - - - - - - 22 | 23 | test2 0 0 It NN (NP* - - - - - (1)|(0 24 | test2 0 1 shows NN * - - - - - - 25 | test2 0 2 that NN * - - - - - (2) 26 | test2 0 3 the NN * - - - - - (2|(3 27 | test2 0 4 scorer NN * - - - - - 2)|0) 28 | test2 0 5 works NN * - - - - - 3) 29 | test2 0 6 . 
NN *) - - - - - - 30 | 31 | #end document""" 32 | 33 | self.complicated_mention_document = documents.CoNLLDocument( 34 | self.complicated_mention_example) 35 | 36 | def test_entity_graph_from_mentions(self): 37 | annotated_mentions = \ 38 | self.complicated_mention_document.annotated_mentions 39 | 40 | first_graph = data_structures.EntityGraph({ 41 | annotated_mentions[4]: [annotated_mentions[2], 42 | annotated_mentions[0]], 43 | annotated_mentions[2]: [annotated_mentions[0]] 44 | }) 45 | 46 | second_graph = data_structures.EntityGraph({ 47 | annotated_mentions[3]: [annotated_mentions[1]] 48 | }) 49 | 50 | third_graph = data_structures.EntityGraph({ 51 | annotated_mentions[6]: [annotated_mentions[5]] 52 | }) 53 | 54 | self.assertEqual( 55 | [first_graph, second_graph, third_graph], 56 | data_structures.EntityGraph.from_mentions(annotated_mentions, 57 | "annotated_set_id")) 58 | 59 | def test_entity_graph_partition(self): 60 | annotated_mentions = \ 61 | self.complicated_mention_document.annotated_mentions 62 | 63 | graph = data_structures.EntityGraph({ 64 | annotated_mentions[4]: [annotated_mentions[2], 65 | annotated_mentions[0]], 66 | annotated_mentions[2]: [annotated_mentions[0]] 67 | }) 68 | 69 | system_output = [ 70 | mentions.Mention( 71 | self.complicated_mention_document, 72 | spans.Span(0, 0), 73 | {"set_id": 0}), 74 | mentions.Mention( 75 | self.complicated_mention_document, 76 | spans.Span(2, 3), 77 | {"set_id": 1}), 78 | mentions.Mention( 79 | self.complicated_mention_document, 80 | spans.Span(6, 10), 81 | {"set_id": 0}), 82 | mentions.Mention( 83 | self.complicated_mention_document, 84 | spans.Span(5, 5), 85 | {"set_id": 0}) 86 | ] 87 | 88 | expected_edges = defaultdict(list) 89 | expected_edges[annotated_mentions[4]].append(annotated_mentions[0]) 90 | expected = data_structures.EntityGraph(expected_edges) 91 | 92 | self.assertEqual(expected, 93 | graph.partition( 94 | data_structures.EntityGraph.from_mentions( 95 | system_output, "set_id"))) 96 | 97 | 98 | if __name__ == '__main__': 99 | unittest.main() -------------------------------------------------------------------------------- /cort/test/analysis/test_error_extractors.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import unittest 3 | 4 | from cort.analysis import data_structures 5 | from cort.analysis import error_extractors 6 | from cort.analysis import spanning_tree_algorithms 7 | from cort.core import corpora 8 | from cort.core import mentions 9 | from cort.core import spans 10 | 11 | __author__ = 'smartschat' 12 | 13 | 14 | class TestErrorExtractor(unittest.TestCase): 15 | def setUp(self): 16 | self.first_cluster = [ 17 | mentions.Mention( 18 | None, 19 | spans.Span(0, 0), 20 | {"tokens": ["a"], "annotated_set_id": 0}), 21 | 22 | mentions.Mention( 23 | None, 24 | spans.Span(1, 1), 25 | {"tokens": ["b"], "annotated_set_id": 0}), 26 | 27 | mentions.Mention( 28 | None, 29 | spans.Span(2, 3), 30 | {"tokens": ["c", "d"], "annotated_set_id": 0}), 31 | 32 | mentions.Mention( 33 | None, 34 | spans.Span(4, 5), 35 | {"tokens": ["e", "f"], "annotated_set_id": 0}), 36 | 37 | mentions.Mention( 38 | None, 39 | spans.Span(5, 6), 40 | {"tokens": ["f", "g"], "annotated_set_id": 0}), 41 | 42 | mentions.Mention( 43 | None, 44 | spans.Span(7, 7), 45 | {"tokens": ["h"], "annotated_set_id": 0}), 46 | ] 47 | 48 | self.second_cluster = [ 49 | mentions.Mention( 50 | None, 51 | spans.Span(3, 4), 52 | {"tokens": ["d", "e"], "annotated_set_id": 1}), 53 | 54 | 
mentions.Mention( 55 | None, 56 | spans.Span(7, 8), 57 | {"tokens": ["h", "i"], "annotated_set_id": 1}), 58 | 59 | mentions.Mention( 60 | None, 61 | spans.Span(10, 10), 62 | {"tokens": ["k"], "annotated_set_id": 1}) 63 | ] 64 | 65 | self.system_cluster = [ 66 | mentions.Mention( 67 | None, 68 | spans.Span(0, 0), 69 | {"tokens": ["a"], "annotated_set_id": 0}), 70 | 71 | mentions.Mention( 72 | None, 73 | spans.Span(2, 3), 74 | {"tokens": ["c", "d"], "annotated_set_id": 0}), 75 | 76 | mentions.Mention( 77 | None, 78 | spans.Span(4, 5), 79 | {"tokens": ["e", "f"], "annotated_set_id": 2}), 80 | 81 | mentions.Mention( 82 | None, 83 | spans.Span(5, 6), 84 | {"tokens": ["f", "g"], "annotated_set_id": 2}), 85 | 86 | mentions.Mention( 87 | None, 88 | spans.Span(7, 7), 89 | {"tokens": ["h"], "annotated_set_id": 1}), 90 | 91 | mentions.Mention( 92 | None, 93 | spans.Span(10, 10), 94 | {"tokens": ["k"], "annotated_set_id": 1}) 95 | ] 96 | 97 | self.maxDiff = None 98 | 99 | def test_compute_errors(self): 100 | # fake document using a named tuple 101 | document = namedtuple("Document", "annotated_mentions") 102 | doc_gold = document(self.first_cluster + self.second_cluster) 103 | doc_system = document(self.system_cluster) 104 | corpus_gold = corpora.Corpus("fake gold", [doc_gold]) 105 | corpus_system = corpora.Corpus("fake system", [doc_system]) 106 | 107 | ex = error_extractors.ErrorExtractor( 108 | corpus_gold, 109 | spanning_tree_algorithms.recall_closest, 110 | spanning_tree_algorithms.precision_system_output 111 | ) 112 | 113 | ex.add_system(corpus_system) 114 | 115 | self.assertEqual( 116 | data_structures.EnhancedSet([ 117 | (self.first_cluster[1], self.first_cluster[0]), 118 | (self.first_cluster[3], self.first_cluster[2]), 119 | (self.first_cluster[5], self.first_cluster[4]), 120 | (self.second_cluster[1], self.second_cluster[0]), 121 | (self.second_cluster[2], self.second_cluster[1]), 122 | ]), 123 | ex.get_errors()["fake system"]["recall_errors"]["all"] 124 | ) 125 | 126 | if __name__ == '__main__': 127 | unittest.main() 128 | -------------------------------------------------------------------------------- /cort/test/analysis/test_spanning_tree_algorithms.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from cort.analysis import data_structures 4 | from cort.analysis import spanning_tree_algorithms 5 | from cort.core import mentions 6 | from cort.core import spans 7 | 8 | 9 | __author__ = 'smartschat' 10 | 11 | 12 | class TestSpanningTreeAlgorithms(unittest.TestCase): 13 | def setUp(self): 14 | self.gold_first_cluster = [ 15 | mentions.Mention( 16 | None, 17 | spans.Span(0, 0), 18 | {"tokens": ["a"], "type": "NOM", "annotated_set_id": 0}), 19 | 20 | mentions.Mention( 21 | None, 22 | spans.Span(1, 1), 23 | {"tokens": ["US"], "type": "NAM", "annotated_set_id": 0}), 24 | 25 | mentions.Mention( 26 | None, 27 | spans.Span(2, 3), 28 | {"tokens": ["angry", "salesman"], "type": "PRO", "annotated_set_id": 0}), 29 | 30 | mentions.Mention( 31 | None, 32 | spans.Span(4, 5), 33 | {"tokens": ["the", "rainbow"], "type": "NAM", 34 | "annotated_set_id": 0}), 35 | 36 | mentions.Mention( 37 | None, 38 | spans.Span(5, 6), 39 | {"tokens": ["and", "far"], "type": "NOM", 40 | "annotated_set_id": 0}), 41 | 42 | mentions.Mention( 43 | None, 44 | spans.Span(7, 7), 45 | {"tokens": ["neypmd"], "type": "NOM", "annotated_set_id": 0}), 46 | ] 47 | 48 | self.gold_second_cluster = [ 49 | mentions.Mention( 50 | None, 51 | spans.Span(7, 8), 52 | {"type": "NOM", 
"annotated_set_id": 1}), 53 | 54 | mentions.Mention( 55 | None, 56 | spans.Span(9, 9), 57 | {"type": "NAM", "annotated_set_id": 1}), 58 | 59 | mentions.Mention( 60 | None, 61 | spans.Span(10, 10), 62 | {"type": "PRO", "annotated_set_id": 1}), 63 | ] 64 | 65 | self.system1_mentions = [ 66 | mentions.Mention(None, spans.Span(0, 0), {"set_id": 0}), 67 | mentions.Mention(None, spans.Span(2, 3), {"set_id": 0}), 68 | mentions.Mention(None, spans.Span(4, 5), {"set_id": 2}), 69 | mentions.Mention(None, spans.Span(5, 6), {"set_id": 2}), 70 | mentions.Mention(None, spans.Span(3, 4), {"set_id": 1}), 71 | mentions.Mention(None, spans.Span(7, 8), {"set_id": 1}), 72 | ] 73 | 74 | self.system2_cluster = [ 75 | mentions.Mention( 76 | None, 77 | spans.Span(0, 0), 78 | {"tokens": ["a"], "set_id": 0}), 79 | 80 | mentions.Mention( 81 | None, 82 | spans.Span(2, 3), 83 | {"tokens": ["angry", "salesman"], "set_id": 0}), 84 | 85 | mentions.Mention( 86 | None, 87 | spans.Span(7, 8), 88 | {"tokens": ["snafu", "foo"], "set_id": 0}), 89 | 90 | mentions.Mention( 91 | None, 92 | spans.Span(9, 9), 93 | {"tokens": ["bar"], "set_id": 0}), 94 | ] 95 | self.system2_cluster[1].attributes["antecedent"] = \ 96 | self.system2_cluster[0] 97 | self.system2_cluster[2].attributes["antecedent"] = \ 98 | self.system2_cluster[0] 99 | self.system2_cluster[3].attributes["antecedent"] = \ 100 | self.system2_cluster[2] 101 | 102 | self.maxDiff = None 103 | 104 | def test_recall_closest(self): 105 | gold_graph = data_structures.EntityGraph.from_mentions( 106 | self.gold_first_cluster, "annotated_set_id")[0] 107 | 108 | spanning_tree_edges = [ 109 | (self.gold_first_cluster[1], self.gold_first_cluster[0]), 110 | (self.gold_first_cluster[2], self.gold_first_cluster[0]), 111 | (self.gold_first_cluster[3], self.gold_first_cluster[2]), 112 | (self.gold_first_cluster[4], self.gold_first_cluster[3]), 113 | (self.gold_first_cluster[5], self.gold_first_cluster[4]) 114 | ] 115 | 116 | self.assertEqual( 117 | spanning_tree_edges, 118 | spanning_tree_algorithms.recall_closest( 119 | gold_graph, 120 | gold_graph.partition( 121 | data_structures.EntityGraph.from_mentions( 122 | self.system1_mentions, "set_id")))) 123 | 124 | def test_recall_type(self): 125 | gold_graph = data_structures.EntityGraph.from_mentions( 126 | self.gold_first_cluster, "annotated_set_id")[0] 127 | 128 | spanning_tree_edges = [ 129 | (self.gold_first_cluster[1], self.gold_first_cluster[0]), 130 | (self.gold_first_cluster[2], self.gold_first_cluster[0]), 131 | (self.gold_first_cluster[3], self.gold_first_cluster[1]), 132 | (self.gold_first_cluster[4], self.gold_first_cluster[3]), 133 | (self.gold_first_cluster[5], self.gold_first_cluster[3]) 134 | ] 135 | 136 | self.assertEqual( 137 | spanning_tree_edges, 138 | spanning_tree_algorithms.recall_accessibility( 139 | gold_graph, 140 | gold_graph.partition( 141 | data_structures.EntityGraph.from_mentions( 142 | self.system1_mentions, "set_id")))) 143 | 144 | def test_precision_system_output(self): 145 | gold_graph = data_structures.EntityGraph.from_mentions( 146 | self.system2_cluster, "set_id")[0] 147 | 148 | spanning_tree_edges = [ 149 | (self.system2_cluster[1], self.system2_cluster[0]), 150 | (self.system2_cluster[2], self.system2_cluster[0]), 151 | (self.system2_cluster[3], self.system2_cluster[2]) 152 | ] 153 | 154 | self.assertEqual( 155 | spanning_tree_edges, 156 | spanning_tree_algorithms.precision_system_output( 157 | gold_graph, 158 | gold_graph.partition( 159 | data_structures.EntityGraph.from_mentions( 160 | 
self.gold_first_cluster, "annotated_set_id")))) 161 | 162 | 163 | if __name__ == '__main__': 164 | unittest.main() 165 | -------------------------------------------------------------------------------- /cort/test/core/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | -------------------------------------------------------------------------------- /cort/test/core/test_corpora.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from cort.core.corpora import Corpus 5 | 6 | 7 | __author__ = 'smartschat' 8 | 9 | 10 | class TestCorpora(unittest.TestCase): 11 | def setUp(self): 12 | directory = os.path.dirname(os.path.realpath(__file__)) + "/resources/" 13 | self.input_data = open(directory + "input.conll", "r") 14 | 15 | def test_conll_reader(self): 16 | corpus = Corpus.from_file("test", self.input_data) 17 | self.assertEqual(5, len(corpus.documents)) 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /cort/test/core/test_external_data.py: -------------------------------------------------------------------------------- 1 | from cort.core.external_data import GenderData 2 | 3 | __author__ = 'smartschat' 4 | 5 | import unittest 6 | 7 | 8 | class TestGenderData(unittest.TestCase): 9 | def setUp(self): 10 | self.gender_data = GenderData.get_instance() 11 | 12 | def test_look_up(self): 13 | self.assertEqual("NEUTRAL", 14 | self.gender_data.look_up({"tokens": ["snafu"]})) 15 | 16 | self.assertEqual("FEMALE", 17 | self.gender_data.look_up( 18 | {"tokens": ["Barbara", "Bush"], 19 | "head": ["Barbara", "Bush"]})) 20 | 21 | self.assertEqual("MALE", 22 | self.gender_data.look_up({ 23 | "tokens": ["Footballer", "Zidane"], 24 | "head": ["Zidane"]})) 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /cort/test/core/test_spans.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from cort.core.spans import Span 4 | 5 | 6 | __author__ = 'smartschat' 7 | 8 | 9 | class TestSpan(unittest.TestCase): 10 | def test_span(self): 11 | span = Span(0, 1) 12 | self.assertEqual(0, span.begin) 13 | self.assertEqual(1, span.end) 14 | 15 | def test_parse(self): 16 | self.assertEqual(Span(10, 12), Span.parse("(10, 12)")) 17 | self.assertEqual(Span(10, 12), Span.parse("(10,12)")) 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /cort/test/core/test_util.py: -------------------------------------------------------------------------------- 1 | from cort.core.util import clean_via_pos 2 | 3 | __author__ = 'smartschat' 4 | 5 | import unittest 6 | 7 | 8 | class TestUtil(unittest.TestCase): 9 | def test_clean_via_pos(self): 10 | self.assertEqual( 11 | ["newly-elect", "leader", "wife"], 12 | clean_via_pos( 13 | ["the", "newly-elect", "leader", "'s", "wife"], 14 | ["DT", "JJ", "NN", "POS", "NN"])) 15 | 16 | 17 | if __name__ == '__main__': 18 | unittest.main() 19 | -------------------------------------------------------------------------------- /cort/test/multigraph/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'martscsn' 2 | 
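3 | # Editor's note (hedged, not part of the original file): the unittest 4 | # modules above follow standard discovery naming, so the whole suite can 5 | # be run with e.g. `python -m unittest discover cort/test`.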
-------------------------------------------------------------------------------- /cort/util/__init__.py: -------------------------------------------------------------------------------- 1 | "Utility functions." 2 | 3 | __author__ = 'sebastian' 4 | -------------------------------------------------------------------------------- /cort/util/import_helper.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pyximport 3 | pyximport.install(setup_args={"include_dirs": numpy.get_include()}) 4 | 5 | import importlib 6 | import inspect 7 | 8 | 9 | __author__ = 'martscsn' 10 | 11 | 12 | def import_from_path(name): 13 | splitted = name.split(".") 14 | package_name = ".".join(splitted[:-1]) 15 | cls = splitted[-1] 16 | 17 | package = importlib.import_module(package_name) 18 | 19 | imported = getattr(package, cls) 20 | 21 | return imported 22 | 23 | 24 | def get_features(filename): 25 | mention_features = [] 26 | pairwise_features = [] 27 | 28 | for line in open(filename).readlines(): 29 | feature = import_from_path(line.strip()) 30 | number_of_arguments = len(inspect.getargspec(feature)[0]) 31 | 32 | if number_of_arguments == 1: 33 | mention_features.append(feature) 34 | elif number_of_arguments == 2: 35 | pairwise_features.append(feature) 36 | else: 37 | raise ValueError("Features must have one or two arguments, " 38 | "feature " + line.strip() + " has " + 39 | str(number_of_arguments) + " arguments.") 40 | 41 | return mention_features, pairwise_features 42 | -------------------------------------------------------------------------------- /plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/plot.png -------------------------------------------------------------------------------- /scripts/acl15demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import io 5 | import logging 6 | import pickle 7 | import numpy 8 | 9 | import pyximport 10 | pyximport.install(setup_args={"include_dirs": numpy.get_include()}) 11 | 12 | from cort.preprocessing import pipeline 13 | from cort.core import mention_extractor 14 | from cort.coreference.approaches import mention_ranking 15 | from cort.coreference import cost_functions, clusterer 16 | from cort.coreference import experiments 17 | from cort.coreference import features 18 | from cort.coreference import instance_extractors 19 | from cort.core import corpora 20 | from cort.analysis import visualization, error_extractors, spanning_tree_algorithms 21 | 22 | try: 23 | import tkinter as tki 24 | except ImportError: 25 | import Tkinter as tki 26 | 27 | __author__ = 'smartschat' 28 | 29 | logging.basicConfig(level=logging.INFO, 30 | format='%(asctime)s %(levelname)s %(''message)s') 31 | 32 | class LiveDemo(): 33 | def __init__(self): 34 | mention_features = [ 35 | features.fine_type, 36 | features.gender, 37 | features.number, 38 | features.sem_class, 39 | features.deprel, 40 | features.head_ner, 41 | features.length, 42 | features.head, 43 | features.first, 44 | features.last, 45 | features.preceding_token, 46 | features.next_token, 47 | features.governor, 48 | features.ancestry 49 | ] 50 | 51 | pairwise_features = [ 52 | features.exact_match, 53 | features.head_match, 54 | features.same_speaker, 55 | features.alias, 56 | features.sentence_distance, 57 | features.embedding, 58 
| features.modifier, 59 | features.tokens_contained, 60 | features.head_contained, 61 | features.token_distance 62 | ] 63 | 64 | self.extractor = instance_extractors.InstanceExtractor( 65 | mention_ranking.extract_substructures, 66 | mention_features, 67 | pairwise_features, 68 | cost_functions.null_cost 69 | ) 70 | 71 | logging.info("Loading model.") 72 | 73 | priors, weights = pickle.load(open("latent-model-train.obj", "rb")) 74 | 75 | self.perceptron = mention_ranking.RankingPerceptron( 76 | priors=priors, 77 | weights=weights, 78 | cost_scaling=0 79 | ) 80 | 81 | logging.info("Loading CoreNLP models.") 82 | self.p = pipeline.Pipeline( 83 | "/home/sebastian/Downloads/stanford-corenlp-full-2015-04-20") 84 | 85 | self.root = tki.Tk() 86 | self.root.title("cort Demo") 87 | 88 | # create a Frame for the Text and Scrollbar 89 | self.txt_frm = tki.Frame(self.root, width=400, height=200) 90 | self.txt_frm.pack(fill="both", expand=True) 91 | 92 | # ensure a consistent GUI size 93 | self.txt_frm.grid_propagate(False) 94 | 95 | # implement stretchability 96 | self.txt_frm.grid_rowconfigure(0, weight=1) 97 | self.txt_frm.grid_columnconfigure(0, weight=1) 98 | 99 | # create a Text widget 100 | self.txt = tki.Text(self.txt_frm, borderwidth=3, relief="sunken") 101 | self.txt.config(font=("consolas", 12), undo=True, wrap='word') 102 | self.txt.grid(row=0, column=0, sticky="nsew", padx=2, pady=2) 103 | 104 | # create a Scrollbar and associate it with txt 105 | scrollb = tki.Scrollbar(self.txt_frm, command=self.txt.yview) 106 | scrollb.grid(row=0, column=1, sticky='nsew') 107 | self.txt['yscrollcommand'] = scrollb.set 108 | 109 | self.button = tki.Button(self.root, text='Resolve Coreference', 110 | command=self.do_coreference) 111 | 112 | self.button.pack() 113 | 114 | def run(self): 115 | self.root.mainloop() 116 | 117 | def do_coreference(self): 118 | testing_corpus = corpora.Corpus("input", [self.p.run_on_doc( 119 | io.StringIO(self.txt.get("0.0", tki.END)), "input")]) 120 | 121 | logging.info("Extracting system mentions.") 122 | for doc in testing_corpus: 123 | doc.system_mentions = mention_extractor.extract_system_mentions(doc) 124 | 125 | mention_entity_mapping, antecedent_mapping = experiments.predict( 126 | testing_corpus, 127 | self.extractor, 128 | self.perceptron, 129 | clusterer.all_ante 130 | ) 131 | 132 | testing_corpus.read_coref_decisions(mention_entity_mapping, antecedent_mapping) 133 | 134 | logging.info("Visualize") 135 | 136 | for doc in testing_corpus: 137 | max_id = 0 138 | 139 | for mention in doc.system_mentions[1:]: 140 | set_id = mention.attributes["set_id"] 141 | 142 | if set_id: 143 | max_id = max(set_id, max_id) 144 | 145 | max_id += 1 146 | 147 | doc.annotated_mentions = [] 148 | 149 | for i, mention in enumerate(doc.system_mentions[1:]): 150 | if mention.attributes["set_id"]: 151 | mention.attributes["annotated_set_id"] = mention.attributes[ 152 | "set_id"] 153 | else: 154 | mention.attributes["annotated_set_id"] = max_id + i 155 | doc.annotated_mentions.append(mention) 156 | 157 | ex = error_extractors.ErrorExtractor(testing_corpus, 158 | spanning_tree_algorithms.recall_accessibility, 159 | spanning_tree_algorithms.precision_system_output) 160 | 161 | ex.add_system(testing_corpus) 162 | 163 | decisions = ex.get_errors() 164 | 165 | visualizer = visualization.Visualizer(decisions, "input", 166 | for_raw_input=True) 167 | 168 | visualizer.run() 169 | 170 | demo = LiveDemo() 171 | 172 | demo.run() 173 | 
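174 | # Editor's note (a hedged sketch, not part of the original script): running 175 | # this demo assumes a trained model pickled as "latent-model-train.obj" in the 176 | # working directory and a CoreNLP distribution at the path hard-coded in 177 | # LiveDemo.__init__ above, e.g.: 178 | # 179 | #     $ python scripts/acl15demo.py 180 | # 181 | # Text typed into the Tk window is preprocessed by the CoreNLP pipeline, 182 | # resolved by the ranking perceptron, and rendered via visualization.Visualizer.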
-------------------------------------------------------------------------------- /scripts/naacl15-demo.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | 4 | from cort.analysis import error_extractors 5 | from cort.analysis import plotting 6 | from cort.analysis import spanning_tree_algorithms 7 | from cort.core import corpora 8 | 9 | 10 | __author__ = 'smartschat' 11 | 12 | 13 | # read in corpora 14 | reference = corpora.Corpus.from_file("reference", codecs.open("dev.gold", "r", 15 | "utf-8")) 16 | pair = corpora.Corpus.from_file("pair", codecs.open("pair-dev.out", "r", "utf-8")) 17 | tree = corpora.Corpus.from_file("tree", codecs.open("tree-dev.out", "r", "utf-8")) 18 | 19 | # optional -- not needed when you only want to compute recall errors 20 | pair.read_antecedents(open('pair-dev.antecedents')) 21 | tree.read_antecedents(open('tree-dev.antecedents')) 22 | 23 | # define error extractor 24 | extractor = error_extractors.ErrorExtractor( 25 | reference, 26 | spanning_tree_algorithms.recall_accessibility, 27 | spanning_tree_algorithms.precision_system_output 28 | ) 29 | 30 | # extract errors 31 | extractor.add_system(pair) 32 | extractor.add_system(tree) 33 | 34 | errors = extractor.get_errors() 35 | 36 | # categorize by mention type of anaphor 37 | by_type = errors.categorize( 38 | lambda err: err[0].attributes["type"] 39 | ) 40 | 41 | 42 | # visualize 43 | by_type.visualize("pair") 44 | 45 | # filter by distance 46 | by_type_filtered = by_type.filter( 47 | lambda err: err[0].attributes["sentence_id"] - err[1].attributes[ 48 | "sentence_id"] <= 3 49 | ) 50 | 51 | # plot 52 | pair_errs = by_type_filtered["pair"]["recall_errors"]["all"] 53 | tree_errs = by_type_filtered["tree"]["recall_errors"]["all"] 54 | 55 | plotting.plot( 56 | [("pair", [(cat, len(errs)) for cat, errs in pair_errs.items()]), 57 | ("tree", [(cat, len(errs)) for cat, errs in tree_errs.items()])], 58 | "Recall Errors", 59 | "Type of anaphor", 60 | "Number of Errors") 61 | 62 | # more advanced features 63 | 64 | # is anaphor a gold mention? 
65 | all_gold = set() 66 | for doc in reference: 67 | for mention in doc.annotated_mentions: 68 | all_gold.add(mention) 69 | 70 | 71 | def is_anaphor_gold(mention): 72 | if mention in all_gold: 73 | return "is_gold" 74 | else: 75 | return "is_not_gold" 76 | 77 | is_ana_gold = by_type.categorize(lambda err: is_anaphor_gold(err[0])) 78 | 79 | # head statistics for NOM errors 80 | from collections import Counter 81 | 82 | for system in ["pair", "tree"]: 83 | nom_rec_errs = by_type[system]["recall_errors"]["all"]["NOM"] 84 | all_heads = [" ".join(err[0].attributes["head"]).lower() for err in nom_rec_errs] 85 | most_common = Counter(all_heads).most_common(10) 86 | print(system, most_common) 87 | 88 | # common errors: 89 | common = { 90 | "common": { 91 | "recall_errors": {}, 92 | "precision_errors": {} 93 | } 94 | } 95 | 96 | common["common"]["recall_errors"]["all"] = errors["pair"]["recall_errors"][ 97 | "all"].intersection(errors["tree"]["recall_errors"]["all"]) 98 | 99 | common["common"]["precision_errors"]["all"] = errors["pair"]["precision_errors"][ 100 | "all"].intersection(errors["tree"]["precision_errors"]["all"]) 101 | 102 | from cort.analysis import data_structures 103 | common = data_structures.StructuredCoreferenceAnalysis( 104 | common, errors.reference, errors.corpora 105 | ) 106 | 107 | # plot decisions 108 | decs = by_type_filtered["pair"]["decisions"]["all"] 109 | prec_errs = by_type_filtered["pair"]["precision_errors"]["all"] 110 | 111 | plotting.plot( 112 | [("decisions", [(cat, len(errs)) for cat, errs in decs.items()]), 113 | ("errors", [(cat, len(errs)) for cat, errs in prec_errs.items()])], 114 | "Decisions and Errors", 115 | "Type of anaphor", 116 | "Number") -------------------------------------------------------------------------------- /scripts/train-and-predict-all.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | 4 | import subprocess 5 | 6 | 7 | __author__ = 'smartschat' 8 | 9 | 10 | def get_extractor(data_set, system): 11 | if system == "closest" or system == "latent": 12 | return "cort.coreference.approaches.mention_ranking.extract_substructures" 13 | elif system == "tree": 14 | return "cort.coreference.approaches.antecedent_trees.extract_substructures" 15 | elif system == "pair": 16 | if data_set == "train": 17 | return "cort.coreference.approaches.mention_pairs" \ 18 | ".extract_training_substructures" 19 | else: 20 | return "cort.coreference.approaches.mention_pairs" \ 21 | ".extract_testing_substructures" 22 | 23 | 24 | def get_perceptron(system): 25 | if system == "pair": 26 | return "cort.coreference.approaches.mention_pairs.MentionPairsPerceptron" 27 | elif system == "closest": 28 | return "cort.coreference.approaches.mention_ranking.RankingPerceptronClosest" 29 | elif system == "latent": 30 | return "cort.coreference.approaches.mention_ranking.RankingPerceptron" 31 | elif system == "tree": 32 | return "cort.coreference.approaches.antecedent_trees.AntecedentTreePerceptron" 33 | 34 | 35 | def get_cost_function(system): 36 | if system == "pair": 37 | return "cort.coreference.cost_functions.null_cost" 38 | else: 39 | return "cort.coreference.cost_functions.cost_based_on_consistency" 40 | 41 | 42 | def get_clusterer(system): 43 | if system == "pair": 44 | return "cort.coreference.clusterer.best_first" 45 | else: 46 | return "cort.coreference.clusterer.all_ante" 47 | 48 | 49 | systems = ["pair", "closest", "latent", "tree"] 50 | data_sets = ["dev", "test"] 51 | 52 | for system 
in systems: 53 | print("Training", system, "on train.") 54 | subprocess.call([ 55 | "cort-train", 56 | "-in", "/data/nlp/martscsn/thesis/data/input/train.auto", 57 | "-out", "model-" + system + "-train.obj", 58 | "-extractor", get_extractor("train", system), 59 | "-perceptron", get_perceptron(system), 60 | "-cost_function", get_cost_function(system), 61 | "-cost_scaling", "100"]) 62 | 63 | print("Training", system, "on dev+train.") 64 | subprocess.call([ 65 | "cort-train", 66 | "-in", "/data/nlp/martscsn/thesis/data/input/train+dev.auto", 67 | "-out", "model-" + system + "-train+dev.obj", 68 | "-extractor", get_extractor("train", system), 69 | "-perceptron", get_perceptron(system), 70 | "-cost_function", get_cost_function(system), 71 | "-cost_scaling", "100"]) 72 | 73 | for data_set in data_sets: 74 | print("Predicting", system, "on", data_set) 75 | if data_set == "dev": 76 | model = "model-" + system + "-train.obj" 77 | else: 78 | model = "model-" + system + "-train+dev.obj" 79 | 80 | subprocess.call([ 81 | "cort-predict-conll", 82 | "-in", "/data/nlp/martscsn/thesis/data/input/" + data_set + 83 | ".auto", 84 | "-model", model, 85 | "-out", system + "-" + data_set + ".out", 86 | "-ante", system + "-" + data_set + ".antecedents", 87 | "-gold", "/data/nlp/martscsn/thesis/data/input/" + data_set + 88 | ".gold", 89 | "-extractor", get_extractor(data_set, system), 90 | "-perceptron", get_perceptron(system), 91 | "-clusterer", get_clusterer(system)]) 92 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name='cort', 6 | version='0.2.4.5', 7 | packages=['cort', 8 | 'cort.analysis', 9 | 'cort.core', 10 | 'cort.test', 11 | 'cort.coreference', 12 | 'cort.test.multigraph', 13 | 'cort.test.analysis', 14 | 'cort.test.core', 15 | 'cort.coreference.multigraph', 16 | 'cort.coreference.approaches', 17 | 'cort.util', 18 | 'cort.preprocessing', 19 | 'stanford_corenlp_pywrapper'], 20 | 21 | url='http://github.com/smartschat/cort', 22 | license='MIT', 23 | author='Sebastian Martschat, Thierry Goeckel, Patrick Claus', 24 | author_email='sebastian.martschat@gmail.com', 25 | description='A coreference resolution research toolkit.', 26 | keywords = ['NLP', 'CL', 'natural language processing', 27 | 'computational linguistics', 'coreference resolution', 28 | 'text analytics'], 29 | classifiers = [ 30 | 'Intended Audience :: Science/Research', 31 | 'Programming Language :: Python :: 2.7', 32 | 'Programming Language :: Python :: 3.3', 33 | 'Topic :: Scientific/Engineering', 34 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 35 | 'Topic :: Text Processing', 36 | ], 37 | install_requires=['nltk >= 3.0.1', 'numpy', 'matplotlib', 'mmh3', 'cython', 38 | 'future', 'jpype1', 'beautifulsoup4', 39 | 'pystanforddependencies >= 0.3.1'], 40 | package_data={ 41 | 'cort': ['analysis/visualization/style.css', 42 | 'analysis/visualization/lib/*', 43 | 'resources/*', 44 | 'config_files/*', 45 | 'coreference/perceptrons.pyx', 46 | "reference-coreference-scorers/v8.01/*.*", 47 | "reference-coreference-scorers/v8.01/lib/*.pm", 48 | "reference-coreference-scorers/v8.01/lib/Algorithm/*", 49 | "reference-coreference-scorers/v8.01/lib/Data/*", 50 | "reference-coreference-scorers/v8.01/lib/Math/*"], 51 | 'stanford_corenlp_pywrapper': ['rcorenlp.r', 52 | 'lib/*', 53 | 'javasrc/corenlp/*', 54 | 'javasrc/util/misc/*', 55 | 'javasrc/util/*.java'], 56 | 
}, 57 | scripts=['bin/cort-train', 'bin/cort-predict-conll', 58 | 'bin/cort-predict-raw', 'bin/cort-visualize', 59 | 'bin/run-multigraph'] 60 | ) 61 | -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from .sockwrap import * 2 | -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/javasrc/corenlp/PipeRunner.java: -------------------------------------------------------------------------------- 1 | package corenlp; 2 | 3 | import org.codehaus.jackson.JsonNode; 4 | 5 | import util.Arr; 6 | import util.BasicFileIO; 7 | import util.JsonUtil; 8 | import util.U; 9 | 10 | /** 11 | * stdin/stdout commandline pipe mode that lightly wraps JsonPipeline. 12 | * 13 | * INPUT: one line per document. 14 | * docid \t TextAsJsonStringOrObjectWithTextField 15 | * OUTPUT: as JSON, one doc per line ("jdoc"). 16 | * docid \t {sentences: [ {sentobj}, {sentobj}, ... ]} 17 | * where each sentobj is 18 | * {tokens: [...], char_offsets: [...], ....} 19 | * 20 | */ 21 | public class PipeRunner { 22 | ProcessingMode mode; 23 | JsonPipeline parse; 24 | 25 | static enum InputFormat { 26 | DETECT_JSON_VARIANT, 27 | RAW_TEXT 28 | }; 29 | 30 | /** the pre-baked processing modes, that define annotators and outputs. */ 31 | static enum ProcessingMode { 32 | NOMODE, 33 | SSPLIT, 34 | POS, 35 | NER, 36 | PARSE, 37 | NERPARSE; 38 | } 39 | static ProcessingMode modeFromString(String _mode) { 40 | return 41 | _mode.equals("nomode") ? ProcessingMode.NOMODE : 42 | _mode.equals("ssplit") ? ProcessingMode.SSPLIT : 43 | _mode.equals("pos") ? ProcessingMode.POS : 44 | _mode.equals("ner") ? ProcessingMode.NER : 45 | _mode.equals("parse") ? ProcessingMode.PARSE : 46 | _mode.equals("nerparse") ? ProcessingMode.NERPARSE : 47 | null; 48 | } 49 | 50 | 51 | static void usage() { 52 | U.p("corenlp.Parse [options] <mode>\n" + 53 | "Processes document texts on stdin and outputs NLP-annotated versions.\n" + 54 | "Both input and output formats are one document per line.\n" + 55 | "\n" + 56 | "Input format can be either\n" + 57 | " one column: TextField\n" + 58 | " two columns: docid \\t TextField\n" + 59 | "Where TextField could be either\n" + 60 | " * a JSON string, or\n" + 61 | " * a JSON object with field 'text'.\n" + 62 | "--raw-input allows the text field to be raw text, interpreted as UTF-8 encoded.\n" + 63 | "Note that JSON strings can be preferable, since they can contain any type of whitespace.\n" + 64 | "\n" + 65 | "In all cases, the output mode is two-column: docid \\t NLPInfoAsJson\n" + 66 | ""); 67 | System.exit(1); 68 | } 69 | 70 | public void runStdinStdout(InputFormat inputFormat) { 71 | for (String line : BasicFileIO.STDIN_LINES) { 72 | System.err.print("."); 73 | 74 | String[] parts = line.split("\t"); 75 | String docid, doctext; 76 | JsonNode payload = null; 77 | if (inputFormat == InputFormat.DETECT_JSON_VARIANT) { 78 | payload = JsonUtil.parse(parts[parts.length-1]); 79 | doctext = 80 | payload.isTextual() ? payload.asText() : 81 | payload.has("text") ? payload.get("text").asText() : 82 | null; 83 | } 84 | else if (inputFormat == InputFormat.RAW_TEXT) { 85 | doctext = parts[parts.length-1]; 86 | } 87 | else { throw new RuntimeException("wtf"); } 88 | 89 | docid = parts.length >= 2 ? parts[0] : 90 | payload != null && payload.has("docid") ? 
payload.get("docid").getTextValue() : 91 | "doc" + parse.numDocs; 92 | 93 | assert docid != null : "inconsistent 'docid' key"; 94 | if (doctext == null) throw new RuntimeException("Couldn't interpret JSON payload: should be string, or else object with a 'text' field."); 95 | 96 | JsonNode outDoc = parse.processTextDocument(doctext); 97 | U.pf("%s\t%s\n", docid, JsonUtil.toJson(outDoc)); 98 | } 99 | 100 | double elapsedSec = 1.0*(System.currentTimeMillis() - parse.startMilli) / 1000; 101 | System.err.print("\n"); 102 | System.err.printf("%d docs, %d tokens, %.1f tok/sec, %.1f byte/sec\n", parse.numDocs, parse.numTokens, parse.numTokens*1.0/elapsedSec, parse.numChars*1.0/elapsedSec); 103 | } 104 | 105 | public static void main(String[] args) { 106 | if (args.length < 1) { 107 | usage(); 108 | } 109 | InputFormat inputFormat = InputFormat.DETECT_JSON_VARIANT; 110 | 111 | while (args.length > 1) { 112 | String flag = args[0]; 113 | if (flag.equals("--raw-input")) { 114 | inputFormat = InputFormat.RAW_TEXT; 115 | args = Arr.subArray(args, 1, args.length); 116 | } 117 | else { throw new RuntimeException("bad flag: " + flag); } 118 | } 119 | 120 | 121 | throw new RuntimeException("TODO need to handle mode parsing; in the meantime this is broken"); 122 | 123 | // PipeRunner runner = new PipeRunner(); 124 | // String _mode = args[0]; 125 | // ProcessingMode mode = modeFromString(_mode); 126 | // if (runner.mode==null) { 127 | // U.pf("Bad mode '%s' ... to disable a mode, use 'nomode'\n", _mode); 128 | // usage(); 129 | // } 130 | // runner.runStdinStdout(inputFormat); 131 | } 132 | 133 | 134 | 135 | } 136 | -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/javasrc/util/JsonUtil.java: -------------------------------------------------------------------------------- 1 | package util; 2 | 3 | import java.io.IOException; 4 | import java.util.*; 5 | import org.codehaus.jackson.JsonNode; 6 | import org.codehaus.jackson.JsonProcessingException; 7 | import org.codehaus.jackson.map.ObjectMapper; 8 | import org.codehaus.jackson.map.type.TypeFactory; 9 | import org.codehaus.jackson.node.*; 10 | 11 | import com.google.common.collect.Multiset; 12 | 13 | import util.misc.Pair; 14 | 15 | /** simplified wrapper functions for the Jackson JSON library 16 | * this is half-baked, still learning the right way to use the library 17 | */ 18 | public class JsonUtil { 19 | 20 | public static ObjectMapper om; 21 | static { 22 | om = new ObjectMapper(); 23 | } 24 | 25 | public static void main(String args[]) { 26 | List x = toList(args[0], String.class); 27 | U.p(x); 28 | } 29 | 30 | public static String getTextDefault(JsonNode ob, String keyname, String defaultValue) { 31 | return ob.has(keyname) ? 
ob.get(keyname).asText() : defaultValue; 32 | } 33 | 34 | ////////////////////////////////////// 35 | 36 | // toList() derived from 37 | // http://stackoverflow.com/questions/9942475/convert-json-to-multiple-objects-using-jackson 38 | 39 | public static <T> ArrayList<T> toList(String jsonString, final Class<T> type) { 40 | try { 41 | return om.readValue(jsonString, TypeFactory.defaultInstance().constructCollectionType(ArrayList.class, type)); 42 | } catch (IOException e) { 43 | return null; 44 | } 45 | } 46 | 47 | public static <T> ArrayList<T> toList(JsonNode jsonNode, final Class<T> type) { 48 | try { 49 | return om.readValue(jsonNode, TypeFactory.defaultInstance().constructCollectionType(ArrayList.class, type)); 50 | } catch (IOException e) { 51 | return null; 52 | } 53 | } 54 | 55 | public static ObjectNode toJson(Multiset<?> counts) { 56 | ObjectNode jmap = newObject(); 57 | for (Multiset.Entry<?> e : counts.entrySet()) { 58 | jmap.put(e.getElement().toString(), e.getCount()); 59 | } 60 | return jmap; 61 | } 62 | 63 | public static <T> JsonNode toJson(final List<T> data) { 64 | ArrayNode jlist = new ObjectMapper().createArrayNode(); 65 | for (T elt : data) { 66 | jlist.add( toJson(elt) ); 67 | } 68 | return jlist; 69 | } 70 | 71 | public static JsonNode toJson(final Pair<?, ?> pair) { 72 | try { 73 | List<Object> x = new ArrayList<>(); 74 | x.add( (Object) pair.first); 75 | x.add( (Object) pair.second); 76 | return new ObjectMapper().valueToTree(x); 77 | } catch(Exception e) { 78 | throw new RuntimeException(e); 79 | } 80 | } 81 | 82 | ///////// from Play framework below 83 | 84 | /** 85 | * Convert an object to JsonNode. 86 | * 87 | * @param data Value to convert in Json. 88 | */ 89 | public static JsonNode toJson(final Object data) { 90 | try { 91 | return om.valueToTree(data); 92 | } catch(Exception e) { 93 | throw new RuntimeException(e); 94 | } 95 | } 96 | 97 | /** 98 | * Convert a JsonNode to a Java value 99 | * 100 | * @param json Json value to convert. 101 | * @param clazz Expected Java value type. 102 | */ 103 | public static <A> A fromJson(JsonNode json, Class<A> clazz) { 104 | try { 105 | return om.treeToValue(json, clazz); 106 | } catch(Exception e) { 107 | throw new RuntimeException(e); 108 | } 109 | } 110 | 111 | /** 112 | * Creates a new empty ObjectNode. 113 | */ 114 | public static ObjectNode newObject() { 115 | return om.createObjectNode(); 116 | } 117 | 118 | /** 119 | * Convert a JsonNode to its string representation. 120 | */ 121 | public static String stringify(JsonNode json) { 122 | return json.toString(); 123 | } 124 | 125 | /** 126 | * Parse a String representing a json, and return it as a JsonNode. 
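* (Editor's note) Failures here surface as an unchecked RuntimeException, unlike readJson below, which throws checked exceptions; readJsonNX returns null on failure instead.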
127 | */ 128 | public static JsonNode parse(String src) { 129 | try { 130 | return om.readValue(src, JsonNode.class); 131 | } catch(Throwable t) { 132 | throw new RuntimeException(t); 133 | } 134 | } 135 | 136 | public static JsonNode readJson(String jsonStr) throws JsonProcessingException, IOException { 137 | return om.readTree(jsonStr); 138 | } 139 | 140 | public static JsonNode readJsonNX(String jsonStr) { 141 | try { 142 | return om.readTree(jsonStr); 143 | } catch (IOException e) { 144 | e.printStackTrace(); 145 | return null; 146 | } 147 | } 148 | 149 | } 150 | -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/javasrc/util/misc/Triple.java: -------------------------------------------------------------------------------- 1 | package util.misc; 2 | 3 | /** 4 | * borrowed from berkeley nlp libraries which we were told was apache licensed 5 | */ 6 | public class Triple<S, T, U> { 7 | public S first; 8 | public T second; 9 | public U third; 10 | 11 | public Triple(S first, T second, U third) { 12 | this.first = first; 13 | this.second = second; 14 | this.third = third; 15 | } 16 | 17 | @Override 18 | public int hashCode() { 19 | final int prime = 31; 20 | int result = 1; 21 | result = prime * result + ((first == null) ? 0 : first.hashCode()); 22 | result = prime * result + ((second == null) ? 0 : second.hashCode()); 23 | result = prime * result + ((third == null) ? 0 : third.hashCode()); 24 | return result; 25 | } 26 | 27 | @Override 28 | public boolean equals(Object obj) { 29 | if (this == obj) 30 | return true; 31 | if (obj == null) 32 | return false; 33 | if (getClass() != obj.getClass()) 34 | return false; 35 | final Triple other = (Triple) obj; 36 | if (first == null) { 37 | if (other.first != null) 38 | return false; 39 | } else if (!first.equals(other.first)) 40 | return false; 41 | if (second == null) { 42 | if (other.second != null) 43 | return false; 44 | } else if (!second.equals(other.second)) 45 | return false; 46 | if (third == null) { 47 | if (other.third != null) 48 | return false; 49 | } else if (!third.equals(other.third)) 50 | return false; 51 | return true; 52 | } 53 | 54 | public String toString() { 55 | return String.format("(%s,%s,%s)",first,second,third); 56 | } 57 | 58 | public static <S, T, U> Triple<S, T, U> makeTriple(S s, T t, U u) { 59 | // TODO Auto-generated method stub 60 | return new Triple<S, T, U>(s,t,u); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/lib/corenlpwrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/stanford_corenlp_pywrapper/lib/corenlpwrapper.jar -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/lib/guava-13.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/stanford_corenlp_pywrapper/lib/guava-13.0.1.jar -------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/lib/jackson-all-1.9.11.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/stanford_corenlp_pywrapper/lib/jackson-all-1.9.11.jar 
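[Editor's addition, not a repository file] A minimal sketch of a round trip through util.JsonUtil above, assuming the Jackson 1.9 jar from lib/ is on the classpath; the example class name and literal values are illustrative only.

package util;

import org.codehaus.jackson.JsonNode;

public class JsonUtilExample {
    public static void main(String[] args) {
        // parse() wraps any failure in an unchecked RuntimeException
        JsonNode doc = JsonUtil.parse("{\"docid\": \"d1\", \"text\": \"Barbara Bush spoke.\"}");
        // getTextDefault() falls back to the default when the key is absent
        String docid = JsonUtil.getTextDefault(doc, "docid", "doc0"); // "d1"
        String lang = JsonUtil.getTextDefault(doc, "lang", "en");     // "en"
        System.out.println(docid + "\t" + lang + "\t" + JsonUtil.stringify(doc));
    }
}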
-------------------------------------------------------------------------------- /stanford_corenlp_pywrapper/rcorenlp.r: -------------------------------------------------------------------------------- 1 | # R wrapper for the Java JSON pipe server for CoreNLP 2 | library(rjson) 3 | library(stringr) 4 | 5 | # paste from python sockwrap.py modes_json 6 | MODES = rjson::fromJSON( 7 | "{\"ssplit\":{\"annotators\":\"tokenize, ssplit\",\"description\":\"tokenization and sentence splitting (included in all subsequent ones)\"},\"coref\":{\"annotators\":\"tokenize, ssplit, pos, lemma, ner, entitymentions, parse, dcoref\",\"description\":\"Coreference, including constituent parsing.\"},\"pos\":{\"annotators\":\"tokenize, ssplit, pos, lemma\",\"description\":\"POS (and lemmas)\"},\"parse\":{\"annotators\":\"tokenize, ssplit, pos, lemma, parse\",\"description\":\"fairly basic parsing with POS, lemmas, trees, dependencies\"},\"nerparse\":{\"annotators\":\"tokenize, ssplit, pos, lemma, ner, entitymentions, parse\",\"description\":\"parsing with NER, POS, lemmas, dependencies.\"},\"ner\":{\"annotators\":\"tokenize, ssplit, pos, lemma, ner, entitymentions\",\"description\":\"POS and NER (and lemmas)\"}}" 8 | ) 9 | 10 | CoreNLP = function( 11 | mode=NULL, 12 | configdict=list(annotators="tokenize, ssplit"), 13 | corenlp_jars=c( 14 | "/home/sw/corenlp/stanford-corenlp-full-2015-04-20/*", 15 | "/home/sw/stanford-srparser-2014-10-23-models.jar"), 16 | java_command="java", 17 | java_options="-Xmx4g -XX:ParallelGCThreads=1", 18 | outpipe_filename_prefix="/tmp/corenlp_rwrap_pipe", 19 | ... 20 | ) { 21 | 22 | # If a mode is specified, set the annotators on the configdict. 23 | if (!is.null(mode)) { 24 | stopifnot(mode %in% names(MODES)) 25 | configdict[['annotators']] = MODES[[mode]][['annotators']] 26 | } 27 | 28 | # Extra arguments are put into the configdict. 29 | 30 | moreargs = list(...) 31 | for (k in names(moreargs)) { 32 | configdict[[k]] = moreargs[[k]] 33 | } 34 | 35 | corenlp = list() 36 | corenlp$outpipe_filename = sprintf("%s_rpid=%s_time=%s", outpipe_filename_prefix, Sys.getpid(), as.numeric(Sys.time())) 37 | 38 | cmd = "exec JAVA_COMMAND JAVA_OPTIONS -cp 'CLASSPATH' \ 39 | corenlp.SocketServer COMM_INFO MORE_CONFIG" 40 | cmd = str_replace(cmd, "JAVA_COMMAND", java_command) 41 | cmd = str_replace(cmd, "JAVA_OPTIONS", java_options) 42 | # How to specify location of resources in R? there's no __FILE__ equivalent 43 | # Packages are the only way? Too bad. 44 | jars = c("lib/corenlpwrapper.jar", "lib/*") 45 | jars = c(jars, corenlp_jars) 46 | cmd = str_replace(cmd, "CLASSPATH", str_join(jars, collapse=":")) 47 | cmd = str_replace(cmd, "COMM_INFO", sprintf("--outpipe %s", corenlp$outpipe_filename)) 48 | cmd = str_replace(cmd, "MORE_CONFIG", sprintf(" --configdict '%s'", rjson::toJSON(configdict))) 49 | 50 | cmd = str_replace_all(cmd, "\n", " ") 51 | logmessage(sprintf("Starting with command: %s\n", cmd)) 52 | 53 | # - I'm not sure how R encodings work 54 | # - pipe() in write mode seems to block until the subprocess tries to read 55 | # from stdin. Perfect, so we don't need to check for that. 
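# (Editor's note) readresult() below expects each reply on the named pipe to be
# framed as an 8-byte big-endian length followed by that many bytes of JSON,
# which it decodes with rjson::fromJSON.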
56 | corenlp$pipe = pipe(cmd, "wb", encoding="UTF-8") 57 | system(sprintf("mkfifo %s", corenlp$outpipe_filename)) 58 | corenlp$outpipe = file(corenlp$outpipe_filename, "rb", encoding="UTF-8", raw=TRUE) 59 | 60 | class(corenlp) = "corenlp_wrapper" 61 | corenlp 62 | } 63 | 64 | logmessage = function(msg) cat(sprintf("INFO:CoreNLP_RWrapper:%s", msg), file=stderr()) 65 | 66 | readresult = function(outpipe) { 67 | # TESTING 68 | # readresult(file("return.bin","rb", raw=TRUE)) 69 | size = readBin(outpipe, 'integer', n=1, endian='big', size=8) 70 | cat(sprintf("Returned size %s\n", size)) 71 | stopifnot(size > 0) 72 | # does useBytes=TRUE circumvent the encoding declaration earlier? 73 | result = readChar(outpipe, size, useBytes=TRUE) 74 | result = rjson::fromJSON(result) 75 | result 76 | } 77 | 78 | 79 | parsedoc = function(corenlp, string) { 80 | command = sprintf("PARSEDOC\t%s", rjson::toJSON(string)) 81 | writeLines(command, corenlp$pipe) 82 | flush(corenlp$pipe) 83 | readresult(corenlp$outpipe) 84 | } 85 | 86 | close.corenlp_wrapper = function(corenlp) { 87 | close(corenlp$outpipe) 88 | close(corenlp$pipe) 89 | system(sprintf("rm -f %s", corenlp$outpipe_filename)) 90 | } 91 | -------------------------------------------------------------------------------- /tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/tree.png -------------------------------------------------------------------------------- /visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartschat/cort/2349f0308a4115acb89d442fe945533bdb3b70e2/visualization.png --------------------------------------------------------------------------------