├── .gitignore ├── LICENSE.txt ├── README.md ├── SMALLDevOPCs.txt ├── SMALLTrainOPCs.txt ├── modifiedBCS ├── LICENSE.txt ├── README.md ├── WriteCoNLLPreds.sh ├── base.conf ├── build.sbt ├── lib │ ├── BerkeleyParser-1.7.jar │ └── futile.jar ├── moarcoref-assembly-1.jar ├── project │ └── assembly.sbt └── src │ └── main │ └── java │ └── edu │ ├── berkeley │ └── nlp │ │ └── coref │ │ ├── ConjType.java │ │ ├── ConllDoc.scala │ │ ├── ConllDocReader.scala │ │ ├── ConllDocWriter.scala │ │ ├── CorefConllScorer.scala │ │ ├── CorefDoc.scala │ │ ├── CorefDocAssembler.scala │ │ ├── CorefEvaluator.scala │ │ ├── CorefFeaturizerTrainer.scala │ │ ├── CorefSystem.scala │ │ ├── Decoder.scala │ │ ├── DepConstTree.scala │ │ ├── DocumentGraph.scala │ │ ├── DocumentInferencer.scala │ │ ├── DocumentInferencerBasic.scala │ │ ├── DocumentInferencerBinary.scala │ │ ├── DocumentInferencerLoopy.scala │ │ ├── DocumentInferencerOracle.scala │ │ ├── DocumentInferencerRahman.scala │ │ ├── Driver.java │ │ ├── EntityFeaturizer.scala │ │ ├── Feature.scala │ │ ├── GUtil.scala │ │ ├── Gender.java │ │ ├── LexicalCountsBundle.scala │ │ ├── Mention.scala │ │ ├── MentionPropertyComputer.scala │ │ ├── MentionType.java │ │ ├── Number.java │ │ ├── NumberGenderComputer.scala │ │ ├── OraclePosteriorSampler.scala │ │ ├── OrderedClustering.scala │ │ ├── OrderedClusteringBound.scala │ │ ├── PairwiseIndexingFeaturizer.scala │ │ ├── PairwiseIndexingFeaturizerJoint.scala │ │ ├── PairwiseLossFunctions.scala │ │ ├── PairwiseScorer.scala │ │ ├── PronounDictionary.scala │ │ ├── PruningStrategy.scala │ │ ├── WordNetInterfacer.scala │ │ ├── bp │ │ ├── DocumentFactorGraph.scala │ │ ├── Domain.scala │ │ ├── Factor.scala │ │ └── Node.scala │ │ ├── lang │ │ ├── ArabicTreebankLanguagePack.java │ │ ├── CorefLanguagePack.scala │ │ ├── Language.java │ │ ├── ModArabicHeadFinder.java │ │ └── ModCollinsHeadFinder.java │ │ ├── nchains │ │ └── DiscourseAnalyzer.scala │ │ ├── preprocess │ │ ├── NerDriver.java │ │ ├── NerExample.scala │ │ ├── NerSystem.scala │ │ ├── PreprocessingDriver.java │ │ ├── Reprocessor.scala │ │ ├── SentenceSplitter.scala │ │ └── SentenceSplitterTokenizerDriver.java │ │ └── sem │ │ ├── QueryCountAnalyzer.scala │ │ ├── QueryCountCollector.scala │ │ └── QueryCountsBundle.scala │ └── harvard │ └── nlp │ └── moarcoref │ ├── AnimacyHelper.java │ ├── FeatureExtractor.scala │ ├── MiniDriver.java │ ├── MoarLexicalCountsBundle.scala │ ├── SeparatingFeaturizer.scala │ ├── SeparatingFeaturizerKeepFirst.scala │ ├── SmallerSeparatingFeaturizer.scala │ └── TextPickler.scala ├── nn ├── ana_model.lua ├── ante_model.lua ├── clust_batcher.lua ├── coref_utils.lua ├── model_utils.lua ├── mr_clust_embed.lua ├── sparse_doc_data.lua └── vanilla_mr.lua ├── nncoref_acl15_slides.pdf ├── nncoref_naacl16_slides.pdf ├── reference-coreference-scorers └── v8.01 │ ├── README.txt │ ├── scorer.bat │ ├── scorer.pl │ └── test │ ├── CorefMetricTest.pm │ ├── CorefMetricTestConfig.pm │ ├── DataFiles │ ├── TC-A-1.response │ ├── TC-A-10.response │ ├── TC-A-11.response │ ├── TC-A-12.response │ ├── TC-A-13.response │ ├── TC-A-2.response │ ├── TC-A-3.response │ ├── TC-A-4.response │ ├── TC-A-5.response │ ├── TC-A-6.response │ ├── TC-A-7.response │ ├── TC-A-8.response │ ├── TC-A-9.response │ ├── TC-A.key │ ├── TC-B-1.response │ ├── TC-B.key │ ├── TC-C-1.response │ ├── TC-C.key │ ├── TC-D-1.response │ ├── TC-D.key │ ├── TC-E-1.response │ ├── TC-E.key │ ├── TC-F-1.response │ ├── TC-F.key │ ├── TC-G-1.response │ ├── TC-G.key │ ├── TC-H-1.response │ ├── TC-H.key │ ├── TC-I-1.response │ 
├── TC-I.key │ ├── TC-J-1.response │ ├── TC-J.key │ ├── TC-K-1.response │ ├── TC-K.key │ ├── TC-L-1.response │ ├── TC-L.key │ ├── TC-M-1.response │ ├── TC-M-2.response │ ├── TC-M-3.response │ ├── TC-M-4.response │ ├── TC-M-5.response │ ├── TC-M-6.response │ ├── TC-M.key │ ├── TC-N-1.response │ ├── TC-N-2.response │ ├── TC-N-3.response │ ├── TC-N-4.response │ ├── TC-N-5.response │ ├── TC-N-6.response │ └── TC-N.key │ ├── TestCases.README │ └── test.pl └── text_feats_to_hdf5_replacezero.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # latex things 57 | *.aux 58 | *.out 59 | *.synctex.gz 60 | *.pdf 61 | *.blg 62 | *.bbl 63 | 64 | # temp files 65 | *~ 66 | 67 | # Compiled Lua sources 68 | luac.out 69 | 70 | # luarocks build files 71 | *.src.rock 72 | *.zip 73 | *.tar.gz 74 | 75 | # Object files 76 | *.o 77 | *.os 78 | *.ko 79 | *.obj 80 | *.elf 81 | 82 | # Precompiled Headers 83 | *.gch 84 | *.pch 85 | 86 | # Libraries 87 | *.lib 88 | *.a 89 | *.la 90 | *.lo 91 | *.def 92 | *.exp 93 | 94 | # Shared objects (inc. Windows DLLs) 95 | *.dll 96 | *.so 97 | *.so.* 98 | *.dylib 99 | 100 | # Executables 101 | *.exe 102 | *.out 103 | *.app 104 | *.i*86 105 | *.x86_64 106 | *.hex 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nn_coref 2 | Neural Coref Models, as described in 3 | ["Learning Global Features for Coreference Resolution"](http://nlp.seas.harvard.edu/papers/corefmain.pdf), Sam Wiseman, Alexander M. Rush, and Stuart M. Shieber, NAACL 2016, 4 | 5 | and 6 | 7 | ["Learning Anaphoricity and Antecedent Ranking Features for Coreference Resolution"](http://people.seas.harvard.edu/~srush/acl15.pdf), Sam Wiseman, Alexander M. Rush, Stuart M. Shieber, and Jason Weston. ACL 2015. 8 | 9 | For questions/concerns/bugs please contact swiseman at seas.harvard.edu. 10 | 11 | 12 | ## Overview 13 | To keep things simple, the original ACL code is now in the acl15 branch. This README will cover duplicating the NAACL 2016 results. 14 | 15 | ## Prerequisites 16 | In addition to torch, nn, and the prerequisites listed in modifiedBCS/README.md, you will need the Element-Research rnn library: https://github.com/Element-Research/rnn 17 | 18 | ## Generating Features 19 | See the README in the modifiedBCS/ directory for running the Scala feature/mention extractor. 
Once you've generated text feature files, use text_feats_to_hdf5_replacezero.py to convert them to hdf5 (to be consumed by Torch), as follows: 20 | 21 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-anaphTrainFeats.txt train_small ana -n 4 -r 14215``` 22 | 23 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-anaphDevFeats.txt dev_small ana -n 4 -r 14215``` 24 | 25 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-anaphTestFeats.txt test_small ana -n 4 -r 14215``` 26 | 27 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-pwTrainFeats.txt train_small pw -n 4 -r 28394``` 28 | 29 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-pwDevFeats.txt dev_small pw -n 4 -r 28394``` 30 | 31 | ```python text_feats_to_hdf5_replacezero.py SMALL-FINAL+MOARANAPH+MOARPW-pwTestFeats.txt test_small pw -n 4 -r 28394``` 32 | 33 | The "-r" argument takes the index of a dummy feature used to replace features unseen in the training set; above it is set to be one greater than the number of training features (and should never be less than this). The "-n" argument controls the number of processes spawned by the script. 34 | 35 | You can also download bzipped hdf5 features here: https://drive.google.com/folderview?id=0B1ytQXPDuw7OVzI3MlRLMEFCcHM&usp=sharing 36 | 37 | **Before doing any training or pre-training, please create a directory called nn/models/** 38 | 39 | ## Pre-training 40 | Given the hdf5 files generated in the previous step, you can pre-train anaphoricity and pairwise networks as follows: 41 | 42 | ```th ana_model.lua``` 43 | 44 | ```th ante_model.lua -gpuid 0``` 45 | 46 | See the respective files for additional options and documentation. 47 | 48 | You can download bzipped pre-trained anaphoricity and pairwise networks from https://drive.google.com/folderview?id=0B1ytQXPDuw7OYUcwSEVPRjFEM00&usp=sharing , where they are called small_200.model-na-0.100000.bz2 and small_700.model-pw-0.100000.bz2, respectively. 49 | 50 | ## Training the Full Model 51 | Assuming you've put your pre-trained networks in nn/models/, you can now train the full model as follows: 52 | 53 | ```th mr_clust_embed.lua -gpuid 0 -PT -save -savePfx trpldev``` 54 | 55 | The default settings in mr_clust_embed.lua reflect those used in our final experiments (and so, for instance, both dev and train will be used as training data), but see the file for additional options and documentation. 56 | 57 | You can download bzipped trained full model components from https://drive.google.com/folderview?id=0B1ytQXPDuw7OYUcwSEVPRjFEM00&usp=sharing , where the relevant files are trpldev-mce-700-200.model-na.bz2, trpldev-mce-700-200.model-pw.bz2, and trpldev-mce-700-200.model-lstm.bz2 58 | 59 | ## Predicting with Saved Models 60 | If you've trained (or downloaded) full model components, you can make predictions as follows: 61 | 62 | - If they don't exist, create the directories nn/bps/ and nn/conllouts/ . 63 | - Run ```th mr_clust_embed.lua -gpuid 0 -loadAndPredict -pwDevFeatPrefix test_small -anaDevFeatPrefix test_small -savedPWNetFi models/trpldev-mce-700-200.model-pw -savedNANetFi models/trpldev-mce-700-200.model-na -savedLSTMFi models/trpldevdup-mce-700-200.model-lstm``` 64 | - The above will create a back-pointer file in bps/ . Suppose the file is called bps/xyzdev.bps . Then to generate a CoNLL output file, run ```../modifiedBCS/WriteCoNLLPreds.sh bps bps/xyzdev.bps conllouts ../flat_test_2012/ ../gender.data``` 65 | - N.B. You may need to modify the paths to the jar files on the second line of modifiedBCS/WriteCoNLLPreds.sh to get this to work. 66 | - The resulting output file (in conllouts/) can now be scored using the standard CoNLL scorer; see the sketch below. 67 |
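For reference, here is a minimal sketch (not a script shipped with this repo) of scoring one of these output files with the bundled reference scorer, reference-coreference-scorers/v8.01/scorer.pl, using the same invocation that CorefConllScorer uses internally. The key/response paths are placeholders: point them at your gold CoNLL key file and at the file written to conllouts/.

```scala
// Minimal scoring sketch; the file paths passed in are placeholders.
import scala.sys.process._

object ScoreConllOutput {
  def main(args: Array[String]) {
    val scorerPath = "reference-coreference-scorers/v8.01/scorer.pl";
    val goldKey = args(0);  // your gold CoNLL key file for the same documents
    val response = args(1); // e.g., the .out file WriteCoNLLPreds.sh wrote to conllouts/
    // "all" computes MUC, B-cubed, CEAF, and BLANC; the trailing "none" scores all documents.
    val summary = Process(Seq("perl", scorerPath, "all", goldKey, response, "none")).lines;
    summary.foreach(println);
  }
}
```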
68 | Training as in the previous sub-section and evaluating as above should produce results very close to those in the NAACL paper, and probably a bit better. After re-training the cleaned-up and re-factored version in this repo, I got P/R/F scores of: 69 | 70 | MUC: 77.14/70.12/73.46 71 | 72 | BCUB: 66.43/57.47/61.62 73 | 74 | CEAFe: 62.29/54.01/57.85 75 | 76 | CoNLL: 64.31 77 | 78 | ## Training the ACL (non-cluster) Model 79 | The mention-ranking model from the ACL paper has been re-implemented and considerably simplified in vanilla_mr.lua. It can be run as follows: 80 | 81 | ```th vanilla_mr.lua -gpuid 0 -PT``` 82 | 83 | Unlike the original ACL implementation, this implementation is easy to run on a GPU, and with the new, even smaller feature set it should do at least as well. 84 | 85 | ## Copyright 86 | Copyright (c) 2016 Sam Wiseman. All Rights Reserved. 87 | 88 | ## License 89 | The code in this repository is covered by the GNU GPL license. See LICENSE.txt. 90 | 91 | -------------------------------------------------------------------------------- /modifiedBCS/README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This directory contains the code necessary for extracting features and "oracle predicted clusters" (which are used as supervision) from the English CoNLL data. 4 | 5 | We use code written on top of the Berkeley Coref System (BCS) v1.1 (see http://nlp.cs.berkeley.edu/projects/coref.shtml) to extract features, and so we have included the BCS v1.1 code along with its license and dependencies here. The BCS code is in src/main/java/edu/berkeley/* and any additional code we have added is in src/main/java/edu/harvard/* . 6 | 7 | ## Compilation 8 | 9 | Although we provide a pre-compiled jar ("moarcoref-assembly-1.jar") in the modifiedBCS/ directory, you can use sbt to re-compile the Scala and Java source. After downloading sbt (www.scala-sbt.org), simply type 10 | 11 | ``` 12 | sbt assembly 13 | ``` 14 | 15 | from inside the modifiedBCS/ directory, which will produce a runnable jar in the target/ subdirectory. 16 | 17 | ## Data Prerequisites 18 | 19 | To extract features you will need the CoNLL 2012 English train, development, and test data, as well as the number and gender data that goes along with it. See http://conll.cemantix.org/2012/data.html for instructions on downloading and extracting it. 20 | 21 | BCS expects the CoNLL data to be in a flattened directory structure, so that all train, development, and test files are in flat train, development, and test directories (resp.).
If you've extracted the CoNLL data into a top-level directory called conll-2012/, you can create a flattened train directory flat_train_2012/ using the following python code: 22 | 23 | ```python 24 | import subprocess 25 | import shutil 26 | import os 27 | 28 | def flatten(root_dir,flat_dir,file_suf="auto_conll"): 29 | if not os.path.exists(flat_dir): 30 | os.makedirs(flat_dir) 31 | 32 | matches = subprocess.check_output("find %s -name *%s" % (root_dir,file_suf),shell=True)  # list every file ending in file_suf under root_dir 33 | matches = matches.decode("utf-8").split('\n')[:-1]  # decode bytes so this also works under Python 3 34 | for match in matches: 35 | match_fields = match.split('/') 36 | shutil.copyfile(match, os.path.join(flat_dir,match_fields[-4]+"_"+match_fields[-1])) 37 | 38 | 39 | flatten("conll-2012/v4/data/train/data/english", "flat_train_2012") 40 | ``` 41 | 42 | The same goes for creating flattened development and test directories. 43 | 44 | You will also need the list of animate and inanimate unigrams used by the Stanford Coref system. These can be found in the Stanford CoreNLP models jar under edu.stanford.nlp.models.dcoref . 45 | 46 | ## Running 47 | 48 | To extract the features described in the (NAACL) paper, first create a directory to store log files (say, `execdir`), and then type the following: 49 | 50 | ``` 51 | java -jar -Xmx30g modifiedBCS/target/scala-2.11/moarcoref-assembly-1.jar ++modifiedBCS/base.conf -execDir execdir -numberGenderData gender.data -animacyPath animate.unigrams.txt -inanimacyPath inanimate.unigrams.txt -trainPath flat_train_2012 -devPath flat_dev_2012 -testPath flat_test_2012 -mode SMALLER -conjType NONE -pairwiseFeats FINAL+MOARANAPH+MOARPW 52 | ``` 53 | 54 | The above assumes the gender and animacy files are in the current directory, and that the flattened CoNLL directories are flat_train_2012/, flat_dev_2012/, and flat_test_2012/. 55 | 56 | The pairwiseFeats argument specifies which features to extract; the value `FINAL+MOARANAPH+MOARPW` corresponds to the features described in the paper. 57 | 58 | There are additional options described in edu.harvard.nlp.moarcoref.MiniDriver.java. 59 | 60 | ## Output Generated 61 | 62 | Running as above should give you 10 files, as follows: 63 | 64 | - SMALL-FINAL+MOARANAPH+MOARPW-anaph\[Train|Dev|Test\]Feats.txt 65 | 66 | Anaphoricity features. These files put each document on its own line, with each line having the following format: 67 | 68 | ``` 69 | num_mentions_in_doc|ment_0_feat_0 ment_0_feat_1 ...|ment_n_feat_0 ... 70 | ``` 71 | 72 | where n is the number of mentions in the document. 73 | 74 | - SMALL-FINAL+MOARANAPH+MOARPW-pw\[Train|Dev|Test\]Feats.txt 75 | 76 | Pairwise features. These files put each document on its own line, with each line having the following format: 77 | 78 | ``` 79 | num_mentions_in_doc|ment_0_ant_0_feat_0 ment_0_ant_0_feat_1 ...|ment_1_ant_0_feat_0 ment_1_ant_0_feat_1 ...|...|ment_n_ant_n_feat_0 ... 80 | ``` 81 | 82 | As such, there are n(n+1)/2 cells containing features on each line (one for each mention-antecedent pair, plus the self-link mention-mention pairs), and n(n+1)/2+1 cells in total, because the first cell contains the number of mentions. Since the pairwise features do not make sense for the self-link mention-mention pairs, we simply insert a dummy integer in the corresponding cell. 83 | 84 | - SMALL-FINAL+MOARANAPH+MOARPW-\[anaph|pw\]Mapping.txt 85 | 86 | A file mapping feature index numbers to feature descriptions.
Each feature is on its own line, and the format is: 87 | 88 | ``` 89 | feature_idx : feature_description 90 | ``` 91 | 92 | - SMALL\[Train|Dev\]OPCs.txt 93 | 94 | Oracle Predicted Clustering files. These are the clusterings induced by the true gold clusters on the mentions extracted by the automatic mention extractor, and they constitute the supervision for this task. Again each document is on its own line, where each line contains clusters separated by a `|`, and the mention indices within a cluster are separated by a space, and are in ascending order. For example the following line 95 | 96 | ``` 97 | 0|1 2 4|3 98 | ``` 99 | 100 | indicates that there are 3 clusters over 5 mentions, with the first and third cluster just containing the first and fourth mentions (resp.), and the second cluster containing the 2nd, 3rd, and 5th mentions. 101 | 102 | ## System Requirements 103 | 104 | In addition to sbt you will need java. When running without any real memory restrictions, feature extraction requires around 30GB of RAM; it's likely that you can get away with a bit less than this, however. 105 | 106 | -------------------------------------------------------------------------------- /modifiedBCS/WriteCoNLLPreds.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exec scala -J-Xmx3G -classpath "moarcoref-assembly-1.jar:lib/futile.jar:lib/BerkeleyParser-1.7.jar" "$0" "$@" 3 | !# 4 | 5 | import java.io._ 6 | import scala.collection.mutable.ListBuffer 7 | import scala.collection.mutable.ArrayBuffer 8 | import scala.io.Source 9 | import edu.berkeley.nlp.coref.NumberGenderComputer 10 | import edu.berkeley.nlp.coref._ 11 | import edu.berkeley.nlp.futile.fig.basic.IOUtils 12 | import edu.berkeley.nlp.futile.util.Logger 13 | 14 | object BP2CoNLL { 15 | 16 | // the following two functions are just copied from BCS CorefSystem.scala 17 | def checkFileReachableForRead(file: String, msg: String) { 18 | if (file.isEmpty) { 19 | throw new RuntimeException("Undefined " + msg + "; must be defined for the mode you're running in"); 20 | } 21 | if (!new File(file).exists()) { 22 | throw new RuntimeException(msg + " file/directory doesn't exist for read: " + file); 23 | } 24 | } 25 | def checkFileReachableForWrite(file: String, msg: String) { 26 | if (file.isEmpty) { 27 | throw new RuntimeException("Undefined " + msg + "; must be defined for the mode you're running in"); 28 | } 29 | 30 | if (file.contains("/") && !new File(file).getParentFile().exists()) { 31 | throw new RuntimeException(msg + " file/directory couldn't be opened for write: " + file); 32 | } 33 | } 34 | 35 | // same as original, except we sort files by names so we can dump features and then repredict 36 | def loadRawConllDocs(path: String, size: Int, gold: Boolean): Seq[ConllDoc] = { 37 | val suffix = if (gold) "gold_conll" else Driver.docSuffix; 38 | Logger.logss("Loading " + size + " docs from " + path + " ending with " + suffix); 39 | val files = new File(path).listFiles().filter(file => file.getAbsolutePath.endsWith(suffix)); //.sorted; 40 | val reader = new ConllDocReader(Driver.lang); 41 | val docs = new ArrayBuffer[ConllDoc]; 42 | var docCounter = 0; 43 | var fileIdx = 0; 44 | while (fileIdx < files.size && (size == -1 || docCounter < size)) { 45 | val newDocs = reader.readConllDocs(files(fileIdx).getAbsolutePath); 46 | docs ++= newDocs; 47 | docCounter += newDocs.size 48 | fileIdx += 1; 49 | } 50 | val numDocs = if (size == -1) docs.size else Math.min(size, files.size); 51 | 
Logger.logss(docs.size + " docs loaded from " + fileIdx + " files, retaining " + numDocs); 52 | if (docs.size == 0) { 53 | Logger.logss("WARNING: Zero docs loaded...double check your paths unless you meant for this happen"); 54 | } 55 | val docsToUse = docs.slice(0, numDocs); 56 | 57 | docsToUse; 58 | } 59 | 60 | // same as in original 61 | def loadCorefDocs(path: String, size: Int, numberGenderComputer: NumberGenderComputer, gold: Boolean): Seq[CorefDoc] = { 62 | val docs = loadRawConllDocs(path, size, gold); 63 | val assembler = CorefDocAssembler(Driver.lang, Driver.useGoldMentions); 64 | val mentionPropertyComputer = new MentionPropertyComputer(numberGenderComputer); 65 | val corefDocs = docs.map(doc => assembler.createCorefDoc(doc, mentionPropertyComputer)); 66 | CorefDoc.checkGoldMentionRecall(corefDocs); 67 | corefDocs; 68 | } 69 | 70 | def main(args: Array[String]) { 71 | val indir = args(0); 72 | val bpfi = args(1); 73 | val outdir = args(2); 74 | val devPath = args(3); 75 | val ngPath = args(4); 76 | val numberGenderComputer = NumberGenderComputer.readBergsmaLinData(ngPath); 77 | val devDGs = loadCorefDocs(devPath, -1, numberGenderComputer, false).map(new DocumentGraph(_, false)).sortBy(_.corefDoc.rawDoc.printableDocName); 78 | //val files = new File(indir).listFiles().filter(file => file.getAbsolutePath.contains(".bps")); 79 | val files = new File(indir).listFiles().filter(file => file.getAbsolutePath.contains(bpfi)); 80 | for (fi <- files) { 81 | println("doing " + fi.getAbsolutePath()); 82 | val bps = ListBuffer[Array[Int]](); 83 | for (line <- Source.fromFile(fi.getAbsolutePath()).getLines()) { 84 | val preds = line.split(' '); 85 | bps += preds.map(x => x.toInt); 86 | } 87 | val allPredBackptrs = bps.toArray; 88 | val allPredClusterings = (0 until devDGs.size).map(i => OrderedClustering.createFromBackpointers(allPredBackptrs(i))).toArray; 89 | val writer = IOUtils.openOutHard(outdir+"/" + fi.getName() + ".out"); 90 | for (i <- 0 until devDGs.size) { 91 | val outputClustering = new OrderedClusteringBound(devDGs(i).getMentions, allPredClusterings(i)); 92 | ConllDocWriter.writeDoc(writer, devDGs(i).corefDoc.rawDoc, outputClustering.postprocessForConll()); 93 | } 94 | writer.close(); 95 | } 96 | } 97 | 98 | } 99 | 100 | BP2CoNLL.main(args) 101 | -------------------------------------------------------------------------------- /modifiedBCS/base.conf: -------------------------------------------------------------------------------- 1 | create true 2 | useStandardExecPoolDirStrategy false 3 | overwriteExecDir true 4 | execDir specify_execDir 5 | -------------------------------------------------------------------------------- /modifiedBCS/build.sbt: -------------------------------------------------------------------------------- 1 | name := "moarcoref" 2 | 3 | version := "1" 4 | 5 | scalaVersion := "2.11.7" 6 | 7 | mainClass in assembly := Some("edu.harvard.nlp.moarcoref.MiniDriver") 8 | 9 | -------------------------------------------------------------------------------- /modifiedBCS/lib/BerkeleyParser-1.7.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swiseman/nn_coref/e29b16deecd0d87d4b7c145e07e2908266fe63d6/modifiedBCS/lib/BerkeleyParser-1.7.jar -------------------------------------------------------------------------------- /modifiedBCS/lib/futile.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/swiseman/nn_coref/e29b16deecd0d87d4b7c145e07e2908266fe63d6/modifiedBCS/lib/futile.jar -------------------------------------------------------------------------------- /modifiedBCS/moarcoref-assembly-1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swiseman/nn_coref/e29b16deecd0d87d4b7c145e07e2908266fe63d6/modifiedBCS/moarcoref-assembly-1.jar -------------------------------------------------------------------------------- /modifiedBCS/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.7") 2 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/ConjType.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref; 2 | 3 | 4 | public enum ConjType { 5 | NONE, TYPE, TYPE_OR_RAW_PRON, CANONICAL, CANONICAL_NOPRONPRON, CANONICAL_ONLY_PAIR_CONJ, CANONICAL_OR_COMMON; 6 | } 7 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/ConllDoc.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.syntax.Tree 3 | 4 | // Chunks are semi-inclusive intervals. 5 | case class Chunk[T](val start: Int, 6 | val end: Int, 7 | val label: T); 8 | 9 | // rawText should only be used to save trouble when outputting the document 10 | // for scoring; never at any other time! 11 | case class ConllDoc(val docID: String, 12 | val docPartNo: Int, 13 | val words: Seq[Seq[String]], 14 | val pos: Seq[Seq[String]], 15 | val trees: Seq[DepConstTree], 16 | val nerChunks: Seq[Seq[Chunk[String]]], 17 | val corefChunks: Seq[Seq[Chunk[Int]]], 18 | val speakers: Seq[Seq[String]], 19 | val rawText: Seq[Seq[String]]) { 20 | 21 | val numSents = words.size; 22 | 23 | // updating...blah 24 | val allSpeakers = scala.collection.mutable.Set[String](); 25 | var gatheredSpeakers = false; 26 | 27 | def getSpeakers():scala.collection.mutable.Set[String] = { 28 | if (gatheredSpeakers){ 29 | return allSpeakers; 30 | } else { 31 | for (speakerSent <- speakers){ 32 | for (speaker <- speakerSent){ 33 | allSpeakers.add(speaker.replace("-","").replace("_","").replace(".","").toLowerCase); 34 | } 35 | } 36 | gatheredSpeakers = true; 37 | return allSpeakers; 38 | } 39 | } 40 | 41 | def printableDocName = docID + " (part " + docPartNo + ")"; 42 | 43 | def isConversation = docID.startsWith("bc") || docID.startsWith("wb"); 44 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/ConllDocWriter.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | 3 | import java.io.PrintWriter 4 | import scala.collection.mutable.HashMap 5 | import scala.collection.mutable.ArrayBuffer 6 | import edu.berkeley.nlp.coref.preprocess.PreprocessingDriver 7 | import edu.berkeley.nlp.futile.syntax.Tree 8 | import edu.berkeley.nlp.coref.preprocess.Reprocessor 9 | import scala.collection.mutable.HashSet 10 | import scala.collection.JavaConverters._ 11 | import edu.berkeley.nlp.futile.util.Logger 12 | 13 | object ConllDocWriter { 14 | 15 | def writeDoc(writer: PrintWriter, conllDoc: ConllDoc, clustering: 
OrderedClusteringBound) { 16 | // writeDocIncompleteConll(writer, conllDoc.docID, conllDoc.docPartNo, conllDoc.words, conllDoc.pos, conllDoc.trees.map(_.constTree), conllDoc.speakers, conllDoc.nerChunks, convertOrderedClusteringBoundToChunks(clustering, conllDoc.words.size)); 17 | val corefBits = getCorefBits(conllDoc.words, convertOrderedClusteringBoundToChunks(clustering, conllDoc.words.size)); 18 | val numZeroesToAddToPartNo = 3 - conllDoc.docPartNo.toString.size; 19 | writer.println("#begin document (" + conllDoc.docID + "); part " + ("0" * numZeroesToAddToPartNo) + conllDoc.docPartNo); 20 | for (sentIdx <- 0 until conllDoc.rawText.size) { 21 | val sent = conllDoc.rawText(sentIdx); 22 | for (tokenIdx <- 0 until sent.size) { 23 | val line = conllDoc.rawText(sentIdx)(tokenIdx); 24 | val lineNoCoref = line.substring(0, Math.max(line.lastIndexOf("\t"), line.lastIndexOf(" ")) + 1); 25 | // writer.println(lineNoCoref + corefBits(sentIdx)(tokenIdx)); 26 | writer.println(lineNoCoref.replaceAll("\\s+", "\t") + corefBits(sentIdx)(tokenIdx)); 27 | } 28 | writer.println(); 29 | } 30 | writer.println("#end document"); 31 | } 32 | 33 | // Doesn't write predicate-argument structures, senses, or lemmas (but we don't use these). 34 | def writeIncompleteConllDoc(writer: PrintWriter, 35 | docName: String, 36 | partNo: Int, 37 | words: Seq[Seq[String]], 38 | pos: Seq[Seq[String]], 39 | parses: Seq[Tree[String]], 40 | speakers: Seq[Seq[String]], 41 | nerChunks: Seq[Seq[Chunk[String]]], 42 | corefChunks: Seq[Seq[Chunk[Int]]]) { 43 | val numZeroesToAddToPartNo = 3 - partNo.toString.size; 44 | val corefBits = getCorefBits(words, corefChunks); 45 | val parseBits = parses.map(tree => PreprocessingDriver.computeParseBits(Reprocessor.convertFromFutileTree(tree))); 46 | val nerBits = getNerBits(words, nerChunks); 47 | writer.println("#begin document (" + docName + "); part " + ("0" * numZeroesToAddToPartNo) + partNo); 48 | for (sentIdx <- 0 until words.size) { 49 | val sent = words(sentIdx); 50 | for (i <- 0 until sent.size) { 51 | writer.println(docName + "\t" + partNo + "\t" + i + "\t" + words(sentIdx)(i) + "\t" + pos(sentIdx)(i) + "\t" + parseBits(sentIdx)(i) + 52 | "\t-\t-\t-\t" + speakers(sentIdx)(i) + "\t" + nerBits(sentIdx)(i) + "\t" + corefBits(sentIdx)(i)); 53 | } 54 | writer.println(); 55 | } 56 | writer.println("#end document"); 57 | } 58 | 59 | private def convertOrderedClusteringBoundToChunks(clustering: OrderedClusteringBound, numSentences: Int): Seq[Seq[Chunk[Int]]] = { 60 | val chunksPerSentence = Array.tabulate(numSentences)(i => new ArrayBuffer[Chunk[Int]]()); 61 | for (i <- 0 until clustering.ments.size) { 62 | val ment = clustering.ments(i); 63 | chunksPerSentence(ment.sentIdx) += new Chunk(ment.startIdx, ment.endIdx, clustering.clustering.getClusterIdx(i)); 64 | } 65 | chunksPerSentence; 66 | } 67 | 68 | private def getNerBits(words: Seq[Seq[String]], nerChunks: Seq[Seq[Chunk[String]]]): Seq[Seq[String]] = { 69 | for (sentIdx <- 0 until words.size) yield { 70 | val chunkStarts = new HashMap[Int,String]; 71 | val chunkEnds = new HashSet[Int]; 72 | Logger.logss("NER CHUNKS: " + nerChunks); 73 | for (chunk <- nerChunks(sentIdx)) { 74 | chunkStarts.put(chunk.start, chunk.label); 75 | chunkEnds += chunk.end - 1; 76 | } 77 | for (tokenIdx <- 0 until words(sentIdx).size) yield { 78 | if (chunkStarts.contains(tokenIdx) && chunkEnds.contains(tokenIdx)) { 79 | "(" + chunkStarts.get(tokenIdx).getOrElse("") + ")"; 80 | } else if (chunkStarts.contains(tokenIdx)) { 81 | "(" + 
chunkStarts.get(tokenIdx).getOrElse("") + "*"; 82 | } else if (chunkEnds.contains(tokenIdx)) { 83 | "*)"; 84 | } else { 85 | "*"; 86 | } 87 | } 88 | } 89 | } 90 | 91 | private def getCorefBits(words: Seq[Seq[String]], corefChunks: Seq[Seq[Chunk[Int]]]): Seq[Seq[String]] = { 92 | for (sentIdx <- 0 until words.size) yield { 93 | val mentionStarts = new HashMap[Int,ArrayBuffer[Int]]; 94 | val mentionEnds = new HashMap[Int,ArrayBuffer[Int]]; 95 | val mentionStartEnds = new HashMap[Int,Int]; 96 | val chunksThisSent = corefChunks(sentIdx); 97 | for (chunk <- chunksThisSent) { 98 | val start = chunk.start; 99 | val end = chunk.end - 1; 100 | if (start == end) { 101 | mentionStartEnds.put(start, chunk.label); 102 | } else { 103 | if (!mentionStarts.contains(start)) { 104 | mentionStarts.put(start, new ArrayBuffer[Int]()) 105 | } 106 | mentionStarts(start) += chunk.label; 107 | if (!mentionEnds.contains(end)) { 108 | mentionEnds.put(end, new ArrayBuffer[Int]()) 109 | } 110 | mentionEnds(end) += chunk.label; 111 | } 112 | } 113 | for (tokenIdx <- 0 until words(sentIdx).size) yield { 114 | var corefBit = ""; 115 | if (mentionStarts.contains(tokenIdx)) { 116 | for (start <- mentionStarts(tokenIdx)) { 117 | corefBit += "(" + start + "|"; 118 | } 119 | } 120 | if (mentionStartEnds.contains(tokenIdx)) { 121 | corefBit += "(" + mentionStartEnds(tokenIdx) + ")|"; 122 | } 123 | if (mentionEnds.contains(tokenIdx)) { 124 | for (end <- mentionEnds(tokenIdx)) { 125 | corefBit += end + ")|"; 126 | } 127 | } 128 | if (corefBit.isEmpty) "-" else corefBit.dropRight(1); 129 | } 130 | } 131 | } 132 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/CorefConllScorer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import java.io.File 3 | import java.io.PrintWriter 4 | import java.util.regex.Pattern 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | import scala.collection.mutable.HashMap 8 | import scala.sys.process.stringSeqToProcess 9 | import scala.sys.process.Process 10 | 11 | import edu.berkeley.nlp.futile.util.Logger 12 | 13 | class CorefConllScorer(val conllEvalScriptPath: String) { 14 | 15 | def renderFinalScore(conllDocs: Seq[ConllDoc], rawPredClusterings: Seq[OrderedClusteringBound], goldClusterings: Seq[OrderedClusteringBound]) = { 16 | val summary = score(conllDocs, rawPredClusterings, goldClusterings, true); 17 | CorefConllScorer.processConllString(summary, false); 18 | } 19 | 20 | def renderSuffStats(conllDoc: ConllDoc, rawPredClustering: OrderedClusteringBound, goldClustering: OrderedClusteringBound) = { 21 | val summary = score(Seq(conllDoc), Seq(rawPredClustering), Seq(goldClustering), false); 22 | CorefConllScorer.processConllString(summary, true); 23 | } 24 | 25 | def score(conllDocs: Seq[ConllDoc], rawPredClusterings: Seq[OrderedClusteringBound], goldClusterings: Seq[OrderedClusteringBound], saveTempFiles: Boolean) = { 26 | val predClusterings = rawPredClusterings.map(_.postprocessForConll()); 27 | // var predFile = File.createTempFile("temp", ".conll"); 28 | val (predFile, goldFile) = if (Driver.conllOutputDir != "" && saveTempFiles) { 29 | val pFile = File.createTempFile("temp", ".conll", new File(Driver.conllOutputDir)); 30 | val gFile = new File(pFile.getAbsolutePath() + "-gold"); 31 | Logger.logss("PRED FILE: " + pFile.getAbsolutePath()); 32 | Logger.logss("GOLD FILE: " + gFile.getAbsolutePath()); 33 | Logger.logss("To score, 
run:"); 34 | Logger.logss("perl scorer.pl all " + gFile.getAbsolutePath() + " " + pFile.getAbsolutePath() + " none"); 35 | (pFile, gFile); 36 | } else { 37 | val pFile = File.createTempFile("temp", ".conll"); 38 | val gFile = new File(pFile.getAbsolutePath() + "-gold"); 39 | pFile.deleteOnExit(); 40 | gFile.deleteOnExit(); 41 | (pFile, gFile); 42 | } 43 | val predWriter = new PrintWriter(predFile); 44 | val goldWriter = new PrintWriter(goldFile); 45 | for (i <- 0 until conllDocs.size) { 46 | ConllDocWriter.writeDoc(predWriter, conllDocs(i), predClusterings(i)); 47 | ConllDocWriter.writeDoc(goldWriter, conllDocs(i), goldClusterings(i)); 48 | } 49 | // Flush and close the buffers 50 | predWriter.close(); 51 | goldWriter.close(); 52 | // Build and run the process for the CoNLL eval script script 53 | import scala.sys.process._ 54 | val output = Process(Seq(conllEvalScriptPath, "all", goldFile.getAbsolutePath(), predFile.getAbsolutePath(), "none")).lines; 55 | output.reduce(_ + "\n" + _); 56 | } 57 | } 58 | 59 | object CorefConllScorer { 60 | 61 | def processConllString(summary: String, renderSuffStats: Boolean) = { 62 | val pr = Pattern.compile("Coreference:.*\\(([0-9.]+) / ([0-9.]+)\\).*\\(([0-9.]+) / ([0-9.]+)\\)"); 63 | val prMatcher = pr.matcher(summary); 64 | var prCount = 0; 65 | var (mucPNum, mucPDenom, mucRNum, mucRDenom) = (0.0, 0.0, 0.0, 0.0); 66 | var (bcubPNum, bcubPDenom, bcubRNum, bcubRDenom) = (0.0, 0.0, 0.0, 0.0); 67 | var (ceafePNum, ceafePDenom, ceafeRNum, ceafeRDenom) = (0.0, 0.0, 0.0, 0.0); 68 | // Four matches: MUC, B-cubed, CEAFM, CEAFE (BLANC doesn't match because of different formatting) 69 | while (prMatcher.find()) { 70 | if (prCount == 0) { 71 | mucRNum = prMatcher.group(1).toDouble; 72 | mucRDenom = prMatcher.group(2).toDouble; 73 | mucPNum = prMatcher.group(3).toDouble; 74 | mucPDenom = prMatcher.group(4).toDouble; 75 | } 76 | if (prCount == 1) { 77 | bcubRNum = prMatcher.group(1).toDouble; 78 | bcubRDenom = prMatcher.group(2).toDouble; 79 | bcubPNum = prMatcher.group(3).toDouble; 80 | bcubPDenom = prMatcher.group(4).toDouble; 81 | } 82 | if (prCount == 3) { 83 | ceafeRNum = prMatcher.group(1).toDouble; 84 | ceafeRDenom = prMatcher.group(2).toDouble; 85 | ceafePNum = prMatcher.group(3).toDouble; 86 | ceafePDenom = prMatcher.group(4).toDouble; 87 | } 88 | prCount += 1; 89 | } 90 | val mucP = mucPNum/mucPDenom * 100.0; 91 | val mucR = mucRNum/mucRDenom * 100.0; 92 | val mucF = 2 * mucP * mucR/(mucP + mucR); 93 | val bcubP = bcubPNum/bcubPDenom * 100.0; 94 | val bcubR = bcubRNum/bcubRDenom * 100.0; 95 | val bcubF = 2 * bcubP * bcubR/(bcubP + bcubR); 96 | val ceafeP = ceafePNum/ceafePDenom * 100.0; 97 | val ceafeR = ceafeRNum/ceafeRDenom * 100.0; 98 | val ceafeF = 2 * ceafeP * ceafeR/(ceafeP + ceafeR); 99 | val avg = (mucF + bcubF + ceafeF)/3.0; 100 | if (renderSuffStats) { 101 | "MUC/BCUB/CEAFE P/R N/D:\t" + mucPNum + "\t" + mucPDenom + "\t" + mucRNum + "\t" + mucRDenom + "\t" + bcubPNum + "\t" + bcubPDenom + "\t" + bcubRNum + "\t" + bcubRDenom + "\t" + ceafePNum + "\t" + ceafePDenom + "\t" + ceafeRNum + "\t" +ceafeRDenom; 102 | } else { 103 | "MUC P-R-F1, BCUB P-R-F1, CEAFE P-R-F1, Average:\t" + fmt(mucP) + "\t" + fmt(mucR) + "\t" + fmt(mucF) + "\t" + fmt(bcubP) + "\t" + fmt(bcubR) + "\t" + fmt(bcubF) + "\t" + fmt(ceafeP) + "\t" + fmt(ceafeR) + "\t" + fmt(ceafeF) + "\t" + fmt(avg) + "\n" + 104 | "MUC = " + fmt(mucF) + ", BCUB = " + fmt(bcubF) + ", CEAFE = " + fmt(ceafeF) + ", AVG = " + fmt(avg); 105 | } 106 | } 107 | 108 | private def fmt(d: Double): String = { 109 
| val str = "" + (d + 0.005); 110 | str.substring(0, Math.min(str.length(), str.indexOf(".") + 3)); 111 | } 112 | 113 | def main(args: Array[String]) { 114 | import scala.sys.process._ 115 | val cmd = Seq("ls", "clean-data/"); 116 | println(cmd.lines.toIndexedSeq); 117 | } 118 | 119 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/CorefDoc.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import java.io.File 3 | 4 | import scala.collection.JavaConverters.asScalaBufferConverter 5 | import scala.collection.JavaConverters.mapAsScalaMapConverter 6 | import scala.collection.mutable.HashSet 7 | import scala.collection.mutable.ArrayBuffer 8 | import scala.collection.mutable.HashMap 9 | 10 | import edu.berkeley.nlp.coref.lang.Language 11 | import edu.berkeley.nlp.futile.syntax.Trees.PennTreeRenderer 12 | import edu.berkeley.nlp.futile.util.Counter 13 | import edu.berkeley.nlp.futile.util.Logger 14 | 15 | case class CorefDoc(val rawDoc: ConllDoc, 16 | val goldMentions: Seq[Mention], 17 | val goldClustering: OrderedClustering, 18 | val predMentions: Seq[Mention]) { 19 | 20 | var oraclePredOrderedClustering: OrderedClustering = null; 21 | 22 | def numPredMents = predMentions.size; 23 | 24 | /** 25 | * Determines and caches an "oracle predicted clustering." For each predicted mention: 26 | * --If that mention does not have a corresponding gold mention (start and end indices match): 27 | * --Put the current mention in its own cluster. 28 | * --If that mention does have a corresponding gold mention: 29 | * --Fetch that mention's antecedents (if any) 30 | * --Choose the first with a corresponding predicted mention (if any) 31 | * --Assign this mention as the current mention's parent. 32 | */ 33 | def getOraclePredClustering = { 34 | if (oraclePredOrderedClustering == null) { 35 | val predToGoldIdxMap = new HashMap[Int,Int](); 36 | val goldToPredIdxMap = new HashMap[Int,Int](); 37 | for (pIdx <- 0 until predMentions.size) { 38 | for (gIdx <- 0 until goldMentions.size) { 39 | val pMent = predMentions(pIdx); 40 | val gMent = goldMentions(gIdx); 41 | if (pMent.sentIdx == gMent.sentIdx && pMent.startIdx == gMent.startIdx && pMent.endIdx == gMent.endIdx) { 42 | predToGoldIdxMap.put(pIdx, gIdx); 43 | goldToPredIdxMap.put(gIdx, pIdx); 44 | } 45 | } 46 | } 47 | val oracleClusterIds = new ArrayBuffer[Int]; 48 | var nextClusterId = 0; 49 | for (predIdx <- 0 until predMentions.size) { 50 | // Fetch the parent 51 | var parent = -1; 52 | if (predToGoldIdxMap.contains(predIdx)) { 53 | val correspondingGoldIdx = predToGoldIdxMap(predIdx); 54 | // Find the antecedents of the corresponding gold mention 55 | val goldAntecedentIdxs = goldClustering.getAllAntecedents(correspondingGoldIdx); 56 | // For each one, do a weird data sanitizing check, then try to find a corresponding 57 | // predicted mention to use as the predicted parent 58 | for (goldAntecedentIdx <- goldAntecedentIdxs.reverse) { 59 | val correspondingGold = goldMentions(correspondingGoldIdx); 60 | val goldAntecedent = goldMentions(goldAntecedentIdx); 61 | // wsj_0990 has some duplicate gold mentions, need to handle these... 
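          // A gold antecedent whose span is identical to the current gold mention is one of these
          // duplicates and is skipped; otherwise its aligned predicted mention becomes the parent,
          // but only if it precedes the current mention (the monotonicity check below).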
62 | val sameMention = goldAntecedent.sentIdx == correspondingGold.sentIdx && goldAntecedent.startIdx == correspondingGold.startIdx && goldAntecedent.endIdx == correspondingGold.endIdx 63 | if (!sameMention && goldToPredIdxMap.contains(goldAntecedentIdx)) { 64 | val predAntecedentIdx = goldToPredIdxMap(goldAntecedentIdx) 65 | if (predAntecedentIdx >= predIdx) { 66 | val ment = predMentions(predIdx); 67 | val predAntecedent = predMentions(predAntecedentIdx); 68 | Logger.logss("Monotonicity violated:\n" + 69 | "Antecedent(" + predAntecedentIdx + "): " + predAntecedent.startIdx + " " + predAntecedent.endIdx + " " + predAntecedent.headIdx + "\n" + 70 | "Current(" + predMentions.indexOf(ment) + "): " + ment.startIdx + " " + ment.endIdx + " " + ment.headIdx + "\n" + 71 | "Gold antecedent(" + goldMentions.indexOf(goldAntecedent) + "): " + goldAntecedent.startIdx + " " + goldAntecedent.endIdx + " " + goldAntecedent.headIdx + "\n" + 72 | "Gold current(" + goldMentions.indexOf(correspondingGold) + "): " + correspondingGold.startIdx + " " + correspondingGold.endIdx + " " + correspondingGold.headIdx); 73 | Logger.logss("Setting parent to -1..."); 74 | parent = -1; 75 | } else { 76 | parent = predAntecedentIdx 77 | } 78 | } 79 | } 80 | } 81 | // Now compute the oracle cluster ID 82 | val clusterId = if (parent == -1) { 83 | nextClusterId += 1; 84 | nextClusterId - 1; 85 | } else { 86 | oracleClusterIds(parent); 87 | } 88 | oracleClusterIds += clusterId; 89 | } 90 | oraclePredOrderedClustering = OrderedClustering.createFromClusterIds(oracleClusterIds); 91 | } 92 | oraclePredOrderedClustering 93 | } 94 | } 95 | 96 | object CorefDoc { 97 | 98 | def checkGoldMentionRecall(docs: Seq[CorefDoc]) { 99 | var numGMs = docs.map(_.goldMentions.size).reduce(_ + _); 100 | val numPMs = docs.map(_.predMentions.size).reduce(_ + _); 101 | val numNomPMs = docs.map(doc => doc.predMentions.filter(_.mentionType == MentionType.NOMINAL).size).reduce(_ + _); 102 | val numPropPMs = docs.map(doc => doc.predMentions.filter(_.mentionType == MentionType.PROPER).size).reduce(_ + _); 103 | val numPronPMs = docs.map(doc => doc.predMentions.filter(_.mentionType == MentionType.PRONOMINAL).size).reduce(_ + _); 104 | var numGMsRecalled = 0; 105 | var numGMsUnrecalledNonConstituents = 0; 106 | for (doc <- docs; gm <- doc.goldMentions) { 107 | if (doc.predMentions.filter(pm => pm.startIdx == gm.startIdx && pm.endIdx == gm.endIdx).size >= 1) { 108 | numGMsRecalled += 1; 109 | } else { 110 | if (!doc.rawDoc.trees(gm.sentIdx).isConstituent(gm.startIdx, gm.endIdx)) { 111 | numGMsUnrecalledNonConstituents += 1; 112 | } 113 | } 114 | } 115 | Logger.logss("Detected " + numPMs + " predicted mentons (" + numNomPMs + " nominal, " + numPropPMs + " proper, " + numPronPMs + " pronominal), " + 116 | numGMsRecalled + " / " + numGMs + " = " + (numGMsRecalled.toDouble/numGMs) + " gold mentions recalled (" + numGMsUnrecalledNonConstituents + " missed ones are not constituents)") 117 | } 118 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/Decoder.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import scala.collection.mutable.ArrayBuffer 3 | import edu.berkeley.nlp.futile.util.Logger 4 | 5 | object Decoder { 6 | 7 | def decodeMax(docGraph: DocumentGraph, probFcn: Int => Array[Double]): Array[Int] = { 8 | val backpointers = new Array[Int](docGraph.size); 9 | for (i <- 0 until docGraph.size) { 10 | 
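      // For each mention i, greedily pick the antecedent j in [0, i] with the highest score under
      // probFcn; picking j == i is the self-link case, i.e. mention i starts a new cluster.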
val allProbs = probFcn(i); 11 | var bestIdx = -1; 12 | var bestProb = Double.NegativeInfinity; 13 | for (j <- 0 to i) { 14 | val currProb = allProbs(j); 15 | if (bestIdx == -1 || currProb > bestProb) { 16 | bestIdx = j; 17 | bestProb = currProb; 18 | } 19 | } 20 | backpointers(i) = bestIdx; 21 | } 22 | backpointers; 23 | } 24 | 25 | def decodeLeftToRightMarginalize(docGraph: DocumentGraph, probFcn: Int => Array[Double]): Array[Int] = { 26 | val clustersSoFar = new ArrayBuffer[ArrayBuffer[Int]](); 27 | val backpointers = new Array[Int](docGraph.size); 28 | for (i <- 0 until docGraph.size) { 29 | val allProbs = probFcn(i); 30 | val clusterProbs = clustersSoFar.map(_.foldLeft(0.0)((total, mentIdx) => total + allProbs(mentIdx))); 31 | // Logger.logss("All probs: " + allProbs.toSeq.zipWithIndex); 32 | // Logger.logss("Clusters so far: " + clustersSoFar); 33 | // Logger.logss("Cluster probs: " + clusterProbs.zipWithIndex); 34 | // Just a sanity-check, should return the same clusters as the max method 35 | // val clusterProbs = clustersSoFar.map(_.foldLeft(0.0)((total, mentIdx) => Math.max(total, allProbs(mentIdx)))); 36 | val startNewProb = allProbs(i); 37 | val bestClusterProbAndIdx = clusterProbs.zipWithIndex.foldLeft((0.0, -1))((bestProbAndIdx, currProbAndIdx) => if (bestProbAndIdx._1 < currProbAndIdx._1) currProbAndIdx else bestProbAndIdx); 38 | if (startNewProb > bestClusterProbAndIdx._1) { 39 | backpointers(i) = i; 40 | clustersSoFar += ArrayBuffer(i); 41 | } else { 42 | backpointers(i) = clustersSoFar(bestClusterProbAndIdx._2).last; 43 | clustersSoFar(bestClusterProbAndIdx._2) += i; 44 | } 45 | } 46 | backpointers; 47 | } 48 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/DocumentInferencer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.util.Logger 3 | import edu.berkeley.nlp.futile.fig.basic.Indexer 4 | 5 | trait DocumentInferencer { 6 | 7 | def getInitialWeightVector(featureIndexer: Indexer[String]): Array[Double]; 8 | 9 | def computeLikelihood(docGraph: DocumentGraph, 10 | pairwiseScorer: PairwiseScorer, 11 | lossFcn: (CorefDoc, Int, Int) => Double): Double; 12 | 13 | def addUnregularizedStochasticGradient(docGraph: DocumentGraph, 14 | pairwiseScorer: PairwiseScorer, 15 | lossFcn: (CorefDoc, Int, Int) => Double, 16 | gradient: Array[Double]); 17 | 18 | def viterbiDecode(docGraph: DocumentGraph, 19 | pairwiseScorer: PairwiseScorer): Array[Int]; 20 | 21 | def finishPrintStats(); 22 | 23 | def viterbiDecodeAll(docGraphs: Seq[DocumentGraph], pairwiseScorer: PairwiseScorer): Array[Array[Int]] = { 24 | val allPredBackptrs = new Array[Array[Int]](docGraphs.size); 25 | for (i <- 0 until docGraphs.size) { 26 | val docGraph = docGraphs(i); 27 | Logger.logs("Decoding " + i); 28 | val predBackptrs = viterbiDecode(docGraph, pairwiseScorer); 29 | allPredBackptrs(i) = predBackptrs; 30 | } 31 | allPredBackptrs; 32 | } 33 | 34 | def viterbiDecodeAllFormClusterings(docGraphs: Seq[DocumentGraph], pairwiseScorer: PairwiseScorer): (Array[Array[Int]], Array[OrderedClustering]) = { 35 | val allPredBackptrs = viterbiDecodeAll(docGraphs, pairwiseScorer); 36 | val allPredClusteringsSeq = (0 until docGraphs.size).map(i => OrderedClustering.createFromBackpointers(allPredBackptrs(i))); 37 | (allPredBackptrs, allPredClusteringsSeq.toArray) 38 | } 39 | } 
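A small usage note on the trait above: WriteCoNLLPreds.sh (see modifiedBCS/ above) expects back-pointer files with one line of space-separated antecedent indices per document. The sketch below, which is hypothetical and not part of BCS, shows how any DocumentInferencer could dump its decode output in that layout; the inferencer, document graphs, and scorer are assumed to have been constructed elsewhere (e.g. via CorefSystem).

```scala
// Hypothetical helper: decode each document and write one line of
// space-separated antecedent indices per document, the format that
// WriteCoNLLPreds.sh parses back in.
def dumpBackpointers(inferencer: DocumentInferencer,
                     docGraphs: Seq[DocumentGraph],
                     scorer: PairwiseScorer,
                     outPath: String) {
  val writer = new java.io.PrintWriter(outPath);
  val allBackptrs = inferencer.viterbiDecodeAll(docGraphs, scorer);
  for (backptrs <- allBackptrs) {
    writer.println(backptrs.mkString(" "));
  }
  writer.close();
}
```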
-------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/DocumentInferencerBasic.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.fig.basic.Indexer 3 | 4 | class DocumentInferencerBasic extends DocumentInferencer { 5 | 6 | def getInitialWeightVector(featureIndexer: Indexer[String]): Array[Double] = Array.fill(featureIndexer.size())(0.0); 7 | 8 | /** 9 | * N.B. always returns a reference to the same matrix, so don't call twice in a row and 10 | * attempt to use the results of both computations 11 | */ 12 | private def computeMarginals(docGraph: DocumentGraph, 13 | gold: Boolean, 14 | lossFcn: (CorefDoc, Int, Int) => Double, 15 | pairwiseScorer: PairwiseScorer): Array[Array[Double]] = { 16 | computeMarginals(docGraph, gold, lossFcn, docGraph.featurizeIndexAndScoreNonPrunedUseCache(pairwiseScorer)._2) 17 | } 18 | 19 | private def computeMarginals(docGraph: DocumentGraph, 20 | gold: Boolean, 21 | lossFcn: (CorefDoc, Int, Int) => Double, 22 | scoresChart: Array[Array[Double]]): Array[Array[Double]] = { 23 | // var marginals = new Array[Array[Double]](docGraph.doc.predMentions.size()); 24 | // for (i <- 0 until marginals.size) { 25 | // marginals(i) = Array.fill(i+1)(Double.NegativeInfinity); 26 | // } 27 | val marginals = docGraph.cachedMarginalMatrix; 28 | for (i <- 0 until docGraph.size) { 29 | var normalizer = 0.0; 30 | // Restrict to gold antecedents if we're doing gold, but don't load the gold antecedents 31 | // if we're not. 32 | val goldAntecedents: Seq[Int] = if (gold) docGraph.getGoldAntecedentsUnderCurrentPruning(i) else null; 33 | for (j <- 0 to i) { 34 | // If this is a legal antecedent 35 | if (!docGraph.isPruned(i, j) && (!gold || goldAntecedents.contains(j))) { 36 | // N.B. Including lossFcn is okay even for gold because it should be zero 37 | val unnormalizedProb = Math.exp(scoresChart(i)(j) + lossFcn(docGraph.corefDoc, i, j)); 38 | marginals(i)(j) = unnormalizedProb; 39 | normalizer += unnormalizedProb; 40 | } else { 41 | marginals(i)(j) = 0.0; 42 | } 43 | } 44 | for (j <- 0 to i) { 45 | marginals(i)(j) /= normalizer; 46 | } 47 | } 48 | marginals; 49 | } 50 | 51 | def computeLikelihood(docGraph: DocumentGraph, 52 | pairwiseScorer: PairwiseScorer, 53 | lossFcn: (CorefDoc, Int, Int) => Double): Double = { 54 | var likelihood = 0.0; 55 | val marginals = computeMarginals(docGraph, false, lossFcn, pairwiseScorer); 56 | for (i <- 0 until docGraph.size) { 57 | val goldAntecedents = docGraph.getGoldAntecedentsUnderCurrentPruning(i); 58 | var currProb = 0.0; 59 | for (j <- goldAntecedents) { 60 | currProb += marginals(i)(j); 61 | } 62 | var currLogProb = Math.log(currProb); 63 | if (currLogProb.isInfinite()) { 64 | currLogProb = -30; 65 | } 66 | likelihood += currLogProb; 67 | } 68 | likelihood; 69 | } 70 | 71 | def addUnregularizedStochasticGradient(docGraph: DocumentGraph, 72 | pairwiseScorer: PairwiseScorer, 73 | lossFcn: (CorefDoc, Int, Int) => Double, 74 | gradient: Array[Double]) = { 75 | val (featsChart, scoresChart) = docGraph.featurizeIndexAndScoreNonPrunedUseCache(pairwiseScorer); 76 | // N.B. 
Can't have pred marginals and gold marginals around at the same time because 77 | // they both live in the same cached matrix 78 | val predMarginals = this.computeMarginals(docGraph, false, lossFcn, scoresChart); 79 | for (i <- 0 until docGraph.size) { 80 | for (j <- 0 to i) { 81 | if (predMarginals(i)(j) > 1e-20) { 82 | addToGradient(featsChart(i)(j), -predMarginals(i)(j), gradient); 83 | } 84 | } 85 | } 86 | val goldMarginals = this.computeMarginals(docGraph, true, lossFcn, scoresChart); 87 | for (i <- 0 until docGraph.size) { 88 | for (j <- 0 to i) { 89 | if (goldMarginals(i)(j) > 1e-20) { 90 | addToGradient(featsChart(i)(j), goldMarginals(i)(j), gradient); 91 | } 92 | } 93 | } 94 | } 95 | 96 | private def addToGradient(feats: Seq[Int], scale: Double, gradient: Array[Double]) { 97 | var i = 0; 98 | while (i < feats.size) { 99 | val feat = feats(i); 100 | gradient(feat) += 1.0 * scale; 101 | i += 1; 102 | } 103 | } 104 | 105 | def viterbiDecode(docGraph: DocumentGraph, scorer: PairwiseScorer): Array[Int] = { 106 | val (featsChart, scoresChart) = docGraph.featurizeIndexAndScoreNonPrunedUseCache(scorer); 107 | if (Driver.decodeType == "sum") { 108 | val backptrs = Decoder.decodeLeftToRightMarginalize(docGraph, (idx: Int) => { 109 | val probs = scoresChart(idx); 110 | GUtil.expAndNormalizeiHard(probs); 111 | probs; 112 | }); 113 | backptrs; 114 | } else { 115 | val backptrs = Decoder.decodeMax(docGraph, (idx: Int) => { 116 | val probs = scoresChart(idx); 117 | GUtil.expAndNormalizeiHard(probs); 118 | probs; 119 | }); 120 | backptrs; 121 | } 122 | } 123 | 124 | def finishPrintStats() = {} 125 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/DocumentInferencerBinary.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import scala.collection.mutable.HashMap 3 | 4 | import edu.berkeley.nlp.futile.fig.basic.Indexer 5 | 6 | // TODO: Tune both of these, also try out some subsampling/reweighting approaches 7 | class DocumentInferencerBinary(val logThreshold: Double, 8 | val clusterType: String, 9 | val negativeClassWeight: Double) extends DocumentInferencer { 10 | 11 | def getInitialWeightVector(featureIndexer: Indexer[String]): Array[Double] = Array.fill(featureIndexer.size())(0.0); 12 | 13 | private def subsample(docGraph: DocumentGraph, i: Int): Seq[Int] = { 14 | (0 until i); 15 | } 16 | 17 | def computeLikelihood(docGraph: DocumentGraph, 18 | pairwiseScorer: PairwiseScorer, 19 | lossFcn: (CorefDoc, Int, Int) => Double): Double = { 20 | val (featsChart, scoresChart) = docGraph.featurizeIndexAndScoreNonPrunedUseCache(pairwiseScorer); 21 | var likelihood = 0.0; 22 | for (i <- 0 until docGraph.size) { 23 | for (j <- subsample(docGraph, i)) { 24 | val pos = docGraph.isGoldNoPruning(i, j); 25 | var increment = if (pos) { 26 | scoresChart(i)(j) - Math.log(1 + Math.exp(scoresChart(i)(j))) 27 | } else { 28 | negativeClassWeight * -Math.log(1 + Math.exp(scoresChart(i)(j))); 29 | } 30 | if (increment.isNegInfinity) { 31 | increment = -30; 32 | } 33 | likelihood += increment; 34 | } 35 | } 36 | likelihood; 37 | } 38 | 39 | def addUnregularizedStochasticGradient(docGraph: DocumentGraph, 40 | pairwiseScorer: PairwiseScorer, 41 | lossFcn: (CorefDoc, Int, Int) => Double, 42 | gradient: Array[Double]) = { 43 | val (featsChart, scoresChart) = docGraph.featurizeIndexAndScoreNonPrunedUseCache(pairwiseScorer); 44 | for (i <- 0 until docGraph.size) { 45 | 
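      // Per-pair logistic gradient: with sigma(s) = exp(s) / (1 + exp(s)), gold pairs contribute
      // (1 - sigma(s)) times their features, while non-gold pairs contribute
      // -negativeClassWeight * sigma(s) times their features (see the addToGradient calls below).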
for (j <- subsample(docGraph, i)) { 46 | val expedScore = Math.exp(scoresChart(i)(j)); 47 | if (docGraph.isGoldNoPruning(i, j)) { 48 | addToGradient(featsChart(i)(j), 1.0 - expedScore/(1.0 + expedScore), gradient); 49 | } else { 50 | addToGradient(featsChart(i)(j), negativeClassWeight * -expedScore/(1.0 + expedScore), gradient); 51 | } 52 | } 53 | } 54 | } 55 | 56 | private def addToGradient(feats: Seq[Int], scale: Double, gradient: Array[Double]) { 57 | var i = 0; 58 | while (i < feats.size) { 59 | val feat = feats(i); 60 | gradient(feat) += 1.0 * scale; 61 | i += 1; 62 | } 63 | } 64 | 65 | def viterbiDecode(docGraph: DocumentGraph, scorer: PairwiseScorer): Array[Int] = { 66 | val (featsChart, scoresChart) = docGraph.featurizeIndexAndScoreNonPrunedUseCache(scorer); 67 | clusterType match { 68 | case "CLOSEST_FIRST" => { 69 | (0 until docGraph.size).map(i => { 70 | var nearest = i; 71 | for (j <- i-1 to 0 by -1) { 72 | if (nearest == i && scoresChart(i)(j) > logThreshold) { 73 | nearest = j; 74 | } 75 | } 76 | nearest; 77 | }).toArray; 78 | } 79 | case "BEST_FIRST" => { 80 | (0 until docGraph.size).map(i => { 81 | var best = i; 82 | var bestScore = Double.NegativeInfinity; 83 | for (j <- i-1 to 0 by -1) { 84 | if (scoresChart(i)(j) > logThreshold && scoresChart(i)(j) > bestScore) { 85 | best = j; 86 | bestScore = scoresChart(i)(j); 87 | } 88 | } 89 | best; 90 | }).toArray; 91 | } 92 | case _ => { // TRANSITIVE_CLOSURE 93 | var mapping = new HashMap[Int,Int](); 94 | var nextClusterIndex = 0; 95 | for (i <- 0 until docGraph.size) { 96 | var edgeAlreadyFound = false; 97 | for (j <- 0 until i) { 98 | if (scoresChart(i)(j) > logThreshold) { 99 | var antecedentCluster = mapping(j); 100 | // Merge the two 101 | if (edgeAlreadyFound && antecedentCluster != mapping(i)) { 102 | var newCluster = mapping(i); 103 | for (mentIdx <- mapping.keySet) { 104 | if (mapping(mentIdx) == antecedentCluster) { 105 | mapping(mentIdx) = newCluster; 106 | } 107 | } 108 | } else { 109 | edgeAlreadyFound = true; 110 | mapping(i) = antecedentCluster; 111 | } 112 | } 113 | } 114 | if (!edgeAlreadyFound) { 115 | mapping(i) = nextClusterIndex; 116 | nextClusterIndex += 1; 117 | } 118 | } 119 | (0 until docGraph.size).map(i => { 120 | var backptr = i; 121 | for (j <- 0 until i) { 122 | if (mapping(j) == mapping(i)) { 123 | backptr = j; 124 | } 125 | } 126 | backptr; 127 | }).toArray; 128 | } 129 | } 130 | } 131 | 132 | def finishPrintStats() = {} 133 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/DocumentInferencerOracle.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.fig.basic.Indexer 3 | 4 | class DocumentInferencerOracle extends DocumentInferencer { 5 | 6 | def getInitialWeightVector(featureIndexer: Indexer[String]): Array[Double] = Array.fill(featureIndexer.size())(0.0); 7 | 8 | def computeLikelihood(docGraph: DocumentGraph, 9 | pairwiseScorer: PairwiseScorer, 10 | lossFcn: (CorefDoc, Int, Int) => Double) = { 11 | 0.0; 12 | } 13 | 14 | def addUnregularizedStochasticGradient(docGraph: DocumentGraph, 15 | pairwiseScorer: PairwiseScorer, 16 | lossFcn: (CorefDoc, Int, Int) => Double, 17 | gradient: Array[Double]) = { 18 | } 19 | 20 | def viterbiDecode(docGraph: DocumentGraph, 21 | pairwiseScorer: PairwiseScorer): Array[Int] = { 22 | val clustering = docGraph.getOraclePredClustering(); 23 | val resultSeq = for (i <- 0 until 
docGraph.size) yield { 24 | val immediateAntecedentOrMinus1 = clustering.getImmediateAntecedent(i); 25 | if (immediateAntecedentOrMinus1 == -1) { 26 | i; 27 | } else { 28 | docGraph.getMentions.indexOf(immediateAntecedentOrMinus1); 29 | } 30 | } 31 | resultSeq.toArray; 32 | } 33 | 34 | def finishPrintStats() = {} 35 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/Feature.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | 3 | case class Feature(context: String, event: String, value: Double, basic: Boolean) { 4 | val name = context + " >> " + event; 5 | val contextAndTemplate = context + ":" + (if (basic) "basic" else "conj"); 6 | }; 7 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/GUtil.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.util.Counter 3 | import edu.berkeley.nlp.futile.util.Iterators 4 | import scala.collection.mutable.ArrayBuffer 5 | import scala.collection.JavaConverters._ 6 | import edu.berkeley.nlp.futile.math.SloppyMath 7 | import scala.util.Sorting 8 | import java.util.Collection 9 | 10 | object GUtil { 11 | 12 | def fmt(mat: Array[Array[Double]]): String = { 13 | var str = ""; 14 | for (i <- 0 until mat.size) { 15 | for (j <- 0 until mat(i).size) { 16 | str += GUtil.fmt(mat(i)(j)) + "\t"; 17 | } 18 | str += "\n"; 19 | } 20 | str; 21 | } 22 | 23 | // def fmt(col: Collection[Double]): String = { 24 | // if (col.size == 0) { 25 | // "[]" 26 | // } else { 27 | // "[" + col.foldLeft("")((curr, nextD) => curr + fmt(nextD) + ", ").dropRight(2) + "]"; 28 | // } 29 | // } 30 | 31 | def fmt(d: Double): String = { 32 | if (d.isNaN) { 33 | "NaN"; 34 | } else if (d.isPosInfinity) { 35 | "+Inf"; 36 | } else if (d.isNegInfinity) { 37 | "-Inf"; 38 | } else { 39 | if (d < 0) "-" + fmtPositiveNumber(-d) else fmtPositiveNumber(d); 40 | } 41 | } 42 | 43 | def fmtProb(d: Double): String = { 44 | fmtPositiveNumber(d); 45 | } 46 | 47 | def fmtPositiveNumber(d: Double): String = { 48 | require(d >= 0); 49 | if (d == 0) { 50 | "0"; 51 | } 52 | if (d < 1e-20) { 53 | "tiny" 54 | } else if (d < 0.001) { 55 | val numPlacesToMove = Math.ceil(-Math.log(d)/Math.log(10)).toInt; 56 | "%1.1f".format(d * Math.pow(10, numPlacesToMove)) + "e-" + numPlacesToMove; 57 | } else if (d < 10000) { 58 | "%1.3f".format(d); 59 | } else { 60 | val numPlacesToMove = Math.floor(Math.log(d)/Math.log(10)).toInt; 61 | "%1.1f".format(d / Math.pow(10, numPlacesToMove)) + "e" + numPlacesToMove; 62 | } 63 | } 64 | 65 | def fmtTwoDigitNumber(d: Double, numDecimalPlaces: Int): String = { 66 | ("%1." 
+ numDecimalPlaces + "f").format(d); 67 | } 68 | 69 | def containsNaN(array: Array[Double]): Boolean = { 70 | var containsNaN = false; 71 | for (value <- array) { 72 | containsNaN = containsNaN || value.isNaN; 73 | } 74 | containsNaN; 75 | } 76 | 77 | def containsNaNOrNegInf(array: Array[Double]): Boolean = { 78 | var bad = false; 79 | for (value <- array) { 80 | bad = bad || value.isNaN || value.isNegInfinity; 81 | } 82 | bad; 83 | } 84 | 85 | def getNBest[A](stuff: Seq[A], scorer: (A) => Double, n: Int): Seq[(A, Double)] = { 86 | val counter = new Counter[A](); 87 | for (thing <- stuff) { 88 | counter.setCount(thing, scorer(thing)); 89 | } 90 | val results = new ArrayBuffer[(A, Double)](); 91 | for (thing <- Iterators.able(counter.asPriorityQueue()).asScala) { 92 | if (results.size < n) { 93 | results += new Tuple2(thing, counter.getCount(thing)); 94 | } 95 | } 96 | results; 97 | } 98 | 99 | def getTopNKeysSubCounter(counter: Counter[String], n: Int) = { 100 | val newCounter = new Counter[String](); 101 | val pq = counter.asPriorityQueue() 102 | var numPrinted = 0; 103 | while (pq.hasNext() && numPrinted < n) { 104 | val obj = pq.next(); 105 | newCounter.setCount(obj, counter.getCount(obj)); 106 | numPrinted += 1; 107 | } 108 | newCounter; 109 | } 110 | 111 | def normalizeiSoft(arr: Array[Double]): Boolean = { 112 | var idx = 0; 113 | var total = 0.0; 114 | while (idx < arr.size) { 115 | total += arr(idx); 116 | idx += 1; 117 | } 118 | if (total <= 0.0) { 119 | false; 120 | } else { 121 | idx = 0; 122 | while (idx < arr.size) { 123 | arr(idx) /= total; 124 | idx += 1; 125 | } 126 | true; 127 | } 128 | } 129 | 130 | def normalizeiHard(arr: Array[Double]) { 131 | var idx = 0; 132 | var total = 0.0; 133 | while (idx < arr.size) { 134 | total += arr(idx); 135 | idx += 1; 136 | } 137 | if (total <= 0.0) { 138 | throw new RuntimeException("Bad total for normalizing: " + total); 139 | } 140 | idx = 0; 141 | while (idx < arr.size) { 142 | arr(idx) /= total; 143 | idx += 1; 144 | } 145 | } 146 | 147 | def expAndNormalizeiHard(arr: Array[Double]) { 148 | var idx = 0; 149 | while (idx < arr.size) { 150 | arr(idx) = Math.exp(arr(idx)); 151 | idx += 1; 152 | } 153 | normalizeiHard(arr); 154 | } 155 | 156 | def renderMat[A](mat: Array[Array[A]]): String = { 157 | mat.map(row => row.map(_.toString).reduce((c1, c2) => c1 + ", " + c2)).reduce((r1, r2) => r1 + "\n" + r2); 158 | } 159 | 160 | def normalizei(vector: Array[Double]) { 161 | val normalizer = vector.reduce(_ + _); 162 | for (i <- 0 until vector.size) { 163 | vector(i) /= normalizer; 164 | } 165 | } 166 | 167 | def logNormalizei(vector: Array[Double]) { 168 | val normalizer = SloppyMath.logAdd(vector); 169 | for (i <- 0 until vector.size) { 170 | vector(i) -= normalizer; 171 | } 172 | } 173 | 174 | def logNormalizeiByRow(mat: Array[Array[Double]]) { 175 | for (i <- 0 until mat.size) { 176 | val normalizer = SloppyMath.logAdd(mat(i)); 177 | for (j <- 0 until mat(i).size) { 178 | mat(i)(j) -= normalizer; 179 | } 180 | } 181 | } 182 | 183 | def computeQuantile(nums: Array[Double], quantile: Double): Double = { 184 | val numsCpy = new Array[Double](nums.size); 185 | Array.copy(nums, 0, numsCpy, 0, nums.size); 186 | Sorting.quickSort(numsCpy); 187 | numsCpy((quantile * nums.size).toInt); 188 | } 189 | 190 | def main(args: Array[String]) { 191 | println(fmtProb(1.0)); 192 | println(fmtProb(0.01)); 193 | println(fmtProb(0.001)); 194 | println(fmtProb(0.0001)); 195 | println(fmtProb(0.00001)); 196 | println(fmtProb(0.000001)); 197 | 
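// Quick illustration of the in-place normalizers defined above (input values chosen arbitrarily):
// exponentiate-and-normalize turns log-scores into a distribution, so (0, log 3) becomes (0.25, 0.75).
val logScores = Array(0.0, Math.log(3.0));
expAndNormalizeiHard(logScores);
println(logScores.toSeq);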
println(fmtProb(0.0000001)); 198 | 199 | println(fmtProb(0.000000000000000000000001)); 200 | } 201 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/Gender.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref; 2 | 3 | public enum Gender { 4 | MALE, FEMALE, NEUTRAL, UNKNOWN; 5 | } 6 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/MentionPropertyComputer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | 3 | class MentionPropertyComputer(val ngComputer: NumberGenderComputer) { 4 | 5 | 6 | 7 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/MentionType.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref; 2 | 3 | 4 | public enum MentionType { 5 | 6 | PROPER(false), NOMINAL(false), PRONOMINAL(true), DEMONSTRATIVE(true); 7 | 8 | private boolean isClosedClass; 9 | 10 | private MentionType(boolean isClosedClass) { 11 | this.isClosedClass = isClosedClass; 12 | } 13 | 14 | public boolean isClosedClass() { 15 | return isClosedClass; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/Number.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref; 2 | 3 | 4 | public enum Number { 5 | SINGULAR, PLURAL, UNKNOWN; 6 | } 7 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/OraclePosteriorSampler.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.fig.prob.Dirichlet 3 | 4 | object OraclePosteriorSampler { 5 | 6 | def sample(alphas: Array[Double], rng: java.util.Random): Array[Double] = { 7 | new Dirichlet(alphas).sample(rng); 8 | } 9 | 10 | def randomPosterior(domainSize: Int, specialIndex: Int, rng: java.util.Random): Array[Double] = { 11 | val baseAlpha = 1.0; 12 | val specialAlpha = if (domainSize == 2) { 13 | 2.1 14 | } else if (domainSize == 5) { 15 | 3.5 16 | } else { 17 | throw new RuntimeException("Domain size " + domainSize + " doesn't have fitparameters"); 18 | } 19 | val alphas = Array.fill(domainSize)(baseAlpha); 20 | alphas(specialIndex) = specialAlpha; 21 | sample(alphas, rng); 22 | } 23 | 24 | def main(args: Array[String]) { 25 | val rng = new java.util.Random(0); 26 | // val alpha = 0.1; 27 | // val specialAlpha = 0.3; 28 | { 29 | // val alpha = 0.4; 30 | // val specialAlpha = 1.0; 31 | val alpha = 1.0; 32 | val specialAlpha = 2.1; 33 | val totalSamples = 1000; 34 | var numInversions = 0; 35 | var totalInverted = 0.0; 36 | var totalNoninverted = 0.0; 37 | for (i <- 0 until totalSamples) { 38 | val currSample = sample(Array(specialAlpha, alpha), rng).toSeq; 39 | val max = currSample.reduce(Math.max(_, _)); 40 | if (currSample(0) < max - 1e-8) { 41 | numInversions += 1; 42 | totalInverted += max; 43 | } else { 44 | totalNoninverted += max; 45 | } 46 | } 47 | println("Domain size 2"); 48 | println("Num inversions: " + numInversions + "/" + totalSamples); 49 | println("Avg max if not 
inverted: " + totalNoninverted/(totalSamples - numInversions)); 50 | println("Avg max if inverted: " + totalInverted/numInversions); 51 | } 52 | 53 | { 54 | // val alpha = 0.4; 55 | // val specialAlpha = 1.9; 56 | val alpha = 1.0; 57 | val specialAlpha = 3.5; 58 | val totalSamples = 1000; 59 | var numInversions = 0; 60 | var totalInverted = 0.0; 61 | var totalNoninverted = 0.0; 62 | for (i <- 0 until totalSamples) { 63 | val currSample = sample(Array(specialAlpha, alpha, alpha, alpha, alpha), rng).toSeq; 64 | val max = currSample.reduce(Math.max(_, _)); 65 | if (currSample(0) < max - 1e-8) { 66 | numInversions += 1; 67 | totalInverted += max; 68 | } else { 69 | totalNoninverted += max; 70 | } 71 | } 72 | println("Domain size 5"); 73 | println("Num inversions: " + numInversions + "/" + totalSamples); 74 | println("Avg max if not inverted: " + totalNoninverted/(totalSamples - numInversions)); 75 | println("Avg max if inverted: " + totalInverted/numInversions); 76 | } 77 | } 78 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/OrderedClustering.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import scala.collection.mutable.HashMap 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ArrayBuffer 5 | 6 | class OrderedClustering(val clusters: Seq[Seq[Int]]) { 7 | // Elements must be consecutive integers from 0 up to n 8 | private val allIndicesSorted = clusters.foldLeft(new ArrayBuffer[Int])(_ ++ _).sorted; 9 | require(allIndicesSorted.sameElements((0 until allIndicesSorted.size).toSeq), allIndicesSorted); 10 | private val mentionToClusterMap = new HashMap[Int,Seq[Int]]; 11 | for (cluster <- clusters) { 12 | for (i <- cluster) { 13 | mentionToClusterMap.put(i, cluster); 14 | } 15 | } 16 | 17 | def getCluster(idx: Int) = mentionToClusterMap(idx); 18 | 19 | def isSingleton(idx: Int) = mentionToClusterMap(idx).size == 1; 20 | 21 | def startsCluster(idx: Int) = mentionToClusterMap(idx)(0) == idx; 22 | 23 | def areInSameCluster(idx1: Int, idx2: Int) = mentionToClusterMap(idx1).contains(idx2); 24 | 25 | def getImmediateAntecedent(idx: Int) = { 26 | val cluster = mentionToClusterMap(idx); 27 | val mentIdxInCluster = cluster.indexOf(idx); 28 | if (mentIdxInCluster == 0) { 29 | -1 30 | } else { 31 | cluster(mentIdxInCluster - 1); 32 | } 33 | } 34 | 35 | def getAllAntecedents(idx: Int) = { 36 | val cluster = mentionToClusterMap(idx); 37 | cluster.slice(0, cluster.indexOf(idx)); 38 | } 39 | 40 | def getAllConsequents(idx: Int) = { 41 | val cluster = mentionToClusterMap(idx); 42 | cluster.slice(cluster.indexOf(idx) + 1, cluster.size); 43 | } 44 | 45 | 46 | // Needed for output printing 47 | def getClusterIdx(idx: Int) = { 48 | var clusterIdx = 0; 49 | for (i <- 0 until clusters.size) { 50 | if (clusters(i).sameElements(mentionToClusterMap(idx))) { 51 | clusterIdx = i; 52 | } 53 | } 54 | clusterIdx; 55 | } 56 | 57 | def getSubclustering(mentIdxsToKeep: Seq[Int]): OrderedClustering = { 58 | val oldIndicesToNewIndicesMap = new HashMap[Int,Int](); 59 | (0 until mentIdxsToKeep.size).map(i => oldIndicesToNewIndicesMap.put(mentIdxsToKeep(i), i)); 60 | val filteredConvertedClusters = clusters.map(cluster => cluster.filter(mentIdxsToKeep.contains(_)).map(mentIdx => oldIndicesToNewIndicesMap(mentIdx))); 61 | val filteredConvertedClustersNoEmpties = filteredConvertedClusters.filter(cluster => !cluster.isEmpty); 62 | new 
OrderedClustering(filteredConvertedClustersNoEmpties); 63 | } 64 | } 65 | 66 | object OrderedClustering { 67 | 68 | def createFromClusterIds(clusterIds: Seq[Int]) = { 69 | val mentIdAndClusterId = (0 until clusterIds.size).map(i => (i, clusterIds(i))); 70 | val clustersUnsorted = mentIdAndClusterId.groupBy(_._2).values; 71 | val finalClusters = clustersUnsorted.toSeq.sortBy(_.head).map(clusterWithClusterId => clusterWithClusterId.map(_._1)); 72 | new OrderedClustering(finalClusters.toSeq); 73 | } 74 | 75 | def createFromBackpointers(backpointers: Seq[Int]) = { 76 | var nextClusterID = 0; 77 | val clusters = new ArrayBuffer[ArrayBuffer[Int]](); 78 | val mentionToCluster = new HashMap[Int,ArrayBuffer[Int]](); 79 | for (i <- 0 until backpointers.size) { 80 | if (backpointers(i) == i) { 81 | val cluster = ArrayBuffer(i); 82 | clusters += cluster; 83 | mentionToCluster.put(i, cluster); 84 | } else { 85 | val cluster = mentionToCluster(backpointers(i)); 86 | cluster += i; 87 | mentionToCluster.put(i, cluster); 88 | } 89 | } 90 | new OrderedClustering(clusters); 91 | } 92 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/OrderedClusteringBound.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import scala.collection.JavaConverters._ 3 | 4 | class OrderedClusteringBound(val ments: Seq[Mention], 5 | val clustering: OrderedClustering) { 6 | 7 | def postprocessForConll(): OrderedClusteringBound = { 8 | val mentIdxsToKeep = (0 until ments.size).filter(i => !clustering.isSingleton(i)); 9 | new OrderedClusteringBound(mentIdxsToKeep.map(i => ments(i)), clustering.getSubclustering(mentIdxsToKeep)); 10 | } 11 | 12 | def getClusterIdx(ment: Mention) = { 13 | clustering.getClusterIdx(ments.indexOf(ment)); 14 | } 15 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/PairwiseIndexingFeaturizer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.fig.basic.Indexer 3 | import edu.berkeley.nlp.futile.util.Counter 4 | import edu.berkeley.nlp.futile.util.Logger 5 | import scala.collection.JavaConverters._ 6 | import edu.berkeley.nlp.coref.sem.QueryCountsBundle 7 | 8 | trait PairwiseIndexingFeaturizer { 9 | 10 | def getIndexer(): Indexer[String]; 11 | 12 | def getIndex(feature: String, addToFeaturizer: Boolean): Int; 13 | 14 | def getQueryCountsBundle: QueryCountsBundle; 15 | 16 | def featurizeIndex(docGraph: DocumentGraph, currMentIdx: Int, antecedentIdx: Int, addToFeaturizer: Boolean): Seq[Int]; 17 | 18 | def printFeatureTemplateCounts() { 19 | val indexer = getIndexer(); 20 | val templateCounts = new Counter[String](); 21 | for (i <- 0 until indexer.size) { 22 | val currFeatureName = indexer.get(i); 23 | val currFeatureTemplateStop = currFeatureName.indexOf("="); 24 | if (currFeatureTemplateStop == -1) { 25 | Logger.logss("No =: " + currFeatureName); 26 | } else { 27 | templateCounts.incrementCount(currFeatureName.substring(0, currFeatureTemplateStop), 1.0); 28 | } 29 | } 30 | templateCounts.keepTopNKeys(200); 31 | if (templateCounts.size > 200) { 32 | Logger.logss("Not going to print more than 200 templates"); 33 | } 34 | templateCounts.keySet().asScala.toSeq.sorted.foreach(template => Logger.logss(template + ": " + templateCounts.getCount(template).toInt)); 35 | } 36 
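// Illustrative note on the contract of featurizeIndex: it returns the indices of every feature that
// fires on the pair (currMentIdx, antecedentIdx), with addToFeaturizer controlling whether unseen
// feature strings are assigned fresh indices. Scoring a pair (see PairwiseScorer later in this
// package) is then a dot product: the sum over the returned indices f of weights(f).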
| } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/PairwiseLossFunctions.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import edu.berkeley.nlp.futile.util.Logger 3 | 4 | object PairwiseLossFunctions { 5 | 6 | val noLoss = (doc: CorefDoc, ment: Int, ant: Int) => 0.0; 7 | 8 | val precisionLoss = (doc: CorefDoc, ment: Int, ant: Int) => { 9 | val oracleCluster = doc.getOraclePredClustering; 10 | // Only penalize if we predict a link and it's incorrect. N.B. because of our 11 | // conventions, ment == ant if we're predicting nonanaphoricity. 12 | if (!oracleCluster.areInSameCluster(ment, ant)) 1.0 else 0.0; 13 | }; 14 | 15 | val recallLoss = (doc: CorefDoc, ment: Int, ant: Int) => { 16 | val oracleCluster = doc.getOraclePredClustering; 17 | // Only penalize when we were supposed to make a prediction and we didn't or it 18 | // was wrong. 19 | if (!oracleCluster.startsCluster(ment) && !oracleCluster.areInSameCluster(ment, ant)) 1.0 else 0.0; 20 | }; 21 | 22 | // 1) Penalty when we link up someone who should start a new cluster (boosting this helps precision) 23 | // 2) Penalty when we start a new cluster with someone who should link up (boosting this helps recall) 24 | // 3) Penalty when we mess up a link 25 | val customLoss = (falseLinkScore: Double, falseNewScore: Double, wrongLinkScore: Double) => { 26 | (doc: CorefDoc, ment: Int, ant: Int) => { 27 | val oracleCluster = doc.getOraclePredClustering; 28 | if (oracleCluster.startsCluster(ment) && ment != ant) { 29 | falseLinkScore; 30 | } else if (!oracleCluster.startsCluster(ment) && ment == ant) { 31 | falseNewScore; 32 | } else if (!oracleCluster.startsCluster(ment) && !oracleCluster.areInSameCluster(ment, ant)) { 33 | wrongLinkScore; 34 | } else { 35 | 0.0; 36 | }; 37 | } 38 | }; 39 | 40 | // interpolationFactor interpolates between customLoss and a version of customLoss where 41 | // everything is weighted by the size of the gold cluster (should hypothetically be more 42 | // MUC-oriented than our current loss function) 43 | val weightedCustomLoss = (falseLinkScore: Double, falseNewScore: Double, wrongLinkScore: Double, interpolationFactor: Double) => { 44 | (doc: CorefDoc, ment: Int, ant: Int) => { 45 | val oracleCluster = doc.getOraclePredClustering; 46 | val oracleClusterSize = oracleCluster.getCluster(ment).size; 47 | val scalingFactor = (1 - interpolationFactor + interpolationFactor * oracleClusterSize); 48 | if (oracleCluster.startsCluster(ment) && ment != ant) { 49 | falseLinkScore * scalingFactor; 50 | } else if (!oracleCluster.startsCluster(ment) && ment == ant) { 51 | falseNewScore * scalingFactor; 52 | } else if (!oracleCluster.startsCluster(ment) && !oracleCluster.areInSameCluster(ment, ant)) { 53 | wrongLinkScore * scalingFactor; 54 | } else { 55 | 0.0; 56 | }; 57 | } 58 | } 59 | 60 | def apply(x: String) = getLossFcn(x); 61 | 62 | def getLossFcn(name: String): (CorefDoc, Int, Int) => Double = { 63 | if (name == "noLoss") { 64 | noLoss; 65 | } else if (name == "precisionLoss") { 66 | precisionLoss; 67 | } else if (name == "recallLoss") { 68 | recallLoss; 69 | } else if (name.startsWith("customLoss")) { 70 | val params = name.split("-"); 71 | require(params.size == 4); 72 | customLoss(params(1).toDouble, params(2).toDouble, params(3).toDouble); 73 | } else if (name.startsWith("weightedCustomLoss")) { 74 | val params = name.split("-"); 75 | require(params.size == 5); 
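// Example loss specifications (the numeric values here are purely illustrative, not defaults):
//   getLossFcn("customLoss-0.1-3.0-1.0")              -> falseLinkScore=0.1, falseNewScore=3.0, wrongLinkScore=1.0
//   getLossFcn("weightedCustomLoss-0.1-3.0-1.0-0.5")  -> the same three penalties, interpolated toward
//                                                        gold-cluster-size weighting with factor 0.5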
76 | weightedCustomLoss(params(1).toDouble, params(2).toDouble, params(3).toDouble, params(4).toDouble); 77 | } else { 78 | throw new RuntimeException("Unsupported"); 79 | } 80 | } 81 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/PairwiseScorer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | 3 | @SerialVersionUID(1L) 4 | class PairwiseScorer(val featurizer: PairwiseIndexingFeaturizer, val weights: Array[Double]) extends Serializable { 5 | 6 | def numWeights = weights.size 7 | 8 | def scoreIndexedFeats(feats: Seq[Int]): Double = { 9 | var featIdx = 0; 10 | var featTotal = 0.0; 11 | while (featIdx < feats.size) { 12 | featTotal += weights(feats(featIdx)); 13 | featIdx += 1; 14 | } 15 | featTotal; 16 | } 17 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/PronounDictionary.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | import scala.collection.mutable.HashMap 3 | 4 | object PronounDictionary { 5 | val firstPersonPronouns = Set("i", "me", "myself", "mine", "my", "we", "us", "ourself", "ourselves", "ours", "our"); 6 | val secondPersonPronouns = Set("you", "yourself", "yours", "your", "yourselves"); 7 | val thirdPersonPronouns = Set("he", "him", "himself", "his", "she", "her", "herself", "hers", "her", "it", "itself", "its", "one", "oneself", "one's", "they", "them", "themself", "themselves", "theirs", "their", "they", "them", "'em", "themselves"); 8 | val otherPronouns = Set("who", "whom", "whose", "where", "when","which"); 9 | 10 | val demonstratives = Set("this", "that", "these", "those"); 11 | 12 | // Borrowed from Stanford 13 | val singularPronouns = Set("i", "me", "myself", "mine", "my", "yourself", "he", "him", "himself", "his", "she", "her", "herself", "hers", "her", "it", "itself", "its", "one", "oneself", "one's"); 14 | val pluralPronouns = Set("we", "us", "ourself", "ourselves", "ours", "our", "yourself", "yourselves", "they", "them", "themself", "themselves", "theirs", "their"); 15 | val malePronouns = Set("he", "him", "himself", "his"); 16 | val femalePronouns = Set("her", "hers", "herself", "she"); 17 | val neutralPronouns = Set("it", "its", "itself", "where", "here", "there", "which"); 18 | 19 | 20 | val allPronouns = firstPersonPronouns ++ secondPersonPronouns ++ thirdPersonPronouns ++ otherPronouns; 21 | 22 | // Constructed based on Stanford's Dictionaries class 23 | val canonicalizations = new HashMap[String,String](); 24 | canonicalizations.put("i", "i"); 25 | canonicalizations.put("me", "i"); 26 | canonicalizations.put("my", "i"); 27 | canonicalizations.put("myself", "i"); 28 | canonicalizations.put("mine", "i"); 29 | canonicalizations.put("you", "you"); 30 | canonicalizations.put("your", "you"); 31 | canonicalizations.put("yourself", "you"); 32 | canonicalizations.put("yourselves", "you"); 33 | canonicalizations.put("yours", "you"); 34 | canonicalizations.put("he", "he"); 35 | canonicalizations.put("him", "he"); 36 | canonicalizations.put("his", "he"); 37 | canonicalizations.put("himself", "he"); 38 | canonicalizations.put("she", "she"); 39 | canonicalizations.put("her", "she"); 40 | canonicalizations.put("herself", "she"); 41 | canonicalizations.put("hers", "she"); 42 | 43 | canonicalizations.put("we", "we"); 44 | canonicalizations.put("us", 
"we"); 45 | canonicalizations.put("our", "we"); 46 | canonicalizations.put("ourself", "we"); 47 | canonicalizations.put("ourselves", "we"); 48 | canonicalizations.put("ours", "we"); 49 | canonicalizations.put("they", "they"); 50 | canonicalizations.put("them", "they"); 51 | canonicalizations.put("their", "they"); 52 | canonicalizations.put("themself", "they"); 53 | canonicalizations.put("themselves", "they"); 54 | canonicalizations.put("theirs", "they"); 55 | canonicalizations.put("'em", "they"); 56 | canonicalizations.put("it", "it"); 57 | canonicalizations.put("itself", "it"); 58 | canonicalizations.put("its", "it"); 59 | canonicalizations.put("one", "one"); 60 | canonicalizations.put("oneself", "one"); 61 | canonicalizations.put("one's", "one"); 62 | 63 | canonicalizations.put("this", "this"); 64 | canonicalizations.put("that", "that"); 65 | canonicalizations.put("these", "these"); 66 | canonicalizations.put("those", "those"); 67 | canonicalizations.put("which", "which"); 68 | canonicalizations.put("who", "who"); 69 | canonicalizations.put("whom", "who"); 70 | // canonicalizations.put("where", "where"); 71 | // canonicalizations.put("whose", "whose"); 72 | // This entry is here just to make results consistent with earlier ones 73 | // on our very small dev set 74 | canonicalizations.put("thy", "thy"); 75 | canonicalizations.put("y'all", "you"); 76 | canonicalizations.put("you're", "you"); 77 | canonicalizations.put("you'll", "you"); 78 | canonicalizations.put("'s", "'s"); 79 | 80 | def isPronLc(str: String): Boolean = { 81 | allPronouns.contains(str.toLowerCase()); 82 | } 83 | 84 | def isDemonstrative(str: String): Boolean = { 85 | demonstratives.contains(str.toLowerCase()); 86 | } 87 | 88 | def canonicalize(str: String): String = { 89 | if (!canonicalizations.contains(str.toLowerCase())) { 90 | ""; 91 | } else { 92 | canonicalizations(str.toLowerCase()); 93 | } 94 | } 95 | 96 | def main(args: Array[String]) { 97 | println(PronounDictionary.canonicalizations("'em")); 98 | println(PronounDictionary.isPronLc("them")); 99 | println(PronounDictionary.isPronLc("Them")); 100 | println(PronounDictionary.isPronLc("NotThem")); 101 | } 102 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/PruningStrategy.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref 2 | 3 | case class PruningStrategy(val strategy: String) { 4 | 5 | def getDistanceArgs(): (Int, Int) = { 6 | require(strategy.startsWith("distance")); 7 | val splitStrategy = strategy.split(":"); 8 | (splitStrategy(1).toInt, splitStrategy(2).toInt); 9 | } 10 | 11 | def getLogRatio(): Double = { 12 | require(strategy.startsWith("c2flogratio")); 13 | strategy.substring(strategy.indexOf(":") + 1).toDouble; 14 | } 15 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/WordNetInterfacer.scala: -------------------------------------------------------------------------------- 1 | //package edu.berkeley.nlp.coref 2 | //import java.net.URL 3 | //import edu.mit.jwi.item.IIndexWord 4 | //import edu.mit.jwi.item.IWord 5 | //import edu.mit.jwi.item.IWordID 6 | //import edu.mit.jwi.Dictionary 7 | //import edu.mit.jwi.item.POS 8 | //import edu.mit.jwi.morph.WordnetStemmer 9 | //import edu.mit.jwi.item.ISynset 10 | //import scala.collection.JavaConverters._ 11 | //import edu.mit.jwi.item.Pointer 12 | //import 
scala.collection.mutable.ArrayBuffer 13 | //import scala.collection.mutable.HashSet 14 | //import edu.mit.jwi.RAMDictionary 15 | //import edu.mit.jwi.data.ILoadPolicy 16 | //import edu.berkeley.nlp.futile.util.Logger 17 | // 18 | //class WordNetInterfacer(path: String) { 19 | // val url = new URL("file", null, path); 20 | // 21 | //// val dict = new Dictionary(url); 22 | //// dict.open(); 23 | // val originalDict = new Dictionary(url); 24 | // originalDict.open(); 25 | // val dict = new RAMDictionary(originalDict, ILoadPolicy.IMMEDIATE_LOAD); 26 | // dict.open(); 27 | // 28 | // val wns = new WordnetStemmer(dict); 29 | // 30 | // def getLemmas(head: String): Set[String] = { 31 | // getNounStemSet(head); 32 | // } 33 | // 34 | // def getSynonyms(head: String): Set[String] = { 35 | // getNounStemSet(head).flatMap((headStem: String) => { 36 | // val wordSynset = getWordSynset(headStem); 37 | // if (wordSynset != null) wordSynset.getWords().asScala.map(_.getLemma()) else Set[String](); 38 | // }); 39 | // } 40 | // 41 | // def getHypernyms(head: String): Set[String] = { 42 | // val initialSynset = getNounStemSet(head).flatMap((headStem: String) => { 43 | // if (getWordSynset(headStem) != null) Set[ISynset](getWordSynset(headStem)) else Set[ISynset](); 44 | // }).toSet 45 | // getHypernyms(10, initialSynset).flatMap(_.getWords().asScala.map(_.getLemma())).toSet; 46 | // } 47 | // 48 | // def areSynonyms(firstHead: String, secondHead: String) = { 49 | // val stemsFirstHead = getNounStemSet(firstHead); 50 | // val stemsSecondHead = getNounStemSet(secondHead); 51 | // var isSynonym = false; 52 | // for (wordAStem <- stemsFirstHead) { 53 | // val wordASynset: ISynset = getWordSynset(wordAStem); 54 | // if (wordASynset != null) { 55 | // for (wordBStem <- stemsSecondHead) { 56 | // isSynonym = isSynonym || wordASynset.getWords().asScala.map(_.getLemma()).contains(wordBStem); 57 | // } 58 | // } 59 | // } 60 | // isSynonym 61 | // } 62 | // 63 | // def areHypernyms(head: String, possibleHypernym: String) = { 64 | // val stemsHead = getNounStemSet(head); 65 | // val stemsPossibleHypernym = getNounStemSet(possibleHypernym); 66 | // var isHypernym = false; 67 | // for (headStem <- stemsHead) { 68 | // val headSynset: ISynset = getWordSynset(headStem); 69 | // if (headSynset != null) { 70 | // // 10 levels in the tree should be enough for anybody... 
71 | // val hypernyms = getHypernyms(10, Set(headSynset)); 72 | // for(hypernym <- hypernyms){ 73 | // val hypernymWords = hypernym.getWords(); 74 | // for (i <- 0 until hypernymWords.size()) { 75 | // isHypernym = isHypernym || stemsPossibleHypernym.contains(hypernymWords.get(i).getLemma()); 76 | // } 77 | // } 78 | // } 79 | // } 80 | // isHypernym 81 | // } 82 | // 83 | // private def getHypernyms(numLevelsToRecurse: Int, synsets: Set[ISynset]): HashSet[ISynset] = { 84 | // var synsetsThisLevel = new HashSet[ISynset]() ++ synsets; 85 | // var synsetsNextLevel = new HashSet[ISynset](); 86 | // val allSynsets = new HashSet[ISynset](); 87 | // for (i <- 0 until numLevelsToRecurse) { 88 | // if (!synsetsThisLevel.isEmpty) { 89 | // for (synset <- synsetsThisLevel) { 90 | // synsetsNextLevel ++= synset.getRelatedSynsets(Pointer.HYPERNYM).asScala.map(dict.getSynset(_)); 91 | // } 92 | // // Don't visit nodes we've already been to 93 | // synsetsThisLevel = (synsetsNextLevel -- allSynsets); 94 | // allSynsets ++= synsetsNextLevel; 95 | // synsetsNextLevel = new HashSet[ISynset](); 96 | // } 97 | // } 98 | // allSynsets; 99 | // } 100 | // 101 | // private def getWordSynset(stemmedWord: String) = { 102 | // val idxWord: IIndexWord = dict.getIndexWord(stemmedWord, POS.NOUN); 103 | // if (idxWord != null) { 104 | // val wordID: IWordID = idxWord.getWordIDs().get(0); 105 | // val word: IWord = dict.getWord(wordID); 106 | // word.getSynset(); 107 | // } else { 108 | // null; 109 | // } 110 | // } 111 | // 112 | // private def getNounStemSet(head: String): Set[String] = { 113 | // require(head != null && !head.isEmpty()); 114 | // var toReturn = Set[String](); 115 | // try { 116 | // toReturn = wns.findStems(head, POS.NOUN).asScala.toSet; 117 | // } catch { 118 | // case e: IllegalArgumentException => Logger.logss("IllegalArgumentException on " + head); 119 | // case _ => Logger.logss("Badness"); System.exit(0); 120 | // } 121 | // toReturn; 122 | // } 123 | // 124 | //} 125 | // 126 | //object WordNetInterfacer { 127 | // 128 | // 129 | // 130 | // 131 | // def main(args: Array[String]) = { 132 | // val path = "/Users/gdurrett/Documents/Berkeley/Utils/WNdb-3.0/dict/"; 133 | // val url = new URL("file", null, path); 134 | // 135 | // val dict = new Dictionary(url); 136 | // dict.open(); 137 | // val idxWord: IIndexWord = dict.getIndexWord("dog", POS.NOUN); 138 | // val wordID: IWordID = idxWord.getWordIDs().get(0); 139 | // val word: IWord = dict.getWord(wordID); 140 | // println("Id = " + wordID); 141 | // println("Lemma = " + word.getLemma()); 142 | // println("Gloss = " + word.getSynset().getGloss()); 143 | // 144 | // val synset: ISynset = word.getSynset(); 145 | // // iterate over words associated with the synset 146 | // println("Synonyms"); 147 | // synset.getWords().asScala.foreach(word => println(word.getLemma())) 148 | // 149 | // val hypernyms = synset.getRelatedSynsets(Pointer.HYPERNYM); 150 | // println("Hypernyms"); 151 | // for(sid <- hypernyms.asScala){ 152 | // println(sid + ": " + dict.getSynset(sid).getWords().asScala.map(_.getLemma())); 153 | // } 154 | // 155 | // val wns = new WordnetStemmer(dict); 156 | // println(wns.findStems("dogs", POS.NOUN)); 157 | // println(wns.findStems("DOGS", POS.NOUN)); 158 | // println(wns.findStems("Presidents", POS.NOUN)); 159 | // 160 | // 161 | // println("==============="); 162 | // val wordNetInterfacer = new WordNetInterfacer(path); 163 | // println("Synonyms: dog cat? 
(should be false) " + wordNetInterfacer.areSynonyms("dog", "cat")); 164 | // println("Synonyms: dog domestic_dog? (should be true) " + wordNetInterfacer.areSynonyms("dog", "domestic_dog")); 165 | // 166 | // 167 | // println("Hypernyms: dog domestic_dog? (should be false) " + wordNetInterfacer.areHypernyms("dog", "domestic_dog")); 168 | // println("Hypernyms: dog canine? (should be true) " + wordNetInterfacer.areHypernyms("dog", "canine")); 169 | // println("Hypernyms: canine dog? (should be false) " + wordNetInterfacer.areHypernyms("canine", "dog")); 170 | // 171 | // 172 | // println("==============="); 173 | // println(wordNetInterfacer.getLemmas("dog")); 174 | // println(wordNetInterfacer.getSynonyms("dog")); 175 | // println(wordNetInterfacer.getSynonyms("cat")); 176 | // println(wordNetInterfacer.getHypernyms("cat")); 177 | // 178 | // } 179 | //} -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/bp/Domain.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.bp 2 | 3 | case class Domain[T](val entries: Array[T]) { 4 | def size = entries.size 5 | 6 | def indexOf(entry: T) = entries.indexOf(entry); 7 | 8 | def value(idx: Int): T = entries(idx); 9 | 10 | override def toString() = entries.foldLeft("")((str, entry) => str + entry + " ").dropRight(1); 11 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/bp/Node.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.bp 2 | import scala.collection.mutable.ArrayBuffer 3 | import edu.berkeley.nlp.futile.util.Logger 4 | import edu.berkeley.nlp.coref.GUtil 5 | 6 | class Node[T](val domain: Domain[T]) { 7 | var factors = new ArrayBuffer[Factor](); 8 | var receivedMessages: Array[Array[Double]] = null; 9 | var sentMessages: Array[Array[Double]] = null; 10 | var cachedBeliefsOrMarginals: Array[Double] = Array.fill(domain.size)(0.0); 11 | 12 | def registerFactor(factor: Factor) { 13 | factors += factor; 14 | } 15 | 16 | // TODO: Do I need this null thing? 17 | def initializeReceivedMessagesUniform() { 18 | if (receivedMessages == null) { 19 | receivedMessages = new Array[Array[Double]](factors.size); 20 | } else { 21 | for (i <- 0 until receivedMessages.size) { 22 | receivedMessages(i) = null; 23 | } 24 | } 25 | } 26 | 27 | // This is just here so we can let things be null...At some point, it was a problem because 28 | // the received messages remember which factors sent them, so clearing them for some reason 29 | // caused problems (maybe writing the value 1.0 was problematic when we weren't clearing the 30 | // received messages on the other end?). Can probably get rid of this somehow and just do the 31 | // obvious thing of initializing messages to 1.0. 
32 | def receivedMessageValue(i: Int, j: Int): Double = { 33 | if (receivedMessages(i) == null) { 34 | 1.0; 35 | } else { 36 | receivedMessages(i)(j); 37 | } 38 | } 39 | 40 | def receiveMessage(factor: Factor, message: Array[Double]) { 41 | require(receivedMessages != null); 42 | require(!GUtil.containsNaN(message)); 43 | val idx = factors.indexOf(factor); 44 | require(idx != -1 && idx < receivedMessages.size); 45 | if (message.toSeq.contains(0.0)) { 46 | Logger.logss("For domain: " + domain + ", bad received message: " + message.toSeq + " from " + factor.getClass()); 47 | Logger.logss("Previous message: " + receivedMessages(factors.indexOf(factor)).toSeq); 48 | require(false); 49 | } 50 | if (message.reduce(_ + _) == 0) { 51 | Logger.logss("For domain: " + domain + ", bad received message: " + message.toSeq + " from " + factor.getClass()); 52 | Logger.logss("Previous message: " + receivedMessages(factors.indexOf(factor)).toSeq); 53 | require(false); 54 | } 55 | require(message.size == domain.size); 56 | receivedMessages(factors.indexOf(factor)) = message; 57 | } 58 | 59 | def sendMessages() { 60 | // sendMessagesUseRealSpace(); 61 | sendMessagesUseLogSpace(); 62 | } 63 | 64 | def sendMessagesUseRealSpace() { 65 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 66 | cachedBeliefsOrMarginals(i) = 1.0; 67 | } 68 | require(receivedMessages.size == factors.size); 69 | for (i <- 0 until receivedMessages.size) { 70 | var j = 0; 71 | while (j < cachedBeliefsOrMarginals.size) { 72 | cachedBeliefsOrMarginals(j) *= receivedMessageValue(i, j); 73 | j += 1; 74 | } 75 | } 76 | // Normalize beliefs 77 | val normalizedNonzero = GUtil.normalizeiSoft(cachedBeliefsOrMarginals); 78 | if (!normalizedNonzero) { 79 | Logger.logss("For domain: " + domain + ", received messages:" + receivedMessages.foldLeft("")((currStr, msg) => currStr + "\n" + msg.toSeq.toString)) 80 | require(false); 81 | } 82 | if (sentMessages == null) { 83 | sentMessages = new Array[Array[Double]](factors.size); 84 | } 85 | for (i <- 0 until factors.length) { 86 | sentMessages(i) = new Array[Double](domain.size); 87 | var j = 0; 88 | while (j < domain.size) { 89 | val rmVal = receivedMessageValue(i, j); 90 | if (rmVal == 0) { 91 | sentMessages(i)(j) = 0; 92 | } else { 93 | sentMessages(i)(j) = cachedBeliefsOrMarginals(j)/rmVal; 94 | } 95 | j += 1; 96 | } 97 | factors(i).receiveMessage(this, sentMessages(i)); 98 | } 99 | } 100 | 101 | def sendMessagesUseLogSpace() { 102 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 103 | cachedBeliefsOrMarginals(i) = 0.0; 104 | } 105 | require(receivedMessages.size == factors.size); 106 | for (i <- 0 until receivedMessages.size) { 107 | var j = 0; 108 | while (j < cachedBeliefsOrMarginals.size) { 109 | cachedBeliefsOrMarginals(j) += Math.log(receivedMessageValue(i, j)); 110 | j += 1; 111 | } 112 | } 113 | GUtil.logNormalizei(cachedBeliefsOrMarginals); 114 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 115 | cachedBeliefsOrMarginals(i) = Math.exp(cachedBeliefsOrMarginals(i)); 116 | } 117 | if (sentMessages == null) { 118 | sentMessages = new Array[Array[Double]](factors.size); 119 | } 120 | for (i <- 0 until factors.length) { 121 | sentMessages(i) = new Array[Double](domain.size); 122 | var j = 0; 123 | while (j < domain.size) { 124 | val rmVal = receivedMessageValue(i, j); 125 | if (rmVal == 0) { 126 | sentMessages(i)(j) = 0; 127 | } else { 128 | sentMessages(i)(j) = cachedBeliefsOrMarginals(j)/rmVal; 129 | } 130 | j += 1; 131 | } 132 | factors(i).receiveMessage(this, sentMessages(i)); 133 | 
} 134 | } 135 | 136 | def getMarginals(): Array[Double] = { 137 | getMarginalsUseLogSpace(); 138 | } 139 | 140 | def getMarginalsUseLogSpace(): Array[Double] = { 141 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 142 | cachedBeliefsOrMarginals(i) = 0.0; 143 | } 144 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 145 | for (j <- 0 until receivedMessages.size) { 146 | cachedBeliefsOrMarginals(i) += Math.log(receivedMessageValue(j, i)); 147 | } 148 | } 149 | GUtil.logNormalizei(cachedBeliefsOrMarginals); 150 | for (i <- 0 until cachedBeliefsOrMarginals.size) { 151 | cachedBeliefsOrMarginals(i) = Math.exp(cachedBeliefsOrMarginals(i)); 152 | } 153 | cachedBeliefsOrMarginals 154 | } 155 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/lang/ArabicTreebankLanguagePack.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.lang; 2 | 3 | import edu.berkeley.nlp.futile.treebank.AbstractTreebankLanguagePack; 4 | 5 | 6 | public class ArabicTreebankLanguagePack extends AbstractTreebankLanguagePack { 7 | private static final String[] collinsPunctTags = {"PUNC"}; 8 | 9 | private static final String[] pennPunctTags = {"PUNC"}; 10 | 11 | private static final String[] pennPunctWords = {".","\"",",","-LRB-","-RRB-","-",":","/","?","_","*","%","!",">","-PLUS-","...",";","..","&","=","ر","'","\\","`","......"}; 12 | 13 | private static final String[] pennSFPunctTags = {"PUNC"}; 14 | 15 | private static final String[] pennSFPunctWords = {".", "!", "?"}; 16 | 17 | /** 18 | * The first 3 are used by the Penn Treebank; # is used by the 19 | * BLLIP corpus, and ^ and ~ are used by Klein's lexparser. 20 | * Chris deleted '_' for Arabic as it appears in tags (NO_FUNC). 21 | * June 2006: CDM tested _ again with true (new) Treebank tags to see if it 22 | * was useful for densening up the tag space, but the results were negative. 23 | * Roger added + for Arabic but Chris deleted it again, since unless you've 24 | * recoded determiners, it screws up DET+NOUN, etc. (That is, it would only be useful if 25 | * you always wanted to cut at the first '+', but in practice that is not viable, certainly 26 | * not with the IBM ATB processing either.) 27 | */ 28 | private static final char[] annotationIntroducingChars = {'-', '=', '|', '#', '^', '~'}; 29 | 30 | /** 31 | * This is valid for "BobChrisTreeNormalizer" conventions only. 32 | * wsg: "ROOT" should always be the first value. See {@link #startSymbol} in 33 | * the parent class. 34 | */ 35 | private static final String[] pennStartSymbols = {"ROOT"}; 36 | 37 | 38 | /** 39 | * Returns a String array of punctuation tags for this treebank/language. 40 | * 41 | * @return The punctuation tags 42 | */ 43 | @Override 44 | public String[] punctuationTags() { 45 | return pennPunctTags; 46 | } 47 | 48 | 49 | /** 50 | * Returns a String array of punctuation words for this treebank/language. 51 | * 52 | * @return The punctuation words 53 | */ 54 | @Override 55 | public String[] punctuationWords() { 56 | return pennPunctWords; 57 | } 58 | 59 | 60 | /** 61 | * Returns a String array of sentence final punctuation tags for this 62 | * treebank/language. 
63 | * 64 | * @return The sentence final punctuation tags 65 | */ 66 | @Override 67 | public String[] sentenceFinalPunctuationTags() { 68 | return pennSFPunctTags; 69 | } 70 | 71 | /** 72 | * Returns a String array of sentence final punctuation words for this 73 | * treebank/language. 74 | * 75 | * @return The sentence final punctuation tags 76 | */ 77 | public String[] sentenceFinalPunctuationWords() { 78 | return pennSFPunctWords; 79 | } 80 | 81 | /** 82 | * Returns a String array of treebank start symbols. 83 | * 84 | * @return The start symbols 85 | */ 86 | @Override 87 | public String[] startSymbols() { 88 | return pennStartSymbols; 89 | } 90 | 91 | /** 92 | * Returns the extension of treebank files for this treebank. 93 | * This is "tree". 94 | */ 95 | public String treebankFileExtension() { 96 | return "tree"; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/lang/CorefLanguagePack.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.lang 2 | 3 | trait CorefLanguagePack { 4 | def getMentionConstituentTypes: Seq[String]; 5 | def getPronominalTags: Seq[String]; 6 | def getProperTags: Seq[String]; 7 | } 8 | 9 | class EnglishCorefLanguagePack extends CorefLanguagePack { 10 | def getMentionConstituentTypes: Seq[String] = Seq("NP"); 11 | def getPronominalTags: Seq[String] = Seq("PRP", "PRP$"); 12 | def getProperTags: Seq[String] = Seq("NNP"); 13 | } 14 | 15 | class ChineseCorefLanguagePack extends CorefLanguagePack { 16 | def getMentionConstituentTypes: Seq[String] = Seq("NP"); 17 | def getPronominalTags: Seq[String] = Seq("PN"); 18 | def getProperTags: Seq[String] = Seq("NR"); 19 | } 20 | 21 | class ArabicCorefLanguagePack extends CorefLanguagePack { 22 | def getMentionConstituentTypes: Seq[String] = Seq("NP"); 23 | def getPronominalTags: Seq[String] = Seq("PRP", "PRP$"); 24 | def getProperTags: Seq[String] = Seq("NNP"); 25 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/lang/Language.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.lang; 2 | 3 | 4 | public enum Language { 5 | ENGLISH, ARABIC, CHINESE; 6 | } 7 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/preprocess/NerDriver.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.preprocess; 2 | 3 | import edu.berkeley.nlp.futile.util.Logger; 4 | import edu.berkeley.nlp.futile.fig.basic.Option; 5 | import edu.berkeley.nlp.futile.fig.exec.Execution; 6 | 7 | 8 | public class NerDriver implements Runnable { 9 | @Option(gloss = "") 10 | public static Mode mode = Mode.TRAIN; 11 | 12 | @Option(gloss = "Path to read/write the model") 13 | public static String modelPath = ""; 14 | 15 | // TRAINING_OPTIONS 16 | @Option(gloss = "Path to CoNLL training set") 17 | public static String trainPath = ""; 18 | @Option(gloss = "Training set size, -1 for all") 19 | public static int trainSize = -1; 20 | @Option(gloss = "Path to CoNLL test set") 21 | public static String testPath = ""; 22 | @Option(gloss = "Test set size, -1 for all") 23 | public static int testSize = -1; 24 | 25 | public static enum Mode { 26 | TRAIN, RUN; 27 | } 28 | 29 | public static void main(String[] args) { 
30 | NerDriver main = new NerDriver(); 31 | Execution.run(args, main); // add .class here if that class should receive command-line args 32 | } 33 | 34 | public void run() { 35 | Logger.setFig(); 36 | switch (mode) { 37 | case TRAIN: NerSystem.trainNerSystem(); 38 | break; 39 | case RUN: 40 | // Read trees 41 | // PennTreeReader.berkeleyParserBadTree 42 | // Extract words and POS 43 | 44 | // 45 | break; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/preprocess/NerExample.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.preprocess 2 | import edu.berkeley.nlp.futile.fig.basic.Indexer 3 | import scala.collection.mutable.ArrayBuffer 4 | 5 | case class NerExample(val words: Seq[String], val poss: Seq[String], val goldLabels: Seq[String], val labelIndexer: Indexer[String]) { 6 | 7 | def featurize(featureIndexer: Indexer[String], addToIndexer: Boolean): Array[Array[Array[Int]]] = { 8 | Array.tabulate(words.size, labelIndexer.size)((tokIdx, labelIdx) => { 9 | require(labelIndexer.size > labelIdx); 10 | val labelName = labelIndexer.getObject(labelIdx); 11 | // Extract word and word shape features 12 | val wordAt = (i: Int) => if (tokIdx + i < 0) "<>" else if (tokIdx + i >= words.size) "<>" else words(tokIdx + i); 13 | val wordShapeAt = (i: Int) => NerExample.shapeFor(wordAt(i)); 14 | val wordClassAt = (i: Int) => NerExample.classFor(wordAt(i)); 15 | val posAt = (i: Int) => if (tokIdx + i < 0) "<>" else if (tokIdx + i >= words.size) "<>" else poss(tokIdx + i); 16 | 17 | val feats = new ArrayBuffer[Int](); 18 | val maybeAddFeat = (feat: String) => { 19 | val labeledFeat = labelName + ":" + feat; 20 | if (addToIndexer || featureIndexer.contains(labeledFeat)) feats += featureIndexer.getIndex(labeledFeat) 21 | } 22 | // Words 23 | maybeAddFeat("-1W=" + wordAt(-2)); 24 | maybeAddFeat("-1W=" + wordAt(-1)); 25 | maybeAddFeat("0W=" + wordAt(0)); 26 | maybeAddFeat("1W=" + wordAt(1)); 27 | maybeAddFeat("2W=" + wordAt(2)); 28 | // POS 29 | maybeAddFeat("-2P=" + posAt(-2)); 30 | maybeAddFeat("-1P=" + posAt(-1)); 31 | maybeAddFeat("0P=" + posAt(0)); 32 | maybeAddFeat("1P=" + posAt(1)); 33 | maybeAddFeat("2P=" + posAt(2)); 34 | // Shape 35 | maybeAddFeat("-2S=" + wordShapeAt(-2)); 36 | maybeAddFeat("-1S=" + wordShapeAt(-1)); 37 | maybeAddFeat("0S=" + wordShapeAt(0)); 38 | maybeAddFeat("1S=" + wordShapeAt(1)); 39 | maybeAddFeat("2S=" + wordShapeAt(2)); 40 | // Class 41 | maybeAddFeat("-2C=" + wordClassAt(-1)); 42 | maybeAddFeat("-1C=" + wordClassAt(-1)); 43 | maybeAddFeat("0C=" + wordClassAt(0)); 44 | maybeAddFeat("1C=" + wordClassAt(1)); 45 | maybeAddFeat("2C=" + wordClassAt(1)); 46 | // POS-POS conjunctions 47 | maybeAddFeat("-2-1P=" + posAt(-2) + "," + posAt(-1)); 48 | maybeAddFeat("-10P=" + posAt(-1) + "," + posAt(0)); 49 | maybeAddFeat("01P=" + posAt(0) + "," + posAt(1)); 50 | maybeAddFeat("12P=" + posAt(1) + "," + posAt(2)); 51 | // // Word-word conjunctions 52 | // maybeAddFeat("-2-1W=" + wordAt(-2) + "," + wordAt(-1)); 53 | // maybeAddFeat("-10W=" + wordAt(-1) + "," + wordAt(0)); 54 | // maybeAddFeat("01W=" + wordAt(0) + "," + wordAt(1)); 55 | // maybeAddFeat("12W=" + wordAt(1) + "," + wordAt(2)); 56 | // Word-POS conjunctions 57 | maybeAddFeat("-2-1PW=" + posAt(-2) + "," + wordAt(-1)); 58 | maybeAddFeat("-10PW=" + posAt(-1) + "," + wordAt(0)); 59 | maybeAddFeat("01PW=" + posAt(0) + "," + wordAt(1)); 60 | 
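// Illustrative values for the shape/class helpers used above (traced through shapeFor/classFor,
// defined below in the companion object):
//   shapeFor("Obama") == "Xxxe"      -- repeats of a character class beyond two are collapsed to 'e'
//   classFor("Obama") == "-INITC"    -- word-initial capital with exactly one uppercase letter
//   shapeFor("1990s") == "ddex";  classFor("1990s") == "-LC-NUM-s"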
maybeAddFeat("12PW=" + posAt(1) + "," + wordAt(2)); 61 | maybeAddFeat("-2-1WP=" + wordAt(-2) + "," + posAt(-1)); 62 | maybeAddFeat("-10WP=" + wordAt(-1) + "," + posAt(0)); 63 | maybeAddFeat("01WP=" + wordAt(0) + "," + posAt(1)); 64 | maybeAddFeat("12WP=" + wordAt(1) + "," + posAt(2)); 65 | // Word-class conjunctions 66 | maybeAddFeat("-2-1CW=" + wordClassAt(-2) + "," + wordAt(-1)); 67 | maybeAddFeat("-10CW=" + wordClassAt(-1) + "," + wordAt(0)); 68 | maybeAddFeat("01CW=" + wordClassAt(0) + "," + wordAt(1)); 69 | maybeAddFeat("12CW=" + wordClassAt(1) + "," + wordAt(2)); 70 | maybeAddFeat("-2-1WC=" + wordAt(-2) + "," + wordClassAt(-1)); 71 | maybeAddFeat("-10WC=" + wordAt(-1) + "," + wordClassAt(0)); 72 | maybeAddFeat("01WC=" + wordAt(0) + "," + wordClassAt(1)); 73 | maybeAddFeat("12WC=" + wordAt(1) + "," + wordClassAt(2)); 74 | feats.toArray; 75 | }); 76 | } 77 | } 78 | 79 | object NerExample { 80 | 81 | def shapeFor(word: String) = { 82 | val result = new StringBuilder(word.length); 83 | var i = 0; 84 | while (i < word.length) { 85 | val c = word(i); 86 | val x = if (c.isLetter && c.isUpper) 'X' else if (c.isLetter) 'x' else if (c.isDigit) 'd' else c; 87 | if (result.length > 1 && (result.last == x) && result(result.length - 2) == x) { 88 | result += 'e' 89 | } else if (result.length > 1 && result.last == 'e' && result(result.length - 2) == x) { 90 | () // nothing 91 | } else { 92 | result += x; 93 | } 94 | i += 1; 95 | } 96 | result.toString 97 | } 98 | 99 | def classFor(word: String) = { 100 | val sb = new StringBuilder; 101 | val wlen = word.length(); 102 | val numCaps = (word: Seq[Char]).count(_.isUpper); 103 | val hasDigit = word.exists(_.isDigit); 104 | val hasDash = word.contains('-'); 105 | val hasLower = numCaps < wlen; 106 | val ch0 = word.charAt(0); 107 | val lowered = word.toLowerCase(); 108 | if (Character.isUpperCase(ch0) || Character.isTitleCase(ch0)) { 109 | if (numCaps == 1) { 110 | sb.append("-INITC"); 111 | } else { 112 | sb.append("-CAPS"); 113 | } 114 | } else if (!Character.isLetter(ch0) && numCaps > 0) { 115 | sb.append("-CAPS"); 116 | } else if (hasLower) { 117 | sb.append("-LC"); 118 | } 119 | 120 | if (hasDigit) { 121 | sb.append("-NUM"); 122 | } 123 | if (hasDash) { 124 | sb.append("-DASH"); 125 | } 126 | if (lowered.endsWith("s") && wlen >= 3) { 127 | // here length 3, so you don't miss out on ones like 80s 128 | val ch2 = lowered.charAt(wlen - 2); 129 | // not -ess suffixes or greek/latin -us, -is 130 | if (ch2 != 's' && ch2 != 'i' && ch2 != 'u') { 131 | sb.append("-s"); 132 | } 133 | } else if (word.length() >= 5 && !hasDash && !(hasDigit && numCaps > 0)) { 134 | if (lowered.endsWith("ed")) { 135 | sb.append("-ed"); 136 | } else if (lowered.endsWith("ing")) { 137 | sb.append("-ing"); 138 | } else if (lowered.endsWith("ion")) { 139 | sb.append("-ion"); 140 | } else if (lowered.endsWith("er")) { 141 | sb.append("-er"); 142 | } else if (lowered.endsWith("est")) { 143 | sb.append("-est"); 144 | } else if (lowered.endsWith("ly")) { 145 | sb.append("-ly"); 146 | } else if (lowered.endsWith("ity")) { 147 | sb.append("-ity"); 148 | } else if (lowered.endsWith("y")) { 149 | sb.append("-y"); 150 | } else if (lowered.endsWith("al")) { 151 | sb.append("-al"); 152 | } 153 | } 154 | sb.toString; 155 | } 156 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/preprocess/Reprocessor.scala: -------------------------------------------------------------------------------- 1 | package 
edu.berkeley.nlp.coref.preprocess 2 | 3 | import edu.berkeley.nlp.PCFGLA.CoarseToFineMaxRuleParser 4 | import edu.berkeley.nlp.coref.ConllDoc 5 | import scala.collection.JavaConverters._ 6 | import scala.collection.mutable.ArrayBuffer 7 | import java.io.PrintWriter 8 | import edu.berkeley.nlp.coref.ConllDocReader 9 | import edu.berkeley.nlp.syntax.Tree 10 | import edu.berkeley.nlp.futile.util.Logger 11 | import java.util.Arrays 12 | import edu.berkeley.nlp.futile.fig.basic.IOUtils 13 | import edu.berkeley.nlp.coref.Chunk 14 | import edu.berkeley.nlp.coref.ConllDocWriter 15 | 16 | object Reprocessor { 17 | 18 | def redoConllDocument(parser: CoarseToFineMaxRuleParser, backoffParser: CoarseToFineMaxRuleParser, nerSystem: NerSystem, docReader: ConllDocReader, inputPath: String, outputPath: String) { 19 | val writer = IOUtils.openOutHard(outputPath); 20 | val docs = docReader.readConllDocs(inputPath); 21 | for (doc <- docs) { 22 | Logger.logss("Reprocessing: " + doc.docID + " part " + doc.docPartNo); 23 | val newPos = new ArrayBuffer[Seq[String]](); 24 | val newParses = new ArrayBuffer[edu.berkeley.nlp.futile.syntax.Tree[String]](); 25 | val newNerChunks = new ArrayBuffer[Seq[Chunk[String]]](); 26 | for (sentIdx <- 0 until doc.words.size) { 27 | if (sentIdx % 10 == 0) { 28 | Logger.logss("Sentence " + sentIdx); 29 | } 30 | val sent = doc.words(sentIdx); 31 | var parse = PreprocessingDriver.parse(parser, backoffParser, sent.asJava); 32 | parse = if (parse.getYield().size() != sent.length) { 33 | Logger.logss("Couldn't parse sentence: " + sent.toSeq); 34 | Logger.logss("Using default parse"); 35 | convertFromFutileTree(doc.trees(sentIdx).constTree); 36 | } else { 37 | parse; 38 | } 39 | val posTags = parse.getPreTerminalYield().asScala.toArray; 40 | newPos += posTags; 41 | newParses += convertToFutileTree(parse); 42 | val nerBioLabels = nerSystem.runNerSystem(sent.toArray, posTags); 43 | newNerChunks += convertBioToChunks(nerBioLabels); 44 | } 45 | ConllDocWriter.writeIncompleteConllDoc(writer, doc.docID, doc.docPartNo, doc.words, newPos, newParses, doc.speakers, newNerChunks, doc.corefChunks); 46 | } 47 | writer.close(); 48 | } 49 | 50 | def convertBioToChunks(nerBioLabels: Seq[String]): Seq[Chunk[String]] = { 51 | var lastNerStart = -1; 52 | val chunks = new ArrayBuffer[Chunk[String]](); 53 | for (i <- 0 until nerBioLabels.size) { 54 | if (nerBioLabels(i).startsWith("B")) { 55 | if (lastNerStart != -1) { 56 | chunks += new Chunk[String](lastNerStart, i, "MISC"); 57 | } 58 | lastNerStart = i; 59 | } else if (nerBioLabels(i).startsWith("O")) { 60 | if (lastNerStart != -1) { 61 | chunks += new Chunk[String](lastNerStart, i, "MISC"); 62 | lastNerStart = -1; 63 | } 64 | } 65 | } 66 | chunks; 67 | } 68 | 69 | def convertToFutileTree(slavTree: edu.berkeley.nlp.syntax.Tree[String]): edu.berkeley.nlp.futile.syntax.Tree[String] = { 70 | new edu.berkeley.nlp.futile.syntax.Tree[String](slavTree.getLabel(), slavTree.getChildren().asScala.map(convertToFutileTree(_)).asJava); 71 | } 72 | 73 | def convertFromFutileTree(myTree: edu.berkeley.nlp.futile.syntax.Tree[String]): edu.berkeley.nlp.syntax.Tree[String] = { 74 | new edu.berkeley.nlp.syntax.Tree[String](myTree.getLabel(), myTree.getChildren().asScala.map(convertFromFutileTree(_)).asJava); 75 | } 76 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/preprocess/SentenceSplitterTokenizerDriver.java: 
-------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.preprocess; 2 | 3 | import java.io.PrintWriter; 4 | import java.util.List; 5 | 6 | import edu.berkeley.nlp.futile.tokenizer.PTBLineLexer; 7 | import edu.berkeley.nlp.futile.util.Logger; 8 | import edu.berkeley.nlp.futile.fig.basic.IOUtils; 9 | import edu.berkeley.nlp.futile.fig.basic.Option; 10 | import edu.berkeley.nlp.futile.fig.exec.Execution; 11 | 12 | 13 | public class SentenceSplitterTokenizerDriver implements Runnable { 14 | @Option(gloss = "") 15 | public static Mode mode = Mode.TRAIN; 16 | 17 | @Option(gloss = "Raw text input") 18 | public static String inputPath = ""; 19 | @Option(gloss = "") 20 | public static String outputPath = ""; 21 | @Option(gloss = "") 22 | public static boolean respectInputLineBreaks = false; 23 | @Option(gloss = "") 24 | public static boolean respectInputTwoLineBreaks = true; 25 | 26 | @Option(gloss = "Path to read/write the model") 27 | public static String modelPath = ""; 28 | 29 | // TRAINING OPTIONS 30 | @Option(gloss = "Train the sentence splitter from the CoNLL data. If false, you " + 31 | "must provide your own data in the format\n" + 32 | ". <0 or 1>\n" + 33 | "where 0 indicates not a boundary and 1 indicates a boundary.") 34 | public static boolean trainFromConll = true; 35 | 36 | @Option(gloss = "Path to training set") 37 | public static String trainPath = ""; 38 | @Option(gloss = "Path to test set") 39 | public static String testPath = ""; 40 | @Option(gloss = "Path to CoNLL training set") 41 | public static String conllTrainPath = ""; 42 | @Option(gloss = "Training set size, -1 for all") 43 | public static int conllTrainSize = -1; 44 | @Option(gloss = "Path to CoNLL test set") 45 | public static String conllTestPath = ""; 46 | @Option(gloss = "Test set size, -1 for all") 47 | public static int conllTestSize = -1; 48 | 49 | public static enum Mode { 50 | TRAIN, RUN; 51 | } 52 | 53 | public static void main(String[] args) { 54 | SentenceSplitterTokenizerDriver main = new SentenceSplitterTokenizerDriver(); 55 | Execution.run(args, main); // add .class here if that class should receive command-line args 56 | } 57 | 58 | public void run() { 59 | Logger.setFig(); 60 | switch (mode) { 61 | case TRAIN: SentenceSplitter.trainSentenceSplitter(); 62 | break; 63 | case RUN: 64 | SentenceSplitter splitter = SentenceSplitter.loadSentenceSplitter(modelPath); 65 | String[] lines = IOUtils.readLinesHard(inputPath).toArray(new String[0]); 66 | String[] canonicalizedParagraphs = splitter.formCanonicalizedParagraphs(lines, respectInputLineBreaks, respectInputTwoLineBreaks); 67 | String[] sentences = splitter.splitSentences(canonicalizedParagraphs); 68 | String[][] tokenizedSentences = splitter.tokenize(sentences); 69 | PrintWriter writer = IOUtils.openOutHard(outputPath); 70 | for (String[] sentence : tokenizedSentences) { 71 | for (int i = 0; i < sentence.length; i++) { 72 | writer.print(sentence[i]); 73 | if (i < sentence.length - 1) { 74 | writer.print(" "); 75 | } 76 | } 77 | writer.println(); 78 | } 79 | writer.close(); 80 | break; 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/sem/QueryCountAnalyzer.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.sem 2 | 3 | import edu.berkeley.nlp.coref.DocumentGraph 4 | import edu.berkeley.nlp.coref.MentionType 5 | import 
edu.berkeley.nlp.util.Counter 6 | import edu.berkeley.nlp.coref.PronounDictionary 7 | import edu.berkeley.nlp.futile.util.Logger 8 | import edu.berkeley.nlp.coref.Mention 9 | 10 | object QueryCountAnalyzer { 11 | 12 | def renderSomeQueries(docGraph: DocumentGraph, predBackptrs: Seq[Int], queryCounts: QueryCountsBundle) = { 13 | var rendered = ""; 14 | for (mentIdx <- 0 until docGraph.size) { 15 | if (isReferring(docGraph, mentIdx) && 16 | isIncorrect(docGraph, mentIdx, predBackptrs(mentIdx)) && 17 | hasReferringAntecedents(docGraph, mentIdx) && 18 | !hasHeadMatchWithAntecedent(docGraph, mentIdx)) { 19 | val myHeadTc = docGraph.getMention(mentIdx).headString; 20 | val antIndicesCounts = (0 until mentIdx).filter(idx => isReferring(docGraph, idx)).map(idx => (idx, queryCounts.pairCounts.getCount(myHeadTc, docGraph.getMention(idx).headString))) 21 | // Top five scores and whether they're in 22 | val topAntIndicesCounts = antIndicesCounts.sortBy(_._2).reverse.slice(0, Math.min(5, antIndicesCounts.size)); 23 | val goldRefAntecedents = docGraph.getGoldAntecedentsUnderCurrentPruning(mentIdx).filter(docGraph.getMention(_).mentionType != MentionType.PRONOMINAL); 24 | def renderMentionAndCount = (idx: Int, count: Double) => "[" + idx + ": " + renderMentionWithHead(docGraph.getMention(idx)) + ", " + count + ", " + 25 | (if (docGraph.getGoldAntecedentsUnderCurrentPruning(mentIdx).contains(idx)) "corr" else "wrong") + "] "; 26 | rendered += docGraph.corefDoc.rawDoc.docID + " " + mentIdx + ": " + renderMentionWithHead(docGraph.getMention(mentIdx)) + "\n correct = "; 27 | for (goldRefAntecedent <- goldRefAntecedents) { 28 | val antHeadTc = docGraph.getMention(goldRefAntecedent).headString; 29 | rendered += renderMentionAndCount(goldRefAntecedent, queryCounts.pairCounts.getCount(myHeadTc, antHeadTc)); 30 | } 31 | rendered += "\n top five = "; 32 | for (i <- 0 until topAntIndicesCounts.size) { 33 | rendered += renderMentionAndCount(topAntIndicesCounts(i)._1, topAntIndicesCounts(i)._2); 34 | } 35 | rendered += "\n"; 36 | } 37 | } 38 | rendered; 39 | } 40 | 41 | private def renderMentionWithHead(mention: Mention) = { 42 | val startIdx = Math.max(mention.headIdx - mention.startIdx - 2, 0); 43 | val endIdx = Math.min(mention.headIdx - mention.startIdx + 3, mention.words.size); 44 | val str = mention.words.slice(startIdx, mention.headIdx - mention.startIdx).foldLeft("")(_ + " " + _) + " _" + mention.words(mention.headIdx - mention.startIdx) + 45 | "_" + mention.words.slice(mention.headIdx - mention.startIdx + 1, endIdx).foldLeft("")(_ + " " + _) 46 | str.trim; 47 | } 48 | 49 | def renderQueryCountStats(docGraphs: Seq[DocumentGraph], allPredBackptrs: Seq[Seq[Int]], queryCounts: QueryCountsBundle) = { 50 | // var numTop = 0.0; 51 | // var numUnseen = 0.0; 52 | // for (i <- 0 until docGraphs.size) { 53 | // val docGraph = docGraphs(i); 54 | // for (j <- 0 until docGraph.size) { 55 | // if (isReferring(docGraph, j) && 56 | // isIncorrect(docGraph, j, allPredBackptrs(i)(j)) && 57 | // hasReferringAntecedents(docGraph, j) && 58 | // !hasHeadMatchWithAntecedent(docGraph, j)) { 59 | // val myHeadTc = docGraph.getMention(j).headString; 60 | // val antHeads = (0 until j).filter(isReferring(docGraph, _)).map(docGraph.getMention(_).headString); 61 | // val topCountScore = queryCounts.pairCounts.getCount(myHeadTc, ) 62 | // } 63 | // } 64 | // } 65 | "" 66 | } 67 | 68 | def renderTopFailedRecallHeadPairs(docGraphs: Seq[DocumentGraph], allPredBackptrs: Array[Array[Int]]) = { 69 | val headCounter = new Counter[String](); 70 | 
val headCounterMislead = new Counter[String](); 71 | for (i <- 0 until docGraphs.size) { 72 | val docGraph = docGraphs(i); 73 | for (j <- 0 until docGraph.size) { 74 | if (isReferring(docGraph, j) && 75 | isIncorrect(docGraph, j, allPredBackptrs(i)(j)) && 76 | hasReferringAntecedents(docGraph, j) && 77 | hasHeadMatchWithAntecedent(docGraph, j)) { 78 | if (hasHeadMatchWithPrediction(docGraph, j, allPredBackptrs(i)(j))) { 79 | headCounterMislead.incrementCount(docGraph.getMention(j).headStringLc, 1.0); 80 | } 81 | headCounter.incrementCount(docGraph.getMention(j).headStringLc, 1.0); 82 | } 83 | } 84 | } 85 | var rendered = headCounter.size + " heads missed, " + headCounterMislead.size + " heads mislead\n"; 86 | headCounter.keepTopNKeys(100); 87 | rendered += headCounter.toString + "\n"; 88 | headCounterMislead.keepTopNKeys(100); 89 | rendered += headCounterMislead.toString + "\n"; 90 | rendered; 91 | } 92 | 93 | // N.B. Referring here means nominal or proper, not coreferent 94 | def isReferring(docGraph: DocumentGraph, idx: Int) = { 95 | docGraph.getMention(idx).mentionType != MentionType.PRONOMINAL; 96 | } 97 | 98 | def isIncorrect(docGraph: DocumentGraph, idx: Int, backptr: Int) = { 99 | !docGraph.getGoldAntecedentsUnderCurrentPruning(idx).contains(backptr); 100 | } 101 | 102 | def isPredictedNewCluster(docGraph: DocumentGraph, idx: Int, backptr: Int) = { 103 | backptr == idx; 104 | } 105 | 106 | def hasReferringAntecedents(docGraph: DocumentGraph, idx: Int) = { 107 | val goldAntecedents = docGraph.getGoldAntecedentsUnderCurrentPruning(idx); 108 | goldAntecedents.filter(i => docGraph.getMention(i).mentionType != MentionType.PRONOMINAL).size > 0; 109 | } 110 | 111 | def hasHeadMatchWithAntecedent(docGraph: DocumentGraph, idx: Int) = { 112 | val goldAntecedents = docGraph.getGoldAntecedentsUnderCurrentPruning(idx); 113 | goldAntecedents.filter(i => docGraph.getMention(i).headStringLc == docGraph.getMention(idx).headStringLc).size > 0; 114 | } 115 | 116 | def hasHeadMatchWithPrediction(docGraph: DocumentGraph, idx: Int, backptr: Int) = { 117 | backptr != idx && docGraph.getMention(idx).headStringLc == docGraph.getMention(backptr).headStringLc; 118 | } 119 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/berkeley/nlp/coref/sem/QueryCountsBundle.scala: -------------------------------------------------------------------------------- 1 | package edu.berkeley.nlp.coref.sem 2 | import edu.berkeley.nlp.futile.fig.basic.IOUtils 3 | import edu.berkeley.nlp.futile.util.Counter 4 | import edu.berkeley.nlp.futile.util.Logger 5 | import java.io.File 6 | 7 | @SerialVersionUID(1L) 8 | class QueryCountsBundle(val wordCounts: Counter[String], 9 | val pairCounts: Counter[(String,String)]) extends Serializable { 10 | } 11 | 12 | object QueryCountsBundle { 13 | 14 | def createFromFile(path: String) = { 15 | val wordCounts = new Counter[String]; 16 | val pairCounts = new Counter[(String,String)]; 17 | val cleanedPath = if (path != path.trim) { 18 | Logger.logss("WARNING: queryCountsFile has spurious spaces for some inexplicable reason; trimming"); 19 | path.trim; 20 | } else { 21 | path; 22 | } 23 | val lineItr = IOUtils.lineIterator(cleanedPath); 24 | while (lineItr.hasNext) { 25 | val line = lineItr.next; 26 | val fields = line.split("\\s+"); 27 | if (fields.size == 2) { 28 | wordCounts.incrementCount(fields(0), fields(1).toDouble); 29 | } else if (fields.size == 3) { 30 | pairCounts.incrementCount(fields(0) -> fields(1), fields(2).toDouble); 31 
| } 32 | } 33 | Logger.logss("Loaded " + pairCounts.size + " query counts from " + path); 34 | new QueryCountsBundle(wordCounts, pairCounts); 35 | } 36 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/harvard/nlp/moarcoref/AnimacyHelper.java: -------------------------------------------------------------------------------- 1 | package edu.harvard.nlp.moarcoref; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.io.InputStreamReader; 7 | import java.util.Arrays; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | 11 | import edu.berkeley.nlp.coref.Mention; 12 | import edu.berkeley.nlp.coref.MentionType; 13 | 14 | public class AnimacyHelper { 15 | 16 | public static Set animates; 17 | public static Set inanimates; 18 | 19 | static { 20 | try { 21 | animates = getWordsFromFile(MiniDriver.animacyPath, false); 22 | inanimates = getWordsFromFile(MiniDriver.inanimacyPath, false); 23 | } catch (IOException e) { 24 | e.printStackTrace(); 25 | System.exit(1); 26 | } 27 | 28 | } 29 | 30 | 31 | ////////////////////////////////////////////////////// 32 | // implementation of some recasens features 33 | ///////////////////////////////////////////////////// 34 | 35 | public static String getAnimacy(Mention ment) { 36 | String animacy = "UNKNOWN"; 37 | String headString = ment.headString(); 38 | String nerString = ment.nerString(); 39 | Set inanimateNers = new HashSet(Arrays.asList( 40 | "LOCATION", "MONEY", "NUMBER", "PERCENT", "DATE", "TIME", 41 | "FAC", "GPE", "WEA", "ORG")); 42 | if (ment.mentionType() == MentionType.PRONOMINAL) { 43 | if (animates.contains(headString)) { 44 | animacy = "ANIMATE"; 45 | } else if (inanimates.contains(headString)) { 46 | animacy = "INANIMATE"; 47 | } 48 | } else if (nerString.equals("PERSON") || nerString.startsWith("PER")) { 49 | animacy = "ANIMATE"; 50 | } else if (inanimateNers.contains(nerString) 51 | || nerString.startsWith("LOC")) { 52 | animacy = "INANIMATE"; 53 | } 54 | // if still unknown, use list 55 | if (ment.mentionType() != MentionType.PRONOMINAL 56 | && animacy.equals("UNKNOWN")) { 57 | if (animates.contains(headString)) { 58 | animacy = "ANIMATE"; 59 | } else if (inanimates.contains(headString)) { 60 | animacy = "INANIMATE"; 61 | } 62 | } 63 | return animacy; 64 | } 65 | 66 | // mostly stolen from Dictionaries.java in stanfordcorenlp.dcoref 67 | public static Set getWordsFromFile(String filename, boolean lowercase) throws IOException{ 68 | BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename))); 69 | Set words = new HashSet(); 70 | while (reader.ready()){ 71 | if (lowercase){ 72 | words.add(reader.readLine().toLowerCase()); // readLine strips the trailing '\n' etc 73 | } else { 74 | words.add(reader.readLine()); 75 | } 76 | } 77 | reader.close(); 78 | return words; 79 | } 80 | } -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/harvard/nlp/moarcoref/FeatureExtractor.scala: -------------------------------------------------------------------------------- 1 | package edu.harvard.nlp.moarcoref; 2 | 3 | import java.io.PrintWriter 4 | 5 | import scala.collection.JavaConverters.mapAsScalaMapConverter 6 | import scala.collection.immutable.TreeMap 7 | 8 | import edu.berkeley.nlp.coref.CorefFeaturizerTrainer 9 | import edu.berkeley.nlp.coref.CorefSystem 10 | import edu.berkeley.nlp.coref.DocumentGraph 11 | import 
edu.berkeley.nlp.coref.NumberGenderComputer 12 | import edu.berkeley.nlp.coref.PairwiseIndexingFeaturizerJoint 13 | import edu.berkeley.nlp.coref.PairwiseIndexingFeaturizer 14 | import edu.berkeley.nlp.coref.sem.QueryCountsBundle 15 | import edu.berkeley.nlp.futile.fig.basic.Indexer 16 | import edu.berkeley.nlp.futile.util.Logger 17 | 18 | object FeatureExtractor { 19 | 20 | def writeSeparatedFeatsAndOraclePredClustering(smaller:Boolean) { 21 | var pfx = (if (smaller) "SMALL" else "BIG"); 22 | Logger.logss("Using conjType = " + MiniDriver.conjType); 23 | val numberGenderComputer = NumberGenderComputer.readBergsmaLinData(MiniDriver.numberGenderDataPath); 24 | // require(!MiniDriver.trainOnGold); 25 | 26 | var trainDocs = CorefSystem.loadCorefDocs(MiniDriver.trainPath, MiniDriver.trainSize, numberGenderComputer, MiniDriver.useGoldMentions); 27 | var trainDocGraphsOrigOrder = trainDocs.map(new DocumentGraph(_, true)); 28 | var trainDocGraphs = if (MiniDriver.randomizeTrain) new scala.util.Random(0).shuffle(trainDocGraphsOrigOrder.sortBy(_.corefDoc.rawDoc.printableDocName)) else trainDocGraphsOrigOrder; 29 | 30 | Logger.logss(trainDocGraphs.size + " many train docs"); 31 | val totalMents = trainDocGraphs.foldLeft(0)((total, curr) => total + curr.size); 32 | val lexicalCounts = MoarLexicalCountsBundle.countLexicalItems(trainDocs, MiniDriver.lexicalFeatCutoff, MiniDriver.bilexicalFeatCutoff); 33 | val queryCounts: QueryCountsBundle = null; 34 | val featurizerTrainer = new CorefFeaturizerTrainer(); 35 | 36 | // extract anaphoricity features 37 | var anaphFeatureIndexer = new Indexer[String](); 38 | anaphFeatureIndexer.getIndex(SeparatingFeaturizer.UnkFeatName); 39 | // last true parameter to function below means it's in anaphoricity mode 40 | var anaphFeaturizer = new SmallerSeparatingFeaturizer(anaphFeatureIndexer, MiniDriver.pairwiseFeats, MiniDriver.conjType, lexicalCounts, queryCounts, true); //anaphoricityMode=true 41 | featurizerTrainer.featurizeBasic(trainDocGraphs, anaphFeaturizer); 42 | anaphFeaturizer.printFeatureTemplateCounts(); 43 | // write our features to a file 44 | TextPickler.writeAnaphFeats(trainDocGraphs, pfx + "-" + MiniDriver.pairwiseFeats + "-" + "anaphTrainFeats.txt"); 45 | 46 | // write anaph feature mapping 47 | val printerAnaph = new PrintWriter(pfx+"-"+MiniDriver.pairwiseFeats + "-" + "anaphMapping.txt"); 48 | var invMap = anaphFeatureIndexer.getMap().asScala.map(_.swap); // asScala is magic 49 | var tmap = TreeMap(invMap.toSeq:_*); // sort the map 50 | for ((idx,str) <- tmap){ 51 | printerAnaph.println(idx + " : " + str); 52 | } 53 | printerAnaph.flush(); 54 | printerAnaph.close(); 55 | 56 | // write oracle pred clustering for train 57 | TextPickler.writePredOracleClusterings(trainDocGraphs, pfx+"TrainOPCs.txt"); 58 | 59 | // now do pairwise features 60 | trainDocGraphsOrigOrder = trainDocs.map(new DocumentGraph(_, true)); 61 | trainDocGraphs = if (MiniDriver.randomizeTrain) new scala.util.Random(0).shuffle(trainDocGraphsOrigOrder.sortBy(_.corefDoc.rawDoc.printableDocName)) else trainDocGraphsOrigOrder; 62 | var pwFeatureIndexer = new Indexer[String](); 63 | pwFeatureIndexer.getIndex(PairwiseIndexingFeaturizerJoint.UnkFeatName); 64 | // below we set anaphoricityMode = false 65 | var pwFeaturizer:PairwiseIndexingFeaturizer = null; 66 | if (smaller){ 67 | pwFeaturizer = new SmallerSeparatingFeaturizer(pwFeatureIndexer, MiniDriver.pairwiseFeats, MiniDriver.conjType, lexicalCounts, queryCounts, false); //anaphoricityMode=false 68 | } else{ 69 | pwFeaturizer = new 
SeparatingFeaturizer(pwFeatureIndexer, MiniDriver.pairwiseFeats, MiniDriver.conjType, lexicalCounts, queryCounts, false); //anaphoricityMode=false 70 | } 71 | //var pwFeaturizer = new SeparatingFeaturizer(pwFeatureIndexer, MiniDriver.pairwiseFeats, MiniDriver.conjType, lexicalCounts, queryCounts, false); //anaphoricityMode=false 72 | featurizerTrainer.featurizeBasic(trainDocGraphs, pwFeaturizer); 73 | pwFeaturizer.printFeatureTemplateCounts; 74 | // write pairwise train features 75 | TextPickler.writePWFeats(trainDocGraphs, pwFeatureIndexer.size(), pfx + "-" + MiniDriver.pairwiseFeats + "-" + "pwTrainFeats.txt"); 76 | 77 | // write pw feature mapping 78 | val printerPW = new PrintWriter(pfx+"-"+ MiniDriver.pairwiseFeats + "-" + "pwMapping.txt"); 79 | invMap = pwFeatureIndexer.getMap().asScala.map(_.swap); // asScala is magic 80 | tmap = TreeMap(invMap.toSeq:_*); // sort the map 81 | for ((idx,str) <- tmap){ 82 | printerPW.println(idx + " : " + str); 83 | } 84 | printerPW.flush(); 85 | printerPW.close(); 86 | 87 | // hopefully helps with gc 88 | trainDocs = null; 89 | trainDocGraphsOrigOrder = null; 90 | trainDocGraphs = null; 91 | 92 | var devDocs = CorefSystem.loadCorefDocs(MiniDriver.devPath, MiniDriver.devSize, numberGenderComputer, MiniDriver.useGoldMentions); 93 | var devDocGraphs = devDocs.map(new DocumentGraph(_, false)).sortBy(_.corefDoc.rawDoc.printableDocName); 94 | featurizerTrainer.featurizeBasic(devDocGraphs, anaphFeaturizer); // dev docs already know they are dev docs so they don't add features 95 | TextPickler.writeAnaphFeats(devDocGraphs, pfx + "-" + MiniDriver.pairwiseFeats + "-" + "anaphDevFeats.txt"); 96 | devDocGraphs = devDocs.map(new DocumentGraph(_, false)).sortBy(_.corefDoc.rawDoc.printableDocName); 97 | featurizerTrainer.featurizeBasic(devDocGraphs,pwFeaturizer); 98 | TextPickler.writePWFeats(devDocGraphs, pwFeatureIndexer.size(), pfx + "-" + MiniDriver.pairwiseFeats + "-" + "pwDevFeats.txt"); 99 | 100 | // write dev oracle predicted clustering 101 | TextPickler.writePredOracleClusterings(devDocGraphs, pfx+"DevOPCs.txt"); 102 | 103 | // do test docs 104 | devDocs = null; 105 | devDocGraphs = null; 106 | var testDocs = CorefSystem.loadCorefDocs(MiniDriver.testPath, MiniDriver.testSize, numberGenderComputer, MiniDriver.useGoldMentions); 107 | 108 | var testDocGraphs = testDocs.map(new DocumentGraph(_, false)).sortBy(_.corefDoc.rawDoc.printableDocName); 109 | featurizerTrainer.featurizeBasic(testDocGraphs, anaphFeaturizer); // test docs already know they are test docs so they don't add features 110 | TextPickler.writeAnaphFeats(testDocGraphs, pfx + "-" + MiniDriver.pairwiseFeats + "-" + "anaphTestFeats.txt"); 111 | testDocGraphs = testDocs.map(new DocumentGraph(_, false)).sortBy(_.corefDoc.rawDoc.printableDocName); 112 | featurizerTrainer.featurizeBasic(testDocGraphs,pwFeaturizer); 113 | TextPickler.writePWFeats(testDocGraphs, pwFeatureIndexer.size(), pfx + "-" + MiniDriver.pairwiseFeats + "-" + "pwTestFeats.txt"); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/harvard/nlp/moarcoref/MiniDriver.java: -------------------------------------------------------------------------------- 1 | package edu.harvard.nlp.moarcoref; 2 | 3 | import edu.berkeley.nlp.coref.lang.Language; 4 | import edu.berkeley.nlp.futile.util.Logger; 5 | import edu.berkeley.nlp.futile.fig.basic.Option; 6 | import edu.berkeley.nlp.futile.fig.exec.Execution; 7 | import edu.berkeley.nlp.coref.ConjType; 8 | 9 | /* 10 | * 
A minimal version of BCS's Driver.java 11 | */ 12 | public class MiniDriver implements Runnable { 13 | 14 | @Option(gloss = "Which experiment to run?") 15 | public static Mode mode = Mode.SMALLER; 16 | @Option(gloss = "Language choice") 17 | public static Language lang = Language.ENGLISH; 18 | 19 | // DATA AND PATHS 20 | @Option(gloss = "Path to number/gender data") 21 | public static String numberGenderDataPath = "gender.data"; 22 | @Option(gloss = "Path to Stanford Coref's animate unigrams") 23 | public static String animacyPath = "animate.unigrams.txt"; 24 | @Option(gloss = "Path to Stanford Coref's inanimate unigrams") 25 | public static String inanimacyPath = "inanimate.unigrams.txt"; 26 | @Option(gloss = "Path to training set") 27 | public static String trainPath = "flat_train_2012"; 28 | @Option(gloss = "Training set size, -1 for all") 29 | public static int trainSize = -1; 30 | @Option(gloss = "Path to dev set") 31 | public static String devPath = "flat_dev_2012"; 32 | @Option(gloss = "Dev set size, -1 for all") 33 | public static int devSize = -1; 34 | @Option(gloss = "Path to test set") 35 | public static String testPath = "flat_test_2012"; 36 | @Option(gloss = "Test set size, -1 for all") 37 | public static int testSize = -1; 38 | @Option(gloss = "Suffix to use for documents") 39 | public static String docSuffix = "auto_conll"; 40 | @Option(gloss = "Randomize the order of train documents") 41 | public static boolean randomizeTrain = true; 42 | 43 | @Option(gloss = "True if we should train on the documents with gold annotations, false if we should use auto annotations") 44 | public static boolean trainOnGold = false; 45 | @Option(gloss = "Use gold mentions.") 46 | public static boolean useGoldMentions = false; 47 | 48 | @Option(gloss = "Features to use; default is SURFACE, write \"+FINAL\" for FINAL") 49 | public static String pairwiseFeats = ""; 50 | @Option(gloss = "Conjunction type") 51 | public static ConjType conjType = ConjType.CANONICAL; 52 | @Option(gloss = "Cutoff below which lexical features fire POS tags instead") 53 | public static int lexicalFeatCutoff = 20; 54 | @Option(gloss = "Cutoff below which bilexical features fire backoff indicator feature") 55 | public static int bilexicalFeatCutoff = 10; 56 | 57 | 58 | public static enum Mode { 59 | SMALLER; 60 | } 61 | 62 | public static void main(String[] args) { 63 | MiniDriver main = new MiniDriver(); 64 | Execution.run(args, main); // add .class here if that class should receive command-line args 65 | } 66 | 67 | public void run() { 68 | Logger.setFig(); 69 | if (mode.equals(Mode.SMALLER)) { 70 | FeatureExtractor.writeSeparatedFeatsAndOraclePredClustering(true); 71 | } else { 72 | FeatureExtractor.writeSeparatedFeatsAndOraclePredClustering(false); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /modifiedBCS/src/main/java/edu/harvard/nlp/moarcoref/TextPickler.scala: -------------------------------------------------------------------------------- 1 | package edu.harvard.nlp.moarcoref 2 | 3 | import edu.berkeley.nlp.coref.DocumentGraph 4 | import java.io.PrintWriter 5 | import scala.collection.mutable.HashSet 6 | import scala.collection.mutable.TreeSet 7 | 8 | object TextPickler { 9 | 10 | // we'll write in the following fmt. each doc will be on its own line. the line will start with the number of mentions 11 | // then will be feats_j0| .. |feats_jj|feats_{j+1}0|..|feats_{j+1}{j+1} etc. 
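// As a concrete illustration of the format described above (the feature indices below are
// made up), a document with three mentions would be serialized on one line as
//   3|12 47|5 9 23|12 88|7 41|3 61|12 47 90
// i.e. the mention count, then one '|'-delimited block of space-separated feature indices
// for each (mention, antecedent) pair in the order (0,0), (1,0), (1,1), (2,0), (2,1), (2,2),
// where each (i,i) block corresponds to the non-anaphoric (new-cluster) option for mention i.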
12 | def writeFeats(docGraphs: Seq[DocumentGraph], fiName: String){ 13 | val pw = new PrintWriter(fiName); 14 | for (dg <- docGraphs){ 15 | pw.print(dg.size); 16 | var i = 0; 17 | while (i < dg.size){ 18 | var j = 0; 19 | while (j <= i){ 20 | pw.print('|'); 21 | val feats = dg.cachedFeats(i)(j); 22 | var k = 0; 23 | while (k < feats.length){ 24 | pw.print(feats(k)); 25 | if (k < feats.length - 1){ 26 | pw.print(' '); 27 | } 28 | k += 1; 29 | } 30 | j += 1; 31 | } 32 | i += 1; 33 | } 34 | pw.println(); 35 | } 36 | pw.close(); 37 | } 38 | 39 | def writeFeats(docGraphs: Seq[DocumentGraph], fiName: String, prunedIndices: TreeSet[(Int, Int, Int)]) { 40 | val pw = new PrintWriter(fiName); 41 | for ((dg, d) <- docGraphs.zipWithIndex) { 42 | pw.print(dg.size); 43 | var i = 0; 44 | while (i < dg.size) { 45 | var j = 0; 46 | while (j <= i) { 47 | pw.print('|'); 48 | if (!prunedIndices.contains((d, i, j))) { 49 | val feats = dg.cachedFeats(i)(j); 50 | var k = 0; 51 | while (k < feats.length) { 52 | pw.print(feats(k)); 53 | if (k < feats.length - 1) { 54 | pw.print(' '); 55 | } 56 | k += 1; 57 | } 58 | } 59 | j += 1; 60 | } 61 | i += 1; 62 | } 63 | pw.println(); 64 | } 65 | pw.close(); 66 | } 67 | 68 | // just writes anaphoric feats for each mention 69 | def writeAnaphFeats(docGraphs: Seq[DocumentGraph], fiName: String){ 70 | val pw = new PrintWriter(fiName); 71 | for (dg <- docGraphs){ 72 | pw.print(dg.size); 73 | var i = 0; 74 | while (i < dg.size){ 75 | pw.print('|'); 76 | val feats = dg.cachedFeats(i)(i).sorted; 77 | var k = 0; 78 | while (k < feats.length){ 79 | pw.print(feats(k)); 80 | if (k < feats.length - 1){ 81 | pw.print(' '); 82 | } 83 | k += 1; 84 | } 85 | i += 1; 86 | } 87 | pw.println(); 88 | } 89 | pw.close(); 90 | } 91 | 92 | def writePWFeats(docGraphs: Seq[DocumentGraph], biasFeatIdx:Int, fiName: String){ 93 | val pw = new PrintWriter(fiName); 94 | for (dg <- docGraphs){ 95 | pw.print(dg.size); 96 | var i = 0; 97 | while (i < dg.size){ 98 | var j = 0; 99 | while (j < i){ 100 | pw.print('|'); 101 | val feats = dg.cachedFeats(i)(j); 102 | var k = 0; 103 | while (k < feats.length){ 104 | pw.print(feats(k)); 105 | if (k < feats.length - 1){ 106 | pw.print(' '); 107 | } 108 | k += 1; 109 | } 110 | j += 1; 111 | } 112 | // now just write one bias feature for non-anaphoric option 113 | pw.print('|'); 114 | pw.print(biasFeatIdx); // don't really use biasFeat anymore, but it indicates total number of features 115 | i += 1; 116 | } 117 | pw.println(); 118 | } 119 | pw.close(); 120 | } 121 | 122 | 123 | // format will be a sequence of clusters separated by '|'. this can be used during training 124 | // and also for loss fcns. 
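// For example (the mention indices here are hypothetical), a document whose oracle
// predicted clustering groups mentions {0, 3, 5}, {1, 2} and {4, 6, 7} would be written
// on a single line as
//   0 3 5|1 2|4 6 7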
125 | def writePredOracleClusterings(docGraphs: Seq[DocumentGraph], fiName: String){ 126 | val pw = new PrintWriter(fiName); 127 | for (dg <- docGraphs){ 128 | for ((clust,clustIdx) <- dg.getOraclePredClustering.clusters.zipWithIndex){ 129 | if (clustIdx > 0){ 130 | pw.print('|'); 131 | } 132 | val clustSize = clust.size; 133 | for ((ment,mentIdx) <- clust.zipWithIndex){ 134 | pw.print(ment); 135 | if (mentIdx < clustSize - 1){ 136 | pw.print(' '); 137 | } 138 | } 139 | } 140 | pw.println(); 141 | } 142 | pw.close(); 143 | } 144 | 145 | def writeMentHeads(docGraphs: Seq[DocumentGraph],fiName: String, lowercase:Boolean = false){ 146 | val pw = new PrintWriter(fiName); 147 | for (dg <- docGraphs){ 148 | pw.print(dg.size); 149 | var i = 0; 150 | while (i < dg.size){ 151 | pw.print('|'); 152 | if (lowercase){ 153 | pw.print(dg.corefDoc.predMentions(i).headStringLc); 154 | } else { 155 | pw.print(dg.corefDoc.predMentions(i).headString); 156 | } 157 | i += 1; 158 | } 159 | pw.println(); 160 | } 161 | pw.close(); 162 | } 163 | 164 | 165 | def writeFullMentandCtx(docGraphs: Seq[DocumentGraph],fiName: String, lowercase:Boolean = false){ 166 | val pw = new PrintWriter(fiName); 167 | for (dg <- docGraphs){ 168 | pw.print(dg.size); 169 | var i = 0; 170 | while (i < dg.size){ 171 | pw.print('|'); 172 | val ment = dg.corefDoc.predMentions(i); 173 | if (lowercase){ 174 | pw.print(ment.contextWordOrPlaceholder(-1).toLowerCase() + " ["); 175 | pw.print(ment.spanToString.toLowerCase() + "] "); 176 | pw.print(ment.contextWordOrPlaceholder(ment.words.size).toLowerCase); 177 | } else { 178 | pw.print(ment.contextWordOrPlaceholder(-1) + " ["); 179 | pw.print(dg.corefDoc.predMentions(i).spanToString + "] "); 180 | pw.print(ment.contextWordOrPlaceholder(ment.words.size)); 181 | } 182 | i += 1; 183 | } 184 | pw.println(); 185 | } 186 | pw.close(); 187 | } 188 | 189 | } 190 | -------------------------------------------------------------------------------- /nn/ante_model.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'coref_utils' 3 | require 'sparse_doc_data' 4 | local mu = require 'model_utils' 5 | 6 | torch.manualSeed(2) 7 | 8 | do 9 | local AnteModel = torch.class('AnteModel') 10 | 11 | function AnteModel:__init(pwD, hiddenPW, cuda, dop) 12 | torch.manualSeed(2) 13 | if cuda then 14 | cutorch.manualSeed(2) 15 | end 16 | self.hiddenPW = hiddenPW 17 | 18 | local pwNet = nn.Sequential() 19 | pwNet:add(nn.LookupTable(pwD,hiddenPW)) 20 | pwNet:add(nn.Sum(2)) 21 | pwNet:add(nn.Add(hiddenPW)) 22 | pwNet:add(nn.Tanh()) 23 | pwNet:add(nn.Dropout(dop)) 24 | pwNet:add(nn.Linear(hiddenPW,1)) 25 | 26 | -- make sure contiguous, and do sparse init while we're at it 27 | recSutsInit(pwNet,15) 28 | pwNet:get(1).weight[-1]:fill(0) -- assume last feature is a dummy, padding feature 29 | self.pwNet = cuda and pwNet:cuda() or pwNet 30 | collectgarbage() 31 | end 32 | 33 | function AnteModel:docGrad(d,batch,clust,deltTensor,numMents) 34 | for m = 2, numMents do -- ignore first guy; always NA 35 | if clust:anaphoric(m) then 36 | local start = ((m-2)*(m-1))/2 -- one behind first pair for mention m 37 | local scores = self.pwNet:forward(batch:sub(start+1,start+m-1)):squeeze(2) 38 | local late = maxGoldAnt(clust,scores,m,0) 39 | local pred = simpleAnteLAArgmax(clust.m2c,scores,m,late,0) 40 | if clust.m2c[pred] ~= clust.m2c[late] then 41 | self.pwNet:forward(batch:sub(start+pred,start+pred)) 42 | self.pwNet:backward(batch:sub(start+pred,start+pred),deltTensor) 43 | 
self.pwNet:forward(batch:sub(start+late,start+late)) 44 | self.pwNet:backward(batch:sub(start+late,start+late),-deltTensor) 45 | end 46 | end 47 | end 48 | end 49 | 50 | 51 | function AnteModel:getDevAcc(pwDevData,devClusts,cuda) 52 | assert(self.pwNet:get(1).weight[-1]:abs():sum() == 0) 53 | assert(self.pwNet.train == false) 54 | local total = 0 55 | local correct = 0 56 | for d = 1, pwDevData.numDocs do 57 | if d % 100 == 0 then 58 | print("dev doc " .. tostring(d)) 59 | collectgarbage() 60 | end 61 | local numMents = pwDevData:numMents(d) 62 | local docBatch = pwDevData:getDocBatch(d) 63 | if cuda then 64 | docBatch = docBatch:cuda() 65 | end 66 | for m = 2, numMents do 67 | if devClusts[d]:anaphoric(m) then 68 | local start = ((m-2)*(m-1))/2 69 | local scores = self.pwNet:forward(docBatch:sub(start+1,start+m-1)):squeeze(2) 70 | local _, pred = torch.max(scores,1) 71 | total = total + 1 72 | if devClusts[d].m2c[m] == devClusts[d].m2c[pred[1]] then 73 | correct = correct + 1 74 | end 75 | end 76 | end 77 | end 78 | return correct/total 79 | end 80 | 81 | function AnteModel:docLoss(d,batch,clust,numMents) 82 | local loss = 0 83 | for m = 2, numMents do -- ignore first guy; always NA 84 | if clust:anaphoric(m) then 85 | local start = ((m-1)*(m-2))/2 -- index one behind first antecedent for this mention (in pwData) 86 | local scores = self.pwNet:forward(batch:sub(start+1,start+m-1)):squeeze(2) 87 | local late = maxGoldAnt(clust,scores,m,0) 88 | local pred = simpleAnteLAArgmax(clust.m2c,scores,m,late,0) 89 | if clust.m2c[pred] ~= clust.m2c[late] then 90 | loss = loss + (1 + scores[pred] - scores[late]) 91 | end 92 | end 93 | end 94 | return loss 95 | end 96 | 97 | end 98 | 99 | function train(pwData,clusts,pwDevData,devClusts,cuda) 100 | local anteModel = AnteModel(pwData.maxFeat+1, opts.H, cuda, opts.dop) 101 | local serFi = string.format("models/%s_%d.model", opts.savePrefix, opts.H) 102 | local params, gradParams = anteModel.pwNet:getParameters() 103 | local optState = {} 104 | local deltTensor = cuda and torch.ones(1,1):cuda() or torch.ones(1,1) 105 | for t = 1, opts.nEpochs do 106 | print("epoch: " .. tostring(t)) 107 | anteModel.pwNet:training() 108 | -- use document sized minibatches 109 | for d = 1, pwData.numDocs do 110 | if d % 200 == 0 then 111 | print("doc " .. tostring(d)) 112 | collectgarbage() 113 | end 114 | local batch = pwData:getDocBatch(d) 115 | if cuda then 116 | batch = batch:cuda() 117 | end 118 | gradParams:zero() 119 | anteModel:docGrad(d,batch,clusts[d],deltTensor,pwData:numMents(d)) 120 | -- do pw gradients 121 | mu.adagradStep(params,gradParams,opts.eta,optState) 122 | end 123 | 124 | print("evaluating on dev...") 125 | anteModel.pwNet:evaluate() 126 | local currAcc = anteModel:getDevAcc(pwDevData,devClusts,cuda) 127 | print("Acc " .. 
tostring(currAcc)) 128 | print("") 129 | end 130 | if opts.save then 131 | print("overwriting params...") 132 | torch.save(serFi..string.format("-pw-%f",opts.eta), anteModel.pwNet) 133 | end 134 | end 135 | 136 | 137 | cmd = torch.CmdLine() 138 | cmd:text() 139 | cmd:text() 140 | cmd:text('Training ante model') 141 | cmd:text() 142 | cmd:text('Options') 143 | cmd:option('-H', 700, 'Hidden layer size') 144 | cmd:option('-trainClustFile', '../SMALLTrainOPCs.txt', 'Train Oracle Predicted Clustering File') 145 | cmd:option('-devClustFile', '../SMALLDevOPCs.txt', 'Dev Oracle Predicted Clustering File') 146 | cmd:option('-pwTrFeatPrefix', 'train_small', 'Expects train pairwise features in -pw-*.h5') 147 | cmd:option('-pwDevFeatPrefix', 'dev_small', 'Expects dev pairwise features in -pw-*.h5') 148 | cmd:option('-nEpochs', 20, 'Number of epochs to train') 149 | cmd:option('-save', false, 'Save best model') 150 | cmd:option('-savePrefix', 'small', 'Prefixes saved model with this') 151 | cmd:option('-gpuid', -1, 'if >= 0, gives idx of gpu to use') 152 | cmd:option('-eta', 0.1, 'adagrad learning rate') 153 | cmd:option('-dop', 0.5, 'dropout rate') 154 | cmd:text() 155 | 156 | -- Parse input options 157 | opts = cmd:parse(arg) 158 | 159 | if opts.gpuid >= 0 then 160 | print('using cuda on gpu ' .. opts.gpuid) 161 | require 'cutorch' 162 | require 'cunn' 163 | cutorch.manualSeed(2) 164 | cutorch.setDevice(opts.gpuid+1) 165 | end 166 | 167 | function main() 168 | local pwTrData = SpDMPWData.loadFromH5(opts.pwTrFeatPrefix) 169 | print("read pw train data") 170 | print("max pw feature is: " .. pwTrData.maxFeat) 171 | local pwDevData = SpDMPWData.loadFromH5(opts.pwDevFeatPrefix) 172 | print("read pw dev data") 173 | local trClusts = getOPCs(opts.trainClustFile,pwTrData) 174 | print("read train clusters") 175 | local devClusts = getOPCs(opts.devClustFile,pwDevData) 176 | print("read dev clusters") 177 | 178 | train(pwTrData,trClusts,pwDevData,devClusts,opts.gpuid >= 0) 179 | end 180 | 181 | main() 182 | -------------------------------------------------------------------------------- /nn/model_utils.lua: -------------------------------------------------------------------------------- 1 | local model_utils = {} 2 | 3 | function model_utils.adagradStep(x,dfdx,eta,state) 4 | if not state.var then 5 | state.var = torch.Tensor():typeAs(x):resizeAs(x):zero() 6 | state.std = torch.Tensor():typeAs(x):resizeAs(x) 7 | end 8 | state.var:addcmul(1,dfdx,dfdx) 9 | state.std:sqrt(state.var) 10 | x:addcdiv(-eta, dfdx, state.std:add(1e-10)) 11 | end 12 | 13 | function model_utils.make_sp_mlp(D,H,zeroLast,justFirstLayer,dop) 14 | local mlp = nn.Sequential() 15 | mlp:add(nn.LookupTable(D,H)) 16 | mlp:add(nn.Sum(2)) 17 | mlp:add(nn.Add(H)) -- add a bias 18 | mlp:add(nn.Tanh()) 19 | if not justFirstLayer then 20 | mlp:add(nn.Dropout(dop or 0.5)) 21 | mlp:add(nn.Linear(H,1)) 22 | end 23 | -- make sure contiguous, and do sparse sutskever init while we're at it 24 | recSutsInit(mlp,15) 25 | if zeroLast then 26 | mlp:get(1).weight[-1]:fill(0) 27 | end 28 | return mlp 29 | end 30 | 31 | 32 | function model_utils.make_sp_and_dense_mlp(spD,dD,H,zeroLast,justFirstLayer,dop) 33 | local mlp = nn.Sequential() 34 | local parLayer = nn.ParallelTable() 35 | local left = nn.Sequential() 36 | left:add(nn.LookupTable(spD,H)) 37 | left:add(nn.Sum(2)) -- after this pt, will have totalNumMents x H 38 | local right = nn.Sequential() 39 | right:add(nn.Linear(dD,H)) -- just handles the distance feature (and the bias, conveniently) 40 | 
parLayer:add(left) 41 | parLayer:add(right) 42 | mlp:add(parLayer) 43 | mlp:add(nn.CAddTable()) 44 | mlp:add(nn.Tanh()) 45 | if not justFirstLayer then 46 | mlp:add(nn.Dropout(dop or 0.5)) 47 | mlp:add(nn.Linear(H,1)) 48 | end 49 | recSutsInit(mlp,15) 50 | if zeroLast then 51 | mlp:get(1):get(1):get(1).weight[-1]:fill(0) 52 | end 53 | return mlp 54 | end 55 | 56 | 57 | function sparseSutsMatInit(W,numNZ,scale) 58 | local numNZ = numNZ or 15 59 | local scale = scale or 0.25 60 | local m = W:size(1) 61 | local n = W:size(2) 62 | -- zero everything out 63 | W:fill(0) 64 | if n >= m then -- assume columns are features and rows are hidden dims 65 | numNZ = math.min(numNZ,n) 66 | for i = 1, m do 67 | local perm = torch.randperm(n) 68 | -- probably better ways of doing this 69 | local r = torch.randn(numNZ)*scale 70 | for j = 1, numNZ do 71 | W[i][perm[j]] = r[j] 72 | end 73 | end 74 | else -- assume rows are features and columns hidden dims 75 | numNZ = math.min(numNZ,m) 76 | for j = 1, n do 77 | local perm = torch.randperm(m) 78 | local r = torch.randn(numNZ)*scale 79 | for i = 1, numNZ do 80 | W[perm[i]][j] = r[i] 81 | end 82 | end 83 | end 84 | end 85 | 86 | function recSutsInit(net,numNZ) -- assuming no module can have weight and children 87 | local numNZ = numNZ or 15 88 | if net.weight and net.bias then 89 | sparseSutsMatInit(net.weight, math.min(numNZ,net.weight:size(1),net.weight:size(2))) 90 | net.bias:fill(0.5) 91 | elseif net.weight then 92 | sparseSutsMatInit(net.weight, math.min(numNZ,net.weight:size(1),net.weight:size(2))) 93 | elseif net.bias then 94 | net.bias:fill(0.5) 95 | elseif net.modules and #net.modules > 0 then 96 | for layer, subnet in ipairs(net.modules) do 97 | recSutsInit(subnet, numNZ) 98 | end 99 | end 100 | end 101 | 102 | -- stolen from https://github.com/karpathy/char-rnn/blob/master/util/model_utils.lua 103 | function model_utils.combine_all_parameters(...) 
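-- Illustrative usage (the two network names below are hypothetical): flattening the
-- parameters of several modules into single tensors for a joint optimizer step, e.g.
--   local params, gradParams = model_utils.combine_all_parameters(naNet, pwNet)
-- after which params and gradParams are flat views over the weights and gradients of
-- both networks.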
104 | --[[ like module:getParameters, but operates on many modules ]]-- 105 | 106 | -- get parameters 107 | local networks = {...} 108 | local parameters = {} 109 | local gradParameters = {} 110 | for i = 1, #networks do 111 | local tn = torch.typename(layer) 112 | local net_params, net_grads = networks[i]:parameters() 113 | if net_params then 114 | for _, p in pairs(net_params) do 115 | parameters[#parameters + 1] = p 116 | end 117 | for _, g in pairs(net_grads) do 118 | gradParameters[#gradParameters + 1] = g 119 | end 120 | end 121 | end 122 | 123 | local function storageInSet(set, storage) 124 | local storageAndOffset = set[torch.pointer(storage)] 125 | if storageAndOffset == nil then 126 | return nil 127 | end 128 | local _, offset = unpack(storageAndOffset) 129 | return offset 130 | end 131 | 132 | -- this function flattens arbitrary lists of parameters, 133 | -- even complex shared ones 134 | local function flatten(parameters) 135 | if not parameters or #parameters == 0 then 136 | return torch.Tensor() 137 | end 138 | local Tensor = parameters[1].new 139 | 140 | local storages = {} 141 | local nParameters = 0 142 | for k = 1,#parameters do 143 | local storage = parameters[k]:storage() 144 | if not storageInSet(storages, storage) then 145 | storages[torch.pointer(storage)] = {storage, nParameters} 146 | nParameters = nParameters + storage:size() 147 | end 148 | end 149 | 150 | local flatParameters = Tensor(nParameters):fill(1) 151 | local flatStorage = flatParameters:storage() 152 | 153 | for k = 1,#parameters do 154 | local storageOffset = storageInSet(storages, parameters[k]:storage()) 155 | parameters[k]:set(flatStorage, 156 | storageOffset + parameters[k]:storageOffset(), 157 | parameters[k]:size(), 158 | parameters[k]:stride()) 159 | parameters[k]:zero() 160 | end 161 | 162 | local maskParameters= flatParameters:float():clone() 163 | local cumSumOfHoles = flatParameters:float():cumsum(1) 164 | local nUsedParameters = nParameters - cumSumOfHoles[#cumSumOfHoles] 165 | local flatUsedParameters = Tensor(nUsedParameters) 166 | local flatUsedStorage = flatUsedParameters:storage() 167 | 168 | for k = 1,#parameters do 169 | local offset = cumSumOfHoles[parameters[k]:storageOffset()] 170 | parameters[k]:set(flatUsedStorage, 171 | parameters[k]:storageOffset() - offset, 172 | parameters[k]:size(), 173 | parameters[k]:stride()) 174 | end 175 | 176 | for _, storageAndOffset in pairs(storages) do 177 | local k, v = unpack(storageAndOffset) 178 | flatParameters[{{v+1,v+k:size()}}]:copy(Tensor():set(k)) 179 | end 180 | 181 | if cumSumOfHoles:sum() == 0 then 182 | flatUsedParameters:copy(flatParameters) 183 | else 184 | local counter = 0 185 | for k = 1,flatParameters:nElement() do 186 | if maskParameters[k] == 0 then 187 | counter = counter + 1 188 | flatUsedParameters[counter] = flatParameters[counter+cumSumOfHoles[k]] 189 | end 190 | end 191 | assert (counter == nUsedParameters) 192 | end 193 | return flatUsedParameters 194 | end 195 | 196 | -- flatten parameters and gradients 197 | local flatParameters = flatten(parameters) 198 | local flatGradParameters = flatten(gradParameters) 199 | 200 | -- return new flat vector that contains all discrete parameters 201 | return flatParameters, flatGradParameters 202 | end 203 | 204 | 205 | 206 | return model_utils 207 | -------------------------------------------------------------------------------- /nn/sparse_doc_data.lua: -------------------------------------------------------------------------------- 1 | require 'hdf5' 2 | 3 | SpDMPWData = {} -- 
for pairwise mention data 4 | SpDMPWData.__index = SpDMPWData 5 | 6 | function SpDMPWData.loadFromH5(featPfx) 7 | spdmmd = {} 8 | setmetatable(spdmmd,SpDMPWData) 9 | local featfi = assert(hdf5.open(featPfx .. "-pw-feats.h5")) 10 | spdmmd.feats = featfi:read("feats"):all() 11 | featfi:close() 12 | local offsetfi = assert(hdf5.open(featPfx .. "-pw-offsets.h5")) 13 | spdmmd.docStarts = offsetfi:read("doc_starts"):all() 14 | spdmmd.mentStarts = offsetfi:read("ment_starts"):all() 15 | offsetfi:close() 16 | spdmmd.numDocs = spdmmd.docStarts:size(1)-1 17 | spdmmd.maxFeat = spdmmd.feats:max() 18 | collectgarbage() 19 | assert(spdmmd.feats:isContiguous()) 20 | -- below only works if every pair actually has same number of features... 21 | spdmmd.numNZ = spdmmd.mentStarts[2] - spdmmd.mentStarts[1] 22 | return spdmmd 23 | end 24 | 25 | 26 | function SpDMPWData.makeFromTensors(feats,docStarts,mentStarts) -- for debugging 27 | spdmmd = {} 28 | setmetatable(spdmmd,SpDMPWData) 29 | spdmmd.feats = feats 30 | spdmmd.docStarts = docStarts 31 | spdmmd.mentStarts = mentStarts 32 | spdmmd.numDocs = spdmmd.docStarts:size(1)-1 33 | spdmmd.maxFeat = spdmmd.feats:max() 34 | spdmmd.numNZ = spdmmd.mentStarts[2] - spdmmd.mentStarts[1] 35 | collectgarbage() 36 | assert(spdmmd.feats:isContiguous()) 37 | return spdmmd 38 | end 39 | 40 | 41 | -- d1m2m1, d1m3m1, d1m3m2, d1m4m1, d1m4m2, d1m4m3 42 | -- this assumes self.docStarts and self.mentStarts begin at 0 rather than 1 43 | function SpDMPWData:getFeats(d,m,a) 44 | local docStartIdx = self.docStarts[d] --idx within self.MentStarts 1 behind where this doc starts 45 | local mentAntOffset = (m-2)*(m-1)/2 + a 46 | return self.feats:sub(self.mentStarts[docStartIdx+mentAntOffset]+1, self.mentStarts[docStartIdx+mentAntOffset+1]) 47 | end 48 | 49 | 50 | function SpDMPWData:numMents(d) -- solve the quadratic equation (for the positive root) 51 | -- we want m such that m*(m-1)/2 = numPairs => m^2 -m -2*numPairs = 0 52 | local numPairs = self.docStarts[d+1] - self.docStarts[d] 53 | return (1 + math.sqrt(1 + 4*2*numPairs))/2 54 | end 55 | 56 | function SpDMPWData:getMentBatch(d,m) 57 | local docStartIdx = self.docStarts[d] --idx within self.MentStarts 1 behind where this doc starts 58 | --local mentAntOffset = (m-2)*(m-1)/2 + a 59 | local mentOffset = (m-2)*(m-1)/2 60 | return self.feats:sub(self.mentStarts[docStartIdx+mentOffset+1]+1, self.mentStarts[docStartIdx+mentOffset+m]):view(m-1,self.numNZ) 61 | end 62 | 63 | function SpDMPWData:getDocBatch(d) 64 | local docStartIdx = self.docStarts[d] 65 | local nextDocStartIdx = self.docStarts[d+1] 66 | local feats = self.feats:sub(self.mentStarts[docStartIdx+1]+1, self.mentStarts[nextDocStartIdx+1]) 67 | local numRows = feats:size(1)/self.numNZ 68 | return feats:view(numRows, self.numNZ) 69 | end 70 | 71 | SpDMData = {} -- for just mention data 72 | SpDMData.__index = SpDMData 73 | 74 | function SpDMData.loadFromH5(featPfx) 75 | spdmd = {} 76 | setmetatable(spdmd,SpDMData) 77 | local featfi = assert(hdf5.open(featPfx .. "-na-feats.h5")) 78 | spdmd.feats = featfi:read("feats"):all() 79 | featfi:close() 80 | local offsetfi = assert(hdf5.open(featPfx .. 
"-na-offsets.h5")) 81 | spdmd.docStarts = offsetfi:read("doc_starts"):all() 82 | spdmd.mentStarts = offsetfi:read("ment_starts"):all() 83 | spdmd.numDocs = spdmd.docStarts:size(1)-1 84 | offsetfi:close() 85 | spdmd.maxFeat = spdmd.feats:max() 86 | collectgarbage() 87 | assert(spdmd.feats:isContiguous()) 88 | return spdmd 89 | end 90 | 91 | function SpDMData.makeFromTensors(feats,docStarts,mentStarts) 92 | spdmd = {} 93 | setmetatable(spdmd,SpDMData) 94 | spdmd.feats = feats 95 | spdmd.docStarts = docStarts 96 | spdmd.mentStarts = mentStarts 97 | spdmd.numDocs = spdmd.docStarts:size(1)-1 98 | spdmd.maxFeat = spdmd.feats:max() 99 | collectgarbage() 100 | assert(spdmd.feats:isContiguous()) 101 | return spdmd 102 | end 103 | 104 | -- d1m2, d1m3,... 105 | -- assumes mentStarts and docStarts start at 0 106 | function SpDMData:getFeats(d,m) 107 | local docStartIdx = self.docStarts[d] --idx within self.MentStarts that this doc starts 108 | return self.feats:sub(self.mentStarts[docStartIdx+m-1]+1, self.mentStarts[docStartIdx+m]) 109 | end 110 | 111 | 112 | function SpDMData:numMents(d) 113 | return (self.docStarts[d+1] - self.docStarts[d]) + 1 114 | end 115 | 116 | function SpDMData:docMiniBatch(d) -- this will only work if each mention has same # of features 117 | local docStartIdx = self.docStarts[d] 118 | local numMents = self:numMents(d) 119 | local docFeats = self.feats:sub(self.mentStarts[docStartIdx+1]+1,self.mentStarts[docStartIdx+numMents]) 120 | local numCols = docFeats:size(1)/(numMents-1) 121 | return docFeats:view(numMents-1,numCols) 122 | end 123 | 124 | do 125 | local SpKFDMData = torch.class('SpKFDMData') 126 | 127 | -- just gonna do a hacky thing for 2 constructors 128 | function SpKFDMData:__init(featPfx,docStarts,numNZ,feats) 129 | if featPfx ~= nil then 130 | local featfi = assert(hdf5.open(featPfx .. "-na-feats.h5")) 131 | local allfeats = featfi:read("feats"):all():long() 132 | featfi:close() 133 | local offsetfi = assert(hdf5.open(featPfx .. 
"-na-offsets.h5")) 134 | self.docStarts = offsetfi:read("doc_starts"):all() 135 | local mentStarts = offsetfi:read("ment_starts"):all() 136 | self.numNZ = mentStarts[2] - mentStarts[1] 137 | self.numDocs = self.docStarts:size(1)-1 138 | offsetfi:close() 139 | self.maxFeat = allfeats:max() 140 | self.feats = allfeats:view(allfeats:size(1)/self.numNZ, self.numNZ) 141 | else 142 | self.docStarts = docStarts 143 | self.numNZ = numNZ 144 | self.numDocs = self.docStarts:size(1)-1 145 | self.maxFeat = feats:max() 146 | self.feats = feats:view(feats:size(1)/self.numNZ, self.numNZ) 147 | end 148 | collectgarbage() 149 | assert(self.feats:isContiguous()) 150 | end 151 | 152 | function SpKFDMData:getFeats(d,m) 153 | return self.feats[self.docStarts[d]+m] 154 | end 155 | 156 | function SpKFDMData:docBatch(d) 157 | return self.feats:sub(self.docStarts[d]+1,self.docStarts[d+1]) 158 | end 159 | 160 | function SpKFDMData:numMents(d) 161 | return (self.docStarts[d+1] - self.docStarts[d]) 162 | end 163 | 164 | function SpKFDMData:cudify() 165 | self.feats = self.feats:cuda() 166 | self.docStarts = self.docStarts:cuda() 167 | collectgarbage() 168 | assert(self.feats:getDevice() ~= nil) 169 | assert(self.docStarts:getDevice() ~= nil) 170 | end 171 | 172 | end -------------------------------------------------------------------------------- /nncoref_acl15_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swiseman/nn_coref/e29b16deecd0d87d4b7c145e07e2908266fe63d6/nncoref_acl15_slides.pdf -------------------------------------------------------------------------------- /nncoref_naacl16_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swiseman/nn_coref/e29b16deecd0d87d4b7c145e07e2908266fe63d6/nncoref_naacl16_slides.pdf -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/README.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | CorScorer: Perl package for scoring coreference resolution systems 3 | using different metrics. 4 | 5 | 6 | VERSION 7 | v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics. 8 | 9 | 10 | CHANGES SINCE v8.0 11 | - fixed a bug that crashed the BLANC scorer when a duplicate singleton 12 | mention was present in the response. 13 | 14 | INSTALLATION 15 | Requirements: 16 | 1. Perl: downloadable from http://perl.org 17 | 2. Algorithm-Munkres: included in this package and downloadable 18 | from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08 19 | 20 | USE 21 | This package is distributed with two scripts to execute the scorer from 22 | the command line. 
23 | 24 | Windows (tm): scorer.bat 25 | Linux: scorer.pl 26 | 27 | 28 | SYNOPSIS 29 | use CorScorer; 30 | 31 | $metric = 'ceafm'; 32 | 33 | # Scores the whole dataset 34 | &CorScorer::Score($metric, $keys_file, $response_file); 35 | 36 | # Scores one file 37 | &CorScorer::Score($metric, $keys_file, $response_file, $name); 38 | 39 | 40 | INPUT 41 | metric: the metric desired to score the results: 42 | muc: MUCScorer (Vilain et al, 1995) 43 | bcub: B-Cubed (Bagga and Baldwin, 1998) 44 | ceafm: CEAF (Luo et al., 2005) using mention-based similarity 45 | ceafe: CEAF (Luo et al., 2005) using entity-based similarity 46 | blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions 47 | all: uses all the metrics to score 48 | 49 | keys_file: file with expected coreference chains in CoNLL-2011/2012 format 50 | 51 | response_file: file with output of coreference system (CoNLL-2011/2012 format) 52 | 53 | name: [optional] the name of the document to score. If name is not 54 | given, all the documents in the dataset will be scored. If given 55 | name is "none" then all the documents are scored but only total 56 | results are shown. 57 | 58 | 59 | OUTPUT 60 | The score subroutine returns an array with four values in this order: 61 | 1) Recall numerator 62 | 2) Recall denominator 63 | 3) Precision numerator 64 | 4) Precision denominator 65 | 66 | Also recall, precision and F1 are printed in the standard output when variable 67 | $VERBOSE is not null. 68 | 69 | Final scores: 70 | Recall = recall_numerator / recall_denominator 71 | Precision = precision_numerator / precision_denominator 72 | F1 = 2 * Recall * Precision / (Recall + Precision) 73 | 74 | Identification of mentions 75 | An scorer for identification of mentions (recall, precision and F1) is also included. 76 | Mentions from system response are compared with key mentions. This version performs 77 | strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks. 78 | 79 | AUTHORS 80 | Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena lsi.upc.edu 81 | Sameer Pradhan, sameer.pradhan childrens.harvard.edu 82 | Sebastian Martschat, sebastian.martschat h-its.org 83 | Xiaoqiang Luo, xql google.com 84 | 85 | COPYRIGHT AND LICENSE 86 | Copyright (C) 2009-2011, Emili Sapena esapena lsi.upc.edu 87 | 2011-2014, Sameer Pradhan sameer.pradhan childrens.harvard.edu 88 | 89 | This program is free software; you can redistribute it and/or modify it 90 | under the terms of the GNU General Public License as published by the 91 | Free Software Foundation; either version 2 of the License, or (at your 92 | option) any later version. This program is distributed in the hope that 93 | it will be useful, but WITHOUT ANY WARRANTY; without even the implied 94 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95 | GNU General Public License for more details. 96 | 97 | You should have received a copy of the GNU General Public License along 98 | with this program; if not, write to the Free Software Foundation, Inc., 99 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
100 | 101 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/scorer.bat: -------------------------------------------------------------------------------- 1 | @rem = '--*-Perl-*-- 2 | @echo off 3 | if "%OS%" == "Windows_NT" goto WinNT 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9 5 | goto endofperl 6 | :WinNT 7 | perl -x -S %0 %* 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH. 10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul 11 | goto endofperl 12 | @rem '; 13 | #!perl 14 | #line 15 15 | 16 | BEGIN { 17 | $d = $0; 18 | $d =~ s/\/[^\/][^\/]*$//g; 19 | push(@INC, $d."/lib"); 20 | } 21 | 22 | use strict; 23 | use CorScorer; 24 | 25 | if (@ARGV < 3) { 26 | print q| 27 | use: scorer.bat [name] 28 | 29 | metric: the metric desired to score the results: 30 | muc: MUCScorer (Vilain et al, 1995) 31 | bcub: B-Cubed (Bagga and Baldwin, 1998) 32 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 33 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 34 | all: uses all the metrics to score 35 | 36 | keys_file: file with expected coreference chains in SemEval format 37 | 38 | response_file: file with output of coreference system (SemEval format) 39 | 40 | name: [optional] the name of the document to score. If name is not 41 | given, all the documents in the dataset will be scored. If given 42 | name is "none" then all the documents are scored but only total 43 | results are shown. 44 | 45 | |; 46 | exit; 47 | } 48 | 49 | my $metric = shift (@ARGV); 50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) { 51 | print "Invalid metric\n"; 52 | exit; 53 | } 54 | 55 | 56 | if ($metric eq 'all') { 57 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') { 58 | print "\nMETRIC $m:\n"; 59 | &CorScorer::Score( $m, @ARGV ); 60 | } 61 | } 62 | else { 63 | &CorScorer::Score( $metric, @ARGV ); 64 | } 65 | 66 | __END__ 67 | :endofperl 68 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/scorer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | 7 | if ($d eq $0) { 8 | unshift(@INC, "lib"); 9 | } 10 | else { 11 | unshift(@INC, $d . "/lib"); 12 | } 13 | } 14 | 15 | use strict; 16 | use CorScorer; 17 | 18 | if (@ARGV < 3) { 19 | print q| 20 | use: scorer.pl [name] 21 | 22 | metric: the metric desired to score the results: 23 | muc: MUCScorer (Vilain et al, 1995) 24 | bcub: B-Cubed (Bagga and Baldwin, 1998) 25 | ceafm: CEAF (Luo et al, 2005) using mention-based similarity 26 | ceafe: CEAF (Luo et al, 2005) using entity-based similarity 27 | blanc: BLANC 28 | all: uses all the metrics to score 29 | 30 | keys_file: file with expected coreference chains in SemEval format 31 | 32 | response_file: file with output of coreference system (SemEval format) 33 | 34 | name: [optional] the name of the document to score. If name is not 35 | given, all the documents in the dataset will be scored. If given 36 | name is "none" then all the documents are scored but only total 37 | results are shown. 
38 | 39 | |; 40 | exit; 41 | } 42 | 43 | my $metric = shift(@ARGV); 44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) { 45 | print "Invalid metric\n"; 46 | exit; 47 | } 48 | 49 | if ($metric eq 'all') { 50 | foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') { 51 | print "\nMETRIC $m:\n"; 52 | &CorScorer::Score($m, @ARGV); 53 | } 54 | } 55 | else { 56 | &CorScorer::Score($metric, @ARGV); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/CorefMetricTest.pm: -------------------------------------------------------------------------------- 1 | package CorefMetricTest; 2 | use strict; 3 | use warnings; 4 | use Exporter; 5 | 6 | our @ISA= qw(Exporter); 7 | our @EXPORT = qw(ComputeScoreFromCounts DiffExpectedAndActual); 8 | 9 | ################################################################################ 10 | # Compute recall, precision and F1. 11 | # 12 | # Input: (numerator_counts_for_recall, denominator_counts_for_recall, 13 | # numerator_counts_for_precision, denominator_counts_for_precision) 14 | # Output: (recall, precision, F1) 15 | ################################################################################ 16 | sub ComputeScoreFromCounts { 17 | # The first 4 are also coref link counts when using BLANC. 18 | my ($recall_numerator, $recall_denominator, 19 | $precision_numerator, $precision_denominator, @noncoref_counts) = @_; 20 | # The coref recall, precision, and F1 when using BLANC. 21 | my ($recall, $precision, $F1) = 22 | RPFFromCounts($recall_numerator, $recall_denominator, 23 | $precision_numerator, $precision_denominator); 24 | 25 | # BLANC: @noncoref_counts= 26 | # (noncoref_numerator_recall, noncoref_denominator_recall, 27 | # noncoref_numerator_precision, noncoref_denominator_precision) 28 | if (scalar(@noncoref_counts) == 4) { 29 | ($recall, $precision, $F1) = CorScorer::ComputeBLANCFromCounts( 30 | $recall_numerator, $recall_denominator, $precision_denominator, 31 | $noncoref_counts[0], $noncoref_counts[1], $noncoref_counts[3]); 32 | } 33 | $recall = ($recall < 0) ? 0 : $recall; 34 | $precision = ($precision < 0) ? 0 : $precision; 35 | $F1 = ($F1 < 0) ? 0 : $F1; 36 | return ($recall, $precision, $F1); 37 | } 38 | 39 | sub RPFFromCounts 40 | { 41 | my ($recall_numerator, $recall_denominator, 42 | $precision_numerator, $precision_denominator, @nonCorefCounts) = @_; 43 | my ($recall, $precision, $F1) = (-1, -1, 0); 44 | if ($recall_denominator > 0) { 45 | $recall = $recall_numerator / $recall_denominator; 46 | } 47 | if ($precision_denominator > 0) { 48 | $precision = $precision_numerator / $precision_denominator; 49 | } 50 | 51 | if (($recall + $precision) > 0) { 52 | $F1 = 2 * $recall * $precision / ($recall + $precision); 53 | } 54 | 55 | return ($recall, $precision, $F1); 56 | } 57 | 58 | # deprecated -- see CorScorer::ComputeBLANCFromCounts(). 59 | sub ComputeBLANCRPF 60 | { 61 | my ($coref_recall, $coref_precision, $coref_F1, 62 | $noncoref_recall, $noncoref_precision, $noncoref_F1) = @_; 63 | 64 | my ($recall, $precision, $F1); 65 | 66 | if ($coref_recall < 0 && $noncoref_recall < 0) { 67 | # no key mention. 68 | $recall = $precision = $F1 = 0; 69 | } elsif ($coref_recall < 0) { 70 | # key: all links are non-coref (mentions are all singltons). 71 | $recall = $noncoref_recall; 72 | $precision = ($noncoref_precision < 0) ? 
0 : $noncoref_precision; 73 | $F1 = $noncoref_F1; 74 | } elsif ($noncoref_recall < 0) { 75 | # key: all links are coref (all mentions are in one entity). 76 | $recall = $coref_recall; 77 | $precision = ($coref_precision < 0) ? 0 : $coref_precision; 78 | $F1 = $coref_F1; 79 | } else { 80 | #key contains both coref and non-coref links. 81 | if ($coref_precision < 0 && $noncoref_precision < 0) { 82 | # no response. 83 | $recall = $precision = $F1 = 0; 84 | } else { 85 | if ($coref_precision < 0) { 86 | # response: all links are non-coref, or response mentions are all 87 | # singletons. 88 | $coref_precision = 0; 89 | } elsif ($noncoref_precision < 0) { 90 | # response: all links are coref, or all mentions are in one entity. 91 | $noncoref_precision = 0; 92 | } 93 | $recall = ($coref_recall + $noncoref_recall)/2; 94 | $precision = ($coref_precision + $noncoref_precision)/2; 95 | $F1 = ($coref_F1 + $noncoref_F1)/2; 96 | } 97 | } 98 | 99 | return ($recall, $precision, $F1); 100 | } 101 | 102 | ############################################################################## 103 | # Compute the sum of the duifference between the expected recall, precision, 104 | # F1 and the actual one. 105 | ############################################################################## 106 | sub DiffExpectedAndActual { 107 | my ($expected, $actual) = @_; 108 | if (scalar(@$expected) != scalar(@$actual)) { 109 | print STDERR "Expected and actual have diff dimensions: \n"; 110 | print STDERR " Expected: ", join(" ", @$expected), "\n"; 111 | print STDERR " Actual: ", join(" ", @$actual), "\n"; 112 | return 1.0e5; 113 | } 114 | my $sum = 0.0; 115 | my $i = 0; 116 | foreach my $e (@$expected) { 117 | $sum += abs($e - $actual->[$i]); 118 | ++$i; 119 | } 120 | return $sum; 121 | } 122 | 123 | 1; 124 | 125 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-10.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 x - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 z - 17 | test2 0 5 e (4) 18 | test2 0 6 y - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-11.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-12.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 1) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (2) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (3) 13 | test2 0 1 x - 14 | test2 0 2 d1 (4 15 | test2 0 3 d2 4) 16 | test2 0 4 z - 17 | test2 0 5 e (5) 18 | test2 0 6 y - 19 | test2 0 7 f1 (6) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-13.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 0) 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk (0) 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 x - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 z - 17 | test2 0 5 e (0) 18 | test2 0 6 y - 19 | test2 0 7 f1 (0) 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 - 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 - 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c - 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 y (2) 17 | test2 0 5 e (2) 18 | test2 0 6 z (3) 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 x (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (1 7 | test1 0 5 b3 1) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 (3 7 | test1 0 5 b3 3) 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-7.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-8.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . 
- 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A-9.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1(3(3(3(3(3(3(3(3(3(3 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 3)3)3)3)3)3)3)3)3)3)1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 x (1) 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 z (3) 17 | test2 0 5 e - 18 | test2 0 6 y (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-A.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (2 15 | test2 0 3 d2 2) 16 | test2 0 4 jnk - 17 | test2 0 5 e (2) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-B-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | 
nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-B.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 - 72 | nw/xinhua/00/chtb_0009 - 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-C-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | 
nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10043 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10043) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 (10043 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 10043) 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-C.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (10043 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 - 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 10043) 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | nw/xinhua/00/chtb_0009 - 31 | nw/xinhua/00/chtb_0009 (10054 32 | nw/xinhua/00/chtb_0009 - 33 | nw/xinhua/00/chtb_0009 10054) 34 | nw/xinhua/00/chtb_0009 - 35 | nw/xinhua/00/chtb_0009 - 36 | nw/xinhua/00/chtb_0009 - 37 | nw/xinhua/00/chtb_0009 - 38 | nw/xinhua/00/chtb_0009 - 39 | nw/xinhua/00/chtb_0009 - 40 | nw/xinhua/00/chtb_0009 - 41 | nw/xinhua/00/chtb_0009 - 42 | nw/xinhua/00/chtb_0009 - 43 | nw/xinhua/00/chtb_0009 - 44 | nw/xinhua/00/chtb_0009 - 45 | nw/xinhua/00/chtb_0009 - 46 | nw/xinhua/00/chtb_0009 - 47 | nw/xinhua/00/chtb_0009 - 48 | nw/xinhua/00/chtb_0009 - 49 | nw/xinhua/00/chtb_0009 (10043) 
50 | nw/xinhua/00/chtb_0009 - 51 | nw/xinhua/00/chtb_0009 - 52 | nw/xinhua/00/chtb_0009 - 53 | nw/xinhua/00/chtb_0009 - 54 | nw/xinhua/00/chtb_0009 - 55 | nw/xinhua/00/chtb_0009 - 56 | nw/xinhua/00/chtb_0009 - 57 | nw/xinhua/00/chtb_0009 - 58 | nw/xinhua/00/chtb_0009 - 59 | nw/xinhua/00/chtb_0009 - 60 | nw/xinhua/00/chtb_0009 - 61 | nw/xinhua/00/chtb_0009 - 62 | nw/xinhua/00/chtb_0009 - 63 | nw/xinhua/00/chtb_0009 - 64 | nw/xinhua/00/chtb_0009 (10054 65 | nw/xinhua/00/chtb_0009 10054) 66 | nw/xinhua/00/chtb_0009 - 67 | nw/xinhua/00/chtb_0009 - 68 | nw/xinhua/00/chtb_0009 (10054) 69 | nw/xinhua/00/chtb_0009 - 70 | nw/xinhua/00/chtb_0009 - 71 | nw/xinhua/00/chtb_0009 (10060) 72 | nw/xinhua/00/chtb_0009 (10060) 73 | 74 | #end document 75 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-D-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-D.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-E-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | 
nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (1) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (1) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (1) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (1) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-E.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (2) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (3) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 (3) 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 (3) 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 (3) 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 (3) 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-F-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-F.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | 
nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-G-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-G.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-H-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | 
nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-H.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-I-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-I.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | 
nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-J-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 - 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-J.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 - 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 - 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-K-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | 
nw/xinhua/00/chtb_0009 (3) 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-K.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 - 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (1) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (1) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 - 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (1) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (1) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 (1) 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-L-1.response: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (2) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 - 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (3) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (3) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 (3) 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 - 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-L.key: -------------------------------------------------------------------------------- 1 | #begin document (nw/xinhua/00/chtb_0009); part 000 2 | nw/xinhua/00/chtb_0009 - 3 | nw/xinhua/00/chtb_0009 (1) 4 | nw/xinhua/00/chtb_0009 - 5 | nw/xinhua/00/chtb_0009 (1) 6 | nw/xinhua/00/chtb_0009 - 7 | nw/xinhua/00/chtb_0009 (1) 8 | nw/xinhua/00/chtb_0009 - 9 | nw/xinhua/00/chtb_0009 (2) 10 | nw/xinhua/00/chtb_0009 - 11 | nw/xinhua/00/chtb_0009 (2) 12 | nw/xinhua/00/chtb_0009 - 13 | nw/xinhua/00/chtb_0009 (2) 14 | nw/xinhua/00/chtb_0009 - 15 | nw/xinhua/00/chtb_0009 (2) 16 | nw/xinhua/00/chtb_0009 - 17 | nw/xinhua/00/chtb_0009 - 18 | nw/xinhua/00/chtb_0009 - 19 | nw/xinhua/00/chtb_0009 - 20 | nw/xinhua/00/chtb_0009 - 21 | nw/xinhua/00/chtb_0009 
- 22 | nw/xinhua/00/chtb_0009 - 23 | nw/xinhua/00/chtb_0009 - 24 | nw/xinhua/00/chtb_0009 - 25 | nw/xinhua/00/chtb_0009 - 26 | nw/xinhua/00/chtb_0009 - 27 | nw/xinhua/00/chtb_0009 - 28 | nw/xinhua/00/chtb_0009 - 29 | nw/xinhua/00/chtb_0009 - 30 | 31 | #end document 32 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-M.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-1.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-2.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (0 15 | test2 0 3 d2 0) 16 | test2 0 4 jnk - 17 | test2 0 5 e (0) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (0 20 | test2 0 8 f2 - 21 | test2 0 9 f3 0) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-3.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . 
- 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (1 15 | test2 0 3 d2 1) 16 | test2 0 4 jnk - 17 | test2 0 5 e (1) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (2 20 | test2 0 8 f2 - 21 | test2 0 9 f3 2) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-4.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk (3) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (4) 17 | test2 0 5 e - 18 | test2 0 6 jnk (5) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-5.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (0) 13 | test2 0 1 jnk (0) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (0) 17 | test2 0 5 e - 18 | test2 0 6 jnk (0) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N-6.response: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (0 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 0) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (1) 13 | test2 0 1 jnk (1) 14 | test2 0 2 d1 - 15 | test2 0 3 d2 - 16 | test2 0 4 jnk (1) 17 | test2 0 5 e - 18 | test2 0 6 jnk (2) 19 | test2 0 7 f1 - 20 | test2 0 8 f2 - 21 | test2 0 9 f3 - 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/DataFiles/TC-N.key: -------------------------------------------------------------------------------- 1 | #begin document (LuoTestCase); 2 | test1 0 0 a1 (0 3 | test1 0 1 a2 0) 4 | test1 0 2 junk - 5 | test1 0 3 b1 (1 6 | test1 0 4 b2 - 7 | test1 0 5 b3 - 8 | test1 0 6 b4 1) 9 | test1 0 7 jnk - 10 | test1 0 8 . - 11 | 12 | test2 0 0 c (2) 13 | test2 0 1 jnk - 14 | test2 0 2 d1 (3 15 | test2 0 3 d2 3) 16 | test2 0 4 jnk - 17 | test2 0 5 e (4) 18 | test2 0 6 jnk - 19 | test2 0 7 f1 (5 20 | test2 0 8 f2 - 21 | test2 0 9 f3 5) 22 | test2 0 10 . - 23 | #end document 24 | -------------------------------------------------------------------------------- /reference-coreference-scorers/v8.01/test/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | BEGIN { 4 | $d = $0; 5 | $d =~ s/\/[^\/][^\/]*$//g; 6 | push(@INC, $d); 7 | push(@INC, $d . 
"/../lib"); 8 | } 9 | 10 | use strict; 11 | use CorScorer; 12 | use CorefMetricTest; 13 | use CorefMetricTestConfig; 14 | 15 | my $error_tolerance = 1.e-4; 16 | my $script_dir = $0; 17 | $script_dir =~ s/\/[^\/][^\/]*$//g; 18 | 19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) { 20 | my $id = $test_case->{'id'}; 21 | my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 22 | $script_dir . "/" . $test_case->{'response_file'}); 23 | print "\nTesting case ($id): keyFile=", $key_response_files[0], 24 | " responseFile=", $key_response_files[1], "\n"; 25 | my $expected_metrics = $test_case->{'expected_metrics'}; 26 | foreach my $metric_name (sort keys %$expected_metrics) { 27 | my $expected_values = $expected_metrics->{$metric_name}; 28 | *::SAVED_STDOUT = *STDOUT; 29 | *STDOUT = *::SUPRRES_STDOUT; 30 | my @actual_counts = &CorScorer::Score($metric_name, @key_response_files); 31 | # Compute R,P,and F1 from raw counts. 32 | my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts); 33 | *STDOUT = *::SAVED_STDOUT; 34 | my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values); 35 | printf " metric: %+10s", $metric_name; 36 | if ($diff < $error_tolerance) { 37 | print " => PASS\n"; 38 | } else { 39 | print " => FAIL\n"; 40 | print " Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n"; 41 | print " Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n"; 42 | #exit(1); 43 | } 44 | } 45 | } 46 | 47 | --------------------------------------------------------------------------------