├── LICENSE
├── README.md
├── __init__.py
├── analysis
    ├── clear_optims.py
    ├── incremental_coref
    │   ├── entity_cluster_plot.ipynb
    │   └── misc plotting.ipynb
    ├── model_transfer
    │   ├── README.md
    │   ├── dev_analysis.py
    │   ├── plot_rq1.py
    │   ├── plot_rq2.py
    │   ├── plot_rq3.py
    │   ├── plot_rq4.py
    │   ├── plot_rq4_large.py
    │   └── process_dev_analysis.py
    └── print_clusters.py
├── cluster.py
├── concrete_wrapper
    ├── reader.py
    └── writer.py
├── conll-2012
    ├── scorer
    │   ├── reference-coreference-scorers
    │   │   └── v8.01
    │   │   │   ├── README.txt
    │   │   │   ├── lib
    │   │   │       ├── Algorithm
    │   │   │       │   ├── Munkres.pm
    │   │   │       │   └── README.Munkres
    │   │   │       ├── CorScorer.pm
    │   │   │       ├── Cwd.pm
    │   │   │       ├── Data
    │   │   │       │   └── Dumper.pm
    │   │   │       └── Math
    │   │   │       │   └── Combinatorics.pm
    │   │   │   ├── scorer.bat
    │   │   │   ├── scorer.pl
    │   │   │   └── test
    │   │   │       ├── CorefMetricTest.pm
    │   │   │       ├── CorefMetricTestConfig.pm
    │   │   │       ├── DataFiles
    │   │   │           ├── TC-A-1.response
    │   │   │           ├── TC-A-10.response
    │   │   │           ├── TC-A-11.response
    │   │   │           ├── TC-A-12.response
    │   │   │           ├── TC-A-13.response
    │   │   │           ├── TC-A-2.response
    │   │   │           ├── TC-A-3.response
    │   │   │           ├── TC-A-4.response
    │   │   │           ├── TC-A-5.response
    │   │   │           ├── TC-A-6.response
    │   │   │           ├── TC-A-7.response
    │   │   │           ├── TC-A-8.response
    │   │   │           ├── TC-A-9.response
    │   │   │           ├── TC-A.key
    │   │   │           ├── TC-B-1.response
    │   │   │           ├── TC-B.key
    │   │   │           ├── TC-C-1.response
    │   │   │           ├── TC-C.key
    │   │   │           ├── TC-D-1.response
    │   │   │           ├── TC-D.key
    │   │   │           ├── TC-E-1.response
    │   │   │           ├── TC-E.key
    │   │   │           ├── TC-F-1.response
    │   │   │           ├── TC-F.key
    │   │   │           ├── TC-G-1.response
    │   │   │           ├── TC-G.key
    │   │   │           ├── TC-H-1.response
    │   │   │           ├── TC-H.key
    │   │   │           ├── TC-I-1.response
    │   │   │           ├── TC-I.key
    │   │   │           ├── TC-J-1.response
    │   │   │           ├── TC-J.key
    │   │   │           ├── TC-K-1.response
    │   │   │           ├── TC-K.key
    │   │   │           ├── TC-L-1.response
    │   │   │           ├── TC-L.key
    │   │   │           ├── TC-M-1.response
    │   │   │           ├── TC-M-2.response
    │   │   │           ├── TC-M-3.response
    │   │   │           ├── TC-M-4.response
    │   │   │           ├── TC-M-5.response
    │   │   │           ├── TC-M-6.response
    │   │   │           ├── TC-M.key
    │   │   │           ├── TC-N-1.response
    │   │   │           ├── TC-N-2.response
    │   │   │           ├── TC-N-3.response
    │   │   │           ├── TC-N-4.response
    │   │   │           ├── TC-N-5.response
    │   │   │           ├── TC-N-6.response
    │   │   │           └── TC-N.key
    │   │   │       ├── TestCases.README
    │   │   │       └── test.pl
    │   └── v8.01
    │   │   ├── README.txt
    │   │   ├── lib
    │   │       ├── Algorithm
    │   │       │   ├── Munkres.pm
    │   │       │   └── README.Munkres
    │   │       ├── CorScorer.pm
    │   │       ├── Cwd.pm
    │   │       ├── Data
    │   │       │   └── Dumper.pm
    │   │       └── Math
    │   │       │   └── Combinatorics.pm
    │   │   ├── scorer.bat
    │   │   ├── scorer.pl
    │   │   └── test
    │   │       ├── CorefMetricTest.pm
    │   │       ├── CorefMetricTestConfig.pm
    │   │       ├── DataFiles
    │   │           ├── TC-A-1.response
    │   │           ├── TC-A-10.response
    │   │           ├── TC-A-11.response
    │   │           ├── TC-A-12.response
    │   │           ├── TC-A-13.response
    │   │           ├── TC-A-2.response
    │   │           ├── TC-A-3.response
    │   │           ├── TC-A-4.response
    │   │           ├── TC-A-5.response
    │   │           ├── TC-A-6.response
    │   │           ├── TC-A-7.response
    │   │           ├── TC-A-8.response
    │   │           ├── TC-A-9.response
    │   │           ├── TC-A.key
    │   │           ├── TC-B-1.response
    │   │           ├── TC-B.key
    │   │           ├── TC-C-1.response
    │   │           ├── TC-C.key
    │   │           ├── TC-D-1.response
    │   │           ├── TC-D.key
    │   │           ├── TC-E-1.response
    │   │           ├── TC-E.key
    │   │           ├── TC-F-1.response
    │   │           ├── TC-F.key
    │   │           ├── TC-G-1.response
    │   │           ├── TC-G.key
    │   │           ├── TC-H-1.response
    │   │           ├── TC-H.key
    │   │           ├── TC-I-1.response
    │   │           ├── TC-I.key
    │   │           ├── TC-J-1.response
    │   │           ├── TC-J.key
    │   │           ├── TC-K-1.response
    │   │           ├── TC-K.key
    │   │           ├── TC-L-1.response
    │   │           ├── TC-L.key
    │   │           ├── TC-M-1.response
    │   │           ├── TC-M-2.response
    │   │           ├── TC-M-3.response
    │   │           ├── TC-M-4.response
    │   │           ├── TC-M-5.response
    │   │           ├── TC-M-6.response
    │   │           ├── TC-M.key
    │   │           ├── TC-N-1.response
    │   │           ├── TC-N-2.response
    │   │           ├── TC-N-3.response
    │   │           ├── TC-N-4.response
    │   │           ├── TC-N-5.response
    │   │           ├── TC-N-6.response
    │   │           └── TC-N.key
    │   │       ├── TestCases.README
    │   │       └── test.pl
    └── v3
    │   └── scripts
    │       ├── conll2coreference.py
    │       ├── conll2coreference.sh
    │       ├── conll2name.py
    │       ├── conll2name.sh
    │       ├── conll2parse.py
    │       ├── conll2parse.sh
    │       ├── skeleton2conll.py
    │       └── skeleton2conll.sh
├── conll.py
├── constants.py
├── conversion_scripts
    ├── ancor_to_json.py
    ├── conll.py
    ├── convert_arrau.py
    ├── convert_tf_to_pytorch.py
    ├── make_qbcoref_splits.py
    ├── make_sara_splits.py
    ├── map_preco.py
    ├── minimize.py
    ├── minimize_json.py
    ├── minimize_litbank.py
    ├── minimize_qb.py
    ├── minimize_rucor_json.py
    ├── minimize_russian_json.py
    ├── minimize_semeval.py
    ├── remove_es_trace.py
    ├── rucor_to_json.py
    └── util.py
├── domain
    ├── README.md
    ├── ar_data_curve.py
    ├── arrau_data_curve.py
    ├── base_data_curve.py
    ├── forget_data_curve.py
    ├── layers_data_curve.py
    ├── litbank_data_curve.py
    ├── preco_data_curve.py
    ├── preco_dev_analysis.py
    ├── qbcoref_data_curve.py
    ├── sara_data_curve.py
    ├── semeval_data_curve.py
    ├── zero_data_curve.py
    └── zh_data_curve.py
├── encoder.py
├── eval_all.py
├── experiments.jsonnet
├── incremental.py
├── inference.py
├── jsonnets
    ├── aida.jsonnet
    ├── arrau.jsonnet
    ├── base.jsonnet
    ├── data.jsonnet
    ├── emnlp2020.jsonnet
    ├── encoders.jsonnet
    ├── litbank.jsonnet
    ├── multilingual.jsonnet
    ├── ontonotes.jsonnet
    ├── preco.jsonnet
    ├── qbcoref.jsonnet
    ├── sara.jsonnet
    ├── tests
    │   └── dataset_test.jsonnet
    └── verify_jsonnet.py
├── merge_functions.py
├── metrics.py
├── requirements.txt
├── run_xlmr.py
├── torch_scores.py
├── trainer.py
└── util.py


/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pitrack/incremental-coref/f8b912c208977eb72c82c2691cef74da71bf8b43/__init__.py


--------------------------------------------------------------------------------
/analysis/clear_optims.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:  python clear_optims.py ckpt_1 [ckpt_2, ...]
 3 | 
 4 | Iterates through a list of checkpoints and deletes their optimizer states,
 5 | effectively reducing the size of the checkpoint to save disk space
 6 | """
 7 | import torch
 8 | import sys
 9 | 
10 | 
def clear(log_path):
  """Strip optimizer state from the checkpoint stored at `log_path`.

  The checkpoint is loaded onto the CPU, its "encoder_optimizer" and
  "optimizer" entries are deleted when present, and the result is written
  back to the same path. If the checkpoint cannot be loaded, a message is
  printed and nothing is written.

  Args:
    log_path: path of a checkpoint file readable by `torch.load`.
  """
  try:
    checkpoint = torch.load(log_path, map_location="cpu")
  except Exception:
    # Best-effort: any load failure is reported as "not found". Catching
    # Exception instead of using a bare `except` lets KeyboardInterrupt and
    # SystemExit propagate.
    print(f"Checkpoint not found at {log_path}")
    return
  print(f"Found checkpoint at {log_path}, loading instead.")
  for key in ("encoder_optimizer", "optimizer"):
    if key in checkpoint:
      del checkpoint[key]
  torch.save(checkpoint, log_path)
23 | 
# Script entry point: every command-line argument is treated as a checkpoint path.
checkpoint_paths = sys.argv[1:]
for ckpt in checkpoint_paths:
  clear(ckpt)
26 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/README.md:
--------------------------------------------------------------------------------
 1 | # Plotting scripts
 2 | 
 3 | These are the plotting scripts for "Moving on from OntoNotes: Coreference Resolution Model Transfer." The scripts assume the data is already correctly formatted as CSVs. They also contain some hacky renaming of values from the raw files, because the same script was reused to generate multiple plots (usually just to change the size). They are provided mostly as a reference and are not intended to be re-run exactly as-is.
 4 | 
 5 | These all use the [plotnine](https://plotnine.readthedocs.io/en/stable/) library.
 6 | 
 7 | ## Figure 1, Figure 2, Figure 6
 8 | 
 9 | This uses `plot_rq1.py` and assumes a CSV file, `rq1_f1.csv` (for Fig. 1) or `rq5_f1.csv` (for Fig. 2), with these columns:
10 | 
11 | ```
12 | exp,model,Number of training examples,Average F1,Mention F1,Average F1 (gold mentions),Average (source) F1,Average (source) std
13 | 1. LitBank,SpanBERT-large,5,49.8,79.8,62.8,,
14 | ```
15 | There are some tweaks in the code depending on exactly what needs to be plotted (and how big). The "small" setting only plots the first row.
16 | 
17 | There is a setting I couldn't figure out, and so I needed to go into [`plotnine/facets/facet_grid.py#144`](https://github.com/has2k1/plotnine/blob/master/plotnine/facets/facet_grid.py#L144) and add the following to get independent y-axis labels.
18 | 
19 | ```
20 | layout['SCALE_Y'] = layout['PANEL']
21 | layout['AXIS_Y'] = True
22 | ```
23 | 
24 | ## Figure 3
25 | 
26 | This relies on `dev_analysis.py` and `process_dev_analysis.py` to generate the results, while `plot_rq2.py` plots the figure. The CSV should contain entries like this:
27 | 
28 | ```
29 | model,num train examples,num dev examples,average f1,average std,matches
30 | SpanBERT-On (L),1,1,32.575,21.699,4
31 | SpanBERT-On (L),1,5,44.959,18.213,11
32 | SpanBERT-On (L),1,10,50.941,9.708,7
33 | ```
34 | 
35 | ## Figure 4
36 | 
37 | This relies on a `rq6_f1.csv` file and `plot_rq3.py`. Again, we need to make a modification to [`plotnine/facets/facet_wrap.py#L74`](https://github.com/has2k1/plotnine/blob/master/plotnine/facets/facet_wrap.py#L74) and change `_id = np.arange(1, n+1)` to `_id = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12]` so that panel 6 is skipped and the legend can be placed in the top-right. The format should look the same as the files for Figures 1 and 2.
38 | 
39 | ## Figure 5, Figure 7
40 | 
41 | This uses `plot_rq4.py` (Fig 5) or `plot_rq4_large.py` (Fig 7) with `rq4_f1.csv` containing entries like this:
42 | 
43 | ```
44 | exp,model,layers,layer percent,Number of training examples,Average F1,line type,type
45 | 1. LitBank,SpanBERT-large,0,0,10,28.4,1solid,1solidSpanBERT-large
46 | 1. LitBank,SpanBERT-large,6,25,10,51.9,1solid,1solidSpanBERT-large
47 | ```
48 | 
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/plot_rq1.py:
--------------------------------------------------------------------------------
  1 | from plotnine import *
  2 | from mizani.palettes import hue_pal
  3 | import pandas as pd
  4 | import numpy as np
  5 | 
  6 | 
# Colour source for the plot: `palette` is called with 8 and the result is
# indexed below when `old_palette` is assembled.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(8)

# `name` selects both the input CSV (`{name}.csv`) and the legend
# configuration chosen further down; it is also part of the output filename.
name = 'rq5_f1'
df = pd.read_csv(f'{name}.csv')
# size = "small"
size = "big"

# Figure height (passed to `p.save` with units='in'): 2 for "small", otherwise 7.
if size != "small":
    height = 7
else:
    height = 2

# Need to do some conversions first

# Long-format table that the loop below fills with one row per plotted point.
new_df = pd.DataFrame(columns=["exp", "size", "score", "model", "type", "linetype"])

# NOTE(review): `astype` returns a new DataFrame and the result is not
# assigned, so this line leaves `df` unchanged.
df.astype({'Number of training examples': np.float64})
 25 | 
 26 | def calc_type(model):
 27 |     if "SpanBERT" in model or "XLM" in model:
 28 |         return 2
 29 |     else:
 30 |         return 1
 31 | 
 32 | def fix(model):
 33 |     d = {
 34 |         "XLM-R-large": 1,
 35 |         "SpanBERT-large": 2,
 36 |         "SpanBERT-base-on": 3,
 37 |         "SpanBERT-large-on": 4,
 38 |         "Transfer-on": 5,
 39 |         "Transfer-pc": 6,
 40 |         "Transfer-en": 7
 41 |     }
 42 |     return d[model]
 43 | 
 44 | if name == "rq1_f1":
 45 |     model_labels = ["SpanBERT (L)", "SpanBERT-On (b)", "SpanBERT-On (L)",
 46 |                     "Transfer (on)", "Transfer (pc)"]
 47 |     old_linetypes = ["dashed", "dashed", "dashed", "solid", "solid"]
 48 | elif name == "rq5_f1":
 49 |     model_labels = ["XLM-R (L)", "Transfer (en)"]
 50 |     old_linetypes = ["dashed", "solid"]
 51 |                  
 52 | old_palette = [pal[3], pal[1], pal[0], pal[2],  pal[6], "blue", "pink"]
 53 | # old_palette = ["orange", pal[1], pal[0], pal[2],  pal[6], "blue", "pink"]
 54 | 
 55 | 
 56 | 
# Reshape the wide CSV into long format: each input row yields one output row
# per score column, tagged with a `type` label that later becomes a facet.
# NOTE(review): `DataFrame.append` was deprecated in pandas 1.4 and removed in
# pandas 2.0, so this loop needs an older pandas to run as written.
for i, row in df.iterrows():
    # Rows with zero training examples are skipped.
    if row["Number of training examples"] == 0:
        continue
    new_row1 = {"exp": row["exp"], "size": row["Number of training examples"],
                "model": fix(row["model"]), "score": row["Average F1"], "type": "1. Coref (pred. ment.)", "linetype": calc_type(row["model"]) }
    new_df = new_df.append(new_row1, ignore_index=True)
    # The "small" figure only gets the first row type; otherwise the mention
    # and gold-mention scores are added as two further row types.
    if size != "small":
        new_row2 = {"exp": row["exp"], "size": row["Number of training examples"],
                    "model": fix(row["model"]), "score": row["Mention F1"], "type": "2. Mentions",  "linetype": calc_type(row["model"])}
        new_row3 = {"exp": row["exp"], "size": row["Number of training examples"],
                    "model": fix(row["model"]), "score": row["Average F1 (gold mentions)"], "type": "3. Coref (gold ment.)",  "linetype": calc_type(row["model"])}
        new_df = new_df.append(new_row2, ignore_index=True)
        new_df = new_df.append(new_row3, ignore_index=True)

# Drop any row that has a missing value in any column.
new_df = new_df.dropna()
# new_df = new_df[new_df['exp'] == "1. LitBank"]  
 73 | 
 74 | def relabel_facet(f):
 75 |     return f[3:]
 76 | 
# Line plot of `score` against `size` (log-scaled x axis), one line per model
# code, with panels laid out over the `type` and `exp` columns.
p = (ggplot(new_df, aes(x='size',y='score', group='model'))
     + labs(x='# training documents', y='F1', color="Init. method")
     + geom_line(aes(color="model", linetype="model"), size=1.0)
     + scale_x_log10()
     + scale_y_continuous()
     # + scale_linetype_manual(values=["solid", "dashed"], name="Init. method",
     #                         labels=["1", "2,"])
     # Colour and linetype scales are given the same name and labels.
     + scale_color_manual(values = old_palette,
                          name="Initialization method", labels=model_labels)
     + scale_linetype_manual(values = old_linetypes,
                             name="Initialization method", labels=model_labels)
     # relabel_facet strips the three-character ordering prefix ("1. ", ...)
     # from both the row and the column strip labels.
     + facet_grid(("type", "exp"), scales="free",
                  labeller=labeller(cols=relabel_facet,
                                    rows=relabel_facet))
     + theme_bw()
     + theme(panel_spacing_x=0.25,
             axis_title_x=element_text(size=16),
             axis_title_y=element_text(size=16),
             legend_position="top",
             legend_title_align="center",
             legend_text=element_text(size=12),
             legend_title=element_text(size=14),
             legend_entry_spacing_x=10.0,
             strip_text_x=element_text(size=11),
             strip_text_y=element_text(size=11),
             
     )
)

# Output file is named after the input CSV and the size setting.
p.save(filename = f'{name}_{size}.pdf', height=height, width=12, units = 'in', dpi=400)
107 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/plot_rq2.py:
--------------------------------------------------------------------------------
 1 | from plotnine import *
 2 | import pandas as pd
 3 | import numpy as np
 4 | 
 5 | df = pd.read_csv('rq2_f1.csv')
 6 | 
 7 | df = df.astype({"num train examples": str, "num dev examples": str, "matches": int})
 8 | 
 9 | datavals = [1, 5,10, 15, 25, 50, 100, 150, 250, 500]
10 | datavals = [str(x) for x in datavals]
11 | 
12 | df["num train examples"] = pd.Categorical(df["num train examples"], categories=datavals)
13 | df["num dev examples"] = pd.Categorical(df["num dev examples"], categories=datavals)
14 | df["label"] = df["average f1"].round(decimals=1)
15 | df["stdev"] = df["average std"].round(decimals=1)
16 | 
# Tile plot: one tile per (train size, dev size) pair, filled by average F1,
# with one panel per model.
p = (ggplot(df, aes('num train examples', 'num dev examples', fill='average f1'))
     + labs(x="# training documents", y="# dev documents", fill="Avg. F1")
     + scale_fill_gradient(low="white", high="#02841c", limits=[70, 85])
     + geom_tile(aes(width=.95, height=.95))
     # Three text layers per tile: the rounded F1, then the rounded standard
     # deviation (prefixed with a plus-minus sign) and the `matches` count,
     # each nudged to a different position.
     + geom_text(aes(label='label'), size=9, nudge_y = 0.1)
     + geom_text(aes(label='stdev'), nudge_x = -0.15, nudge_y = -0.3, size=4.5, format_string="\u00B1{}")
     + geom_text(aes(label='matches'), nudge_x = 0.30, nudge_y = -0.3, size=4.5)
     + facet_wrap("model")
     + theme_bw()
     )

p.save(filename = 'rq2_f1.pdf', height=4, width=8, units = 'in', dpi=200)
29 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/plot_rq3.py:
--------------------------------------------------------------------------------
 1 | from plotnine import *
 2 | import pandas as pd
 3 | import numpy as np
 4 | from mizani.palettes import hue_pal
 5 | 
# `name` selects the input CSV (`{name}.csv`) and is part of the output filename.
name="rq6_f1"
df = pd.read_csv(f'{name}.csv')

# Three of the four generated colours are used, in this order, by
# scale_color_manual when the plot is built.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(4)
old_palette = [pal[1], pal[3], pal[0]]
# Legend labels passed to scale_color_manual alongside `old_palette`.
model_labels = ["Transfer (on)", "Transfer (pc)", "Transfer (en)"]
13 | # if name == "rq1_f1":
14 | #     old_palette = [pal[1], pal[3], pal[0]]
15 | # else:
16 | #     old_palette = [pal[0], pal[1], pal[3]]
def fix(model):
    """Map a "Transfer-*" model name to its code string ("1", "2" or "3").

    Raises:
        KeyError: if `model` is not one of the three known names.
    """
    transfer_models = ("Transfer-on", "Transfer-pc", "Transfer-en")
    # Codes are the 1-based positions in the tuple, rendered as strings.
    codes = {label: str(rank) for rank, label in enumerate(transfer_models, start=1)}
    return codes[model]
24 | # if name == "rq1_f1":
25 | #     model_labels = ["Transfer (on)", "Transfer (pc)"]#, "Transfer (en)"]
26 | # if name == "rq5_f1":
27 | #     model_labels = ["Transfer (en)"]
28 | # Need to do some conversions first
29 | 
# Long-format table filled by the loop below.
# NOTE(review): the declared columns include "dataset", but the rows built
# below use the key "eval set" instead, so "dataset" is never populated by
# this loop and "eval set" is added as an extra column by `append`.
new_df = pd.DataFrame(columns=["exp", "size", "score", "model", "type", "std", "dataset"])

# NOTE(review): `astype` returns a new DataFrame and the result is not
# assigned, so this line leaves `df` unchanged.
df.astype({'Number of training examples': np.float64})
# NOTE(review): `DataFrame.append` was deprecated in pandas 1.4 and removed in
# pandas 2.0, so this loop needs an older pandas to run as written.
for i, row in df.iterrows():
    # Only rows whose model name contains "Transfer" are plotted.
    if "Transfer" not in row["model"]:
        continue
    # Each input row yields two output rows: the "Average F1" score tagged
    # "target" and the "Average (source) F1" score tagged "source". The model
    # name is appended to `type` so every (score kind, model) pair is its own
    # group in the plot.
    new_row1 = {"exp": row["exp"], "size": row["Number of training examples"],
                "model": fix(row["model"]), "score": row["Average F1"], "type": "1. Avg. F1 (pred. ment.)" + row["model"],
                "std": 0, "eval set":  "target"}
    new_row2 = {"exp": row["exp"], "size": row["Number of training examples"],
                "model": fix(row["model"]), "score": row["Average (source) F1"], "type": "2. Avg. F1 (orig. data)" + row["model"],
                "std": row["Average (source) std"], "eval set": "source"}
    new_df = new_df.append(new_row1, ignore_index=True)
    new_df = new_df.append(new_row2, ignore_index=True)

# Missing values (in any column) are replaced with 0 in place.
new_df.fillna(0, inplace=True)
46 | 
def relabel_facet(f):
    """Return `f` without its first three characters (e.g. "1. LitBank" -> "LitBank")."""
    prefix_length = 3
    return f[prefix_length:]
49 | 
# Line plot of `score` against `size` (log-scaled x axis). Lines are grouped
# by `type`, coloured by model code, and their linetype follows `eval set`.
p = (ggplot(new_df, aes(x='size',y='score', color='model', group='type'))
     + labs(x='# training documents', y='Avg. F1', color='Init. method', linetype="Eval. set")
     + geom_line(aes(linetype='eval set'))
     + scale_x_log10()
     + scale_y_continuous()
     # + facet_grid((".", ["exp"]), scales="free",
     #              labeller=labeller(cols=relabel_facet,
     #                                rows=relabel_facet))
     # One panel per `exp`, wrapped onto two rows; relabel_facet strips the
     # three-character ordering prefix from the strip labels.
     + facet_wrap("exp", nrow=2, scales="free",
                  labeller=labeller(cols=relabel_facet,
                                    rows=relabel_facet))
     
     + scale_color_manual(values = old_palette,
                          name="Init. method", labels=model_labels)
     + theme_bw()
     + theme(panel_spacing_x=0.25,
             axis_title_x=element_text(size=16),
             axis_title_y=element_text(size=16),
             subplots_adjust={'hspace': 0.4},
             # Legend is positioned with explicit coordinates instead of a side name.
             legend_position=(0.85, 0.75),
     )
)
72 | 
# Save first, so the confirmation is only printed once the save call has
# returned without raising.
output_path = f'rq3_{name}.pdf'
p.save(filename = output_path, height=4, width=10, units = 'in', dpi=400)
print(f"Saved to {output_path}")
75 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/plot_rq4.py:
--------------------------------------------------------------------------------
 1 | from plotnine import *
 2 | import pandas as pd
 3 | import numpy as np
 4 | from mizani.palettes import hue_pal
# Input table for the plot.
df = pd.read_csv('rq4_f1.csv')

# NOTE(review): `palette`, `pal` and `old_palette` are all reassigned further
# down in this script before the plot is built, so the values computed here
# do not reach the plot.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(4)
old_palette = [pal[1], pal[3], pal[0]]
def fix(model):
    """Map a "Transfer-*" model name to its code string ("1", "2" or "3").

    Raises:
        KeyError: if `model` is not one of the three known names.
    """
    transfer_models = ("Transfer-on", "Transfer-pc", "Transfer-en")
    # Codes are the 1-based positions in the tuple, rendered as strings.
    codes = {label: str(rank) for rank, label in enumerate(transfer_models, start=1)}
    return codes[model]
17 | 
18 | 
# Eight colours are generated here; `old_palette` below indexes into them.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(8)

# Rows with any missing value are dropped, and the training-set size column
# is converted to strings.
new_df = df.dropna()
new_df = new_df.astype({'Number of training examples': str})

# Legend labels, line styles and colours passed to the manual scales.
# `old_palette` has more entries (7) than there are labels (4).
model_labels = ["SpanBERT (L)", "SpanBERT-On (L)",
                "Transfer (on)", "Transfer (pc)"]
old_linetype = ["dashed", "dashed", "solid", "solid"]
old_palette = [pal[3], pal[1], pal[2], pal[6], "blue", pal[0], "pink"]
29 | 
def relabel_facet(f):
    """Translate a facet value into its display label.

    "1solid", "2dashed" and "3dotted" become "Low", "Medium" and "All";
    any other value is returned without its first three characters.
    """
    row_titles = (("1solid", "Low"), ("2dashed", "Medium"), ("3dotted", "All"))
    for raw_value, title in row_titles:
        if f == raw_value:
            return title
    return f[3:]
39 | 
40 |     
# Line plot of "Average F1" against `layers`, grouped by the `type` column,
# with panels laid out over the "line type" and `exp` columns.
p = (ggplot(new_df, aes(x='layers', y='Average F1', color='model', group='type'))
     + labs(x='Top k layers are trainable', y='Coref Avg. F1', color='model', linetype="line type")
     + geom_line(aes(linetype="model"), size=1.0)
     + scale_y_continuous()
     + scale_x_continuous(breaks=[0,6,12,18,24])
     # relabel_facet turns the "line type" codes into Low/Medium/All and
     # strips the three-character prefix from the other strip labels.
     + facet_grid(("line type", "exp"), scales="free",
                  labeller=labeller(cols=relabel_facet,
                                    rows=relabel_facet))
     # Colour and linetype scales are given the same name and labels.
     + scale_color_manual(values = old_palette,
                          name="Initialization method", labels=model_labels)
     + scale_linetype_manual(values = old_linetype,
                          name="Initialization method", labels=model_labels)
     + theme_bw()
     + theme(panel_spacing_x=0.25,
             axis_title_x=element_text(size=16),
             axis_title_y=element_text(size=16),
             legend_position="top",
             legend_text=element_text(size=12),
             legend_title=element_text(size=14),
             legend_entry_spacing_x=10.0,
             legend_entry_spacing_y=3.0
     )
     # Lay the colour legend out on two rows.
     + guides(color=guide_legend(nrow=2))
)

p.save(filename = 'rq4_f1.pdf', height=5, width=5, units = 'in', dpi=400)
67 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/plot_rq4_large.py:
--------------------------------------------------------------------------------
 1 | from plotnine import *
 2 | import pandas as pd
 3 | import numpy as np
 4 | from mizani.palettes import hue_pal
# Input table for the plot.
df = pd.read_csv('rq4_f1_large.csv')

# NOTE(review): `palette` and `pal` are recomputed with the same arguments
# further down in this script, before they are first used.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(8)
# old_palette = [pal[1], pal[3], pal[0], pal[4], pal[2], pal[5], pal[7]]
def fix(model):
    """Map a "Transfer-*" model name to its code string ("1", "2" or "3").

    Raises:
        KeyError: if `model` is not one of the three known names.
    """
    transfer_models = ("Transfer-on", "Transfer-pc", "Transfer-en")
    # Codes are the 1-based positions in the tuple, rendered as strings.
    codes = {label: str(rank) for rank, label in enumerate(transfer_models, start=1)}
    return codes[model]
17 | 
18 | 
# Eight colours are generated here; `old_palette` below indexes into them.
palette = hue_pal(0.01, 0.6, 0.65, color_space='hls')
pal = palette(8)

# Rows with any missing value are dropped, and the training-set size column
# is converted to strings.
new_df = df.dropna()
new_df = new_df.astype({'Number of training examples': str})

# Legend labels, line styles and colours passed to the manual scales.
# `old_palette` has one more entry (7) than there are labels (6).
model_labels = ["SpanBERT (L)", "SpanBERT-On (L)", "Transfer (en)",
                "Transfer (on)", "Transfer (pc)", "XLM-R (L)"]
old_linetype = ["dashed", "dashed", "solid", "solid", "solid", "dashed"]
old_palette = [pal[0], pal[4], pal[2], pal[1], pal[6], pal[5], "pink"]
29 | 
def relabel_facet(f):
    """Translate a facet value into its display label.

    "1solid", "2dashed" and "3dotted" become "Low", "Medium" and "All";
    any other value is returned without its first three characters.
    """
    row_titles = (("1solid", "Low"), ("2dashed", "Medium"), ("3dotted", "All"))
    for raw_value, title in row_titles:
        if f == raw_value:
            return title
    return f[3:]
39 | 
# Line plot of "Average F1" against `layers`, grouped by the `type` column,
# with panels laid out over the "line type" and `exp` columns.
p = (ggplot(new_df, aes(x='layers', y='Average F1', color='model', group='type'))
     + labs(x='Top k layers are trainable', y='Coref Avg. F1', color='model')
     + geom_line(aes(color="model", linetype="model"), size=1.0)
     + scale_y_continuous()
     + scale_x_continuous(breaks=[0,6,12,18,24])
     # relabel_facet turns the "line type" codes into Low/Medium/All and
     # strips the three-character prefix from the other strip labels.
     + facet_grid(("line type", "exp"), scales="free",
                  labeller=labeller(cols=relabel_facet,
                                    rows=relabel_facet))
     # Colour and linetype scales are given the same name and labels.
     + scale_color_manual(values = old_palette,
                          name="Initialization method", labels=model_labels)
     + scale_linetype_manual(values = old_linetype,
                          name="Initialization method", labels=model_labels)
     + theme_bw()
     + theme(panel_spacing_x=0.25,
             axis_title_x=element_text(size=16),
             axis_title_y=element_text(size=16),
             legend_position="top",
             legend_text=element_text(size=12),
             legend_title=element_text(size=14),
             legend_entry_spacing_x=10.0,
             legend_entry_spacing_y=3.0
     )
     # Lay the colour legend out on two rows.
     + guides(color=guide_legend(nrow=2))
)

p.save(filename = 'rq4_f1_large.pdf', height=5, width=5, units = 'in', dpi=400)
66 | 


--------------------------------------------------------------------------------
/analysis/model_transfer/process_dev_analysis.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generates space-separated file from `dev_analysis.py` output
 3 | that can then be used to generate Figure 3.
 4 | """
 5 | 
 6 | import sys
 7 | 
 8 | f = open(sys.argv[1], 'r').readlines()
 9 | 
10 | wait = ""
11 | for line in f:
12 |     if "RESULT" in line:
13 |         values = line.split(":")
14 |         size = int(values[-2])
15 |         data = values[-1].split()
16 |         names = values[-4].split("_")
17 |         dataset = names[-4]
18 |         num_train = int(names[-2])
19 |         data_string = f"{dataset} {num_train} {size} {100 * float(data[-3]):.5f} {100 * float(data[-2]):.5f} {data[-1] if size != 500 else 20}"
20 |         if size == 500:
21 |             print (wait)
22 |             wait = data_string
23 |         else:
24 |             print (data_string)
25 | print (wait)
26 | 


--------------------------------------------------------------------------------
/analysis/print_clusters.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Print out some examples of clusters
 3 | """
 4 | import json
 5 | import sys
 6 | import util
 7 | import argparse
 8 | 
 9 | NUM_FILES = 5
10 | 
11 | def print_clusters(args):
12 |     f = open(args.preds)
13 |     for i, line in enumerate(f):
14 |       data = json.loads(line)
15 |       text = util.flatten(data['sentences'])
16 |       for ci, cluster in enumerate(data[args.key]):
17 |         spans = [text[s:e+1] for s,e in cluster]
18 |         if len(spans) > args.size:
19 |             print(i, ci, len(spans), spans)
20 |       if i > NUM_FILES:
21 |         break
22 | 
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--preds", required=True,
                        help="path to a file with one JSON object per line")
    parser.add_argument("-k", "--key", default="predicted_clusters",
                        help="name of the cluster field in each JSON object")
    parser.add_argument("-s", "--size", default=0, type=int,
                        help="only print clusters with more spans than this")
    args = parser.parse_args()
    # print_clusters keeps clusters with len(spans) > args.size, so the banner
    # says "greater than" rather than implying an exact size.
    print(f"Printing the clusters that have size greater than {args.size} in the first {NUM_FILES} files")
    print_clusters(args)
31 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/README.txt:
--------------------------------------------------------------------------------
  1 | NAME
  2 |    CorScorer: Perl package for scoring coreference resolution systems
  3 |    using different metrics.
  4 | 
  5 | 
  6 | VERSION
  7 |    v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics.
  8 | 
  9 | 
 10 | CHANGES SINCE v8.0
 11 |    - fixed a bug that crashed the BLANC scorer when a duplicate singleton
 12 |      mention was present in the response.
 13 | 
 14 | INSTALLATION
 15 |    Requirements:
 16 |       1. Perl: downloadable from http://perl.org
 17 |       2. Algorithm-Munkres: included in this package and downloadable
 18 |          from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08
 19 | 
 20 | USE
 21 |    This package is distributed with two scripts to execute the scorer from
 22 |    the command line.
 23 | 
 24 |    Windows (tm): scorer.bat
 25 |    Linux: scorer.pl
 26 | 
 27 | 
 28 | SYNOPSIS
 29 |    use CorScorer;
 30 | 
 31 |    $metric = 'ceafm';
 32 | 
 33 |    # Scores the whole dataset
 34 |    &CorScorer::Score($metric, $keys_file, $response_file);
 35 | 
 36 |    # Scores one file
 37 |    &CorScorer::Score($metric, $keys_file, $response_file, $name);
 38 | 
 39 | 
 40 | INPUT
 41 |    metric: the metric desired to score the results:
 42 |      muc: MUCScorer (Vilain et al, 1995)
 43 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
 44 |      ceafm: CEAF (Luo et al., 2005) using mention-based similarity
 45 |      ceafe: CEAF (Luo et al., 2005) using entity-based similarity
 46 |      blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions
 47 |      all: uses all the metrics to score
 48 | 
 49 |    keys_file: file with expected coreference chains in CoNLL-2011/2012 format
 50 | 
 51 |    response_file: file with output of coreference system (CoNLL-2011/2012 format)
 52 | 
 53 |    name: [optional] the name of the document to score. If name is not
 54 |      given, all the documents in the dataset will be scored. If given
 55 |      name is "none" then all the documents are scored but only total
 56 |      results are shown.
 57 | 
 58 | 
 59 | OUTPUT
 60 |    The score subroutine returns an array with four values in this order:
 61 |    1) Recall numerator
 62 |    2) Recall denominator
 63 |    3) Precision numerator
 64 |    4) Precision denominator
 65 | 
 66 |    Also recall, precision and F1 are printed in the standard output when variable
 67 |    $VERBOSE is not null.
 68 | 
 69 |    Final scores:
 70 |    Recall = recall_numerator / recall_denominator
 71 |    Precision = precision_numerator / precision_denominator
 72 |    F1 = 2 * Recall * Precision / (Recall + Precision)
 73 | 
 74 |    Identification of mentions
 75 |    A scorer for identification of mentions (recall, precision and F1) is also included.
 76 |    Mentions from system response are compared with key mentions. This version performs
 77 |    strict mention matching as was used in the CoNLL-2011 and 2012 shared tasks.
 78 | 
 79 | AUTHORS
 80 |    Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena <at> lsi.upc.edu
 81 |    Sameer Pradhan, sameer.pradhan <at> childrens.harvard.edu
 82 |    Sebastian Martschat, sebastian.martschat <at> h-its.org
 83 |    Xiaoqiang Luo, xql <at> google.com
 84 | 
 85 | COPYRIGHT AND LICENSE
 86 |    Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
 87 |                  2011-2014, Sameer Pradhan sameer.pradhan <at> childrens.harvard.edu
 88 | 
 89 |    This program is free software; you can redistribute it and/or modify it
 90 |    under the terms of the GNU General Public License as published by the
 91 |    Free Software Foundation; either version 2 of the License, or (at your
 92 |    option) any later version. This program is distributed in the hope that
 93 |    it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 94 |    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 95 |    GNU General Public License for more details.
 96 | 
 97 |    You should have received a copy of the GNU General Public License along
 98 |    with this program; if not, write to the Free Software Foundation, Inc.,
 99 |    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
100 | 
101 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/scorer.bat:
--------------------------------------------------------------------------------
 1 | @rem = '--*-Perl-*--
 2 | @echo off
 3 | if "%OS%" == "Windows_NT" goto WinNT
 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
 5 | goto endofperl
 6 | :WinNT
 7 | perl -x -S %0 %*
 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH.
10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
11 | goto endofperl
12 | @rem ';
13 | #!perl
14 | #line 15
15 | 
16 | # Windows launcher for the coreference scorer. The batch stub above re-runs
17 | # this file through "perl -x", which skips everything before the "#!perl" line.
18 | 
19 | # Add the lib/ directory that sits next to this script to the module path.
20 | BEGIN {
21 |     $d = $0;
22 |     # Drop the script name; accept "\" as well as "/" as the path separator.
23 |     $d =~ s/[\/\\][^\/\\]+$//;
24 |     # With no directory part in $0, fall back to lib/ relative to the cwd.
25 |     push(@INC, ($d eq $0) ? "lib" : $d."/lib");
26 | }
27 | 
28 | use strict;
29 | use CorScorer;
30 | 
31 | # Print the usage text when a required argument is missing.
32 | if (@ARGV < 3) {
33 |   print q|
34 |   use: scorer.bat <metric> <keys_file> <response_file> [name]
35 |   
36 |   metric: the metric desired to score the results:
37 |      muc: MUCScorer (Vilain et al, 1995)
38 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
39 |      ceafm: CEAF (Luo et al, 2005) using mention-based similarity
40 |      ceafe: CEAF (Luo et al, 2005) using entity-based similarity
41 |      blanc: BLANC
42 |      all: uses all the metrics to score
43 |   
44 |   keys_file: file with expected coreference chains in SemEval format
45 |   
46 |   response_file: file with output of coreference system (SemEval format)
47 |   
48 |   name: [optional] the name of the document to score. If name is not
49 |      given, all the documents in the dataset will be scored. If given
50 |      name is "none" then all the documents are scored but only total
51 |      results are shown.
52 |   
53 |   |;
54 |   exit;
55 | }
56 | 
57 | # Normalise case so that e.g. "ALL" is both accepted and expanded below.
58 | my $metric = lc(shift(@ARGV));
59 | # Anchor both ends so that only exact metric names are accepted.
60 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)$/) {
61 |   print "Invalid metric\n";
62 |   exit;
63 | }
64 | 
65 | if ($metric eq 'all') {
66 |   # Run every supported metric in turn, each under its own heading.
67 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
68 |     print "\nMETRIC $m:\n";
69 |     &CorScorer::Score( $m, @ARGV );
70 |   }
71 | }
72 | else {
73 |   &CorScorer::Score( $metric, @ARGV );
74 | }
75 | 
76 | __END__
77 | :endofperl
78 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/scorer.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Command-line front end for CorScorer: checks the metric argument and
 4 | # forwards the remaining arguments to CorScorer::Score.
 5 | 
 6 | # Put the lib/ directory that sits next to this script first on the module path.
 7 | BEGIN {
 8 |   $d = $0;
 9 |   # Strip the trailing "/<script name>" to obtain the script's directory.
10 |   $d =~ s/\/[^\/][^\/]*$//g;
11 | 
12 |   if ($d eq $0) {
13 |     # $0 had no directory part; use lib/ relative to the current directory.
14 |     unshift(@INC, "lib");
15 |   }
16 |   else {
17 |     unshift(@INC, $d . "/lib");
18 |   }
19 | }
20 | 
21 | use strict;
22 | use CorScorer;
23 | 
24 | # Print the usage text when a required argument is missing.
25 | if (@ARGV < 3) {
26 |   print q|
27 | use: scorer.pl <metric> <keys_file> <response_file> [name]
28 | 
29 |   metric: the metric desired to score the results:
30 |     muc: MUCScorer (Vilain et al, 1995)
31 |     bcub: B-Cubed (Bagga and Baldwin, 1998)
32 |     ceafm: CEAF (Luo et al, 2005) using mention-based similarity
33 |     ceafe: CEAF (Luo et al, 2005) using entity-based similarity
34 |     blanc: BLANC
35 |     all: uses all the metrics to score
36 | 
37 |   keys_file: file with expected coreference chains in SemEval format
38 | 
39 |   response_file: file with output of coreference system (SemEval format)
40 | 
41 |   name: [optional] the name of the document to score. If name is not
42 |     given, all the documents in the dataset will be scored. If given
43 |     name is "none" then all the documents are scored but only total
44 |     results are shown.
45 | 
46 | |;
47 |   exit;
48 | }
49 | 
50 | # Normalise case so that e.g. "ALL" is both accepted and expanded below.
51 | my $metric = lc(shift(@ARGV));
52 | # Anchor both ends so that only exact metric names are accepted.
53 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)$/) {
54 |   print "Invalid metric\n";
55 |   exit;
56 | }
57 | 
58 | if ($metric eq 'all') {
59 |   # Run every supported metric in turn, each under its own heading.
60 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
61 |     print "\nMETRIC $m:\n";
62 |     &CorScorer::Score($m, @ARGV);
63 |   }
64 | }
65 | else {
66 |   &CorScorer::Score($metric, @ARGV);
67 | }
68 | 
69 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-10.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	z	-
17 | test2	0	5	e	(4)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-11.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-12.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	1)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(2)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(3)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(4
15 | test2	0	3	d2	4)
16 | test2	0	4	z	-
17 | test2	0	5	e	(5)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(6)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-13.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	0)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(0)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	-
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	-
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	y	(2)
17 | test2	0	5	e	(2)
18 | test2	0	6	z	(3)
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	x	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(1
 7 | test1	0	5	b3	1)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(3
 7 | test1	0	5	b3	3)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-7.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-8.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A-9.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3(3(3(3(3(3(3(3(3(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)3)3)3)3)3)3)3)3)3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-A.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-B-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-B.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-C-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-C.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-D-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-D.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-E-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (1)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (1)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (1)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (1)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-E.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-F-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-F.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-G-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-G.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-H-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-H.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-I-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-I.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-J-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-J.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-K-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 (3)
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-K.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (1)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (1)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-L-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (3)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-L.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-M.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/DataFiles/TC-N.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/reference-coreference-scorers/v8.01/test/test.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Test driver: scores every case in @CorefMetricTestConfig::TestCases with CorScorer and prints PASS/FAIL per metric.
 3 | BEGIN {
 4 |     $d = $0;  # path this script was invoked with
 5 |     $d =~ s/\/[^\/][^\/]*$//g;  # drop the trailing "/<file name>" component, leaving the script's directory
 6 |     push(@INC, $d);  # make modules next to this script loadable
 7 |     push(@INC, $d . "/../lib");  # make modules in the sibling lib directory loadable
 8 | }
 9 | # strict is enabled only from here on; the BEGIN block above uses the undeclared global $d.
10 | use strict;
11 | use CorScorer;
12 | use CorefMetricTest;
13 | use CorefMetricTestConfig;
14 | # A metric passes when DiffExpectedAndActual returns a value below this tolerance.
15 | my $error_tolerance = 1.e-4;
16 | my $script_dir = $0;
17 | $script_dir =~ s/\/[^\/][^\/]*$//g;  # same directory extraction as in BEGIN; key/response paths are built from it
18 | # Outer loop: one iteration per configured test case; inner loop: one per expected metric, in sorted name order.
19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) {
20 |   my $id = $test_case->{'id'};
21 |   my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 
22 |                             $script_dir . "/" . $test_case->{'response_file'});
23 |   print "\nTesting case ($id): keyFile=", $key_response_files[0], 
24 |         " responseFile=", $key_response_files[1], "\n";
25 |   my $expected_metrics = $test_case->{'expected_metrics'};  # hash ref keyed by metric name
26 |   foreach my $metric_name (sort keys %$expected_metrics) {
27 |     my $expected_values = $expected_metrics->{$metric_name};  # array ref; printed below as (recall, prec, F1)
28 |     *::SAVED_STDOUT = *STDOUT;  # remember the real STDOUT glob so it can be restored below
29 |     *STDOUT = *::SUPRRES_STDOUT;  # NOTE(review): this glob is never opened in this script; presumably this silences the scorer's own output ("SUPRRES" looks like a misspelling of "SUPPRESS") -- confirm
30 |     my @actual_counts = &CorScorer::Score($metric_name, @key_response_files);
31 |     # Compute R, P, and F1 from the raw counts returned by the scorer.
32 |     my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts);
33 |     *STDOUT = *::SAVED_STDOUT;  # restore STDOUT before printing the verdict
34 |     my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values);
35 |     printf "  metric: %+10s", $metric_name;
36 |     if ($diff < $error_tolerance) {
37 |       print " => PASS\n";
38 |     } else {
39 |       print " => FAIL\n";
40 |       print "    Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n";
41 |       print "    Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n";
42 |       #exit(1);  # left disabled: a FAIL does not stop the run, so the remaining metrics and cases are still checked
43 |     }
44 |   }
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/README.txt:
--------------------------------------------------------------------------------
  1 | NAME
  2 |    CorScorer: Perl package for scoring coreference resolution systems
  3 |    using different metrics.
  4 | 
  5 | 
  6 | VERSION
  7 |    v8.01 -- reference implementations of MUC, B-cubed, CEAF and BLANC metrics.
  8 | 
  9 | 
 10 | CHANGES SINCE v8.0
 11 |    - fixed a bug that crashed the BLANC scorer when a duplicate singleton
 12 |      mention was present in the response.
 13 | 
 14 | INSTALLATION
 15 |    Requirements:
 16 |       1. Perl: downloadable from http://perl.org
 17 |       2. Algorithm-Munkres: included in this package and downloadable
 18 |          from CPAN http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08
 19 | 
 20 | USE
 21 |    This package is distributed with two scripts to execute the scorer from
 22 |    the command line.
 23 | 
 24 |    Windows (tm): scorer.bat
 25 |    Linux: scorer.pl
 26 | 
 27 | 
 28 | SYNOPSIS
 29 |    use CorScorer;
 30 | 
 31 |    $metric = 'ceafm';
 32 | 
 33 |    # Scores the whole dataset
 34 |    &CorScorer::Score($metric, $keys_file, $response_file);
 35 | 
 36 |    # Scores one file
 37 |    &CorScorer::Score($metric, $keys_file, $response_file, $name);
 38 | 
 39 | 
 40 | INPUT
 41 |    metric: the metric desired to score the results:
 42 |      muc: MUCScorer (Vilain et al., 1995)
 43 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
 44 |      ceafm: CEAF (Luo et al., 2005) using mention-based similarity
 45 |      ceafe: CEAF (Luo et al., 2005) using entity-based similarity
 46 |      blanc: BLANC (Luo et al., 2014) BLANC metric for gold and predicted mentions
 47 |      all: uses all the metrics to score
 48 | 
 49 |    keys_file: file with expected coreference chains in CoNLL-2011/2012 format
 50 | 
 51 |    response_file: file with output of coreference system (CoNLL-2011/2012 format)
 52 | 
 53 |    name: [optional] the name of the document to score. If name is not
 54 |      given, all the documents in the dataset will be scored. If the given
 55 |      name is "none", all the documents are scored but only the total
 56 |      results are shown.
 57 | 
 58 | 
 59 | OUTPUT
 60 |    The score subroutine returns an array with four values in this order:
 61 |    1) Recall numerator
 62 |    2) Recall denominator
 63 |    3) Precision numerator
 64 |    4) Precision denominator
 65 | 
 66 |    Recall, precision and F1 are also printed to standard output when the variable
 67 |    $VERBOSE is not null.
 68 | 
 69 |    Final scores:
 70 |    Recall = recall_numerator / recall_denominator
 71 |    Precision = precision_numerator / precision_denominator
 72 |    F1 = 2 * Recall * Precision / (Recall + Precision)
 73 | 
 74 |    Identification of mentions
 75 |    A scorer for the identification of mentions (recall, precision and F1) is also included.
 76 |    Mentions from the system response are compared with the key mentions. This version performs
 77 |    strict mention matching, as was used in the CoNLL-2011 and 2012 shared tasks.
 78 | 
 79 | AUTHORS
 80 |    Emili Sapena, Universitat Politècnica de Catalunya, http://www.lsi.upc.edu/~esapena, esapena <at> lsi.upc.edu
 81 |    Sameer Pradhan, sameer.pradhan <at> childrens.harvard.edu
 82 |    Sebastian Martschat, sebastian.martschat <at> h-its.org
 83 |    Xiaoqiang Luo, xql <at> google.com
 84 | 
 85 | COPYRIGHT AND LICENSE
 86 |    Copyright (C) 2009-2011, Emili Sapena esapena <at> lsi.upc.edu
 87 |                  2011-2014, Sameer Pradhan sameer.pradhan <at> childrens.harvard.edu
 88 | 
 89 |    This program is free software; you can redistribute it and/or modify it
 90 |    under the terms of the GNU General Public License as published by the
 91 |    Free Software Foundation; either version 2 of the License, or (at your
 92 |    option) any later version. This program is distributed in the hope that
 93 |    it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 94 |    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 95 |    GNU General Public License for more details.
 96 | 
 97 |    You should have received a copy of the GNU General Public License along
 98 |    with this program; if not, write to the Free Software Foundation, Inc.,
 99 |    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
100 | 
101 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/scorer.bat:
--------------------------------------------------------------------------------
 1 | @rem = '--*-Perl-*--
 2 | @echo off
 3 | if "%OS%" == "Windows_NT" goto WinNT
 4 | perl -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
 5 | goto endofperl
 6 | :WinNT
 7 | perl -x -S %0 %*
 8 | if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
 9 | if %errorlevel% == 9009 echo You do not have Perl in your PATH.
10 | if errorlevel 1 goto script_failed_so_exit_with_non_zero_val 2>nul
11 | goto endofperl
12 | @rem ';
13 | #!perl
14 | #line 15
15 | # Perl section (run through `perl -x` above): validate the metric, then call CorScorer::Score.
16 | BEGIN {
17 |     $d = $0;
18 |     $d =~ s/\/[^\/][^\/]*$//g;  # drop the trailing "/<name>" component of the script path
19 |     push(@INC, $d."/lib");      # search "<that path>/lib" for modules such as CorScorer
20 | }
21 | 
22 | use strict;
23 | use CorScorer;
24 | # Fewer than three arguments: print the usage text and stop.
25 | if (@ARGV < 3) {
26 |   print q|
27 |   use: scorer.bat <metric> <keys_file> <response_file> [name]
28 |   
29 |   metric: the metric desired to score the results:
30 |      muc: MUCScorer (Vilain et al, 1995)
31 |      bcub: B-Cubed (Bagga and Baldwin, 1998)
32 |      ceafm: CEAF (Luo et al, 2005) using mention-based similarity
33 |      ceafe: CEAF (Luo et al, 2005) using entity-based similarity
34 |      all: uses all the metrics to score
35 |   
36 |   keys_file: file with expected coreference chains in SemEval format
37 |   
38 |   response_file: file with output of coreference system (SemEval format)
39 |   
40 |   name: [optional] the name of the document to score. If name is not
41 |      given, all the documents in the dataset will be scored. If given
42 |      name is "none" then all the documents are scored but only total
43 |      results are shown.
44 |   
45 |   |;
46 |   exit;
47 | }
48 | # Lower-case the metric so validation and the exact 'all' comparison below agree on case.
49 | my $metric = lc(shift (@ARGV));
50 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)$/) {  # anchored: reject names such as "allx"
51 |   print "Invalid metric\n";
52 |   exit;
53 | }
54 | 
55 | # 'all' expands to each individual metric, scored in turn with the remaining arguments.
56 | if ($metric eq 'all') {
57 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
58 |     print "\nMETRIC $m:\n";
59 |     &CorScorer::Score( $m, @ARGV );
60 |   }
61 | }
62 | else {
63 |   &CorScorer::Score( $metric, @ARGV );
64 | }
65 | 
66 | __END__
67 | :endofperl
68 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/scorer.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Command-line front end: validates the metric name, then delegates to CorScorer::Score.
 3 | BEGIN {
 4 |   $d = $0;
 5 |   $d =~ s/\/[^\/][^\/]*$//g;  # drop the trailing "/<name>" component of the script path
 6 |   # If nothing was removed ($0 contains no "/"), use the relative "lib" directory instead.
 7 |   if ($d eq $0) {
 8 |     unshift(@INC, "lib");
 9 |   }
10 |   else {
11 |     unshift(@INC, $d . "/lib");
12 |   }
13 | }
14 | 
15 | use strict;
16 | use CorScorer;
17 | # Fewer than three arguments: print the usage text and stop.
18 | if (@ARGV < 3) {
19 |   print q|
20 | use: scorer.pl <metric> <keys_file> <response_file> [name]
21 | 
22 |   metric: the metric desired to score the results:
23 |     muc: MUCScorer (Vilain et al, 1995)
24 |     bcub: B-Cubed (Bagga and Baldwin, 1998)
25 |     ceafm: CEAF (Luo et al, 2005) using mention-based similarity
26 |     ceafe: CEAF (Luo et al, 2005) using entity-based similarity
27 |     blanc: BLANC
28 |     all: uses all the metrics to score
29 | 
30 |   keys_file: file with expected coreference chains in SemEval format
31 | 
32 |   response_file: file with output of coreference system (SemEval format)
33 | 
34 |   name: [optional] the name of the document to score. If name is not
35 |     given, all the documents in the dataset will be scored. If given
36 |     name is "none" then all the documents are scored but only total
37 |     results are shown.
38 | 
39 | |;
40 |   exit;
41 | }
42 | # Lower-case the metric so validation and the exact 'all' comparison below agree on case.
43 | my $metric = lc(shift(@ARGV));
44 | if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)$/) {  # anchored: reject names such as "allx"
45 |   print "Invalid metric\n";
46 |   exit;
47 | }
48 | # 'all' expands to each individual metric, scored in turn with the remaining arguments.
49 | if ($metric eq 'all') {
50 |   foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
51 |     print "\nMETRIC $m:\n";
52 |     &CorScorer::Score($m, @ARGV);
53 |   }
54 | }
55 | else {
56 |   &CorScorer::Score($metric, @ARGV);
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-10.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	z	-
17 | test2	0	5	e	(4)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-11.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-12.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	1)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(2)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(3)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(4
15 | test2	0	3	d2	4)
16 | test2	0	4	z	-
17 | test2	0	5	e	(5)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(6)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-13.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	0)
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	(0)
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	x	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	z	-
17 | test2	0	5	e	(0)
18 | test2	0	6	y	-
19 | test2	0	7	f1	(0)
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	-
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	-
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	-
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	y	(2)
17 | test2	0	5	e	(2)
18 | test2	0	6	z	(3)
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	x	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(1
 7 | test1	0	5	b3	1)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	(3
 7 | test1	0	5	b3	3)
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-7.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-8.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A-9.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1(3(3(3(3(3(3(3(3(3(3
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	3)3)3)3)3)3)3)3)3)3)1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	x	(1)
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	z	(3)
17 | test2	0	5	e	-
18 | test2	0	6	y	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-A.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(2
15 | test2	0	3	d2	2)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(2)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-B-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-B.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 -
72 | nw/xinhua/00/chtb_0009 -
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-C-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10043
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10043)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 (10043
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 10043)
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-C.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (10043
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 -
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 10043)
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | nw/xinhua/00/chtb_0009 -
31 | nw/xinhua/00/chtb_0009 (10054
32 | nw/xinhua/00/chtb_0009 -
33 | nw/xinhua/00/chtb_0009 10054)
34 | nw/xinhua/00/chtb_0009 -
35 | nw/xinhua/00/chtb_0009 -
36 | nw/xinhua/00/chtb_0009 -
37 | nw/xinhua/00/chtb_0009 -
38 | nw/xinhua/00/chtb_0009 -
39 | nw/xinhua/00/chtb_0009 -
40 | nw/xinhua/00/chtb_0009 -
41 | nw/xinhua/00/chtb_0009 -
42 | nw/xinhua/00/chtb_0009 -
43 | nw/xinhua/00/chtb_0009 -
44 | nw/xinhua/00/chtb_0009 -
45 | nw/xinhua/00/chtb_0009 -
46 | nw/xinhua/00/chtb_0009 -
47 | nw/xinhua/00/chtb_0009 -
48 | nw/xinhua/00/chtb_0009 -
49 | nw/xinhua/00/chtb_0009 (10043)
50 | nw/xinhua/00/chtb_0009 -
51 | nw/xinhua/00/chtb_0009 -
52 | nw/xinhua/00/chtb_0009 -
53 | nw/xinhua/00/chtb_0009 -
54 | nw/xinhua/00/chtb_0009 -
55 | nw/xinhua/00/chtb_0009 -
56 | nw/xinhua/00/chtb_0009 -
57 | nw/xinhua/00/chtb_0009 -
58 | nw/xinhua/00/chtb_0009 -
59 | nw/xinhua/00/chtb_0009 -
60 | nw/xinhua/00/chtb_0009 -
61 | nw/xinhua/00/chtb_0009 -
62 | nw/xinhua/00/chtb_0009 -
63 | nw/xinhua/00/chtb_0009 -
64 | nw/xinhua/00/chtb_0009 (10054
65 | nw/xinhua/00/chtb_0009 10054)
66 | nw/xinhua/00/chtb_0009 -
67 | nw/xinhua/00/chtb_0009 -
68 | nw/xinhua/00/chtb_0009 (10054)
69 | nw/xinhua/00/chtb_0009 -
70 | nw/xinhua/00/chtb_0009 -
71 | nw/xinhua/00/chtb_0009 (10060)
72 | nw/xinhua/00/chtb_0009 (10060)
73 | 
74 | #end document
75 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-D-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-D.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-E-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (1)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (1)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (1)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (1)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-E.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (2)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (3)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 (3)
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 (3)
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 (3)
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 (3)
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-F-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-F.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-G-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-G.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-H-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-H.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-I-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-I.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-J-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 -
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-J.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 -
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 -
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-K-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 (3)
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-K.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 -
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (1)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (1)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 -
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (1)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (1)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 (1)
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-L-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (2)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 -
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (3)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (3)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 (3)
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-L.key:
--------------------------------------------------------------------------------
 1 | #begin document (nw/xinhua/00/chtb_0009); part 000
 2 | nw/xinhua/00/chtb_0009 -
 3 | nw/xinhua/00/chtb_0009 (1)
 4 | nw/xinhua/00/chtb_0009 -
 5 | nw/xinhua/00/chtb_0009 (1)
 6 | nw/xinhua/00/chtb_0009 -
 7 | nw/xinhua/00/chtb_0009 (1)
 8 | nw/xinhua/00/chtb_0009 -
 9 | nw/xinhua/00/chtb_0009 (2)
10 | nw/xinhua/00/chtb_0009 -
11 | nw/xinhua/00/chtb_0009 (2)
12 | nw/xinhua/00/chtb_0009 -
13 | nw/xinhua/00/chtb_0009 (2)
14 | nw/xinhua/00/chtb_0009 -
15 | nw/xinhua/00/chtb_0009 (2)
16 | nw/xinhua/00/chtb_0009 -
17 | nw/xinhua/00/chtb_0009 -
18 | nw/xinhua/00/chtb_0009 -
19 | nw/xinhua/00/chtb_0009 -
20 | nw/xinhua/00/chtb_0009 -
21 | nw/xinhua/00/chtb_0009 -
22 | nw/xinhua/00/chtb_0009 -
23 | nw/xinhua/00/chtb_0009 -
24 | nw/xinhua/00/chtb_0009 -
25 | nw/xinhua/00/chtb_0009 -
26 | nw/xinhua/00/chtb_0009 -
27 | nw/xinhua/00/chtb_0009 -
28 | nw/xinhua/00/chtb_0009 -
29 | nw/xinhua/00/chtb_0009 -
30 | 
31 | #end document
32 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-M.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-1.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-2.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(0
15 | test2	0	3	d2	0)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(0)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(0
20 | test2	0	8	f2	-
21 | test2	0	9	f3	0)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-3.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(1
15 | test2	0	3	d2	1)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(1)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(2
20 | test2	0	8	f2	-
21 | test2	0	9	f3	2)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-4.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	(3)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(4)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(5)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-5.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(0)
13 | test2	0	1	jnk	(0)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(0)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(0)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N-6.response:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(0
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	0)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(1)
13 | test2	0	1	jnk	(1)
14 | test2	0	2	d1	-
15 | test2	0	3	d2	-
16 | test2	0	4	jnk	(1)
17 | test2	0	5	e	-
18 | test2	0	6	jnk	(2)
19 | test2	0	7	f1	-
20 | test2	0	8	f2	-
21 | test2	0	9	f3	-
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/DataFiles/TC-N.key:
--------------------------------------------------------------------------------
 1 | #begin document (LuoTestCase); 
 2 | test1	0	0	a1	(0
 3 | test1	0	1	a2	0)
 4 | test1	0	2	junk	-
 5 | test1	0	3	b1	(1
 6 | test1	0	4	b2	-
 7 | test1	0	5	b3	-
 8 | test1	0	6	b4	1)
 9 | test1	0	7	jnk	-
10 | test1	0	8	.	-
11 | 
12 | test2	0	0	c	(2)
13 | test2	0	1	jnk	-
14 | test2	0	2	d1	(3
15 | test2	0	3	d2	3)
16 | test2	0	4	jnk	-
17 | test2	0	5	e	(4)
18 | test2	0	6	jnk	-
19 | test2	0	7	f1	(5
20 | test2	0	8	f2	-
21 | test2	0	9	f3	5)
22 | test2	0	10	.	-	
23 | #end document
24 | 


--------------------------------------------------------------------------------
/conll-2012/scorer/v8.01/test/test.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Runs every case in @CorefMetricTestConfig::TestCases; exits 1 if any metric fails.
 3 | BEGIN {
 4 |     $d = $0;
 5 |     $d =~ s/\/[^\/][^\/]*$//g or $d = ".";  # no "/" in $0: the script is in the cwd
 6 |     push(@INC, $d);
 7 |     push(@INC, $d . "/../lib");
 8 | }
 9 | 
10 | use strict;
11 | use CorScorer;
12 | use CorefMetricTest;
13 | use CorefMetricTestConfig;
14 | 
15 | my $error_tolerance = 1.e-4;
16 | my $script_dir = $0;
17 | $script_dir =~ s/\/[^\/][^\/]*$//g or $script_dir = ".";  # same fallback as in BEGIN
18 | my $num_failed = 0;  # number of metrics whose actual values missed the expected ones
19 | foreach my $test_case (@CorefMetricTestConfig::TestCases) {
20 |   my $id = $test_case->{'id'};
21 |   my @key_response_files = ($script_dir . "/" . $test_case->{'key_file'}, 
22 |                             $script_dir . "/" . $test_case->{'response_file'});
23 |   print "\nTesting case ($id): keyFile=", $key_response_files[0], 
24 |         " responseFile=", $key_response_files[1], "\n";
25 |   my $expected_metrics = $test_case->{'expected_metrics'};
26 |   foreach my $metric_name (sort keys %$expected_metrics) {
27 |     my $expected_values = $expected_metrics->{$metric_name};
28 |     *::SAVED_STDOUT = *STDOUT;
29 |     *STDOUT = *::SUPRRES_STDOUT;  # handle is never opened here, so prints to STDOUT are dropped
30 |     my @actual_counts = &CorScorer::Score($metric_name, @key_response_files);
31 |     # Compute R,P,and F1 from raw counts.
32 |     my @actual_values = CorefMetricTest::ComputeScoreFromCounts(@actual_counts);
33 |     *STDOUT = *::SAVED_STDOUT;  # restore the real stdout for the report below
34 |     my $diff = CorefMetricTest::DiffExpectedAndActual($expected_values, \@actual_values);
35 |     printf "  metric: %+10s", $metric_name;
36 |     if ($diff < $error_tolerance) {
37 |       print " => PASS\n";
38 |     } else {
39 |       print " => FAIL\n";
40 |       print "    Expected (recall, prec, F1) = (", join(" ", @$expected_values), ")\n";
41 |       print "    Actual (recall, prec, F1) = (", join(" ", @actual_values), ")\n";
42 |       $num_failed++;  # keep running the remaining cases; the failure is reported at exit
43 |     }
44 |   }
45 | }
46 | 
47 | exit($num_failed ? 1 : 0);


--------------------------------------------------------------------------------
/conll-2012/v3/scripts/conll2coreference.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # usage: write the help banner to stdout, then terminate the script.
  3 | usage() {
  4 | cat << EOF
  5 | 
  6 | 
  7 | 
  8 | ----------------------------------------------------------------------------------------------------
  9 | Usage:
 10 | -----
 11 | 
 12 | ${0##*/}  <conll-file>
 13 | 
 14 | 
 15 | Description:
 16 | -----------
 17 | 
 18 | Takes a *conll file as input and prints out the corresponding coreference file
 19 | 
 20 | ----------------------------------------------------------------------------------------------------
 21 | 
 22 | 
 23 | 
 24 | 
 25 | EOF
 26 | exit $?   # same as a bare exit: leave with the status of cat
 27 | }
 28 | # message TEXT...: print TEXT on stderr, framed on stdout by rules of dashes
 29 | # and blank lines.  Arguments are joined by the first character of IFS.
 30 | function message
 31 | {
 32 |   echo "----------------------------------------------------------------------------------------------------"
 33 |   echo
 34 |   # Quoted so whitespace and glob characters survive; printf ignores a leading -n/-e.
 35 |   printf '%s\n' "$*" 1>&2
 36 |   echo
 37 |   echo "----------------------------------------------------------------------------------------------------"
 38 | }
 39 | 
 40 | 
 41 | 
 42 | function r { echo ${1%.*}; }
 43 | function t { echo ${1##*/}; }
 44 | function e { echo $(t ${1##*.}); }
 45 | function h { echo ${1%/*}; }
 46 | 
 47 | # erun [-d] [-v] COMMAND
 48 | #
 49 | # Evaluate the single string COMMAND with eval and abort the whole script
 50 | # (after printing "Exit code: N") when it finishes with a non-zero status N.
 51 | #   -d, or DEBUG=1 in the calling shell: only print the command, do not run it
 52 | #   -v, or VERBOSE=1: echo the command before running it
 53 | # -d must come before -v when both flags are given.
 54 | function erun () {
 55 |   # Keep the flags local so they neither leak into nor clobber the caller's
 56 |   # variables of the same name.
 57 |   local debug=0 verbose=0
 58 | 
 59 |   if [[ $1 == "-d" ]]; then
 60 |     debug=1
 61 |     shift
 62 |   fi
 63 |   if [[ $DEBUG -eq 1 ]]; then
 64 |     debug=1
 65 |   fi
 66 | 
 67 |   if [[ $1 == "-v" ]]; then
 68 |     verbose=1
 69 |     shift
 70 |   fi
 71 |   if [[ $VERBOSE -eq 1 ]]; then
 72 |     verbose=1
 73 |   fi
 74 | 
 75 |   if [[ $debug -eq 1 ]]; then
 76 |     echo "debug mode ..."
 77 |     echo "eval $1"
 78 |   else
 79 |     echo "normal mode ..."
 80 |     if [[ $verbose -eq 1 ]]; then
 81 |       echo -e "\nrun: $1\n-------------"
 82 |     fi
 83 | 
 84 |     # Quoted: an unquoted $1 is word-split and re-joined before eval sees it,
 85 |     # which collapses runs of whitespace inside quoted arguments and expands
 86 |     # glob characters too early.
 87 |     eval "$1"
 88 |   fi
 89 | 
 90 |   # $? is the status of the if-statement above, i.e. of the last command it
 91 |   # ran: the echo in debug mode, the eval'd command otherwise.
 92 |   local code=$?
 93 |   if [ $code -ne 0 ]; then
 94 |     echo "Exit code: $code"
 95 |     exit $code
 96 |   fi
 97 | }
 98 | 
 99 | 
100 | 
101 | 
102 | # handle the valid command line options
103 | DEBUG=0
104 | VERBOSE=0
105 | DEBUG_OPTION=""
106 | while getopts vdh opt
107 | do
108 |   case "$opt" in
109 |     v)
110 |       VERBOSE=1;;
111 | 
112 |     d)
113 |       DEBUG=1;;
114 | 
115 |     \?)
116 |       # usage ends with its own exit, which would make the exit 1 below
117 |       # unreachable; run it in a subshell so an invalid option really fails,
118 |       # and send the help text to stderr.
119 |       ( usage ) 1>&2
120 |       exit 1;;
121 | 
122 |     h)
123 |       usage
124 |       exit 0;;
125 | 
126 |     esac
127 | done
128 | shift $((OPTIND - 1))
129 | 
130 | # at this point $* contains the arguments after interpreting the options;
131 | # the first one is the file or directory that is searched for *_conll files
132 | d=$1
133 | 
134 | # if no arguments are specified, then just print usage
135 | if [[ $# -eq 0 ]]; then
136 |     usage
137 | fi
138 | 
139 | # debugging
140 | if [[ $DEBUG -eq 1 ]]; then
141 |     echo "debugging mode is on ..." 1>&2
142 |     DEBUG_OPTION="-d"
143 | fi
144 | 
145 | # Convert every *_conll file below $d.  find's output is read NUL-delimited so
146 | # paths with whitespace or glob characters are handled, and the loop is fed by
147 | # process substitution (not a pipe) so that failed is set in this shell.
148 | failed=0
149 | while IFS= read -r -d '' file; do
150 | 
151 |   if [[ $file =~ "data/english/annotations" ]]; then
152 |     LANGUAGE=english
153 |   elif [[ $file =~ "data/chinese/annotations" ]]; then
154 |     LANGUAGE=chinese
155 |   else
156 |     LANGUAGE=arabic
157 |   fi
158 | 
159 |   echo "language: $LANGUAGE"
160 |   # Swap only the trailing "_conll" (guaranteed by the find pattern); a
161 |   # first-match substitution could instead rewrite a directory component
162 |   # earlier in the path.
163 |   coref=${file%_conll}_coref
164 |   echo "$file -> $coref ..."
165 |   if ! conll2coreference.py -l "$LANGUAGE" "$file" > "$coref"; then
166 |     echo "conversion failed: $file" 1>&2
167 |     failed=1
168 |   fi
169 | done < <(find "$d" -name "*_conll" -print0)
170 | 
171 | 
172 | # complain (and exit non-zero) if any conversion failed; testing $? here would
173 | # only reflect the last file processed
174 | if [[ $failed -ne 0 ]]; then
175 |   echo "one or more conversions exited with a non-zero status" 1>&2
176 |   exit 1
177 | fi
178 | 
179 | 
180 | 
181 | 


--------------------------------------------------------------------------------
/conll-2012/v3/scripts/conll2name.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # usage: write the help banner to stdout, then terminate the script.
  3 | function usage {
  4 | cat <<EOF
  5 | 
  6 | 
  7 | 
  8 | ----------------------------------------------------------------------------------------------------
  9 | Usage:
 10 | -----
 11 | 
 12 | ${0##*/}  <conll-file>
 13 | 
 14 | 
 15 | Description:
 16 | -----------
 17 | 
 18 | Takes a *conll file as input and prints out the corresponding name file
 19 | 
 20 | ----------------------------------------------------------------------------------------------------
 21 | 
 22 | 
 23 | 
 24 | 
 25 | EOF
 26 | exit;   # bare exit: leave with the status of cat
 27 | }
 28 | # message TEXT...: print TEXT on stderr, framed on stdout by rules of dashes
 29 | # and blank lines.  Arguments are joined by the first character of IFS.
 30 | function message
 31 | {
 32 |   echo "----------------------------------------------------------------------------------------------------"
 33 |   echo
 34 |   # Quoted so whitespace and glob characters survive; printf ignores a leading -n/-e.
 35 |   printf '%s\n' "$*" 1>&2
 36 |   echo
 37 |   echo "----------------------------------------------------------------------------------------------------"
 38 | }
 39 | 
 40 | 
 41 | 
 42 | function r { echo ${1%.*}; }
 43 | function t { echo ${1##*/}; }
 44 | function e { echo $(t ${1##*.}); }
 45 | function h { echo ${1%/*}; }
 46 | 
 47 | # erun [-d] [-v] COMMAND
 48 | #
 49 | # Evaluate the single string COMMAND with eval and abort the whole script
 50 | # (after printing "Exit code: N") when it finishes with a non-zero status N.
 51 | #   -d, or DEBUG=1 in the calling shell: only print the command, do not run it
 52 | #   -v, or VERBOSE=1: echo the command before running it
 53 | # -d must come before -v when both flags are given.
 54 | function erun () {
 55 |   # Keep the flags local so they neither leak into nor clobber the caller's
 56 |   # variables of the same name.
 57 |   local debug=0 verbose=0
 58 | 
 59 |   if [[ $1 == "-d" ]]; then
 60 |     debug=1
 61 |     shift
 62 |   fi
 63 |   if [[ $DEBUG -eq 1 ]]; then
 64 |     debug=1
 65 |   fi
 66 | 
 67 |   if [[ $1 == "-v" ]]; then
 68 |     verbose=1
 69 |     shift
 70 |   fi
 71 |   if [[ $VERBOSE -eq 1 ]]; then
 72 |     verbose=1
 73 |   fi
 74 | 
 75 |   if [[ $debug -eq 1 ]]; then
 76 |     echo "debug mode ..."
 77 |     echo "eval $1"
 78 |   else
 79 |     echo "normal mode ..."
 80 |     if [[ $verbose -eq 1 ]]; then
 81 |       echo -e "\nrun: $1\n-------------"
 82 |     fi
 83 | 
 84 |     # Quoted: an unquoted $1 is word-split and re-joined before eval sees it,
 85 |     # which collapses runs of whitespace inside quoted arguments and expands
 86 |     # glob characters too early.
 87 |     eval "$1"
 88 |   fi
 89 | 
 90 |   # $? is the status of the if-statement above, i.e. of the last command it
 91 |   # ran: the echo in debug mode, the eval'd command otherwise.
 92 |   local code=$?
 93 |   if [ $code -ne 0 ]; then
 94 |     echo "Exit code: $code"
 95 |     exit $code
 96 |   fi
 97 | }
 98 | 
 99 | 
100 | 
101 | 
102 | # handle the valid command line options
103 | DEBUG=0
104 | VERBOSE=0
105 | DEBUG_OPTION=""
106 | while getopts vdh opt
107 | do
108 |   case "$opt" in
109 |     v)
110 |       VERBOSE=1;;
111 | 
112 |     d)
113 |       DEBUG=1;;
114 | 
115 |     \?)
116 |       # usage ends with its own exit, which would make the exit 1 below
117 |       # unreachable; run it in a subshell so an invalid option really fails,
118 |       # and send the help text to stderr.
119 |       ( usage ) 1>&2
120 |       exit 1;;
121 | 
122 |     h)
123 |       usage
124 |       exit 0;;
125 | 
126 |     esac
127 | done
128 | shift $((OPTIND - 1))
129 | 
130 | # at this point $* contains the arguments after interpreting the options;
131 | # the first one is the file or directory that is searched for *_conll files
132 | d=$1
133 | 
134 | # if no arguments are specified, then just print usage
135 | if [[ $# -eq 0 ]]; then
136 |     usage
137 | fi
138 | 
139 | # debugging
140 | if [[ $DEBUG -eq 1 ]]; then
141 |     echo "debugging mode is on ..." 1>&2
142 |     DEBUG_OPTION="-d"
143 | fi
144 | 
145 | # Convert every *_conll file below $d.  find's output is read NUL-delimited so
146 | # paths with whitespace or glob characters are handled, and the loop is fed by
147 | # process substitution (not a pipe) so that failed is set in this shell.
148 | failed=0
149 | while IFS= read -r -d '' file; do
150 | 
151 |   if [[ $file =~ "data/english/annotations" ]]; then
152 |     LANGUAGE=english
153 |   elif [[ $file =~ "data/chinese/annotations" ]]; then
154 |     LANGUAGE=chinese
155 |   else
156 |     LANGUAGE=arabic
157 |   fi
158 | 
159 |   echo "language: $LANGUAGE"
160 |   # Swap only the trailing "_conll" (guaranteed by the find pattern); a
161 |   # first-match substitution could instead rewrite a directory component
162 |   # earlier in the path.
163 |   name=${file%_conll}_name
164 |   echo "$file -> $name ..."
165 |   if ! conll2name.py -l "$LANGUAGE" "$file" > "$name"; then
166 |     echo "conversion failed: $file" 1>&2
167 |     failed=1
168 |   fi
169 | done < <(find "$d" -name "*_conll" -print0)
170 | 
171 | # complain (and exit non-zero) if any conversion failed; testing $? here would
172 | # only reflect the last file processed
173 | if [[ $failed -ne 0 ]]; then
174 |   echo "one or more conversions exited with a non-zero status" 1>&2
175 |   exit 1
176 | fi
177 | 
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/conll-2012/v3/scripts/conll2parse.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | function usage {
  4 | cat <<EOF
  5 | 
  6 | 
  7 | 
  8 | ----------------------------------------------------------------------------------------------------
  9 | Usage:
 10 | -----
 11 | 
 12 | ${0##*/}  <conll-file>
 13 | 
 14 | 
 15 | Description:
 16 | -----------
 17 | 
 18 | Takes a *conll file as input and prints out the corresponding parse file
 19 | 
 20 | ----------------------------------------------------------------------------------------------------
 21 | 
 22 | 
 23 | 
 24 | 
 25 | EOF
 26 | exit;
 27 | }
 28 | 
 29 | 
 30 | function message
 31 | {
 32 |   echo "----------------------------------------------------------------------------------------------------"
 33 |   echo
 34 |   echo $* 1>&2
 35 |   echo
 36 |   echo "----------------------------------------------------------------------------------------------------"
 37 | 
 38 | }
 39 | 
 40 | 
 41 | 
 42 | function r { echo ${1%.*}; }
 43 | function t { echo ${1##*/}; }
 44 | function e { echo $(t ${1##*.}); }
 45 | function h { echo ${1%/*}; }
 46 | 
 47 | # define helper function: run a command and print its exit code
 48 | function erun () {
 49 |   debug=0
 50 |   if [[ $1 == "-d" ]]; then
 51 |     debug=1
 52 |     shift;
 53 |   fi
 54 | 
 55 | 
 56 |   if [[ $DEBUG -eq 1 ]]; then
 57 |       debug=1
 58 |   fi
 59 | 
 60 | 
 61 | 
 62 | 
 63 |   verbose=0
 64 |   if [[ $1 == "-v" ]]; then
 65 |     verbose=1
 66 |     shift;
 67 |   fi
 68 | 
 69 | 
 70 |   if [[ $VERBOSE -eq 1 ]]; then
 71 |       verbose=1
 72 |   fi
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 |   if [[ $debug -eq 1 ]]; then
 80 |     echo "debug mode ..."
 81 |     echo "eval $1"
 82 |   else
 83 |     echo "normal mode ..."
 84 |     if [[ $verbose -eq 1 ]]; then
 85 |       echo -e "\nrun: $1\n-------------"
 86 |     fi
 87 | 
 88 |     eval $1
 89 |   fi
 90 | 
 91 | 
 92 |   local code=$?
 93 |   if [ $code -ne 0 ]; then
 94 | 	  echo "Exit code: $code"
 95 | 	  exit $code
 96 |   fi
 97 | }
 98 | 
 99 | 
100 | 
101 | 
# handle the valid command line options
DEBUG=0
VERBOSE=0
DEBUG_OPTION=""
while getopts vdh opt
do
  case "$opt" in
    v)
      VERBOSE=1;;

    d)
      DEBUG=1;;

    \?)
      usage
      exit 1;;

    h)
      usage
      exit 0;;

    :)
      echo "option -$OPTARG requires an argument"
      usage
      exit 1;;

    esac
done
shift $((OPTIND - 1))


# at this point "$@" contains the arguments after interpreting the options

# if no arguments are specified, then just print usage (which exits)
if [[ $# -eq 0 ]]; then
    usage
fi

d=$1


# debugging
if [[ $DEBUG -eq 1 ]]; then
    echo "debugging mode is on ..." 1>&2
    DEBUG_OPTION="-d"
fi


# Remember whether any conversion failed: "$?" inspected after the loop would
# only reflect the very last command that ran.
status=0

# NUL-delimited find output keeps paths containing whitespace or glob
# characters intact.
while IFS= read -r -d '' file; do

  if [[ $file == *"data/english/annotations"* ]]; then
    LANGUAGE=english
  elif [[ $file == *"data/chinese/annotations"* ]]; then
    LANGUAGE=chinese
  else
    LANGUAGE=arabic
  fi


  echo "language: $LANGUAGE"

  # Replace only the trailing "_conll" (find guarantees the suffix), never a
  # "_conll" that happens to occur earlier in the path.
  parse="${file%_conll}_parse"
  echo "$file -> $parse ..."
  if ! conll2parse.py -l "$LANGUAGE" "$file" > "$parse"; then
    echo "conll2parse.py failed on $file" 1>&2
    status=1
  fi
done < <(find "$d" -name "*_conll" -print0)


# complain if any conversion exited with a non-zero status
if [[ $status -ne 0 ]]; then
  echo "the last command exited with a non-zero status" 1>&2
fi
exit $status
184 | 
185 | 
186 | 
187 | 


--------------------------------------------------------------------------------
/conll.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import re
  6 | import tempfile
  7 | import subprocess
  8 | import operator
  9 | import collections
 10 | 
 11 | BEGIN_DOCUMENT_REGEX = re.compile(r"#begin document \((.*)\); part (\d+)")
 12 | COREF_RESULTS_REGEX = re.compile(r".*Coreference: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL)
 13 | 
 14 | def get_doc_key(doc_id, part):
 15 |   return "{}_{}".format(doc_id, int(part))
 16 | 
 17 | def output_conll(input_file, output_file, predictions, subtoken_map):
 18 |   prediction_map = {}
 19 |   for doc_key, clusters in predictions.items():
 20 |     start_map = collections.defaultdict(list)
 21 |     end_map = collections.defaultdict(list)
 22 |     word_map = collections.defaultdict(list)
 23 |     for cluster_id, mentions in enumerate(clusters):
 24 |       for start, end in mentions:
 25 |         start, end = subtoken_map[doc_key][start], subtoken_map[doc_key][end]
 26 |         if start == end:
 27 |           if cluster_id not in word_map[start]:
 28 |             word_map[start].append(cluster_id)
 29 |         else:
 30 |           if (((cluster_id, end) not in start_map[start])
 31 |               and ((cluster_id, start) not in end_map[end])):
 32 |             start_map[start].append((cluster_id, end))
 33 |             end_map[end].append((cluster_id, start))
 34 |     for k,v in start_map.items():
 35 |       start_map[k] = [cluster_id for cluster_id, end in sorted(v, key=operator.itemgetter(1), reverse=True)]
 36 |     for k,v in end_map.items():
 37 |       end_map[k] = [cluster_id for cluster_id, start in sorted(v, key=operator.itemgetter(1), reverse=True)]
 38 |     prediction_map[doc_key] = (start_map, end_map, word_map)
 39 |   word_index = 0
 40 |   for line in input_file.readlines():
 41 |     row = line.split()
 42 |     if len(row) == 0:
 43 |       output_file.write("\n")
 44 |     elif row[0].startswith("#"):
 45 |       begin_match = re.match(BEGIN_DOCUMENT_REGEX, line)
 46 |       if begin_match:
 47 |         doc_key = get_doc_key(begin_match.group(1), begin_match.group(2))
 48 |         start_map, end_map, word_map = prediction_map.get(doc_key,
 49 |                                                           ({}, {}, {}))
 50 |         word_index = 0
 51 |       output_file.write(line)
 52 |       output_file.write("\n")
 53 |     else:
 54 |       assert get_doc_key(row[0], row[1]) == doc_key
 55 |       coref_list = []
 56 |       if word_index in end_map:
 57 |         for cluster_id in end_map[word_index]:
 58 |           coref_list.append("{})".format(cluster_id))
 59 |       if word_index in word_map:
 60 |         for cluster_id in word_map[word_index]:
 61 |           coref_list.append("({})".format(cluster_id))
 62 |       if word_index in start_map:
 63 |         for cluster_id in start_map[word_index]:
 64 |           coref_list.append("({}".format(cluster_id))
 65 | 
 66 |       if len(coref_list) == 0:
 67 |         row[-1] = "-"
 68 |       else:
 69 |         row[-1] = "|".join(coref_list)
 70 | 
 71 |       output_file.write("   ".join(row))
 72 |       output_file.write("\n")
 73 |       word_index += 1
 74 | 
 75 | def official_conll_eval(gold_path, predicted_path, metric, official_stdout=False):
 76 |   cmd = ["conll-2012/scorer/v8.01/scorer.pl", metric, gold_path, predicted_path, "none"]
 77 |   process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 78 |   stdout, stderr = process.communicate()
 79 |   process.wait()
 80 | 
 81 |   stdout = stdout.decode("utf-8")
 82 |   if stderr is not None:
 83 |     print(stderr)
 84 | 
 85 |   if official_stdout:
 86 |     print("Official result for {}".format(metric))
 87 |     print(stdout)
 88 | 
 89 |   coref_results_match = re.match(COREF_RESULTS_REGEX, stdout)
 90 |   recall = float(coref_results_match.group(1))
 91 |   precision = float(coref_results_match.group(2))
 92 |   f1 = float(coref_results_match.group(3))
 93 |   return { "r": recall, "p": precision, "f": f1 }
 94 | 
 95 | def evaluate_conll(gold_path, predictions, subtoken_maps, official_stdout=False):
 96 |   with tempfile.NamedTemporaryFile(delete=False, mode="w") as prediction_file:
 97 |     with open(gold_path, "r") as gold_file:
 98 |       output_conll(gold_file, prediction_file, predictions, subtoken_maps)
 99 |     print("Predicted conll file: {}".format(prediction_file.name))
100 |   return { m: official_conll_eval(gold_file.name, prediction_file.name, m, official_stdout) for m in ("muc", "bcub", "ceafe") }
101 | 


--------------------------------------------------------------------------------
/conversion_scripts/ancor_to_json.py:
--------------------------------------------------------------------------------
  1 | import sys, os
  2 | from collections import defaultdict
  3 | import json
  4 | 
  5 | text_dir = sys.argv[1]
  6 | chains_dir = sys.argv[2]
  7 | mentions_dir = sys.argv[3]
  8 | output = sys.argv[4]
  9 | books = os.listdir(chains_dir)
 10 | 
 11 | output_file = open(output, 'w+')
 12 | 
 13 | def fix_bounds(token_dict, index, go_up=True):
 14 |     if index in token_dict:
 15 |         return token_dict[index]
 16 |     if go_up:
 17 |         return fix_bounds(token_dict, index + 1, go_up=go_up)
 18 |     else:
 19 |         return fix_bounds(token_dict, index - 1, go_up=go_up)
 20 | 
 21 | num_real_mentions = 0
 22 | num_mentions = 0
 23 | num_chains = 0
 24 | for book in books:
 25 |     try:
 26 |         tokens_file = open(text_dir + "/" + book, 'r')
 27 |     except:
 28 |         print (f"Skipping {book}")
 29 |         continue
 30 | 
 31 |     tokens_list = tokens_file.readlines()
 32 |     starts = {}
 33 |     ends = {}
 34 |     sent = []
 35 |     curr_doc_id = 0
 36 |     curr_doc = []
 37 |     doc_len = 0
 38 |     num_reals = 0
 39 |     for i, tokstr in enumerate(tokens_list):
 40 |         tokstr = tokstr.strip()
 41 |         sent_end = tokstr == ""
 42 |         if sent_end:
 43 |             curr_doc.append(sent)
 44 |             sent = []
 45 |         else:
 46 |             tok = tokstr.split("\t")
 47 |             sent.append(tok[3])
 48 |             starts[int(tok[1])] = doc_len
 49 |             ends[int(tok[1]) + int(tok[2])] = doc_len
 50 |             doc_len += 1
 51 | 
 52 |     if sent:
 53 |         curr_doc.append(sent)
 54 | 
 55 |     cluster_doc = open(chains_dir + "/" + book, 'r')
 56 |     clusters = defaultdict(list)
 57 |     seen_mentions = set()
 58 |     for line in cluster_doc:
 59 |         (mid, start, length, chain_id) = tuple([int(x) for x in line.strip().split()])
 60 |         left = fix_bounds(starts, start, go_up=True)
 61 |         right = fix_bounds(ends, start + length, go_up=False)
 62 |         if left > right:
 63 |             print (f"Died on {left}, {right}")
 64 |             right = left
 65 |         clusters[chain_id].append([left, right])
 66 |         if (left, right) not in seen_mentions:
 67 |             seen_mentions.add((left, right))
 68 |         else:
 69 |             print ("dupe")
 70 |             import pdb; pdb.set_trace()
 71 | 
 72 |     mentions_doc = open(mentions_dir + "/" + book, 'r')
 73 |     for i, line in enumerate(mentions_doc):
 74 |         (mid, start, length) = tuple([int(x) for x in line.strip().split()])
 75 |         num_real_mentions += 1
 76 |         num_reals += 1
 77 |         left = fix_bounds(starts, start, go_up=True)
 78 |         right = fix_bounds(ends, start + length, go_up=False)
 79 |         if left > right:
 80 |             print (f"And died on {left}, {right}")
 81 |             right = left
 82 |         if (left, right) not in seen_mentions:
 83 |             clusters[1000000 + i].append([left, right])
 84 | 
 85 | 
 86 |     num_chains += len(clusters)
 87 | 
 88 |     net_mentions = sum([len(c) for c in clusters.values()])
 89 |     num_mentions += net_mentions
 90 | 
 91 |     json_dict = {
 92 |         "doc_key": "ancor_" + book,
 93 |         "language": "russian",
 94 |         "sentences": curr_doc,
 95 |         "clusters": list(clusters.values()),
 96 |     }
 97 |     output_file.write(json.dumps(json_dict) + "\n")
 98 | 
 99 | print (num_real_mentions)
100 | print (num_mentions)
101 | print (num_chains)
102 | 


--------------------------------------------------------------------------------
/conversion_scripts/conll.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import re
  6 | import tempfile
  7 | import subprocess
  8 | import operator
  9 | import collections
 10 | 
 11 | BEGIN_DOCUMENT_REGEX = re.compile(r"#begin document \((.*)\); part (\d+)")
 12 | COREF_RESULTS_REGEX = re.compile(r".*Coreference: Recall: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tPrecision: \([0-9.]+ / [0-9.]+\) ([0-9.]+)%\tF1: ([0-9.]+)%.*", re.DOTALL)
 13 | 
 14 | def get_doc_key(doc_id, part):
 15 |   return "{}_{}".format(doc_id, int(part))
 16 | 
 17 | def output_conll(input_file, output_file, predictions, subtoken_map):
 18 |   prediction_map = {}
 19 |   for doc_key, clusters in predictions.items():
 20 |     start_map = collections.defaultdict(list)
 21 |     end_map = collections.defaultdict(list)
 22 |     word_map = collections.defaultdict(list)
 23 |     for cluster_id, mentions in enumerate(clusters):
 24 |       for start, end in mentions:
 25 |         start, end = subtoken_map[doc_key][start], subtoken_map[doc_key][end]
 26 |         if start == end:
 27 |           if cluster_id not in word_map[start]:
 28 |             word_map[start].append(cluster_id)
 29 |         else:
 30 |           if (((cluster_id, end) not in start_map[start])
 31 |               and ((cluster_id, start) not in end_map[end])):
 32 |             start_map[start].append((cluster_id, end))
 33 |             end_map[end].append((cluster_id, start))
 34 |     for k,v in start_map.items():
 35 |       start_map[k] = [cluster_id for cluster_id, end in sorted(v, key=operator.itemgetter(1), reverse=True)]
 36 |     for k,v in end_map.items():
 37 |       end_map[k] = [cluster_id for cluster_id, start in sorted(v, key=operator.itemgetter(1), reverse=True)]
 38 |     prediction_map[doc_key] = (start_map, end_map, word_map)
 39 |   word_index = 0
 40 |   for line in input_file.readlines():
 41 |     row = line.split()
 42 |     if len(row) == 0:
 43 |       output_file.write("\n")
 44 |     elif row[0].startswith("#"):
 45 |       begin_match = re.match(BEGIN_DOCUMENT_REGEX, line)
 46 |       if begin_match:
 47 |         doc_key = get_doc_key(begin_match.group(1), begin_match.group(2))
 48 |         start_map, end_map, word_map = prediction_map.get(doc_key,
 49 |                                                           ({}, {}, {}))
 50 |         word_index = 0
 51 |       output_file.write(line)
 52 |       output_file.write("\n")
 53 |     else:
 54 |       assert get_doc_key(row[0], row[1]) == doc_key
 55 |       coref_list = []
 56 |       if word_index in end_map:
 57 |         for cluster_id in end_map[word_index]:
 58 |           coref_list.append("{})".format(cluster_id))
 59 |       if word_index in word_map:
 60 |         for cluster_id in word_map[word_index]:
 61 |           coref_list.append("({})".format(cluster_id))
 62 |       if word_index in start_map:
 63 |         for cluster_id in start_map[word_index]:
 64 |           coref_list.append("({}".format(cluster_id))
 65 | 
 66 |       if len(coref_list) == 0:
 67 |         row[-1] = "-"
 68 |       else:
 69 |         row[-1] = "|".join(coref_list)
 70 | 
 71 |       output_file.write("   ".join(row))
 72 |       output_file.write("\n")
 73 |       word_index += 1
 74 | 
 75 | def official_conll_eval(gold_path, predicted_path, metric, official_stdout=False):
 76 |   cmd = ["conll-2012/scorer/v8.01/scorer.pl", metric, gold_path, predicted_path, "none"]
 77 |   process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 78 |   stdout, stderr = process.communicate()
 79 |   process.wait()
 80 | 
 81 |   stdout = stdout.decode("utf-8")
 82 |   if stderr is not None:
 83 |     print(stderr)
 84 | 
 85 |   if official_stdout:
 86 |     print("Official result for {}".format(metric))
 87 |     print(stdout)
 88 | 
 89 |   coref_results_match = re.match(COREF_RESULTS_REGEX, stdout)
 90 |   recall = float(coref_results_match.group(1))
 91 |   precision = float(coref_results_match.group(2))
 92 |   f1 = float(coref_results_match.group(3))
 93 |   return { "r": recall, "p": precision, "f": f1 }
 94 | 
 95 | def evaluate_conll(gold_path, predictions, subtoken_maps, official_stdout=False):
 96 |   with tempfile.NamedTemporaryFile(delete=False, mode="w") as prediction_file:
 97 |     with open(gold_path, "r") as gold_file:
 98 |       output_conll(gold_file, prediction_file, predictions, subtoken_maps)
 99 |     print("Predicted conll file: {}".format(prediction_file.name))
100 |   return { m: official_conll_eval(gold_file.name, prediction_file.name, m, official_stdout) for m in ("muc", "bcub", "ceafe") }
101 | 


--------------------------------------------------------------------------------
/conversion_scripts/convert_arrau.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import json
  4 | import xml.etree.ElementTree as ET
  5 | from collections import defaultdict
  6 | 
  7 | rst_path = "/exp/pxia/incremental_coref/data/LDC2013T22/data/RST_DTreeBank"
  8 | output_path = sys.argv[1]
  9 | 
 10 | coref_drop = 0
 11 | drop_counter = 0
 12 | min_drop = 0
 13 | total = 0
 14 | def get_files(split):
 15 |   path = f"{rst_path}/{split}/MMAX"
 16 |   files = []
 17 |   for f in os.listdir(path):
 18 |     if ".header" in f:
 19 |       files.append(f.split(".header")[0])
 20 |   return files
 21 | 
 22 | def get_offsets(span):
 23 |   if ".." in span:
 24 |     start, end = tuple(span.split(".."))
 25 |   else:
 26 |     start = span
 27 |     end = span
 28 |   return (start, end)
 29 | 
 30 | def xml_words(word_path):
 31 |   tree = ET.parse(word_path)
 32 |   root = tree.getroot()
 33 |   words = root.iter()
 34 |   tokens = []
 35 |   token_names = []
 36 |   for word in words:
 37 |     if "id" in word.attrib:
 38 |       word_id = word.attrib["id"]
 39 |       word_text = word.text
 40 |       tokens.append(word_text)
 41 |       token_names.append(word_id)
 42 |   token_index_map = dict({name: i for i, name in enumerate(token_names)})
 43 |   return (tokens, token_index_map)
 44 | 
 45 | def xml_sentences(sentence_path, token_list, token_map):
 46 |   tree = ET.parse(sentence_path)
 47 |   root = tree.getroot()
 48 |   sentence_iter = root.iter()
 49 |   sentences = []
 50 |   for sentence in sentence_iter:
 51 |     if "id" in sentence.attrib:
 52 |       span = sentence.attrib["span"]
 53 |       start, end = get_offsets(span)
 54 |       tokens = token_list[token_map[start]:token_map[end] + 1]
 55 |       sentences.append(tokens)
 56 |   return sentences
 57 | 
 58 | def xml_markables(path, token_list, token_map):
 59 |   tree = ET.parse(path)
 60 |   root = tree.getroot()
 61 |   markable_iter = root.iter()
 62 |   markables = {}
 63 |   for markable in markable_iter:
 64 |     if "id" in markable.attrib:
 65 |       markable_id = markable.attrib["id"]
 66 |       span = markable.attrib["span"]
 67 |       start, end = get_offsets(span)
 68 |       markables[markable_id] = (token_map[start], token_map[end])
 69 |   return markables
 70 | 
 71 | 
 72 | def xml_coref(path, token_list, token_map):
 73 |   global drop_counter
 74 |   global coref_drop
 75 |   global min_drop
 76 |   global total
 77 |   tree = ET.parse(path)
 78 |   root = tree.getroot()
 79 |   coref_iter = root.iter()
 80 |   clusters = defaultdict(list)
 81 |   for markable in coref_iter:
 82 |     if "id" in markable.attrib:
 83 |       total += 1
 84 |       markable_id = markable.attrib["id"]
 85 |       span = markable.attrib["span"]
 86 |       if "coref_set" not in markable.attrib:
 87 |         coref_drop += 1
 88 |         continue
 89 |       if ("," in span):
 90 |         if "min_ids" not in markable.attrib:
 91 |           min_drop += 1
 92 |           continue
 93 |         else:
 94 |           span = markable.attrib["min_ids"]
 95 |         drop_counter += 1
 96 |       if ".." in span:
 97 |         start, end = tuple(span.split(".."))
 98 |       else:
 99 |         start = span
100 |         end = span
101 |       clusters[markable.attrib["coref_set"]].append([token_map[start], token_map[end]])
102 |   coref_clusters = list(clusters.values())
103 |   return coref_clusters
104 | 
105 | 
def get_markables(path, prefix):
  """Load one document and return (sentences, coref_clusters).

  Words come from <path>/Basedata/<prefix>_words.xml, sentences and
  coreference from the matching files under <path>/markables.
  """
  markable_dir = f"{path}/markables"
  tokens, token_index_map = xml_words(f"{path}/Basedata/{prefix}_words.xml")
  sentences = xml_sentences(f"{markable_dir}/{prefix}_sentence_level.xml",
                            tokens, token_index_map)
  # The markable-level file ({prefix}_markable_level.xml) is not read.
  coref = xml_coref(f"{markable_dir}/{prefix}_coref_level.xml",
                    tokens, token_index_map)
  return (sentences, coref)
116 | 
def process_split(split):
  """Convert every document of `split` and write <output_path>/<split>.jsonlines.

  Each output line is a JSON object with "doc_key", "sentences" and
  "clusters"; lines are joined with "\n" and no trailing newline is written.
  """
  files = get_files(split)
  path = f"{rst_path}/{split}/MMAX"
  all_files = []
  for prefix in files:
    text, clusters = get_markables(path, prefix)
    new_dictionary = {
      "doc_key": prefix,
      "sentences": text,
      "clusters": clusters
    }
    all_files.append(new_dictionary)
  # Close the file deterministically so the data is flushed before returning.
  with open(f"{output_path}/{split}.jsonlines", 'w+') as output:
    output.write("\n".join([json.dumps(doc) for doc in all_files]))
131 | 
132 | 
if __name__ == "__main__":
  # Convert the splits in order, printing the running counters after each.
  for split_name in ("train", "dev", "test"):
    process_split(split_name)
    print(total, drop_counter, coref_drop, min_drop)
140 | 


--------------------------------------------------------------------------------
/conversion_scripts/convert_tf_to_pytorch.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import torch
 3 | import sys
 4 | # from torch_scores import *
 5 | 
 6 | tf_path = sys.argv[1]
 7 | init_vars = tf.train.list_variables(tf_path)
 8 | torch_scorer_vars = {}
 9 | ffnn_vars = ["hidden_bias_0", "hidden_weights_0",
10 |              "output_bias", "output_weights"]
11 | torch_scorer_var_names = (
12 |   [
13 |     "coref_layer/antecedent_distance_emb",
14 |     "coref_layer/same_speaker_emb",
15 |     "coref_layer/segment_distance/segment_distance_embeddings",
16 |     "span_width_prior_embeddings",
17 |     "genre_embeddings",
18 |     "span_width_embeddings",
19 |   ] + [
20 |       "coref_layer/slow_antecedent_scores/{}".format(ffnn_var)
21 |       for ffnn_var in ffnn_vars
22 |   ] +
23 |   ["mention_word_attn/{}".format(ffnn_var) for ffnn_var in ffnn_vars] +
24 |   ["mention_scores/{}".format(ffnn_var) for ffnn_var in ffnn_vars] +
25 |   ["width_scores/{}".format(ffnn_var) for ffnn_var in ffnn_vars])
26 | 
27 | for name, shape in init_vars:
28 |   if name not in torch_scorer_var_names:
29 |     continue
30 |   print("Loading TF weight {} with shape {}".format(name, shape))
31 |   array = tf.train.load_variable(tf_path, name)
32 |   torch_scorer_vars[name] = torch.from_numpy(array.squeeze())
33 | 
34 | torch.save(torch_scorer_vars, sys.argv[2])
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/conversion_scripts/make_qbcoref_splits.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | 
 4 | f = open(sys.argv[1], 'r')
 5 | docs = list(f)
 6 | 
 7 | train, dev, test = [], [], []
 8 | for i in range(5):
 9 |   train.append(list(docs[:240]))
10 |   dev.append(list(docs[240:320]))
11 |   test.append(list(docs[320:400]))
12 |   docs = docs[80:] + docs[:80]
13 | 
14 | 
15 | docs = [json.loads(l) for l in f]
16 | everything = list(zip(train, dev, test))
17 | 
18 | for i, (train_split, dev_split, test_split) in enumerate(everything):
19 |   train_f =open(f"train.{i}.jsonlines", 'w+')
20 |   dev_f =open(f"dev.{i}.jsonlines", 'w+')
21 |   test_f =open(f"test.{i}.jsonlines", 'w+')
22 | 
23 |   train_f.write("".join(train_split))
24 |   dev_f.write("".join(dev_split))
25 |   test_f.write("".join(test_split))
26 | 


--------------------------------------------------------------------------------
/conversion_scripts/make_sara_splits.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | 
 4 | f = open(sys.argv[1], 'r')
 5 | train, dev, test = [], [], []
 6 | 
 7 | 
 8 | docs = [json.loads(l) for l in f]
 9 | for i in range(7):
10 |   train.append("\n".join([json.dumps(doc) for doc in docs if doc["split"] not in [i, (i+1) % 7]]))
11 |   dev.append("\n".join([json.dumps(doc) for doc in docs if doc["split"] == i]))
12 |   test.append("\n".join([json.dumps(doc) for doc in docs if doc["split"] == (i+1) % 7]))
13 | 
14 | unattested = [doc for doc in docs if doc["split"] not in [0, 1, 2, 3, 4, 5, 6, 7]]
15 | print (unattested)
16 | import pdb; pdb.set_trace()
17 | 
18 | everything = list(zip(train, dev, test))
19 | 
20 | for i, (train_split, dev_split, test_split) in enumerate(everything):
21 |   train_f =open(f"train.{i}.jsonlines", 'w+')
22 |   dev_f =open(f"dev.{i}.jsonlines", 'w+')
23 |   test_f =open(f"test.{i}.jsonlines", 'w+')
24 | 
25 |   train_f.write("".join(train_split))
26 |   dev_f.write("".join(dev_split))
27 |   test_f.write("".join(test_split))
28 | 


--------------------------------------------------------------------------------
/conversion_scripts/map_preco.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | 
 4 | input_file = open(sys.argv[1], 'r')
 5 | output_file = open(sys.argv[2], 'w+')
 6 | for line in input_file:
 7 |     d = json.loads(line)
 8 |     clusters = d["mention_clusters"]
 9 |     sentences = d["sentences"]
10 |     len_map = [len(sentence) for sentence in sentences]
11 |     cum_sum = [sum(len_map[:i]) for i in range(len(sentences))]
12 |     remap_clusters = lambda x: [cum_sum[x[0]] + x[1],
13 |                                 cum_sum[x[0]] + x[2]]
14 |     mapped_clusters = [[remap_clusters(span) for span in cluster]
15 |                        for cluster in clusters]
16 |     d["clusters"] = mapped_clusters
17 |     # print (mapped_clusters)
18 |     output_file.write(json.dumps(d) + "\n")
19 | 


--------------------------------------------------------------------------------
/conversion_scripts/remove_es_trace.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | 
 4 | old_file = open(sys.argv[1], 'r')
 5 | new_file = sys.argv[2]
 6 | 
 7 | def fix_clusters(cluster, traces, rev_map):
 8 |     new_spans = []
 9 |     for span in cluster:
10 |         if span[0] == span[1] and span[0] in traces:
11 |             continue
12 |         elif span[0] in traces:
13 |             new_spans.append([rev_map[span[0] + 1], rev_map[span[1]]])
14 |         elif span[1] in traces:
15 |             new_spans.append([rev_map[span[0]], rev_map[span[1] - 1]])
16 |         else:
17 |             new_spans.append([rev_map[span[0]], rev_map[span[1]]])
18 |     return new_spans
19 | 
def remove_trace(jsondict):
    """Return a copy of a document with every trace token ("\u2581_") removed.

    Args:
        jsondict: document with "doc_key", "sentences", "clusters",
            "sentence_map" and "subtoken_map".

    Returns:
        A new dict with the same fields re-indexed for the remaining tokens
        plus "language": "spanish". Clusters left without spans are dropped.
    """
    trace_token = "\u2581_"
    tokens = [tok for sent in jsondict["sentences"] for tok in sent]
    # A set keeps the membership tests below constant-time.
    traces = {i for i, tok in enumerate(tokens) if tok == trace_token}
    new_idx = [i for i in range(len(tokens)) if i not in traces]
    # Old token index -> index after removal, for every kept token.
    rev_map = {j: i for i, j in enumerate(new_idx)}
    # Fix clusters
    new_spans = [fix_clusters(c, traces, rev_map) for c in jsondict["clusters"]]
    new_clusters = [c for c in new_spans if len(c) > 0]
    # Fix sentence
    sentences = [[word for word in sentence if word != trace_token] for sentence in jsondict["sentences"]]
    # Fix sentence_map - this should be correct
    sentence_map = [u for i, u in enumerate(jsondict["sentence_map"]) if i not in traces]
    # Fix subtoken map: every removed trace shifts the later entries down by one.
    subtokens = []
    offset = 0
    for i, subtok_idx in enumerate(jsondict["subtoken_map"]):
        if i in traces:
            offset += 1
        else:
            subtokens.append(subtok_idx - offset)
    return {
        "doc_key": jsondict["doc_key"],
        "sentences": sentences,
        "clusters": new_clusters,
        "sentence_map": sentence_map,
        "subtoken_map": subtokens,
        "language": "spanish",
    }
53 | 
54 | 
# Convert every input document and write one JSON object per line.
docs = [json.loads(l) for l in old_file]
old_file.close()
with open(new_file, "w+") as output:
    for line in docs:
        output.write(json.dumps(remove_trace(line)) + "\n")
59 | 


--------------------------------------------------------------------------------
/conversion_scripts/rucor_to_json.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import json
 3 | from collections import defaultdict
 4 | 
 5 | groups_file = open(sys.argv[1], 'r')
 6 | tokens_file = open(sys.argv[2], 'r')
 7 | doc_file = open(sys.argv[3], 'r')
 8 | output_file = open(sys.argv[4], 'w+')
 9 | # tokens_list = csv.reader(tokens_file, delimiter="\t")
10 | 
11 | tokens_list = tokens_file.readlines()
12 | groups_list = groups_file.readlines()
13 | doc_list = doc_file.readlines()
14 | 
15 | documents = {}
16 | starts = defaultdict(dict)
17 | ends = defaultdict(dict)
18 | doc_lens = defaultdict(int)
19 | sent = []
20 | curr_doc_id = 0
21 | curr_doc = []
22 | 
23 | for i, tokstr in enumerate(tokens_list[1:]):
24 |     tok = tokstr.strip().split("\t")
25 |     doc_id = int(tok[0])
26 |     if doc_id != curr_doc_id:
27 |         # shift
28 |         print (doc_id)
29 |         if sent:
30 |             curr_doc.append(sent)
31 |         documents[curr_doc_id] = curr_doc
32 |         curr_doc_id = doc_id
33 |         curr_doc = []
34 | 
35 |     # if i < 31800 and i > 31400:
36 |     #     print (i, doc_id, tok[1])
37 |     sent_end = tok[5] == "SENT"
38 |     sent.append(tok[3])
39 |     starts[doc_id][int(tok[1])] = doc_lens[doc_id]
40 |     ends[doc_id][int(tok[1]) + int(tok[2])] = doc_lens[doc_id]
41 |     doc_lens[doc_id] += 1
42 |     if sent_end:
43 |         curr_doc.append(sent)
44 |         sent = []
45 | 
46 | if sent:
47 |     curr_doc.append(sent)
48 | documents[curr_doc_id] = curr_doc
49 | curr_doc_id = doc_id
50 | curr_doc = []
51 | 
52 | clusters = defaultdict(list)
53 | chains = defaultdict(set)
54 | for groupstr in groups_list[1:]:
55 |     group = groupstr.split("\t")
56 |     doc_id = int(group[0])
57 |     chain = group[3]
58 |     start = int(group[5])
59 |     end = int(group[5]) + int(group[6])
60 |     if group[2] == "475746":
61 |         end += 1 # there is an error there
62 |     if int(chain) == 0:
63 |         chain = doc_id * 100000 + 1
64 |     chains[doc_id].add(int(chain))
65 |     clusters[int(chain)].append([starts[doc_id][start],
66 |                                  ends[doc_id][end]])
67 | 
68 | for docstr in doc_list[1:]:
69 |     doc = docstr.split("\t")
70 |     doc_id = int(doc[0])
71 |     doc_key = "rucor_" + doc[1].replace("/", "_")
72 |     language = "russian"
73 |     tokens = documents[doc_id]
74 |     doc_clusters = [clusters[cid] for cid in chains[doc_id]]
75 |     json_dict = {
76 |         "doc_key": doc_key,
77 |         "language": language,
78 |         "sentences": tokens,
79 |         "clusters": doc_clusters,
80 |     }
81 |     output_file.write(json.dumps(json_dict) + "\n")
82 | 


--------------------------------------------------------------------------------
/conversion_scripts/util.py:
--------------------------------------------------------------------------------
def flatten(l):
  """Return one list holding the items of every sub-iterable of `l`, in order."""
  flat = []
  for sublist in l:
    flat.extend(sublist)
  return flat
3 | 


--------------------------------------------------------------------------------
/domain/ar_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | encoders = ["xlmr", "da"]
 6 | # encoders = ["da"]
 7 | layers = [25]
 8 | num_samples = [0, 10, 25, 50, 100, 250, 359]
 9 | GOLD = ["true", "false"]
10 | trials = [0]
11 | GPUs = [4, 5, 6, 7]
12 | 
13 | log_location = "onml_curve_ar_q1_"
14 | job_names = [(f"onml_curve_ar_{encoder}_{layers}_{num_samples}_{trial}{'_gold' if gold == 'true' else ''}", gold)
15 |              for (encoder, layers, num_samples, trial, gold) in
16 |              itertools.product(encoders, layers, num_samples, trials, GOLD)]
17 | 
def add_job(name, gold, job_list):
  """Append the job config for experiment `name` to `job_list`.

  name: name of the experiment config to run.
  gold: the string "true" or "false". "true" selects `args.gold_ckpt`,
    anything else selects `args.ckpt`.
  job_list: list that receives the new job dict (mutated in place).

  Reads the module-level `args` and `log_location`.
  """
  # `gold` is a non-empty string, so `not gold` was always False and every job
  # loaded the gold checkpoint. Compare against "true" explicitly.
  job_list.append(
    {"name": name,
     "log_location": log_location,
     "load_path": args.ckpt if gold != "true" else args.gold_ckpt,
     "test_set": "true",
   })
25 | 
if __name__ == "__main__":
  # Parse the checkpoints to resume from; add_job reads them via the global `args`.
  cli = argparse.ArgumentParser()
  cli.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  cli.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  cli.add_argument("-g", "--gold_ckpt", type=str, help="Location gold checkpoint to continue from")
  args = cli.parse_args()

  # Build one job config per grid point, then hand them to the GPU workers.
  job_list = []
  for job_name, gold_flag in job_names:
    add_job(job_name, gold_flag, job_list)

  start_jobs({"command": "trainer.py", "jobs": job_list}, GPUs)
41 | 


--------------------------------------------------------------------------------
/domain/arrau_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | GOLD = True
 6 | encoders = ["fb", "on", "da", "onb"]
 7 | # encoders = ["da"]
 8 | layers = [25]
 9 | num_samples = [10, 20, 40, 80, 160, 335]
10 | trials = [0]
11 | GPUs = [1, 2, 3, 5, 6, 7]
12 | 
13 | 
14 | log_location = f"arrau_data_curve_q1_{'gold_' if GOLD else ''}"
15 | job_names = [f"arrau_curve_{encoder}_{layers}_{num_samples}_{trial}"
16 |              for (encoder, layers, num_samples, trial) in
17 |              itertools.product(encoders, layers, num_samples, trials)]
18 | 
if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  parser.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  args = parser.parse_args()
  # Overrides applied to every job in the grid.
  overrides = {
    "log_location": log_location + args.pretrained_model,
    "load_path": args.ckpt,
    "test_set": "true",
  }
  if GOLD:
    overrides["mentions"] = "false"
    overrides["use_gold_spans"] = "true"

  # start_jobs(config, gpus) takes a single config whose "jobs" entry is a list
  # of per-job dicts; the former three-argument call raised a TypeError.
  config = {
    "command": "trainer.py",
    "jobs": [{"name": name, **overrides} for name in job_names],
  }
  start_jobs(config, GPUs)
34 | 


--------------------------------------------------------------------------------
/domain/base_data_curve.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import queue
 3 | import subprocess
 4 | import os
 5 | import errno
 6 | from threading import Thread
 7 | import logging
 8 | 
 9 | logging.basicConfig(
10 |   level=logging.INFO,
11 |   format="%(asctime)s [%(levelname)s] %(message)s",
12 |   datefmt="%Y-%m-%d %H:%M:%S",
13 | )
14 | 
def mkdirs(path):
  """Create `path` (with any missing parents) and return it.

  An "already exists" error is ignored; every other OSError propagates.
  """
  try:
    os.makedirs(path)
  except FileExistsError:
    # On Python 3 an OSError carrying errno.EEXIST is raised as FileExistsError.
    pass
  return path
22 | 
def manage_gpu(q, gpu, command):
  """
  This function is called per thread. It repeatedly claims the next job off the
  queue and runs `command` for it until the queue is empty. The command is
  usually `trainer.py` but it could also be `inference.py`.
  q: a Queue of job configs defined in start_jobs. Each job config needs:
    name: name of jsonnet exp config
    log_location: location of logs
    In addition, any key that needs to be overridden should be passed in the config.
    Every key (including the two above) is forwarded to the command as `key=value`.
  gpu: the number (e.g. 0) of the cuda device
  command: a single python script, e.g. `trainer.py`

  A job that fails to launch is logged and skipped. Every claimed job is marked
  done so that `q.join()` in start_jobs cannot hang on it.
  """
  new_env = os.environ.copy()
  new_env["CUDA_VISIBLE_DEVICES"] = str(gpu)
  while True:
    # `q.empty()` followed by a blocking `q.get()` races with the other workers:
    # the last item can be taken in between, leaving this thread blocked forever.
    try:
      config = q.get_nowait()
    except queue.Empty:
      return
    try:
      job = config["name"]
      log_dir = mkdirs(f"/srv/local1/paxia/subprocess_logs/{config['log_location']}")
      logging.info(f"getting next task {job} on {gpu} ({q.qsize()} remain)")
      command_list = (["python", f"/srv/local1/paxia/incremental_coref/{command}", job] +
                      [f"{k}={v}" for k, v in config.items()])
      # stdout and stderr of the child are both appended to the per-job file.
      with open(f"{log_dir}/e.{job}", "a+") as err_file:
        result = subprocess.run(command_list, env=new_env, stdout=err_file, stderr=err_file)
      if result.returncode != 0:
        logging.warning(f"{job} exited with code {result.returncode}")
      logging.info(f"finished {job} at {config['log_location']}")
    except Exception:
      logging.exception(f"job {config!r} failed on gpu {gpu}")
    finally:
      q.task_done()
49 | 
def start_jobs(config, gpus):
  """
  This function takes an experiment config and a list of available GPUs (ids),
  enqueues every job and starts one worker thread per GPU; it returns once the
  queue has been fully processed.
  config: a dict containing
    jobs: list of job configs (dicts) to be enqueued; see manage_gpu for the
      keys each job config needs
    command: Optional, command that is ultimately called (`inference.py` or
      `trainer.py`); defaults to `trainer.py`
  gpus: a list of integers.

  Raises ValueError if there are jobs to run but `gpus` is empty, since nothing
  would ever drain the queue. The caller's `config` is not modified.
  """
  command = config.get("command", "trainer.py")
  q = queue.Queue()
  for job_config in config["jobs"]:
    q.put(job_config)
  logging.info(f"Queued {q.qsize()} jobs")
  if not gpus and not q.empty():
    raise ValueError("start_jobs needs at least one GPU id to run the queued jobs")
  for gpu in gpus:
    time.sleep(1)
    worker = Thread(target=manage_gpu, args=(q, gpu, command))
    logging.info(f"Starting {gpu}")
    worker.start()
  logging.info("Waiting for all to finish")
  q.join()
  logging.info("Finished")
74 | 


--------------------------------------------------------------------------------
/domain/layers_data_curve.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import itertools
  3 | from base_data_curve import start_jobs
  4 | 
  5 | path_model = {}
  6 | 
  7 | # Base models
  8 | path_model["on"] = "/srv/local1/paxia/exp_logs/public_icoref/ontonotes/checkpoint.bin"
  9 | path_model["preco"] = "/srv/local1/paxia/exp_logs/public_icoref/preco/checkpoint.bin"
 10 | path_model["en"] = "/srv/local1/paxia/exp_logs/public_icoref/ontonotes_en/checkpoint.bin"
 11 | 
 12 | 
 13 | # ENCODERS
 14 | # base_encoders = ["onb"]
 15 | # qb_encoders = ["fb", "on", "da"]
 16 | # lb_encoders = ["fb", "on", "da"]
 17 | qb_encoders = ["da"]
 18 | lb_encoders = ["da"]
 19 | zh_encoders = ["xlmr", "da"]
 20 | 
 21 | 
 22 | # LAYERS
 23 | large_layers = [0, 6, 12]
 24 | base_layers = [0, 3, 6]
 25 | 
 26 | # DATA
 27 | qb_samples = [15, 60, 240]
 28 | lb_samples = [10, 40, 80]
 29 | zh_samples = [50, 500, 1810]
 30 | 
 31 | trials = [0]
 32 | GPUs = [2, 3, 4, 5, 6, 7]
 33 | 
 34 | def add_job(name, encoder, lang, job_list):
 35 |   if encoder == "da" and lang == "en":
 36 |     job_list.append(
 37 |       {"name": name,
 38 |        "log_location": log_location + "preco",
 39 |        "load_path": path_model["preco"],
 40 |        "test_set": "true",
 41 |        })
 42 |     # job_list.append(
 43 |     #   {"name": name,
 44 |     #    "log_location": log_location + "on",
 45 |     #    "load_path": path_model["on"],
 46 |     #    "test_set": "true",
 47 |     #    })
 48 |   elif encoder == "da" and lang == "zh":
 49 |     job_list.append(
 50 |       {"name": name,
 51 |        "log_location": log_location + "onen",
 52 |        "load_path": path_model["en"],
 53 |        "test_set": "true",
 54 |        })
 55 |   else:
 56 |     job_list.append(
 57 |       {"name": name,
 58 |        "log_location": log_location + "_pretrained",
 59 |        "load_path": path_model["preco"],
 60 |        "test_set": "true",
 61 |        })
 62 | 
 63 | 
 64 | log_location = "layer_exps_da_"
 65 | job_list = []
 66 | # QB Jobs
 67 | large_job_names = [(f"qb_curve_{encoder}_0_{layers}_{num_samples}_{trial}", encoder)
 68 |                    for (encoder, layers, num_samples, trial) in
 69 |                    itertools.product(["da"], large_layers, qb_samples, trials)]
 70 | # small_job_names = [(f"qb_curve_{encoder}_0_{layers}_{num_samples}_{trial}", encoder)
 71 | #                    for (encoder, layers, num_samples, trial) in
 72 | #                    itertools.product(base_encoders, base_layers, qb_samples, trials)]
 73 | # for job, encoder in large_job_names: #+ small_job_names:
 74 | #   add_job(job, encoder, "en", job_list)
 75 | 
 76 | # litbank Jobs
 77 | large_job_names = [(f"litbank_curve_{encoder}_0_{layers}_{num_samples}_{trial}", encoder)
 78 |                    for (encoder, layers, num_samples, trial) in
 79 |                    itertools.product(lb_encoders, large_layers, lb_samples, trials)]
 80 | # small_job_names = [(f"litbank_curve_{encoder}_0_{layers}_{num_samples}_{trial}", encoder)
 81 | #                    for (encoder, layers, num_samples, trial) in
 82 | #                    itertools.product(base_encoders, base_layers, lb_samples, trials)]
 83 | 
 84 | # for job, encoder in large_job_names: # + small_job_names:
 85 | #   add_job(job, encoder, "en", job_list)
 86 | 
 87 | # zh
 88 | tmp_layers = [6]
 89 | large_job_names = [(f"onml_curve_zh_{encoder}_{layers}_{num_samples}_{trial}", encoder)
 90 |                    for (encoder, layers, num_samples, trial) in
 91 |                    itertools.product(zh_encoders, tmp_layers, zh_samples, trials)]
 92 | 
 93 | for job, encoder in large_job_names:
 94 |   add_job(job, encoder, "zh", job_list)
 95 | 
 96 | 
 97 | if __name__ == "__main__":
 98 |   parser = argparse.ArgumentParser()
 99 |   log_location = log_location
100 |   config = {
101 |     "command": "trainer.py",
102 |     "jobs": job_list,
103 |   }
104 |   start_jobs(config, GPUs)
105 | 


--------------------------------------------------------------------------------
/domain/litbank_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | GOLD = False
 6 | # encoders = ["fb", "on", "da", "onb"]
 7 | 
 8 | encoders = ["da"]
 9 | layers = [25]
10 | splits = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
11 | num_samples = [5, 10, 20, 40, 80]
12 | trials = [0]
13 | GPUs = [1, 2, 3, 4, 5, 7]
14 | 
15 | def add_job(name, gold, job_list):
16 |   job_list.append(
17 |     {"name": name,
18 |      "log_location": log_location + "on2pc",
19 |      "load_path": args.ckpt,
20 |      "test_set": "true",
21 |    })
22 | 
23 | log_location = f"litbank_data_curve_q1_{'gold_' if GOLD else ''}"
24 | job_names = [(f"litbank_curve_{encoder}_{split}_{layers}_{num_samples}_{trial}", GOLD)
25 |              for (split, encoder, layers, num_samples, trial) in
26 |              itertools.product(splits, encoders, layers, num_samples, trials)]
27 | 
if __name__ == "__main__":
  # Parse the checkpoint to resume from; add_job reads it via the global `args`.
  cli = argparse.ArgumentParser()
  cli.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  cli.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  # cli.add_argument("-g", "--gold_ckpt", type=str, help="Location gold checkpoint to continue from")
  args = cli.parse_args()

  # Build one job config per grid point, then hand them to the GPU workers.
  job_list = []
  for job_name, gold_flag in job_names:
    add_job(job_name, gold_flag, job_list)

  start_jobs({"command": "trainer.py", "jobs": job_list}, GPUs)
43 | 


--------------------------------------------------------------------------------
/domain/preco_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | encoders = ["fb", "on", "da"]#, "onb"]
 6 | # encoders = ["onb"]
 7 | layers = [25]
 8 | num_samples = [5, 10, 25, 50, 100, 250, 500]
 9 | trials = [0]
10 | GPUs = [0, 1, 2, 3, 4, 5, 6, 7]
11 | 
12 | log_location = "preco_data_curve_q1_inference_"
13 | job_names = [f"preco_curve_{encoder}_{layers}_{num_samples}_{trial}"
14 |              for (layers, encoder, num_samples, trial) in
15 |              itertools.product(layers, encoders, num_samples, trials)]
16 | 
if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  # parser.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  # parser.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  args = parser.parse_args()
  # Overrides applied to every job in the grid.
  overrides = {
    "log_location": log_location, # + args.pretrained_model,
    "load_model": "true",
    # "load_path": args.ckpt,
    "test_set": "true",
  }
  # start_jobs(config, gpus) takes a single config whose "jobs" entry is a list
  # of per-job dicts; the former three-argument call raised a TypeError.
  config = {
    "command": "inference.py",
    "jobs": [{"name": name, **overrides} for name in job_names],
  }
  start_jobs(config, GPUs)
31 | 


--------------------------------------------------------------------------------
/domain/preco_dev_analysis.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | encoders = ["on", "da"]
 6 | layers = [25]
 7 | # num_samples = [500, 250, 150, 1, 5, 15, 25, 50, 100]
 8 | num_samples = [10]
 9 | trials = [0]
10 | GPUs = [0, 1, 2, 3, 4, 5, 6, 7]
11 | 
12 | 
13 | log_location = "preco_dev_analysis_q2_"
14 | job_names = [f"preco_curve_{encoder}_{layers}_{num_samples}_{trial}"
15 |              for (layers, num_samples, encoder, trial) in
16 |              itertools.product(layers, num_samples, encoders, trials)]
17 | 
18 | 
if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  parser.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  args = parser.parse_args()
  # Overrides applied to every job in the grid.
  overrides = {
    "log_location": log_location + args.pretrained_model,
    "load_path": args.ckpt,
    "test_set": "true",
    "num_epochs": 60,
    "patience": 999,
    "dev_path": "/srv/local1/paxia/incremental_coref/data/preco/nontrain.512.jsonlines",
  }

  # start_jobs(config, gpus) takes a single config whose "jobs" entry is a list
  # of per-job dicts; the former three-argument call raised a TypeError.
  config = {
    "command": "trainer.py",
    "jobs": [{"name": name, **overrides} for name in job_names],
  }
  start_jobs(config, GPUs)
34 | 


--------------------------------------------------------------------------------
/domain/qbcoref_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | GOLD = True
 6 | encoders = ["fb", "on", "da", "onb"]
 7 | 
 8 | layers = [25]
 9 | splits = [0, 1, 2, 3, 4]
10 | num_samples = [5, 15, 30, 60, 120, 240]
11 | trials = [0]
12 | GPUs = [0, 1, 2, 3, 4, 5, 6, 7]
13 | 
14 | log_location = f"qbcoref_data_curve_q1_{'gold_' if GOLD else ''}"
15 | job_names = [f"qb_curve_{encoder}_{split}_{layers}_{num_samples}_{trial}"
16 |              for (encoder, split, layers, num_samples, trial) in
17 |              itertools.product(encoders, splits, layers, num_samples, trials)]
18 | 
if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  parser.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  args = parser.parse_args()
  # Overrides applied to every job in the grid.
  overrides = {
    "log_location": log_location + "_" + args.pretrained_model,
    "load_path": args.ckpt,
    "test_set": "true",
  }
  if GOLD:
    overrides["mentions"] = "false"
    overrides["use_gold_spans"] = "true"

  # start_jobs(config, gpus) takes a single config whose "jobs" entry is a list
  # of per-job dicts; the former three-argument call raised a TypeError.
  config = {
    "command": "trainer.py",
    "jobs": [{"name": name, **overrides} for name in job_names],
  }
  start_jobs(config, GPUs)
34 | 


--------------------------------------------------------------------------------
/domain/sara_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | # encoders = ["on", "da", "fb", "onb"]
 6 | # encoders = ["da", "on"]
 7 | encoders = ["fb", "onb"]
 8 | layers = [25]
 9 | splits = [0, 1, 2, 3, 4, 5, 6]
10 | num_samples = [0, 10, 20, 40, 80, 200]
11 | GOLD = ["true", "false"]
12 | trials = [0]
13 | GPUs = [5, 6, 7]
14 | 
15 | log_location = "sara_q1_"
16 | job_names = [(f"sara_curve_{encoder}_{split}_{layers}_{num_samples}_{trial}{'_gold' if gold == 'true' else ''}", gold)
17 |              for (encoder, split, layers, num_samples, trial, gold) in
18 |              itertools.product(encoders, splits, layers, num_samples, trials, GOLD)]
19 | 
def add_job(name, gold, job_list):
  """Append the job config for experiment `name` to `job_list`.

  name: name of the experiment config to run.
  gold: the string "true" or "false". "true" selects `args.gold_ckpt`,
    anything else selects `args.ckpt`.
  job_list: list that receives the new job dict (mutated in place).

  Reads the module-level `args` and `log_location`.
  """
  # `gold` is a non-empty string, so `not gold` was always False and every job
  # loaded the gold checkpoint. Compare against "true" explicitly.
  job_list.append(
    {"name": name,
     "log_location": log_location,
     "load_path": args.ckpt if gold != "true" else args.gold_ckpt,
     "test_set": "true",
     "save_small_model": "true",
   })
28 | 
if __name__ == "__main__":
  # Parse the checkpoints to resume from; add_job reads them via the global `args`.
  cli = argparse.ArgumentParser()
  cli.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  cli.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  cli.add_argument("-g", "--gold_ckpt", type=str, help="Location gold checkpoint to continue from")
  args = cli.parse_args()

  # Logs are grouped per pretrained model; add_job picks up the extended value.
  log_location += args.pretrained_model

  job_list = []
  for job_name, gold_flag in job_names:
    add_job(job_name, gold_flag, job_list)

  start_jobs({"command": "trainer.py", "jobs": job_list}, GPUs)
45 | 


--------------------------------------------------------------------------------
/domain/semeval_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | encoders = ["xlmr", "da"]
 6 | layers = [25]
 7 | num_samples = {
 8 |   "es": [0, 10, 25, 50, 100, 250, 875],
 9 |   "ca": [0, 10, 25, 50, 100, 250, 829],
10 |   "it": [0, 10, 20, 40, 80],
11 |   "nl": [0, 10, 20, 40, 80, 145],
12 | }
13 | 
14 | languages = ["it", "ca", "es", "nl"]
15 | GOLD = ["true", "false"]
16 | trials = [0]
17 | GPUs = [2, 3, 4, 5, 6, 7]
18 | 
19 | log_location = "onml_curve_se_q1_"
20 | all_jobs = []
21 | for language in languages:
22 |   job_names = [(f"onml_curve_{language}_{encoder}_{layers}_{samples}_{trial}{'_gold' if gold == 'true' else ''}", gold)
23 |                for (encoder, layers, samples, trial, gold) in
24 |                itertools.product(encoders, layers, num_samples[language], trials, GOLD)]
25 |   all_jobs.extend(job_names)
26 | 
def add_job(name, gold, job_list):
  """Append the job config for experiment `name` to `job_list`.

  `gold` is the string "true" or "false": "true" selects `args.gold_ckpt`,
  anything else selects `args.ckpt`. Reads the module-level `args` and
  `log_location`.
  """
  if gold == "true":
    checkpoint = args.gold_ckpt
  else:
    checkpoint = args.ckpt
  job_list.append({
    "name": name,
    "log_location": log_location,
    "load_path": checkpoint,
    "test_set": "true",
  })
34 | 
if __name__ == "__main__":
  # Parse the checkpoints to resume from; add_job reads them via the global `args`.
  cli = argparse.ArgumentParser()
  cli.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  cli.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  cli.add_argument("-g", "--gold_ckpt", type=str, help="Location gold checkpoint to continue from")
  args = cli.parse_args()

  # Build one job config per grid point, then hand them to the GPU workers.
  job_list = []
  for job_name, gold_flag in all_jobs:
    add_job(job_name, gold_flag, job_list)

  start_jobs({"command": "trainer.py", "jobs": job_list}, GPUs)
50 | 


--------------------------------------------------------------------------------
/domain/zero_data_curve.py:
--------------------------------------------------------------------------------
 1 | from base_data_curve import start_jobs
 2 | 
 3 | PATHS = {
 4 |   "on": "/srv/local1/paxia/exp_logs/public_icoref/ontonotes/checkpoint.bin",
 5 |   "preco": "/srv/local1/paxia/exp_logs/public_icoref/preco/checkpoint.bin",
 6 |   "en": "/srv/local1/paxia/exp_logs/public_icoref/ontonotes_en/checkpoint.bin",
 7 |   "en_gold": "/srv/local1/paxia/exp_logs/public_icoref/mentions/baseline_onml_en_25_2802/checkpoint.bin",
 8 |   "preco_gold": "/srv/local1/paxia/exp_logs/public_icoref/mentions/fb_preco_25_36120/checkpoint.bin",
 9 |   "on_gold": "/srv/local1/paxia/exp_logs/public_icoref/mentions/on_0_2802/checkpoint.bin"
10 | }
11 | 
12 | DATA = {
13 |   "arrau": "/srv/local1/paxia/incremental_coref/data/arrau/test.512.jsonlines",
14 |   "zh": "/srv/local1/paxia/incremental_coref/data/ontonotes_ml/chinese/test.chinese.512.jsonlines",
15 |   "on": "MISSING",
16 |   "litbank": "/srv/local1/paxia/incremental_coref/data/litbank/train.jsonlines",
17 |   "qbcoref": "/srv/local1/paxia/incremental_coref/data/qbcoref/all_docs.512.jsonlines",
18 |   "preco": "/srv/local1/paxia/incremental_coref/data/preco/dev.preco.512.jsonlines",
19 | }
20 | LOG_DIR = "/srv/local1/paxia/subprocess_logs/baseline_forgetting/logs/"
21 | 
22 | GPUs = [0, 1, 2, 3, 4, 5, 6, 7]
23 | 
24 | log_location = "baseline_forgetting"
25 | 
26 | 
def add_job(name, model_key, data_key, job_list):
  """Append two inference jobs for experiment `name` to `job_list`.

  The first job loads PATHS[model_key]. The second loads
  PATHS[f"{model_key}_gold"] and additionally sets `mentions` to "false" and
  `use_gold_spans` to "true". Both use DATA[data_key] as `dev_path`.

  Reads the module-level `log_location`, `PATHS`, `DATA` and `LOG_DIR`.
  """
  run_dir = LOG_DIR + "/" + model_key + "_" + name
  gold_dir = run_dir + "_gold"
  eval_file = DATA[data_key]

  plain_job = {
    "name": name,
    "command": "inference.py",
    "log_location": log_location,
    "load_path": PATHS[model_key],
    "log_dir": run_dir,
    "log_path": run_dir + "/out.log",
    "dev_path": eval_file,
    "load_model": "true",
  }
  gold_job = {
    "name": name,
    "command": "inference.py",
    "log_location": log_location,
    "load_path": PATHS[f"{model_key}_gold"],
    "mentions": "false",
    "use_gold_spans": "true",
    "log_dir": gold_dir,
    "log_path": gold_dir + "/gold_out.log",
    "dev_path": eval_file,
    "load_model": "true",
  }
  job_list.extend([plain_job, gold_job])
50 | 
# Jobs to run; each add_job call contributes a regular and a gold-span job.
job_list = []

# Zeros

# Litbank
#add_job("litbank_curve_da_0_25_5_0", "on", "litbank", job_list)
#add_job("litbank_curve_da_0_25_5_0", "preco", "litbank", job_list)

#add_job("preco_curve_da_25_5_0", "on", "preco", job_list)

# QBCoref
#add_job("qb_curve_da_0_25_5_0", "on", "qbcoref", job_list)
#add_job("qb_curve_da_0_25_5_0", "preco", "qbcoref", job_list)

# ARRAU (the only dataset currently enabled)
for model_key in ("on", "preco"):
  add_job("arrau_curve_da_25_80_0", model_key, "arrau", job_list)

# zh
#add_job("onml_curve_zh_da_25_5_0", "en", "zh", job_list)
71 | 
72 | 
if __name__ == "__main__":
  # NOTE(review): "log_location" below is the literal string, not the module
  # variable of that name; start_jobs only reads "command" and "jobs".
  config = dict(command="inference.py", log_location="log_location", jobs=job_list)
  start_jobs(config, GPUs)
80 | 


--------------------------------------------------------------------------------
/domain/zh_data_curve.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import itertools
 3 | from base_data_curve import start_jobs
 4 | 
 5 | encoders = ["xlmr", "da"]
 6 | layers = [25]
 7 | #num_samples = [10, 25, 50, 100, 250, 500, 1810]
 8 | num_samples = [0, 25, 50, 100, 250, 500, 1000, 1810]
 9 | GOLD = ["true", "false"]
10 | trials = [0]
11 | GPUs = [4, 5, 6, 7]
12 | 
13 | log_location = "onml_curve_zh_full_rerun_q1_"
14 | job_names = [(f"onml_curve_zh_{encoder}_{layers}_{num_samples}_{trial}{'_gold' if gold == 'true' else ''}", gold)
15 |              for (encoder, layers, num_samples, trial, gold) in
16 |              itertools.product(encoders, layers, num_samples, trials, GOLD)]
17 | 
def add_job(name, gold, job_list):
  """Append the job config for experiment `name` to `job_list`.

  name: name of the experiment config to run.
  gold: the string "true" or "false". "true" selects `args.gold_ckpt`,
    anything else selects `args.ckpt`.
  job_list: list that receives the new job dict (mutated in place).

  Reads the module-level `args` and `log_location`.
  """
  # `gold` is a non-empty string, so `not gold` was always False and every job
  # loaded the gold checkpoint. Compare against "true" explicitly.
  job_list.append(
    {"name": name,
     "log_location": log_location,
     "load_path": args.ckpt if gold != "true" else args.gold_ckpt,
     "test_set": "true",
     })
25 | 
if __name__ == "__main__":
  # Parse the checkpoints to resume from; add_job reads them via the global `args`.
  cli = argparse.ArgumentParser()
  cli.add_argument("-c", "--ckpt", required=True, type=str, help="Location of checkpoint to continue training from")
  cli.add_argument("-p", "--pretrained_model", required=True, type=str, help="Which pretrained checkpoint is being used?")
  cli.add_argument("-g", "--gold_ckpt", type=str, help="Location gold checkpoint to continue from")
  args = cli.parse_args()

  # Build one job config per grid point, then hand them to the GPU workers.
  job_list = []
  for job_name, gold_flag in job_names:
    add_job(job_name, gold_flag, job_list)

  start_jobs({"command": "trainer.py", "jobs": job_list}, GPUs)
42 | 


--------------------------------------------------------------------------------
/encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import transformers
 3 | import logging
 4 | import util
 5 | 
 6 | class Encoder(torch.nn.Module):
 7 |   def __init__(self, config, use_cache=True):
 8 |     """`use_cache` is only used in this file to recompute embeddings.
 9 |     """
10 |     super(Encoder, self).__init__()
11 |     # Figure out what kind of config it is.
12 |     # Either is it downloadable from huggingface (and we need an accompanying local file)
13 |     # Or we load everything from a local checkpoint
14 |     if config["encoder_source"] == "HuggingFace":
15 |       try:
16 |         self.tokenizer = transformers.AutoTokenizer.from_pretrained(config["encoder_name"]) # replace with local files
17 |       except:
18 |         logging.info("Did not found tokenizer, using spanbert-cased")
19 |         self.tokenizer = transformers.AutoTokenizer.from_pretrained("SpanBERT/spanbert-base-cased")
20 |       self.model = transformers.AutoModel.from_pretrained(config["encoder_name"])
21 |     else:
22 |       # We only support XLMR otherwise
23 |       custom_xlmr_dir = config["custom_encoder_dir"]
24 |       logging.info(f"Applying custom XLMR encoder: {custom_xlmr_dir}")
25 |       self.tokenizer = transformers.XLMRobertaTokenizer.from_pretrained(custom_xlmr_dir + "/vocab.txt")
26 |       encoder_config = transformers.XLMRobertaConfig.from_json_file(custom_xlmr_dir + "/config.json")
27 |       self.model = transformers.XLMRobertaModel.from_pretrained(
28 |         custom_xlmr_dir + "/pytorch_model.bin",
29 |         config=encoder_config
30 |       )
31 |     self.device = config["device"]
32 | 
33 |     # If there is a cached file, we'll want to back off to use the final layer embs there
34 |     self.cached_embeddings = None
35 |     if use_cache:
36 |       try:
37 |         self.cached_embeddings = torch.load(config["log_dir"] + "/embeddings.pt")
38 |         logging.info(f"Found cached embeddings at {config['log_dir'] + '/embeddings.pt'}. Using them")
39 |       except FileNotFoundError:
40 |         pass
41 | 
42 |   def forward(self, sentence, doc_seg_id=None, eval_mode=False):
43 |     if self.cached_embeddings is not None and doc_seg_id is not None:
44 |       if doc_seg_id in self.cached_embeddings:
45 |         return self.cached_embeddings[doc_seg_id].to(self.device)
46 |       else:
47 |         logging.info(f"Did not find {doc_seg_id} cached. Recomputing instead.")
48 |     # The Predictor already puts the model in eval() mode, so this flag only used in cache_embeddings()
49 |     if eval_mode:
50 |       self.model.eval()
51 |     model_input = torch.tensor(self.tokenizer.encode(sentence[1:-1]), device=self.device).unsqueeze(0)
52 |     outputs = self.model(model_input)
53 |     final_layer = outputs[0]
54 |     return final_layer
55 | 
56 | 
def cache_embeddings(config):
  """Encode every segment of the eval and train data and save the results.

  Builds a cache-free Encoder, runs it over each segment of the documents
  loaded from `config["eval_path"]` and `config["train_path"]`, and saves a
  dict mapping "<doc_key>_<segment start offset>" to the CPU output tensor at
  `<log_dir>/embeddings.pt`.
  """
  embedder = Encoder(config, use_cache=False)
  embedder = embedder.to(config["device"])
  cache_file = config["log_dir"] + "/embeddings.pt"
  embeddings = {}
  train_data = util.load_data(config["train_path"])
  eval_data = util.load_data(config["eval_path"])
  data_iterator = enumerate(eval_data + train_data)
  # The outputs are only stored, never back-propagated through, so skip
  # autograd bookkeeping for the whole pass.
  with torch.no_grad():
    for doc_num, document in data_iterator:
      if doc_num % 200 == 99:
        logging.info(f"Cached {doc_num} documents")
      segment_iter = util.get_segment_iter(document)
      start_idx = 0
      for _, (segment, _, seglen) in segment_iter:
        final_layer = embedder(segment, eval_mode=True)
        # Keys are the document key plus the running sum of `seglen` values.
        doc_seg_id = f"{document['doc_key']}_{start_idx}"
        embeddings[doc_seg_id] = final_layer.detach().cpu()
        start_idx += seglen

  torch.save(embeddings, cache_file)
  logging.info(f"Saved {len(embeddings)} embeddings to {cache_file}")
78 | 
if __name__ == "__main__":
  # Build the experiment config via util.initialize_from_env() and cache its embeddings.
  cache_embeddings(util.initialize_from_env())
82 | 


--------------------------------------------------------------------------------
/experiments.jsonnet:
--------------------------------------------------------------------------------
  1 | // Config file of all experiments in roughly reverse chronological order
  2 | // Might want to refactor all the jsonnets some day
  3 | local icoref = import "jsonnets/emnlp2020.jsonnet"; // EMNLP 2020
  4 | local aida = import "jsonnets/aida.jsonnet";        // LOME-related
  5 | local multilingual = import "jsonnets/multilingual.jsonnet"; //LOME-related
  6 | local litbank = import "jsonnets/litbank.jsonnet";  // transfer
  7 | local preco = import "jsonnets/preco.jsonnet";      // transfer
  8 | local qbcoref = import "jsonnets/qbcoref.jsonnet";  // transfer
  9 | local ontonotes = import "jsonnets/ontonotes.jsonnet";  // transfer
 10 | local arrau = import "jsonnets/arrau.jsonnet";  // transfer
 11 | local sara = import "jsonnets/sara.jsonnet"; //transfer
 12 | 
// The file evaluates to the `+`-merge of every experiment group below.

// OntoNotes configs
(
ontonotes.on
+ ontonotes.on_fb
)

+

// Configs for model transfer
// NOTE(review): litbank.litbank appears twice in this group; presumably the
// second occurrence is redundant, but confirm before removing it.
(
litbank.litbank
+ litbank.on_litbank
+ litbank.litbank
+ litbank.data_curve_da
+ litbank.data_curve_on
+ litbank.data_curve_fb
)

+

// PreCo configs
(
preco.preco
+ preco.preco_ft
+ preco.preco_fb
+ preco.preco_eval
+ preco.dev_variance
+ preco.data_fork
+ preco.data_curve_fb
+ preco.data_curve_on
+ preco.data_curve_da
+ preco.data_curve_xlmr
)

+

// QBCoref configs
(
qbcoref.qb
+ qbcoref.data_curve_da
+ qbcoref.data_curve_on
+ qbcoref.data_curve_fb
+ qbcoref.qb_curve
)

+ 

// ARRAU configs
(
arrau.data_curve_da
+ arrau.data_curve_on
+ arrau.data_curve_fb
)

+ 

// SARA configs
(
sara.data_curve
)

// Multilingual/LOME
+

(
multilingual.onml_baselines
+ multilingual.data_curve_en
+ multilingual.data_curve_zh
+ multilingual.data_curve_ar
+ multilingual.data_curve_ca
+ multilingual.data_curve_es
+ multilingual.data_curve_it
+ multilingual.data_curve_nl
+ multilingual.ml_exps
+ multilingual.finetune_sweeps
+ multilingual.base_data_exps
+ multilingual.dev
+ aida.dev
)

+

// Initial incremental coref model
(
icoref.main
+ icoref.ablations
+ icoref.ablations_2
+ icoref.ablations_3
+ icoref.ablations_3_evict
+ icoref.main_trials
+ icoref.encoders
+ icoref.evaluation
+ icoref.unused
+ icoref.main_mention_xe
)
103 | 


--------------------------------------------------------------------------------
/jsonnets/aida.jsonnet:
--------------------------------------------------------------------------------
// This could be autogenerated
//
// AIDA dev configs: two existing multilingual dev configs, each given a new
// run name and with eval_path / preds_file overridden by empty strings.
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
local multilingual = import "multilingual.jsonnet";

{
  dev: {
    // multilingual.dev.xlmr_mixed_ml_true_dev under the run name "xlmr_gold".
    aida_xlmr: multilingual.dev.xlmr_mixed_ml_true_dev +
    base.Name("xlmr_gold") +
    {
      eval_path: "",
      preds_file: "",
    },

    // multilingual.dev.spb_on_en_true_dev under the run name "spanbert_gold".
    aida_spb: multilingual.dev.spb_on_en_true_dev +
    base.Name("spanbert_gold") + {
      eval_path: "",
      preds_file: "",
    }
  }
}


--------------------------------------------------------------------------------
/jsonnets/arrau.jsonnet:
--------------------------------------------------------------------------------
// file for arrau experiments
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
{
  // Set up Arrau base config, not usable on its own (missing Name)
  // data_size becomes num_train_examples; layers is passed to finetune_top.
  local Arrau(data_size, layers) = (
    base.base +
    encoders.spanbert_large +
    encoders.finetune_top(layers) +
    data.Arrau +
    {singleton_eval: true,
     mentions: true,
     num_train_examples: data_size}
  ),

  // One config per (layers, num_examples, trial); load_model is true.
  data_curve_da: {
    ["arrau_curve_da_" + layers + "_" + num_examples + "_" + trial]: (
    Arrau(num_examples, layers) +
    base.Name("arrau_da_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: true})
    for layers in [25]
    for num_examples in [10, 20, 40, 80, 160, 335]
    for trial in [0, 1]
  },

  // Same grid with load_model false ("on"), plus an "onb" variant that
  // overrides the encoder with encoders.spanbert_base.
  data_curve_on: {
    ["arrau_curve_on_" + layers + "_" + num_examples + "_" + trial]: (
    Arrau(num_examples, layers) +
    base.Name("arrau_on_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [25]
    for num_examples in [10, 20, 40, 80, 160, 335]
    for trial in [0, 1]
  } + {
    ["arrau_curve_onb_" + layers + "_" + num_examples + "_" + trial]: (
    Arrau(num_examples, layers) +
    encoders.spanbert_base +
    base.Name("arrau_onb_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [25]
    for num_examples in [10, 20, 40, 80, 160, 335]
    for trial in [0, 1]
  },

  // Same grid with load_model false and the encoder overridden by
  // encoders.spanbert_large_fb.
  data_curve_fb: {
    ["arrau_curve_fb_" + layers + "_" + num_examples + "_" + trial]: (
    Arrau(num_examples, layers) +
    encoders.spanbert_large_fb +
    base.Name("arrau_fb_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [25]
    for num_examples in [10, 20, 40, 80, 160, 335]
    for trial in [0, 1]
  }

}
61 | 


--------------------------------------------------------------------------------
/jsonnets/base.jsonnet:
--------------------------------------------------------------------------------
// Edit with caution, treat this as an abstract class.
local local_config = import "../local.jsonnet";
local encoders = import "encoders.jsonnet";
{
  local log_root = local_config.log_root, // all logs live here

  // Mixin that sets evict_fn to the "trunc_linscale" policy with the given
  // distance (base_config below defaults evict_fn to false).
  trunc_linscale(distance): {
    evict_fn: {
      name: "trunc_linscale",
      distance: distance
    }
  },

  // Experiments -- final for EMNLP
  // Default values; experiment files override individual fields with `+`.
  local base_config = {
    model_type: "incremental",
    load_model: "auto",
    top_span_ratio: 0.4,
    threshold: 0, //
    num_epochs: 50, // patience-based stopping
    use_gold_spans: false,
    teacher_forcing_rate: 0.0,
    reset_weights: false,
    memory_limit: local_config.gpu_gb,
    negative_sample_rate: 1.0, // whether to downsample negative links
    adam_learning_rate: 2e-04,
    encoder_learning_rate: 1e-5,
    max_grad_norm: 10,
    dropout: 0.4, //
    method: "alpha", //
    evict_fn: false,
    patience: 10,
    finetune: false, // default is false; encoders.finetune_top(k) sets it to {layers: k}
    mentions: false,
    update_each_segment: false,
    singleton_eval: false,
    token_emb_size: 0, // needs to be changed
    debug_embs: false, // output embeddings?
    test_set: false, // evaluate on test set?
    seed: 67,
  },

  // Naming template
  // log_dir and log_path are derived from self.run_name, so they follow
  // whichever run_name ends up last in a `+` chain.
  Name(name): {
    run_name: name,
    log_dir: log_root + "/" + self.run_name,
    log_path: self.log_dir + "/" + "checkpoint.bin",
  },

  // Scorer Vars
  local torch_scorer_vars = {
    genre: {genre_emb_size: 20},
    spans: {
      output_size: 3000,
      span_width_buckets: 30,
      feature_size: 20,
    },
    pairwise: {
      speaker_buckets: 2,
      antecedent_distance_buckets: 10,
      segment_buckets: 3,
    }

  },

  // The only exported config: defaults plus scorer variables. It has no
  // run_name/log fields until a Name(...) is added.
  base: base_config + torch_scorer_vars,
}
68 | 


--------------------------------------------------------------------------------
/jsonnets/encoders.jsonnet:
--------------------------------------------------------------------------------
// Config file containing encoder information
local local_config = import "../local.jsonnet";
{
  local encoders_dir = local_config.encoders_dir, // all encoders live here

  // Abstract encoder definition
  // size is stored as token_emb_size, overriding the placeholder 0 in base.jsonnet.
  local Encoder(source, name, size) = {
    encoder_source: source,
    encoder_name: name,
    token_emb_size: size,
  },

  // Encoder with encoder_source "HuggingFace".
  local HuggingFaceEncoder(name, size) = Encoder("HuggingFace", name, size),
  // Encoder with encoder_source "local" and a ckpt_dir under encoders_dir.
  local CustomEncoder(name, size) = Encoder("local", name, size) + {
    ckpt_dir: encoders_dir + "/" + name,
  },


  spanbert_large: HuggingFaceEncoder("shtoshni/spanbert_coreference_large", 1024),
  spanbert_base: HuggingFaceEncoder("shtoshni/spanbert_coreference_base", 768),

  // Same HuggingFace names as above, with an additional ckpt_dir under encoders_dir.
  spanbert_large_pt: HuggingFaceEncoder("shtoshni/spanbert_coreference_large", 1024) + {
    ckpt_dir: encoders_dir + "/spanbert_large"
  },
  spanbert_base_pt: HuggingFaceEncoder("shtoshni/spanbert_coreference_base", 768) + {
    ckpt_dir: encoders_dir + "/spanbert_base"
  },

  spanbert_large_fb: HuggingFaceEncoder("SpanBERT/spanbert-large-cased", 1024),

  custom_large: CustomEncoder("checkpoint-230000", 1024),
  custom_base: CustomEncoder("checkpoint-120000", 768),
  xlmr_large: HuggingFaceEncoder("xlm-roberta-large", 1024),

  // Finetuning functions
  // Mixin that replaces the default `finetune: false` with {layers: k}.
  finetune_top(k): {finetune: {layers: k}}
}


--------------------------------------------------------------------------------
/jsonnets/litbank.jsonnet:
--------------------------------------------------------------------------------
// file for litbank experiments
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
{
  // Set up LitBank base config, not usable on its own (missing Name)
  // data_size becomes num_train_examples; layers is passed to finetune_top.
  local Litbank(split, data_size, layers) = (
    base.base +
    encoders.spanbert_large +
    encoders.finetune_top(layers) +
    data.Litbank_split(split) +
    {singleton_eval: true,
     mentions: true,
     num_train_examples: data_size}
  ),

  // One config per (split, layers, num_examples, trial); load_model is true.
  // This is the only curve here whose num_examples list includes 100.
  data_curve_da: {
    ["litbank_curve_da_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
    Litbank(split, num_examples, layers) +
    base.Name("litbank_da_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10,
    load_model: true,
    }
    )
    for split in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    for layers in [0, 3, 6, 12, 24, 25]
    for num_examples in [5, 10, 20, 40, 80, 100]
    for trial in [0, 1]
  },

  // load_model false ("on"), plus an "onb" variant that overrides the encoder
  // with encoders.spanbert_base.
  data_curve_on: {
    ["litbank_curve_on_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
    Litbank(split, num_examples, layers) +
    base.Name("litbank_on_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false,
    }
    )
    for split in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    for layers in [0, 3, 6, 12, 24, 25]
    for num_examples in [5, 10, 20, 40, 80]
    for trial in [0, 1]
  } + {
    ["litbank_curve_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
    Litbank(split, num_examples, layers) +
    encoders.spanbert_base +
    base.Name("litbank_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false,
    }
    )
    for split in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    for layers in [0, 3, 6, 12, 24, 25]
    for num_examples in [5, 10, 20, 40, 80]
    for trial in [0, 1]
  },

  // load_model false with the encoder overridden by encoders.spanbert_large_fb.
  data_curve_fb: {
    ["litbank_curve_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
    Litbank(split, num_examples, layers) +
    encoders.spanbert_large_fb +
    base.Name("litbank_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
    {num_epochs: 100, patience: 10, load_model: false,
    }
    )
    for split in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    for layers in [0, 3, 6, 12, 24, 25]
    for num_examples in [5, 10, 20, 40, 80]
    for trial in [0, 1]
  },


  // Old exps, might be a subset of above.

  // Train on ontonotes, test on litbank
  // data.Litbank is merged after data.Ontonotes, so its fields take precedence.
  local Spb_on(genre, gold) = (
    base.base +
    encoders.spanbert_large +
    data.Ontonotes(512, genre) +
    base.Name("spb_on_litbank_" + genre + "_" + gold) +
    data.Litbank +
    base.trunc_linscale(1200)) + {
      use_gold_spans: gold
    },

  // Config built on data.Litbank_split(split) only (no data.Ontonotes mixin).
  // rate is used as teacher_forcing_rate.
  local Litbank_on(gold, split, rate) = (
    base.base +
    encoders.spanbert_large +
    data.Litbank_split(split) +
    base.trunc_linscale(1200) +
    base.Name("litbank_" + split + "_" + gold + "_" + rate)) + {
      use_gold_spans: gold,
      method: "alpha",
      teacher_forcing_rate: rate,
      adam_learning_rate: 2e-4,
      top_span_ratio: 0.30,
      max_span_width: 25,
      mentions: true,
      update_each_segment: true,
      singleton_eval: true,
    },

  on_litbank: {
   ["spb_on_litbank_" + genre + "_" + gold]:  Spb_on(genre, gold)
   for genre in [true, false]
   for gold in [true, false]
  },

  litbank: {
   ["litbank_" + split + "_" + gold + "_" + rate]:  Litbank_on(gold, split, rate)
   for gold in [true, false]
   for split in std.range(0, 9)
   for rate in [0, 1]
  },

  // NOTE(review): experiments.jsonnet does not merge litbank_exps.
  litbank_exps: {
    // NOTE(review): no Name override here, so run_name stays "litbank_1_true_1",
    // the same as the litbank_1_true_1 entry generated above.
    litbank_1_true_mean: Litbank_on(true, 1, 1) + {
      adam_learning_rate: 1e-03,
    },

    litbank_1_true_2: Litbank_on(true, 1, 1) + base.Name("litbank_1_true_2") +  {
      adam_learning_rate: 1e-03,
    },

    litbank_1_false: Litbank_on(false, 1, 0),
    litbank_2_false: Litbank_on(false, 2, 0),
    litbank_2_true: Litbank_on(true, 2, 0),
  }

}
130 | 


--------------------------------------------------------------------------------
/jsonnets/ontonotes.jsonnet:
--------------------------------------------------------------------------------
// file for ontonotes experiments
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
{

  // Complete config: run_name is prefix + "on_" + layers + "_" + data_size.
  // data_size becomes num_train_examples; layers is passed to finetune_top.
  local Ontonotes(data_size, layers, prefix) = (
    base.base +
    encoders.spanbert_large +
    encoders.finetune_top(layers) +
    data.Ontonotes(512, false) +
    base.Name(prefix + "on_" + layers + "_" + data_size) +
    {num_train_examples: data_size,}
  ),

  // Same config with the encoder overridden by encoders.spanbert_large_fb.
  local Ontonotes_fb(data_size, layers, prefix) = (
    Ontonotes(data_size, layers, prefix) +
    encoders.spanbert_large_fb
  ),

  on: {
    ["on_" + layers + "_" + data_size]: Ontonotes(data_size, layers, "")
    for layers in [0, 24, 25]
    for data_size in [2802]
  },

  // NOTE(review): keys are "fb_on_<layers>_<size>" but the prefix "on_" makes
  // run_name "on_on_<layers>_<size>"; confirm the prefix is intended.
  on_fb: {
    ["fb_on_" + layers + "_" + data_size]: Ontonotes_fb(data_size, layers, "on_")
    for layers in [24, 25]
    for data_size in [2802]
  },
}
33 | 


--------------------------------------------------------------------------------
/jsonnets/qbcoref.jsonnet:
--------------------------------------------------------------------------------
// file for qbcoref experiments
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
{

  // Set up base config
  // run_name defaults to prefix + "qb_" + split + "_" + layers + "_" + data_size;
  // every caller in this file passes prefix "".
  local Qb(data_size, layers, split, prefix) = (
    base.base +
    encoders.spanbert_large +
    encoders.finetune_top(layers) +
    data.Qbcoref_split(split) +
    base.Name(prefix + "qb_" + split + "_" + layers + "_" + data_size) +
    {singleton_eval: true,
     mentions: true,
     num_train_examples: data_size,
    }
  ),

  //finetune full model by default
  // NOTE(review): this passes layers=8 and data_size=null; confirm that 8
  // corresponds to the full model. The trailing Name sets run_name to "qb_<split>".
  qb: {
    ["qb_" + split]: Qb(null, 8, split, "") + base.Name("qb_" + split)
    for split in std.range(0, 4)
  },

  // One config per (layers, split, num_examples, trial); load_model is true.
  // The trailing Name replaces the one set inside Qb.
  data_curve_da: {
    ["qb_curve_da_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
      Qb(num_examples, layers, split, "") +
      base.Name("qbcoref_da_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
      {num_epochs: 100, patience: 10,
      load_model: true,
      })
    for layers in [0, 3, 6, 12, 25]
    for split in [0, 1, 2, 3, 4]
    for num_examples in [5, 10, 15, 30, 60, 120, 240]
    for trial in [0, 1]
  },

  // load_model false ("on"), plus an "onb" variant that overrides the encoder
  // with encoders.spanbert_base.
  data_curve_on: {
    ["qb_curve_on_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
      Qb(num_examples, layers, split, "") +
      base.Name("qbcoref_on_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
      {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [0, 3, 6, 12, 25]
    for split in [0, 1, 2, 3, 4]
    for num_examples in [5, 10, 15, 30, 60, 120, 240]
    for trial in [0, 1]
  } + {
    ["qb_curve_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial]: (
      Qb(num_examples, layers, split, "") +
      encoders.spanbert_base +
      base.Name("qbcoref_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
      {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [0, 3, 6, 12, 25]
    for split in [0, 1, 2, 3, 4]
    for num_examples in [5, 10, 15, 30, 60, 120, 240]
    for trial in [0, 1]
  },

  // load_model false with the encoder overridden by encoders.spanbert_large_fb.
  data_curve_fb: {
    ["qb_curve_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial]:  (
      Qb(num_examples, layers, split, "") +
      encoders.spanbert_large_fb +
      base.Name("qbcoref_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial) +
      {num_epochs: 100, patience: 10, load_model: false}
    )
    for layers in [0, 3, 6, 12, 25]
    for split in [0, 1, 2, 3, 4]
    for num_examples in [5, 10, 15, 30, 60, 120, 240]
    for trial in [0, 1]
  },

  // Not used for model transfer
  // Keeps the run_name set inside Qb; sets load_model and test_set to true.
  qb_curve: {
    ["qb_" + split + "_" + layers + "_" + data_size]: (Qb(data_size, layers, split, "") +
    {load_model: true, test_set: true})
    for layers in [24, 25]
    for split in std.range(0, 4)
    for data_size in [10, 25, 50, 100, 240, 320]
  },
}
84 | 


--------------------------------------------------------------------------------
/jsonnets/sara.jsonnet:
--------------------------------------------------------------------------------
// file for sara experiments
local base = import "base.jsonnet";
local data = import "data.jsonnet";
local encoders = import "encoders.jsonnet";
{
  // Sara base config. data_size becomes num_train_examples; layers is passed
  // to finetune_top. Every entry below replaces the run_name set here.
  // NOTE(review): `prefix` is accepted but never used in the body.
  local Sara(data_size, layers, split, prefix) = (
    base.base +
    encoders.spanbert_large +
    encoders.finetune_top(layers) +
    data.Sara(split) +
    base.Name("sara_" + split + "_" + layers)) + {
      mentions: true,
      singleton_eval: true,
      num_train_examples: data_size,
    },

  // Three merged grids: "da"/"on" (load_model true only for "da"), "onb"
  // (encoders.spanbert_base) and "fb" (encoders.spanbert_large_fb).
  // NOTE(review): `gold` only adds a "_gold" suffix to the key and run_name;
  // no config field (e.g. use_gold_spans) is set from it here. Confirm that the
  // gold/non-gold variants are meant to differ only by name.
  data_curve: {
    ["sara_curve_" + load_model + "_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")]: (
      Sara(num_examples, layers, split, "") +
      base.Name("sara_" + load_model + "_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")) +
      {num_epochs: 100, patience: 10,
      load_model: (load_model=="da"),
      })
    for layers in [25]
    for split in [0, 1, 2, 3, 4, 5, 6]
    for num_examples in [0, 10, 20, 40, 80, 200]
    for trial in [0, 1]
    for load_model in ["da", "on"]
    for gold in [true, false]
  } + {
    ["sara_curve_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")]: (
      Sara(num_examples, layers, split, "") +
      encoders.spanbert_base +
      base.Name("sara_onb_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")) +
      {num_epochs: 100, patience: 10,
      load_model: false,
      })
    for layers in [25]
    for split in [0, 1, 2, 3, 4, 5, 6]
    for num_examples in [0, 10, 20, 40, 80, 200]
    for trial in [0, 1]
    for gold in [true, false]
  } + {
    ["sara_curve_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")]: (
      Sara(num_examples, layers, split, "") +
      encoders.spanbert_large_fb +
      base.Name("sara_fb_" + split + "_" + layers + "_" + num_examples + "_" + trial + (if gold then "_gold" else "")) +
      {num_epochs: 100, patience: 10,
      load_model: false,
      })
    for layers in [25]
    for split in [0, 1, 2, 3, 4, 5, 6]
    for num_examples in [0, 10, 20, 40, 80, 200]
    for trial in [0, 1]
    for gold in [true, false]
  },


}


--------------------------------------------------------------------------------
/jsonnets/tests/dataset_test.jsonnet:
--------------------------------------------------------------------------------
// Sample dataset configs built from data.jsonnet, one per field.
local data = import "../data.jsonnet";
{

  Ontonotes_512_false: data.Ontonotes(512, false),
  Ontonotes_512_true: data.Ontonotes(512, true),
  // Later operands of `+` override earlier ones on shared fields.
  Ontonotes_test: data.Ontonotes(512, true) + data.Ontonotes_test(512),
  Ontonotes_to_litbank: data.Ontonotes(512, false) + data.Litbank_train,
}
9 | 


--------------------------------------------------------------------------------
/jsonnets/verify_jsonnet.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import _jsonnet
 3 | import json
 4 | 
 5 | if len(sys.argv) > 1:
 6 |     check_file = sys.argv[1]
 7 | else:
 8 |     check_file = "../experiments.jsonnet"
 9 | 
10 | f = json.loads(_jsonnet.evaluate_file(check_file))
11 | # print (f.keys())
12 | 
13 | print (f"OK! {len(f)} configs loaded! ({sys.getsizeof(f)} bytes)")
14 | 


--------------------------------------------------------------------------------
/merge_functions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Contains several implementations of possible merge functions
 3 | 
 4 | All functions have type
 5 | 
 6 | Cluster * Span * score --> emb
 7 | 
 8 | score has been unused thus far
 9 | """
10 | 
11 | 
12 | def first(cluster, span, score):
13 |   return _alpha_weighted(1.0, cluster.emb, span.emb)
14 | 
15 | def last(cluster, span, score):
16 |   return _alpha_weighted(0.0, cluster.emb, span.emb)
17 | 
18 | def mean(cluster, span, score):
19 |   alpha = (cluster.size) / (1 + cluster.size)
20 |   return _alpha_weighted(alpha, cluster.emb, span.emb)
21 | 
22 | def exp(cluster, span, score):
23 |   return _alpha_weighted(0.5, cluster.emb, span.emb)
24 | 
25 | def _alpha_weighted(alpha, emb1, emb2):
26 |   return (alpha * emb1 + (1.0 - alpha) * emb2, alpha)
27 | 
28 | MERGE_NAMES = {
29 |     "mean": mean,
30 |     "first": first,
31 |     "last": last,
32 |     "exp": exp,
33 | }
34 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | calmsize==0.1.3
 2 | certifi==2020.6.20
 3 | chardet==3.0.4
 4 | click==7.1.2
 5 | dataclasses==0.6
 6 | filelock==3.0.12
 7 | future==0.18.2
 8 | idna==2.10
 9 | joblib==0.17.0
10 | jsonnet==0.16.0
11 | numpy==1.19.4
12 | packaging==20.4
13 | pandas==1.1.4
14 | Pillow==8.0.1
15 | protobuf==3.13.0
16 | pyparsing==2.4.7
17 | python-dateutil==2.8.1
18 | pytz==2020.4
19 | regex==2020.10.28
20 | requests==2.24.0
21 | sacremoses==0.0.43
22 | scipy==1.5.2
23 | sentencepiece==0.1.94
24 | six==1.15.0
25 | tokenizers==0.9.2
26 | torch==1.7.0
27 | torchaudio==0.7.0
28 | torchvision==0.8.1
29 | tqdm==4.51.0
30 | transformers==3.4.0
31 | typing-extensions==3.7.4.3
32 | urllib3==1.25.11
33 | 


--------------------------------------------------------------------------------
/run_xlmr.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Run file assuming xlm-r tokenizer. Part of LOME project.
  3 | """
  4 | 
  5 | import logging
  6 | import json
  7 | import sys
  8 | import torch
  9 | 
 10 | from conversion_scripts.minimize_json import get_document
 11 | from incremental import Incremental
 12 | from inference import Predictor
 13 | from transformers import XLMRobertaTokenizer
 14 | import util
 15 | 
 16 | logging.basicConfig(level=logging.INFO)
 17 | 
 18 | def setup_model(config):
 19 |   config = util.initialize_from_env(use_overrides=False)
 20 |   incremental_model = Incremental(config)
 21 |   util.load_params(incremental_model, config["log_path"], "model")
 22 |   logging.info(f"Updating threshold to {config['threshold']}")
 23 |   incremental_model.set_threshold(config["threshold"])
 24 | 
 25 |   predictor = Predictor(incremental_model, [], config["singleton_eval"])
 26 |   return predictor
 27 | 
 28 | def predict(predictor, data):
 29 |   data["antecedent_map"] = {}  # Placeholder
 30 |   predictor.data = [data]  # We only have one example but it expects list
 31 |   with torch.no_grad():
 32 |     predictor.evaluate(perf=False)
 33 |   predictions = predictor.predictions
 34 |   return predictions
 35 | 
 36 | def minimize_data(json_blob):
 37 |   tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-large')
 38 |   text = []
 39 |   for sentence in json_blob["sentences"]:
 40 |     text.extend([[word] for word in sentence])
 41 |     text.append([])
 42 |   minimized_json = get_document((json_blob["doc_key"], text, json_blob["clusters"]),
 43 |                                 tokenizer, None, 512)
 44 |   return minimized_json
 45 | 
 46 | # Concrete-related wrappers
 47 | 
 48 | def read_from_concrete(concrete_input):
 49 |   """We can assume there's only one input file
 50 | 
 51 |   Returns a thin dict with text, doc key, and clusters
 52 |   """
 53 |   (data, comm) = next(reader.make_data_iter(concrete_input, None))
 54 |   data["doc_key"] = data["doc_id"] # same thing, doc key is used later
 55 |   return ((data, comm), {
 56 |     "doc_key": data["doc_id"],
 57 |     "sentences": data["sentences"],
 58 |     "clusters": [[list(span)] for span in data["mentions"]]
 59 |   })
 60 | 
 61 | def write_to_concrete(line_dict, examples_dict, concrete_output):
 62 |   augf = AnalyticUUIDGeneratorFactory()
 63 |   aug = augf.create()
 64 |   metadata = lambda: cmt.AnnotationMetadata(tool="jhu_xlmr_0:incremental_coref_v1",
 65 |                                             timestamp=now_timestamp())
 66 |   comm, _, _ = writer.convert_to_comm(line_dict, examples_dict, aug, metadata)
 67 |   write_communication_to_file(comm, concrete_output)
 68 | 
 69 | # Run files
 70 | 
 71 | def run(config, json_path, preds_file):
 72 |   # Only runs first example. This is raw json and needs to be minimized
 73 |   data = [json.loads(line) for line in open(json_path, 'r')][0]
 74 | 
 75 |   # Minimization is not idempotent, attempting a second time will malform input
 76 |   data = minimize_data(data)
 77 | 
 78 |   predictor = setup_model(config)
 79 |   predictions = predict(predictor, data)
 80 |   preds_output = open(preds_file, 'w+')
 81 |   preds_output.write(json.dumps([preds["clusters"] for preds in predictions]))
 82 | 
 83 | def run_concrete(config, concrete_input, concrete_output):
 84 |   from concrete_wrapper import reader, writer
 85 |   from concrete.util.concrete_uuid import AnalyticUUIDGeneratorFactory
 86 |   from concrete.util import write_communication_to_file, now_timestamp
 87 |   import concrete.metadata.ttypes as cmt
 88 |   full_data, data = read_from_concrete(concrete_input) # Return a dict
 89 |   data = minimize_data(data)
 90 |   predictor = setup_model(config)
 91 |   predictions = predict(predictor, data)[0] # Only care about last one
 92 |   data["predicted_clusters"] = json.loads(json.dumps(predictions["clusters"]))
 93 |   examples_dict = {data["doc_key"]: full_data}
 94 |   write_to_concrete(data, examples_dict, concrete_output)
 95 | 
 96 | if __name__ == "__main__":
 97 |   config = sys.argv[1]
 98 |   in_path = sys.argv[2]
 99 |   out_path = sys.argv[3]
100 |   # In LOME, we use input/output formatted under the concrete data scheme
101 |   # If in_path/out_path is plaintext (json), concrete should be False
102 |   concrete = sys.argv[4]
103 | 
104 |   if concrete:
105 |     run_concrete(config, in_path, out_path)
106 |   else:
107 |     run(config, in_path, out_path)
108 | 


--------------------------------------------------------------------------------