├── .gitignore
├── EVALB
│   ├── COLLINS.prm
│   ├── LICENSE
│   ├── Makefile
│   ├── README
│   ├── bug
│   │   ├── bug.gld
│   │   ├── bug.rsl-new
│   │   ├── bug.rsl-old
│   │   └── bug.tst
│   ├── evalb.c
│   ├── new.prm
│   ├── sample
│   │   ├── sample.gld
│   │   ├── sample.prm
│   │   ├── sample.rsl
│   │   └── sample.tst
│   └── tgrep_proc.prl
├── EVALB_SPMRL
│   ├── Makefile
│   ├── README
│   ├── README.orig
│   ├── evalb.c
│   ├── spmrl.prm
│   └── spmrl_hebrew.prm
├── LICENSE
├── README.md
├── data
│   ├── 02-21.10way.clean
│   ├── 22.auto.clean
│   └── 23.auto.clean
└── src
    ├── evaluate.py
    ├── main.py
    ├── parse.py
    ├── trees.py
    └── vocabulary.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # PyCharm directories
2 | .idea/
3 |
4 | # Mac OS X files
5 | .DS_Store
6 |
7 | # EVALB output files
8 | EVALB/evalb
9 | EVALB/evalb.dSYM/
10 |
11 | # Uncompressed models
12 | models/*.data
13 | models/*.meta
14 |
15 | # Python byte-compiled / optimized / DLL files
16 | __pycache__/
17 | *.py[cod]
18 | *$py.class
19 |
--------------------------------------------------------------------------------
/EVALB/COLLINS.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ##------------------------------------------##
6 | DEBUG 0
7 |
8 | ##------------------------------------------##
9 | ## MAX error ##
10 | ## Number of error to stop the process. ##
11 | ## This is useful if there could be ##
12 | ## tokenization error. ##
13 | ## The process will stop when this number##
14 | ## of errors are accumulated. ##
15 | ##------------------------------------------##
16 | MAX_ERROR 10
17 |
18 | ##------------------------------------------##
19 | ## Cut-off length for statistics ##
20 | ## At the end of evaluation, the ##
21 | ## statistics for the sentences of length##
22 | ## less than or equal to this number will##
23 | ## be shown, on top of the statistics ##
24 | ## for all the sentences ##
25 | ##------------------------------------------##
26 | CUTOFF_LEN 40
27 |
28 | ##------------------------------------------##
29 | ## unlabeled or labeled bracketing ##
30 | ## 0: unlabeled bracketing ##
31 | ## 1: labeled bracketing ##
32 | ##------------------------------------------##
33 | LABELED 1
34 |
35 | ##------------------------------------------##
36 | ## Delete labels ##
37 | ## list of labels to be ignored. ##
38 | ## If it is a pre-terminal label, delete ##
39 | ## the word along with the brackets. ##
40 | ## If it is a non-terminal label, just ##
41 | ## delete the brackets (don't delete ##
42 | ## children). ##
43 | ##------------------------------------------##
44 | DELETE_LABEL TOP
45 | DELETE_LABEL -NONE-
46 | DELETE_LABEL ,
47 | DELETE_LABEL :
48 | DELETE_LABEL ``
49 | DELETE_LABEL ''
50 | DELETE_LABEL .
51 |
52 | ##------------------------------------------##
53 | ## Delete labels for length calculation ##
54 | ## list of labels to be ignored for ##
55 | ## length calculation purpose ##
56 | ##------------------------------------------##
57 | DELETE_LABEL_FOR_LENGTH -NONE-
58 |
59 | ##------------------------------------------##
60 | ## Equivalent labels, words ##
61 | ## the pairs are considered equivalent ##
62 | ## This is non-directional. ##
63 | ##------------------------------------------##
64 | EQ_LABEL ADVP PRT
65 |
66 | # EQ_WORD Example example
67 |
--------------------------------------------------------------------------------
/EVALB/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/EVALB/Makefile:
--------------------------------------------------------------------------------
1 | all: evalb
2 |
3 | evalb: evalb.c
4 | gcc -Wall -g -o evalb evalb.c
5 |
--------------------------------------------------------------------------------
/EVALB/README:
--------------------------------------------------------------------------------
1 | #################################################################
2 | # #
3 | # Bug fix and additional functionality for evalb #
4 | # #
5 | # This updated version of evalb fixes a bug in which sentences #
6 | # were incorrectly categorized as "length mismatch" when the #
7 | # parse output had certain mislabeled parts-of-speech. #
8 | # #
9 | # The bug was the result of evalb treating one of the tags (in #
10 | # gold or test) as a label to be deleted (see sections [6],[7] #
11 | # for details), but not the corresponding tag in the other. #
12 | # This most often occurs with punctuation. See the subdir #
13 | # "bug" for an example gld and tst file demonstrating the bug, #
14 | # as well as output of evalb with and without the bug fix. #
15 | # #
16 | # In the present version, in case of length mismatch, the nodes #
17 | # causing the imbalance are reinserted to resolve the miscount. #
18 | # If the lengths of gold and test truly differ, the error is #
19 | # still reported. The parameter file "new.prm" (derived from #
20 | # COLLINS.prm) shows how to add new potential mislabelings for #
21 | # quotes (",``,',`). #
22 | # #
23 | # I have preserved DJB's revision for modern compilers except #
24 | # for the declaration of "exit" which is provided by stdlib. #
25 | # #
26 | # Other changes: #
27 | # #
28 | # * output of F-Measure in addition to precision and recall #
29 | # (I did not update the documentation in section [4] for this) #
30 | # #
31 | # * more comprehensive DEBUG output that includes bracketing #
32 | # information as evalb is processing each sentence #
33 | # (useful in working through this, and perhaps other bugs). #
34 | # Use either the "-D" run-time switch or set DEBUG to 2 in #
35 | # the parameter file. #
36 | # #
37 | # * added DELETE_LABEL lines in new.prm for S1 nodes produced #
38 | # by the Charniak parser and "?", "!" punctuation produced by #
39 | # the Bikel parser. #
40 | # #
41 | # #
42 | # David Ellis (Brown) #
43 | # #
44 | # January.2006 #
45 | #################################################################
46 |
47 | #################################################################
48 | # #
49 | # Update of evalb for modern compilers #
50 | # #
51 | # This is an updated version of evalb, for use with modern C #
52 | # compilers. There are a few updates, each marked in the code: #
53 | # #
54 | # /* DJB: explanation of comment */ #
55 | # #
56 | # The updates are purely to help compilation with recent #
57 | # versions of GCC (and other C compilers). There are *NO* other #
58 | # changes to the algorithm itself. #
59 | # #
60 | # I have made these changes following recommendations from #
61 | # users of the Corpora Mailing List, especially Peet Morris and #
62 | # Ramon Ziai. #
63 | # #
64 | # David Brooks (Birmingham) #
65 | # #
66 | # September.2005 #
67 | #################################################################
68 |
69 | #################################################################
70 | # #
71 | # README file for evalb #
72 | # #
73 | # Satoshi Sekine (NYU) #
74 | # Mike Collins (UPenn) #
75 | # #
76 | # October.1997 #
77 | #################################################################
78 |
79 | Contents of this README:
80 |
81 | [0] COPYRIGHT
82 | [1] INTRODUCTION
83 | [2] INSTALLATION AND RUN
84 | [3] OPTIONS
85 | [4] OUTPUT FORMAT FROM THE SCORER
86 | [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
87 | [6] THE PARAMETER FILE
88 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
89 |
90 |
91 | [0] COPYRIGHT
92 |
93 | The authors abandon the copyright of this program. Everyone is
94 | permitted to copy and distribute the program or a portion of the program
95 | with no charge and no restrictions unless it is harmful to someone.
96 |
97 | However, the authors would be delighted if users are kind enough to use
98 | the program properly and to let the authors know about bugs or problems.
99 |
100 | This software is provided "AS IS", and the authors make no warranties,
101 | express or implied.
102 |
103 | To legally enforce the abandonment of copyright, this package is released
104 | under the Unlicense (see LICENSE).
105 |
106 | [1] INTRODUCTION
107 |
108 | Evaluation of bracketing looks simple, but in fact, there are minor
109 | differences from system to system. This is a program to parameterize
110 | such minor differences and to give an informative result.
111 |
112 | "evalb" evaluates bracketing accuracy in a test-file against a gold-file.
113 | It returns recall, precision, and tagging accuracy. It uses an identical
114 | algorithm to that used in (Collins ACL97).
115 |
116 |
117 | [2] Installation and Run
118 |
119 | To compile the scorer, type
120 |
121 | > make
122 |
123 |
124 | To run the scorer:
125 |
126 | > evalb -p Parameter_file Gold_file Test_file
127 |
128 |
129 | For example to use the sample files:
130 |
131 | > evalb -p sample.prm sample.gld sample.tst
132 |
133 |
134 |
135 | [3] OPTIONS
136 |
137 | You can specify system parameters in the command line options.
138 | Other options concerning the evaluation metrics should be specified
139 | in the parameter file, described later.
140 |
141 | -p param_file parameter file
142 | -d debug mode
143 | -e n number of error to kill (default=10)
144 | -h help
145 |
146 |
147 |
148 | [4] OUTPUT FORMAT FROM THE SCORER
149 |
150 | The scorer gives individual scores for each sentence, for
151 | example:
152 |
153 | Sent. Matched Bracket Cross Correct Tag
154 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
155 | ============================================================================
156 | 1 8 0 100.00 100.00 5 5 5 0 6 5 83.33
157 |
158 | At the end of the output the === Summary === section gives statistics
159 | for all sentences, and for sentences <=40 words in length. The summary
160 | contains the following information:
161 |
162 | i) Number of sentences -- total number of sentences.
163 |
164 | ii) Number of Error/Skip sentences -- should both be 0 if there is no
165 | problem with the parsed/gold files.
166 |
167 | iii) Number of valid sentences = Number of sentences - Number of Error/Skip
168 | sentences
169 |
170 | iv) Bracketing recall = (number of correct constituents)
171 | ----------------------------------------
172 | (number of constituents in the goldfile)
173 |
174 | v) Bracketing precision = (number of correct constituents)
175 | ----------------------------------------
176 | (number of constituents in the parsed file)
177 |
178 | vi) Complete match = percentage of sentences where recall and precision are
179 | both 100%.
180 |
181 | vii) Average crossing = (number of constituents crossing a goldfile constituent)
182 | ----------------------------------------------------
183 | (number of sentences)
184 |
185 | viii) No crossing = percentage of sentences which have 0 crossing brackets.
186 |
187 | ix) 2 or less crossing = percentage of sentences which have <=2 crossing brackets.
188 |
189 | x) Tagging accuracy = percentage of correct POS tags (but see [7].3 for exact
190 | details of what is counted).
191 |
192 |
193 |
194 | [5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
195 |
196 |
197 | The gold and parsed files are in a format similar to this:
198 |
199 | (TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
200 |
201 | To create a gold file from the treebank:
202 |
203 | tgrep -wn '/.*/' | tgrep_proc.prl
204 |
205 | will produce a goldfile in the required format. ("tgrep -wn '/.*/'" prints
206 | parse trees, "tgrep_proc.prl" just skips blank lines).
207 |
208 | For example, to produce a goldfile for section 23 of the treebank:
209 |
210 | tgrep -wn '/.*/' | tail +90895 | tgrep_proc.prl | sed 2416q > sec23.gold
211 |
212 |
213 |
214 | [6] THE PARAMETER (.prm) FILE
215 |
216 |
217 | The .prm file sets options regarding the scoring method. COLLINS.prm gives
218 | the same scoring behaviour as the scorer used in (Collins 97). The options
219 | chosen were:
220 |
221 | 1) LABELED 1
222 |
223 | to give labelled precision/recall figures, i.e. a constituent must have the
224 | same span *and* label as a constituent in the goldfile.
225 |
226 | 2) DELETE_LABEL TOP
227 |
228 | Don't count the "TOP" label (which is always given in the output of tgrep)
229 | when scoring.
230 |
231 | 3) DELETE_LABEL -NONE-
232 |
233 | Remove traces (and all constituents which dominate nothing but traces) when
234 | scoring. For example
235 |
236 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
237 |
238 | would be processed to give
239 |
240 | .... (VP (VBD reported)) (. .)))
241 |
242 |
243 | 4)
244 | DELETE_LABEL , -- for the purposes of scoring remove punctuation
245 | DELETE_LABEL :
246 | DELETE_LABEL ``
247 | DELETE_LABEL ''
248 | DELETE_LABEL .
249 |
250 | 5) DELETE_LABEL_FOR_LENGTH -NONE- -- don't include traces when calculating
251 | the length of a sentence (important
252 | when classifying a sentence as <=40
253 | words or >40 words)
254 |
255 | 6) EQ_LABEL ADVP PRT
256 |
257 | Count ADVP and PRT as being the same label when scoring.
258 |
259 |
260 |
261 |
262 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
263 |
264 |
265 | 1) The scorer initially processes the files to remove all nodes specified
266 | by DELETE_LABEL in the .prm file. It also recursively removes nodes which
267 | dominate nothing due to all their children being removed. For example, if
268 | -NONE- is specified as a label to be deleted,
269 |
270 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
271 |
272 | would be processed to give
273 |
274 | .... (VP (VBD reported)) (. .)))
275 |
276 | 2) The scorer also removes all functional tags attached to non-terminals
277 | (functional tags are prefixed with "-" or "=" in the treebank). For example
278 | "NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
279 |
280 |
281 | 3) Tagging accuracy counts tags for all words *except* any tags which are
282 | deleted by a DELETE_LABEL specification in the .prm file. (For example, for
283 | COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
284 |
285 | 4) When calculating the length of a sentence, all words with POS tags not
286 | included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
287 | counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
288 | traces are removed before calculating the length of the sentence).
289 |
290 | 5) There are some subtleties in scoring when either the goldfile or parsed
291 | file contains multiple constituents for the same span which have the same
292 | non-terminal label. e.g. (NP (NP the man)) If the goldfile contains n
293 | constituents for the same span, and the parsed file contains m constituents
294 | with that nonterminal, the scorer works as follows:
295 |
296 | i) If m>n, then the precision is n/m, recall is 100%
297 |
298 | ii) If n>m, then the precision is 100%, recall is m/n.
299 |
300 | iii) If n==m, recall and precision are both 100%.
301 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.gld:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ (DT The) (NN Thy-1) (NN gene) (NN promoter) ) (VP (VBZ resembles) (NP (DT a) (`` ") (JJ housekeeping) ('' ") (NN promoter) ) (PP (IN in) (SBAR (IN that) (S (NP-SBJ-68 (PRP it) ) (VP-COOD (VP (VBZ is) (ADJP-PRD (JJ located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island) )))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NN TATA) (NN box) )) (, ,) (CC and) (VP (VBZ displays) (NP (NN heterogeneity) ) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini) ) (PP (IN of) (NP (DT the) (NN mRNA) )))))))))) (. .) ) )
2 | (TOP (S (NP-SBJ (DT The) (JJ latter) (`` ") (NP (NP (JJ nuclear) (NN factor) ) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells) ))) ('' ") ) (ADVP (RB likely) ) (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity) ) (PP (IN of) (NP (NN IL-2) (NN gene) (NN expression) ))))) (. .) ) )
3 | (TOP (S (ADVP (RB Thus) ) (, ,) (NP-SBJ (PRP we) ) (VP (VBD postulated) (SBAR-COOD (SBAR (IN that) (S (NP-SBJ (NP (DT the) (JJ circadian) (NN modification) ) (PP (IN of) (NP (NN GR) ))) (VP (VBD was) (ADJP-PRD (JJ independent) (PP (IN of) (NP-COOD (NP (NP (DT the) (JJ diurnal) (NNS fluctuations) ) (PP (IN in) (NP (NN plasma) (NN cortisol) (NN level) ))) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations) ) (PP (IN in) (NP (JJ environmental) (NN lighting) ))))))))) (CC and) (SBAR (IN that) (S (NP-SBJ-79 (DT the) (NN rhythmicity) ) (VP (MD might) (VP (VB be) (VP (VBN regulated) (NP (-NONE- *-79) ) (PP (IN by) (NP-LGS (NP (DT the) (`` ') (JJ circadian) (NN pacemaker) ('' ') ) (ADJP (JJ located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain) )))))))))))) (. .) ) )
4 | (TOP (S (NP-SBJ-70 (JJ Such) (NN transcription) (NNS factors) ) (VP (VBP play) (NP (DT a) (JJ key) (NN role) ) (PP (IN in) (NP (NP (DT the) (NN development) ) (PP (IN of) (NP (DT the) (JJ mature) (NN T-cell) (NN phenotype) )))) (PP (IN by) (S (NP-SBJ (-NONE- *-70) ) (VP (VBG functioning) (PP (IN as) (`` ') (NP (NP (JJ master) (NNS regulators) ) (PP (IN of) (NP (NN T-cell) (NN differentiation) ))) ('' ') ))))) (. .) ) )
5 | (TOP (S (NP-SBJ (NP (DT The) (NN conversion) ) (PP (IN of) (NP (DT the) (NN TCEd) )) (PP (TO to) (NP (DT a) (`` ') (JJ perfect) ('' ') (NN NF-kB) (NN binding) (NN site) ))) (VP-COOD (VP (VBZ leads) (PP (TO to) (NP-19 (NP (DT a) (JJR tighter) (NN binding) ) (PP (IN of) (NP (NN NF-kB) )) (PP (TO to) (NP (NN TCEd) (NN DNA) ))))) (CC and) (, ,) (VP (PP (IN as) (NP (DT a) (JJ functional) (NN consequence) )) (, ,) (PP (TO to) (NP=19 (NP (DT the) (NN activity) ) (PP (IN of) (NP (DT the) (`` ') (VBN converted) ('' ') (NN TCEd) (NNS motifs) )) (PP (IN in) (NP (NN HeLa) (NNS cells) )))))) (. .) ) )
6 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.rsl-new:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 37 0 77.27 65.38 17 22 26 5 34 27 79.41
5 | 2 21 0 69.23 64.29 9 13 14 2 20 16 80.00
6 | 3 47 0 80.00 82.35 28 35 34 4 44 40 90.91
7 | 4 26 0 35.29 37.50 6 17 16 8 25 18 72.00
8 | 5 44 0 42.31 33.33 11 26 33 17 38 28 73.68
9 | ============================================================================
10 | 62.83 57.72 71 113 123 0 161 129 80.12
11 | === Summary ===
12 |
13 | -- All --
14 | Number of sentence = 5
15 | Number of Error sentence = 0
16 | Number of Skip sentence = 0
17 | Number of Valid sentence = 5
18 | Bracketing Recall = 62.83
19 | Bracketing Precision = 57.72
20 | Bracketing FMeasure = 60.17
21 | Complete match = 0.00
22 | Average crossing = 7.20
23 | No crossing = 0.00
24 | 2 or less crossing = 20.00
25 | Tagging accuracy = 80.12
26 |
27 | -- len<=40 --
28 | Number of sentence = 3
29 | Number of Error sentence = 0
30 | Number of Skip sentence = 0
31 | Number of Valid sentence = 3
32 | Bracketing Recall = 61.54
33 | Bracketing Precision = 57.14
34 | Bracketing FMeasure = 59.26
35 | Complete match = 0.00
36 | Average crossing = 5.00
37 | No crossing = 0.00
38 | 2 or less crossing = 33.33
39 | Tagging accuracy = 77.22
40 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.rsl-old:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 : Length unmatch (33|35)
5 | 1 37 1 0.00 0.00 0 0 0 0 0 0 0.00
6 | 2 : Length unmatch (19|21)
7 | 2 21 1 0.00 0.00 0 0 0 0 0 0 0.00
8 | 3 : Length unmatch (44|45)
9 | 3 47 1 0.00 0.00 0 0 0 0 0 0 0.00
10 | 4 : Length unmatch (24|26)
11 | 4 26 1 0.00 0.00 0 0 0 0 0 0 0.00
12 | 5 : Length unmatch (38|39)
13 | 5 44 1 0.00 0.00 0 0 0 0 0 0 0.00
14 | ============================================================================
15 | 0 0 0.00
16 |
17 | === Summary ===
18 |
19 | -- All --
20 | Number of sentence = 5
21 | Number of Error sentence = 5
22 | Number of Skip sentence = 0
23 | Number of Valid sentence = 0
24 | Bracketing Recall = 0.00
25 | Bracketing Precision = 0.00
26 | Bracketing FMeasure = nan
27 | Complete match = 0.00
28 | Average crossing = 0.00
29 | No crossing = 0.00
30 | 2 or less crossing = 0.00
31 | Tagging accuracy = 0.00
32 |
33 | -- len<=40 --
34 | Number of sentence = 3
35 | Number of Error sentence = 3
36 | Number of Skip sentence = 0
37 | Number of Valid sentence = 0
38 | Bracketing Recall = 0.00
39 | Bracketing Precision = 0.00
40 | Bracketing FMeasure = nan
41 | Complete match = 0.00
42 | Average crossing = 0.00
43 | No crossing = 0.00
44 | 2 or less crossing = 0.00
45 | Tagging accuracy = 0.00
46 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.tst:
--------------------------------------------------------------------------------
1 | (S1 (S (NP (DT The) (JJ Thy-1) (NN gene) (NN promoter)) (VP (VP (VBZ resembles) (NP (NP (DT a) (ADJP (CD ") (NN housekeeping)) (NN ") (NN promoter)) (SBAR (WHPP (IN in) (WHNP (WDT that))) (S (NP (PRP it)) (VP (VBZ is) (VP (VBN located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island))))))))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NNP TATA) (NN box))) (, ,) (CC and) (VP (VBZ displays) (NP (NP (NN heterogeneity)) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini)) (PP (IN of) (NP (DT the) (NN mRNA)))))))) (. .)))
2 | (S1 (S (NP (NP (DT The) (JJ latter) (CD ") (JJ nuclear) (NN factor)) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells)))) (VP (VBZ ") (ADJP (JJ likely) (S (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity)) (PP (IN of) (NP (JJ IL-2) (NN gene) (NN expression))))))))) (. .)))
3 | (S1 (S (ADVP (RB Thus)) (, ,) (NP (PRP we)) (VP (VBD postulated) (SBAR (SBAR (IN that) (S (NP (NP (DT the) (JJ circadian) (NN modification)) (PP (IN of) (NP (NNP GR)))) (VP (VBD was) (ADJP (JJ independent) (PP (IN of) (NP (DT the) (JJ diurnal) (NNS fluctuations)))) (PP (IN in) (NP (NP (NN plasma) (JJ cortisol) (NN level)) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations)) (PP (IN in) (NP (JJ environmental) (NN lighting))))))))) (CC and) (SBAR (IN that) (S (NP (DT the) (NN rhythmicity)) (VP (MD might) (VP (VB be) (VP (VBN regulated) (PP (IN by) (NP (DT the) ('' ') (NP (JJ circadian) (NN pacemaker) (POS ')) (VP (VBN located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain))))))))))))) (. .)))
4 | (S1 (S (NP (JJ Such) (NN transcription) (NNS factors)) (VP (VBP play) (NP (NP (DT a) (JJ key) (NN role)) (PP (IN in) (NP (NP (DT the) (NN development)) (PP (IN of) (NP (NP (DT the) (JJ mature) (JJ T-cell) (NN phenotype)) (PP (IN by) (NP (NP (NN functioning) (RB as) (POS ')) (NN master) (NNS regulators))))) (PP (IN of) (NP (JJ T-cell) (NN differentiation) (POS '))))))) (. .)))
5 | (S1 (S (NP (NP (DT The) (NN conversion)) (PP (IN of) (NP (DT the)))) (VP (VBD TCEd) (PP (TO to) (NP (NP (DT a) ('' ') (JJ perfect) ('' ') (NN NF-kB)) (SBAR (S (NP (JJ binding) (NN site)) (VP (VBZ leads) (PP (TO to) (NP (NP (NP (DT a) (ADJP (RBR tighter) (JJ binding)) (PP (IN of) (NP (NP (NNS NF-kB)) (PP (PP (TO to) (NP (JJ TCEd) (NN DNA))) (CC and) (PP (, ,) (PP (IN as) (NP (DT a) (JJ functional) (NN consequence))) (, ,) (TO to) (NP (NP (DT the) (NN activity)) (PP (IN of) (NP (DT the)))))))) (POS ')) (JJ converted) ('' ') (JJ TCEd) (NNS motifs)) (PP (IN in) (NP (NNP HeLa) (NNS cells))))))))))) (. .)))
6 |
--------------------------------------------------------------------------------
/EVALB/evalb.c:
--------------------------------------------------------------------------------
1 | /*****************************************************************/
2 | /* evalb [-p param_file] [-dh] [-e n] gold-file test-file */
3 | /* */
4 | /* Evaluate bracketing in test-file against gold-file. */
5 | /* Return recall, precision, tagging accuracy. */
6 | /* */
7 | /* */
8 | /* -p param_file parameter file */
9 | /* -d debug mode */
10 | /* -e n number of error to kill (default=10) */
11 | /* -h help */
12 | /* */
13 | /* Satoshi Sekine (NYU) */
14 | /* Mike Collins (UPenn) */
15 | /* */
16 | /* October.1997 */
17 | /* */
18 | /* Please refer to the README for update information */
19 | /*****************************************************************/
20 |
21 | #include <stdio.h>
22 | #include <stdlib.h> //### added for exit, atoi decls
23 | #include <string.h>
24 | #include <ctype.h>
25 |
26 |
27 | /* Internal Data format -------------------------------------------*/
28 | /* */
29 | /* (S (NP (NNX this)) (VP (VBX is) (NP (DT a) (NNX pen))) (SYM .)) */
30 | /* */
31 | /* wn=5 */
32 | /* word label */
33 | /* terminal[0] = this NNX */
34 | /* terminal[1] = is VBX */
35 | /* terminal[2] = a DT */
36 | /* terminal[3] = pen NNX */
37 | /* terminal[4] = . SYM */
38 | /* */
39 | /* bn=4 */
40 | /* start end label */
41 | /* bracket[0] = 0 5 S */
42 | /* bracket[1] = 0 1 NP */
43 | /* bracket[2] = 1 4 VP */
44 | /* bracket[3] = 2 4 NP */
45 | /* */
46 | /* matched bracketing */
47 | /* Recall = --------------------------- */
48 | /* # of bracket in ref-data */
49 | /* */
50 | /* matched bracketing */
51 | /* Precision = --------------------------- */
52 | /* # of bracket in test-data */
53 | /* */
54 | /*-----------------------------------------------------------------*/
55 |
56 | /******************/
57 | /* constant macro */
58 | /******************/
59 |
60 | #define MAX_SENT_LEN 5000
61 | #define MAX_WORD_IN_SENT 200
62 | #define MAX_BRACKET_IN_SENT 200
63 | #define MAX_WORD_LEN 100
64 | #define MAX_LABEL_LEN 30
65 | #define MAX_QUOTE_TERM 20
66 |
67 | #define MAX_DELETE_LABEL 100
68 | #define MAX_EQ_LABEL 100
69 | #define MAX_EQ_WORD 100
70 |
71 | #define MAX_LINE_LEN 500
72 |
73 | #define DEFAULT_MAX_ERROR 10
74 | #define DEFAULT_CUT_LEN 40
75 |
76 | /*************/
77 | /* structure */
78 | /*************/
79 |
80 | typedef struct ss_terminal {
81 | char word[MAX_WORD_LEN];
82 | char label[MAX_LABEL_LEN];
83 | int result; /* 0:unmatch, 1:match, 9:undef */
84 | } s_terminal;
85 |
86 | typedef struct ss_term_ind {
87 | s_terminal term;
88 | int index;
89 | int bracket;
90 | int endslen;
91 | int ends[MAX_BRACKET_IN_SENT];
92 | } s_term_ind;
93 |
94 | typedef struct ss_bracket {
95 | int start;
96 | int end;
97 | unsigned int buf_start;
98 | unsigned int buf_end;
99 | char label[MAX_LABEL_LEN];
100 | int result; /* 0: unmatch, 1:match, 5:delete 9:undef */
101 | } s_bracket;
102 |
103 |
104 | typedef struct ss_equiv {
105 | char *s1;
106 | char *s2;
107 | } s_equiv;
108 |
109 |
110 | /****************************/
111 | /* global variables */
112 | /* gold-data: suffix = 1 */
113 | /* test-data: suffix = 2 */
114 | /****************************/
115 |
116 | /*---------------*/
117 | /* Sentence data */
118 | /*---------------*/
119 | int wn1, wn2; /* number of words in sentence */
120 | int r_wn1; /* number of words in sentence */
121 | /* which only ignores labels in */
122 | /* DELETE_LABEL_FOR_LENGTH */
123 |
124 | s_terminal terminal1[MAX_WORD_IN_SENT]; /* terminal information */
125 | s_terminal terminal2[MAX_WORD_IN_SENT];
126 |
127 | s_term_ind quotterm1[MAX_QUOTE_TERM]; /* special terminals ("'","POS") */
128 | s_term_ind quotterm2[MAX_QUOTE_TERM];
129 |
130 | int bn1, bn2; /* number of brackets */
131 |
132 | int r_bn1, r_bn2; /* number of brackets */
133 | /* after deletion */
134 |
135 | s_bracket bracket1[MAX_BRACKET_IN_SENT]; /* bracket information */
136 | s_bracket bracket2[MAX_BRACKET_IN_SENT];
137 |
138 |
139 | /*------------*/
140 | /* Total data */
141 | /*------------*/
142 | int TOTAL_bn1, TOTAL_bn2, TOTAL_match; /* total number of brackets */
143 | int TOTAL_sent; /* No. of sentence */
144 | int TOTAL_error_sent; /* No. of error sentence */
145 | int TOTAL_skip_sent; /* No. of skip sentence */
146 | int TOTAL_comp_sent; /* No. of complete match sent */
147 | int TOTAL_word; /* total number of word */
148 | int TOTAL_crossing; /* total crossing */
149 | int TOTAL_no_crossing; /* no crossing sentence */
150 | int TOTAL_2L_crossing; /* 2 or less crossing sentence */
151 | int TOTAL_correct_tag; /* total correct tagging */
152 |
153 | int TOT_cut_len = DEFAULT_CUT_LEN; /* Cut-off length in statistics */
154 |
155 | /* data for sentences with len <= CUT_LEN */
156 | /* Historically it was 40. */
157 | int TOT40_bn1, TOT40_bn2, TOT40_match; /* total number of brackets */
158 | int TOT40_sent; /* No. of sentence */
159 | int TOT40_error_sent; /* No. of error sentence */
160 | int TOT40_skip_sent; /* No. of skip sentence */
161 | int TOT40_comp_sent; /* No. of complete match sent */
162 | int TOT40_word; /* total number of word */
163 | int TOT40_crossing; /* total crossing */
164 | int TOT40_no_crossing; /* no crossing sentence */
165 | int TOT40_2L_crossing; /* 2 or less crossing sentence */
166 | int TOT40_correct_tag; /* total correct tagging */
167 |
168 | /*------------*/
169 | /* miscellaneous */
170 | /*------------*/
171 | int Line; /* line number */
172 | int Error_count = 0; /* Error count */
173 | int Status; /* Result status for each sent */
174 | /* 0: OK, 1: skip, 2: error */
175 |
176 | /*-------------------*/
177 | /* stack manipulation */
178 | /*-------------------*/
179 | int stack_top;
180 | int stack[MAX_BRACKET_IN_SENT];
181 |
182 | /************************************************************/
183 | /* User parameters which can be specified in parameter file */
184 | /************************************************************/
185 |
186 | /*------------------------------------------*/
187 | /* Debug mode */
188 | /* print out data for individual sentence */
189 | /*------------------------------------------*/
190 | int DEBUG=0;
191 |
192 | /*------------------------------------------*/
193 | /* MAX error */
194 | /* Number of error to stop the process. */
195 | /* This is useful if there could be */
196 | /* tokenization error. */
197 | /* The process will stop when this number*/
198 | /* of errors are accumulated. */
199 | /*------------------------------------------*/
200 | int Max_error = DEFAULT_MAX_ERROR;
201 |
202 | /*------------------------------------------*/
203 | /* Cut-off length for statistics */
204 | /* int TOT_cut_len = DEFAULT_CUT_LEN; */
205 | /* (Defined above) */
206 | /*------------------------------------------*/
207 |
208 |
209 | /*------------------------------------------*/
210 | /* unlabeled or labeled bracketing */
211 | /* 0: unlabeled bracketing */
212 | /* 1: labeled bracketing */
213 | /*------------------------------------------*/
214 | int F_label = 1;
215 |
216 | /*------------------------------------------*/
217 | /* Delete labels */
218 | /* list of labels to be ignored. */
219 | /* If it is a pre-terminal label, delete */
220 | /* the word along with the brackets. */
221 | /* If it is a non-terminal label, just */
222 | /* delete the brackets (don't delete */
223 | /* children). */
224 | /*------------------------------------------*/
225 | char *Delete_label[MAX_DELETE_LABEL];
226 | int Delete_label_n = 0;
227 |
228 | /*------------------------------------------*/
229 | /* Delete labels for length calculation */
230 | /* list of labels to be ignored for */
231 | /* length calculation purpose */
232 | /*------------------------------------------*/
233 | char *Delete_label_for_length[MAX_DELETE_LABEL];
234 | int Delete_label_for_length_n = 0;
235 |
236 | /*------------------------------------------*/
237 | /* Labels to be considered for misquote */
238 | /* (could be possessive or quote) */
239 | /*------------------------------------------*/
240 | char *Quote_term[MAX_QUOTE_TERM];
241 | int Quote_term_n = 0;
242 |
243 | /*------------------------------------------*/
244 | /* Equivalent labels, words */
245 | /* the pairs are considered equivalent */
246 | /* This is non-directional. */
247 | /*------------------------------------------*/
248 | s_equiv EQ_label[MAX_EQ_LABEL];
249 | int EQ_label_n = 0;
250 |
251 | s_equiv EQ_word[MAX_EQ_WORD];
252 | int EQ_word_n = 0;
253 |
254 |
255 |
256 | /************************/
257 | /* Function return-type */
258 | /************************/
259 | int main();
260 | void init_global();
261 | void print_head();
262 | void init();
263 | void read_parameter_file();
264 | void set_param();
265 | int narg();
266 | int read_line();
267 |
268 | void pushb();
269 | int popb();
270 | int stackempty();
271 |
272 | void calc_result(unsigned char *buf1,unsigned char *buf);
273 | void fix_quote();
274 | void reinsert_term();
275 | void massage_data();
276 | void modify_label();
277 | void individual_result();
278 | void print_total();
279 | void dsp_info();
280 | int is_terminator();
281 | int is_deletelabel();
282 | int is_deletelabel_for_length();
283 | int is_quote_term();
284 | int word_comp();
285 | int label_comp();
286 |
287 | void Error();
288 | void Fatal();
289 | void Usage();
290 |
291 | /* ### provided by std headers
292 | int fprintf();
293 | int printf();
294 | int atoi();
295 | int fclose();
296 | int sscanf();
297 | */
298 |
299 | /***********/
300 | /* program */
301 | /***********/
302 | #define ARG_CHECK(st) if(!(*++(*argv) || (--argc && *++argv))){ \
303 | fprintf(stderr,"Missing argument: %s\n",st); \
304 | }
305 |
306 | int
307 | main(argc,argv)
308 | int argc;
309 | char *argv[];
310 | {
311 | char *filename1, *filename2;
312 | FILE *fd1, *fd2;
313 | unsigned char buff[5000];
314 | unsigned char buff1[5000];
315 |
316 | filename1=NULL;
317 | filename2=NULL;
318 |
319 | for(argc--,argv++;argc>0;argc--,argv++){
320 | if(**argv == '-'){
321 | while(*++(*argv)){
322 | switch(**argv){
323 |
324 | case 'h': /* help */
325 | Usage();
326 | exit(1);
327 |
328 | case 'd': /* debug mode */
329 | DEBUG = 1;
330 | goto nextarg;
331 |
332 | case 'D': /* debug mode */
333 | DEBUG = 2;
334 | goto nextarg;
335 |
336 | case 'c': /* cut-off length */
337 | ARG_CHECK("cut-off length for statistics");
338 | TOT_cut_len = atoi(*argv);
339 | goto nextarg;
340 |
341 | case 'e': /* max error */
342 | ARG_CHECK("number of error to kill");
343 | Max_error = atoi(*argv);
344 | goto nextarg;
345 |
346 | case 'p': /* parameter file */
347 | ARG_CHECK("parameter file");
348 | read_parameter_file(*argv);
349 | goto nextarg;
350 |
351 | default:
352 | Usage();
353 | exit(0);
354 | }
355 | }
356 | } else {
357 | if(filename1==NULL){
358 | filename1 = *argv;
359 | }else if(filename2==NULL){
360 | filename2 = *argv;
361 | }
362 | }
363 | nextarg: continue;
364 | }
365 |
366 | init_global();
367 |
368 |
369 | if((fd1 = fopen(filename1,"r"))==NULL){
370 | Fatal("Can't open gold file (%s)\n",filename1);
371 | }
372 | if((fd2 = fopen(filename2,"r"))==NULL){
373 | Fatal("Can't open test file (%s)\n",filename2);
374 | }
375 |
376 | print_head();
377 |
378 | for(Line=1;fgets(buff,5000,fd1)!=NULL;Line++){
379 |
380 | init();
381 |
382 | /* READ 1 */
383 | r_wn1 = read_line(buff,terminal1,quotterm1,&wn1,bracket1,&bn1);
384 |
385 | strcpy(buff1,buff);
386 |
387 | /* READ 2 */
388 | if(fgets(buff,5000,fd2)==NULL){
389 | Error("Number of lines unmatch (too many lines in gold file)\n");
390 | break;
391 | }
392 |
393 | read_line(buff,terminal2,quotterm2,&wn2,bracket2,&bn2);
394 |
395 | /* Calculate result and print it */
396 | calc_result(buff1,buff);
397 |
398 | if(DEBUG>=1){
399 | dsp_info();
400 | }
401 | }
402 |
403 | if(fgets(buff,5000,fd2)!=NULL){
404 | Error("Number of lines unmatch (too many lines in test file)\n");
405 | }
406 |
407 | print_total();
408 |
409 | return (0);
410 | }
411 |
412 |
413 | /*-----------------------------*/
414 | /* initialize global variables */
415 | /*-----------------------------*/
416 | void
417 | init_global()
418 | {
419 | TOTAL_bn1 = TOTAL_bn2 = TOTAL_match = 0;
420 | TOTAL_sent = TOTAL_error_sent = TOTAL_skip_sent = TOTAL_comp_sent = 0;
421 | TOTAL_word = TOTAL_correct_tag = 0;
422 | TOTAL_crossing = 0;
423 | TOTAL_no_crossing = TOTAL_2L_crossing = 0;
424 |
425 | TOT40_bn1 = TOT40_bn2 = TOT40_match = 0;
426 | TOT40_sent = TOT40_error_sent = TOT40_skip_sent = TOT40_comp_sent = 0;
427 | TOT40_word = TOT40_correct_tag = 0;
428 | TOT40_crossing = 0;
429 | TOT40_no_crossing = TOT40_2L_crossing = 0;
430 |
431 | }
432 |
433 |
434 | /*------------------*/
435 | /* print head title */
436 | /*------------------*/
437 | void
438 | print_head()
439 | {
440 | printf(" Sent. Matched Bracket Cross Correct Tag\n");
441 | printf(" ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy\n");
442 | printf("============================================================================\n");
443 | }
444 |
445 |
446 | /*-----------------------------------------------*/
447 | /* initialization at each individual computation */
448 | /*-----------------------------------------------*/
449 | void
450 | init()
451 | {
452 | int i;
453 |
454 | wn1 = 0;
455 | wn2 = 0;
456 | bn1 = 0;
457 | bn2 = 0;
458 | r_bn1 = 0;
459 | r_bn2 = 0;
460 |
461 | for(i=0;i<MAX_WORD_IN_SENT;i++){
517 | for(i=strlen(buff)-1;i>0 && (isspace(buff[i]) || buff[i]=='\n');i--){
518 | buff[i]='\0';
519 | }
520 | if(buff[0]=='#' || /* comment-line */
521 | strlen(buff)<3){ /* too short, just ignore */
522 | continue;
523 | }
524 |
525 | /* place the parameter and value */
526 | /*-------------------------------*/
527 | for(i=0;!isspace(buff[i]);i++);
528 | for(;isspace(buff[i]) && buff[i]!='\0';i++);
529 | if(buff[i]=='\0'){
530 | fprintf(stderr,"Empty value in parameter file (%d)\n",line);
531 | }
532 |
533 | /* set parameter and value */
534 | /*-------------------------*/
535 | set_param(buff,buff+i);
536 | }
537 |
538 | fclose(fd);
539 | }
540 |
541 |
542 | #define STRNCMP(s) (strncmp(param,s,strlen(s))==0 && \
543 | (param[strlen(s)]=='\0' || isspace(param[strlen(s)])))
544 |
545 |
546 | void
547 | set_param(param,value)
548 | char *param, *value;
549 | {
550 | char l1[MAX_LABEL_LEN], l2[MAX_LABEL_LEN];
551 |
552 | if(STRNCMP("DEBUG")){
553 |
554 | DEBUG = atoi(value);
555 |
556 | }else if(STRNCMP("MAX_ERROR")){
557 |
558 | Max_error = atoi(value);
559 |
560 | }else if(STRNCMP("CUTOFF_LEN")){
561 |
562 | TOT_cut_len = atoi(value);
563 |
564 | }else if(STRNCMP("LABELED")){
565 |
566 | F_label = atoi(value);
567 |
568 | }else if(STRNCMP("DELETE_LABEL")){
569 |
570 | Delete_label[Delete_label_n] = (char *)malloc(strlen(value)+1);
571 | strcpy(Delete_label[Delete_label_n],value);
572 | Delete_label_n++;
573 |
574 | }else if(STRNCMP("DELETE_LABEL_FOR_LENGTH")){
575 |
576 | Delete_label_for_length[Delete_label_for_length_n] = (char *)malloc(strlen(value)+1);
577 | strcpy(Delete_label_for_length[Delete_label_for_length_n],value);
578 | Delete_label_for_length_n++;
579 |
580 | }else if(STRNCMP("QUOTE_LABEL")){
581 |
582 | Quote_term[Quote_term_n] = (char *)malloc(strlen(value)+1);
583 | strcpy(Quote_term[Quote_term_n],value);
584 | Quote_term_n++;
585 |
586 | }else if(STRNCMP("EQ_LABEL")){
587 |
588 | if(narg(value)!=2){
589 | fprintf(stderr,"EQ_LABEL requires two values\n");
590 | return;
591 | }
592 | sscanf(value,"%s %s",l1,l2);
593 | EQ_label[EQ_label_n].s1 = (char *)malloc(strlen(l1)+1);
594 | strcpy(EQ_label[EQ_label_n].s1,l1);
595 | EQ_label[EQ_label_n].s2 = (char *)malloc(strlen(l2)+1);
596 | strcpy(EQ_label[EQ_label_n].s2,l2);
597 | EQ_label_n++;
598 |
599 | }else if(STRNCMP("EQ_WORD")){
600 |
601 | if(narg(value)!=2){
602 | fprintf(stderr,"EQ_WORD requires two values\n");
603 | return;
604 | }
605 | sscanf(value,"%s %s",l1,l2);
606 | EQ_word[EQ_word_n].s1 = (char *)malloc(strlen(l1)+1);
607 | strcpy(EQ_word[EQ_word_n].s1,l1);
608 | EQ_word[EQ_word_n].s2 = (char *)malloc(strlen(l2)+1);
609 | strcpy(EQ_word[EQ_word_n].s2,l2);
610 | EQ_word_n++;
611 |
612 | }else{
613 |
614 | fprintf(stderr,"Unknown keyword (%s) in parameter file\n",param);
615 |
616 | }
617 | }
618 |
619 |
620 | int
621 | narg(s)
622 | char *s;
623 | {
624 | int n;
625 |
626 | for(n=0;*s!='\0';){
627 | for(;isspace(*s);s++);
628 | if(*s=='\0'){
629 | break;
630 | }
631 | n++;
632 | for(;!isspace(*s);s++){
633 | if(*s=='\0'){
634 | break;
635 | }
636 | }
637 | }
638 |
639 | return(n);
640 | }
641 |
642 | /*-----------------------------*/
643 | /* Read line and gather data. */
644 | /* Return length of sentence. */
645 | /*-----------------------------*/
646 | int
647 | read_line(buff, terminal, quotterm, wn, bracket, bn)
648 | char *buff;
649 | s_terminal terminal[];
650 | s_term_ind quotterm[];
651 | int *wn;
652 | s_bracket bracket[];
653 | int *bn;
654 | {
655 | char *p, *q, label[MAX_LABEL_LEN], word[MAX_WORD_LEN];
656 | int qt; /* quote term counter */
657 | int wid, bid; /* word ID, bracket ID */
658 | int n; /* temporary remembering the position */
659 | int b; /* temporary remembering bid */
660 | int i;
661 | int len; /* length of the sentence */
662 |
663 | len = 0;
664 | stack_top=0;
665 |
666 | for(p=buff,qt=0,wid=0,bid=0;*p!='\0';){
667 |
668 | if(isspace(*p)){
669 | p++;
670 | continue;
671 |
672 | /* open bracket */
673 | /*--------------*/
674 | }else if(*p=='('){
675 |
676 | n=wid;
677 | for(p++,i=0;!is_terminator(*p);p++,i++){
678 | label[i]=*p;
679 | }
680 | label[i]='\0';
681 |
682 | /* Find terminals */
683 | q = p;
684 | if(isspace(*q)){
685 | for(q++;isspace(*q);q++);
686 | for(i=0;!is_terminator(*q);q++,i++){
687 | word[i]=*q;
688 | }
689 | word[i]='\0';
690 |
691 | /* compute length */
692 | if(*q==')' && !is_deletelabel_for_length(label)==1){
693 | len++;
694 | }
695 | if (DEBUG>1)
696 | printf("label=%s, word=%s, wid=%d\n",label,word,wid);
697 | /* quote terminal */
698 | if(*q==')' && is_quote_term(label,word)==1){
699 | strcpy(quotterm[qt].term.word,word);
700 | strcpy(quotterm[qt].term.label,label);
701 | quotterm[qt].index = wid;
702 | quotterm[qt].bracket = bid;
703 | quotterm[qt].endslen = stack_top;
704 | //quotterm[qt].ends = (int*)malloc(stack_top*sizeof(int));
705 | memcpy(quotterm[qt].ends,stack,stack_top*sizeof(int));
706 | qt++;
707 | }
708 |
709 | /* delete terminal */
710 | if(*q==')' && is_deletelabel(label)==1){
711 | p = q+1;
712 | continue;
713 |
714 | /* valid terminal */
715 | }else if(*q==')'){
716 | strcpy(terminal[wid].word,word);
717 | strcpy(terminal[wid].label,label);
718 | wid++;
719 | p = q+1;
720 | continue;
721 |
722 | /* error */
723 | }else if(*q!='('){
724 | Error("More than two elements in a bracket\n");
725 | }
726 | }
727 |
728 | /* otherwise non-terminal label */
729 | bracket[bid].start = wid;
730 | bracket[bid].buf_start = p-buff;
731 | strcpy(bracket[bid].label,label);
732 | pushb(bid);
733 | bid++;
734 |
735 | /* close bracket */
736 | /*---------------*/
737 | }else if(*p==')'){
738 |
739 | b = popb();
740 | bracket[b].end = wid;
741 | bracket[b].buf_end = p-buff;
742 | p++;
743 |
744 | /* error */
745 | /*-------*/
746 | }else{
747 |
748 | Error("Reading sentence\n");
749 | }
750 | }
751 |
752 | if(!stackempty()){
753 | Error("Bracketing is unbalanced (too many open bracket)\n");
754 | }
755 |
756 | *wn = wid;
757 | *bn = bid;
758 |
759 | return(len);
760 | }
761 |
762 |
763 | /*----------------------*/
764 | /* stack operation */
765 | /* for bracketing pairs */
766 | /*----------------------*/
767 | void
768 | pushb(item)
769 | int item;
770 | {
771 | stack[stack_top++]=item;
772 | }
773 |
774 | int
775 | popb()
776 | {
777 | int item;
778 |
779 | item = stack[stack_top-1];
780 |
781 | if(stack_top-- < 0){
782 | Error("Bracketing unbalance (too many close bracket)\n");
783 | }
784 | return(item);
785 | }
786 |
787 | int
788 | stackempty()
789 | {
790 | if(stack_top==0){
791 | return(1);
792 | }else{
793 | return(0);
794 | }
795 | }
796 |
797 |
798 | /*------------------*/
799 | /* calculate result */
800 | /*------------------*/
801 | void
802 | calc_result(unsigned char *buf1,unsigned char *buf)
803 | {
804 | int i, j, l;
805 | int match, crossing, correct_tag;
806 |
807 | int last_i = -1;
808 |
809 | char my_buf[1000];
810 | int match_found = 0;
811 |
812 | char match_j[200];
813 | for (j = 0; j < bn2; ++j) {
814 | match_j[j] = 0;
815 | }
816 |
817 | /* ML */
818 | if (DEBUG>1)
819 | printf("\n");
820 |
821 |
822 | /* Find skip and error */
823 | /*---------------------*/
824 | if(wn2==0){
825 | Status = 2;
826 | individual_result(0,0,0,0,0,0);
827 | return;
828 | }
829 |
830 | if(wn1 != wn2){
831 | //if (DEBUG>1)
832 | //Error("Length unmatch (%d|%d)\n",wn1,wn2);
833 | fix_quote();
834 | if(wn1 != wn2){
835 | Error("Length unmatch (%d|%d)\n",wn1,wn2);
836 | individual_result(0,0,0,0,0,0);
837 | return;
838 | }
839 | }
840 |
841 | for(i=0;i<bn1;i++) {
860 | if (DEBUG>1)
861 | printf("1.res=%d, 2.res=%d, 1.start=%d, 2.start=%d, 1.end=%d, 2.end=%d\n",bracket1[i].result,bracket2[j].result,bracket1[i].start,bracket2[j].start,bracket1[i].end,bracket2[j].end);
862 |
863 | // does bracket match?
864 | if(bracket1[i].result != 5 &&
865 | bracket2[j].result == 0 &&
866 | bracket1[i].start == bracket2[j].start && bracket1[i].end == bracket2[j].end) {
867 |
868 | // (1) do we not care about the label or (2) does the label match?
869 | if (F_label==0 || label_comp(bracket1[i].label,bracket2[j].label)==1) {
870 | bracket1[i].result = bracket2[j].result = 1;
871 | match++;
872 | match_found = 1;
873 | break;
874 | } else {
875 | if (DEBUG>1) {
876 | printf(" LABEL[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
877 | l = bracket1[i].buf_end-bracket1[i].buf_start;
878 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
879 | my_buf[l] = '\0';
880 | printf("%s\n",my_buf);
881 | }
882 | match_found = 1;
883 | match_j[j] = 1;
884 | }
885 | }
886 | }
887 |
888 | if (!match_found && bracket1[i].result != 5 && DEBUG>1) {
889 | /* ### ML 09/28/03: gold bracket with no corresponding test bracket */
890 | printf(" BRACKET[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
891 | l = bracket1[i].buf_end-bracket1[i].buf_start;
892 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
893 | my_buf[l] = '\0';
894 | printf("%s\n",my_buf);
895 | }
896 | match_found = 0;
897 | }
898 |
899 | for(j=0;j<bn2;j++) {
900 | if (match_j[j]==0 && bracket2[j].result == 0 && DEBUG>1) {
901 | /* test bracket with no corresponding gold bracket */
902 | printf(" EXTRA[%d-%d]: ",bracket2[j].start,bracket2[j].end-1);
903 | l = bracket2[j].buf_end-bracket2[j].buf_start;
904 | strncpy(my_buf,buf+bracket2[j].buf_start,l);
905 | my_buf[l] = '\0';
906 | printf("%s\n",my_buf);
907 | }
908 | }
909 |
910 | /* crossing */
911 | /*----------*/
912 | crossing = 0;
913 |
914 | /* crossing is counted based on the brackets */
915 | /* in test rather than gold file (by Mike) */
916 | for(j=0;j<bn2;j++){
917 | for(i=0;i<bn1;i++){
918 | if(bracket1[i].result != 5 &&
919 | bracket2[j].result != 5 &&
920 | ((bracket1[i].start < bracket2[j].start &&
921 | bracket1[i].end > bracket2[j].start &&
922 | bracket1[i].end < bracket2[j].end) ||
923 | (bracket1[i].start > bracket2[j].start &&
924 | bracket1[i].start < bracket2[j].end &&
925 | bracket1[i].end > bracket2[j].end))){
926 |
927 | /* ### ML 09/01/03: get details on cross-brackettings */
928 | if (i != last_i) {
929 | if (DEBUG>1) {
930 | printf(" CROSSING[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
931 | l = bracket1[i].buf_end-bracket1[i].buf_start;
932 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
933 | my_buf[l] = '\0';
934 | printf("%s\n",my_buf);
935 |
936 | /* ML
937 | printf("\n CROSSING at bracket %d:\n",i-1);
938 | printf(" GOLD (tokens %d-%d): ",bracket1[i].start,bracket1[i].end-1);
939 | l = bracket1[i].buf_end-bracket1[i].buf_start;
940 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
941 | my_buf[l] = '\0';
942 | printf("%s\n",my_buf);
943 | */
944 | }
945 | last_i = i;
946 | }
947 |
948 | /* ML
949 | printf(" TEST (tokens %d-%d): ",bracket2[j].start,bracket2[j].end-1);
950 | l = bracket2[j].buf_end-bracket2[j].buf_start;
951 | strncpy(my_buf,buf+bracket2[j].buf_start,l);
952 | my_buf[l] = '\0';
953 | printf("%s\n",my_buf);
954 | */
955 |
956 | crossing++;
957 | break;
958 | }
959 | }
960 | }
961 |
962 | /* Tagging accuracy */
963 | /*------------------*/
964 | correct_tag=0;
965 | for(i=0;i<wn1;i++){
981 | if (DEBUG>1) {
982 | for(i=0;i<wn1;i++){
1024 | int ind = quot->index;
1025 | int bra = quot->bracket;
1026 | s_terminal* term = &quot->term;
1027 | int k;
1028 | memmove(&terminal[ind+1],
1029 | &terminal[ind],
1030 | sizeof(s_terminal)*(MAX_WORD_IN_SENT-ind-1));
1031 | strcpy(terminal[ind].label,term->label);
1032 | strcpy(terminal[ind].word,term->word);
1033 | (*wn)++;
1034 | if (DEBUG>1)
1035 | printf("bra=%d, ind=%d\n",bra,ind);
1036 | for(k=0;k<*bn;k++) {
1039 | if (DEBUG>1)
1040 | printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
1041 | if (k>=bra) {
1042 | bracket[k].start++;
1043 | bracket[k].end++;
1044 | }
1045 | //if (bracket[k].start<=ind && bracket[k].end>=ind)
1046 | //bracket[k].end++;
1047 | }
1048 | if (DEBUG>1)
1049 | printf("endslen=%d\n",quot->endslen);
1050 | for(k=0;k<quot->endslen;k++) {
1051 | //printf("ends[%d]=%d",k,quot->ends[k]);
1052 | bracket[quot->ends[k]].end++;
1053 | }
1054 | //free(quot->ends);
1055 | }
1056 | /*
1057 | void
1058 | adjust_end(ind,bra)
1059 | int ind;
1060 | int bra;
1061 | {
1062 | for(k=0;k<bn;k++) {
1066 | if (k>=bra)
1067 | bracket[k].end++;
1068 | }
1069 | }
1070 | */
1071 | void
1072 | massage_data()
1073 | {
1074 | int i, j;
1075 |
1076 | /* for GOLD */
1077 | /*----------*/
1078 | for(i=0;i<bn1;i++){
1244 | if(TOTAL_bn1>0 && TOTAL_bn2>0){
1245 | printf(" %6.2f %6.2f %6d %5d %5d %5d",
1246 | (TOTAL_bn1>0?100.0*TOTAL_match/TOTAL_bn1:0.0),
1247 | (TOTAL_bn2>0?100.0*TOTAL_match/TOTAL_bn2:0.0),
1248 | TOTAL_match,
1249 | TOTAL_bn1,
1250 | TOTAL_bn2,
1251 | TOTAL_crossing);
1252 | }
1253 |
1254 | printf(" %5d %5d %6.2f",
1255 | TOTAL_word,
1256 | TOTAL_correct_tag,
1257 | (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
1258 |
1259 | printf("\n");
1260 | printf("=== Summary ===\n");
1261 |
1262 | sentn = TOTAL_sent - TOTAL_error_sent - TOTAL_skip_sent;
1263 |
1264 | printf("\n-- All --\n");
1265 | printf("Number of sentence = %6d\n",TOTAL_sent);
1266 | printf("Number of Error sentence = %6d\n",TOTAL_error_sent);
1267 | printf("Number of Skip sentence = %6d\n",TOTAL_skip_sent);
1268 | printf("Number of Valid sentence = %6d\n",sentn);
1269 |
1270 | r = TOTAL_bn1>0 ? 100.0*TOTAL_match/TOTAL_bn1 : 0.0;
1271 | printf("Bracketing Recall = %6.2f\n",r);
1272 |
1273 | p = TOTAL_bn2>0 ? 100.0*TOTAL_match/TOTAL_bn2 : 0.0;
1274 | printf("Bracketing Precision = %6.2f\n",p);
1275 |
1276 | f = 2*p*r/(p+r);
1277 | printf("Bracketing FMeasure = %6.2f\n",f);
1278 |
1279 | printf("Complete match = %6.2f\n",
1280 | (sentn>0?100.0*TOTAL_comp_sent/sentn:0.0));
1281 | printf("Average crossing = %6.2f\n",
1282 | (sentn>0?1.0*TOTAL_crossing/sentn:0.0));
1283 | printf("No crossing = %6.2f\n",
1284 | (sentn>0?100.0*TOTAL_no_crossing/sentn:0.0));
1285 | printf("2 or less crossing = %6.2f\n",
1286 | (sentn>0?100.0*TOTAL_2L_crossing/sentn:0.0));
1287 | printf("Tagging accuracy = %6.2f\n",
1288 | (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
1289 |
1290 | sentn = TOT40_sent - TOT40_error_sent - TOT40_skip_sent;
1291 |
1292 | printf("\n-- len<=%d --\n",TOT_cut_len);
1293 | printf("Number of sentence = %6d\n",TOT40_sent);
1294 | printf("Number of Error sentence = %6d\n",TOT40_error_sent);
1295 | printf("Number of Skip sentence = %6d\n",TOT40_skip_sent);
1296 | printf("Number of Valid sentence = %6d\n",sentn);
1297 |
1298 |
1299 | r = TOT40_bn1>0 ? 100.0*TOT40_match/TOT40_bn1 : 0.0;
1300 | printf("Bracketing Recall = %6.2f\n",r);
1301 |
1302 | p = TOT40_bn2>0 ? 100.0*TOT40_match/TOT40_bn2 : 0.0;
1303 | printf("Bracketing Precision = %6.2f\n",p);
1304 |
1305 | f = 2*p*r/(p+r);
1306 | printf("Bracketing FMeasure = %6.2f\n",f);
1307 |
1308 | printf("Complete match = %6.2f\n",
1309 | (sentn>0?100.0*TOT40_comp_sent/sentn:0.0));
1310 | printf("Average crossing = %6.2f\n",
1311 | (sentn>0?1.0*TOT40_crossing/sentn:0.0));
1312 | printf("No crossing = %6.2f\n",
1313 | (sentn>0?100.0*TOT40_no_crossing/sentn:0.0));
1314 | printf("2 or less crossing = %6.2f\n",
1315 | (sentn>0?100.0*TOT40_2L_crossing/sentn:0.0));
1316 | printf("Tagging accuracy = %6.2f\n",
1317 | (TOT40_word>0?100.0*TOT40_correct_tag/TOT40_word:0.0));
1318 |
1319 | }
1320 |
1321 |
1322 | /*--------------------------------*/
1323 | /* display individual information */
1324 | /*--------------------------------*/
1325 | void
1326 | dsp_info()
1327 | {
1328 | int i, n;
1329 |
1330 | printf("-<1>---(wn1=%3d, bn1=%3d)- ",wn1,bn1);
1331 | printf("-<2>---(wn2=%3d, bn2=%3d)-\n",wn2,bn2);
1332 |
1333 | n = (wn1>wn2?wn1:wn2);
1334 |
1335 | for(i=0;i<n;i++){
1352 | n = (bn1>bn2?bn1:bn2);
1353 |
1354 | for(i=0;i<n;i++){
1501 | if(Error_count>Max_error){
1502 | exit(1);
1503 | }
1504 | }
1505 |
1506 |
1507 | /*---------------------*/
1508 | /* fatal error to exit */
1509 | /*---------------------*/
1510 | void
1511 | Fatal(s,arg1,arg2,arg3)
1512 | char *s, *arg1, *arg2, *arg3;
1513 | {
1514 | fprintf(stderr,s,arg1,arg2,arg3);
1515 | exit(1);
1516 | }
1517 |
1518 |
1519 | /*-------*/
1520 | /* Usage */
1521 | /*-------*/
1522 | void
1523 | Usage()
1524 | {
1525 | fprintf(stderr," evalb [-dDh][-c n][-e n][-p param_file] gold-file test-file \n");
1526 | fprintf(stderr," \n");
1527 | fprintf(stderr," Evaluate bracketing in test-file against gold-file. \n");
1528 | fprintf(stderr," Return recall, precision, F-Measure, tag accuracy. \n");
1529 | fprintf(stderr," \n");
1530 | fprintf(stderr," \n");
1531 | fprintf(stderr," -d debug mode \n");
1532 | fprintf(stderr," -D debug mode plus bracketing info \n");
1533 | fprintf(stderr," -c n cut-off length for statistics (def.=40)\n");
1534 | fprintf(stderr," -e n number of error to kill (default=10) \n");
1535 | fprintf(stderr," -p param_file parameter file \n");
1536 | fprintf(stderr," -h help \n");
1537 | }
1538 |
--------------------------------------------------------------------------------
/EVALB/new.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of error to stop the process. ##
12 | ## This is useful if there could be ##
13 | ## tokenization error. ##
14 | ## The process will stop when this number##
15 | ## of errors are accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 40
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL S1
47 | DELETE_LABEL -NONE-
48 | DELETE_LABEL ,
49 | DELETE_LABEL :
50 | DELETE_LABEL ``
51 | DELETE_LABEL ''
52 | DELETE_LABEL .
53 | DELETE_LABEL ?
54 | DELETE_LABEL !
55 |
56 | ##------------------------------------------##
57 | ## Delete labels for length calculation ##
58 | ## list of labels to be ignored for ##
59 | ## length calculation purpose ##
60 | ##------------------------------------------##
61 | DELETE_LABEL_FOR_LENGTH -NONE-
62 |
63 | ##------------------------------------------##
64 | ## Labels to be considered for misquote ##
65 | ## (could be possessive or quote) ##
66 | ##------------------------------------------##
67 | QUOTE_LABEL ``
68 | QUOTE_LABEL ''
69 | QUOTE_LABEL POS
70 |
71 | ##------------------------------------------##
72 | ## These are less common, but ##
73 | ## are on occasion output by parsers: ##
74 | ##------------------------------------------##
75 | QUOTE_LABEL NN
76 | QUOTE_LABEL CD
77 | QUOTE_LABEL VBZ
78 | QUOTE_LABEL :
79 |
80 | ##------------------------------------------##
81 | ## Equivalent labels, words ##
82 | ## the pairs are considered equivalent ##
83 | ## This is non-directional. ##
84 | ##------------------------------------------##
85 | EQ_LABEL ADVP PRT
86 |
87 | # EQ_WORD Example example
88 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.gld:
--------------------------------------------------------------------------------
1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
2 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
3 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
4 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
5 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
6 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
7 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
8 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
9 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
10 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
11 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
12 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
13 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
14 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
15 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
16 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
19 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
21 | (S (A-SBJ-1 (P this)) (B-WHATEVER (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## print out data for individual sentence ##
4 | ##------------------------------------------##
5 | DEBUG 0
6 |
7 | ##------------------------------------------##
8 | ## MAX error ##
9 | ## Number of errors before the process stops. ##
10 | ## This is useful if there could be ##
11 | ## tokenization errors. ##
12 | ## The process will stop when this number##
13 | ## of errors has accumulated. ##
14 | ##------------------------------------------##
15 | MAX_ERROR 10
16 |
17 | ##------------------------------------------##
18 | ## Cut-off length for statistics ##
19 | ## At the end of evaluation, the ##
20 | ## statistics for the sentences of length##
21 | ## less than or equal to this number will##
22 | ## be shown, on top of the statistics ##
23 | ## for all the sentences ##
24 | ##------------------------------------------##
25 | CUTOFF_LEN 40
26 |
27 | ##------------------------------------------##
28 | ## unlabeled or labeled bracketing ##
29 | ## 0: unlabeled bracketing ##
30 | ## 1: labeled bracketing ##
31 | ##------------------------------------------##
32 | LABELED 1
33 |
34 | ##------------------------------------------##
35 | ## Delete labels ##
36 | ## list of labels to be ignored. ##
37 | ## If it is a pre-terminal label, delete ##
38 | ## the word along with the brackets. ##
39 | ## If it is a non-terminal label, just ##
40 | ## delete the brackets (don't delete ##
41 | ## children). ##
42 | ##------------------------------------------##
43 | DELETE_LABEL TOP
44 | DELETE_LABEL -NONE-
45 | DELETE_LABEL ,
46 | DELETE_LABEL :
47 | DELETE_LABEL ``
48 | DELETE_LABEL ''
49 |
50 | ##------------------------------------------##
51 | ## Delete labels for length calculation ##
52 | ## list of labels to be ignored for ##
53 | ## length calculation purpose ##
54 | ##------------------------------------------##
55 | DELETE_LABEL_FOR_LENGTH -NONE-
56 |
57 |
58 | ##------------------------------------------##
59 | ## Equivalent labels, words ##
60 | ## the pairs are considered equivalent ##
61 | ## This is non-directional. ##
62 | ##------------------------------------------##
63 | EQ_LABEL T TT
64 |
65 | EQ_WORD This this
66 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.rsl:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 4 0 100.00 100.00 4 4 4 0 4 4 100.00
5 | 2 4 0 75.00 75.00 3 4 4 0 4 4 100.00
6 | 3 4 0 100.00 100.00 4 4 4 0 4 3 75.00
7 | 4 4 0 75.00 75.00 3 4 4 0 4 3 75.00
8 | 5 4 0 75.00 75.00 3 4 4 0 4 4 100.00
9 | 6 4 0 50.00 66.67 2 4 3 1 4 4 100.00
10 | 7 4 0 25.00 100.00 1 4 1 0 4 4 100.00
11 | 8 4 0 0.00 0.00 0 4 0 0 4 4 100.00
12 | 9 4 0 100.00 80.00 4 4 5 0 4 4 100.00
13 | 10 4 0 100.00 50.00 4 4 8 0 4 4 100.00
14 | 11 4 2 0.00 0.00 0 0 0 0 4 0 0.00
15 | 12 4 1 0.00 0.00 0 0 0 0 4 0 0.00
16 | 13 4 1 0.00 0.00 0 0 0 0 4 0 0.00
17 | 14 4 2 0.00 0.00 0 0 0 0 4 0 0.00
18 | 15 4 0 100.00 100.00 4 4 4 0 4 4 100.00
19 | 16 4 1 0.00 0.00 0 0 0 0 4 0 0.00
20 | 17 4 1 0.00 0.00 0 0 0 0 4 0 0.00
21 | 18 4 0 100.00 100.00 4 4 4 0 4 4 100.00
22 | 19 4 0 100.00 100.00 4 4 4 0 4 4 100.00
23 | 20 4 1 0.00 0.00 0 0 0 0 4 0 0.00
24 | 21 4 0 100.00 100.00 4 4 4 0 4 4 100.00
25 | 22 44 0 100.00 100.00 34 34 34 0 44 44 100.00
26 | 23 4 0 100.00 100.00 4 4 4 0 4 4 100.00
27 | 24 5 0 100.00 100.00 4 4 4 0 4 4 100.00
28 | ============================================================================
29 | 87.76 90.53 86 98 95 16 108 106 98.15
30 | === Summary ===
31 |
32 | -- All --
33 | Number of sentence = 24
34 | Number of Error sentence = 5
35 | Number of Skip sentence = 2
36 | Number of Valid sentence = 17
37 | Bracketing Recall = 87.76
38 | Bracketing Precision = 90.53
39 | Complete match = 52.94
40 | Average crossing = 0.06
41 | No crossing = 94.12
42 | 2 or less crossing = 100.00
43 | Tagging accuracy = 98.15
44 |
45 | -- len<=40 --
46 | Number of sentence = 23
47 | Number of Error sentence = 5
48 | Number of Skip sentence = 2
49 | Number of Valid sentence = 16
50 | Bracketing Recall = 81.25
51 | Bracketing Precision = 85.25
52 | Complete match = 50.00
53 | Average crossing = 0.06
54 | No crossing = 93.75
55 | 2 or less crossing = 100.00
56 | Tagging accuracy = 96.88
57 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.tst:
--------------------------------------------------------------------------------
1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
2 | (S (A (P this)) (B (Q is) (C (R a) (T test))))
3 | (S (A (P this)) (B (Q is) (A (R a) (U test))))
4 | (S (C (P this)) (B (Q is) (A (R a) (U test))))
5 | (S (A (P this)) (B (Q is) (R a) (A (T test))))
6 | (S (A (P this) (Q is)) (A (R a) (T test)))
7 | (S (P this) (Q is) (R a) (T test))
8 | (P this) (Q is) (R a) (T test)
9 | (S (A (P this)) (B (Q is) (A (A (R a) (T test)))))
10 | (S (A (P this)) (B (Q is) (A (A (A (A (A (R a) (T test))))))))
11 |
12 | (S (A (P this)) (B (Q was) (A (A (R a) (T test)))))
13 | (S (A (P this)) (B (Q is) (U not) (A (A (R a) (T test)))))
14 |
15 | (TOP (S (A (P this)) (B (Q is) (A (R a) (T test)))))
16 | (S (A (P this)) (NONE *) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (S (NONE abc) (A (NONE *))) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (TT test))))
19 | (S (A (P This)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P That)) (B (Q is) (A (R a) (T test))))
21 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 |
--------------------------------------------------------------------------------
/EVALB/tgrep_proc.prl:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/perl
2 |
3 | while(<>)
4 | {
5 | if(m/TOP/) # print only lines containing a parse tree (i.e. skip blank lines)
6 | {
7 | print;
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/Makefile:
--------------------------------------------------------------------------------
1 | TESTFILE=dev.Arabic.gold.ptb.-feat
2 | JAVA=java
3 |
4 |
5 | all: evalb
6 |
7 |
8 | clean:
9 | rm -f evalb_spmrl
10 |
11 | install: evalb
12 | cp evalb_spmrl /usr/local/bin
13 |
14 |
15 |
16 | evalb: evalb.c
17 | gcc -Wall -O3 -g -o evalb_spmrl evalb.c
18 |
19 | evalb_linux: evalb.c
20 | gcc -Wall -fPIC -O3 -g -o evalb_spmrl evalb.c
21 | #to compile on linux
22 |
23 |
24 |
25 | # note: in the original makefile, the -funsigned-char option was applied
26 |
27 | home: install_home
28 |
29 | install_home: all
30 | cp evalb_spmrl ${PREFIX}/bin
31 |
32 | up:
33 | tar zcvf ../evalb_spmrl2013.tar.gz ../evalb_spmrl2013/
34 | putW ../evalb_spmrl2013.tar.gz
35 |
36 |
37 |
38 | #################################
39 | # stuff to debug some treebanks #
40 | #################################
41 | test_full: all
42 | ./evalb_spmrl dev.Arabic.gold.ptb dev.Arabic.gold.ptb
43 |
44 |
45 | test: all
46 | ./evalb_spmrl -p ./new.prm ${TESTFILE} ${TESTFILE}
47 |
48 | debug: all
49 | ./evalb_spmrl -D ${TESTFILE} ${TESTFILE}
50 | echo "./evalb_spmrl -D ${TESTFILE} ${TESTFILE}"
51 |
52 | debug_one: all
53 | lines 616 < ${TESTFILE} > ${TESTFILE}.616
54 | ./evalb_spmrl -D ${TESTFILE}.616 ${TESTFILE}.616
55 | echo "./evalb_spmrl -D ${TESTFILE}.616 ${TESTFILE}.616"
56 |
57 | releaf:
58 | ./evalb_spmrl -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug
59 | echo "./evalb_spmrl -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug" > /dev/stderr
60 |
61 | java:
62 | ${JAVA} -jar ./evalC/evalC.jar ${TESTFILE} ${TESTFILE} /dev/stdout
63 |
64 |
65 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgaddy/parser-analysis/1033017fef59090a48cf25210f7d43e3ff913a4c/EVALB_SPMRL/README
--------------------------------------------------------------------------------
/EVALB_SPMRL/README.orig:
--------------------------------------------------------------------------------
1 | #################################################################
2 | # #
3 | # README file for evalb #
4 | # #
5 | # Satoshi Sekine (NYU) #
6 | # Mike Collins (UPenn) #
7 | # #
8 | # October.1997 #
9 | #################################################################
10 |
11 | Contents of this README:
12 |
13 | [0] COPYRIGHT
14 | [1] INTRODUCTION
15 | [2] INSTALLATION AND RUN
16 | [3] OPTIONS
17 | [4] OUTPUT FORMAT FROM THE SCORER
18 | [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
19 | [6] THE PARAMETER FILE
20 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
21 |
22 |
23 | [0] COPYRIGHT
24 |
25 | The authors abandon the copyright of this program. Everyone is
26 | permitted to copy and distribute the program or a portion of the program
27 | with no charge and no restrictions unless it is harmful to someone.
28 |
29 | However, the authors would be delighted if users use the program properly
30 | and let the authors know about any bugs or problems.
31 |
32 | This software is provided "AS IS", and the authors make no warranties,
33 | express or implied.
34 |
35 |
36 | [1] INTRODUCTION
37 |
38 | Evaluation of bracketing looks simple, but in fact, there are minor
39 | differences from system to system. This is a program to parameterize
40 | such minor differences and to give an informative result.
41 |
42 | "evalb" evaluates bracketing accuracy in a test-file against a gold-file.
43 | It returns recall, precision, and tagging accuracy. It uses an identical
44 | algorithm to that used in (Collins ACL97).
45 |
46 |
47 | [2] Installation and Run
48 |
49 | To compile the scorer, type
50 |
51 | > make
52 |
53 |
54 | To run the scorer:
55 |
56 | > evalb -p Parameter_file Gold_file Test_file
57 |
58 |
59 | For example to use the sample files:
60 |
61 | > evalb -p sample.prm sample.gld sample.tst
62 |
63 |
64 |
65 | [3] OPTIONS
66 |
67 | You can specify system parameters in the command line options.
68 | Other options concerning the evaluation metric should be specified
69 | in the parameter file, described later.
70 |
71 | -p param_file parameter file
72 | -d debug mode
73 | -e n number of errors to kill (default=10)
74 | -h help
75 |
76 |
77 |
78 | [4] OUTPUT FORMAT FROM THE SCORER
79 |
80 | The scorer gives individual scores for each sentence, for
81 | example:
82 |
83 | Sent. Matched Bracket Cross Correct Tag
84 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
85 | ============================================================================
86 | 1 8 0 100.00 100.00 5 5 5 0 6 5 83.33
87 |
88 | At the end of the output the === Summary === section gives statistics
89 | for all sentences, and for sentences <=40 words in length. The summary
90 | contains the following information:
91 |
92 | i) Number of sentences -- total number of sentences.
93 |
94 | ii) Number of Error/Skip sentences -- should both be 0 if there is no
95 | problem with the parsed/gold files.
96 |
97 | iii) Number of valid sentences = Number of sentences - Number of Error/Skip
98 | sentences
99 |
100 | iv) Bracketing recall = (number of correct constituents)
101 | ----------------------------------------
102 | (number of constituents in the goldfile)
103 |
104 | v) Bracketing precision = (number of correct constituents)
105 | ----------------------------------------
106 | (number of constituents in the parsed file)
107 |
108 | vi) Complete match = percentage of sentences where recall and precision are
109 | both 100%.
110 |
111 | vii) Average crossing = (number of constituents crossing a goldfile constituent)
112 | ----------------------------------------------------
113 | (number of sentences)
114 |
115 | viii) No crossing = percentage of sentences which have 0 crossing brackets.
116 |
117 | ix) 2 or less crossing = percentage of sentences which have <=2 crossing brackets.
118 |
119 | x) Tagging accuracy = percentage of correct POS tags (but see [5].3 for exact
120 | details of what is counted).
121 |
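As a quick illustration, the headline summary numbers combine per-sentence
counts as in this minimal Python sketch (not the scorer's code; the
(matched, gold, test) triples below are made-up values):

    sentences = [(4, 4, 4), (3, 4, 4), (2, 4, 3)]   # (matched, gold, test)

    matched = sum(c for c, g, t in sentences)
    recall = 100.0 * matched / sum(g for c, g, t in sentences)
    precision = 100.0 * matched / sum(t for c, g, t in sentences)
    # a sentence is a complete match when recall and precision are both 100%
    complete = 100.0 * sum(c == g == t for c, g, t in sentences) / len(sentences)
    print(recall, precision, complete)    # 75.0  81.81...  33.33...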
122 |
123 |
124 | [5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
125 |
126 |
127 | The gold and parsed files are in a format similar to this:
128 |
129 | (TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
130 |
131 | To create a gold file from the treebank:
132 |
133 | tgrep -wn '/.*/' | tgrep_proc.prl
134 |
135 | will produce a goldfile in the required format. ("tgrep -wn '/.*/'" prints
136 | parse trees, "tgrep_proc.prl" just skips blank lines).
137 |
138 | For example, to produce a goldfile for section 23 of the treebank:
139 |
140 | tgrep -wn '/.*/' | tail +90895 | tgrep_proc.prl | sed 2416q > sec23.gold
141 |
142 |
143 |
144 | [6] THE PARAMETER (.prm) FILE
145 |
146 |
147 | The .prm file sets options regarding the scoring method. COLLINS.prm gives
148 | the same scoring behaviour as the scorer used in (Collins 97). The options
149 | chosen were:
150 |
151 | 1) LABELED 1
152 |
153 | to give labelled precision/recall figures, i.e. a constituent must have the
154 | same span *and* label as a constituent in the goldfile.
155 |
156 | 2) DELETE_LABEL TOP
157 |
158 | Don't count the "TOP" label (which is always given in the output of tgrep)
159 | when scoring.
160 |
161 | 3) DELETE_LABEL -NONE-
162 |
163 | Remove traces (and all constituents which dominate nothing but traces) when
164 | scoring. For example
165 |
166 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
167 |
168 | would be processed to give
169 |
170 | .... (VP (VBD reported)) (. .)))
171 |
172 |
173 | 4)
174 | DELETE_LABEL , -- for the purposes of scoring remove punctuation
175 | DELETE_LABEL :
176 | DELETE_LABEL ``
177 | DELETE_LABEL ''
178 | DELETE_LABEL .
179 |
180 | 5) DELETE_LABEL_FOR_LENGTH -NONE- -- don't include traces when calculating
181 | the length of a sentence (important
182 | when classifying a sentence as <=40
183 | words or >40 words)
184 |
185 | 6) EQ_LABEL ADVP PRT
186 |
187 | Count ADVP and PRT as being the same label when scoring.
188 |
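To make the DELETE_LABEL behaviour concrete, here is a minimal Python
sketch of the preprocessing (the scorer itself is C; the nested-list tree
representation and the DELETE set below are just for illustration):

    DELETE = {"TOP", "-NONE-", ",", ":", "``", "''", "."}

    def strip(node):
        """Return the list of nodes surviving DELETE_LABEL preprocessing."""
        label, children = node[0], node[1:]
        if len(children) == 1 and isinstance(children[0], str):
            # pre-terminal: deleting it deletes the word as well
            return [] if label in DELETE else [node]
        kept = [k for child in children for k in strip(child)]
        if not kept:
            return []      # dominated nothing but deleted material
        if label in DELETE:
            return kept    # drop the bracket, keep the children
        return [[label] + kept]

    tree = ["VP", ["VBD", "reported"],
            ["SBAR", ["-NONE-", "0"], ["S", ["-NONE-", "*T*-1"]]]]
    print(strip(tree))     # [['VP', ['VBD', 'reported']]]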
189 |
190 |
191 |
192 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
193 |
194 |
195 | 1) The scorer initially processes the files to remove all nodes specified
196 | by DELETE_LABEL in the .prm file. It also recursively removes nodes which
197 | dominate nothing due to all their children being removed. For example, if
198 | -NONE- is specified as a label to be deleted,
199 |
200 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
201 |
202 | would be processed to give
203 |
204 | .... (VP (VBD reported)) (. .)))
205 |
206 | 2) The scorer also removes all functional tags attached to non-terminals
207 | (functional tags are prefixed with "-" or "=" in the treebank). For example
208 | "NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
209 |
210 |
211 | 3) Tagging accuracy counts tags for all words *except* any tags which are
212 | deleted by a DELETE_LABEL specification in the .prm file. (For example, for
213 | COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
214 |
215 | 4) When calculating the length of a sentence, all words with POS tags not
216 | included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
217 | counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
218 | traces are removed before calculating the length of the sentence).
219 |
220 | 5) There are some subtleties in scoring when either the goldfile or parsed
221 | file contains multiple constituents for the same span which have the same
222 | non-terminal label, e.g. (NP (NP the man)). If the goldfile contains n
223 | constituents for the same span, and the parsed file contains m constituents
224 | with that nonterminal, the scorer works as follows:
225 |
226 | i) If m>n, then the precision is n/m, recall is 100%
227 |
228 | ii) If n>m, then the precision is 100%, recall is m/n.
229 |
230 | iii) If n==m, recall and precision are both 100%.
231 |
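Equivalently, min(n,m) of the duplicated brackets count as matched. A
minimal Python sketch of this rule (the same min-count logic appears in
bracket_f1 in src/evaluate.py; the (label, span) pairs are made up):

    from collections import Counter

    def matched(gold_brackets, test_brackets):
        gold, test = Counter(gold_brackets), Counter(test_brackets)
        return sum(min(count, test[b]) for b, count in gold.items())

    gold = [("NP", (0, 2))] * 2              # n = 2
    test = [("NP", (0, 2))] * 3              # m = 3
    c = matched(gold, test)
    print(c / len(test), c / len(gold))      # precision = 2/3, recall = 1.0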
--------------------------------------------------------------------------------
/EVALB_SPMRL/spmrl.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of errors before the process stops. ##
12 | ## This is useful if there could be ##
13 | ## tokenization errors. ##
14 | ## The process will stop when this number##
15 | ## of errors has accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10000
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 70
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL ROOT
47 | DELETE_LABEL S1
48 | DELETE_LABEL -NONE-
49 | DELETE_LABEL VROOT
50 |
51 | #DELETE_LABEL ,
52 | #DELETE_LABEL :
53 | #DELETE_LABEL ``
54 | #DELETE_LABEL ''
55 | #DELETE_LABEL .
56 | #DELETE_LABEL ?
57 | #DELETE_LABEL !
58 | #DELETE_LABEL PONCT
59 |
60 | ##------------------------------------------##
61 | ## Delete labels for length calculation ##
62 | ## list of labels to be ignored for ##
63 | ## length calculation purpose ##
64 | ##------------------------------------------##
65 | DELETE_LABEL_FOR_LENGTH -NONE-
66 |
67 | ##------------------------------------------##
68 | ## Labels to be considered for misquote ##
70 | ## (could be possessive or quote) ##
70 | ##------------------------------------------##
71 | #QUOTE_LABEL ``
72 | #QUOTE_LABEL ''
73 | #QUOTE_LABEL POS
74 |
75 | ##------------------------------------------##
76 | ## These are less common, but ##
77 | ## are on occasion output by parsers: ##
78 | ##------------------------------------------##
79 | #QUOTE_LABEL NN
80 | #QUOTE_LABEL CD
81 | #QUOTE_LABEL VBZ
82 | #QUOTE_LABEL :
83 |
84 | ##------------------------------------------##
85 | ## Equivalent labels, words ##
86 | ## the pairs are considered equivalent ##
87 | ## This is non-directional. ##
88 | ##------------------------------------------##
89 | #EQ_LABEL ADVP PRT
90 |
91 | # EQ_WORD Example example
92 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/spmrl_hebrew.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of errors before the process stops. ##
12 | ## This is useful if there could be ##
13 | ## tokenization errors. ##
14 | ## The process will stop when this number##
15 | ## of errors has accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10000
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 40
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL ROOT
47 | DELETE_LABEL S1
48 | DELETE_LABEL -NONE-
49 | DELETE_LABEL VROOT
50 | #DELETE_LABEL SENT
51 |
52 | #DELETE_LABEL ,
53 | #DELETE_LABEL :
54 | #DELETE_LABEL ``
55 | #DELETE_LABEL ''
56 | #DELETE_LABEL .
57 | #DELETE_LABEL ?
58 | #DELETE_LABEL !
59 | #DELETE_LABEL PONCT
60 |
61 | ##------------------------------------------##
62 | ## Delete labels for length calculation ##
63 | ## list of labels to be ignored for ##
64 | ## length calculation purpose ##
65 | ##------------------------------------------##
66 | DELETE_LABEL_FOR_LENGTH -NONE-
67 |
68 | ##------------------------------------------##
69 | ## Labels to be considered for misquote ##
70 | ## (could be possessive or quote) ##
71 | ##------------------------------------------##
72 | #QUOTE_LABEL ``
73 | #QUOTE_LABEL ''
74 | #QUOTE_LABEL POS
75 |
76 | ##------------------------------------------##
77 | ## These are less common, but ##
78 | ## are on occasion output by parsers: ##
79 | ##------------------------------------------##
80 | #QUOTE_LABEL NN
81 | #QUOTE_LABEL CD
82 | #QUOTE_LABEL VBZ
83 | #QUOTE_LABEL :
84 |
85 | ##------------------------------------------##
86 | ## Equivalent labels, words ##
87 | ## the pairs are considered equivalent ##
88 | ## This is non-directional. ##
89 | ##------------------------------------------##
90 | #EQ_LABEL ADVP PRT
91 |
92 | # EQ_WORD Example example
93 | DELETE_LABEL SYN_NN
94 | DELETE_LABEL SYN_NNP
95 | DELETE_LABEL SYN_NNT
96 | DELETE_LABEL SYN_PRP
97 | DELETE_LABEL SYN_JJ
98 | DELETE_LABEL SYN_JJT
99 | DELETE_LABEL SYN_RB
100 | DELETE_LABEL SYN_RBR
101 | DELETE_LABEL SYN_MOD
102 | DELETE_LABEL SYN_VB
103 | DELETE_LABEL SYN_AUX
104 | DELETE_LABEL SYN_AGR
105 | DELETE_LABEL SYN_IN
106 | DELETE_LABEL SYN_COM
107 | DELETE_LABEL SYN_REL
108 | DELETE_LABEL SYN_CC
109 | DELETE_LABEL SYN_QW
110 | DELETE_LABEL SYN_HAM
111 | DELETE_LABEL SYN_WDT
112 | DELETE_LABEL SYN_DT
113 | DELETE_LABEL SYN_CD
114 | DELETE_LABEL SYN_CDT
115 | DELETE_LABEL SYN_AT
116 | DELETE_LABEL SYN_H
117 | DELETE_LABEL SYN_FL
118 | DELETE_LABEL SYN_ZVL
119 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Mitchell Stern
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neural Constituency Parser Analysis
2 |
3 | This repository contains code necessary to reproduce experiments in *What's Going On in Neural Constituency Parsers? An Analysis* from NAACL 2018.
4 |
5 | If you are looking for a parser implementation and not the analysis, we recommend you instead use the code from [Mitchell's repository](https://github.com/mitchellstern/minimal-span-parser), which also includes the model improvements described in the paper.
6 |
7 | ## Requirements and Setup
8 |
9 | * Python 3.5 or higher.
10 | * [DyNet](https://github.com/clab/dynet). We recommend installing DyNet from source with MKL support for significantly faster run time.
11 | * [EVALB](http://nlp.cs.nyu.edu/evalb/). Before starting, run `make` inside the `EVALB/` directory to compile an `evalb` executable. This will be called from Python for evaluation.
12 |
13 | ## Command Line Arguments
14 |
15 | The base model can be trained with the command:
16 | ```
17 | python3 src/main.py train --parser-type chart --model-path-base models/base-model
18 | ```
19 | The dev score is appended to the model file name in the form `_dev=xx.xx`, where each `x` is replaced with a digit. Commands that load an already trained model, as some experiments below do, must therefore include this suffix in the model path.
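For example, if the best model reached a dev score of 92.34 (an arbitrary number for illustration), a later command would take the full name:
```
python3 src/main.py test --model-path-base models/base-model_dev=92.34
```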
20 |
21 | The following table describes the command line arguments to run each experiment in the paper:
22 |
23 | Paper section | Arguments
24 | --- | ---
25 | 3.1 | Run `python3 src/main.py train-label --model-path-base models/base-model_dev=xx.xx`.
26 | 3.2 | Use the base model command with `--parser-type independent` instead of `chart`.
27 | 4.1 | Add the option `--embedding-type` with combinations of the characters w, t, and c for word, tag, and character (e.g. `--embedding-type wt`). For character only, we recommend using `--char-lstm-dim 250` as well.
28 | 5.1 | Run `python3 src/main.py derivative --model-path-base models/base-model_dev=xx.xx`.
29 | 5.2 | Add `--lstm-type truncated --lstm-context-size 3` to the base model command and use different values for the context size.
30 | 5.3 | Add `--lstm-type shuffled --lstm-context-size 3`.
31 | 5.4 | Add `--lstm-type no-lstm --lstm-context-size 3 --no-lstm-hidden-dims 1000`.
32 |
33 | To run on the test set, use
34 | ```
35 | python3 src/main.py test --model-path-base models/base-model_dev=xx.xx
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/src/evaluate.py:
--------------------------------------------------------------------------------
1 | import math
2 | import os.path
3 | import re
4 | import subprocess
5 | import tempfile
6 | from collections import Counter
7 |
8 | import trees
9 |
10 | class FScore(object):
11 | def __init__(self, recall, precision, fscore):
12 | self.recall = recall
13 | self.precision = precision
14 | self.fscore = fscore
15 |
16 | def __str__(self):
17 | return "(Recall={:.2f}, Precision={:.2f}, FScore={:.2f})".format(
18 | self.recall, self.precision, self.fscore)
19 |
20 | def evalb(evalb_dir, gold_trees, predicted_trees, ref_gold_path=None):
21 | assert os.path.exists(evalb_dir)
22 | evalb_program_path = os.path.join(evalb_dir, "evalb")
23 | evalb_spmrl_program_path = os.path.join(evalb_dir, "evalb_spmrl")
24 | assert os.path.exists(evalb_program_path) or os.path.exists(evalb_spmrl_program_path)
25 |
26 | if os.path.exists(evalb_program_path):
27 | evalb_param_path = os.path.join(evalb_dir, "COLLINS.prm")
28 | else:
29 | evalb_program_path = evalb_spmrl_program_path
30 | evalb_param_path = os.path.join(evalb_dir, "spmrl.prm")
31 |
32 | assert os.path.exists(evalb_program_path)
33 | assert os.path.exists(evalb_param_path)
34 |
35 | assert len(gold_trees) == len(predicted_trees)
36 | for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
37 | assert isinstance(gold_tree, trees.TreebankNode)
38 | assert isinstance(predicted_tree, trees.TreebankNode)
39 | gold_leaves = list(gold_tree.leaves())
40 | predicted_leaves = list(predicted_tree.leaves())
41 | assert len(gold_leaves) == len(predicted_leaves)
42 | assert all(
43 | gold_leaf.word == predicted_leaf.word
44 | for gold_leaf, predicted_leaf in zip(gold_leaves, predicted_leaves))
45 |
46 | temp_dir = tempfile.TemporaryDirectory(prefix="evalb-")
47 | gold_path = os.path.join(temp_dir.name, "gold.txt")
48 | predicted_path = os.path.join(temp_dir.name, "predicted.txt")
49 | output_path = os.path.join(temp_dir.name, "output.txt")
50 |
51 | with open(gold_path, "w") as outfile:
52 | if ref_gold_path is None:
53 | for tree in gold_trees:
54 | outfile.write("{}\n".format(tree.linearize()))
55 | else:
56 | # For the SPMRL dataset our data loader performs some modifications
57 | # (like stripping morphological features), so we compare to the
58 | # raw gold file to be certain that we haven't spoiled the evaluation
59 | # in some way.
60 | with open(ref_gold_path) as goldfile:
61 | outfile.write(goldfile.read())
62 |
63 | with open(predicted_path, "w") as outfile:
64 | for tree in predicted_trees:
65 | outfile.write("{}\n".format(tree.linearize()))
66 |
67 | command = "{} -p {} {} {} > {}".format(
68 | evalb_program_path,
69 | evalb_param_path,
70 | gold_path,
71 | predicted_path,
72 | output_path,
73 | )
74 | subprocess.run(command, shell=True)
75 |
76 | fscore = FScore(math.nan, math.nan, math.nan)
77 | with open(output_path) as infile:
78 | for line in infile:
79 | match = re.match(r"Bracketing Recall\s+=\s+(\d+\.\d+)", line)
80 | if match:
81 | fscore.recall = float(match.group(1))
82 | match = re.match(r"Bracketing Precision\s+=\s+(\d+\.\d+)", line)
83 | if match:
84 | fscore.precision = float(match.group(1))
85 | match = re.match(r"Bracketing FMeasure\s+=\s+(\d+\.\d+)", line)
86 | if match:
87 | fscore.fscore = float(match.group(1))
88 | break
89 |
90 | success = (
91 | not math.isnan(fscore.fscore) or
92 | fscore.recall == 0.0 or
93 | fscore.precision == 0.0)
94 |
95 | if success:
96 | temp_dir.cleanup()
97 | else:
98 | print("Error reading EVALB results.")
99 | print("Gold path: {}".format(gold_path))
100 | print("Predicted path: {}".format(predicted_path))
101 | print("Output path: {}".format(output_path))
102 |
103 | return fscore
104 |
105 | def bracket_f1(gold_trees, predicted_trees):
106 | correct_total = 0
107 | gold_total = 0
108 | pred_total = 0
109 | for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
110 | gold_brackets = gold_tree.brackets()
111 | predicted_brackets = predicted_tree.brackets()
112 | gbc = Counter(gold_brackets)
113 | pbc = Counter(predicted_brackets)
114 | correct = 0
115 | for gb in gbc:
116 | if gb in pbc:
117 | correct += min(gbc[gb], pbc[gb])
118 |
119 | correct_total += correct
120 | gold_total += len(gold_brackets)
121 | pred_total += len(predicted_brackets)
122 |
123 | precision = 100.0 * correct_total/pred_total
124 | recall = 100.0 * correct_total/gold_total
125 | f = (2 * precision * recall) / (precision + recall)
126 | return FScore(recall, precision, f)
127 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import os.path
4 | import time
5 | from collections import defaultdict
6 | import random
7 |
8 | import dynet as dy
9 | import numpy as np
10 |
11 | import evaluate
12 | import parse
13 | import trees
14 | import vocabulary
15 |
16 | def format_elapsed(start_time):
17 | elapsed_time = int(time.time() - start_time)
18 | minutes, seconds = divmod(elapsed_time, 60)
19 | hours, minutes = divmod(minutes, 60)
20 | days, hours = divmod(hours, 24)
21 | elapsed_string = "{}h{:02}m{:02}s".format(hours, minutes, seconds)
22 | if days > 0:
23 | elapsed_string = "{}d{}".format(days, elapsed_string)
24 | return elapsed_string
25 |
26 | def run_train(args):
27 | print("Running training with arguments:", args)
28 |
29 | if args.numpy_seed is not None:
30 | print("Setting numpy random seed to {}...".format(args.numpy_seed))
31 | np.random.seed(args.numpy_seed)
32 |
33 | print("Loading training trees from {}...".format(args.train_path))
34 | train_treebank = trees.load_trees(args.train_path)
35 | print("Loaded {:,} training examples.".format(len(train_treebank)))
36 |
37 | print("Loading development trees from {}...".format(args.dev_path))
38 | dev_treebank = trees.load_trees(args.dev_path)
39 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
40 |
41 | print("Processing trees for training...")
42 | train_parse = [tree.convert() for tree in train_treebank]
43 |
44 | print("Constructing vocabularies...")
45 |
46 | tag_vocab = vocabulary.Vocabulary()
47 | tag_vocab.index(parse.START)
48 | tag_vocab.index(parse.STOP)
49 | tag_vocab.index(parse.COMMON_WORD)
50 |
51 | char_vocab = vocabulary.Vocabulary()
52 | char_vocab.index(parse.START)
53 | char_vocab.index(parse.STOP)
54 | char_vocab.index(parse.COMMON_WORD)
55 | char_vocab.index(parse.UNK)
56 |
57 | word_vocab = vocabulary.Vocabulary()
58 | word_vocab.index(parse.START)
59 | word_vocab.index(parse.STOP)
60 | word_vocab.index(parse.UNK)
61 |
62 | label_vocab = vocabulary.Vocabulary()
63 | label_vocab.index(())
64 |
65 | for tree in train_parse:
66 | nodes = [tree]
67 | while nodes:
68 | node = nodes.pop()
69 | if isinstance(node, trees.InternalParseNode):
70 | label_vocab.index(node.label)
71 | nodes.extend(reversed(node.children))
72 | else:
73 | tag_vocab.index(node.tag)
74 | word_vocab.index(node.word)
75 | for char in node.word:
76 | char_vocab.index(char)
77 |
78 | tag_vocab.freeze()
79 | char_vocab.freeze()
80 | word_vocab.freeze()
81 | label_vocab.freeze()
82 |
83 | def print_vocabulary(name, vocab):
84 | special = {parse.START, parse.STOP, parse.UNK}
85 | print("{} ({:,}): {}".format(
86 | name, vocab.size,
87 | sorted(value for value in vocab.values if value in special) +
88 | sorted(value for value in vocab.values if value not in special)))
89 |
90 | if args.print_vocabs:
91 | print_vocabulary("Tag", tag_vocab)
92 | print_vocabulary("Word", word_vocab)
93 | print_vocabulary("Character", char_vocab)
94 | print_vocabulary("Label", label_vocab)
95 |
96 | print("Initializing model...")
97 | model = dy.ParameterCollection()
98 | print("Input LSTM type:", args.lstm_type)
99 | assert args.embedding_type != ""
100 | span_representation_args = [
101 | tag_vocab,
102 | char_vocab,
103 | word_vocab,
104 | label_vocab,
105 | args.tag_embedding_dim,
106 | args.char_embedding_dim,
107 | args.char_lstm_layers,
108 | args.char_lstm_dim,
109 | args.word_embedding_dim,
110 | args.lstm_layers,
111 | args.lstm_dim,
112 | args.dropout,
113 | args.lstm_type,
114 | args.lstm_context_size,
115 | args.embedding_type,
116 | args.concat_bow,
117 | args.weight_bow,
118 | args.random_embeddings,
119 | args.random_lstm,
120 | args.common_word_threshold,
121 | args.no_lstm_hidden_dims,
122 | ]
123 |
124 | if args.parser_type == "top-down":
125 | parser = parse.TopDownParser(
126 | model,
127 | args.label_hidden_dim,
128 | args.split_hidden_dim,
129 | span_representation_args
130 | )
131 | elif args.parser_type == 'chart':
132 | parser = parse.ChartParser(
133 | model,
134 | args.label_hidden_dim,
135 | span_representation_args
136 | )
137 | elif args.parser_type == 'independent':
138 | parser = parse.IndependentParser(
139 | model,
140 | args.label_hidden_dim,
141 | span_representation_args
142 | )
143 | trainer = dy.AdamTrainer(parser.trainable_parameters)
144 |
145 | total_processed = 0
146 | current_processed = 0
147 | check_every = len(train_parse) / args.checks_per_epoch
148 | best_dev_fscore = -np.inf
149 | best_dev_model_path = None
150 |
151 | start_time = time.time()
152 |
153 | def check_dev():
154 | nonlocal best_dev_fscore
155 | nonlocal best_dev_model_path
156 |
157 | dev_start_time = time.time()
158 |
159 | dev_predicted = []
160 | for tree in dev_treebank:
161 | dy.renew_cg()
162 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
163 | predicted, _ = parser.parse(sentence)
164 | dev_predicted.append(predicted.convert())
165 |
166 | if args.parser_type == 'independent':
167 | tree_count = 0
168 | for pred in dev_predicted:
169 | if pred.is_tree():
170 | tree_count += 1
171 | print("Percentage of valid trees:", tree_count/len(dev_predicted))
172 |
173 | dev_fscore = evaluate.bracket_f1(dev_treebank, dev_predicted)
174 | else:
175 | dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank, dev_predicted)
176 |
177 | print(
178 | "dev-fscore {} "
179 | "dev-elapsed {} "
180 | "total-elapsed {}".format(
181 | dev_fscore,
182 | format_elapsed(dev_start_time),
183 | format_elapsed(start_time),
184 | )
185 | )
186 |
187 | if dev_fscore.fscore > best_dev_fscore:
188 | if best_dev_model_path is not None:
189 | for ext in [".data", ".meta"]:
190 | path = best_dev_model_path + ext
191 | if os.path.exists(path):
192 | print("Removing previous model file {}...".format(path))
193 | os.remove(path)
194 |
195 | best_dev_fscore = dev_fscore.fscore
196 | best_dev_model_path = "{}_dev={:.2f}".format(
197 | args.model_path_base, dev_fscore.fscore)
198 | print("Saving new best model to {}...".format(best_dev_model_path))
199 | dy.save(best_dev_model_path, [parser])
200 |
201 | for epoch in itertools.count(start=1):
202 | if args.epochs is not None and epoch > args.epochs:
203 | break
204 |
205 | np.random.shuffle(train_parse)
206 | epoch_start_time = time.time()
207 |
208 | for start_index in range(0, len(train_parse), args.batch_size):
209 | dy.renew_cg()
210 | parser.new_batch()
211 | batch_losses = []
212 | for tree in train_parse[start_index:start_index + args.batch_size]:
213 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
214 | if args.parser_type == "top-down":
215 | _, loss = parser.parse(sentence, tree, args.explore)
216 | else:
217 | _, loss = parser.parse(sentence, tree)
218 | batch_losses.append(loss)
219 | total_processed += 1
220 | current_processed += 1
221 |
222 | batch_loss = dy.average(batch_losses)
223 | batch_loss_value = batch_loss.scalar_value()
224 | batch_loss.backward()
225 | trainer.update()
226 |
227 | if (start_index // args.batch_size + 1) % args.print_frequency == 0:
228 | print(
229 | "epoch {:,} "
230 | "batch {:,}/{:,} "
231 | "processed {:,} "
232 | "batch-loss {:.4f} "
233 | "epoch-elapsed {} "
234 | "total-elapsed {}".format(
235 | epoch,
236 | start_index // args.batch_size + 1,
237 | int(np.ceil(len(train_parse) / args.batch_size)),
238 | total_processed,
239 | batch_loss_value,
240 | format_elapsed(epoch_start_time),
241 | format_elapsed(start_time),
242 | )
243 | )
244 |
245 | if current_processed >= check_every:
246 | current_processed -= check_every
247 | check_dev()
248 |
249 | def run_test(args):
250 | print("Loading test trees from {}...".format(args.test_path))
251 | test_treebank = trees.load_trees(args.test_path)
252 | print("Loaded {:,} test examples.".format(len(test_treebank)))
253 |
254 | print("Loading model from {}...".format(args.model_path_base))
255 | model = dy.ParameterCollection()
256 | [parser] = dy.load(args.model_path_base, model)
257 |
258 | print("Parsing test sentences...")
259 |
260 | start_time = time.time()
261 |
262 | test_predicted = []
263 | for tree in test_treebank:
264 | dy.renew_cg()
265 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
266 | predicted, _ = parser.parse(sentence)
267 | test_predicted.append(predicted.convert())
268 |
269 | if type(parser) == parse.IndependentParser:
270 | print('Warning: not using evalb for evaluation')
271 | test_fscore = evaluate.bracket_f1(test_treebank, test_predicted)
272 | else:
273 | test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)
274 |
275 | print(
276 | "test-fscore {} "
277 | "test-elapsed {}".format(
278 | test_fscore,
279 | format_elapsed(start_time),
280 | )
281 | )
282 |
283 | def predict_labels(args):
284 | print("Loading training trees from {}...".format(args.train_path))
285 | train_treebank = trees.load_trees(args.train_path)
286 | print("Loaded {:,} training examples.".format(len(train_treebank)))
287 |
288 | print("Loading development trees from {}...".format(args.dev_path))
289 | dev_treebank = trees.load_trees(args.dev_path)
290 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
291 |
292 | print("Processing trees for training...")
293 | train_parse = [tree.convert() for tree in train_treebank]
294 | dev_parse = [tree.convert() for tree in dev_treebank]
295 |
296 | print("Calculating baseline...")
297 | counts = defaultdict(lambda : defaultdict(int))
298 | for tree in train_parse:
299 | for node, parent in tree.iterate_spans_with_parents(): # doesn't include top level
300 | counts[node.label][parent.label] += 1
301 | counts[tree.label][''] += 1
302 | predictions = {label:max(counts.keys(), key=lambda x: counts[x]) for label, counts in counts.items()}
303 | correct = 0
304 | total = 0
305 | for tree in dev_parse:
306 | for node, parent in tree.iterate_spans_with_parents(): # doesn't include top level
307 | if predictions[node.label] == parent.label:
308 | correct += 1
309 | total += 1
310 | if predictions[tree.label] == '':
311 | correct += 1
312 | total += 1
313 | print("baseline score:", correct/total)
314 |
315 | print("Loading model from {}...".format(args.model_path_base))
316 | model = dy.ParameterCollection()
317 | [base_parser] = dy.load(args.model_path_base, model)
318 |
319 | for self_not_parent in [False, True]:
320 | parser = parse.LabelPrediction(model, base_parser, args.label_hidden_dim)
321 | trainer = dy.AdamTrainer(parser.f_label.model)
322 |
323 | print('predicting own label' if self_not_parent else 'predicting parent label')
324 | for epoch_index in range(10):
325 | np.random.shuffle(train_parse)
326 | for start_index in range(0, len(train_parse), args.batch_size):
327 | dy.renew_cg()
328 | batch_losses = []
329 | for tree in train_parse[start_index:start_index + args.batch_size]:
330 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
331 | loss, _, _ = parser.predict_parent_label_for_spans(sentence, tree, self_not_parent)
332 | batch_losses.append(loss)
333 | batch_loss = dy.average(batch_losses)
334 | batch_loss_value = batch_loss.scalar_value()
335 | batch_loss.backward()
336 | trainer.update()
337 |
338 | correct = 0
339 | total = 0
340 | for tree in dev_parse:
341 | dy.renew_cg()
342 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
343 | _, c, t = parser.predict_parent_label_for_spans(sentence, tree, self_not_parent)
344 | correct += c
345 | total += t
346 | print("dev score at epoch", epoch_index+1, ":", correct/total)
347 |
348 | def derivative_analysis(args):
349 | print("Loading development trees from {}...".format(args.dev_path))
350 | dev_treebank = trees.load_trees(args.dev_path)
351 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
352 |
353 | print("Processing trees...")
354 | dev_parse = [tree.convert() for tree in dev_treebank]
355 |
356 | print("Loading model from {}...".format(args.model_path_base))
357 | model = dy.ParameterCollection()
358 | [parser] = dy.load(args.model_path_base, model)
359 |
360 | total_l1_grad = np.zeros(500)
361 | total_l2_grad = np.zeros(500)
362 | total_count = np.zeros(500)
363 | for tree in dev_parse:
364 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
365 | for position in range(len(sentence)+1):
366 | index = random.randrange(parser.lstm_dim*2)
367 | dy.renew_cg()
368 | gradients = parser.lstm_derivative(sentence, position, index)
369 | buckets = list(reversed(range(position+1))) + list(range(len(sentence)-position+1))
370 | assert len(buckets) == len(gradients)
371 | for position, grad in zip(buckets, gradients):
372 | total_l1_grad[position] += np.linalg.norm(grad, ord=1)
373 | total_l2_grad[position] += np.linalg.norm(grad, ord=2)
374 | total_count[position] += 1
375 |
376 | print('l1:')
377 | for i in range(500):
378 | if total_count[i] == 0:
379 | break
380 | print(total_l1_grad[i]/total_count[i])
381 | print('l2:')
382 | for i in range(500):
383 | if total_count[i] == 0:
384 | break
385 | print(total_l2_grad[i]/total_count[i])
386 |
387 | def main():
388 | dynet_args = [
389 | "--dynet-mem",
390 | "--dynet-weight-decay",
391 | "--dynet-autobatch",
392 | "--dynet-gpus",
393 | "--dynet-gpu",
394 | "--dynet-devices",
395 | "--dynet-seed",
396 | ]
397 |
398 | parser = argparse.ArgumentParser()
399 | subparsers = parser.add_subparsers()
400 |
401 | subparser = subparsers.add_parser("train")
402 | subparser.set_defaults(callback=run_train)
403 | for arg in dynet_args:
404 | subparser.add_argument(arg)
405 | subparser.add_argument("--numpy-seed", type=int)
406 | subparser.add_argument("--parser-type", choices=["top-down", "chart", "independent"], required=True)
407 | subparser.add_argument("--tag-embedding-dim", type=int, default=50)
408 | subparser.add_argument("--char-embedding-dim", type=int, default=50)
409 | subparser.add_argument("--char-lstm-layers", type=int, default=1)
410 | subparser.add_argument("--char-lstm-dim", type=int, default=100)
411 | subparser.add_argument("--word-embedding-dim", type=int, default=100)
412 | subparser.add_argument("--lstm-layers", type=int, default=2)
413 | subparser.add_argument("--lstm-dim", type=int, default=250)
414 | subparser.add_argument("--label-hidden-dim", type=int, default=250)
415 | subparser.add_argument("--split-hidden-dim", type=int, default=250)
416 | subparser.add_argument("--dropout", type=float, default=0.4)
417 | subparser.add_argument("--explore", action="store_true")
418 | subparser.add_argument("--model-path-base", required=True)
419 | subparser.add_argument("--evalb-dir", default="EVALB/")
420 | subparser.add_argument("--train-path", default="data/02-21.10way.clean")
421 | subparser.add_argument("--dev-path", default="data/22.auto.clean")
422 | subparser.add_argument("--batch-size", type=int, default=10)
423 | subparser.add_argument("--epochs", type=int)
424 | subparser.add_argument("--checks-per-epoch", type=int, default=4)
425 | subparser.add_argument("--print-vocabs", action="store_true")
426 | subparser.add_argument("--lstm-type", choices=["basic","truncated","shuffled","inside","no-lstm","untied-truncated"], default="basic")
427 | subparser.add_argument("--lstm-context-size", type=int, default=3)
428 | subparser.add_argument("--embedding-type", default="wc") # characters w/t/c for word/tag/character
429 | subparser.add_argument("--random-embeddings", action="store_true")
430 | subparser.add_argument("--random-lstm", action="store_true")
431 | subparser.add_argument("--concat-bow", action="store_true")
432 | subparser.add_argument("--weight-bow", action="store_true")
433 | subparser.add_argument("--print-frequency", type=int, default=1)
434 | subparser.add_argument("--common-word-threshold", type=int, default=float('inf')) # replace tags and character-level inputs with a special token above this threshold
435 | subparser.add_argument("--no-lstm-hidden-dims", type=int, nargs="+", default=[250])
436 | train_subparser = subparser
437 |
438 | subparser = subparsers.add_parser("train-label", parents=[train_subparser], add_help=False)
439 | subparser.set_defaults(callback=predict_labels)
440 |
441 | subparser = subparsers.add_parser("derivative", parents=[train_subparser], add_help=False)
442 | subparser.set_defaults(callback=derivative_analysis)
443 |
444 | subparser = subparsers.add_parser("test")
445 | subparser.set_defaults(callback=run_test)
446 | for arg in dynet_args:
447 | subparser.add_argument(arg)
448 | subparser.add_argument("--model-path-base", required=True)
449 | subparser.add_argument("--evalb-dir", default="EVALB/")
450 | subparser.add_argument("--test-path", default="data/23.auto.clean")
451 |
452 | args = parser.parse_args()
453 | args.callback(args)
454 |
455 | if __name__ == "__main__":
456 | main()
457 |
--------------------------------------------------------------------------------
/src/parse.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import random
3 |
4 | import dynet as dy
5 | import numpy as np
6 |
7 | import trees
8 |
9 | START = "<START>"
10 | STOP = "<STOP>"
11 | UNK = "<UNK>"
12 | COMMON_WORD = "<COMMON>"
13 |
14 | def augment(scores, oracle_index): # add 1 to every score except the oracle's (loss-augmented decoding)
15 | assert isinstance(scores, dy.Expression)
16 | shape = scores.dim()[0]
17 | assert len(shape) == 1
18 | increment = np.ones(shape)
19 | increment[oracle_index] = 0
20 | return scores + dy.inputVector(increment)
21 |
22 | def shuffle(items, start, end):
23 | if end <= start:
24 | return
25 | to_shuffle = items[start:end]
26 | random.shuffle(to_shuffle)
27 | items[start:end] = to_shuffle
28 |
29 | def transpose_lists(nested_list):
30 | result = []
31 | for i in range(len(nested_list[0])):
32 | result.append([l[i] for l in nested_list])
33 | return result
34 |
35 | def bow_range(items, start, end):
36 | if end <= start:
37 | return dy.zeros(*items[0].dim())
38 | else:
39 | return dy.average(items[start:end]) # or esum
40 |
41 | def weighted_bow_range(items, start, end, params, location):
42 | if end <= start:
43 | return dy.zeros(*items[0].dim())
44 | else:
45 | selected_items = items[start:end]
46 | n = len(selected_items)
47 | selected_params = params[:n]
48 | assert location in ['left','right','middle']
49 | if location == 'left': # reverse weights to left
50 | selected_params = selected_params[::-1]  # keep a list so len() below works
51 | elif location == 'middle' and n > 1: # mirror weights in middle
52 | first_half = selected_params[:n//2]
53 | selected_params[-(n//2):] = reversed(first_half)
54 | assert len(selected_params) == len(selected_items)
55 | weighted_items = [i*dy.parameter(p) for i, p in zip(selected_items, selected_params)]
56 | return dy.average(weighted_items)
57 |
58 | class Feedforward(object):
59 | def __init__(self, model, input_dim, hidden_dims, output_dim, dropout=0):
60 | self.spec = locals()
61 | self.spec.pop("self")
62 | self.spec.pop("model")
63 |
64 | self.model = model.add_subcollection("Feedforward")
65 |
66 | self.weights = []
67 | self.biases = []
68 | dims = [input_dim] + hidden_dims + [output_dim]
69 | for prev_dim, next_dim in zip(dims, dims[1:]):
70 | self.weights.append(self.model.add_parameters((next_dim, prev_dim)))
71 | self.biases.append(self.model.add_parameters(next_dim))
72 |
73 | self.dropout = dropout
74 |
75 | def param_collection(self):
76 | return self.model
77 |
78 | @classmethod
79 | def from_spec(cls, spec, model):
80 | return cls(model, **spec)
81 |
82 | def __call__(self, x):
83 | for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
84 | weight = dy.parameter(weight)
85 | bias = dy.parameter(bias)
86 | x = dy.affine_transform([bias, weight, x])
87 | if i < len(self.weights) - 1:
88 | x = dy.rectify(x)
89 | x = dy.dropout(x, self.dropout)
90 | return x
91 |
92 | class UntiedLSTMLayer(object):
93 | def __init__(self, model, in_size, hidden_size, length, dropout=0.0):
94 | self.model = model.add_subcollection("UntiedLSTM")
95 |
96 | self.in_size = in_size
97 | self.hidden_size = hidden_size
98 | self.length = length
99 | self.dropout = dropout
100 |
101 | self.Wxs = [self.model.add_parameters((4*hidden_size,in_size)) for _ in range(length)]
102 | self.Whs = [self.model.add_parameters((4*hidden_size,hidden_size)) for _ in range(length)]
103 | self.bs = [self.model.add_parameters(4*hidden_size) for _ in range(length)]
104 | self.initial_c = self.model.add_parameters(hidden_size)
105 | self.initial_h = self.model.add_parameters(hidden_size)
106 |
107 | def set_dropout(self, dropout):
108 | self.dropout = dropout
109 |
110 | def disable_dropout(self):
111 | self.dropout = 0.0
112 |
113 | def transduce(self, inputs):
114 | assert len(inputs) == self.length
115 |
116 | batch_size = inputs[0].dim()[1]
117 |
118 | dropout_retain = 1 - self.dropout
119 | dropout_mask_x = dy.random_bernoulli(self.in_size, dropout_retain, 1/dropout_retain, batch_size)
120 | dropout_mask_h = dy.random_bernoulli(self.hidden_size, dropout_retain, 1/dropout_retain, batch_size)
121 |
122 | c_init = dy.parameter(self.initial_c)
123 | c_tm1 = dy.concatenate_to_batch([c_init for _ in range(batch_size)])
124 | h_init = dy.parameter(self.initial_h)
125 | h_tm1 = dy.concatenate_to_batch([h_init for _ in range(batch_size)])
126 |
127 | outputs = []
128 |
129 | for i, x in enumerate(inputs):
130 | gates = dy.vanilla_lstm_gates_dropout_concat([x], h_tm1,
131 | dy.parameter(self.Wxs[i]), dy.parameter(self.Whs[i]), dy.parameter(self.bs[i]),
132 | dropout_mask_x, dropout_mask_h)
133 |
134 | c = dy.vanilla_lstm_c(c_tm1, gates)
135 | h = dy.vanilla_lstm_h(c, gates)
136 | outputs.append(h)
137 |
138 | c_tm1 = c
139 | h_tm1 = h
140 |
141 | return outputs
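
    # Unlike a standard LSTM, this layer learns a separate set of gate
    # parameters (Wx, Wh, b) for every time step, so it can only transduce
    # sequences of exactly the `length` it was built with. The Bernoulli
    # masks are sampled once per call and reused at every step, i.e.
    # variational-style recurrent dropout, scaled by 1 / retain probability
    # to preserve expectations.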
142 |
143 | class BidirectionalUntiedLSTM(object):
144 | def __init__(self, model, in_size, hidden_size, n_layers, length, dropout=0.0):
145 | self.model = model.add_subcollection("Bidirectional")
146 |
147 | self.layers = []
148 | for i in range(n_layers):
149 | f = UntiedLSTMLayer(self.model, in_size if i == 0 else 2*hidden_size, hidden_size, length, dropout)
150 | b = UntiedLSTMLayer(self.model, in_size if i == 0 else 2*hidden_size, hidden_size, length, dropout)
151 | self.layers.append((f,b))
152 |
153 | def set_dropout(self, dropout):
154 | for f,b in self.layers:
155 | f.set_dropout(dropout)
156 | b.set_dropout(dropout)
157 |
158 | def disable_dropout(self):
159 | for f,b in self.layers:
160 | f.disable_dropout()
161 | b.disable_dropout()
162 |
163 | def transduce(self, inputs):
164 | f,b = self.layers[0]
165 | fh = f.transduce(inputs)
166 | bh = reversed(b.transduce(inputs[::-1]))
167 | h = [dy.concatenate([a,b]) for a,b in zip(fh, bh)]
168 |
169 | for i in range(1,len(self.layers)):
170 | f,b = self.layers[i]
171 | fh = f.transduce(h)
172 | bh = reversed(b.transduce(h[::-1]))
173 | h = [dy.concatenate([a,b]) for a,b in zip(fh, bh)]
174 |
175 | return h
176 |
177 | class ParserBase(object):
178 | def __init__(
179 | self,
180 | model,
181 | tag_vocab,
182 | char_vocab,
183 | word_vocab,
184 | label_vocab,
185 | tag_embedding_dim,
186 | char_embedding_dim,
187 | char_lstm_layers,
188 | char_lstm_dim,
189 | word_embedding_dim,
190 | lstm_layers,
191 | lstm_dim,
192 | dropout,
193 | lstm_type,
194 | lstm_context_size,
195 | embedding_type,
196 | concat_bow,
197 | weight_bow,
198 | random_emb,
199 | random_lstm,
200 | common_word_threshold,
201 | no_lstm_hidden_dims,
202 | ):
203 | self.spec = locals()
204 | self.spec.pop("self")
205 | self.spec.pop("model")
206 |
207 | self.model = model.add_subcollection("Parser")
208 | self.trainable_parameters = self.model.add_subcollection("Trainable")
209 | self.tag_vocab = tag_vocab
210 | self.char_vocab = char_vocab
211 | self.word_vocab = word_vocab
212 | self.label_vocab = label_vocab
213 | self.char_lstm_dim = char_lstm_dim
214 | self.lstm_dim = lstm_dim
215 |
216 | emb_model = self.model if random_emb else self.trainable_parameters
217 |
218 | for c in embedding_type:
219 | assert c in 'wtc'
220 | self.embedding_type = embedding_type
221 | emb_dim = 0
222 | if 'w' in embedding_type:
223 | emb_dim += word_embedding_dim
224 | self.word_embeddings = emb_model.add_lookup_parameters(
225 | (word_vocab.size, word_embedding_dim), name="word-embeddings")
226 | if 't' in embedding_type:
227 | emb_dim += tag_embedding_dim
228 | self.tag_embeddings = emb_model.add_lookup_parameters(
229 | (tag_vocab.size, tag_embedding_dim), name="tag-embeddings")
230 | if 'c' in embedding_type:
231 | emb_dim += 2*char_lstm_dim
232 | self.char_embeddings = emb_model.add_lookup_parameters(
233 | (char_vocab.size, char_embedding_dim), name="char-embeddings")
234 |
235 | self.char_lstm = dy.BiRNNBuilder(
236 | char_lstm_layers,
237 | char_embedding_dim,
238 | 2 * char_lstm_dim,
239 | self.trainable_parameters,
240 | dy.VanillaLSTMBuilder)
241 |
242 | if lstm_type in ["truncated", "untied-truncated", "no-lstm"]:
243 |             self.indexed_starts = [self.trainable_parameters.add_parameters(emb_dim) for _ in range(300)]  # 300 caps the number of fencepost positions (sentence length + 1)
244 | self.indexed_stops = [self.trainable_parameters.add_parameters(emb_dim) for _ in range(300)]
245 |
246 | if lstm_type == "no-lstm":
247 | self.context_network = Feedforward(
248 | self.model if random_lstm else self.trainable_parameters,
249 | emb_dim*2*(lstm_context_size+1), no_lstm_hidden_dims, 2*lstm_dim, dropout)
250 | elif lstm_type == "untied-truncated":
251 | self.lstm = BidirectionalUntiedLSTM(
252 | self.model if random_lstm else self.trainable_parameters,
253 | emb_dim, lstm_dim, lstm_layers, 2*(lstm_context_size+1))
254 | else:
255 | self.lstm = dy.BiRNNBuilder(
256 | lstm_layers,
257 | emb_dim,
258 | 2 * lstm_dim,
259 | self.model if random_lstm else self.trainable_parameters,
260 | dy.VanillaLSTMBuilder)
261 |
262 |         assert not concat_bow or lstm_type == 'truncated', 'concat-bow only supported with truncated lstm-type'
263 | self.concat_bow = concat_bow
264 | self.weight_bow = weight_bow
265 | output_dim = 2 * lstm_dim
266 | if concat_bow:
267 | output_dim += 3 * emb_dim
268 | if weight_bow:
269 | self.bow_weights = [self.trainable_parameters.add_parameters(1) for i in range(300)]
270 | self.span_representation_dimension = output_dim
271 |
272 | self.dropout = dropout
273 |
274 | self.lstm_type = lstm_type
275 | self.lstm_context_size = lstm_context_size
276 |
277 | self.lstm_initialized = False
278 |
279 | self.common_word_threshold = common_word_threshold
280 |
281 | def param_collection(self):
282 | return self.model
283 |
284 | @classmethod
285 | def from_spec(cls, spec, model):
286 | return cls(model, **spec)
287 |
288 | def new_batch(self):
289 | self.lstm_initialized = False
290 |
291 | def transduce_lstm_batch(self, inputs):
292 |         # workaround for an lstm dropout error in dynet: after the first transduce on a batch, refresh the dropout masks to match the current batch size
293 | if self.lstm_initialized:
294 | batch_size = inputs[0].dim()[1]
295 | for fb, bb in self.lstm.builder_layers:
296 | for b in [fb,bb]:
297 | b.set_dropout_masks(batch_size=batch_size)
298 | self.lstm_initialized = True
299 | return self.lstm.transduce(inputs)
300 |
301 | def get_basic_span_encoding(self, embeddings):
302 | lstm_outputs = self.lstm.transduce(embeddings)
303 |
304 | @functools.lru_cache(maxsize=None)
305 | def span_encoding(left, right):
306 | forward = (
307 | lstm_outputs[right][:self.lstm_dim] -
308 | lstm_outputs[left][:self.lstm_dim])
309 | backward = (
310 | lstm_outputs[left + 1][self.lstm_dim:] -
311 | lstm_outputs[right + 1][self.lstm_dim:])
312 | return dy.concatenate([forward, backward])
313 |
314 | return span_encoding
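
    # The closure above computes the usual LSTM-minus span features: with
    # outputs indexed over the START/STOP-padded sentence, span (left, right)
    # is the concatenation of
    #   forward[right] - forward[left]            (first half of each output)
    #   backward[left + 1] - backward[right + 1]  (second half)
    # so each difference summarizes the words between the two fenceposts.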
315 |
316 | def get_truncated_span_encoding(self, embeddings, distance, concat_bow, weight_bow, untied=False):
317 | padded_embeddings = [embeddings[0]]*(distance-1)+embeddings+[embeddings[-1]]*(distance-1)
318 | batched_embeddings = []
319 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
320 | self.indexed_starts[:len(embeddings)-1]]))
321 | for i in range(distance*2):
322 | selected = padded_embeddings[i:len(padded_embeddings)-(distance*2)+i+1]
323 | catted = dy.concatenate_to_batch(selected)
324 | batched_embeddings.append(catted)
325 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
326 | self.indexed_stops[:len(embeddings)-1]]))
327 | assert batched_embeddings[0].dim()[1] == len(embeddings)-1 # batch dimension is length of sentence + 1
328 |
329 | if untied:
330 | lstm_outputs = self.lstm.transduce(batched_embeddings)
331 | else:
332 | lstm_outputs = self.transduce_lstm_batch(batched_embeddings)
333 |
334 | forward_reps = lstm_outputs[distance-1][:self.lstm_dim]
335 | backward_reps = lstm_outputs[distance][self.lstm_dim:]
336 |
337 | @functools.lru_cache(maxsize=None)
338 | def span_encoding(left, right):
339 | forward = (
340 | dy.pick_batch_elem(forward_reps, right) -
341 | dy.pick_batch_elem(forward_reps, left))
342 | backward = (
343 | dy.pick_batch_elem(backward_reps, left) -
344 | dy.pick_batch_elem(backward_reps, right))
345 |
346 | if concat_bow:
347 | if weight_bow:
348 | bow_before = weighted_bow_range(embeddings, 1, left-distance+1,
349 | self.bow_weights, 'left')
350 | bow_inside = weighted_bow_range(embeddings, left+distance+1, right-distance+1,
351 | self.bow_weights, 'middle')
352 | bow_after = weighted_bow_range(embeddings, right+distance+1, len(embeddings)-1,
353 | self.bow_weights, 'right')
354 | else:
355 | bow_before = bow_range(embeddings, 1, left-distance+1)
356 | bow_inside = bow_range(embeddings, left+distance+1, right-distance+1)
357 | bow_after = bow_range(embeddings, right+distance+1, len(embeddings)-1)
358 | return dy.concatenate([forward, backward, bow_before, bow_inside, bow_after])
359 | else:
360 | return dy.concatenate([forward, backward])
361 |
362 | return span_encoding
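
    # Batching trick used above: element t of `batched_embeddings` stacks,
    # along the batch dimension, the t-th input of the truncated window
    # around every fencepost, so a single transduce over 2*distance + 2 steps
    # (including the learned per-position start/stop vectors) processes all
    # positions at once; `pick_batch_elem` then reads out one fencepost. The
    # optional bag-of-words terms summarize everything the truncated windows
    # cannot see: before the span, inside it beyond the windows, and after it.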
363 |
364 | def get_truncated_no_lstm_span_encoding(self, embeddings, distance):
365 | padded_embeddings = [embeddings[0]]*(distance-1)+embeddings+[embeddings[-1]]*(distance-1)
366 | batched_embeddings = []
367 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
368 | self.indexed_starts[:len(embeddings)-1]]))
369 | for i in range(distance*2):
370 | selected = padded_embeddings[i:len(padded_embeddings)-(distance*2)+i+1]
371 | catted = dy.concatenate_to_batch(selected)
372 | batched_embeddings.append(catted)
373 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
374 | self.indexed_stops[:len(embeddings)-1]]))
375 | assert batched_embeddings[0].dim()[1] == len(embeddings)-1 # batch dimension is length of sentence + 1
376 |
377 | context_outputs = self.context_network(dy.concatenate(batched_embeddings))
378 |
379 | @functools.lru_cache(maxsize=None)
380 | def span_encoding(left, right):
381 | return dy.pick_batch_elem(context_outputs, right) - \
382 | dy.pick_batch_elem(context_outputs, left)
383 |
384 | return span_encoding
385 |
386 | def get_shuffled_span_encoding(self, embeddings, distance):
387 | all_lstm_inputs = []
388 | for i in range(len(embeddings)-1):
389 | lstm_inputs = embeddings[:] # copy
390 |             shuffle(lstm_inputs, 1, i-distance+1)  # note: the start/end padding is never shuffled
391 | shuffle(lstm_inputs, i+distance+1, len(embeddings)-1)
392 | all_lstm_inputs.append(lstm_inputs)
393 |
394 | all_lstm_inputs = [dy.concatenate_to_batch(items) for items in transpose_lists(all_lstm_inputs)]
395 | all_lstm_outputs = self.transduce_lstm_batch(all_lstm_inputs)
396 |
397 | @functools.lru_cache(maxsize=None)
398 | def span_encoding(left, right):
399 | forward = (
400 | dy.pick_batch_elem(all_lstm_outputs[right], right)[:self.lstm_dim] -
401 | dy.pick_batch_elem(all_lstm_outputs[left], left)[:self.lstm_dim])
402 | backward = (
403 | dy.pick_batch_elem(all_lstm_outputs[left + 1], left)[self.lstm_dim:] -
404 | dy.pick_batch_elem(all_lstm_outputs[right + 1], right)[self.lstm_dim:])
405 | return dy.concatenate([forward, backward])
406 |
407 | return span_encoding
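
    # For the 'shuffled' variant, one copy of the sentence is made per
    # fencepost position i, with all embeddings outside a local window around
    # i randomly permuted (padding excluded). The copies are stacked along the
    # batch dimension so the LSTM still runs once, and span (left, right)
    # reads only batch elements left and right, whose local context is intact.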
408 |
409 | def get_inside_span_encoding(self, embeddings, distance, shuffle_inside=False):
410 | padded_embeddings = [embeddings[0]]*distance+embeddings+[embeddings[-1]]*distance
411 | all_spans = []
412 | all_lstm_outputs = []
413 | for i in range(len(embeddings)-2):
414 | for j in range(i+1,len(embeddings)-1):
415 | all_spans.append((i,j))
416 | lstm_inputs = padded_embeddings[i+1:j+1+2*distance]
417 | if shuffle_inside:
418 | shuffle(lstm_inputs, 2*distance, len(lstm_inputs)-2*distance)
419 | all_lstm_outputs.append(self.lstm.transduce(lstm_inputs))
420 | span_map = {span:idx for idx, span in enumerate(all_spans)}
421 |
422 | @functools.lru_cache(maxsize=None)
423 | def span_encoding(left, right):
424 | lstm_outputs = all_lstm_outputs[span_map[(left,right)]]
425 | forward = (
426 | lstm_outputs[-distance-1][:self.lstm_dim] -
427 | lstm_outputs[distance-1][:self.lstm_dim])
428 | backward = (
429 | lstm_outputs[distance][self.lstm_dim:] -
430 | lstm_outputs[-distance][self.lstm_dim:])
431 | return dy.concatenate([forward, backward])
432 |
433 | return span_encoding
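
    # The 'inside' variant runs a separate LSTM pass for each of the O(n^2)
    # candidate spans, over just the span's words plus `distance` positions of
    # (edge-padded) context on each side. This is far more expensive than the
    # batched variants above, but guarantees that the representation sees
    # nothing outside that window.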
434 |
435 | def get_embeddings(self, sentence, is_train=False):
436 | embeddings = []
437 | for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
438 | embed = []
439 |             is_common_word = word not in (START, STOP) and \
440 | self.word_vocab.count(word) > self.common_word_threshold
441 | if 't' in self.embedding_type:
442 | if is_common_word:
443 | tag = COMMON_WORD
444 | tag_embedding = self.tag_embeddings[self.tag_vocab.index(tag)]
445 | embed.append(tag_embedding)
446 | if 'c' in self.embedding_type:
447 | chars = list(word) if word not in (START, STOP) else [word]
448 | if is_common_word:
449 | chars = [COMMON_WORD]
450 | char_lstm_outputs = self.char_lstm.transduce([
451 | self.char_embeddings[self.char_vocab.index_or_unk(char, UNK)]
452 | for char in [START] + chars + [STOP]])
453 | char_encoding = dy.concatenate([
454 | char_lstm_outputs[-1][:self.char_lstm_dim],
455 | char_lstm_outputs[0][self.char_lstm_dim:]])
456 | embed.append(char_encoding)
457 | if 'w' in self.embedding_type:
458 | if word not in (START, STOP):
459 | count = self.word_vocab.count(word)
460 | if not count or (is_train and np.random.rand() < 1 / (1 + count)):
461 | word = UNK
462 | word_embedding = self.word_embeddings[self.word_vocab.index(word)]
463 | embed.append(word_embedding)
464 | embeddings.append(dy.concatenate(embed))
465 | return embeddings
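
    # Word-level dropout for the 'w' embeddings: at training time a word with
    # corpus count c is replaced by UNK with probability 1 / (1 + c), so
    # singletons are dropped half the time while frequent words are almost
    # always kept (e.g. c = 1 -> 50%, c = 9 -> 10%).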
466 |
467 | def get_representation_function(self, sentence, is_train):
468 | if self.lstm_type != "no-lstm":
469 | if is_train:
470 | self.lstm.set_dropout(self.dropout)
471 | else:
472 | self.lstm.disable_dropout()
473 | if 'c' in self.embedding_type:
474 | if is_train:
475 | self.char_lstm.set_dropout(self.dropout)
476 | else:
477 | self.char_lstm.disable_dropout()
478 |
479 |
480 | embeddings = self.get_embeddings(sentence, is_train)
481 |
482 | if self.lstm_type == 'truncated' or self.lstm_type == 'untied-truncated':
483 | span_encoding = self.get_truncated_span_encoding(embeddings, self.lstm_context_size, self.concat_bow, self.weight_bow, self.lstm_type == 'untied-truncated')
484 | elif self.lstm_type == 'no-lstm':
485 | span_encoding = self.get_truncated_no_lstm_span_encoding(embeddings, self.lstm_context_size)
486 | elif self.lstm_type == 'shuffled':
487 | span_encoding = self.get_shuffled_span_encoding(embeddings, self.lstm_context_size)
488 | elif self.lstm_type == 'inside':
489 | span_encoding = self.get_inside_span_encoding(embeddings, self.lstm_context_size)
490 | else:
491 | span_encoding = self.get_basic_span_encoding(embeddings)
492 |
493 | return span_encoding
494 |
495 | def lstm_derivative(self, sentence, position, index):
496 | self.lstm.disable_dropout()
497 | embeddings = self.get_embeddings(sentence, is_train=False)
498 | lstm_outputs = self.lstm.transduce(embeddings)
499 |
500 | forward = lstm_outputs[position][:self.lstm_dim]
501 | backward = lstm_outputs[position + 1][self.lstm_dim:]
502 | c = dy.concatenate([forward, backward])
503 | s = c[index]
504 | s.backward()
505 | gradients = [embed.gradient() for embed in embeddings]
506 | return gradients
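
    # lstm_derivative backpropagates from a single coordinate of the span
    # representation at `position` and returns the gradient with respect to
    # each input embedding, a direct measure of how sensitive an LSTM state
    # is to inputs at varying distances.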
507 |
508 | class TopDownParser(ParserBase):
509 | def __init__(
510 | self,
511 | model,
512 | label_hidden_dim,
513 | split_hidden_dim,
514 | span_representation_args
515 | ):
516 | super().__init__(model, *span_representation_args)
517 |
518 | self.spec = {'label_hidden_dim':label_hidden_dim, 'split_hidden_dim':split_hidden_dim, 'span_representation_args':span_representation_args}
519 |
520 | self.f_label = Feedforward(
521 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size)
522 | self.f_split = Feedforward(
523 | self.trainable_parameters, self.span_representation_dimension, [split_hidden_dim], 1)
524 |
525 | def parse(self, sentence, gold=None, explore=True):
526 | is_train = gold is not None
527 |
528 | get_span_encoding = self.get_representation_function(sentence, is_train)
529 |
530 | def helper(left, right):
531 | assert 0 <= left < right <= len(sentence)
532 |
533 | label_scores = self.f_label(get_span_encoding(left, right))
534 |
535 | if is_train:
536 | oracle_label = gold.oracle_label(left, right)
537 | oracle_label_index = self.label_vocab.index(oracle_label)
538 | label_scores = augment(label_scores, oracle_label_index)
539 |
540 | label_scores_np = label_scores.npvalue()
541 | argmax_label_index = int(
542 | label_scores_np.argmax() if right - left < len(sentence) else
543 | label_scores_np[1:].argmax() + 1)
544 | argmax_label = self.label_vocab.value(argmax_label_index)
545 |
546 | if is_train:
547 | label = argmax_label if explore else oracle_label
548 | label_loss = (
549 | label_scores[argmax_label_index] -
550 | label_scores[oracle_label_index]
551 | if argmax_label != oracle_label else dy.zeros(1))
552 | else:
553 | label = argmax_label
554 | label_loss = label_scores[argmax_label_index]
555 |
556 | if right - left == 1:
557 | tag, word = sentence[left]
558 | tree = trees.LeafParseNode(left, tag, word)
559 | if label:
560 | tree = trees.InternalParseNode(label, [tree])
561 | return [tree], label_loss
562 |
563 | left_encodings = []
564 | right_encodings = []
565 | for split in range(left + 1, right):
566 | left_encodings.append(get_span_encoding(left, split))
567 | right_encodings.append(get_span_encoding(split, right))
568 | left_scores = self.f_split(dy.concatenate_to_batch(left_encodings))
569 | right_scores = self.f_split(dy.concatenate_to_batch(right_encodings))
570 | split_scores = left_scores + right_scores
571 | split_scores = dy.reshape(split_scores, (len(left_encodings),))
572 |
573 | if is_train:
574 | oracle_splits = gold.oracle_splits(left, right)
575 | oracle_split = min(oracle_splits)
576 | oracle_split_index = oracle_split - (left + 1)
577 | split_scores = augment(split_scores, oracle_split_index)
578 |
579 | split_scores_np = split_scores.npvalue()
580 | argmax_split_index = int(split_scores_np.argmax())
581 | argmax_split = argmax_split_index + (left + 1)
582 |
583 | if is_train:
584 | split = argmax_split if explore else oracle_split
585 | split_loss = (
586 | split_scores[argmax_split_index] -
587 | split_scores[oracle_split_index]
588 | if argmax_split != oracle_split else dy.zeros(1))
589 | else:
590 | split = argmax_split
591 | split_loss = split_scores[argmax_split_index]
592 |
593 | left_trees, left_loss = helper(left, split)
594 | right_trees, right_loss = helper(split, right)
595 |
596 | children = left_trees + right_trees
597 | if label:
598 | children = [trees.InternalParseNode(label, children)]
599 |
600 | return children, label_loss + split_loss + left_loss + right_loss
601 |
602 | children, loss = helper(0, len(sentence))
603 | assert len(children) == 1
604 | tree = children[0]
605 | if is_train and not explore:
606 | assert gold.convert().linearize() == tree.convert().linearize()
607 | return tree, loss
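
    # Training uses loss-augmented greedy decoding: `augment` (defined
    # earlier in this file) presumably mirrors the inline augmentation in
    # ChartParser.parse below, raising every non-oracle score by a margin of
    # 1 before the argmax, so each label/split decision contributes a hinge
    # loss of score(argmax) - score(oracle). With explore=True the parser
    # descends along its own predictions rather than the oracle's, in the
    # style of dynamic-oracle training.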
608 |
609 | class ChartParser(ParserBase):
610 | def __init__(
611 | self,
612 | model,
613 | label_hidden_dim,
614 | span_representation_args
615 | ):
616 | super().__init__(model, *span_representation_args)
617 |
618 | self.spec = {'label_hidden_dim':label_hidden_dim, 'span_representation_args':span_representation_args}
619 |
620 | self.f_label = Feedforward(
621 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size - 1)
622 |
623 | def parse(self, sentence, gold=None):
624 | is_train = gold is not None
625 |
626 | get_span_encoding = self.get_representation_function(sentence, is_train)
627 |
628 | @functools.lru_cache(maxsize=None)
629 | def get_label_scores(left, right):
630 | non_empty_label_scores = self.f_label(get_span_encoding(left, right))
631 | return dy.concatenate([dy.zeros(1), non_empty_label_scores])
632 |
633 | def helper(force_gold):
634 | if force_gold:
635 | assert is_train
636 |
637 | chart = {}
638 |
639 | for length in range(1, len(sentence) + 1):
640 | for left in range(0, len(sentence) + 1 - length):
641 | right = left + length
642 |
643 | label_scores_expr = get_label_scores(left, right)
644 | label_scores_np = label_scores_expr.npvalue()
645 |
646 | if is_train:
647 | oracle_label = gold.oracle_label(left, right)
648 | oracle_label_index = self.label_vocab.index(oracle_label)
649 |
650 | if force_gold:
651 | label = oracle_label
652 | label_score_expr = label_scores_expr[oracle_label_index]
653 | label_score = label_scores_np[oracle_label_index]
654 | else:
655 | if is_train:
656 |                             # augment only the numpy copy, which is used for the argmax;
657 |                             # the _expr version stays unaugmented, but the margin is constant, so the gradient is unchanged
658 | label_scores_np += 1
659 | label_scores_np[oracle_label_index] -= 1
660 | argmax_label_index = int(
661 | label_scores_np.argmax() if length < len(sentence) else
662 | label_scores_np[1:].argmax() + 1)
663 | argmax_label = self.label_vocab.value(argmax_label_index)
664 | label = argmax_label
665 | label_score_expr = label_scores_expr[argmax_label_index]
666 | label_score = label_scores_np[argmax_label_index]
667 |
668 | if length == 1:
669 | tag, word = sentence[left]
670 | tree = trees.LeafParseNode(left, tag, word)
671 | if label:
672 | tree = trees.InternalParseNode(label, [tree])
673 | chart[left, right] = [tree], label_score, label_score_expr
674 | continue
675 |
676 | if force_gold:
677 | oracle_splits = gold.oracle_splits(left, right)
678 | oracle_split = min(oracle_splits)
679 | best_split = oracle_split
680 | else:
681 | best_split = max(
682 | range(left + 1, right),
683 | key=lambda split:
684 | chart[left, split][1] +
685 | chart[split, right][1])
686 |
687 | left_trees, left_score, left_score_expr = chart[left, best_split]
688 | right_trees, right_score, right_score_expr = chart[best_split, right]
689 |
690 | children = left_trees + right_trees
691 | if label:
692 | children = [trees.InternalParseNode(label, children)]
693 |
694 | chart[left, right] = (children, label_score + left_score + right_score,
695 | label_score_expr + left_score_expr + right_score_expr)
696 |
697 | children, score, score_expr = chart[0, len(sentence)]
698 | assert len(children) == 1
699 | return children[0], score, score_expr
700 |
701 | tree, score, score_expr = helper(False)
702 | if is_train:
703 | oracle_tree, oracle_score, oracle_score_expr = helper(True)
704 | assert oracle_tree.convert().linearize() == gold.convert().linearize()
705 | correct = tree.convert().linearize() == gold.convert().linearize()
706 | loss_expr = dy.zeros(1) if correct else score_expr - oracle_score_expr
707 | loss = 0 if correct else score - oracle_score
708 | augmentation = loss - loss_expr.value()
709 | return tree, loss_expr + augmentation
710 | else:
711 | return tree, score_expr
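
    # Global max-margin training: the loss is score(predicted tree) minus
    # score(oracle tree) under margin-augmented scores. Only the numpy copies
    # of the scores carry the +1 margins, so the scalar `augmentation` (the
    # total margin, a constant with zero gradient) is added back purely to
    # make the reported loss value match the augmented objective.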
712 |
713 | class IndependentParser(ParserBase):
714 | def __init__(
715 | self,
716 | model,
717 | label_hidden_dim,
718 | span_representation_args
719 | ):
720 | super().__init__(model, *span_representation_args)
721 |
722 | self.spec = {'label_hidden_dim':label_hidden_dim, 'span_representation_args':span_representation_args}
723 |
724 | self.f_label = Feedforward(
725 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size - 1)
726 |
727 | def parse(self, sentence, gold=None):
728 | is_train = gold is not None
729 |
730 | get_span_encoding = self.get_representation_function(sentence, is_train)
731 |
732 | @functools.lru_cache(maxsize=None)
733 | def get_label_scores(left, right):
734 | non_empty_label_scores = self.f_label(get_span_encoding(left, right))
735 | return dy.concatenate([dy.zeros(1), non_empty_label_scores])
736 |
737 | brackets = trees.SpanList(sentence)
738 | total_loss = dy.zeros(1)
739 | for length in range(1, len(sentence) + 1):
740 | for left in range(0, len(sentence) + 1 - length):
741 | right = left + length
742 |
743 | label_scores_expr = get_label_scores(left, right)
744 | label_scores_np = label_scores_expr.npvalue()
745 |
746 | if is_train:
747 | oracle_label = gold.oracle_label(left, right)
748 | oracle_label_index = self.label_vocab.index(oracle_label)
749 | oracle_label_score_expr = label_scores_expr[oracle_label_index]
750 |
751 |                     # augment only the numpy copy, which is used for the argmax;
752 |                     # the _expr version stays unaugmented, but the margin is constant, so the gradient is unchanged
753 | label_scores_np += 1
754 | label_scores_np[oracle_label_index] -= 1
755 |
756 | argmax_label_index = int(
757 | label_scores_np.argmax() if length < len(sentence) else
758 | label_scores_np[1:].argmax() + 1)
759 | argmax_label = self.label_vocab.value(argmax_label_index)
760 | label = argmax_label
761 | label_score_expr = label_scores_expr[argmax_label_index]
762 | label_score = label_scores_np[argmax_label_index]
763 |                 for sublabel in label:  # the no-label case is an empty tuple, so nothing is added
764 | brackets.add(left, right, sublabel)
765 |
766 | if is_train and argmax_label != oracle_label:
767 | total_loss = total_loss + label_score_expr - oracle_label_score_expr
768 |
769 | return brackets, total_loss
770 |
771 | class LabelPrediction(ParserBase):  # probes an existing parser's span representations; never calls ParserBase.__init__
772 | def __init__(
773 | self,
774 | model,
775 | parser,
776 | label_hidden_dim,
777 | ):
778 |
779 | self.parser = parser
780 | self.label_hidden_dim = label_hidden_dim
781 | self.f_label = Feedforward(
782 | model, parser.span_representation_dimension, [label_hidden_dim], parser.label_vocab.size)
783 |
784 | def predict_parent_label_for_spans(self, sentence, gold, self_not_parent=False):
785 | span_encoding = self.parser.get_representation_function(sentence, is_train=False)
786 |
787 | correct = 0
788 | total = 0
789 | total_loss = dy.zeros(1)
790 | def accumulate(left, right, target_label_index):
791 | nonlocal correct, total, total_loss
792 | label_scores = self.f_label(span_encoding(left, right))
793 |
794 | # predicted label
795 | label_scores_np = label_scores.npvalue()
796 | argmax_label_index = int(label_scores_np.argmax())
797 | if argmax_label_index == target_label_index:
798 | correct += 1
799 | total += 1
800 |
801 | # loss for training
802 | augmented_label_scores = augment(label_scores, target_label_index)
803 | augmented_argmax_label_index = int(augmented_label_scores.npvalue().argmax())
804 | label_loss = (
805 | label_scores[augmented_argmax_label_index] -
806 | label_scores[target_label_index]
807 | if augmented_argmax_label_index != target_label_index else dy.zeros(1))
808 | total_loss = total_loss + label_loss
809 |
810 | for node, parent in gold.iterate_spans_with_parents(): # doesn't include top level
811 | label = node.label if self_not_parent else parent.label
812 | label_index = self.parser.label_vocab.index(label)
813 | accumulate(node.left, node.right, label_index)
814 | label = gold.label if self_not_parent else () # () represents no-label, since root has no parent
815 | label_index = self.parser.label_vocab.index(label)
816 | accumulate(gold.left, gold.right, label_index)
817 |
818 | return total_loss, correct, total
819 |
--------------------------------------------------------------------------------
/src/trees.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 |
3 | class TreebankNode(object):
4 | pass
5 |
6 | class InternalTreebankNode(TreebankNode):
7 | def __init__(self, label, children):
8 | assert isinstance(label, str)
9 | self.label = label
10 |
11 | assert isinstance(children, collections.abc.Sequence)
12 | assert all(isinstance(child, TreebankNode) for child in children)
13 | assert children
14 | self.children = tuple(children)
15 |
16 | def linearize(self):
17 | return "({} {})".format(
18 | self.label, " ".join(child.linearize() for child in self.children))
19 |
20 | def leaves(self):
21 | for child in self.children:
22 | yield from child.leaves()
23 |
24 | def convert(self, index=0):
25 | tree = self
26 | sublabels = [self.label]
27 |
28 | while len(tree.children) == 1 and isinstance(
29 | tree.children[0], InternalTreebankNode):
30 | tree = tree.children[0]
31 | sublabels.append(tree.label)
32 |
33 | children = []
34 | for child in tree.children:
35 | children.append(child.convert(index=index))
36 | index = children[-1].right
37 |
38 | return InternalParseNode(tuple(sublabels), children)
39 |
40 | def brackets(self, advp_prt=True):
41 | return self._brackets(0, advp_prt)[0]
42 |
43 | def _brackets(self, start=0, advp_prt=True):
44 | results = []
45 |
46 | position = start
47 | for child in self.children:
48 | b, e = child._brackets(position, advp_prt)
49 | results.extend(b)
50 | position = e
51 | end = position
52 |
53 | label = self.label
54 | if label != 'TOP':
55 |             if advp_prt and label == 'PRT':
56 | label = 'ADVP'
57 | results.append((start, end, label))
58 |
59 | return results, end
60 |
61 | class LeafTreebankNode(TreebankNode):
62 | def __init__(self, tag, word):
63 | assert isinstance(tag, str)
64 | self.tag = tag
65 |
66 | assert isinstance(word, str)
67 | self.word = word
68 |
69 | def linearize(self):
70 | return "({} {})".format(self.tag, self.word)
71 |
72 | def leaves(self):
73 | yield self
74 |
75 | def convert(self, index=0):
76 | return LeafParseNode(index, self.tag, self.word)
77 |
78 | def _brackets(self, start=0, advp_prt=True):
79 | if self.tag in [",", ".", ":", "``", "''"]:
80 | return [], start
81 | else:
82 | return [], start+1
83 |
84 | class ParseNode(object):
85 | pass
86 |
87 | class InternalParseNode(ParseNode):
88 | def __init__(self, label, children):
89 | assert isinstance(label, tuple)
90 | assert all(isinstance(sublabel, str) for sublabel in label)
91 | assert label
92 | self.label = label
93 |
94 | assert isinstance(children, collections.abc.Sequence)
95 | assert all(isinstance(child, ParseNode) for child in children)
96 | assert children
97 | assert len(children) > 1 or isinstance(children[0], LeafParseNode)
98 | assert all(
99 | left.right == right.left
100 | for left, right in zip(children, children[1:]))
101 | self.children = tuple(children)
102 |
103 | self.left = children[0].left
104 | self.right = children[-1].right
105 |
106 | def leaves(self):
107 | for child in self.children:
108 | yield from child.leaves()
109 |
110 | def convert(self):
111 | children = [child.convert() for child in self.children]
112 | tree = InternalTreebankNode(self.label[-1], children)
113 | for sublabel in reversed(self.label[:-1]):
114 | tree = InternalTreebankNode(sublabel, [tree])
115 | return tree
116 |
117 | def enclosing(self, left, right):
118 | assert self.left <= left < right <= self.right
119 | for child in self.children:
120 | if isinstance(child, LeafParseNode):
121 | continue
122 | if child.left <= left < right <= child.right:
123 | return child.enclosing(left, right)
124 | return self
125 |
126 | def oracle_label(self, left, right):
127 | enclosing = self.enclosing(left, right)
128 | if enclosing.left == left and enclosing.right == right:
129 | return enclosing.label
130 | return ()
131 |
132 | def oracle_splits(self, left, right):
133 | return [
134 | child.left
135 | for child in self.enclosing(left, right).children
136 | if left < child.left < right
137 | ]
138 |
139 | def iterate_spans_with_parents(self):
140 | for child in self.children:
141 | if isinstance(child, InternalParseNode):
142 | yield child, self
143 | yield from child.iterate_spans_with_parents()
144 |
145 | class LeafParseNode(ParseNode):
146 | def __init__(self, index, tag, word):
147 | assert isinstance(index, int)
148 | assert index >= 0
149 | self.left = index
150 | self.right = index + 1
151 |
152 | assert isinstance(tag, str)
153 | self.tag = tag
154 |
155 | assert isinstance(word, str)
156 | self.word = word
157 |
158 | def leaves(self):
159 | yield self
160 |
161 | def convert(self):
162 | return LeafTreebankNode(self.tag, self.word)
163 |
164 | def iterate_spans_with_parents(self):
165 | return []
166 |
167 | class SpanList(object):
168 | def __init__(self, tagged_words):
169 | # tagged_words is list of (tag, word) tuples
170 | self.tagged_words = tagged_words
171 | self.list = []
172 |
173 | def add(self, left, right, label):
174 | assert isinstance(label, str)
175 | self.list.append((left, right, label))
176 |
177 | def convert(self):
178 | return self
179 |
180 | def brackets(self, advp_prt=True):
181 | location_shift = []
182 | i = 0
183 | for tag, word in self.tagged_words:
184 | location_shift.append(i)
185 | if tag not in [",", ".", ":", "``", "''"]:
186 | i += 1
187 | location_shift.append(i)
188 | result = []
189 | for left, right, label in self.list:
190 | if label == 'TOP':
191 | continue
192 |             if advp_prt and label == 'PRT':
193 | label = 'ADVP'
194 | result.append((location_shift[left], location_shift[right], label))
195 | return result
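
    # location_shift[i] is position i recounted with EVALB-deleted
    # punctuation (, . : `` '') removed, so these brackets line up with the
    # ones produced by InternalTreebankNode.brackets above.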
196 |
197 | def is_tree(self):
198 |         # a bracket set forms a tree iff no two brackets cross
199 | n_crossing = 0
200 | for left1, right1, label1 in self.list:
201 | for left2, right2, label2 in self.list:
202 | if left1 < left2 < right1 < right2:
203 | n_crossing += 1
204 | return n_crossing == 0
205 |
206 | def load_trees(path, strip_top=True, strip_spmrl_features=True):
207 | with open(path) as infile:
208 | treebank = infile.read()
209 |
210 | # Features bounded by `##` may contain spaces, so if we strip the features
211 | # we need to do so prior to tokenization
212 | if strip_spmrl_features:
213 | treebank = "".join(treebank.split("##")[::2])
214 |
215 | tokens = treebank.replace("(", " ( ").replace(")", " ) ").split()
216 |
217 | # XXX(nikita): this should really be passed as an argument
218 | if 'Hebrew' in path or 'Hungarian' in path or 'Arabic' in path:
219 | strip_top = False
220 |
221 | def helper(index):
222 | trees = []
223 |
224 | while index < len(tokens) and tokens[index] == "(":
225 | paren_count = 0
226 | while tokens[index] == "(":
227 | index += 1
228 | paren_count += 1
229 |
230 | label = tokens[index]
231 | index += 1
232 |
233 | if tokens[index] == "(":
234 | children, index = helper(index)
235 | trees.append(InternalTreebankNode(label, children))
236 | else:
237 | word = tokens[index]
238 | index += 1
239 | trees.append(LeafTreebankNode(label, word))
240 |
241 | while paren_count > 0:
242 | assert tokens[index] == ")"
243 | index += 1
244 | paren_count -= 1
245 |
246 | return trees, index
247 |
248 | trees, index = helper(0)
249 | assert index == len(tokens)
250 |
251 | # XXX(nikita): this behavior should really be controlled by an argument
252 | if 'German' in path:
253 | # Utterances where the root is a terminal symbol break our parser's
254 | # assumptions, so insert a dummy root node.
255 | for i, tree in enumerate(trees):
256 | if isinstance(tree, LeafTreebankNode):
257 | trees[i] = InternalTreebankNode("VROOT", [tree])
258 |
259 | if strip_top:
260 | for i, tree in enumerate(trees):
261 | if tree.label in ("TOP", "ROOT"):
262 | assert len(tree.children) == 1
263 | trees[i] = tree.children[0]
264 |
265 | return trees
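
# A minimal usage sketch (run from the repository root, using one of the data
# files shipped with this repo):
#
#   trees = load_trees("data/22.auto.clean")
#   parse = trees[0].convert()   # TreebankNode -> ParseNode, collapsing unary chains
#   spans = trees[0].brackets()  # [(start, end, label), ...] with punctuation skipped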
266 |
--------------------------------------------------------------------------------
/src/vocabulary.py:
--------------------------------------------------------------------------------
1 | import collections
2 |
3 | class Vocabulary(object):
4 | def __init__(self):
5 | self.frozen = False
6 | self.values = []
7 | self.indices = {}
8 | self.counts = collections.defaultdict(int)
9 |
10 | @property
11 | def size(self):
12 | return len(self.values)
13 |
14 | def value(self, index):
15 | assert 0 <= index < len(self.values)
16 | return self.values[index]
17 |
18 | def index(self, value):
19 | if not self.frozen:
20 | self.counts[value] += 1
21 |
22 | if value in self.indices:
23 | return self.indices[value]
24 |
25 | elif not self.frozen:
26 | self.values.append(value)
27 | self.indices[value] = len(self.values) - 1
28 | return self.indices[value]
29 |
30 | else:
31 | raise ValueError("Unknown value: {}".format(value))
32 |
33 | def index_or_unk(self, value, unk_value):
34 | assert self.frozen
35 | if value in self.indices:
36 | return self.indices[value]
37 | else:
38 | return self.indices[unk_value]
39 |
40 | def count(self, value):
41 | return self.counts[value]
42 |
43 | def freeze(self):
44 | self.frozen = True
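
# A minimal usage sketch ("the" stands in for a real UNK token here):
#
#   vocab = Vocabulary()
#   for word in ["the", "cat", "the"]:
#       vocab.index(word)              # grows the vocabulary and counts values
#   vocab.freeze()
#   vocab.index("the")                 # -> 0 (no new values once frozen)
#   vocab.index_or_unk("dog", "the")   # -> 0, falls back to the given unk value
#   vocab.count("the")                 # -> 2 (counts accumulate only before freeze)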
45 |
--------------------------------------------------------------------------------