├── .gitignore
├── EVALB
│   ├── COLLINS.prm
│   ├── LICENSE
│   ├── Makefile
│   ├── README
│   ├── bug
│   │   ├── bug.gld
│   │   ├── bug.rsl-new
│   │   ├── bug.rsl-old
│   │   └── bug.tst
│   ├── evalb.c
│   ├── new.prm
│   ├── sample
│   │   ├── sample.gld
│   │   ├── sample.prm
│   │   ├── sample.rsl
│   │   └── sample.tst
│   └── tgrep_proc.prl
├── EVALB_SPMRL
│   ├── Makefile
│   ├── README
│   ├── README.orig
│   ├── evalb.c
│   ├── spmrl.prm
│   └── spmrl_hebrew.prm
├── LICENSE
├── README.md
├── data
│   ├── 02-21.10way.clean
│   ├── 22.auto.clean
│   └── 23.auto.clean
└── src
    ├── evaluate.py
    ├── main.py
    ├── parse.py
    ├── trees.py
    └── vocabulary.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # PyCharm directories
2 | .idea/
3 |
4 | # Mac OS X files
5 | .DS_Store
6 |
7 | # EVALB output files
8 | EVALB/evalb
9 | EVALB/evalb.dSYM/
10 |
11 | # Uncompressed models
12 | models/*.data
13 | models/*.meta
14 |
15 | # Python byte-compiled / optimized / DLL files
16 | __pycache__/
17 | *.py[cod]
18 | *$py.class
19 |
--------------------------------------------------------------------------------
/EVALB/COLLINS.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ##------------------------------------------##
6 | DEBUG 0
7 |
8 | ##------------------------------------------##
9 | ## MAX error ##
10 | ## Number of error to stop the process. ##
11 | ## This is useful if there could be ##
12 | ## tokenization error. ##
13 | ## The process will stop when this number##
14 | ## of errors are accumulated. ##
15 | ##------------------------------------------##
16 | MAX_ERROR 10
17 |
18 | ##------------------------------------------##
19 | ## Cut-off length for statistics ##
20 | ## At the end of evaluation, the ##
21 | ## statistics for the sentences of length##
22 | ## less than or equal to this number will##
23 | ## be shown, on top of the statistics ##
24 | ## for all the sentences ##
25 | ##------------------------------------------##
26 | CUTOFF_LEN 40
27 |
28 | ##------------------------------------------##
29 | ## unlabeled or labeled bracketing ##
30 | ## 0: unlabeled bracketing ##
31 | ## 1: labeled bracketing ##
32 | ##------------------------------------------##
33 | LABELED 1
34 |
35 | ##------------------------------------------##
36 | ## Delete labels ##
37 | ## list of labels to be ignored. ##
38 | ## If it is a pre-terminal label, delete ##
39 | ## the word along with the brackets. ##
40 | ## If it is a non-terminal label, just ##
41 | ## delete the brackets (don't delete ##
42 | ## children). ##
43 | ##------------------------------------------##
44 | DELETE_LABEL TOP
45 | DELETE_LABEL -NONE-
46 | DELETE_LABEL ,
47 | DELETE_LABEL :
48 | DELETE_LABEL ``
49 | DELETE_LABEL ''
50 | DELETE_LABEL .
51 |
52 | ##------------------------------------------##
53 | ## Delete labels for length calculation ##
54 | ## list of labels to be ignored for ##
55 | ## length calculation purpose ##
56 | ##------------------------------------------##
57 | DELETE_LABEL_FOR_LENGTH -NONE-
58 |
59 | ##------------------------------------------##
60 | ## Equivalent labels, words ##
61 | ## the pairs are considered equivalent ##
62 | ## This is non-directional. ##
63 | ##------------------------------------------##
64 | EQ_LABEL ADVP PRT
65 |
66 | # EQ_WORD Example example
67 |
--------------------------------------------------------------------------------
/EVALB/LICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/EVALB/Makefile:
--------------------------------------------------------------------------------
1 | all: evalb
2 |
3 | evalb: evalb.c
4 | gcc -Wall -g -o evalb evalb.c
5 |
--------------------------------------------------------------------------------
/EVALB/README:
--------------------------------------------------------------------------------
1 | #################################################################
2 | # #
3 | # Bug fix and additional functionality for evalb #
4 | # #
5 | # This updated version of evalb fixes a bug in which sentences #
6 | # were incorrectly categorized as "length mismatch" when the #
7 | # parse output had certain mislabeled parts-of-speech. #
8 | # #
9 | # The bug was the result of evalb treating one of the tags (in #
10 | # gold or test) as a label to be deleted (see sections [6],[7] #
11 | # for details), but not the corresponding tag in the other. #
12 | # This most often occurs with punctuation. See the subdir #
13 | # "bug" for an example gld and tst file demonstrating the bug, #
14 | # as well as output of evalb with and without the bug fix. #
15 | # #
16 | # In the present version, in case of length mismatch, the nodes #
17 | # causing the imbalance are reinserted to resolve the miscount. #
18 | # If the lengths of gold and test truly differ, the error is #
19 | # still reported. The parameter file "new.prm" (derived from #
20 | # COLLINS.prm) shows how to add new potential mislabelings for #
21 | # quotes (",``,',`). #
22 | # #
23 | # I have preserved DJB's revision for modern compilers except #
24 | # for the declaration of "exit" which is provided by stdlib. #
25 | # #
26 | # Other changes: #
27 | # #
28 | # * output of F-Measure in addition to precision and recall #
29 | # (I did not update the documentation in section [4] for this) #
30 | # #
31 | # * more comprehensive DEBUG output that includes bracketing #
32 | # information as evalb is processing each sentence #
33 | # (useful in working through this, and perhaps other bugs). #
34 | # Use either the "-D" run-time switch or set DEBUG to 2 in #
35 | # the parameter file. #
36 | # #
37 | # * added DELETE_LABEL lines in new.prm for S1 nodes produced #
38 | # by the Charniak parser and "?", "!" punctuation produced by #
39 | # the Bikel parser. #
40 | # #
41 | # #
42 | # David Ellis (Brown) #
43 | # #
44 | # January.2006 #
45 | #################################################################
46 |
47 | #################################################################
48 | # #
49 | # Update of evalb for modern compilers #
50 | # #
51 | # This is an updated version of evalb, for use with modern C #
52 | # compilers. There are a few updates, each marked in the code: #
53 | # #
54 | # /* DJB: explanation of comment */ #
55 | # #
56 | # The updates are purely to help compilation with recent #
57 | # versions of GCC (and other C compilers). There are *NO* other #
58 | # changes to the algorithm itself. #
59 | # #
60 | # I have made these changes following recommendations from #
61 | # users of the Corpora Mailing List, especially Peet Morris and #
62 | # Ramon Ziai. #
63 | # #
64 | # David Brooks (Birmingham) #
65 | # #
66 | # September.2005 #
67 | #################################################################
68 |
69 | #################################################################
70 | # #
71 | # README file for evalb #
72 | # #
73 | # Satoshi Sekine (NYU) #
74 | # Mike Collins (UPenn) #
75 | # #
76 | # October.1997 #
77 | #################################################################
78 |
79 | Contents of this README:
80 |
81 | [0] COPYRIGHT
82 | [1] INTRODUCTION
83 | [2] INSTALLATION AND RUN
84 | [3] OPTIONS
85 | [4] OUTPUT FORMAT FROM THE SCORER
86 | [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
87 | [6] THE PARAMETER FILE
88 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
89 |
90 |
91 | [0] COPYRIGHT
92 |
93 | The authors abandon the copyright of this program. Everyone is
94 | permitted to copy and distribute the program or a portion of the program
95 | with no charge and no restrictions unless it is harmful to someone.
96 |
97 | However, the authors would be delighted if users are kind enough to use
98 | the program properly and to let the authors know about bugs or problems.
99 |
100 | This software is provided "AS IS", and the authors make no warranties,
101 | express or implied.
102 |
103 | To legally enforce the abandonment of copyright, this package is released
104 | under the Unlicense (see LICENSE).
105 |
106 | [1] INTRODUCTION
107 |
108 | Evaluation of bracketing looks simple, but in fact, there are minor
109 | differences from system to system. This is a program to parameterize
110 | such minor differences and to give an informative result.
111 |
112 | "evalb" evaluates bracketing accuracy in a test-file against a gold-file.
113 | It returns recall, precision, and tagging accuracy. It uses an identical
114 | algorithm to that used in (Collins ACL97).
115 |
116 |
117 | [2] Installation and Run
118 |
119 | To compile the scorer, type
120 |
121 | > make
122 |
123 |
124 | To run the scorer:
125 |
126 | > evalb -p Parameter_file Gold_file Test_file
127 |
128 |
129 | For example to use the sample files:
130 |
131 | > evalb -p sample.prm sample.gld sample.tst
132 |
133 |
134 |
135 | [3] OPTIONS
136 |
137 | You can specify system parameters in the command line options.
138 | Other options concerning the evaluation metrics should be specified
139 | in the parameter file, described later.
140 |
141 | -p param_file parameter file
142 | -d debug mode
143 | -e n number of error to kill (default=10)
144 | -h help
145 |
146 |
147 |
148 | [4] OUTPUT FORMAT FROM THE SCORER
149 |
150 | The scorer gives individual scores for each sentence, for
151 | example:
152 |
153 | Sent. Matched Bracket Cross Correct Tag
154 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
155 | ============================================================================
156 | 1 8 0 100.00 100.00 5 5 5 0 6 5 83.33
157 |
158 | At the end of the output the === Summary === section gives statistics
159 | for all sentences, and for sentences <=40 words in length. The summary
160 | contains the following information:
161 |
162 | i) Number of sentences -- total number of sentences.
163 |
164 | ii) Number of Error/Skip sentences -- should both be 0 if there is no
165 | problem with the parsed/gold files.
166 |
167 | iii) Number of valid sentences = Number of sentences - Number of Error/Skip
168 | sentences
169 |
170 | iv) Bracketing recall = (number of correct constituents)
171 | ----------------------------------------
172 | (number of constituents in the goldfile)
173 |
174 | v) Bracketing precision = (number of correct constituents)
175 | ----------------------------------------
176 | (number of constituents in the parsed file)
177 |
178 | vi) Complete match = percentage of sentences where recall and precision are
179 | both 100%.
180 |
181 | vii) Average crossing = (number of constituents crossing a goldfile constituent)
182 | ----------------------------------------------------
183 | (number of sentences)
184 |
185 | viii) No crossing = percentage of sentences which have 0 crossing brackets.
186 |
187 | ix) 2 or less crossing = percentage of sentences which have <=2 crossing brackets.
188 |
189 | x) Tagging accuracy = percentage of correct POS tags (but see [7].3 for exact
190 | details of what is counted).
191 |
192 |
193 |
194 | [5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
195 |
196 |
197 | The gold and parsed files are in a format similar to this:
198 |
199 | (TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
200 |
201 | To create a gold file from the treebank:
202 |
203 | tgrep -wn '/.*/' | tgrep_proc.prl
204 |
205 | will produce a goldfile in the required format. ("tgrep -wn '/.*/'" prints
206 | parse trees, "tgrep_proc.prl" just skips blank lines).
207 |
208 | For example, to produce a goldfile for section 23 of the treebank:
209 |
210 | tgrep -wn '/.*/' | tail +90895 | tgrep_proc.prl | sed 2416q > sec23.gold
211 |
212 |
213 |
214 | [6] THE PARAMETER (.prm) FILE
215 |
216 |
217 | The .prm file sets options regarding the scoring method. COLLINS.prm gives
218 | the same scoring behaviour as the scorer used in (Collins 97). The options
219 | chosen were:
220 |
221 | 1) LABELED 1
222 |
223 | to give labelled precision/recall figures, i.e. a constituent must have the
224 | same span *and* label as a constituent in the goldfile.
225 |
226 | 2) DELETE_LABEL TOP
227 |
228 | Don't count the "TOP" label (which is always given in the output of tgrep)
229 | when scoring.
230 |
231 | 3) DELETE_LABEL -NONE-
232 |
233 | Remove traces (and all constituents which dominate nothing but traces) when
234 | scoring. For example
235 |
236 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
237 |
238 | would be processed to give
239 |
240 | .... (VP (VBD reported)) (. .)))
241 |
242 |
243 | 4)
244 | DELETE_LABEL , -- for the purposes of scoring remove punctuation
245 | DELETE_LABEL :
246 | DELETE_LABEL ``
247 | DELETE_LABEL ''
248 | DELETE_LABEL .
249 |
250 | 5) DELETE_LABEL_FOR_LENGTH -NONE- -- don't include traces when calculating
251 | the length of a sentence (important
252 | when classifying a sentence as <=40
253 | words or >40 words)
254 |
255 | 6) EQ_LABEL ADVP PRT
256 |
257 | Count ADVP and PRT as being the same label when scoring.
258 |
259 |
260 |
261 |
262 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
263 |
264 |
265 | 1) The scorer initially processes the files to remove all nodes specified
266 | by DELETE_LABEL in the .prm file. It also recursively removes nodes which
267 | dominate nothing due to all their children being removed. For example, if
268 | -NONE- is specified as a label to be deleted,
269 |
270 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
271 |
272 | would be processed to give
273 |
274 | .... (VP (VBD reported)) (. .)))
275 |
276 | 2) The scorer also removes all functional tags attached to non-terminals
277 | (functional tags are prefixed with "-" or "=" in the treebank). For example
278 | "NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
279 |
280 |
281 | 3) Tagging accuracy counts tags for all words *except* any tags which are
282 | deleted by a DELETE_LABEL specification in the .prm file. (For example, for
283 | COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
284 |
285 | 4) When calculating the length of a sentence, all words with POS tags not
286 | included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
287 | counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
288 | traces are removed before calculating the length of the sentence).
289 |
290 | 5) There are some subtleties in scoring when either the goldfile or parsed
291 | file contains multiple constituents for the same span which have the same
292 | non-terminal label. e.g. (NP (NP the man)) If the goldfile contains n
293 | constituents for the same span, and the parsed file contains m constituents
294 | with that nonterminal, the scorer works as follows:
295 |
296 | i) If m>n, then the precision is n/m, recall is 100%
297 |
298 | ii) If n>m, then the precision is 100%, recall is m/n.
299 |
300 | iii) If n==m, recall and precision are both 100%.
301 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.gld:
--------------------------------------------------------------------------------
1 | (TOP (S (NP-SBJ (DT The) (NN Thy-1) (NN gene) (NN promoter) ) (VP (VBZ resembles) (NP (DT a) (`` ") (JJ housekeeping) ('' ") (NN promoter) ) (PP (IN in) (SBAR (IN that) (S (NP-SBJ-68 (PRP it) ) (VP-COOD (VP (VBZ is) (ADJP-PRD (JJ located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island) )))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NN TATA) (NN box) )) (, ,) (CC and) (VP (VBZ displays) (NP (NN heterogeneity) ) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini) ) (PP (IN of) (NP (DT the) (NN mRNA) )))))))))) (. .) ) )
2 | (TOP (S (NP-SBJ (DT The) (JJ latter) (`` ") (NP (NP (JJ nuclear) (NN factor) ) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells) ))) ('' ") ) (ADVP (RB likely) ) (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity) ) (PP (IN of) (NP (NN IL-2) (NN gene) (NN expression) ))))) (. .) ) )
3 | (TOP (S (ADVP (RB Thus) ) (, ,) (NP-SBJ (PRP we) ) (VP (VBD postulated) (SBAR-COOD (SBAR (IN that) (S (NP-SBJ (NP (DT the) (JJ circadian) (NN modification) ) (PP (IN of) (NP (NN GR) ))) (VP (VBD was) (ADJP-PRD (JJ independent) (PP (IN of) (NP-COOD (NP (NP (DT the) (JJ diurnal) (NNS fluctuations) ) (PP (IN in) (NP (NN plasma) (NN cortisol) (NN level) ))) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations) ) (PP (IN in) (NP (JJ environmental) (NN lighting) ))))))))) (CC and) (SBAR (IN that) (S (NP-SBJ-79 (DT the) (NN rhythmicity) ) (VP (MD might) (VP (VB be) (VP (VBN regulated) (NP (-NONE- *-79) ) (PP (IN by) (NP-LGS (NP (DT the) (`` ') (JJ circadian) (NN pacemaker) ('' ') ) (ADJP (JJ located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain) )))))))))))) (. .) ) )
4 | (TOP (S (NP-SBJ-70 (JJ Such) (NN transcription) (NNS factors) ) (VP (VBP play) (NP (DT a) (JJ key) (NN role) ) (PP (IN in) (NP (NP (DT the) (NN development) ) (PP (IN of) (NP (DT the) (JJ mature) (NN T-cell) (NN phenotype) )))) (PP (IN by) (S (NP-SBJ (-NONE- *-70) ) (VP (VBG functioning) (PP (IN as) (`` ') (NP (NP (JJ master) (NNS regulators) ) (PP (IN of) (NP (NN T-cell) (NN differentiation) ))) ('' ') ))))) (. .) ) )
5 | (TOP (S (NP-SBJ (NP (DT The) (NN conversion) ) (PP (IN of) (NP (DT the) (NN TCEd) )) (PP (TO to) (NP (DT a) (`` ') (JJ perfect) ('' ') (NN NF-kB) (NN binding) (NN site) ))) (VP-COOD (VP (VBZ leads) (PP (TO to) (NP-19 (NP (DT a) (JJR tighter) (NN binding) ) (PP (IN of) (NP (NN NF-kB) )) (PP (TO to) (NP (NN TCEd) (NN DNA) ))))) (CC and) (, ,) (VP (PP (IN as) (NP (DT a) (JJ functional) (NN consequence) )) (, ,) (PP (TO to) (NP=19 (NP (DT the) (NN activity) ) (PP (IN of) (NP (DT the) (`` ') (VBN converted) ('' ') (NN TCEd) (NNS motifs) )) (PP (IN in) (NP (NN HeLa) (NNS cells) )))))) (. .) ) )
6 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.rsl-new:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 37 0 77.27 65.38 17 22 26 5 34 27 79.41
5 | 2 21 0 69.23 64.29 9 13 14 2 20 16 80.00
6 | 3 47 0 80.00 82.35 28 35 34 4 44 40 90.91
7 | 4 26 0 35.29 37.50 6 17 16 8 25 18 72.00
8 | 5 44 0 42.31 33.33 11 26 33 17 38 28 73.68
9 | ============================================================================
10 | 62.83 57.72 71 113 123 0 161 129 80.12
11 | === Summary ===
12 |
13 | -- All --
14 | Number of sentence = 5
15 | Number of Error sentence = 0
16 | Number of Skip sentence = 0
17 | Number of Valid sentence = 5
18 | Bracketing Recall = 62.83
19 | Bracketing Precision = 57.72
20 | Bracketing FMeasure = 60.17
21 | Complete match = 0.00
22 | Average crossing = 7.20
23 | No crossing = 0.00
24 | 2 or less crossing = 20.00
25 | Tagging accuracy = 80.12
26 |
27 | -- len<=40 --
28 | Number of sentence = 3
29 | Number of Error sentence = 0
30 | Number of Skip sentence = 0
31 | Number of Valid sentence = 3
32 | Bracketing Recall = 61.54
33 | Bracketing Precision = 57.14
34 | Bracketing FMeasure = 59.26
35 | Complete match = 0.00
36 | Average crossing = 5.00
37 | No crossing = 0.00
38 | 2 or less crossing = 33.33
39 | Tagging accuracy = 77.22
40 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.rsl-old:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 : Length unmatch (33|35)
5 | 1 37 1 0.00 0.00 0 0 0 0 0 0 0.00
6 | 2 : Length unmatch (19|21)
7 | 2 21 1 0.00 0.00 0 0 0 0 0 0 0.00
8 | 3 : Length unmatch (44|45)
9 | 3 47 1 0.00 0.00 0 0 0 0 0 0 0.00
10 | 4 : Length unmatch (24|26)
11 | 4 26 1 0.00 0.00 0 0 0 0 0 0 0.00
12 | 5 : Length unmatch (38|39)
13 | 5 44 1 0.00 0.00 0 0 0 0 0 0 0.00
14 | ============================================================================
15 | 0 0 0.00
16 |
17 | === Summary ===
18 |
19 | -- All --
20 | Number of sentence = 5
21 | Number of Error sentence = 5
22 | Number of Skip sentence = 0
23 | Number of Valid sentence = 0
24 | Bracketing Recall = 0.00
25 | Bracketing Precision = 0.00
26 | Bracketing FMeasure = nan
27 | Complete match = 0.00
28 | Average crossing = 0.00
29 | No crossing = 0.00
30 | 2 or less crossing = 0.00
31 | Tagging accuracy = 0.00
32 |
33 | -- len<=40 --
34 | Number of sentence = 3
35 | Number of Error sentence = 3
36 | Number of Skip sentence = 0
37 | Number of Valid sentence = 0
38 | Bracketing Recall = 0.00
39 | Bracketing Precision = 0.00
40 | Bracketing FMeasure = nan
41 | Complete match = 0.00
42 | Average crossing = 0.00
43 | No crossing = 0.00
44 | 2 or less crossing = 0.00
45 | Tagging accuracy = 0.00
46 |
--------------------------------------------------------------------------------
/EVALB/bug/bug.tst:
--------------------------------------------------------------------------------
1 | (S1 (S (NP (DT The) (JJ Thy-1) (NN gene) (NN promoter)) (VP (VP (VBZ resembles) (NP (NP (DT a) (ADJP (CD ") (NN housekeeping)) (NN ") (NN promoter)) (SBAR (WHPP (IN in) (WHNP (WDT that))) (S (NP (PRP it)) (VP (VBZ is) (VP (VBN located) (PP (IN within) (NP (DT a) (JJ methylation-free) (NN island))))))))) (, ,) (VP (VBZ lacks) (NP (DT a) (JJ canonical) (NNP TATA) (NN box))) (, ,) (CC and) (VP (VBZ displays) (NP (NP (NN heterogeneity)) (PP (IN in) (NP (NP (DT the) (JJ 5'-end) (NNS termini)) (PP (IN of) (NP (DT the) (NN mRNA)))))))) (. .)))
2 | (S1 (S (NP (NP (DT The) (JJ latter) (CD ") (JJ nuclear) (NN factor)) (PP (IN for) (NP (VBN activated) (NN T) (NNS cells)))) (VP (VBZ ") (ADJP (JJ likely) (S (VP (VBZ contributes) (PP (TO to) (NP (NP (DT the) (NN tissue) (NN specificity)) (PP (IN of) (NP (JJ IL-2) (NN gene) (NN expression))))))))) (. .)))
3 | (S1 (S (ADVP (RB Thus)) (, ,) (NP (PRP we)) (VP (VBD postulated) (SBAR (SBAR (IN that) (S (NP (NP (DT the) (JJ circadian) (NN modification)) (PP (IN of) (NP (NNP GR)))) (VP (VBD was) (ADJP (JJ independent) (PP (IN of) (NP (DT the) (JJ diurnal) (NNS fluctuations)))) (PP (IN in) (NP (NP (NN plasma) (JJ cortisol) (NN level)) (CC or) (NP (NP (DT the) (JJ circadian) (NNS variations)) (PP (IN in) (NP (JJ environmental) (NN lighting))))))))) (CC and) (SBAR (IN that) (S (NP (DT the) (NN rhythmicity)) (VP (MD might) (VP (VB be) (VP (VBN regulated) (PP (IN by) (NP (DT the) ('' ') (NP (JJ circadian) (NN pacemaker) (POS ')) (VP (VBN located) (PP (IN in) (NP (DT the) (JJ human) (JJ basal) (NN brain))))))))))))) (. .)))
4 | (S1 (S (NP (JJ Such) (NN transcription) (NNS factors)) (VP (VBP play) (NP (NP (DT a) (JJ key) (NN role)) (PP (IN in) (NP (NP (DT the) (NN development)) (PP (IN of) (NP (NP (DT the) (JJ mature) (JJ T-cell) (NN phenotype)) (PP (IN by) (NP (NP (NN functioning) (RB as) (POS ')) (NN master) (NNS regulators))))) (PP (IN of) (NP (JJ T-cell) (NN differentiation) (POS '))))))) (. .)))
5 | (S1 (S (NP (NP (DT The) (NN conversion)) (PP (IN of) (NP (DT the)))) (VP (VBD TCEd) (PP (TO to) (NP (NP (DT a) ('' ') (JJ perfect) ('' ') (NN NF-kB)) (SBAR (S (NP (JJ binding) (NN site)) (VP (VBZ leads) (PP (TO to) (NP (NP (NP (DT a) (ADJP (RBR tighter) (JJ binding)) (PP (IN of) (NP (NP (NNS NF-kB)) (PP (PP (TO to) (NP (JJ TCEd) (NN DNA))) (CC and) (PP (, ,) (PP (IN as) (NP (DT a) (JJ functional) (NN consequence))) (, ,) (TO to) (NP (NP (DT the) (NN activity)) (PP (IN of) (NP (DT the)))))))) (POS ')) (JJ converted) ('' ') (JJ TCEd) (NNS motifs)) (PP (IN in) (NP (NNP HeLa) (NNS cells))))))))))) (. .)))
6 |
--------------------------------------------------------------------------------
/EVALB/evalb.c:
--------------------------------------------------------------------------------
1 | /*****************************************************************/
2 | /* evalb [-p param_file] [-dh] [-e n] gold-file test-file */
3 | /* */
4 | /* Evaluate bracketing in test-file against gold-file. */
5 | /* Return recall, precision, tagging accuracy. */
6 | /* */
7 | /* */
8 | /* -p param_file parameter file */
9 | /* -d debug mode */
10 | /* -e n number of error to kill (default=10) */
11 | /* -h help */
12 | /* */
13 | /* Satoshi Sekine (NYU) */
14 | /* Mike Collins (UPenn) */
15 | /* */
16 | /* October.1997 */
17 | /* */
18 | /* Please refer to the README for update information */
19 | /*****************************************************************/
20 |
21 | #include <stdio.h>
22 | #include <stdlib.h> //### added for exit, atoi decls
23 | #include <string.h>
24 | #include <ctype.h>
25 |
26 |
27 | /* Internal Data format -------------------------------------------*/
28 | /* */
29 | /* (S (NP (NNX this)) (VP (VBX is) (NP (DT a) (NNX pen))) (SYM .)) */
30 | /* */
31 | /* wn=5 */
32 | /* word label */
33 | /* terminal[0] = this NNX */
34 | /* terminal[1] = is VBX */
35 | /* terminal[2] = a DT */
36 | /* terminal[3] = pen NNX */
37 | /* terminal[4] = . SYM */
38 | /* */
39 | /* bn=4 */
40 | /* start end label */
41 | /* bracket[0] = 0 5 S */
42 | /* bracket[1] = 0 1 NP */
43 | /* bracket[2] = 1 4 VP */
44 | /* bracket[3] = 2 4 NP */
45 | /* */
46 | /* matched bracketing */
47 | /* Recall = --------------------------- */
48 | /* # of bracket in ref-data */
49 | /* */
50 | /* matched bracketing */
51 | /* Precision = --------------------------- */
52 | /* # of bracket in test-data */
53 | /* */
54 | /*-----------------------------------------------------------------*/
55 |
56 | /******************/
57 | /* constant macro */
58 | /******************/
59 |
60 | #define MAX_SENT_LEN 5000
61 | #define MAX_WORD_IN_SENT 200
62 | #define MAX_BRACKET_IN_SENT 200
63 | #define MAX_WORD_LEN 100
64 | #define MAX_LABEL_LEN 30
65 | #define MAX_QUOTE_TERM 20
66 |
67 | #define MAX_DELETE_LABEL 100
68 | #define MAX_EQ_LABEL 100
69 | #define MAX_EQ_WORD 100
70 |
71 | #define MAX_LINE_LEN 500
72 |
73 | #define DEFAULT_MAX_ERROR 10
74 | #define DEFAULT_CUT_LEN 40
75 |
76 | /*************/
77 | /* structure */
78 | /*************/
79 |
80 | typedef struct ss_terminal {
81 | char word[MAX_WORD_LEN];
82 | char label[MAX_LABEL_LEN];
83 | int result; /* 0:unmatch, 1:match, 9:undef */
84 | } s_terminal;
85 |
86 | typedef struct ss_term_ind {
87 | s_terminal term;
88 | int index;
89 | int bracket;
90 | int endslen;
91 | int ends[MAX_BRACKET_IN_SENT];
92 | } s_term_ind;
93 |
94 | typedef struct ss_bracket {
95 | int start;
96 | int end;
97 | unsigned int buf_start;
98 | unsigned int buf_end;
99 | char label[MAX_LABEL_LEN];
100 | int result; /* 0: unmatch, 1:match, 5:delete 9:undef */
101 | } s_bracket;
102 |
103 |
104 | typedef struct ss_equiv {
105 | char *s1;
106 | char *s2;
107 | } s_equiv;
108 |
109 |
110 | /****************************/
111 | /* global variables */
112 | /* gold-data: suffix = 1 */
113 | /* test-data: suffix = 2 */
114 | /****************************/
115 |
116 | /*---------------*/
117 | /* Sentence data */
118 | /*---------------*/
119 | int wn1, wn2; /* number of words in sentence */
120 | int r_wn1; /* number of words in sentence */
121 | /* which only ignores labels in */
122 | /* DELETE_LABEL_FOR_LENGTH */
123 |
124 | s_terminal terminal1[MAX_WORD_IN_SENT]; /* terminal information */
125 | s_terminal terminal2[MAX_WORD_IN_SENT];
126 |
127 | s_term_ind quotterm1[MAX_QUOTE_TERM]; /* special terminals ("'","POS") */
128 | s_term_ind quotterm2[MAX_QUOTE_TERM];
129 |
130 | int bn1, bn2; /* number of brackets */
131 |
132 | int r_bn1, r_bn2; /* number of brackets */
133 | /* after deletion */
134 |
135 | s_bracket bracket1[MAX_BRACKET_IN_SENT]; /* bracket information */
136 | s_bracket bracket2[MAX_BRACKET_IN_SENT];
137 |
138 |
139 | /*------------*/
140 | /* Total data */
141 | /*------------*/
142 | int TOTAL_bn1, TOTAL_bn2, TOTAL_match; /* total number of brackets */
143 | int TOTAL_sent; /* No. of sentence */
144 | int TOTAL_error_sent; /* No. of error sentence */
145 | int TOTAL_skip_sent; /* No. of skip sentence */
146 | int TOTAL_comp_sent; /* No. of complete match sent */
147 | int TOTAL_word; /* total number of word */
148 | int TOTAL_crossing; /* total crossing */
149 | int TOTAL_no_crossing; /* no crossing sentence */
150 | int TOTAL_2L_crossing; /* 2 or less crossing sentence */
151 | int TOTAL_correct_tag; /* total correct tagging */
152 |
153 | int TOT_cut_len = DEFAULT_CUT_LEN; /* Cut-off length in statistics */
154 |
155 | /* data for sentences with len <= CUT_LEN */
156 | /* Historically it was 40. */
157 | int TOT40_bn1, TOT40_bn2, TOT40_match; /* total number of brackets */
158 | int TOT40_sent; /* No. of sentence */
159 | int TOT40_error_sent; /* No. of error sentence */
160 | int TOT40_skip_sent; /* No. of skip sentence */
161 | int TOT40_comp_sent; /* No. of complete match sent */
162 | int TOT40_word; /* total number of word */
163 | int TOT40_crossing; /* total crossing */
164 | int TOT40_no_crossing; /* no crossing sentence */
165 | int TOT40_2L_crossing; /* 2 or less crossing sentence */
166 | int TOT40_correct_tag; /* total correct tagging */
167 |
168 | /*------------*/
169 | /* miscellaneous */
170 | /*------------*/
171 | int Line; /* line number */
172 | int Error_count = 0; /* Error count */
173 | int Status; /* Result status for each sent */
174 | /* 0: OK, 1: skip, 2: error */
175 |
176 | /*-------------------*/
177 | /* stack manipulation */
178 | /*-------------------*/
179 | int stack_top;
180 | int stack[MAX_BRACKET_IN_SENT];
181 |
182 | /************************************************************/
183 | /* User parameters which can be specified in parameter file */
184 | /************************************************************/
185 |
186 | /*------------------------------------------*/
187 | /* Debug mode */
188 | /* print out data for individual sentence */
189 | /*------------------------------------------*/
190 | int DEBUG=0;
191 |
192 | /*------------------------------------------*/
193 | /* MAX error */
194 | /* Number of error to stop the process. */
195 | /* This is useful if there could be */
196 | /* tokenization error. */
197 | /* The process will stop when this number*/
198 | /* of errors are accumulated. */
199 | /*------------------------------------------*/
200 | int Max_error = DEFAULT_MAX_ERROR;
201 |
202 | /*------------------------------------------*/
203 | /* Cut-off length for statistics */
204 | /* int TOT_cut_len = DEFAULT_CUT_LEN; */
205 | /* (Defined above) */
206 | /*------------------------------------------*/
207 |
208 |
209 | /*------------------------------------------*/
210 | /* unlabeled or labeled bracketing */
211 | /* 0: unlabeled bracketing */
212 | /* 1: labeled bracketing */
213 | /*------------------------------------------*/
214 | int F_label = 1;
215 |
216 | /*------------------------------------------*/
217 | /* Delete labels */
218 | /* list of labels to be ignored. */
219 | /* If it is a pre-terminal label, delete */
220 | /* the word along with the brackets. */
221 | /* If it is a non-terminal label, just */
222 | /* delete the brackets (don't delete */
223 | /* children). */
224 | /*------------------------------------------*/
225 | char *Delete_label[MAX_DELETE_LABEL];
226 | int Delete_label_n = 0;
227 |
228 | /*------------------------------------------*/
229 | /* Delete labels for length calculation */
230 | /* list of labels to be ignored for */
231 | /* length calculation purpose */
232 | /*------------------------------------------*/
233 | char *Delete_label_for_length[MAX_DELETE_LABEL];
234 | int Delete_label_for_length_n = 0;
235 |
236 | /*------------------------------------------*/
237 | /* Labels to be considered for misquote */
238 | /* (could be possessive or quote) */
239 | /*------------------------------------------*/
240 | char *Quote_term[MAX_QUOTE_TERM];
241 | int Quote_term_n = 0;
242 |
243 | /*------------------------------------------*/
244 | /* Equivalent labels, words */
245 | /* the pairs are considered equivalent */
246 | /* This is non-directional. */
247 | /*------------------------------------------*/
248 | s_equiv EQ_label[MAX_EQ_LABEL];
249 | int EQ_label_n = 0;
250 |
251 | s_equiv EQ_word[MAX_EQ_WORD];
252 | int EQ_word_n = 0;
253 |
254 |
255 |
256 | /************************/
257 | /* Function return-type */
258 | /************************/
259 | int main();
260 | void init_global();
261 | void print_head();
262 | void init();
263 | void read_parameter_file();
264 | void set_param();
265 | int narg();
266 | int read_line();
267 |
268 | void pushb();
269 | int popb();
270 | int stackempty();
271 |
272 | void calc_result(unsigned char *buf1,unsigned char *buf);
273 | void fix_quote();
274 | void reinsert_term();
275 | void massage_data();
276 | void modify_label();
277 | void individual_result();
278 | void print_total();
279 | void dsp_info();
280 | int is_terminator();
281 | int is_deletelabel();
282 | int is_deletelabel_for_length();
283 | int is_quote_term();
284 | int word_comp();
285 | int label_comp();
286 |
287 | void Error();
288 | void Fatal();
289 | void Usage();
290 |
291 | /* ### provided by std headers
292 | int fprintf();
293 | int printf();
294 | int atoi();
295 | int fclose();
296 | int sscanf();
297 | */
298 |
299 | /***********/
300 | /* program */
301 | /***********/
302 | #define ARG_CHECK(st) if(!(*++(*argv) || (--argc && *++argv))){ \
303 | fprintf(stderr,"Missing argument: %s\n",st); \
304 | }
305 |
306 | int
307 | main(argc,argv)
308 | int argc;
309 | char *argv[];
310 | {
311 | char *filename1, *filename2;
312 | FILE *fd1, *fd2;
313 | unsigned char buff[5000];
314 | unsigned char buff1[5000];
315 |
316 | filename1=NULL;
317 | filename2=NULL;
318 |
319 | for(argc--,argv++;argc>0;argc--,argv++){
320 | if(**argv == '-'){
321 | while(*++(*argv)){
322 | switch(**argv){
323 |
324 | case 'h': /* help */
325 | Usage();
326 | exit(1);
327 |
328 | case 'd': /* debug mode */
329 | DEBUG = 1;
330 | goto nextarg;
331 |
332 | case 'D': /* debug mode */
333 | DEBUG = 2;
334 | goto nextarg;
335 |
336 | case 'c': /* cut-off length */
337 | ARG_CHECK("cut-off length for statistics");
338 | TOT_cut_len = atoi(*argv);
339 | goto nextarg;
340 |
341 | case 'e': /* max error */
342 | ARG_CHECK("number of error to kill");
343 | Max_error = atoi(*argv);
344 | goto nextarg;
345 |
346 | case 'p': /* parameter file */
347 | ARG_CHECK("parameter file");
348 | read_parameter_file(*argv);
349 | goto nextarg;
350 |
351 | default:
352 | Usage();
353 | exit(0);
354 | }
355 | }
356 | } else {
357 | if(filename1==NULL){
358 | filename1 = *argv;
359 | }else if(filename2==NULL){
360 | filename2 = *argv;
361 | }
362 | }
363 | nextarg: continue;
364 | }
365 |
366 | init_global();
367 |
368 |
369 | if((fd1 = fopen(filename1,"r"))==NULL){
370 | Fatal("Can't open gold file (%s)\n",filename1);
371 | }
372 | if((fd2 = fopen(filename2,"r"))==NULL){
373 | Fatal("Can't open test file (%s)\n",filename2);
374 | }
375 |
376 | print_head();
377 |
378 | for(Line=1;fgets(buff,5000,fd1)!=NULL;Line++){
379 |
380 | init();
381 |
382 | /* READ 1 */
383 | r_wn1 = read_line(buff,terminal1,quotterm1,&wn1,bracket1,&bn1);
384 |
385 | strcpy(buff1,buff);
386 |
387 | /* READ 2 */
388 | if(fgets(buff,5000,fd2)==NULL){
389 | Error("Number of lines unmatch (too many lines in gold file)\n");
390 | break;
391 | }
392 |
393 | read_line(buff,terminal2,quotterm2,&wn2,bracket2,&bn2);
394 |
395 | /* Calculate result and print it */
396 | calc_result(buff1,buff);
397 |
398 | if(DEBUG>=1){
399 | dsp_info();
400 | }
401 | }
402 |
403 | if(fgets(buff,5000,fd2)!=NULL){
404 | Error("Number of lines unmatch (too many lines in test file)\n");
405 | }
406 |
407 | print_total();
408 |
409 | return (0);
410 | }
411 |
412 |
413 | /*-----------------------------*/
414 | /* initialize global variables */
415 | /*-----------------------------*/
416 | void
417 | init_global()
418 | {
419 | TOTAL_bn1 = TOTAL_bn2 = TOTAL_match = 0;
420 | TOTAL_sent = TOTAL_error_sent = TOTAL_skip_sent = TOTAL_comp_sent = 0;
421 | TOTAL_word = TOTAL_correct_tag = 0;
422 | TOTAL_crossing = 0;
423 | TOTAL_no_crossing = TOTAL_2L_crossing = 0;
424 |
425 | TOT40_bn1 = TOT40_bn2 = TOT40_match = 0;
426 | TOT40_sent = TOT40_error_sent = TOT40_skip_sent = TOT40_comp_sent = 0;
427 | TOT40_word = TOT40_correct_tag = 0;
428 | TOT40_crossing = 0;
429 | TOT40_no_crossing = TOT40_2L_crossing = 0;
430 |
431 | }
432 |
433 |
434 | /*------------------*/
435 | /* print head title */
436 | /*------------------*/
437 | void
438 | print_head()
439 | {
440 | printf(" Sent. Matched Bracket Cross Correct Tag\n");
441 | printf(" ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy\n");
442 | printf("============================================================================\n");
443 | }
444 |
445 |
446 | /*-----------------------------------------------*/
447 | /* initialization at each individual computation */
448 | /*-----------------------------------------------*/
449 | void
450 | init()
451 | {
452 | int i;
453 |
454 | wn1 = 0;
455 | wn2 = 0;
456 | bn1 = 0;
457 | bn2 = 0;
458 | r_bn1 = 0;
459 | r_bn2 = 0;
460 |
461 | for(i=0;i<MAX_WORD_IN_SENT;i++){
517 | for(i=strlen(buff)-1;i>0 && (isspace(buff[i]) || buff[i]=='\n');i--){
518 | buff[i]='\0';
519 | }
520 | if(buff[0]=='#' || /* comment-line */
521 | strlen(buff)<3){ /* too short, just ignore */
522 | continue;
523 | }
524 |
525 | /* place the parameter and value */
526 | /*-------------------------------*/
527 | for(i=0;!isspace(buff[i]);i++);
528 | for(;isspace(buff[i]) && buff[i]!='\0';i++);
529 | if(buff[i]=='\0'){
530 | fprintf(stderr,"Empty value in parameter file (%d)\n",line);
531 | }
532 |
533 | /* set parameter and value */
534 | /*-------------------------*/
535 | set_param(buff,buff+i);
536 | }
537 |
538 | fclose(fd);
539 | }
540 |
541 |
542 | #define STRNCMP(s) (strncmp(param,s,strlen(s))==0 && \
543 | (param[strlen(s)]=='\0' || isspace(param[strlen(s)])))
544 |
545 |
546 | void
547 | set_param(param,value)
548 | char *param, *value;
549 | {
550 | char l1[MAX_LABEL_LEN], l2[MAX_LABEL_LEN];
551 |
552 | if(STRNCMP("DEBUG")){
553 |
554 | DEBUG = atoi(value);
555 |
556 | }else if(STRNCMP("MAX_ERROR")){
557 |
558 | Max_error = atoi(value);
559 |
560 | }else if(STRNCMP("CUTOFF_LEN")){
561 |
562 | TOT_cut_len = atoi(value);
563 |
564 | }else if(STRNCMP("LABELED")){
565 |
566 | F_label = atoi(value);
567 |
568 | }else if(STRNCMP("DELETE_LABEL")){
569 |
570 | Delete_label[Delete_label_n] = (char *)malloc(strlen(value)+1);
571 | strcpy(Delete_label[Delete_label_n],value);
572 | Delete_label_n++;
573 |
574 | }else if(STRNCMP("DELETE_LABEL_FOR_LENGTH")){
575 |
576 | Delete_label_for_length[Delete_label_for_length_n] = (char *)malloc(strlen(value)+1);
577 | strcpy(Delete_label_for_length[Delete_label_for_length_n],value);
578 | Delete_label_for_length_n++;
579 |
580 | }else if(STRNCMP("QUOTE_LABEL")){
581 |
582 | Quote_term[Quote_term_n] = (char *)malloc(strlen(value)+1);
583 | strcpy(Quote_term[Quote_term_n],value);
584 | Quote_term_n++;
585 |
586 | }else if(STRNCMP("EQ_LABEL")){
587 |
588 | if(narg(value)!=2){
589 | fprintf(stderr,"EQ_LABEL requires two values\n");
590 | return;
591 | }
592 | sscanf(value,"%s %s",l1,l2);
593 | EQ_label[EQ_label_n].s1 = (char *)malloc(strlen(l1)+1);
594 | strcpy(EQ_label[EQ_label_n].s1,l1);
595 | EQ_label[EQ_label_n].s2 = (char *)malloc(strlen(l2)+1);
596 | strcpy(EQ_label[EQ_label_n].s2,l2);
597 | EQ_label_n++;
598 |
599 | }else if(STRNCMP("EQ_WORD")){
600 |
601 | if(narg(value)!=2){
602 | fprintf(stderr,"EQ_WORD requires two values\n");
603 | return;
604 | }
605 | sscanf(value,"%s %s",l1,l2);
606 | EQ_word[EQ_word_n].s1 = (char *)malloc(strlen(l1)+1);
607 | strcpy(EQ_word[EQ_word_n].s1,l1);
608 | EQ_word[EQ_word_n].s2 = (char *)malloc(strlen(l2)+1);
609 | strcpy(EQ_word[EQ_word_n].s2,l2);
610 | EQ_word_n++;
611 |
612 | }else{
613 |
614 | fprintf(stderr,"Unknown keyword (%s) in parameter file\n",param);
615 |
616 | }
617 | }
618 |
619 |
620 | int
621 | narg(s)
622 | char *s;
623 | {
624 | int n;
625 |
626 | for(n=0;*s!='\0';){
627 | for(;isspace(*s);s++);
628 | if(*s=='\0'){
629 | break;
630 | }
631 | n++;
632 | for(;!isspace(*s);s++){
633 | if(*s=='\0'){
634 | break;
635 | }
636 | }
637 | }
638 |
639 | return(n);
640 | }
641 |
642 | /*-----------------------------*/
643 | /* Read line and gather data. */
644 | /* Return length of sentence. */
645 | /*-----------------------------*/
646 | int
647 | read_line(buff, terminal, quotterm, wn, bracket, bn)
648 | char *buff;
649 | s_terminal terminal[];
650 | s_term_ind quotterm[];
651 | int *wn;
652 | s_bracket bracket[];
653 | int *bn;
654 | {
655 | char *p, *q, label[MAX_LABEL_LEN], word[MAX_WORD_LEN];
656 | int qt; /* quote term counter */
657 | int wid, bid; /* word ID, bracket ID */
658 | int n; /* temporary remembering the position */
659 | int b; /* temporary remembering bid */
660 | int i;
661 | int len; /* length of the sentence */
662 |
663 | len = 0;
664 | stack_top=0;
665 |
666 | for(p=buff,qt=0,wid=0,bid=0;*p!='\0';){
667 |
668 | if(isspace(*p)){
669 | p++;
670 | continue;
671 |
672 | /* open bracket */
673 | /*--------------*/
674 | }else if(*p=='('){
675 |
676 | n=wid;
677 | for(p++,i=0;!is_terminator(*p);p++,i++){
678 | label[i]=*p;
679 | }
680 | label[i]='\0';
681 |
682 | /* Find terminals */
683 | q = p;
684 | if(isspace(*q)){
685 | for(q++;isspace(*q);q++);
686 | for(i=0;!is_terminator(*q);q++,i++){
687 | word[i]=*q;
688 | }
689 | word[i]='\0';
690 |
691 | /* compute length */
692 | if(*q==')' && !is_deletelabel_for_length(label)==1){
693 | len++;
694 | }
695 | if (DEBUG>1)
696 | printf("label=%s, word=%s, wid=%d\n",label,word,wid);
697 | /* quote terminal */
698 | if(*q==')' && is_quote_term(label,word)==1){
699 | strcpy(quotterm[qt].term.word,word);
700 | strcpy(quotterm[qt].term.label,label);
701 | quotterm[qt].index = wid;
702 | quotterm[qt].bracket = bid;
703 | quotterm[qt].endslen = stack_top;
704 | //quotterm[qt].ends = (int*)malloc(stack_top*sizeof(int));
705 | memcpy(quotterm[qt].ends,stack,stack_top*sizeof(int));
706 | qt++;
707 | }
708 |
709 | /* delete terminal */
710 | if(*q==')' && is_deletelabel(label)==1){
711 | p = q+1;
712 | continue;
713 |
714 | /* valid terminal */
715 | }else if(*q==')'){
716 | strcpy(terminal[wid].word,word);
717 | strcpy(terminal[wid].label,label);
718 | wid++;
719 | p = q+1;
720 | continue;
721 |
722 | /* error */
723 | }else if(*q!='('){
724 | Error("More than two elements in a bracket\n");
725 | }
726 | }
727 |
728 | /* otherwise non-terminal label */
729 | bracket[bid].start = wid;
730 | bracket[bid].buf_start = p-buff;
731 | strcpy(bracket[bid].label,label);
732 | pushb(bid);
733 | bid++;
734 |
735 | /* close bracket */
736 | /*---------------*/
737 | }else if(*p==')'){
738 |
739 | b = popb();
740 | bracket[b].end = wid;
741 | bracket[b].buf_end = p-buff;
742 | p++;
743 |
744 | /* error */
745 | /*-------*/
746 | }else{
747 |
748 | Error("Reading sentence\n");
749 | }
750 | }
751 |
752 | if(!stackempty()){
753 | Error("Bracketing is unbalanced (too many open bracket)\n");
754 | }
755 |
756 | *wn = wid;
757 | *bn = bid;
758 |
759 | return(len);
760 | }
761 |
762 |
763 | /*----------------------*/
764 | /* stack operation */
765 | /* for bracketing pairs */
766 | /*----------------------*/
767 | void
768 | pushb(item)
769 | int item;
770 | {
771 | stack[stack_top++]=item;
772 | }
773 |
774 | int
775 | popb()
776 | {
777 | int item;
778 |
779 | item = stack[stack_top-1];
780 |
781 | if(stack_top-- < 0){
782 | Error("Bracketing unbalance (too many close bracket)\n");
783 | }
784 | return(item);
785 | }
786 |
787 | int
788 | stackempty()
789 | {
790 | if(stack_top==0){
791 | return(1);
792 | }else{
793 | return(0);
794 | }
795 | }
796 |
797 |
798 | /*------------------*/
799 | /* calculate result */
800 | /*------------------*/
801 | void
802 | calc_result(unsigned char *buf1,unsigned char *buf)
803 | {
804 | int i, j, l;
805 | int match, crossing, correct_tag;
806 |
807 | int last_i = -1;
808 |
809 | char my_buf[1000];
810 | int match_found = 0;
811 |
812 | char match_j[200];
813 | for (j = 0; j < bn2; ++j) {
814 | match_j[j] = 0;
815 | }
816 |
817 | /* ML */
818 | if (DEBUG>1)
819 | printf("\n");
820 |
821 |
822 | /* Find skip and error */
823 | /*---------------------*/
824 | if(wn2==0){
825 | Status = 2;
826 | individual_result(0,0,0,0,0,0);
827 | return;
828 | }
829 |
830 | if(wn1 != wn2){
831 | //if (DEBUG>1)
832 | //Error("Length unmatch (%d|%d)\n",wn1,wn2);
833 | fix_quote();
834 | if(wn1 != wn2){
835 | Error("Length unmatch (%d|%d)\n",wn1,wn2);
836 | individual_result(0,0,0,0,0,0);
837 | return;
838 | }
839 | }
840 |
841 | for(i=0;i<bn1;i++) {
860 | if (DEBUG>1)
861 | printf("1.res=%d, 2.res=%d, 1.start=%d, 2.start=%d, 1.end=%d, 2.end=%d\n",bracket1[i].result,bracket2[j].result,bracket1[i].start,bracket2[j].start,bracket1[i].end,bracket2[j].end);
862 |
863 | // does bracket match?
864 | if(bracket1[i].result != 5 &&
865 | bracket2[j].result == 0 &&
866 | bracket1[i].start == bracket2[j].start && bracket1[i].end == bracket2[j].end) {
867 |
868 | // (1) do we not care about the label or (2) does the label match?
869 | if (F_label==0 || label_comp(bracket1[i].label,bracket2[j].label)==1) {
870 | bracket1[i].result = bracket2[j].result = 1;
871 | match++;
872 | match_found = 1;
873 | break;
874 | } else {
875 | if (DEBUG>1) {
876 | printf(" LABEL[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
877 | l = bracket1[i].buf_end-bracket1[i].buf_start;
878 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
879 | my_buf[l] = '\0';
880 | printf("%s\n",my_buf);
881 | }
882 | match_found = 1;
883 | match_j[j] = 1;
884 | }
885 | }
886 | }
887 |
888 | if (!match_found && bracket1[i].result != 5 && DEBUG>1) {
889 | /* ### ML 09/28/03: gold bracket with no corresponding test bracket */
890 | printf(" BRACKET[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
891 | l = bracket1[i].buf_end-bracket1[i].buf_start;
892 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
893 | my_buf[l] = '\0';
894 | printf("%s\n",my_buf);
895 | }
896 | match_found = 0;
897 | }
898 |
899 | for(j=0;j<bn2;j++) {
900 | if (match_j[j]==0 && bracket2[j].result == 0 && DEBUG>1) {
901 | /* test bracket with no corresponding gold bracket */
902 | printf(" EXTRA[%d-%d]: ",bracket2[j].start,bracket2[j].end-1);
903 | l = bracket2[j].buf_end-bracket2[j].buf_start;
904 | strncpy(my_buf,buf+bracket2[j].buf_start,l);
905 | my_buf[l] = '\0';
906 | printf("%s\n",my_buf);
907 | }
908 | }
909 |
910 | /* crossing */
911 | /*----------*/
912 | crossing = 0;
913 |
914 | /* crossing is counted based on the brackets */
915 | /* in test rather than gold file (by Mike) */
916 | for(j=0;j<bn2;j++){
917 | for(i=0;i<bn1;i++){
918 | if(bracket1[i].result != 5 &&
919 | bracket2[j].result != 5 &&
920 | ((bracket1[i].start < bracket2[j].start &&
921 | bracket1[i].end > bracket2[j].start &&
922 | bracket1[i].end < bracket2[j].end) ||
923 | (bracket1[i].start > bracket2[j].start &&
924 | bracket1[i].start < bracket2[j].end &&
925 | bracket1[i].end > bracket2[j].end))){
926 |
927 | /* ### ML 09/01/03: get details on cross-brackettings */
928 | if (i != last_i) {
929 | if (DEBUG>1) {
930 | printf(" CROSSING[%d-%d]: ",bracket1[i].start,bracket1[i].end-1);
931 | l = bracket1[i].buf_end-bracket1[i].buf_start;
932 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
933 | my_buf[l] = '\0';
934 | printf("%s\n",my_buf);
935 |
936 | /* ML
937 | printf("\n CROSSING at bracket %d:\n",i-1);
938 | printf(" GOLD (tokens %d-%d): ",bracket1[i].start,bracket1[i].end-1);
939 | l = bracket1[i].buf_end-bracket1[i].buf_start;
940 | strncpy(my_buf,buf1+bracket1[i].buf_start,l);
941 | my_buf[l] = '\0';
942 | printf("%s\n",my_buf);
943 | */
944 | }
945 | last_i = i;
946 | }
947 |
948 | /* ML
949 | printf(" TEST (tokens %d-%d): ",bracket2[j].start,bracket2[j].end-1);
950 | l = bracket2[j].buf_end-bracket2[j].buf_start;
951 | strncpy(my_buf,buf+bracket2[j].buf_start,l);
952 | my_buf[l] = '\0';
953 | printf("%s\n",my_buf);
954 | */
955 |
956 | crossing++;
957 | break;
958 | }
959 | }
960 | }
961 |
962 | /* Tagging accuracy */
963 | /*------------------*/
964 | correct_tag=0;
965 | for(i=0;i<wn1;i++){
981 | if (DEBUG>1) {
982 | for(i=0;i<wn1;i++){
1024 | int ind = quot->index;
1025 | int bra = quot->bracket;
1026 | s_terminal* term = &quot->term;
1027 | int k;
1028 | memmove(&terminal[ind+1],
1029 | &terminal[ind],
1030 | sizeof(s_terminal)*(MAX_WORD_IN_SENT-ind-1));
1031 | strcpy(terminal[ind].label,term->label);
1032 | strcpy(terminal[ind].word,term->word);
1033 | (*wn)++;
1034 | if (DEBUG>1)
1035 | printf("bra=%d, ind=%d\n",bra,ind);
1036 | for(k=0;k<*bn;k++) {
1039 | if (DEBUG>1)
1040 | printf("bracket[%d]={%d,%d}\n",k,bracket[k].start,bracket[k].end);
1041 | if (k>=bra) {
1042 | bracket[k].start++;
1043 | bracket[k].end++;
1044 | }
1045 | //if (bracket[k].start<=ind && bracket[k].end>=ind)
1046 | //bracket[k].end++;
1047 | }
1048 | if (DEBUG>1)
1049 | printf("endslen=%d\n",quot->endslen);
1050 | for(k=0;k<quot->endslen;k++) {
1051 | //printf("ends[%d]=%d",k,quot->ends[k]);
1052 | bracket[quot->ends[k]].end++;
1053 | }
1054 | //free(quot->ends);
1055 | }
1056 | /*
1057 | void
1058 | adjust_end(ind,bra)
1059 | int ind;
1060 | int bra;
1061 | {
1062 | for(k=0;k<bn;k++) {
1066 | if (k>=bra)
1067 | bracket[k].end++;
1068 | }
1069 | }
1070 | */
1071 | void
1072 | massage_data()
1073 | {
1074 | int i, j;
1075 |
1076 | /* for GOLD */
1077 | /*----------*/
1078 | for(i=0;i<bn1;i++){
1244 | if(TOTAL_bn1>0 && TOTAL_bn2>0){
1245 | printf(" %6.2f %6.2f %6d %5d %5d %5d",
1246 | (TOTAL_bn1>0?100.0*TOTAL_match/TOTAL_bn1:0.0),
1247 | (TOTAL_bn2>0?100.0*TOTAL_match/TOTAL_bn2:0.0),
1248 | TOTAL_match,
1249 | TOTAL_bn1,
1250 | TOTAL_bn2,
1251 | TOTAL_crossing);
1252 | }
1253 |
1254 | printf(" %5d %5d %6.2f",
1255 | TOTAL_word,
1256 | TOTAL_correct_tag,
1257 | (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
1258 |
1259 | printf("\n");
1260 | printf("=== Summary ===\n");
1261 |
1262 | sentn = TOTAL_sent - TOTAL_error_sent - TOTAL_skip_sent;
1263 |
1264 | printf("\n-- All --\n");
1265 | printf("Number of sentence = %6d\n",TOTAL_sent);
1266 | printf("Number of Error sentence = %6d\n",TOTAL_error_sent);
1267 | printf("Number of Skip sentence = %6d\n",TOTAL_skip_sent);
1268 | printf("Number of Valid sentence = %6d\n",sentn);
1269 |
1270 | r = TOTAL_bn1>0 ? 100.0*TOTAL_match/TOTAL_bn1 : 0.0;
1271 | printf("Bracketing Recall = %6.2f\n",r);
1272 |
1273 | p = TOTAL_bn2>0 ? 100.0*TOTAL_match/TOTAL_bn2 : 0.0;
1274 | printf("Bracketing Precision = %6.2f\n",p);
1275 |
1276 | f = 2*p*r/(p+r);
1277 | printf("Bracketing FMeasure = %6.2f\n",f);
1278 |
1279 | printf("Complete match = %6.2f\n",
1280 | (sentn>0?100.0*TOTAL_comp_sent/sentn:0.0));
1281 | printf("Average crossing = %6.2f\n",
1282 | (sentn>0?1.0*TOTAL_crossing/sentn:0.0));
1283 | printf("No crossing = %6.2f\n",
1284 | (sentn>0?100.0*TOTAL_no_crossing/sentn:0.0));
1285 | printf("2 or less crossing = %6.2f\n",
1286 | (sentn>0?100.0*TOTAL_2L_crossing/sentn:0.0));
1287 | printf("Tagging accuracy = %6.2f\n",
1288 | (TOTAL_word>0?100.0*TOTAL_correct_tag/TOTAL_word:0.0));
1289 |
1290 | sentn = TOT40_sent - TOT40_error_sent - TOT40_skip_sent;
1291 |
1292 | printf("\n-- len<=%d --\n",TOT_cut_len);
1293 | printf("Number of sentence = %6d\n",TOT40_sent);
1294 | printf("Number of Error sentence = %6d\n",TOT40_error_sent);
1295 | printf("Number of Skip sentence = %6d\n",TOT40_skip_sent);
1296 | printf("Number of Valid sentence = %6d\n",sentn);
1297 |
1298 |
1299 | r = TOT40_bn1>0 ? 100.0*TOT40_match/TOT40_bn1 : 0.0;
1300 | printf("Bracketing Recall = %6.2f\n",r);
1301 |
1302 | p = TOT40_bn2>0 ? 100.0*TOT40_match/TOT40_bn2 : 0.0;
1303 | printf("Bracketing Precision = %6.2f\n",p);
1304 |
1305 | f = 2*p*r/(p+r);
1306 | printf("Bracketing FMeasure = %6.2f\n",f);
1307 |
1308 | printf("Complete match = %6.2f\n",
1309 | (sentn>0?100.0*TOT40_comp_sent/sentn:0.0));
1310 | printf("Average crossing = %6.2f\n",
1311 | (sentn>0?1.0*TOT40_crossing/sentn:0.0));
1312 | printf("No crossing = %6.2f\n",
1313 | (sentn>0?100.0*TOT40_no_crossing/sentn:0.0));
1314 | printf("2 or less crossing = %6.2f\n",
1315 | (sentn>0?100.0*TOT40_2L_crossing/sentn:0.0));
1316 | printf("Tagging accuracy = %6.2f\n",
1317 | (TOT40_word>0?100.0*TOT40_correct_tag/TOT40_word:0.0));
1318 |
1319 | }
1320 |
1321 |
1322 | /*--------------------------------*/
1323 | /* display individual information */
1324 | /*--------------------------------*/
1325 | void
1326 | dsp_info()
1327 | {
1328 | int i, n;
1329 |
1330 | printf("-<1>---(wn1=%3d, bn1=%3d)- ",wn1,bn1);
1331 | printf("-<2>---(wn2=%3d, bn2=%3d)-\n",wn2,bn2);
1332 |
1333 | n = (wn1>wn2?wn1:wn2);
1334 |
1335 | for(i=0;i<n;i++){
1352 | n = (bn1>bn2?bn1:bn2);
1353 |
1354 | for(i=0;i<n;i++){
1501 | if(Error_count>Max_error){
1502 | exit(1);
1503 | }
1504 | }
1505 |
1506 |
1507 | /*---------------------*/
1508 | /* fatal error to exit */
1509 | /*---------------------*/
1510 | void
1511 | Fatal(s,arg1,arg2,arg3)
1512 | char *s, *arg1, *arg2, *arg3;
1513 | {
1514 | fprintf(stderr,s,arg1,arg2,arg3);
1515 | exit(1);
1516 | }
1517 |
1518 |
1519 | /*-------*/
1520 | /* Usage */
1521 | /*-------*/
1522 | void
1523 | Usage()
1524 | {
1525 | fprintf(stderr," evalb [-dDh][-c n][-e n][-p param_file] gold-file test-file \n");
1526 | fprintf(stderr," \n");
1527 | fprintf(stderr," Evaluate bracketing in test-file against gold-file. \n");
1528 | fprintf(stderr," Return recall, precision, F-Measure, tag accuracy. \n");
1529 | fprintf(stderr," \n");
1530 | fprintf(stderr," \n");
1531 | fprintf(stderr," -d debug mode \n");
1532 | fprintf(stderr," -D debug mode plus bracketing info \n");
1533 | fprintf(stderr," -c n cut-off length for statistics (def.=40)\n");
1534 | fprintf(stderr," -e n number of error to kill (default=10) \n");
1535 | fprintf(stderr," -p param_file parameter file \n");
1536 | fprintf(stderr," -h help \n");
1537 | }
1538 |
--------------------------------------------------------------------------------
/EVALB/new.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of error to stop the process. ##
12 | ## This is useful if there could be ##
13 | ## tokenization error. ##
14 | ## The process will stop when this number##
15 | ## of errors are accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 40
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL S1
47 | DELETE_LABEL -NONE-
48 | DELETE_LABEL ,
49 | DELETE_LABEL :
50 | DELETE_LABEL ``
51 | DELETE_LABEL ''
52 | DELETE_LABEL .
53 | DELETE_LABEL ?
54 | DELETE_LABEL !
55 |
56 | ##------------------------------------------##
57 | ## Delete labels for length calculation ##
58 | ## list of labels to be ignored for ##
59 | ## length calculation purpose ##
60 | ##------------------------------------------##
61 | DELETE_LABEL_FOR_LENGTH -NONE-
62 |
63 | ##------------------------------------------##
64 | ## Labels to be considered for misquote ##
65 | ## (could be possessive or quote) ##
66 | ##------------------------------------------##
67 | QUOTE_LABEL ``
68 | QUOTE_LABEL ''
69 | QUOTE_LABEL POS
70 |
71 | ##------------------------------------------##
72 | ## These are less common, but ##
73 | ## are on occasion output by parsers: ##
74 | ##------------------------------------------##
75 | QUOTE_LABEL NN
76 | QUOTE_LABEL CD
77 | QUOTE_LABEL VBZ
78 | QUOTE_LABEL :
79 |
80 | ##------------------------------------------##
81 | ## Equivalent labels, words ##
82 | ## the pairs are considered equivalent ##
83 | ## This is non-directional. ##
84 | ##------------------------------------------##
85 | EQ_LABEL ADVP PRT
86 |
87 | # EQ_WORD Example example
88 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.gld:
--------------------------------------------------------------------------------
1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
2 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
3 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
4 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
5 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
6 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
7 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
8 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
9 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
10 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
11 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
12 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
13 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
14 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
15 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
16 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
19 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
21 | (S (A-SBJ-1 (P this)) (B-WHATEVER (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## print out data for individual sentence ##
4 | ##------------------------------------------##
5 | DEBUG 0
6 |
7 | ##------------------------------------------##
8 | ## MAX error ##
9 | ## Number of errors before the process stops. ##
10 | ## This is useful if there could be ##
11 | ## tokenization errors. ##
12 | ## The process will stop when this number##
13 | ## of errors has accumulated. ##
14 | ##------------------------------------------##
15 | MAX_ERROR 10
16 |
17 | ##------------------------------------------##
18 | ## Cut-off length for statistics ##
19 | ## At the end of evaluation, the ##
20 | ## statistics for the sentences of length##
21 | ## less than or equal to this number will##
22 | ## be shown, on top of the statistics ##
23 | ## for all the sentences ##
24 | ##------------------------------------------##
25 | CUTOFF_LEN 40
26 |
27 | ##------------------------------------------##
28 | ## unlabeled or labeled bracketing ##
29 | ## 0: unlabeled bracketing ##
30 | ## 1: labeled bracketing ##
31 | ##------------------------------------------##
32 | LABELED 1
33 |
34 | ##------------------------------------------##
35 | ## Delete labels ##
36 | ## list of labels to be ignored. ##
37 | ## If it is a pre-terminal label, delete ##
38 | ## the word along with the brackets. ##
39 | ## If it is a non-terminal label, just ##
40 | ## delete the brackets (don't delete ##
41 | ## children). ##
42 | ##------------------------------------------##
43 | DELETE_LABEL TOP
44 | DELETE_LABEL -NONE-
45 | DELETE_LABEL ,
46 | DELETE_LABEL :
47 | DELETE_LABEL ``
48 | DELETE_LABEL ''
49 |
50 | ##------------------------------------------##
51 | ## Delete labels for length calculation ##
52 | ## list of labels to be ignored for ##
53 | ## length calculation purpose ##
54 | ##------------------------------------------##
55 | DELETE_LABEL_FOR_LENGTH -NONE-
56 |
57 |
58 | ##------------------------------------------##
59 | ## Equivalent labels, words ##
60 | ## the pairs are considered equivalent ##
61 | ## This is non-directional. ##
62 | ##------------------------------------------##
63 | EQ_LABEL T TT
64 |
65 | EQ_WORD This this
66 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.rsl:
--------------------------------------------------------------------------------
1 | Sent. Matched Bracket Cross Correct Tag
2 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
3 | ============================================================================
4 | 1 4 0 100.00 100.00 4 4 4 0 4 4 100.00
5 | 2 4 0 75.00 75.00 3 4 4 0 4 4 100.00
6 | 3 4 0 100.00 100.00 4 4 4 0 4 3 75.00
7 | 4 4 0 75.00 75.00 3 4 4 0 4 3 75.00
8 | 5 4 0 75.00 75.00 3 4 4 0 4 4 100.00
9 | 6 4 0 50.00 66.67 2 4 3 1 4 4 100.00
10 | 7 4 0 25.00 100.00 1 4 1 0 4 4 100.00
11 | 8 4 0 0.00 0.00 0 4 0 0 4 4 100.00
12 | 9 4 0 100.00 80.00 4 4 5 0 4 4 100.00
13 | 10 4 0 100.00 50.00 4 4 8 0 4 4 100.00
14 | 11 4 2 0.00 0.00 0 0 0 0 4 0 0.00
15 | 12 4 1 0.00 0.00 0 0 0 0 4 0 0.00
16 | 13 4 1 0.00 0.00 0 0 0 0 4 0 0.00
17 | 14 4 2 0.00 0.00 0 0 0 0 4 0 0.00
18 | 15 4 0 100.00 100.00 4 4 4 0 4 4 100.00
19 | 16 4 1 0.00 0.00 0 0 0 0 4 0 0.00
20 | 17 4 1 0.00 0.00 0 0 0 0 4 0 0.00
21 | 18 4 0 100.00 100.00 4 4 4 0 4 4 100.00
22 | 19 4 0 100.00 100.00 4 4 4 0 4 4 100.00
23 | 20 4 1 0.00 0.00 0 0 0 0 4 0 0.00
24 | 21 4 0 100.00 100.00 4 4 4 0 4 4 100.00
25 | 22 44 0 100.00 100.00 34 34 34 0 44 44 100.00
26 | 23 4 0 100.00 100.00 4 4 4 0 4 4 100.00
27 | 24 5 0 100.00 100.00 4 4 4 0 4 4 100.00
28 | ============================================================================
29 | 87.76 90.53 86 98 95 16 108 106 98.15
30 | === Summary ===
31 |
32 | -- All --
33 | Number of sentence = 24
34 | Number of Error sentence = 5
35 | Number of Skip sentence = 2
36 | Number of Valid sentence = 17
37 | Bracketing Recall = 87.76
38 | Bracketing Precision = 90.53
39 | Complete match = 52.94
40 | Average crossing = 0.06
41 | No crossing = 94.12
42 | 2 or less crossing = 100.00
43 | Tagging accuracy = 98.15
44 |
45 | -- len<=40 --
46 | Number of sentence = 23
47 | Number of Error sentence = 5
48 | Number of Skip sentence = 2
49 | Number of Valid sentence = 16
50 | Bracketing Recall = 81.25
51 | Bracketing Precision = 85.25
52 | Complete match = 50.00
53 | Average crossing = 0.06
54 | No crossing = 93.75
55 | 2 or less crossing = 100.00
56 | Tagging accuracy = 96.88
57 |
--------------------------------------------------------------------------------
/EVALB/sample/sample.tst:
--------------------------------------------------------------------------------
1 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
2 | (S (A (P this)) (B (Q is) (C (R a) (T test))))
3 | (S (A (P this)) (B (Q is) (A (R a) (U test))))
4 | (S (C (P this)) (B (Q is) (A (R a) (U test))))
5 | (S (A (P this)) (B (Q is) (R a) (A (T test))))
6 | (S (A (P this) (Q is)) (A (R a) (T test)))
7 | (S (P this) (Q is) (R a) (T test))
8 | (P this) (Q is) (R a) (T test)
9 | (S (A (P this)) (B (Q is) (A (A (R a) (T test)))))
10 | (S (A (P this)) (B (Q is) (A (A (A (A (A (R a) (T test))))))))
11 |
12 | (S (A (P this)) (B (Q was) (A (A (R a) (T test)))))
13 | (S (A (P this)) (B (Q is) (U not) (A (A (R a) (T test)))))
14 |
15 | (TOP (S (A (P this)) (B (Q is) (A (R a) (T test)))))
16 | (S (A (P this)) (NONE *) (B (Q is) (A (R a) (T test))))
17 | (S (A (P this)) (S (NONE abc) (A (NONE *))) (B (Q is) (A (R a) (T test))))
18 | (S (A (P this)) (B (Q is) (A (R a) (TT test))))
19 | (S (A (P This)) (B (Q is) (A (R a) (T test))))
20 | (S (A (P That)) (B (Q is) (A (R a) (T test))))
21 | (S (A (P this)) (B (Q is) (A (R a) (T test))))
22 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))) (A (P this)) (B (Q is) (A (R a) (T test))))
23 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (-NONE- *))
24 | (S (A (P this)) (B (Q is) (A (R a) (T test))) (: *))
25 |
--------------------------------------------------------------------------------
/EVALB/tgrep_proc.prl:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/perl
2 |
3 | while(<>)
4 | {
5 | if(m/TOP/) # print only lines containing a parse tree (i.e. skip blank lines)
6 | {
7 | print;
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/Makefile:
--------------------------------------------------------------------------------
1 | TESTFILE=dev.Arabic.gold.ptb.-feat
2 | JAVA=java
3 |
4 |
5 | all: evalb
6 |
7 |
8 | clean:
9 | rm -f evalb_spmrl
10 |
11 | install: evalb
12 | cp evalb_spmrl /usr/local/bin
13 |
14 |
15 |
16 | evalb: evalb.c
17 | gcc -Wall -O3 -g -o evalb_spmrl evalb.c
18 |
19 | evalb_linux: evalb.c
20 | gcc -Wall -fPIC -O3 -g -o evalb_spmrl evalb.c
21 | #to compile on linux
22 |
23 |
24 |
25 | # note: in the original makefile, the -funsigned-char option was applied
26 |
27 | home: install_home
28 |
29 | install_home: all
30 | cp evalb_spmrl ${PREFIX}/bin
31 |
32 | up:
33 | tar zcvf ../evalb_spmrl2013.tar.gz ../evalb_spmrl2013/
34 | putW ../evalb_spmrl2013.tar.gz
35 |
36 |
37 |
38 | #################################
39 | # stuff to debug some treebanks #
40 | #################################
41 | test_full: all
42 | ./evalb_spmrl dev.Arabic.gold.ptb dev.Arabic.gold.ptb
43 |
44 |
45 | test: all
46 | ./evalb_spmrl -p ./new.prm ${TESTFILE} ${TESTFILE}
47 |
48 | debug: all
49 | ./evalb_spmrl -D ${TESTFILE} ${TESTFILE}
50 | echo "./evalb_spmrl -D ${TESTFILE} ${TESTFILE}"
51 |
52 | debug_one: all
53 | lines 616 < ${TESTFILE} > ${TESTFILE}.616
54 | ./evalb_spmrl -D ${TESTFILE}.616 ${TESTFILE}.616
55 | echo "./evalb_spmrl -D ${TESTFILE}.616 ${TESTFILE}.616"
56 |
57 | releaf:
58 | ./evalb_spmrl -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug
59 | echo "./evalb_spmrl -D dev.Arabic.gold.ptb.-feat.616.bug dev.Arabic.gold.ptb.-feat.616.bug" > /dev/stderr
60 |
61 | java:
62 | ${JAVA} -jar ./evalC/evalC.jar ${TESTFILE} ${TESTFILE} /dev/stdout
63 |
64 |
65 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/README:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgaddy/parser-analysis/1033017fef59090a48cf25210f7d43e3ff913a4c/EVALB_SPMRL/README
--------------------------------------------------------------------------------
/EVALB_SPMRL/README.orig:
--------------------------------------------------------------------------------
1 | #################################################################
2 | # #
3 | # README file for evalb #
4 | # #
5 | # Satoshi Sekine (NYU) #
6 | # Mike Collins (UPenn) #
7 | # #
8 | # October.1997 #
9 | #################################################################
10 |
11 | Contents of this README:
12 |
13 | [0] COPYRIGHT
14 | [1] INTRODUCTION
15 | [2] INSTALLATION AND RUN
16 | [3] OPTIONS
17 | [4] OUTPUT FORMAT FROM THE SCORER
18 | [5] HOW TO CREATE A GOLDFILE FROM THE TREEBANK
19 | [6] THE PARAMETER FILE
20 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
21 |
22 |
23 | [0] COPYRIGHT
24 |
25 | The authors abandon the copyright of this program. Everyone is
26 | permitted to copy and distribute the program or a portion of the program
27 | with no charge and no restrictions unless it is harmful to someone.
28 |
29 | However, the authors would be delighted if users use the program properly
30 | and let the authors know about any bugs or problems.
31 |
32 | This software is provided "AS IS", and the authors make no warranties,
33 | express or implied.
34 |
35 |
36 | [1] INTRODUCTION
37 |
38 | Evaluation of bracketing looks simple, but in fact, there are minor
39 | differences from system to system. This is a program to parameterize
40 | such minor differences and to give an informative result.
41 |
42 | "evalb" evaluates bracketing accuracy in a test-file against a gold-file.
43 | It returns recall, precision, and tagging accuracy. It uses an identical
44 | algorithm to that used in (Collins ACL97).
45 |
46 |
47 | [2] Installation and Run
48 |
49 | To compile the scorer, type
50 |
51 | > make
52 |
53 |
54 | To run the scorer:
55 |
56 | > evalb -p Parameter_file Gold_file Test_file
57 |
58 |
59 | For example to use the sample files:
60 |
61 | > evalb -p sample.prm sample.gld sample.tst
62 |
63 |
64 |
65 | [3] OPTIONS
66 |
67 | You can specify system parameters in the command line options.
68 | Other options concerning the evaluation metric should be specified
69 | in the parameter file, described later.
70 |
71 | -p param_file parameter file
72 | -d debug mode
73 | -e n number of errors to kill (default=10)
74 | -h help
75 |
76 |
77 |
78 | [4] OUTPUT FORMAT FROM THE SCORER
79 |
80 | The scorer gives individual scores for each sentence, for
81 | example:
82 |
83 | Sent. Matched Bracket Cross Correct Tag
84 | ID Len. Stat. Recal Prec. Bracket gold test Bracket Words Tags Accracy
85 | ============================================================================
86 | 1 8 0 100.00 100.00 5 5 5 0 6 5 83.33
87 |
88 | At the end of the output the === Summary === section gives statistics
89 | for all sentences, and for sentences <=40 words in length. The summary
90 | contains the following information:
91 |
92 | i) Number of sentences -- total number of sentences.
93 |
94 | ii) Number of Error/Skip sentences -- should both be 0 if there is no
95 | problem with the parsed/gold files.
96 |
97 | iii) Number of valid sentences = Number of sentences - Number of Error/Skip
98 | sentences
99 |
100 | iv) Bracketing recall = (number of correct constituents)
101 | ----------------------------------------
102 | (number of constituents in the goldfile)
103 |
104 | v) Bracketing precision = (number of correct constituents)
105 | ----------------------------------------
106 | (number of constituents in the parsed file)
107 |
108 | vi) Complete match = percentage of sentences where recall and precision are
109 | both 100%.
110 |
111 | vii) Average crossing = (number of constituents crossing a goldfile constituent)
112 | ----------------------------------------------------
113 | (number of sentences)
114 |
115 | viii) No crossing = percentage of sentences which have 0 crossing brackets.
116 |
117 | ix) 2 or less crossing = percentage of sentences which have <=2 crossing brackets.
118 |
119 | x) Tagging accuracy = percentage of correct POS tags (but see [5].3 for exact
120 | details of what is counted).
121 |
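As a quick illustration, the headline summary numbers combine per-sentence
counts as in this minimal Python sketch (not the scorer's code; the
(matched, gold, test) triples below are made-up values):

    sentences = [(4, 4, 4), (3, 4, 4), (2, 4, 3)]   # (matched, gold, test)

    matched = sum(c for c, g, t in sentences)
    recall = 100.0 * matched / sum(g for c, g, t in sentences)
    precision = 100.0 * matched / sum(t for c, g, t in sentences)
    # a sentence is a complete match when recall and precision are both 100%
    complete = 100.0 * sum(c == g == t for c, g, t in sentences) / len(sentences)
    print(recall, precision, complete)    # 75.0  81.81...  33.33...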
122 |
123 |
124 | [5] HOW TO CREATE A GOLDFILE FROM THE PENN TREEBANK
125 |
126 |
127 | The gold and parsed files are in a format similar to this:
128 |
129 | (TOP (S (INTJ (RB No)) (, ,) (NP (PRP it)) (VP (VBD was) (RB n't) (NP (NNP Black) (NNP Monday))) (. .)))
130 |
131 | To create a gold file from the treebank:
132 |
133 | tgrep -wn '/.*/' | tgrep_proc.prl
134 |
135 | will produce a goldfile in the required format. ("tgrep -wn '/.*/'" prints
136 | parse trees, "tgrep_proc.prl" just skips blank lines).
137 |
138 | For example, to produce a goldfile for section 23 of the treebank:
139 |
140 | tgrep -wn '/.*/' | tail +90895 | tgrep_proc.prl | sed 2416q > sec23.gold
141 |
142 |
143 |
144 | [6] THE PARAMETER (.prm) FILE
145 |
146 |
147 | The .prm file sets options regarding the scoring method. COLLINS.prm gives
148 | the same scoring behaviour as the scorer used in (Collins 97). The options
149 | chosen were:
150 |
151 | 1) LABELED 1
152 |
153 | to give labelled precision/recall figures, i.e. a constituent must have the
154 | same span *and* label as a constituent in the goldfile.
155 |
156 | 2) DELETE_LABEL TOP
157 |
158 | Don't count the "TOP" label (which is always given in the output of tgrep)
159 | when scoring.
160 |
161 | 3) DELETE_LABEL -NONE-
162 |
163 | Remove traces (and all constituents which dominate nothing but traces) when
164 | scoring. For example
165 |
166 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
167 |
168 | would be processed to give
169 |
170 | .... (VP (VBD reported)) (. .)))
171 |
172 |
173 | 4)
174 | DELETE_LABEL , -- for the purposes of scoring remove punctuation
175 | DELETE_LABEL :
176 | DELETE_LABEL ``
177 | DELETE_LABEL ''
178 | DELETE_LABEL .
179 |
180 | 5) DELETE_LABEL_FOR_LENGTH -NONE- -- don't include traces when calculating
181 | the length of a sentence (important
182 | when classifying a sentence as <=40
183 | words or >40 words)
184 |
185 | 6) EQ_LABEL ADVP PRT
186 |
187 | Count ADVP and PRT as being the same label when scoring.
188 |
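To make the DELETE_LABEL behaviour concrete, here is a minimal Python
sketch of the preprocessing (the scorer itself is C; the nested-list tree
representation and the DELETE set below are just for illustration):

    DELETE = {"TOP", "-NONE-", ",", ":", "``", "''", "."}

    def strip(node):
        """Return the list of nodes surviving DELETE_LABEL preprocessing."""
        label, children = node[0], node[1:]
        if len(children) == 1 and isinstance(children[0], str):
            # pre-terminal: deleting it deletes the word as well
            return [] if label in DELETE else [node]
        kept = [k for child in children for k in strip(child)]
        if not kept:
            return []      # dominated nothing but deleted material
        if label in DELETE:
            return kept    # drop the bracket, keep the children
        return [[label] + kept]

    tree = ["VP", ["VBD", "reported"],
            ["SBAR", ["-NONE-", "0"], ["S", ["-NONE-", "*T*-1"]]]]
    print(strip(tree))     # [['VP', ['VBD', 'reported']]]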
189 |
190 |
191 |
192 | [7] MORE DETAILS ABOUT THE SCORING ALGORITHM
193 |
194 |
195 | 1) The scorer initially processes the files to remove all nodes specified
196 | by DELETE_LABEL in the .prm file. It also recursively removes nodes which
197 | dominate nothing due to all their children being removed. For example, if
198 | -NONE- is specified as a label to be deleted,
199 |
200 | .... (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1)))) (. .)))
201 |
202 | would be processed to give
203 |
204 | .... (VP (VBD reported)) (. .)))
205 |
206 | 2) The scorer also removes all functional tags attached to non-terminals
207 | (functional tags are prefixed with "-" or "=" in the treebank). For example
208 | "NP-SBJ" is processed to give "NP", "NP=2" is changed to "NP".
209 |
210 |
211 | 3) Tagging accuracy counts tags for all words *except* any tags which are
212 | deleted by a DELETE_LABEL specification in the .prm file. (For example, for
213 | COLLINS.prm, punctuation tagged as "," ":" etc. would not be included).
214 |
215 | 4) When calculating the length of a sentence, all words with POS tags not
216 | included in the "DELETE_LABEL_FOR_LENGTH" list in the .prm file are
217 | counted. (For COLLINS.prm, only "-NONE-" is specified in this list, so
218 | traces are removed before calculating the length of the sentence).
219 |
220 | 5) There are some subtleties in scoring when either the goldfile or parsed
221 | file contains multiple constituents for the same span which have the same
222 | non-terminal label, e.g. (NP (NP the man)). If the goldfile contains n
223 | constituents for the same span, and the parsed file contains m constituents
224 | with that nonterminal, the scorer works as follows:
225 |
226 | i) If m>n, then the precision is n/m, recall is 100%
227 |
228 | ii) If n>m, then the precision is 100%, recall is m/n.
229 |
230 | iii) If n==m, recall and precision are both 100%.
231 |
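Equivalently, min(n,m) of the duplicated brackets count as matched. A
minimal Python sketch of this rule (the same min-count logic appears in
bracket_f1 in src/evaluate.py; the (label, span) pairs are made up):

    from collections import Counter

    def matched(gold_brackets, test_brackets):
        gold, test = Counter(gold_brackets), Counter(test_brackets)
        return sum(min(count, test[b]) for b, count in gold.items())

    gold = [("NP", (0, 2))] * 2              # n = 2
    test = [("NP", (0, 2))] * 3              # m = 3
    c = matched(gold, test)
    print(c / len(test), c / len(gold))      # precision = 2/3, recall = 1.0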
--------------------------------------------------------------------------------
/EVALB_SPMRL/spmrl.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of errors before the process stops. ##
12 | ## This is useful if there could be ##
13 | ## tokenization errors. ##
14 | ## The process will stop when this number##
15 | ## of errors has accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10000
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 70
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL ROOT
47 | DELETE_LABEL S1
48 | DELETE_LABEL -NONE-
49 | DELETE_LABEL VROOT
50 |
51 | #DELETE_LABEL ,
52 | #DELETE_LABEL :
53 | #DELETE_LABEL ``
54 | #DELETE_LABEL ''
55 | #DELETE_LABEL .
56 | #DELETE_LABEL ?
57 | #DELETE_LABEL !
58 | #DELETE_LABEL PONCT
59 |
60 | ##------------------------------------------##
61 | ## Delete labels for length calculation ##
62 | ## list of labels to be ignored for ##
63 | ## length calculation purpose ##
64 | ##------------------------------------------##
65 | DELETE_LABEL_FOR_LENGTH -NONE-
66 |
67 | ##------------------------------------------##
68 | ## Labels to be considered for misquote ##
70 | ## (could be possessive or quote) ##
70 | ##------------------------------------------##
71 | #QUOTE_LABEL ``
72 | #QUOTE_LABEL ''
73 | #QUOTE_LABEL POS
74 |
75 | ##------------------------------------------##
76 | ## These are less common, but ##
77 | ## are on occasion output by parsers: ##
78 | ##------------------------------------------##
79 | #QUOTE_LABEL NN
80 | #QUOTE_LABEL CD
81 | #QUOTE_LABEL VBZ
82 | #QUOTE_LABEL :
83 |
84 | ##------------------------------------------##
85 | ## Equivalent labels, words ##
86 | ## the pairs are considered equivalent ##
87 | ## This is non-directional. ##
88 | ##------------------------------------------##
89 | #EQ_LABEL ADVP PRT
90 |
91 | # EQ_WORD Example example
92 |
--------------------------------------------------------------------------------
/EVALB_SPMRL/spmrl_hebrew.prm:
--------------------------------------------------------------------------------
1 | ##------------------------------------------##
2 | ## Debug mode ##
3 | ## 0: No debugging ##
4 | ## 1: print data for individual sentence ##
5 | ## 2: print detailed bracketing info ##
6 | ##------------------------------------------##
7 | DEBUG 0
8 |
9 | ##------------------------------------------##
10 | ## MAX error ##
11 | ## Number of errors before the process stops. ##
12 | ## This is useful if there could be ##
13 | ## tokenization errors. ##
14 | ## The process will stop when this number##
15 | ## of errors has accumulated. ##
16 | ##------------------------------------------##
17 | MAX_ERROR 10000
18 |
19 | ##------------------------------------------##
20 | ## Cut-off length for statistics ##
21 | ## At the end of evaluation, the ##
22 | ## statistics for the sentences of length##
23 | ## less than or equal to this number will##
24 | ## be shown, on top of the statistics ##
25 | ## for all the sentences ##
26 | ##------------------------------------------##
27 | CUTOFF_LEN 40
28 |
29 | ##------------------------------------------##
30 | ## unlabeled or labeled bracketing ##
31 | ## 0: unlabeled bracketing ##
32 | ## 1: labeled bracketing ##
33 | ##------------------------------------------##
34 | LABELED 1
35 |
36 | ##------------------------------------------##
37 | ## Delete labels ##
38 | ## list of labels to be ignored. ##
39 | ## If it is a pre-terminal label, delete ##
40 | ## the word along with the brackets. ##
41 | ## If it is a non-terminal label, just ##
42 | ## delete the brackets (don't delete ##
43 | ## children). ##
44 | ##------------------------------------------##
45 | DELETE_LABEL TOP
46 | DELETE_LABEL ROOT
47 | DELETE_LABEL S1
48 | DELETE_LABEL -NONE-
49 | DELETE_LABEL VROOT
50 | #DELETE_LABEL SENT
51 |
52 | #DELETE_LABEL ,
53 | #DELETE_LABEL :
54 | #DELETE_LABEL ``
55 | #DELETE_LABEL ''
56 | #DELETE_LABEL .
57 | #DELETE_LABEL ?
58 | #DELETE_LABEL !
59 | #DELETE_LABEL PONCT
60 |
61 | ##------------------------------------------##
62 | ## Delete labels for length calculation ##
63 | ## list of labels to be ignored for ##
64 | ## length calculation purpose ##
65 | ##------------------------------------------##
66 | DELETE_LABEL_FOR_LENGTH -NONE-
67 |
68 | ##------------------------------------------##
69 | ## Labels to be considered for misquote ##
70 | ## (could be possessive or quote) ##
71 | ##------------------------------------------##
72 | #QUOTE_LABEL ``
73 | #QUOTE_LABEL ''
74 | #QUOTE_LABEL POS
75 |
76 | ##------------------------------------------##
77 | ## These are less common, but ##
78 | ## are on occasion output by parsers: ##
79 | ##------------------------------------------##
80 | #QUOTE_LABEL NN
81 | #QUOTE_LABEL CD
82 | #QUOTE_LABEL VBZ
83 | #QUOTE_LABEL :
84 |
85 | ##------------------------------------------##
86 | ## Equivalent labels, words ##
87 | ## the pairs are considered equivalent ##
88 | ## This is non-directional. ##
89 | ##------------------------------------------##
90 | #EQ_LABEL ADVP PRT
91 |
92 | # EQ_WORD Example example
93 | DELETE_LABEL SYN_NN
94 | DELETE_LABEL SYN_NNP
95 | DELETE_LABEL SYN_NNT
96 | DELETE_LABEL SYN_PRP
97 | DELETE_LABEL SYN_JJ
98 | DELETE_LABEL SYN_JJT
99 | DELETE_LABEL SYN_RB
100 | DELETE_LABEL SYN_RBR
101 | DELETE_LABEL SYN_MOD
102 | DELETE_LABEL SYN_VB
103 | DELETE_LABEL SYN_AUX
104 | DELETE_LABEL SYN_AGR
105 | DELETE_LABEL SYN_IN
106 | DELETE_LABEL SYN_COM
107 | DELETE_LABEL SYN_REL
108 | DELETE_LABEL SYN_CC
109 | DELETE_LABEL SYN_QW
110 | DELETE_LABEL SYN_HAM
111 | DELETE_LABEL SYN_WDT
112 | DELETE_LABEL SYN_DT
113 | DELETE_LABEL SYN_CD
114 | DELETE_LABEL SYN_CDT
115 | DELETE_LABEL SYN_AT
116 | DELETE_LABEL SYN_H
117 | DELETE_LABEL SYN_FL
118 | DELETE_LABEL SYN_ZVL
119 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Mitchell Stern
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neural Constituency Parser Analysis
2 |
3 | This repository contains code necessary to reproduce experiments in *What's Going On in Neural Constituency Parsers? An Analysis* from NAACL 2018.
4 |
5 | If you are looking for a parser implementation and not the analysis, we recommend you instead use the code from [Mitchell's repository](https://github.com/mitchellstern/minimal-span-parser), which also includes the model improvements described in the paper.
6 |
7 | ## Requirements and Setup
8 |
9 | * Python 3.5 or higher.
10 | * [DyNet](https://github.com/clab/dynet). We recommend installing DyNet from source with MKL support for significantly faster run time.
11 | * [EVALB](http://nlp.cs.nyu.edu/evalb/). Before starting, run `make` inside the `EVALB/` directory to compile an `evalb` executable. This will be called from Python for evaluation.
12 |
13 | ## Command Line Arguments
14 |
15 | The base model can be trained with the command:
16 | ```
17 | python3 src/main.py train --parser-type chart --model-path-base models/base-model
18 | ```
19 | The dev score is appended to the model file name in the form `_dev=xx.xx`, where each `x` is replaced with a digit. Commands that load an already trained model, as some experiments below do, must therefore include this suffix in the model path.
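For example, if the best model reached a dev score of 92.34 (an arbitrary number for illustration), a later command would take the full name:
```
python3 src/main.py test --model-path-base models/base-model_dev=92.34
```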
20 |
21 | The following table describes the command line arguments to run each experiment in the paper:
22 |
23 | Paper section | Arguments
24 | --- | ---
25 | 3.1 | Run `python3 src/main.py train-label --model-path-base models/base-model_dev=xx.xx`.
26 | 3.2 | Use the base model command with `--parser-type independent` instead of `chart`.
27 | 4.1 | Add the option `--embedding-type` with combinations of the characters w, t, and c for word, tag, and character (e.g. `--embedding-type wt`). For character only, we recommend using `--char-lstm-dim 250` as well.
28 | 5.1 | Run `python3 src/main.py derivative --model-path-base models/base-model_dev=xx.xx`.
29 | 5.2 | Add `--lstm-type truncated --lstm-context-size 3` to the base model command and use different values for the context size.
30 | 5.3 | Add `--lstm-type shuffled --lstm-context-size 3`.
31 | 5.4 | Add `--lstm-type no-lstm --lstm-context-size 3 --no-lstm-hidden-dims 1000`.
32 |
33 | To run on the test set, use
34 | ```
35 | python3 src/main.py test --model-path-base models/base-model_dev=xx.xx
36 | ```
37 |
38 |
--------------------------------------------------------------------------------
/src/evaluate.py:
--------------------------------------------------------------------------------
1 | import math
2 | import os.path
3 | import re
4 | import subprocess
5 | import tempfile
6 | from collections import Counter
7 |
8 | import trees
9 |
10 | class FScore(object):
11 | def __init__(self, recall, precision, fscore):
12 | self.recall = recall
13 | self.precision = precision
14 | self.fscore = fscore
15 |
16 | def __str__(self):
17 | return "(Recall={:.2f}, Precision={:.2f}, FScore={:.2f})".format(
18 | self.recall, self.precision, self.fscore)
19 |
20 | def evalb(evalb_dir, gold_trees, predicted_trees, ref_gold_path=None):
21 | assert os.path.exists(evalb_dir)
22 | evalb_program_path = os.path.join(evalb_dir, "evalb")
23 | evalb_spmrl_program_path = os.path.join(evalb_dir, "evalb_spmrl")
24 | assert os.path.exists(evalb_program_path) or os.path.exists(evalb_spmrl_program_path)
25 |
26 | if os.path.exists(evalb_program_path):
27 | evalb_param_path = os.path.join(evalb_dir, "COLLINS.prm")
28 | else:
29 | evalb_program_path = evalb_spmrl_program_path
30 | evalb_param_path = os.path.join(evalb_dir, "spmrl.prm")
31 |
32 | assert os.path.exists(evalb_program_path)
33 | assert os.path.exists(evalb_param_path)
34 |
35 | assert len(gold_trees) == len(predicted_trees)
36 | for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
37 | assert isinstance(gold_tree, trees.TreebankNode)
38 | assert isinstance(predicted_tree, trees.TreebankNode)
39 | gold_leaves = list(gold_tree.leaves())
40 | predicted_leaves = list(predicted_tree.leaves())
41 | assert len(gold_leaves) == len(predicted_leaves)
42 | assert all(
43 | gold_leaf.word == predicted_leaf.word
44 | for gold_leaf, predicted_leaf in zip(gold_leaves, predicted_leaves))
45 |
46 | temp_dir = tempfile.TemporaryDirectory(prefix="evalb-")
47 | gold_path = os.path.join(temp_dir.name, "gold.txt")
48 | predicted_path = os.path.join(temp_dir.name, "predicted.txt")
49 | output_path = os.path.join(temp_dir.name, "output.txt")
50 |
51 | with open(gold_path, "w") as outfile:
52 | if ref_gold_path is None:
53 | for tree in gold_trees:
54 | outfile.write("{}\n".format(tree.linearize()))
55 | else:
56 | # For the SPMRL dataset our data loader performs some modifications
57 | # (like stripping morphological features), so we compare to the
58 | # raw gold file to be certain that we haven't spoiled the evaluation
59 | # in some way.
60 | with open(ref_gold_path) as goldfile:
61 | outfile.write(goldfile.read())
62 |
63 | with open(predicted_path, "w") as outfile:
64 | for tree in predicted_trees:
65 | outfile.write("{}\n".format(tree.linearize()))
66 |
67 | command = "{} -p {} {} {} > {}".format(
68 | evalb_program_path,
69 | evalb_param_path,
70 | gold_path,
71 | predicted_path,
72 | output_path,
73 | )
74 | subprocess.run(command, shell=True)
75 |
76 | fscore = FScore(math.nan, math.nan, math.nan)
77 | with open(output_path) as infile:
78 | for line in infile:
79 | match = re.match(r"Bracketing Recall\s+=\s+(\d+\.\d+)", line)
80 | if match:
81 | fscore.recall = float(match.group(1))
82 | match = re.match(r"Bracketing Precision\s+=\s+(\d+\.\d+)", line)
83 | if match:
84 | fscore.precision = float(match.group(1))
85 | match = re.match(r"Bracketing FMeasure\s+=\s+(\d+\.\d+)", line)
86 | if match:
87 | fscore.fscore = float(match.group(1))
88 | break
89 |
90 | success = (
91 | not math.isnan(fscore.fscore) or
92 | fscore.recall == 0.0 or
93 | fscore.precision == 0.0)
94 |
95 | if success:
96 | temp_dir.cleanup()
97 | else:
98 | print("Error reading EVALB results.")
99 | print("Gold path: {}".format(gold_path))
100 | print("Predicted path: {}".format(predicted_path))
101 | print("Output path: {}".format(output_path))
102 |
103 | return fscore
104 |
105 | def bracket_f1(gold_trees, predicted_trees):
106 | correct_total = 0
107 | gold_total = 0
108 | pred_total = 0
109 | for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
110 | gold_brackets = gold_tree.brackets()
111 | predicted_brackets = predicted_tree.brackets()
112 | gbc = Counter(gold_brackets)
113 | pbc = Counter(predicted_brackets)
114 | correct = 0
115 | for gb in gbc:
116 | if gb in pbc:
117 | correct += min(gbc[gb], pbc[gb])
118 |
119 | correct_total += correct
120 | gold_total += len(gold_brackets)
121 | pred_total += len(predicted_brackets)
122 |
123 | precision = 100.0 * correct_total/pred_total
124 | recall = 100.0 * correct_total/gold_total
125 | f = (2 * precision * recall) / (precision + recall)
126 | return FScore(recall, precision, f)
127 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import os.path
4 | import time
5 | from collections import defaultdict
6 | import random
7 |
8 | import dynet as dy
9 | import numpy as np
10 |
11 | import evaluate
12 | import parse
13 | import trees
14 | import vocabulary
15 |
16 | def format_elapsed(start_time):
17 | elapsed_time = int(time.time() - start_time)
18 | minutes, seconds = divmod(elapsed_time, 60)
19 | hours, minutes = divmod(minutes, 60)
20 | days, hours = divmod(hours, 24)
21 | elapsed_string = "{}h{:02}m{:02}s".format(hours, minutes, seconds)
22 | if days > 0:
23 | elapsed_string = "{}d{}".format(days, elapsed_string)
24 | return elapsed_string
25 |
26 | def run_train(args):
27 | print("Running training with arguments:", args)
28 |
29 | if args.numpy_seed is not None:
30 | print("Setting numpy random seed to {}...".format(args.numpy_seed))
31 | np.random.seed(args.numpy_seed)
32 |
33 | print("Loading training trees from {}...".format(args.train_path))
34 | train_treebank = trees.load_trees(args.train_path)
35 | print("Loaded {:,} training examples.".format(len(train_treebank)))
36 |
37 | print("Loading development trees from {}...".format(args.dev_path))
38 | dev_treebank = trees.load_trees(args.dev_path)
39 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
40 |
41 | print("Processing trees for training...")
42 | train_parse = [tree.convert() for tree in train_treebank]
43 |
44 | print("Constructing vocabularies...")
45 |
46 | tag_vocab = vocabulary.Vocabulary()
47 | tag_vocab.index(parse.START)
48 | tag_vocab.index(parse.STOP)
49 | tag_vocab.index(parse.COMMON_WORD)
50 |
51 | char_vocab = vocabulary.Vocabulary()
52 | char_vocab.index(parse.START)
53 | char_vocab.index(parse.STOP)
54 | char_vocab.index(parse.COMMON_WORD)
55 | char_vocab.index(parse.UNK)
56 |
57 | word_vocab = vocabulary.Vocabulary()
58 | word_vocab.index(parse.START)
59 | word_vocab.index(parse.STOP)
60 | word_vocab.index(parse.UNK)
61 |
62 | label_vocab = vocabulary.Vocabulary()
63 | label_vocab.index(())
64 |
65 | for tree in train_parse:
66 | nodes = [tree]
67 | while nodes:
68 | node = nodes.pop()
69 | if isinstance(node, trees.InternalParseNode):
70 | label_vocab.index(node.label)
71 | nodes.extend(reversed(node.children))
72 | else:
73 | tag_vocab.index(node.tag)
74 | word_vocab.index(node.word)
75 | for char in node.word:
76 | char_vocab.index(char)
77 |
78 | tag_vocab.freeze()
79 | char_vocab.freeze()
80 | word_vocab.freeze()
81 | label_vocab.freeze()
82 |
83 | def print_vocabulary(name, vocab):
84 | special = {parse.START, parse.STOP, parse.UNK}
85 | print("{} ({:,}): {}".format(
86 | name, vocab.size,
87 | sorted(value for value in vocab.values if value in special) +
88 | sorted(value for value in vocab.values if value not in special)))
89 |
90 | if args.print_vocabs:
91 | print_vocabulary("Tag", tag_vocab)
92 | print_vocabulary("Word", word_vocab)
93 | print_vocabulary("Character", char_vocab)
94 | print_vocabulary("Label", label_vocab)
95 |
96 | print("Initializing model...")
97 | model = dy.ParameterCollection()
98 | print("Input LSTM type:", args.lstm_type)
99 | assert args.embedding_type != ""
100 | span_representation_args = [
101 | tag_vocab,
102 | char_vocab,
103 | word_vocab,
104 | label_vocab,
105 | args.tag_embedding_dim,
106 | args.char_embedding_dim,
107 | args.char_lstm_layers,
108 | args.char_lstm_dim,
109 | args.word_embedding_dim,
110 | args.lstm_layers,
111 | args.lstm_dim,
112 | args.dropout,
113 | args.lstm_type,
114 | args.lstm_context_size,
115 | args.embedding_type,
116 | args.concat_bow,
117 | args.weight_bow,
118 | args.random_embeddings,
119 | args.random_lstm,
120 | args.common_word_threshold,
121 | args.no_lstm_hidden_dims,
122 | ]
123 |
124 | if args.parser_type == "top-down":
125 | parser = parse.TopDownParser(
126 | model,
127 | args.label_hidden_dim,
128 | args.split_hidden_dim,
129 | span_representation_args
130 | )
131 | elif args.parser_type == 'chart':
132 | parser = parse.ChartParser(
133 | model,
134 | args.label_hidden_dim,
135 | span_representation_args
136 | )
137 | elif args.parser_type == 'independent':
138 | parser = parse.IndependentParser(
139 | model,
140 | args.label_hidden_dim,
141 | span_representation_args
142 | )
143 | trainer = dy.AdamTrainer(parser.trainable_parameters)
144 |
145 | total_processed = 0
146 | current_processed = 0
147 | check_every = len(train_parse) / args.checks_per_epoch
148 | best_dev_fscore = -np.inf
149 | best_dev_model_path = None
150 |
151 | start_time = time.time()
152 |
153 | def check_dev():
154 | nonlocal best_dev_fscore
155 | nonlocal best_dev_model_path
156 |
157 | dev_start_time = time.time()
158 |
159 | dev_predicted = []
160 | for tree in dev_treebank:
161 | dy.renew_cg()
162 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
163 | predicted, _ = parser.parse(sentence)
164 | dev_predicted.append(predicted.convert())
165 |
166 | if args.parser_type == 'independent':
167 | tree_count = 0
168 | for pred in dev_predicted:
169 | if pred.is_tree():
170 | tree_count += 1
171 | print("Percentage of valid trees:", tree_count/len(dev_predicted))
172 |
173 | dev_fscore = evaluate.bracket_f1(dev_treebank, dev_predicted)
174 | else:
175 | dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank, dev_predicted)
176 |
177 | print(
178 | "dev-fscore {} "
179 | "dev-elapsed {} "
180 | "total-elapsed {}".format(
181 | dev_fscore,
182 | format_elapsed(dev_start_time),
183 | format_elapsed(start_time),
184 | )
185 | )
186 |
187 | if dev_fscore.fscore > best_dev_fscore:
188 | if best_dev_model_path is not None:
189 | for ext in [".data", ".meta"]:
190 | path = best_dev_model_path + ext
191 | if os.path.exists(path):
192 | print("Removing previous model file {}...".format(path))
193 | os.remove(path)
194 |
195 | best_dev_fscore = dev_fscore.fscore
196 | best_dev_model_path = "{}_dev={:.2f}".format(
197 | args.model_path_base, dev_fscore.fscore)
198 | print("Saving new best model to {}...".format(best_dev_model_path))
199 | dy.save(best_dev_model_path, [parser])
200 |
201 | for epoch in itertools.count(start=1):
202 | if args.epochs is not None and epoch > args.epochs:
203 | break
204 |
205 | np.random.shuffle(train_parse)
206 | epoch_start_time = time.time()
207 |
208 | for start_index in range(0, len(train_parse), args.batch_size):
209 | dy.renew_cg()
210 | parser.new_batch()
211 | batch_losses = []
212 | for tree in train_parse[start_index:start_index + args.batch_size]:
213 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
214 | if args.parser_type == "top-down":
215 | _, loss = parser.parse(sentence, tree, args.explore)
216 | else:
217 | _, loss = parser.parse(sentence, tree)
218 | batch_losses.append(loss)
219 | total_processed += 1
220 | current_processed += 1
221 |
222 | batch_loss = dy.average(batch_losses)
223 | batch_loss_value = batch_loss.scalar_value()
224 | batch_loss.backward()
225 | trainer.update()
226 |
227 | if (start_index // args.batch_size + 1) % args.print_frequency == 0:
228 | print(
229 | "epoch {:,} "
230 | "batch {:,}/{:,} "
231 | "processed {:,} "
232 | "batch-loss {:.4f} "
233 | "epoch-elapsed {} "
234 | "total-elapsed {}".format(
235 | epoch,
236 | start_index // args.batch_size + 1,
237 | int(np.ceil(len(train_parse) / args.batch_size)),
238 | total_processed,
239 | batch_loss_value,
240 | format_elapsed(epoch_start_time),
241 | format_elapsed(start_time),
242 | )
243 | )
244 |
245 | if current_processed >= check_every:
246 | current_processed -= check_every
247 | check_dev()
248 |
249 | def run_test(args):
250 | print("Loading test trees from {}...".format(args.test_path))
251 | test_treebank = trees.load_trees(args.test_path)
252 | print("Loaded {:,} test examples.".format(len(test_treebank)))
253 |
254 | print("Loading model from {}...".format(args.model_path_base))
255 | model = dy.ParameterCollection()
256 | [parser] = dy.load(args.model_path_base, model)
257 |
258 | print("Parsing test sentences...")
259 |
260 | start_time = time.time()
261 |
262 | test_predicted = []
263 | for tree in test_treebank:
264 | dy.renew_cg()
265 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
266 | predicted, _ = parser.parse(sentence)
267 | test_predicted.append(predicted.convert())
268 |
269 | if type(parser) == parse.IndependentParser:
270 | print('Warning: not using evalb for evaluation')
271 | test_fscore = evaluate.bracket_f1(test_treebank, test_predicted)
272 | else:
273 | test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)
274 |
275 | print(
276 | "test-fscore {} "
277 | "test-elapsed {}".format(
278 | test_fscore,
279 | format_elapsed(start_time),
280 | )
281 | )
282 |
283 | def predict_labels(args):
284 | print("Loading training trees from {}...".format(args.train_path))
285 | train_treebank = trees.load_trees(args.train_path)
286 | print("Loaded {:,} training examples.".format(len(train_treebank)))
287 |
288 | print("Loading development trees from {}...".format(args.dev_path))
289 | dev_treebank = trees.load_trees(args.dev_path)
290 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
291 |
292 | print("Processing trees for training...")
293 | train_parse = [tree.convert() for tree in train_treebank]
294 | dev_parse = [tree.convert() for tree in dev_treebank]
295 |
296 | print("Calculating baseline...")
297 | counts = defaultdict(lambda : defaultdict(int))
298 | for tree in train_parse:
299 | for node, parent in tree.iterate_spans_with_parents(): # doesn't include top level
300 | counts[node.label][parent.label] += 1
301 | counts[tree.label][''] += 1
302 | predictions = {label:max(counts.keys(), key=lambda x: counts[x]) for label, counts in counts.items()}
303 | correct = 0
304 | total = 0
305 | for tree in dev_parse:
306 | for node, parent in tree.iterate_spans_with_parents(): # doesn't include top level
307 | if predictions[node.label] == parent.label:
308 | correct += 1
309 | total += 1
310 | if predictions[tree.label] == '':
311 | correct += 1
312 | total += 1
313 | print("baseline score:", correct/total)
314 |
315 | print("Loading model from {}...".format(args.model_path_base))
316 | model = dy.ParameterCollection()
317 | [base_parser] = dy.load(args.model_path_base, model)
318 |
319 | for self_not_parent in [False, True]:
320 | parser = parse.LabelPrediction(model, base_parser, args.label_hidden_dim)
321 | trainer = dy.AdamTrainer(parser.f_label.model)
322 |
323 | print('predicting own label' if self_not_parent else 'predicting parent label')
324 | for epoch_index in range(10):
325 | np.random.shuffle(train_parse)
326 | for start_index in range(0, len(train_parse), args.batch_size):
327 | dy.renew_cg()
328 | batch_losses = []
329 | for tree in train_parse[start_index:start_index + args.batch_size]:
330 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
331 | loss, _, _ = parser.predict_parent_label_for_spans(sentence, tree, self_not_parent)
332 | batch_losses.append(loss)
333 | batch_loss = dy.average(batch_losses)
334 | batch_loss_value = batch_loss.scalar_value()
335 | batch_loss.backward()
336 | trainer.update()
337 |
338 | correct = 0
339 | total = 0
340 | for tree in dev_parse:
341 | dy.renew_cg()
342 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
343 | _, c, t = parser.predict_parent_label_for_spans(sentence, tree, self_not_parent)
344 | correct += c
345 | total += t
346 | print("dev score at epoch", epoch_index+1, ":", correct/total)
347 |
348 | def derivative_analysis(args):
349 | print("Loading development trees from {}...".format(args.dev_path))
350 | dev_treebank = trees.load_trees(args.dev_path)
351 | print("Loaded {:,} development examples.".format(len(dev_treebank)))
352 |
353 | print("Processing trees...")
354 | dev_parse = [tree.convert() for tree in dev_treebank]
355 |
356 | print("Loading model from {}...".format(args.model_path_base))
357 | model = dy.ParameterCollection()
358 | [parser] = dy.load(args.model_path_base, model)
359 |
360 | total_l1_grad = np.zeros(500)
361 | total_l2_grad = np.zeros(500)
362 | total_count = np.zeros(500)
363 | for tree in dev_parse:
364 | sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
365 | for position in range(len(sentence)+1):
366 | index = random.randrange(parser.lstm_dim*2)
367 | dy.renew_cg()
368 | gradients = parser.lstm_derivative(sentence, position, index)
369 | buckets = list(reversed(range(position+1))) + list(range(len(sentence)-position+1))
370 | assert len(buckets) == len(gradients)
371 | for position, grad in zip(buckets, gradients):
372 | total_l1_grad[position] += np.linalg.norm(grad, ord=1)
373 | total_l2_grad[position] += np.linalg.norm(grad, ord=2)
374 | total_count[position] += 1
375 |
376 | print('l1:')
377 | for i in range(500):
378 | if total_count[i] == 0:
379 | break
380 | print(total_l1_grad[i]/total_count[i])
381 | print('l2:')
382 | for i in range(500):
383 | if total_count[i] == 0:
384 | break
385 | print(total_l2_grad[i]/total_count[i])
386 |
387 | def main():
388 | dynet_args = [
389 | "--dynet-mem",
390 | "--dynet-weight-decay",
391 | "--dynet-autobatch",
392 | "--dynet-gpus",
393 | "--dynet-gpu",
394 | "--dynet-devices",
395 | "--dynet-seed",
396 | ]
397 |
398 | parser = argparse.ArgumentParser()
399 | subparsers = parser.add_subparsers()
400 |
401 | subparser = subparsers.add_parser("train")
402 | subparser.set_defaults(callback=run_train)
403 | for arg in dynet_args:
404 | subparser.add_argument(arg)
405 | subparser.add_argument("--numpy-seed", type=int)
406 | subparser.add_argument("--parser-type", choices=["top-down", "chart", "independent"], required=True)
407 | subparser.add_argument("--tag-embedding-dim", type=int, default=50)
408 | subparser.add_argument("--char-embedding-dim", type=int, default=50)
409 | subparser.add_argument("--char-lstm-layers", type=int, default=1)
410 | subparser.add_argument("--char-lstm-dim", type=int, default=100)
411 | subparser.add_argument("--word-embedding-dim", type=int, default=100)
412 | subparser.add_argument("--lstm-layers", type=int, default=2)
413 | subparser.add_argument("--lstm-dim", type=int, default=250)
414 | subparser.add_argument("--label-hidden-dim", type=int, default=250)
415 | subparser.add_argument("--split-hidden-dim", type=int, default=250)
416 | subparser.add_argument("--dropout", type=float, default=0.4)
417 | subparser.add_argument("--explore", action="store_true")
418 | subparser.add_argument("--model-path-base", required=True)
419 | subparser.add_argument("--evalb-dir", default="EVALB/")
420 | subparser.add_argument("--train-path", default="data/02-21.10way.clean")
421 | subparser.add_argument("--dev-path", default="data/22.auto.clean")
422 | subparser.add_argument("--batch-size", type=int, default=10)
423 | subparser.add_argument("--epochs", type=int)
424 | subparser.add_argument("--checks-per-epoch", type=int, default=4)
425 | subparser.add_argument("--print-vocabs", action="store_true")
426 | subparser.add_argument("--lstm-type", choices=["basic","truncated","shuffled","inside","no-lstm","untied-truncated"], default="basic")
427 | subparser.add_argument("--lstm-context-size", type=int, default=3)
428 | subparser.add_argument("--embedding-type", default="wc") # characters w/t/c for word/tag/character
429 | subparser.add_argument("--random-embeddings", action="store_true")
430 | subparser.add_argument("--random-lstm", action="store_true")
431 | subparser.add_argument("--concat-bow", action="store_true")
432 | subparser.add_argument("--weight-bow", action="store_true")
433 | subparser.add_argument("--print-frequency", type=int, default=1)
434 | subparser.add_argument("--common-word-threshold", type=int, default=float('inf')) # replace tags and character-level inputs with a special token above this threshold
435 | subparser.add_argument("--no-lstm-hidden-dims", type=int, nargs="+", default=[250])
436 | train_subparser = subparser
437 |
438 | subparser = subparsers.add_parser("train-label", parents=[train_subparser], add_help=False)
439 | subparser.set_defaults(callback=predict_labels)
440 |
441 | subparser = subparsers.add_parser("derivative", parents=[train_subparser], add_help=False)
442 | subparser.set_defaults(callback=derivative_analysis)
443 |
444 | subparser = subparsers.add_parser("test")
445 | subparser.set_defaults(callback=run_test)
446 | for arg in dynet_args:
447 | subparser.add_argument(arg)
448 | subparser.add_argument("--model-path-base", required=True)
449 | subparser.add_argument("--evalb-dir", default="EVALB/")
450 | subparser.add_argument("--test-path", default="data/23.auto.clean")
451 |
452 | args = parser.parse_args()
453 | args.callback(args)
454 |
455 | if __name__ == "__main__":
456 | main()
457 |
--------------------------------------------------------------------------------
/src/parse.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import random
3 |
4 | import dynet as dy
5 | import numpy as np
6 |
7 | import trees
8 |
9 | START = "<START>"
10 | STOP = "<STOP>"
11 | UNK = "<UNK>"
12 | COMMON_WORD = "<COMMON>"
13 |
14 | def augment(scores, oracle_index): # add 1 to every score except the oracle's (loss-augmented decoding)
15 | assert isinstance(scores, dy.Expression)
16 | shape = scores.dim()[0]
17 | assert len(shape) == 1
18 | increment = np.ones(shape)
19 | increment[oracle_index] = 0
20 | return scores + dy.inputVector(increment)
21 |
22 | def shuffle(items, start, end):
23 | if end <= start:
24 | return
25 | to_shuffle = items[start:end]
26 | random.shuffle(to_shuffle)
27 | items[start:end] = to_shuffle
28 |
29 | def transpose_lists(nested_list):
30 | result = []
31 | for i in range(len(nested_list[0])):
32 | result.append([l[i] for l in nested_list])
33 | return result
34 |
35 | def bow_range(items, start, end):
36 | if end <= start:
37 | return dy.zeros(*items[0].dim())
38 | else:
39 | return dy.average(items[start:end]) # or esum
40 |
41 | def weighted_bow_range(items, start, end, params, location):
42 | if end <= start:
43 | return dy.zeros(*items[0].dim())
44 | else:
45 | selected_items = items[start:end]
46 | n = len(selected_items)
47 | selected_params = params[:n]
48 | assert location in ['left','right','middle']
49 | if location == 'left': # reverse weights to left
50 | selected_params = selected_params[::-1]  # keep a list so len() below works
51 | elif location == 'middle' and n > 1: # mirror weights in middle
52 | first_half = selected_params[:n//2]
53 | selected_params[-(n//2):] = reversed(first_half)
54 | assert len(selected_params) == len(selected_items)
55 | weighted_items = [i*dy.parameter(p) for i, p in zip(selected_items, selected_params)]
56 | return dy.average(weighted_items)
57 |
58 | class Feedforward(object):
59 | def __init__(self, model, input_dim, hidden_dims, output_dim, dropout=0):
60 | self.spec = locals()
61 | self.spec.pop("self")
62 | self.spec.pop("model")
63 |
64 | self.model = model.add_subcollection("Feedforward")
65 |
66 | self.weights = []
67 | self.biases = []
68 | dims = [input_dim] + hidden_dims + [output_dim]
69 | for prev_dim, next_dim in zip(dims, dims[1:]):
70 | self.weights.append(self.model.add_parameters((next_dim, prev_dim)))
71 | self.biases.append(self.model.add_parameters(next_dim))
72 |
73 | self.dropout = dropout
74 |
75 | def param_collection(self):
76 | return self.model
77 |
78 | @classmethod
79 | def from_spec(cls, spec, model):
80 | return cls(model, **spec)
81 |
82 | def __call__(self, x):
83 | for i, (weight, bias) in enumerate(zip(self.weights, self.biases)):
84 | weight = dy.parameter(weight)
85 | bias = dy.parameter(bias)
86 | x = dy.affine_transform([bias, weight, x])
87 | if i < len(self.weights) - 1:
88 | x = dy.rectify(x)
89 | x = dy.dropout(x, self.dropout)
90 | return x
91 |
92 | class UntiedLSTMLayer(object):
93 | def __init__(self, model, in_size, hidden_size, length, dropout=0.0):
94 | self.model = model.add_subcollection("UntiedLSTM")
95 |
96 | self.in_size = in_size
97 | self.hidden_size = hidden_size
98 | self.length = length
99 | self.dropout = dropout
100 |
101 | self.Wxs = [self.model.add_parameters((4*hidden_size,in_size)) for _ in range(length)]
102 | self.Whs = [self.model.add_parameters((4*hidden_size,hidden_size)) for _ in range(length)]
103 | self.bs = [self.model.add_parameters(4*hidden_size) for _ in range(length)]
104 | self.initial_c = self.model.add_parameters(hidden_size)
105 | self.initial_h = self.model.add_parameters(hidden_size)
106 |
107 | def set_dropout(self, dropout):
108 | self.dropout = dropout
109 |
110 | def disable_dropout(self):
111 | self.dropout = 0.0
112 |
113 | def transduce(self, inputs):
114 | assert len(inputs) == self.length
115 |
116 | batch_size = inputs[0].dim()[1]
117 |
118 | dropout_retain = 1 - self.dropout
119 | dropout_mask_x = dy.random_bernoulli(self.in_size, dropout_retain, 1/dropout_retain, batch_size)
120 | dropout_mask_h = dy.random_bernoulli(self.hidden_size, dropout_retain, 1/dropout_retain, batch_size)
121 |
122 | c_init = dy.parameter(self.initial_c)
123 | c_tm1 = dy.concatenate_to_batch([c_init for _ in range(batch_size)])
124 | h_init = dy.parameter(self.initial_h)
125 | h_tm1 = dy.concatenate_to_batch([h_init for _ in range(batch_size)])
126 |
127 | outputs = []
128 |
129 | for i, x in enumerate(inputs):
130 | gates = dy.vanilla_lstm_gates_dropout_concat([x], h_tm1,
131 | dy.parameter(self.Wxs[i]), dy.parameter(self.Whs[i]), dy.parameter(self.bs[i]),
132 | dropout_mask_x, dropout_mask_h)
133 |
134 | c = dy.vanilla_lstm_c(c_tm1, gates)
135 | h = dy.vanilla_lstm_h(c, gates)
136 | outputs.append(h)
137 |
138 | c_tm1 = c
139 | h_tm1 = h
140 |
141 | return outputs
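
    # Unlike a standard LSTM, this layer learns a separate set of gate
    # parameters (Wx, Wh, b) for every time step, so it can only transduce
    # sequences of exactly the `length` it was built with. The Bernoulli
    # masks are sampled once per call and reused at every step, i.e.
    # variational-style recurrent dropout, scaled by 1 / retain probability
    # to preserve expectations.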
142 |
143 | class BidirectionalUntiedLSTM(object):
144 | def __init__(self, model, in_size, hidden_size, n_layers, length, dropout=0.0):
145 | self.model = model.add_subcollection("Bidirectional")
146 |
147 | self.layers = []
148 | for i in range(n_layers):
149 | f = UntiedLSTMLayer(self.model, in_size if i == 0 else 2*hidden_size, hidden_size, length, dropout)
150 | b = UntiedLSTMLayer(self.model, in_size if i == 0 else 2*hidden_size, hidden_size, length, dropout)
151 | self.layers.append((f,b))
152 |
153 | def set_dropout(self, dropout):
154 | for f,b in self.layers:
155 | f.set_dropout(dropout)
156 | b.set_dropout(dropout)
157 |
158 | def disable_dropout(self):
159 | for f,b in self.layers:
160 | f.disable_dropout()
161 | b.disable_dropout()
162 |
163 | def transduce(self, inputs):
164 | f,b = self.layers[0]
165 | fh = f.transduce(inputs)
166 | bh = reversed(b.transduce(inputs[::-1]))
167 | h = [dy.concatenate([a,b]) for a,b in zip(fh, bh)]
168 |
169 | for i in range(1,len(self.layers)):
170 | f,b = self.layers[i]
171 | fh = f.transduce(h)
172 | bh = reversed(b.transduce(h[::-1]))
173 | h = [dy.concatenate([a,b]) for a,b in zip(fh, bh)]
174 |
175 | return h
176 |
177 | class ParserBase(object):
178 | def __init__(
179 | self,
180 | model,
181 | tag_vocab,
182 | char_vocab,
183 | word_vocab,
184 | label_vocab,
185 | tag_embedding_dim,
186 | char_embedding_dim,
187 | char_lstm_layers,
188 | char_lstm_dim,
189 | word_embedding_dim,
190 | lstm_layers,
191 | lstm_dim,
192 | dropout,
193 | lstm_type,
194 | lstm_context_size,
195 | embedding_type,
196 | concat_bow,
197 | weight_bow,
198 | random_emb,
199 | random_lstm,
200 | common_word_threshold,
201 | no_lstm_hidden_dims,
202 | ):
203 | self.spec = locals()
204 | self.spec.pop("self")
205 | self.spec.pop("model")
206 |
207 | self.model = model.add_subcollection("Parser")
208 | self.trainable_parameters = self.model.add_subcollection("Trainable")
209 | self.tag_vocab = tag_vocab
210 | self.char_vocab = char_vocab
211 | self.word_vocab = word_vocab
212 | self.label_vocab = label_vocab
213 | self.char_lstm_dim = char_lstm_dim
214 | self.lstm_dim = lstm_dim
215 |
216 | emb_model = self.model if random_emb else self.trainable_parameters
217 |
218 | for c in embedding_type:
219 | assert c in 'wtc'
220 | self.embedding_type = embedding_type
221 | emb_dim = 0
222 | if 'w' in embedding_type:
223 | emb_dim += word_embedding_dim
224 | self.word_embeddings = emb_model.add_lookup_parameters(
225 | (word_vocab.size, word_embedding_dim), name="word-embeddings")
226 | if 't' in embedding_type:
227 | emb_dim += tag_embedding_dim
228 | self.tag_embeddings = emb_model.add_lookup_parameters(
229 | (tag_vocab.size, tag_embedding_dim), name="tag-embeddings")
230 | if 'c' in embedding_type:
231 | emb_dim += 2*char_lstm_dim
232 | self.char_embeddings = emb_model.add_lookup_parameters(
233 | (char_vocab.size, char_embedding_dim), name="char-embeddings")
234 |
235 | self.char_lstm = dy.BiRNNBuilder(
236 | char_lstm_layers,
237 | char_embedding_dim,
238 | 2 * char_lstm_dim,
239 | self.trainable_parameters,
240 | dy.VanillaLSTMBuilder)
241 |
242 | if lstm_type in ["truncated", "untied-truncated", "no-lstm"]:
243 |             self.indexed_starts = [self.trainable_parameters.add_parameters(emb_dim) for _ in range(300)]  # 300 caps the number of fencepost positions (sentence length + 1)
244 | self.indexed_stops = [self.trainable_parameters.add_parameters(emb_dim) for _ in range(300)]
245 |
246 | if lstm_type == "no-lstm":
247 | self.context_network = Feedforward(
248 | self.model if random_lstm else self.trainable_parameters,
249 | emb_dim*2*(lstm_context_size+1), no_lstm_hidden_dims, 2*lstm_dim, dropout)
250 | elif lstm_type == "untied-truncated":
251 | self.lstm = BidirectionalUntiedLSTM(
252 | self.model if random_lstm else self.trainable_parameters,
253 | emb_dim, lstm_dim, lstm_layers, 2*(lstm_context_size+1))
254 | else:
255 | self.lstm = dy.BiRNNBuilder(
256 | lstm_layers,
257 | emb_dim,
258 | 2 * lstm_dim,
259 | self.model if random_lstm else self.trainable_parameters,
260 | dy.VanillaLSTMBuilder)
261 |
262 |         assert not concat_bow or lstm_type == 'truncated', 'concat-bow only supported with truncated lstm-type'
263 | self.concat_bow = concat_bow
264 | self.weight_bow = weight_bow
265 | output_dim = 2 * lstm_dim
266 | if concat_bow:
267 | output_dim += 3 * emb_dim
268 | if weight_bow:
269 | self.bow_weights = [self.trainable_parameters.add_parameters(1) for i in range(300)]
270 | self.span_representation_dimension = output_dim
271 |
272 | self.dropout = dropout
273 |
274 | self.lstm_type = lstm_type
275 | self.lstm_context_size = lstm_context_size
276 |
277 | self.lstm_initialized = False
278 |
279 | self.common_word_threshold = common_word_threshold
280 |
281 | def param_collection(self):
282 | return self.model
283 |
284 | @classmethod
285 | def from_spec(cls, spec, model):
286 | return cls(model, **spec)
287 |
288 | def new_batch(self):
289 | self.lstm_initialized = False
290 |
291 | def transduce_lstm_batch(self, inputs):
292 |         # workaround for an lstm dropout error in dynet: after the first transduce on a batch, refresh the dropout masks to match the current batch size
293 | if self.lstm_initialized:
294 | batch_size = inputs[0].dim()[1]
295 | for fb, bb in self.lstm.builder_layers:
296 | for b in [fb,bb]:
297 | b.set_dropout_masks(batch_size=batch_size)
298 | self.lstm_initialized = True
299 | return self.lstm.transduce(inputs)
300 |
301 | def get_basic_span_encoding(self, embeddings):
302 | lstm_outputs = self.lstm.transduce(embeddings)
303 |
304 | @functools.lru_cache(maxsize=None)
305 | def span_encoding(left, right):
306 | forward = (
307 | lstm_outputs[right][:self.lstm_dim] -
308 | lstm_outputs[left][:self.lstm_dim])
309 | backward = (
310 | lstm_outputs[left + 1][self.lstm_dim:] -
311 | lstm_outputs[right + 1][self.lstm_dim:])
312 | return dy.concatenate([forward, backward])
313 |
314 | return span_encoding
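
    # The closure above computes the usual LSTM-minus span features: with
    # outputs indexed over the START/STOP-padded sentence, span (left, right)
    # is the concatenation of
    #   forward[right] - forward[left]            (first half of each output)
    #   backward[left + 1] - backward[right + 1]  (second half)
    # so each difference summarizes the words between the two fenceposts.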
315 |
316 | def get_truncated_span_encoding(self, embeddings, distance, concat_bow, weight_bow, untied=False):
317 | padded_embeddings = [embeddings[0]]*(distance-1)+embeddings+[embeddings[-1]]*(distance-1)
318 | batched_embeddings = []
319 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
320 | self.indexed_starts[:len(embeddings)-1]]))
321 | for i in range(distance*2):
322 | selected = padded_embeddings[i:len(padded_embeddings)-(distance*2)+i+1]
323 | catted = dy.concatenate_to_batch(selected)
324 | batched_embeddings.append(catted)
325 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
326 | self.indexed_stops[:len(embeddings)-1]]))
327 | assert batched_embeddings[0].dim()[1] == len(embeddings)-1 # batch dimension is length of sentence + 1
328 |
329 | if untied:
330 | lstm_outputs = self.lstm.transduce(batched_embeddings)
331 | else:
332 | lstm_outputs = self.transduce_lstm_batch(batched_embeddings)
333 |
334 | forward_reps = lstm_outputs[distance-1][:self.lstm_dim]
335 | backward_reps = lstm_outputs[distance][self.lstm_dim:]
336 |
337 | @functools.lru_cache(maxsize=None)
338 | def span_encoding(left, right):
339 | forward = (
340 | dy.pick_batch_elem(forward_reps, right) -
341 | dy.pick_batch_elem(forward_reps, left))
342 | backward = (
343 | dy.pick_batch_elem(backward_reps, left) -
344 | dy.pick_batch_elem(backward_reps, right))
345 |
346 | if concat_bow:
347 | if weight_bow:
348 | bow_before = weighted_bow_range(embeddings, 1, left-distance+1,
349 | self.bow_weights, 'left')
350 | bow_inside = weighted_bow_range(embeddings, left+distance+1, right-distance+1,
351 | self.bow_weights, 'middle')
352 | bow_after = weighted_bow_range(embeddings, right+distance+1, len(embeddings)-1,
353 | self.bow_weights, 'right')
354 | else:
355 | bow_before = bow_range(embeddings, 1, left-distance+1)
356 | bow_inside = bow_range(embeddings, left+distance+1, right-distance+1)
357 | bow_after = bow_range(embeddings, right+distance+1, len(embeddings)-1)
358 | return dy.concatenate([forward, backward, bow_before, bow_inside, bow_after])
359 | else:
360 | return dy.concatenate([forward, backward])
361 |
362 | return span_encoding
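
    # Batching trick used above: element t of `batched_embeddings` stacks,
    # along the batch dimension, the t-th input of the truncated window
    # around every fencepost, so a single transduce over 2*distance + 2 steps
    # (including the learned per-position start/stop vectors) processes all
    # positions at once; `pick_batch_elem` then reads out one fencepost. The
    # optional bag-of-words terms summarize everything the truncated windows
    # cannot see: before the span, inside it beyond the windows, and after it.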
363 |
364 | def get_truncated_no_lstm_span_encoding(self, embeddings, distance):
365 | padded_embeddings = [embeddings[0]]*(distance-1)+embeddings+[embeddings[-1]]*(distance-1)
366 | batched_embeddings = []
367 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
368 | self.indexed_starts[:len(embeddings)-1]]))
369 | for i in range(distance*2):
370 | selected = padded_embeddings[i:len(padded_embeddings)-(distance*2)+i+1]
371 | catted = dy.concatenate_to_batch(selected)
372 | batched_embeddings.append(catted)
373 | batched_embeddings.append(dy.concatenate_to_batch([dy.parameter(p) for p in
374 | self.indexed_stops[:len(embeddings)-1]]))
375 | assert batched_embeddings[0].dim()[1] == len(embeddings)-1 # batch dimension is length of sentence + 1
376 |
377 | context_outputs = self.context_network(dy.concatenate(batched_embeddings))
378 |
379 | @functools.lru_cache(maxsize=None)
380 | def span_encoding(left, right):
381 | return dy.pick_batch_elem(context_outputs, right) - \
382 | dy.pick_batch_elem(context_outputs, left)
383 |
384 | return span_encoding
385 |
386 | def get_shuffled_span_encoding(self, embeddings, distance):
387 | all_lstm_inputs = []
388 | for i in range(len(embeddings)-1):
389 | lstm_inputs = embeddings[:] # copy
390 |             shuffle(lstm_inputs, 1, i-distance+1)  # note: the start/end padding is never shuffled
391 | shuffle(lstm_inputs, i+distance+1, len(embeddings)-1)
392 | all_lstm_inputs.append(lstm_inputs)
393 |
394 | all_lstm_inputs = [dy.concatenate_to_batch(items) for items in transpose_lists(all_lstm_inputs)]
395 | all_lstm_outputs = self.transduce_lstm_batch(all_lstm_inputs)
396 |
397 | @functools.lru_cache(maxsize=None)
398 | def span_encoding(left, right):
399 | forward = (
400 | dy.pick_batch_elem(all_lstm_outputs[right], right)[:self.lstm_dim] -
401 | dy.pick_batch_elem(all_lstm_outputs[left], left)[:self.lstm_dim])
402 | backward = (
403 | dy.pick_batch_elem(all_lstm_outputs[left + 1], left)[self.lstm_dim:] -
404 | dy.pick_batch_elem(all_lstm_outputs[right + 1], right)[self.lstm_dim:])
405 | return dy.concatenate([forward, backward])
406 |
407 | return span_encoding
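
    # For the 'shuffled' variant, one copy of the sentence is made per
    # fencepost position i, with all embeddings outside a local window around
    # i randomly permuted (padding excluded). The copies are stacked along the
    # batch dimension so the LSTM still runs once, and span (left, right)
    # reads only batch elements left and right, whose local context is intact.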
408 |
409 | def get_inside_span_encoding(self, embeddings, distance, shuffle_inside=False):
410 | padded_embeddings = [embeddings[0]]*distance+embeddings+[embeddings[-1]]*distance
411 | all_spans = []
412 | all_lstm_outputs = []
413 | for i in range(len(embeddings)-2):
414 | for j in range(i+1,len(embeddings)-1):
415 | all_spans.append((i,j))
416 | lstm_inputs = padded_embeddings[i+1:j+1+2*distance]
417 | if shuffle_inside:
418 | shuffle(lstm_inputs, 2*distance, len(lstm_inputs)-2*distance)
419 | all_lstm_outputs.append(self.lstm.transduce(lstm_inputs))
420 | span_map = {span:idx for idx, span in enumerate(all_spans)}
421 |
422 | @functools.lru_cache(maxsize=None)
423 | def span_encoding(left, right):
424 | lstm_outputs = all_lstm_outputs[span_map[(left,right)]]
425 | forward = (
426 | lstm_outputs[-distance-1][:self.lstm_dim] -
427 | lstm_outputs[distance-1][:self.lstm_dim])
428 | backward = (
429 | lstm_outputs[distance][self.lstm_dim:] -
430 | lstm_outputs[-distance][self.lstm_dim:])
431 | return dy.concatenate([forward, backward])
432 |
433 | return span_encoding
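
    # The 'inside' variant runs a separate LSTM pass for each of the O(n^2)
    # candidate spans, over just the span's words plus `distance` positions of
    # (edge-padded) context on each side. This is far more expensive than the
    # batched variants above, but guarantees that the representation sees
    # nothing outside that window.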
434 |
435 | def get_embeddings(self, sentence, is_train=False):
436 | embeddings = []
437 | for tag, word in [(START, START)] + sentence + [(STOP, STOP)]:
438 | embed = []
439 |             is_common_word = word not in (START, STOP) and \
440 | self.word_vocab.count(word) > self.common_word_threshold
441 | if 't' in self.embedding_type:
442 | if is_common_word:
443 | tag = COMMON_WORD
444 | tag_embedding = self.tag_embeddings[self.tag_vocab.index(tag)]
445 | embed.append(tag_embedding)
446 | if 'c' in self.embedding_type:
447 | chars = list(word) if word not in (START, STOP) else [word]
448 | if is_common_word:
449 | chars = [COMMON_WORD]
450 | char_lstm_outputs = self.char_lstm.transduce([
451 | self.char_embeddings[self.char_vocab.index_or_unk(char, UNK)]
452 | for char in [START] + chars + [STOP]])
453 | char_encoding = dy.concatenate([
454 | char_lstm_outputs[-1][:self.char_lstm_dim],
455 | char_lstm_outputs[0][self.char_lstm_dim:]])
456 | embed.append(char_encoding)
457 | if 'w' in self.embedding_type:
458 | if word not in (START, STOP):
459 | count = self.word_vocab.count(word)
460 | if not count or (is_train and np.random.rand() < 1 / (1 + count)):
461 | word = UNK
462 | word_embedding = self.word_embeddings[self.word_vocab.index(word)]
463 | embed.append(word_embedding)
464 | embeddings.append(dy.concatenate(embed))
465 | return embeddings
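
    # Word-level dropout for the 'w' embeddings: at training time a word with
    # corpus count c is replaced by UNK with probability 1 / (1 + c), so
    # singletons are dropped half the time while frequent words are almost
    # always kept (e.g. c = 1 -> 50%, c = 9 -> 10%).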
466 |
467 | def get_representation_function(self, sentence, is_train):
468 | if self.lstm_type != "no-lstm":
469 | if is_train:
470 | self.lstm.set_dropout(self.dropout)
471 | else:
472 | self.lstm.disable_dropout()
473 | if 'c' in self.embedding_type:
474 | if is_train:
475 | self.char_lstm.set_dropout(self.dropout)
476 | else:
477 | self.char_lstm.disable_dropout()
478 |
479 |
480 | embeddings = self.get_embeddings(sentence, is_train)
481 |
482 | if self.lstm_type == 'truncated' or self.lstm_type == 'untied-truncated':
483 | span_encoding = self.get_truncated_span_encoding(embeddings, self.lstm_context_size, self.concat_bow, self.weight_bow, self.lstm_type == 'untied-truncated')
484 | elif self.lstm_type == 'no-lstm':
485 | span_encoding = self.get_truncated_no_lstm_span_encoding(embeddings, self.lstm_context_size)
486 | elif self.lstm_type == 'shuffled':
487 | span_encoding = self.get_shuffled_span_encoding(embeddings, self.lstm_context_size)
488 | elif self.lstm_type == 'inside':
489 | span_encoding = self.get_inside_span_encoding(embeddings, self.lstm_context_size)
490 | else:
491 | span_encoding = self.get_basic_span_encoding(embeddings)
492 |
493 | return span_encoding
494 |
495 | def lstm_derivative(self, sentence, position, index):
496 | self.lstm.disable_dropout()
497 | embeddings = self.get_embeddings(sentence, is_train=False)
498 | lstm_outputs = self.lstm.transduce(embeddings)
499 |
500 | forward = lstm_outputs[position][:self.lstm_dim]
501 | backward = lstm_outputs[position + 1][self.lstm_dim:]
502 | c = dy.concatenate([forward, backward])
503 | s = c[index]
504 | s.backward()
505 | gradients = [embed.gradient() for embed in embeddings]
506 | return gradients
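
    # lstm_derivative backpropagates from a single coordinate of the span
    # representation at `position` and returns the gradient with respect to
    # each input embedding, a direct measure of how sensitive an LSTM state
    # is to inputs at varying distances.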
507 |
508 | class TopDownParser(ParserBase):
509 | def __init__(
510 | self,
511 | model,
512 | label_hidden_dim,
513 | split_hidden_dim,
514 | span_representation_args
515 | ):
516 | super().__init__(model, *span_representation_args)
517 |
518 | self.spec = {'label_hidden_dim':label_hidden_dim, 'split_hidden_dim':split_hidden_dim, 'span_representation_args':span_representation_args}
519 |
520 | self.f_label = Feedforward(
521 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size)
522 | self.f_split = Feedforward(
523 | self.trainable_parameters, self.span_representation_dimension, [split_hidden_dim], 1)
524 |
525 | def parse(self, sentence, gold=None, explore=True):
526 | is_train = gold is not None
527 |
528 | get_span_encoding = self.get_representation_function(sentence, is_train)
529 |
530 | def helper(left, right):
531 | assert 0 <= left < right <= len(sentence)
532 |
533 | label_scores = self.f_label(get_span_encoding(left, right))
534 |
535 | if is_train:
536 | oracle_label = gold.oracle_label(left, right)
537 | oracle_label_index = self.label_vocab.index(oracle_label)
538 | label_scores = augment(label_scores, oracle_label_index)
539 |
540 | label_scores_np = label_scores.npvalue()
541 | argmax_label_index = int(
542 | label_scores_np.argmax() if right - left < len(sentence) else
543 | label_scores_np[1:].argmax() + 1)
544 | argmax_label = self.label_vocab.value(argmax_label_index)
545 |
546 | if is_train:
547 | label = argmax_label if explore else oracle_label
548 | label_loss = (
549 | label_scores[argmax_label_index] -
550 | label_scores[oracle_label_index]
551 | if argmax_label != oracle_label else dy.zeros(1))
552 | else:
553 | label = argmax_label
554 | label_loss = label_scores[argmax_label_index]
555 |
556 | if right - left == 1:
557 | tag, word = sentence[left]
558 | tree = trees.LeafParseNode(left, tag, word)
559 | if label:
560 | tree = trees.InternalParseNode(label, [tree])
561 | return [tree], label_loss
562 |
563 | left_encodings = []
564 | right_encodings = []
565 | for split in range(left + 1, right):
566 | left_encodings.append(get_span_encoding(left, split))
567 | right_encodings.append(get_span_encoding(split, right))
568 | left_scores = self.f_split(dy.concatenate_to_batch(left_encodings))
569 | right_scores = self.f_split(dy.concatenate_to_batch(right_encodings))
570 | split_scores = left_scores + right_scores
571 | split_scores = dy.reshape(split_scores, (len(left_encodings),))
572 |
573 | if is_train:
574 | oracle_splits = gold.oracle_splits(left, right)
575 | oracle_split = min(oracle_splits)
576 | oracle_split_index = oracle_split - (left + 1)
577 | split_scores = augment(split_scores, oracle_split_index)
578 |
579 | split_scores_np = split_scores.npvalue()
580 | argmax_split_index = int(split_scores_np.argmax())
581 | argmax_split = argmax_split_index + (left + 1)
582 |
583 | if is_train:
584 | split = argmax_split if explore else oracle_split
585 | split_loss = (
586 | split_scores[argmax_split_index] -
587 | split_scores[oracle_split_index]
588 | if argmax_split != oracle_split else dy.zeros(1))
589 | else:
590 | split = argmax_split
591 | split_loss = split_scores[argmax_split_index]
592 |
593 | left_trees, left_loss = helper(left, split)
594 | right_trees, right_loss = helper(split, right)
595 |
596 | children = left_trees + right_trees
597 | if label:
598 | children = [trees.InternalParseNode(label, children)]
599 |
600 | return children, label_loss + split_loss + left_loss + right_loss
601 |
602 | children, loss = helper(0, len(sentence))
603 | assert len(children) == 1
604 | tree = children[0]
605 | if is_train and not explore:
606 | assert gold.convert().linearize() == tree.convert().linearize()
607 | return tree, loss
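
    # Training uses loss-augmented greedy decoding: `augment` (defined
    # earlier in this file) presumably mirrors the inline augmentation in
    # ChartParser.parse below, raising every non-oracle score by a margin of
    # 1 before the argmax, so each label/split decision contributes a hinge
    # loss of score(argmax) - score(oracle). With explore=True the parser
    # descends along its own predictions rather than the oracle's, in the
    # style of dynamic-oracle training.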
608 |
609 | class ChartParser(ParserBase):
610 | def __init__(
611 | self,
612 | model,
613 | label_hidden_dim,
614 | span_representation_args
615 | ):
616 | super().__init__(model, *span_representation_args)
617 |
618 | self.spec = {'label_hidden_dim':label_hidden_dim, 'span_representation_args':span_representation_args}
619 |
620 | self.f_label = Feedforward(
621 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size - 1)
622 |
623 | def parse(self, sentence, gold=None):
624 | is_train = gold is not None
625 |
626 | get_span_encoding = self.get_representation_function(sentence, is_train)
627 |
628 | @functools.lru_cache(maxsize=None)
629 | def get_label_scores(left, right):
630 | non_empty_label_scores = self.f_label(get_span_encoding(left, right))
631 | return dy.concatenate([dy.zeros(1), non_empty_label_scores])
632 |
633 | def helper(force_gold):
634 | if force_gold:
635 | assert is_train
636 |
637 | chart = {}
638 |
639 | for length in range(1, len(sentence) + 1):
640 | for left in range(0, len(sentence) + 1 - length):
641 | right = left + length
642 |
643 | label_scores_expr = get_label_scores(left, right)
644 | label_scores_np = label_scores_expr.npvalue()
645 |
646 | if is_train:
647 | oracle_label = gold.oracle_label(left, right)
648 | oracle_label_index = self.label_vocab.index(oracle_label)
649 |
650 | if force_gold:
651 | label = oracle_label
652 | label_score_expr = label_scores_expr[oracle_label_index]
653 | label_score = label_scores_np[oracle_label_index]
654 | else:
655 | if is_train:
656 |                             # augment only the numpy copy, which is used for the argmax;
657 |                             # the _expr version stays unaugmented, but the margin is constant, so the gradient is unchanged
658 | label_scores_np += 1
659 | label_scores_np[oracle_label_index] -= 1
660 | argmax_label_index = int(
661 | label_scores_np.argmax() if length < len(sentence) else
662 | label_scores_np[1:].argmax() + 1)
663 | argmax_label = self.label_vocab.value(argmax_label_index)
664 | label = argmax_label
665 | label_score_expr = label_scores_expr[argmax_label_index]
666 | label_score = label_scores_np[argmax_label_index]
667 |
668 | if length == 1:
669 | tag, word = sentence[left]
670 | tree = trees.LeafParseNode(left, tag, word)
671 | if label:
672 | tree = trees.InternalParseNode(label, [tree])
673 | chart[left, right] = [tree], label_score, label_score_expr
674 | continue
675 |
676 | if force_gold:
677 | oracle_splits = gold.oracle_splits(left, right)
678 | oracle_split = min(oracle_splits)
679 | best_split = oracle_split
680 | else:
681 | best_split = max(
682 | range(left + 1, right),
683 | key=lambda split:
684 | chart[left, split][1] +
685 | chart[split, right][1])
686 |
687 | left_trees, left_score, left_score_expr = chart[left, best_split]
688 | right_trees, right_score, right_score_expr = chart[best_split, right]
689 |
690 | children = left_trees + right_trees
691 | if label:
692 | children = [trees.InternalParseNode(label, children)]
693 |
694 | chart[left, right] = (children, label_score + left_score + right_score,
695 | label_score_expr + left_score_expr + right_score_expr)
696 |
697 | children, score, score_expr = chart[0, len(sentence)]
698 | assert len(children) == 1
699 | return children[0], score, score_expr
700 |
701 | tree, score, score_expr = helper(False)
702 | if is_train:
703 | oracle_tree, oracle_score, oracle_score_expr = helper(True)
704 | assert oracle_tree.convert().linearize() == gold.convert().linearize()
705 | correct = tree.convert().linearize() == gold.convert().linearize()
706 | loss_expr = dy.zeros(1) if correct else score_expr - oracle_score_expr
707 | loss = 0 if correct else score - oracle_score
708 | augmentation = loss - loss_expr.value()
709 | return tree, loss_expr + augmentation
710 | else:
711 | return tree, score_expr
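
    # Global max-margin training: the loss is score(predicted tree) minus
    # score(oracle tree) under margin-augmented scores. Only the numpy copies
    # of the scores carry the +1 margins, so the scalar `augmentation` (the
    # total margin, a constant with zero gradient) is added back purely to
    # make the reported loss value match the augmented objective.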
712 |
713 | class IndependentParser(ParserBase):
714 | def __init__(
715 | self,
716 | model,
717 | label_hidden_dim,
718 | span_representation_args
719 | ):
720 | super().__init__(model, *span_representation_args)
721 |
722 | self.spec = {'label_hidden_dim':label_hidden_dim, 'span_representation_args':span_representation_args}
723 |
724 | self.f_label = Feedforward(
725 | self.trainable_parameters, self.span_representation_dimension, [label_hidden_dim], self.label_vocab.size - 1)
726 |
727 | def parse(self, sentence, gold=None):
728 | is_train = gold is not None
729 |
730 | get_span_encoding = self.get_representation_function(sentence, is_train)
731 |
732 | @functools.lru_cache(maxsize=None)
733 | def get_label_scores(left, right):
734 | non_empty_label_scores = self.f_label(get_span_encoding(left, right))
735 | return dy.concatenate([dy.zeros(1), non_empty_label_scores])
736 |
737 | brackets = trees.SpanList(sentence)
738 | total_loss = dy.zeros(1)
739 | for length in range(1, len(sentence) + 1):
740 | for left in range(0, len(sentence) + 1 - length):
741 | right = left + length
742 |
743 | label_scores_expr = get_label_scores(left, right)
744 | label_scores_np = label_scores_expr.npvalue()
745 |
746 | if is_train:
747 | oracle_label = gold.oracle_label(left, right)
748 | oracle_label_index = self.label_vocab.index(oracle_label)
749 | oracle_label_score_expr = label_scores_expr[oracle_label_index]
750 |
751 |                     # augment only the numpy copy, which is used for the argmax;
752 |                     # the _expr version stays unaugmented, but the margin is constant, so the gradient is unchanged
753 | label_scores_np += 1
754 | label_scores_np[oracle_label_index] -= 1
755 |
756 | argmax_label_index = int(
757 | label_scores_np.argmax() if length < len(sentence) else
758 | label_scores_np[1:].argmax() + 1)
759 | argmax_label = self.label_vocab.value(argmax_label_index)
760 | label = argmax_label
761 | label_score_expr = label_scores_expr[argmax_label_index]
762 | label_score = label_scores_np[argmax_label_index]
763 |                 for sublabel in label:  # the no-label case is an empty tuple, so nothing is added
764 | brackets.add(left, right, sublabel)
765 |
766 | if is_train and argmax_label != oracle_label:
767 | total_loss = total_loss + label_score_expr - oracle_label_score_expr
768 |
769 | return brackets, total_loss
770 |
771 | class LabelPrediction(ParserBase):  # probes an existing parser's span representations; never calls ParserBase.__init__
772 | def __init__(
773 | self,
774 | model,
775 | parser,
776 | label_hidden_dim,
777 | ):
778 |
779 | self.parser = parser
780 | self.label_hidden_dim = label_hidden_dim
781 | self.f_label = Feedforward(
782 | model, parser.span_representation_dimension, [label_hidden_dim], parser.label_vocab.size)
783 |
784 | def predict_parent_label_for_spans(self, sentence, gold, self_not_parent=False):
785 | span_encoding = self.parser.get_representation_function(sentence, is_train=False)
786 |
787 | correct = 0
788 | total = 0
789 | total_loss = dy.zeros(1)
790 | def accumulate(left, right, target_label_index):
791 | nonlocal correct, total, total_loss
792 | label_scores = self.f_label(span_encoding(left, right))
793 |
794 | # predicted label
795 | label_scores_np = label_scores.npvalue()
796 | argmax_label_index = int(label_scores_np.argmax())
797 | if argmax_label_index == target_label_index:
798 | correct += 1
799 | total += 1
800 |
801 | # loss for training
802 | augmented_label_scores = augment(label_scores, target_label_index)
803 | augmented_argmax_label_index = int(augmented_label_scores.npvalue().argmax())
804 | label_loss = (
805 | label_scores[augmented_argmax_label_index] -
806 | label_scores[target_label_index]
807 | if augmented_argmax_label_index != target_label_index else dy.zeros(1))
808 | total_loss = total_loss + label_loss
809 |
810 | for node, parent in gold.iterate_spans_with_parents(): # doesn't include top level
811 | label = node.label if self_not_parent else parent.label
812 | label_index = self.parser.label_vocab.index(label)
813 | accumulate(node.left, node.right, label_index)
814 | label = gold.label if self_not_parent else () # () represents no-label, since root has no parent
815 | label_index = self.parser.label_vocab.index(label)
816 | accumulate(gold.left, gold.right, label_index)
817 |
818 | return total_loss, correct, total
819 |
--------------------------------------------------------------------------------
/src/trees.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 |
3 | class TreebankNode(object):
4 | pass
5 |
6 | class InternalTreebankNode(TreebankNode):
7 | def __init__(self, label, children):
8 | assert isinstance(label, str)
9 | self.label = label
10 |
11 | assert isinstance(children, collections.abc.Sequence)
12 | assert all(isinstance(child, TreebankNode) for child in children)
13 | assert children
14 | self.children = tuple(children)
15 |
16 | def linearize(self):
17 | return "({} {})".format(
18 | self.label, " ".join(child.linearize() for child in self.children))
19 |
20 | def leaves(self):
21 | for child in self.children:
22 | yield from child.leaves()
23 |
24 | def convert(self, index=0):
25 | tree = self
26 | sublabels = [self.label]
27 |
28 | while len(tree.children) == 1 and isinstance(
29 | tree.children[0], InternalTreebankNode):
30 | tree = tree.children[0]
31 | sublabels.append(tree.label)
32 |
33 | children = []
34 | for child in tree.children:
35 | children.append(child.convert(index=index))
36 | index = children[-1].right
37 |
38 | return InternalParseNode(tuple(sublabels), children)
39 |
40 | def brackets(self, advp_prt=True):
41 | return self._brackets(0, advp_prt)[0]
42 |
43 | def _brackets(self, start=0, advp_prt=True):
44 | results = []
45 |
46 | position = start
47 | for child in self.children:
48 | b, e = child._brackets(position, advp_prt)
49 | results.extend(b)
50 | position = e
51 | end = position
52 |
53 | label = self.label
54 | if label != 'TOP':
55 |             if advp_prt and label == 'PRT':
56 | label = 'ADVP'
57 | results.append((start, end, label))
58 |
59 | return results, end
60 |
61 | class LeafTreebankNode(TreebankNode):
62 | def __init__(self, tag, word):
63 | assert isinstance(tag, str)
64 | self.tag = tag
65 |
66 | assert isinstance(word, str)
67 | self.word = word
68 |
69 | def linearize(self):
70 | return "({} {})".format(self.tag, self.word)
71 |
72 | def leaves(self):
73 | yield self
74 |
75 | def convert(self, index=0):
76 | return LeafParseNode(index, self.tag, self.word)
77 |
78 | def _brackets(self, start=0, advp_prt=True):
79 | if self.tag in [",", ".", ":", "``", "''"]:
80 | return [], start
81 | else:
82 | return [], start+1
83 |
84 | class ParseNode(object):
85 | pass
86 |
87 | class InternalParseNode(ParseNode):
88 | def __init__(self, label, children):
89 | assert isinstance(label, tuple)
90 | assert all(isinstance(sublabel, str) for sublabel in label)
91 | assert label
92 | self.label = label
93 |
94 | assert isinstance(children, collections.abc.Sequence)
95 | assert all(isinstance(child, ParseNode) for child in children)
96 | assert children
97 | assert len(children) > 1 or isinstance(children[0], LeafParseNode)
98 | assert all(
99 | left.right == right.left
100 | for left, right in zip(children, children[1:]))
101 | self.children = tuple(children)
102 |
103 | self.left = children[0].left
104 | self.right = children[-1].right
105 |
106 | def leaves(self):
107 | for child in self.children:
108 | yield from child.leaves()
109 |
110 | def convert(self):
111 | children = [child.convert() for child in self.children]
112 | tree = InternalTreebankNode(self.label[-1], children)
113 | for sublabel in reversed(self.label[:-1]):
114 | tree = InternalTreebankNode(sublabel, [tree])
115 | return tree
116 |
117 | def enclosing(self, left, right):
118 | assert self.left <= left < right <= self.right
119 | for child in self.children:
120 | if isinstance(child, LeafParseNode):
121 | continue
122 | if child.left <= left < right <= child.right:
123 | return child.enclosing(left, right)
124 | return self
125 |
126 | def oracle_label(self, left, right):
127 | enclosing = self.enclosing(left, right)
128 | if enclosing.left == left and enclosing.right == right:
129 | return enclosing.label
130 | return ()
131 |
132 | def oracle_splits(self, left, right):
133 | return [
134 | child.left
135 | for child in self.enclosing(left, right).children
136 | if left < child.left < right
137 | ]
138 |
139 | def iterate_spans_with_parents(self):
140 | for child in self.children:
141 | if isinstance(child, InternalParseNode):
142 | yield child, self
143 | yield from child.iterate_spans_with_parents()
144 |
145 | class LeafParseNode(ParseNode):
146 | def __init__(self, index, tag, word):
147 | assert isinstance(index, int)
148 | assert index >= 0
149 | self.left = index
150 | self.right = index + 1
151 |
152 | assert isinstance(tag, str)
153 | self.tag = tag
154 |
155 | assert isinstance(word, str)
156 | self.word = word
157 |
158 | def leaves(self):
159 | yield self
160 |
161 | def convert(self):
162 | return LeafTreebankNode(self.tag, self.word)
163 |
164 | def iterate_spans_with_parents(self):
165 | return []
166 |
167 | class SpanList(object):
168 | def __init__(self, tagged_words):
169 | # tagged_words is list of (tag, word) tuples
170 | self.tagged_words = tagged_words
171 | self.list = []
172 |
173 | def add(self, left, right, label):
174 | assert isinstance(label, str)
175 | self.list.append((left, right, label))
176 |
177 | def convert(self):
178 | return self
179 |
180 | def brackets(self, advp_prt=True):
181 | location_shift = []
182 | i = 0
183 | for tag, word in self.tagged_words:
184 | location_shift.append(i)
185 | if tag not in [",", ".", ":", "``", "''"]:
186 | i += 1
187 | location_shift.append(i)
188 | result = []
189 | for left, right, label in self.list:
190 | if label == 'TOP':
191 | continue
192 |             if advp_prt and label == 'PRT':
193 | label = 'ADVP'
194 | result.append((location_shift[left], location_shift[right], label))
195 | return result
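
    # location_shift[i] is position i recounted with EVALB-deleted
    # punctuation (, . : `` '') removed, so these brackets line up with the
    # ones produced by InternalTreebankNode.brackets above.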
196 |
197 | def is_tree(self):
198 |         # a bracket set forms a tree iff no two brackets cross
199 | n_crossing = 0
200 | for left1, right1, label1 in self.list:
201 | for left2, right2, label2 in self.list:
202 | if left1 < left2 < right1 < right2:
203 | n_crossing += 1
204 | return n_crossing == 0
205 |
206 | def load_trees(path, strip_top=True, strip_spmrl_features=True):
207 | with open(path) as infile:
208 | treebank = infile.read()
209 |
210 | # Features bounded by `##` may contain spaces, so if we strip the features
211 | # we need to do so prior to tokenization
212 | if strip_spmrl_features:
213 | treebank = "".join(treebank.split("##")[::2])
214 |
215 | tokens = treebank.replace("(", " ( ").replace(")", " ) ").split()
216 |
217 | # XXX(nikita): this should really be passed as an argument
218 | if 'Hebrew' in path or 'Hungarian' in path or 'Arabic' in path:
219 | strip_top = False
220 |
221 | def helper(index):
222 | trees = []
223 |
224 | while index < len(tokens) and tokens[index] == "(":
225 | paren_count = 0
226 | while tokens[index] == "(":
227 | index += 1
228 | paren_count += 1
229 |
230 | label = tokens[index]
231 | index += 1
232 |
233 | if tokens[index] == "(":
234 | children, index = helper(index)
235 | trees.append(InternalTreebankNode(label, children))
236 | else:
237 | word = tokens[index]
238 | index += 1
239 | trees.append(LeafTreebankNode(label, word))
240 |
241 | while paren_count > 0:
242 | assert tokens[index] == ")"
243 | index += 1
244 | paren_count -= 1
245 |
246 | return trees, index
247 |
248 | trees, index = helper(0)
249 | assert index == len(tokens)
250 |
251 | # XXX(nikita): this behavior should really be controlled by an argument
252 | if 'German' in path:
253 | # Utterances where the root is a terminal symbol break our parser's
254 | # assumptions, so insert a dummy root node.
255 | for i, tree in enumerate(trees):
256 | if isinstance(tree, LeafTreebankNode):
257 | trees[i] = InternalTreebankNode("VROOT", [tree])
258 |
259 | if strip_top:
260 | for i, tree in enumerate(trees):
261 | if tree.label in ("TOP", "ROOT"):
262 | assert len(tree.children) == 1
263 | trees[i] = tree.children[0]
264 |
265 | return trees
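
# A minimal usage sketch (run from the repository root, using one of the data
# files shipped with this repo):
#
#   trees = load_trees("data/22.auto.clean")
#   parse = trees[0].convert()   # TreebankNode -> ParseNode, collapsing unary chains
#   spans = trees[0].brackets()  # [(start, end, label), ...] with punctuation skipped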
266 |
--------------------------------------------------------------------------------
/src/vocabulary.py:
--------------------------------------------------------------------------------
1 | import collections
2 |
3 | class Vocabulary(object):
4 | def __init__(self):
5 | self.frozen = False
6 | self.values = []
7 | self.indices = {}
8 | self.counts = collections.defaultdict(int)
9 |
10 | @property
11 | def size(self):
12 | return len(self.values)
13 |
14 | def value(self, index):
15 | assert 0 <= index < len(self.values)
16 | return self.values[index]
17 |
18 | def index(self, value):
19 | if not self.frozen:
20 | self.counts[value] += 1
21 |
22 | if value in self.indices:
23 | return self.indices[value]
24 |
25 | elif not self.frozen:
26 | self.values.append(value)
27 | self.indices[value] = len(self.values) - 1
28 | return self.indices[value]
29 |
30 | else:
31 | raise ValueError("Unknown value: {}".format(value))
32 |
33 | def index_or_unk(self, value, unk_value):
34 | assert self.frozen
35 | if value in self.indices:
36 | return self.indices[value]
37 | else:
38 | return self.indices[unk_value]
39 |
40 | def count(self, value):
41 | return self.counts[value]
42 |
43 | def freeze(self):
44 | self.frozen = True
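
# A minimal usage sketch ("the" stands in for a real UNK token here):
#
#   vocab = Vocabulary()
#   for word in ["the", "cat", "the"]:
#       vocab.index(word)              # grows the vocabulary and counts values
#   vocab.freeze()
#   vocab.index("the")                 # -> 0 (no new values once frozen)
#   vocab.index_or_unk("dog", "the")   # -> 0, falls back to the given unk value
#   vocab.count("the")                 # -> 2 (counts accumulate only before freeze)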
45 |
--------------------------------------------------------------------------------