├── .gitignore
├── LICENSE
├── README.md
├── bmstparser
    └── src
    │   ├── decoder.py
    │   ├── mstlstm.py
    │   ├── parser.py
    │   ├── utils.py
    │   └── utils
    │       ├── eval.pl
    │       └── evaluation_script
    │           ├── conll17_ud_eval.py
    │           └── weights.clas
└── corpus
    ├── en-ud-dev.conllu
    ├── en-ud-test.conllu
    ├── en-ud-train.conllu
    ├── zh-ud-dev.conllu
    ├── zh-ud-test.conllu
    └── zh-ud-train.conllu


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # file for test
  7 | test.py
  8 | 
  9 | #editorial solution source code
 10 | editorial/*.md
 11 | 
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | .hypothesis/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # SageMath parsed files
 86 | *.sage.py
 87 | 
 88 | # Environments
 89 | .env
 90 | .venv
 91 | env/
 92 | venv/
 93 | ENV/
 94 | 
 95 | # Spyder project settings
 96 | .spyderproject
 97 | .spyproject
 98 | 
 99 | # Rope project settings
100 | .ropeproject
101 | 
102 | # mkdocs documentation
103 | /site
104 | 
105 | # mypy
106 | .mypy_cache/
107 | 
108 | ## model
109 | model/
110 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # A Pytorch implementation of the BIST Parsers (for graph based parser only)
 2 | This implement is a simplified version which removes some unnecessary flag and applies `nn Module` in Pytorch to construct LSTM network instead of `LSTMCell`. Besides, some more tags are supported and you can refer it from option list.
 3 | The techniques behind the parser are described in the paper [Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations](https://www.transacl.org/ojs/index.php/tacl/article/viewFile/885/198).
 4 | 
 5 | #### Required software
 6 | 
 7 |  * Python 3.x interpreter
 8 |  * [Pytorch library](http://pytorch.org/)
 9 | 
10 | 
11 | #### Data format:
12 | The software requires having a `training.conll` and `development.conll` files formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat), or a `training.conllu` and `development.conllu` files formatted according to the [CoNLLU data format](http://universaldependencies.org/format.html).
13 | 
14 | #### Train a parsing model
15 | 
16 |     python src/parser.py --outdir [results directory] --train training.conll --dev development.conll --epochs 30 --lstmdims 125 --lstmlayers 2 [--extrn extrn.vectors]
17 | 
18 | #### Parse data with your parsing model
19 | 
20 | The command for parsing a `test.conll` file formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat) with a previously trained model is:
21 | 
22 |     python src/parser.py --predict --outdir [results directory] --test test.conll [--extrn extrn.vectors] --model [trained model file] --params [param file generate during training]
23 | 
24 | The parser will store the resulting conll file in the out directory (`--outdir`).
25 | 
26 | #### Some instructions
27 | 
28 | 1. The multiple roots checking of the evaluation script is turned off (See [here](https://github.com/wddabc/bist-parser/blob/pytorch/bmstparser/src/utils/evaluation_script/conll17_ud_eval.py#L168-L172)) as it might generate trees with multiple roots. (See the discussion [here](https://github.com/elikip/bist-parser/issues/10)) 
29 | 2. This version delete some unnecessary flag and set the bi-LSTM to be mandatory(2 bi-LSTM layer)
30 | 3. You can refer forward attribute in mst-parser model for dropout rate of different components.
31 | 4. Anything you think can improve performance please contact and discuss with me.
32 | 


--------------------------------------------------------------------------------
/bmstparser/src/decoder.py:
--------------------------------------------------------------------------------
  1 | # This file contains routines from Lisbon Machine Learning summer school.
  2 | # The code is freely distributed under a MIT license. https://github.com/LxMLS/lxmls-toolkit/
  3 | 
  4 | import numpy as np
  5 | 
  6 | def parse_proj(scores, gold=None):
  7 |     '''
  8 |     Parse using Eisner's algorithm.
  9 |     '''
 10 |     nr, nc = np.shape(scores)
 11 |     if nr != nc:
 12 |         raise ValueError("scores must be a squared matrix with nw+1 rows")
 13 | 
 14 |     N = nr - 1  # Number of words (excluding root).
 15 | 
 16 |     # Initialize CKY table.
 17 |     complete = np.zeros([N + 1, N + 1, 2])  # s, t, direction (right=1).
 18 |     incomplete = np.zeros([N + 1, N + 1, 2])  # s, t, direction (right=1).
 19 |     complete_backtrack = -np.ones([N + 1, N + 1, 2], dtype=int)  # s, t, direction (right=1).
 20 |     incomplete_backtrack = -np.ones([N + 1, N + 1, 2], dtype=int)  # s, t, direction (right=1).
 21 | 
 22 |     incomplete[0, :, 0] -= np.inf
 23 | 
 24 |     # Loop from smaller items to larger items.
 25 |     for k in range(1, N + 1):
 26 |         for s in range(N - k + 1):
 27 |             t = s + k
 28 | 
 29 |             # First, create incomplete items.
 30 |             # left tree
 31 |             incomplete_vals0 = complete[s, s:t, 1] + complete[(s + 1):(t + 1), t, 0] + scores[t, s] + (
 32 |             0.0 if gold is not None and gold[s] == t else 1.0)
 33 |             incomplete[s, t, 0] = np.max(incomplete_vals0)
 34 |             incomplete_backtrack[s, t, 0] = s + np.argmax(incomplete_vals0)
 35 |             # right tree
 36 |             incomplete_vals1 = complete[s, s:t, 1] + complete[(s + 1):(t + 1), t, 0] + scores[s, t] + (
 37 |             0.0 if gold is not None and gold[t] == s else 1.0)
 38 |             incomplete[s, t, 1] = np.max(incomplete_vals1)
 39 |             incomplete_backtrack[s, t, 1] = s + np.argmax(incomplete_vals1)
 40 | 
 41 |             # Second, create complete items.
 42 |             # left tree
 43 |             complete_vals0 = complete[s, s:t, 0] + incomplete[s:t, t, 0]
 44 |             complete[s, t, 0] = np.max(complete_vals0)
 45 |             complete_backtrack[s, t, 0] = s + np.argmax(complete_vals0)
 46 |             # right tree
 47 |             complete_vals1 = incomplete[s, (s + 1):(t + 1), 1] + complete[(s + 1):(t + 1), t, 1]
 48 |             complete[s, t, 1] = np.max(complete_vals1)
 49 |             complete_backtrack[s, t, 1] = s + 1 + np.argmax(complete_vals1)
 50 | 
 51 |     value = complete[0][N][1]
 52 |     heads = [-1 for _ in range(N + 1)]  # -np.ones(N+1, dtype=int)
 53 |     backtrack_eisner(incomplete_backtrack, complete_backtrack, 0, N, 1, 1, heads)
 54 | 
 55 |     value_proj = 0.0
 56 |     for m in range(1, N + 1):
 57 |         h = heads[m]
 58 |         value_proj += scores[h, m]
 59 | 
 60 |     return heads
 61 | 
 62 | 
 63 | def backtrack_eisner(incomplete_backtrack, complete_backtrack, s, t, direction, complete, heads):
 64 |     '''
 65 |     Backtracking step in Eisner's algorithm.
 66 |     - incomplete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position,
 67 |     an end position, and a direction flag (0 means left, 1 means right). This array contains
 68 |     the arg-maxes of each step in the Eisner algorithm when building *incomplete* spans.
 69 |     - complete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position,
 70 |     an end position, and a direction flag (0 means left, 1 means right). This array contains
 71 |     the arg-maxes of each step in the Eisner algorithm when building *complete* spans.
 72 |     - s is the current start of the span
 73 |     - t is the current end of the span
 74 |     - direction is 0 (left attachment) or 1 (right attachment)
 75 |     - complete is 1 if the current span is complete, and 0 otherwise
 76 |     - heads is a (NW+1)-sized numpy array of integers which is a placeholder for storing the
 77 |     head of each word.
 78 |     '''
 79 |     if s == t:
 80 |         return
 81 |     if complete:
 82 |         r = complete_backtrack[s][t][direction]
 83 |         if direction == 0:
 84 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 0, 1, heads)
 85 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 0, 0, heads)
 86 |             return
 87 |         else:
 88 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 0, heads)
 89 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 1, 1, heads)
 90 |             return
 91 |     else:
 92 |         r = incomplete_backtrack[s][t][direction]
 93 |         if direction == 0:
 94 |             heads[s] = t
 95 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads)
 96 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, r + 1, t, 0, 1, heads)
 97 |             return
 98 |         else:
 99 |             heads[t] = s
100 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads)
101 |             backtrack_eisner(incomplete_backtrack, complete_backtrack, r + 1, t, 0, 1, heads)
102 |             return
103 | 


--------------------------------------------------------------------------------
/bmstparser/src/mstlstm.py:
--------------------------------------------------------------------------------
  1 | import shutil
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.nn.init import *
  5 | from torch import optim
  6 | from utils import read_conll
  7 | from operator import itemgetter
  8 | import utils
  9 | import time
 10 | import random
 11 | import decoder
 12 | import numpy as np
 13 | import torch.autograd as autograd
 14 | import os
 15 | 
 16 | use_gpu = True if torch.cuda.is_available() else False
 17 | 
 18 | 
 19 | def get_data(variable):
 20 |     if use_gpu:
 21 |         return variable.data.cpu()
 22 |     else:
 23 |         return variable.data
 24 | 
 25 | 
 26 | def Variable(inner):
 27 |     return torch.autograd.Variable(inner.cuda() if use_gpu else inner)
 28 | 
 29 | 
 30 | def Parameter(shape=None, init=xavier_uniform):
 31 |     if hasattr(init, 'shape'):
 32 |         assert not shape
 33 |         return nn.Parameter(torch.Tensor(init))
 34 |     shape = (1, shape) if type(shape) == int else shape
 35 |     return nn.Parameter(init(torch.Tensor(*shape)))
 36 | 
 37 | 
 38 | def scalar(f):
 39 |     if type(f) == int:
 40 |         return Variable(torch.LongTensor([f]))
 41 |     if type(f) == float:
 42 |         return Variable(torch.FloatTensor([f]))
 43 | 
 44 | 
 45 | def cat(l, dimension=-1):
 46 |     valid_l = [x for x in l if x is not None]
 47 |     if dimension < 0:
 48 |         dimension += len(valid_l[0].size())
 49 |     return torch.cat(valid_l, dimension)
 50 | 
 51 | 
 52 | class MSTParserLSTMModel(nn.Module):
 53 |     def __init__(self, vocab, pos, rels, enum_word, options, onto, cpos):
 54 |         super(MSTParserLSTMModel, self).__init__()
 55 |         random.seed(1)
 56 |         self.activations = {'tanh': F.tanh,
 57 |                             'sigmoid': F.sigmoid, 'relu': F.relu}
 58 |         self.activation = self.activations[options.activation]
 59 | 
 60 |         self.ldims = options.lstm_dims
 61 |         self.wdims = options.wembedding_dims
 62 |         self.pdims = options.pembedding_dims
 63 |         self.rdims = options.rembedding_dims
 64 |         self.odims = options.oembedding_dims
 65 |         self.cdims = options.cembedding_dims
 66 |         self.layers = options.lstm_layers
 67 |         self.wordsCount = vocab
 68 |         self.vocab = {word: ind + 3 for word, ind in enum_word.items()}
 69 |         self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
 70 |         self.onto = {word: ind + 3 for ind, word in enumerate(onto)}
 71 |         self.cpos = {word: ind + 3 for ind, word in enumerate(cpos)}
 72 |         self.rels = {word: ind for ind, word in enumerate(rels)}
 73 |         self.rel_list = rels
 74 |         self.hidden_units = options.hidden_units
 75 |         self.hidden2_units = options.hidden2_units
 76 | 
 77 |         self.vocab['*PAD*'] = 1
 78 |         self.pos['*PAD*'] = 1
 79 |         self.onto['*PAD*'] = 1
 80 |         self.cpos['*PAD*'] = 1
 81 |         self.vocab['*INITIAL*'] = 2
 82 |         self.pos['*INITIAL*'] = 2
 83 |         self.onto['*INITIAL*'] = 2
 84 |         self.cpos['*INITIAL*'] = 2
 85 | 
 86 |         self.external_embedding, self.edim = None, 0
 87 | 
 88 |         if options.external_embedding is not None:
 89 |             external_embedding_fp = open(options.external_embedding, 'r')
 90 |             external_embedding_fp.readline()
 91 |             self.external_embedding = {line.split(' ')[0]: [float(f) for f in line.strip().split(' ')[1:]] for line in
 92 |                                        external_embedding_fp}
 93 |             external_embedding_fp.close()
 94 |             self.edim = len(list(self.external_embedding.values())[0])
 95 |             self.extrnd = {word: i + 3 for i,
 96 |                            word in enumerate(self.external_embedding)}
 97 |             np_emb = np.zeros((len(self.external_embedding) + 3, self.edim))
 98 |             for word, i in self.extrnd.items():
 99 |                 np_emb[i] = self.external_embedding[word]
100 |             self.elookup = nn.Embedding(*np_emb.shape)
101 |             self.elookup.weight = Parameter(init=np_emb)
102 |             self.extrnd['*PAD*'] = 1
103 |             self.extrnd['*INITIAL*'] = 2
104 |             print('Load external embedding. Vector dimensions', self.edim)
105 | 
106 |         # prepare LSTM
107 |         self.lstm_for_1 = nn.LSTM(
108 |             self.wdims + self.pdims + self.edim + self.odims + self.cdims, self.ldims)
109 |         self.lstm_back_1 = nn.LSTM(
110 |             self.wdims + self.pdims + self.edim + self.odims + self.cdims, self.ldims)
111 |         self.lstm_for_2 = nn.LSTM(self.ldims * 2, self.ldims)
112 |         self.lstm_back_2 = nn.LSTM(self.ldims * 2, self.ldims)
113 |         self.hid_for_1, self.hid_back_1, self.hid_for_2, self.hid_back_2 = [
114 |             self.init_hidden(self.ldims) for _ in range(4)]
115 | 
116 |         self.wlookup = nn.Embedding(len(vocab) + 3, self.wdims)
117 |         self.plookup = nn.Embedding(len(pos) + 3, self.pdims)
118 |         self.rlookup = nn.Embedding(len(rels), self.rdims)
119 |         self.olookup = nn.Embedding(len(onto) + 3, self.odims)
120 |         self.clookup = nn.Embedding(len(cpos) + 3, self.cdims)
121 | 
122 |         self.hidLayerFOH = Parameter((self.ldims * 2, self.hidden_units))
123 |         self.hidLayerFOM = Parameter((self.ldims * 2, self.hidden_units))
124 |         self.hidBias = Parameter((self.hidden_units))
125 |         self.catBias = Parameter((self.hidden_units * 2))
126 |         self.rhidLayerFOH = Parameter((2 * self.ldims, self.hidden_units))
127 |         self.rhidLayerFOM = Parameter((2 * self.ldims, self.hidden_units))
128 |         self.rhidBias = Parameter((self.hidden_units))
129 |         self.rcatBias = Parameter((self.hidden_units * 2))
130 |         #
131 |         if self.hidden2_units:
132 |             self.hid2Layer = Parameter(
133 |                 (self.hidden_units * 2, self.hidden2_units))
134 |             self.hid2Bias = Parameter((self.hidden2_units))
135 |             self.rhid2Layer = Parameter(
136 |                 (self.hidden_units * 2, self.hidden2_units))
137 |             self.rhid2Bias = Parameter((self.hidden2_units))
138 | 
139 |         self.outLayer = Parameter(
140 |             (self.hidden2_units if self.hidden2_units > 0 else self.hidden_units, 1))
141 |         self.outBias = 0  # Parameter(1)
142 |         self.routLayer = Parameter(
143 |             (self.hidden2_units if self.hidden2_units > 0 else self.hidden_units, len(self.rel_list)))
144 |         self.routBias = Parameter((len(self.rel_list)))
145 | 
146 |     def init_hidden(self, dim):
147 |         return (autograd.Variable(torch.zeros(1, 1, dim).cuda() if use_gpu else torch.zeros(1, 1, dim)),
148 |                 autograd.Variable(torch.zeros(1, 1, dim).cuda() if use_gpu else torch.zeros(1, 1, dim)))
149 | 
150 |     def __getExpr(self, sentence, i, j, train):
151 | 
152 |         if sentence[i].headfov is None:
153 |             sentence[i].headfov = torch.mm(cat([sentence[i].lstms[0], sentence[i].lstms[1]]),
154 |                                            self.hidLayerFOH)
155 | 
156 |         if sentence[j].modfov is None:
157 |             sentence[j].modfov = torch.mm(cat([sentence[j].lstms[0], sentence[j].lstms[1]]),
158 |                                           self.hidLayerFOM)
159 | 
160 |         if self.hidden2_units > 0:
161 |             output = torch.mm(
162 |                 self.activation(
163 |                     self.hid2Bias +
164 |                     torch.mm(self.activation(cat([sentence[i].headfov, sentence[j].modfov]) + self.catBias),
165 |                              self.hid2Layer)
166 |                 ),
167 |                 self.outLayer
168 |             ) + self.outBias
169 | 
170 |         else:
171 |             output = torch.mm(
172 |                 self.activation(
173 |                     sentence[i].headfov + sentence[j].modfov + self.hidBias),
174 |                 self.outLayer) + self.outBias
175 |         return output
176 | 
177 |     def __evaluate(self, sentence, train):
178 |         exprs = [[self.__getExpr(sentence, i, j, train)
179 |                   for j in range(len(sentence))]
180 |                  for i in range(len(sentence))]
181 |         scores = np.array([[get_data(output).numpy()[0, 0]
182 |                             for output in exprsRow] for exprsRow in exprs])
183 |         return scores, exprs
184 | 
185 |     def __evaluateLabel(self, sentence, i, j):
186 |         if sentence[i].rheadfov is None:
187 |             sentence[i].rheadfov = torch.mm(cat([sentence[i].lstms[0], sentence[i].lstms[1]]),
188 |                                             self.rhidLayerFOH)
189 | 
190 |         if sentence[j].rmodfov is None:
191 |             sentence[j].rmodfov = torch.mm(cat([sentence[j].lstms[0], sentence[j].lstms[1]]),
192 |                                            self.rhidLayerFOM)
193 | 
194 |         if self.hidden2_units > 0:
195 |             output = torch.mm(
196 |                 self.activation(
197 |                     self.rhid2Bias +
198 |                     torch.mm(
199 |                         self.activation(
200 |                             cat([sentence[i].rheadfov, sentence[j].rmodfov]) + self.rcatBias),
201 |                         self.rhid2Layer
202 |                     )),
203 |                 self.routLayer
204 |             ) + self.routBias
205 | 
206 |         else:
207 |             output = torch.mm(
208 |                 self.activation(sentence[i].rheadfov +
209 |                                 sentence[j].rmodfov + self.rhidBias),
210 |                 self.routLayer
211 |             ) + self.routBias
212 | 
213 |         return get_data(output).numpy()[0], output[0]
214 | 
215 |     def predict(self, sentence):
216 |         for entry in sentence:
217 |             wordvec = self.wlookup(
218 |                 scalar(int(self.vocab.get(entry.norm, 0)))) if self.wdims > 0 else None
219 |             posvec = self.plookup(
220 |                 scalar(int(self.pos[entry.pos]))) if self.pdims > 0 else None
221 |             ontovec = self.olookup(
222 |                 scalar(int(self.onto[entry.onto]))) if self.odims > 0 else None
223 |             cposvec = self.clookup(
224 |                 scalar(int(self.cpos[entry.cpos]))) if self.cdims > 0 else None
225 |             evec = self.elookup(scalar(int(self.extrnd.get(entry.form,
226 |                                                            self.extrnd.get(entry.norm, 0))))) if self.external_embedding is not None else None
227 |             entry.vec = cat([wordvec, posvec, ontovec, cposvec, evec])
228 | 
229 |             entry.lstms = [entry.vec, entry.vec]
230 |             entry.headfov = None
231 |             entry.modfov = None
232 | 
233 |             entry.rheadfov = None
234 |             entry.rmodfov = None
235 | 
236 |         num_vec = len(sentence)
237 |         vec_for = torch.cat(
238 |             [entry.vec for entry in sentence]).view(num_vec, 1, -1)
239 |         vec_back = torch.cat(
240 |             [entry.vec for entry in reversed(sentence)]).view(num_vec, 1, -1)
241 |         res_for_1, self.hid_for_1 = self.lstm_for_1(vec_for, self.hid_for_1)
242 |         res_back_1, self.hid_back_1 = self.lstm_back_1(
243 |             vec_back, self.hid_back_1)
244 | 
245 |         vec_cat = [cat([res_for_1[i], res_back_1[num_vec - i - 1]])
246 |                    for i in range(num_vec)]
247 | 
248 |         vec_for_2 = torch.cat(vec_cat).view(num_vec, 1, -1)
249 |         vec_back_2 = torch.cat(list(reversed(vec_cat))).view(num_vec, 1, -1)
250 |         res_for_2, self.hid_for_2 = self.lstm_for_2(vec_for_2, self.hid_for_2)
251 |         res_back_2, self.hid_back_2 = self.lstm_back_2(
252 |             vec_back_2, self.hid_back_2)
253 | 
254 |         for i in range(num_vec):
255 |             sentence[i].lstms[0] = res_for_2[i]
256 |             sentence[i].lstms[1] = res_back_2[num_vec - i - 1]
257 | 
258 |         scores, exprs = self.__evaluate(sentence, True)
259 |         heads = decoder.parse_proj(scores)
260 | 
261 |         for entry, head in zip(sentence, heads):
262 |             entry.pred_parent_id = head
263 |             entry.pred_relation = '_'
264 | 
265 |         head_list = list(heads)
266 |         for modifier, head in enumerate(head_list[1:]):
267 |             scores, exprs = self.__evaluateLabel(
268 |                 sentence, head, modifier + 1)
269 |             sentence[modifier + 1].pred_relation = self.rel_list[max(
270 |                 enumerate(scores), key=itemgetter(1))[0]]
271 | 
272 |     def forward(self, sentence, errs, lerrs):
273 | 
274 |         for entry in sentence:
275 |             c = float(self.wordsCount.get(entry.norm, 0))
276 |             # dropFlag = (random.random() < (c / (0.33 + c)))
277 |             dropFlag = (random.random() < (c / (0.25 + c)))
278 |             wordvec = self.wlookup(scalar(
279 |                 int(self.vocab.get(entry.norm, 0)) if dropFlag else 0)) if self.wdims > 0 else None
280 |             ontovec = self.olookup(scalar(int(self.onto[entry.onto]) if random.random(
281 |             ) < 0.9 else 0)) if self.odims > 0 else None
282 |             cposvec = self.clookup(scalar(int(self.cpos[entry.cpos]) if random.random(
283 |             ) < 0.9 else 0)) if self.cdims > 0 else None
284 |             posvec = self.plookup(
285 |                 scalar(int(self.pos[entry.pos]))) if self.pdims > 0 else None
286 |             # posvec = self.plookup(
287 |             #     scalar(0 if dropFlag and random.random() < 0.1 else int(self.pos[entry.pos]))) if self.pdims > 0 else None
288 |             evec = None
289 |             if self.external_embedding is not None:
290 |                 evec = self.elookup(scalar(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)) if (
291 |                     dropFlag or (random.random() < 0.5)) else 0))
292 | 
293 |             entry.vec = cat([wordvec, posvec, ontovec, cposvec, evec])
294 |             entry.lstms = [entry.vec, entry.vec]
295 |             entry.headfov = None
296 |             entry.modfov = None
297 | 
298 |             entry.rheadfov = None
299 |             entry.rmodfov = None
300 | 
301 |         num_vec = len(sentence)
302 |         vec_for = torch.cat(
303 |             [entry.vec for entry in sentence]).view(num_vec, 1, -1)
304 |         vec_back = torch.cat(
305 |             [entry.vec for entry in reversed(sentence)]).view(num_vec, 1, -1)
306 |         res_for_1, self.hid_for_1 = self.lstm_for_1(vec_for, self.hid_for_1)
307 |         res_back_1, self.hid_back_1 = self.lstm_back_1(
308 |             vec_back, self.hid_back_1)
309 | 
310 |         vec_cat = [cat([res_for_1[i], res_back_1[num_vec - i - 1]])
311 |                    for i in range(num_vec)]
312 | 
313 |         vec_for_2 = torch.cat(vec_cat).view(num_vec, 1, -1)
314 |         vec_back_2 = torch.cat(list(reversed(vec_cat))).view(num_vec, 1, -1)
315 |         res_for_2, self.hid_for_2 = self.lstm_for_2(vec_for_2, self.hid_for_2)
316 |         res_back_2, self.hid_back_2 = self.lstm_back_2(
317 |             vec_back_2, self.hid_back_2)
318 | 
319 |         for i in range(num_vec):
320 |             sentence[i].lstms[0] = res_for_2[i]
321 |             sentence[i].lstms[1] = res_back_2[num_vec - i - 1]
322 | 
323 |         scores, exprs = self.__evaluate(sentence, True)
324 |         gold = [entry.parent_id for entry in sentence]
325 |         heads = decoder.parse_proj(scores, gold)
326 | 
327 |         for modifier, head in enumerate(gold[1:]):
328 |             rscores, rexprs = self.__evaluateLabel(
329 |                 sentence, head, modifier + 1)
330 |             goldLabelInd = self.rels[sentence[modifier + 1].relation]
331 |             wrongLabelInd = \
332 |                 max(((l, scr) for l, scr in enumerate(rscores)
333 |                      if l != goldLabelInd), key=itemgetter(1))[0]
334 |             if rscores[goldLabelInd] < rscores[wrongLabelInd] + 1:
335 |                 lerrs += [rexprs[wrongLabelInd] - rexprs[goldLabelInd]]
336 | 
337 |         e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
338 |         if e > 0:
339 |             errs += [(exprs[h][i] - exprs[g][i])[0]
340 |                      for i, (h, g) in enumerate(zip(heads, gold)) if h != g]
341 |         return e
342 | 
343 | 
344 | def get_optim(opt, parameters):
345 |     if opt.optim == 'sgd':
346 |         return optim.SGD(parameters, lr=opt.learning_rate)
347 |     elif opt.optim == 'adam':
348 |         return optim.Adam(parameters)
349 | 
350 | 
351 | class MSTParserLSTM:
352 |     def __init__(self, vocab, pos, rels, enum_word, options, onto, cpos):
353 |         model = MSTParserLSTMModel(
354 |             vocab, pos, rels, enum_word, options, onto, cpos)
355 |         self.model = model.cuda() if use_gpu else model
356 |         self.trainer = get_optim(options, self.model.parameters())
357 | 
358 |     def predict(self, conll_path):
359 |         with open(conll_path, 'r') as conllFP:
360 |             for iSentence, sentence in enumerate(read_conll(conllFP)):
361 |                 self.model.hid_for_1, self.model.hid_back_1, self.model.hid_for_2, self.model.hid_back_2 = [
362 |                     self.model.init_hidden(self.model.ldims) for _ in range(4)]
363 |                 conll_sentence = [entry for entry in sentence if isinstance(
364 |                     entry, utils.ConllEntry)]
365 |                 self.model.predict(conll_sentence)
366 |                 yield conll_sentence
367 | 
368 |     def save(self, fn):
369 |         tmp = fn + '.tmp'
370 |         torch.save(self.model.state_dict(), tmp)
371 |         shutil.move(tmp, fn)
372 | 
373 |     def load(self, fn):
374 |         self.model.load_state_dict(torch.load(fn))
375 | 
376 |     def train(self, conll_path):
377 |         print('pytorch version:', torch.__version__)
378 |         batch = 1
379 |         eloss = 0.0
380 |         mloss = 0.0
381 |         eerrors = 0
382 |         etotal = 0
383 |         iSentence = 0
384 |         start = time.time()
385 |         with open(conll_path, 'r') as conllFP:
386 |             shuffledData = list(read_conll(conllFP))
387 |             random.shuffle(shuffledData)
388 |             errs = []
389 |             lerrs = []
390 |             for iSentence, sentence in enumerate(shuffledData):
391 |                 self.model.hid_for_1, self.model.hid_back_1, self.model.hid_for_2, self.model.hid_back_2 = [
392 |                     self.model.init_hidden(self.model.ldims) for _ in range(4)]
393 |                 if iSentence % 100 == 0 and iSentence != 0:
394 |                     print('Processing sentence number:', iSentence,
395 |                           'Loss:', eloss / etotal,
396 |                           'Errors:', (float(eerrors)) / etotal,
397 |                           'Time', time.time() - start)
398 |                     start = time.time()
399 |                     eerrors = 0
400 |                     eloss = 0.0
401 |                     etotal = 0
402 | 
403 |                 conll_sentence = [entry for entry in sentence if isinstance(
404 |                     entry, utils.ConllEntry)]
405 |                 e = self.model.forward(conll_sentence, errs, lerrs)
406 |                 eerrors += e
407 |                 eloss += e
408 |                 mloss += e
409 |                 etotal += len(sentence)
410 |                 if iSentence % batch == 0 or len(errs) > 0 or len(lerrs) > 0:
411 |                     if len(errs) > 0 or len(lerrs) > 0:
412 |                         eerrs = torch.sum(cat(errs + lerrs))
413 |                         eerrs.backward()
414 |                         self.trainer.step()
415 |                         errs = []
416 |                         lerrs = []
417 |                 self.trainer.zero_grad()
418 |         if len(errs) > 0:
419 |             eerrs = (torch.sum(errs + lerrs))
420 |             eerrs.backward()
421 |             self.trainer.step()
422 |         self.trainer.zero_grad()
423 |         print("Loss: ", mloss / iSentence)
424 | 


--------------------------------------------------------------------------------
/bmstparser/src/parser.py:
--------------------------------------------------------------------------------
  1 | from optparse import OptionParser
  2 | import pickle
  3 | import utils
  4 | import mstlstm
  5 | import os
  6 | import os.path
  7 | import time
  8 | import torch
  9 | import multiprocessing
 10 | 
 11 | 
 12 | if __name__ == '__main__':
 13 |     parser = OptionParser()
 14 |     parser.add_option("--outdir", type="string",
 15 |                       dest="output", default="model")
 16 | 
 17 |     parser.add_option("--train", dest="conll_train", help="Annotated CONLL train file", metavar="FILE",
 18 |                       default="corpus/train.conll")
 19 |     parser.add_option("--dev", dest="conll_dev", help="Annotated CONLL dev file", metavar="FILE",
 20 |                       default="corpus/dev.conll")
 21 |     parser.add_option("--test", dest="conll_test", help="Annotated CONLL test file", metavar="FILE",
 22 |                       default="corpus/test.conll")
 23 |     parser.add_option("--extrn", dest="external_embedding", help="External embeddings", metavar="FILE")
 24 |     parser.add_option("--params", dest="params", help="Parameters file",
 25 |                       metavar="FILE", default="params.pickle")
 26 |     parser.add_option("--model", dest="model", help="Load/Save model file", metavar="FILE",
 27 |                       default="model/neuralfirstorder.model")
 28 | 
 29 |     parser.add_option("--multi", dest="multi", help="Annotated CONLL multi-train file", metavar="FILE",
 30 |                       default=False)
 31 |                       # multi-task has been deleted for bloated code
 32 | 
 33 |     parser.add_option("--wembedding", type="int",
 34 |                       dest="wembedding_dims", default=100)
 35 |     parser.add_option("--pembedding", type="int",
 36 |                       dest="pembedding_dims", default=25)
 37 |     parser.add_option("--rembedding", type="int",
 38 |                       dest="rembedding_dims", default=25)
 39 | 
 40 |     parser.add_option("--oembedding", type="int", dest="oembedding_dims", default=0) #ontology
 41 |     parser.add_option("--cembedding", type="int", dest="cembedding_dims", default=0) #cpos
 42 | 
 43 |     parser.add_option("--epochs", type="int", dest="epochs", default=30)
 44 |     parser.add_option("--numthread", type="int", dest="numthread", default=8)
 45 |     parser.add_option("--hidden", type="int", dest="hidden_units", default=100)
 46 |     parser.add_option("--hidden2", type="int", dest="hidden2_units", default=0)
 47 |     parser.add_option("--optim", type="string", dest="optim", default='adam')
 48 |     parser.add_option("--lr", type="float", dest="lr", default=0.1)
 49 |     parser.add_option("--activation", type="string",
 50 |                       dest="activation", default="tanh")
 51 |     parser.add_option("--lstmlayers", type="int",
 52 |                       dest="lstm_layers", default=2)
 53 |     parser.add_option("--lstmdims", type="int", dest="lstm_dims", default=125)
 54 |     parser.add_option("--predict", action="store_true",
 55 |                       dest="predictFlag", default=False)
 56 | 
 57 |     (options, args) = parser.parse_args()
 58 |     max_thread = multiprocessing.cpu_count()
 59 |     active_thread = options.numthread if max_thread>options.numthread else max_thread
 60 |     torch.set_num_threads(active_thread)
 61 |     print(active_thread, "threads are in use")
 62 |     print('Using external embedding:', options.external_embedding)
 63 | 
 64 |     if options.predictFlag:
 65 |         with open(options.params, 'rb') as paramsfp:
 66 |             words, enum_word, pos, rels, onto, cpos, stored_opt = pickle.load(paramsfp)
 67 | 
 68 |         stored_opt.external_embedding = options.external_embedding
 69 | 
 70 |         print('Initializing lstm mstparser:')
 71 |         parser = mstlstm.MSTParserLSTM(words, pos, rels, enum_word, stored_opt, onto, cpos)
 72 |         parser.load(options.model)
 73 |         conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
 74 |         testpath = os.path.join(
 75 |             options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu')
 76 | 
 77 |         ts = time.time()
 78 |         test_res = list(parser.predict(options.conll_test))
 79 |         te = time.time()
 80 |         print('Finished predicting test.', te - ts, 'seconds.')
 81 |         utils.write_conll(testpath, test_res)
 82 | 
 83 |         if not conllu:
 84 |             os.system('perl src/utils/eval.pl -g ' + options.conll_test +
 85 |                       ' -s ' + testpath + ' > ' + testpath + '.txt')
 86 |         else:
 87 |             os.system(
 88 |                 'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + testpath + ' > ' + testpath + '.txt')
 89 |             with open(testpath + '.txt', 'r') as f:
 90 |                 for l in f:
 91 |                     if l.startswith('UAS'):
 92 |                         print('UAS:%s' % l.strip().split()[-1])
 93 |                     elif l.startswith('LAS'):
 94 |                         print('LAS:%s' % l.strip().split()[-1])
 95 |     else:
 96 |         print('Preparing vocabulary table')
 97 |         words, enum_word, pos, rels, onto, cpos = list(utils.vocab(options.conll_train))
 98 |         with open(os.path.join(options.output, options.params), 'wb') as paramsfp:
 99 |             pickle.dump((words, enum_word, pos, rels, onto, cpos, options), paramsfp)
100 |         print('Finished collecting vocabulary')
101 | 
102 |         print('Initializing mst-parser:')
103 |         parser = mstlstm.MSTParserLSTM(words, pos, rels, enum_word, options, onto, cpos)
104 | 
105 |         for epoch in range(options.epochs):
106 |             print('Starting epoch', epoch)
107 |             parser.train(options.conll_train)
108 |             conllu = (os.path.splitext(
109 |                 options.conll_dev.lower())[1] == '.conllu')
110 |             devpath = os.path.join(options.output,
111 |                                    'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu'))
112 |             utils.write_conll(devpath, parser.predict(options.conll_dev))
113 |             parser.save(os.path.join(options.output, os.path.basename(
114 |                 options.model) + str(epoch + 1)))
115 | 
116 |             if not conllu:
117 |                 os.system(
118 |                     'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt')
119 |                 with open(devpath + '.txt', 'r') as f:
120 |                     for i in range(0, 3):
121 |                         print(f.readline())
122 |             else:
123 |                 os.system(
124 |                     'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt')
125 |                 with open(devpath + '.txt', 'r') as f:
126 |                     for l in f:
127 |                         if l.startswith('UAS'):
128 |                             print('UAS:%s' % l.strip().split()[-1])
129 |                         elif l.startswith('LAS'):
130 |                             print('LAS:%s' % l.strip().split()[-1])
131 | 


--------------------------------------------------------------------------------
/bmstparser/src/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import Counter
 2 | import re
 3 | 
 4 | numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+")
 5 | 
 6 | 
 7 | def normalize(word):
 8 |     return 'NUM' if numberRegex.match(word) else word.lower()
 9 | 
10 | class ConllEntry:
11 |     def __init__(self, id, form, lemma, pos, cpos, feats=None, parent_id=None, relation=None, deps=None, misc=None):
12 |         self.id = id
13 |         self.form = form
14 |         self.norm = normalize(form)
15 |         self.pos = pos
16 |         self.cpos = cpos
17 |         self.parent_id = parent_id
18 |         self.relation = relation
19 | 
20 |         self.onto = lemma
21 |         self.feats = feats
22 |         self.deps = deps
23 |         self.misc = misc
24 | 
25 |         self.pred_parent_id = None
26 |         self.pred_relation = None
27 | 
28 |     def __str__(self):
29 |         values = [str(self.id), self.form, self.onto, self.pos, self.cpos, self.feats,
30 |                   str(self.pred_parent_id) if self.pred_parent_id is not None else None, self.pred_relation, self.deps, self.misc]
31 |         return '\t'.join(['_' if v is None else v for v in values])
32 | 
33 | 
34 | def read_conll(conllFP):
35 |     root = ConllEntry(0, '*root*', '*root*', 'ROOT-POS',
36 |                       'ROOT-CPOS', '_', -1, 'rroot', '_', '_')
37 |     tokens = [root]
38 |     for line in conllFP:
39 |         tok = line.strip().split('\t')
40 |         if not tok or line.strip() == '':
41 |             if len(tokens) > 1:
42 |                 yield tokens
43 |             tokens = [root]
44 |         else:
45 |             if line[0] == '#' or '-' in tok[0] or '.' in tok[0]:
46 |                 tokens.append(line.strip())
47 |             else:
48 |                 tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2], tok[3], tok[4], tok[5], int(
49 |                     tok[6]) if tok[6] != '_' else -1, tok[7], tok[8], tok[9]))
50 |     if len(tokens) > 1:
51 |         yield tokens
52 | 
53 | 
54 | def vocab(conll_path):
55 |     wordsCount = Counter()
56 |     posCount = Counter()
57 |     relCount = Counter()
58 |     ontoCount = Counter()
59 |     cposCount = Counter()
60 | 
61 |     with open(conll_path, 'r') as conllFP:
62 |         for sentence in read_conll(conllFP):
63 |             wordsCount.update(
64 |                 [node.norm for node in sentence if isinstance(node, ConllEntry)])
65 |             posCount.update(
66 |                 [node.pos for node in sentence if isinstance(node, ConllEntry)])
67 |             relCount.update(
68 |                 [node.relation for node in sentence if isinstance(node, ConllEntry)])
69 |             ontoCount.update(
70 |                 [node.onto for node in sentence if isinstance(node, ConllEntry)])
71 |             cposCount.update(
72 |                 [node.cpos for node in sentence if isinstance(node, ConllEntry)])
73 | 
74 |     print('the amount of kind of words, pos-tag, relations, ontology, cpos_tag:',
75 |           len(wordsCount), len(posCount), len(relCount), len(ontoCount), len(cposCount))
76 |     return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, list(posCount.keys()), list(relCount.keys()), list(ontoCount.keys()), list(cposCount.keys()))
77 | 
78 | 
79 | def write_conll(fn, conll_gen):
80 |     with open(fn, 'w') as fh:
81 |         for sentence in conll_gen:
82 |             for entry in sentence[1:]:
83 |                 fh.write(str(entry) + '\n')
84 |             fh.write('\n')
85 | 


--------------------------------------------------------------------------------
/bmstparser/src/utils/eval.pl:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env perl
   2 | 
   3 | # Author: Yuval Krymolowski
   4 | # Addition of precision and recall 
   5 | #   and of frame confusion list: Sabine Buchholz
   6 | # Addition of DEPREL + ATTACHMENT:
   7 | #   Prokopis Prokopidis (prokopis at ilsp dot gr)
   8 | # Acknowledgements: 
   9 | #   to Markus Kuhn for suggesting the use of 
  10 | #   the Unicode category property
  11 | 
  12 | if ($] < 5.008001)
  13 | {
  14 |   printf STDERR <<EOM
  15 | 
  16 |  This script requires PERL 5.8.1 for running.
  17 |  The new version is needed for proper handling
  18 |  of Unicode characters.
  19 | 
  20 |  Please obtain a new version or contact the shared task team
  21 |  if you are unable to upgrade PERL.
  22 | 
  23 | EOM
  24 | ;
  25 |   exit(1) ;
  26 | }
  27 | 
  28 | require Encode;
  29 | 
  30 | use strict ;
  31 | use warnings;
  32 | use Getopt::Std ;
  33 | 
  34 | my ($usage) = <<EOT
  35 | 
  36 |   CoNLL-X evaluation script:
  37 | 
  38 |    [perl] eval.pl [OPTIONS] -g <gold standard> -s <system output>
  39 | 
  40 |   This script evaluates a system output with respect to a gold standard.
  41 |   Both files should be in UTF-8 encoded CoNLL-X tabular format.
  42 | 
  43 |   Punctuation tokens (those where all characters have the Unicode
  44 |   category property "Punctuation") are ignored for scoring (unless the
  45 |   -p flag is used).
  46 | 
  47 |   The output breaks down the errors according to their type and context.
  48 | 
  49 |   Optional parameters:
  50 |      -o FILE : output: print output to FILE (default is standard output)
  51 |      -q : quiet:       only print overall performance, without the details
  52 |      -b : evalb:       produce output in a format similar to evalb 
  53 |                        (http://nlp.cs.nyu.edu/evalb/); use together with -q
  54 |      -p : punctuation: also score on punctuation (default is not to score on it)
  55 |      -v : version:     show the version number
  56 |      -h : help:        print this help text and exit
  57 | 
  58 | EOT
  59 | ;
  60 | 
  61 | my ($line_num) ;
  62 | my ($sep) = '0x01' ;
  63 | 
  64 | my ($START) = '.S' ;
  65 | my ($END) = '.E' ;
  66 | 
  67 | my ($con_err_num) = 3 ;
  68 | my ($freq_err_num) = 10 ;
  69 | my ($spec_err_loc_con) = 8 ;
  70 | 
  71 | ################################################################################
  72 | ###                              subfunctions                                ###
  73 | ################################################################################
  74 | 
  75 | # Whether a string consists entirely of characters with the Unicode
  76 | # category property "Punctuation" (see "man perlunicode")
  77 | sub is_uni_punct
  78 | {
  79 |   my ($word) = @_ ;
  80 | 
  81 |   return scalar(Encode::decode_utf8($word)=~ /^\p{Punctuation}+$/) ;
  82 | }
  83 | 
  84 | # The length of a unicode string, excluding non-spacing marks
  85 | # (for example vowel marks in Arabic)
  86 | 
  87 | sub uni_len
  88 | {
  89 |   my ($word) = @_ ;
  90 |   my ($ch, $l) ;
  91 | 
  92 |   $l = 0 ;
  93 |   foreach $ch (split(//,  Encode::decode_utf8($word)))
  94 |   {
  95 |     if ($ch !~ /^\p{NonspacingMark}/)
  96 |     {
  97 |       $l++ ;
  98 |     }
  99 |   }
 100 | 
 101 |   return $l ;
 102 | }
 103 | 
 104 | sub filter_context_counts
 105 | { # filter_context_counts
 106 | 
 107 |   my ($vec, $num, $max_len) = @_ ;
 108 |   my ($con, $l, $thresh) ;
 109 | 
 110 |   $thresh = (sort {$b <=> $a} values %{$vec})[$num-1] ;
 111 | 
 112 |   foreach $con (keys %{$vec})
 113 |   {
 114 |     if (${$vec}{$con} < $thresh)
 115 |     {
 116 |       delete ${$vec}{$con} ;
 117 |       next ;
 118 |     }
 119 | 
 120 |     $l = uni_len($con) ;
 121 | 
 122 |     if ($l > ${$max_len})
 123 |     {
 124 |       ${$max_len} = $l ;
 125 |     }
 126 |   }
 127 | 
 128 | } # filter_context_counts
 129 | 
 130 | sub print_context
 131 | { # print_context
 132 | 
 133 |   my ($counts, $counts_pos, $max_con_len, $max_con_pos_len) = @_ ;
 134 |   my (@v_con, @v_con_pos, $con, $con_pos, $i, $n) ;
 135 | 
 136 |   printf OUT "  %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_pos_len, 'CPOS', 'any', 'head', 'dep', 'both' ;
 137 |   printf OUT "  ||" ;
 138 |   printf OUT "  %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_len, 'word', 'any', 'head', 'dep', 'both' ;
 139 |   printf OUT "\n" ;
 140 |   printf OUT "  %s-+------+------+------+-----", '-' x $max_con_pos_len;
 141 |   printf OUT "--++" ;
 142 |   printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len;
 143 |   printf OUT "\n" ;
 144 | 
 145 |   @v_con = sort {${$counts}{tot}{$b} <=> ${$counts}{tot}{$a}} keys %{${$counts}{tot}} ;
 146 |   @v_con_pos = sort {${$counts_pos}{tot}{$b} <=> ${$counts_pos}{tot}{$a}} keys %{${$counts_pos}{tot}} ;
 147 | 
 148 |   $n = scalar @v_con ;
 149 |   if (scalar @v_con_pos > $n)
 150 |   {
 151 |     $n = scalar @v_con_pos ;
 152 |   }
 153 | 
 154 |   foreach $i (0 .. $n-1)
 155 |   {
 156 |     if (defined $v_con_pos[$i])
 157 |     {
 158 |       $con_pos = $v_con_pos[$i] ;
 159 |       printf OUT "  %-*s | %4d | %4d | %4d | %4d",
 160 | 	$max_con_pos_len, $con_pos, ${$counts_pos}{tot}{$con_pos},
 161 | 	  ${$counts_pos}{err_head}{$con_pos}, ${$counts_pos}{err_dep}{$con_pos},
 162 | 	    ${$counts_pos}{err_dep}{$con_pos}+${$counts_pos}{err_head}{$con_pos}-${$counts_pos}{tot}{$con_pos} ;
 163 |     }
 164 |     else
 165 |     {
 166 |       printf OUT "  %-*s | %4s | %4s | %4s | %4s",
 167 | 	$max_con_pos_len, ' ', ' ', ' ', ' ', ' ' ;
 168 |     }
 169 | 
 170 |     printf OUT "  ||" ;
 171 | 
 172 |     if (defined $v_con[$i])
 173 |     {
 174 |       $con = $v_con[$i] ;
 175 |       printf OUT "  %-*s | %4d | %4d | %4d | %4d",
 176 | 	$max_con_len+length($con)-uni_len($con), $con, ${$counts}{tot}{$con},
 177 | 	  ${$counts}{err_head}{$con}, ${$counts}{err_dep}{$con},
 178 | 	    ${$counts}{err_dep}{$con}+${$counts}{err_head}{$con}-${$counts}{tot}{$con} ;
 179 |     }
 180 |     else
 181 |     {
 182 |       printf OUT "  %-*s | %4s | %4s | %4s | %4s",
 183 | 	$max_con_len, ' ', ' ', ' ', ' ', ' ' ;
 184 |     }
 185 | 
 186 |     printf OUT "\n" ;
 187 |   }
 188 | 
 189 |   printf OUT "  %s-+------+------+------+-----", '-' x $max_con_pos_len;
 190 |   printf OUT "--++" ;
 191 |   printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len;
 192 |   printf OUT "\n" ;
 193 | 
 194 |   printf OUT "\n\n" ;
 195 | 
 196 | } # print_context
 197 | 
 198 | sub num_as_word
 199 | {
 200 |   my ($num) = @_ ;
 201 | 
 202 |   $num = abs($num) ;
 203 | 
 204 |   if ($num == 1)
 205 |   {
 206 |     return ('one word') ;
 207 |   }
 208 |   elsif ($num == 2)
 209 |   {
 210 |     return ('two words') ;
 211 |   }
 212 |   elsif ($num == 3)
 213 |   {
 214 |     return ('three words') ;
 215 |   }
 216 |   elsif ($num == 4)
 217 |   {
 218 |     return ('four words') ;
 219 |   }
 220 |   else
 221 |   {
 222 |     return ($num.' words') ;
 223 |   }
 224 | }
 225 | 
 226 | sub describe_err
 227 | { # describe_err
 228 | 
 229 |   my ($head_err, $head_aft_bef, $dep_err) = @_ ;
 230 |   my ($dep_g, $dep_s, $desc) ;
 231 |   my ($head_aft_bef_g, $head_aft_bef_s) = split(//, $head_aft_bef) ;
 232 | 
 233 |   if ($head_err eq '-')
 234 |   {
 235 |     $desc = 'correct head' ;
 236 | 
 237 |     if ($head_aft_bef_s eq '0')
 238 |     {
 239 |       $desc .= ' (0)' ;
 240 |     }
 241 |     elsif ($head_aft_bef_s eq 'e')
 242 |     {
 243 |       $desc .= ' (the focus word)' ;
 244 |     }
 245 |     elsif ($head_aft_bef_s eq 'a')
 246 |     {
 247 |       $desc .= ' (after the focus word)' ;
 248 |     }
 249 |     elsif ($head_aft_bef_s eq 'b')
 250 |     {
 251 |       $desc .= ' (before the focus word)' ;
 252 |     }
 253 |   }
 254 |   elsif ($head_aft_bef_s eq '0')
 255 |   {
 256 |     $desc = 'head = 0 instead of ' ;
 257 |     if ($head_aft_bef_g eq 'a')
 258 |     {
 259 |       $desc.= 'after ' ;
 260 |     }
 261 |     if ($head_aft_bef_g eq 'b')
 262 |     {
 263 |       $desc.= 'before ' ;
 264 |     }
 265 |     $desc .= 'the focus word' ;
 266 |   }
 267 |   elsif ($head_aft_bef_g eq '0')
 268 |   {
 269 |     $desc = 'head is ' ;
 270 |     if ($head_aft_bef_g eq 'a')
 271 |     {
 272 |       $desc.= 'after ' ;
 273 |     }
 274 |     if ($head_aft_bef_g eq 'b')
 275 |     {
 276 |       $desc.= 'before ' ;
 277 |     }
 278 |     $desc .= 'the focus word instead of 0' ;
 279 |   }
 280 |   else
 281 |   {
 282 |     $desc = num_as_word($head_err) ;
 283 |     if ($head_err < 0)
 284 |     {
 285 |       $desc .= ' before' ;
 286 |     }
 287 |     else
 288 |     {
 289 |       $desc .= ' after' ;
 290 |     }
 291 | 
 292 |     $desc = 'head '.$desc.' the correct head ' ;
 293 | 
 294 |     if ($head_aft_bef_s eq '0')
 295 |     {
 296 |       $desc .= '(0' ;
 297 |     }
 298 |     elsif ($head_aft_bef_s eq 'e')
 299 |     {
 300 |       $desc .= '(the focus word' ;
 301 |     }
 302 |     elsif ($head_aft_bef_s eq 'a')
 303 |     {
 304 |       $desc .= '(after the focus word' ;
 305 |     }
 306 |     elsif ($head_aft_bef_s eq 'b')
 307 |     {
 308 |       $desc .= '(before the focus word' ;
 309 |     }
 310 | 
 311 |     if ($head_aft_bef_g ne $head_aft_bef_s)
 312 |     {
 313 |       $desc .= ' instead of' ;
 314 |       if ($head_aft_bef_s eq '0')
 315 |       {
 316 | 	$desc .= '0' ;
 317 |       }
 318 |       elsif ($head_aft_bef_s eq 'e')
 319 |       {
 320 | 	$desc .= 'the focus word' ;
 321 |       }
 322 |       elsif ($head_aft_bef_s eq 'a')
 323 |       {
 324 | 	$desc .= 'after the focus word' ;
 325 |       }
 326 |       elsif ($head_aft_bef_s eq 'b')
 327 |       {
 328 | 	$desc .= 'before the focus word' ;
 329 |       }
 330 |     }
 331 | 
 332 |     $desc .= ')' ;
 333 |   }
 334 | 
 335 |   $desc .= ', ' ;
 336 | 
 337 |   if ($dep_err eq '-')
 338 |   {
 339 |     $desc .= 'correct dependency' ;
 340 |   }
 341 |   else
 342 |   {
 343 |     ($dep_g, $dep_s) = ($dep_err =~ /^(.*)->(.*)$/) ;
 344 |     $desc .= sprintf('dependency "%s" instead of "%s"', $dep_s, $dep_g) ;
 345 |   }
 346 | 
 347 |   return($desc) ;
 348 | 
 349 | } # describe_err
 350 | 
 351 | sub get_context
 352 | { # get_context
 353 | 
 354 |   my ($sent, $i_w) = @_ ;
 355 |   my ($w_2, $w_1, $w1, $w2) ;
 356 |   my ($p_2, $p_1, $p1, $p2) ;
 357 | 
 358 |   if ($i_w >= 2)
 359 |   {
 360 |     $w_2 = ${${$sent}[$i_w-2]}{word} ;
 361 |     $p_2 = ${${$sent}[$i_w-2]}{pos} ;
 362 |   }
 363 |   else
 364 |   {
 365 |     $w_2 = $START ;
 366 |     $p_2 = $START ;
 367 |   }
 368 | 
 369 |   if ($i_w >= 1)
 370 |   {
 371 |     $w_1 = ${${$sent}[$i_w-1]}{word} ;
 372 |     $p_1 = ${${$sent}[$i_w-1]}{pos} ;
 373 |   }
 374 |   else
 375 |   {
 376 |     $w_1 = $START ;
 377 |     $p_1 = $START ;
 378 |   }
 379 | 
 380 |   if ($i_w <= scalar @{$sent}-2)
 381 |   {
 382 |     $w1 = ${${$sent}[$i_w+1]}{word} ;
 383 |     $p1 = ${${$sent}[$i_w+1]}{pos} ;
 384 |   }
 385 |   else
 386 |   {
 387 |     $w1 = $END ;
 388 |     $p1 = $END ;
 389 |   }
 390 | 
 391 |   if ($i_w <= scalar @{$sent}-3)
 392 |   {
 393 |     $w2 = ${${$sent}[$i_w+2]}{word} ;
 394 |     $p2 = ${${$sent}[$i_w+2]}{pos} ;
 395 |   }
 396 |   else
 397 |   {
 398 |     $w2 = $END ;
 399 |     $p2 = $END ;
 400 |   }
 401 | 
 402 |   return ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) ;
 403 | 
 404 | } # get_context
 405 | 
 406 | sub read_sent
 407 | { # read_sent
 408 | 
 409 |   my ($sent_gold, $sent_sys) = @_ ;
 410 |   my ($line_g, $line_s, $new_sent) ;
 411 |   my (%fields_g, %fields_s) ;
 412 | 
 413 |   $new_sent = 1 ;
 414 | 
 415 |   @{$sent_gold} = () ;
 416 |   @{$sent_sys} = () ;
 417 | 
 418 |   while (1)
 419 |   { # main reading loop
 420 | 
 421 |     $line_g = <GOLD> ;
 422 |     $line_s = <SYS> ;
 423 | 
 424 |     $line_num++ ;
 425 | 
 426 |     # system output has fewer lines than gold standard
 427 |     if ((defined $line_g) && (! defined $line_s))
 428 |     {
 429 | 	printf STDERR "line mismatch, line %d:\n", $line_num ;
 430 | 	printf STDERR " gold: %s", $line_g ;
 431 | 	printf STDERR " sys : past end of file\n" ;
 432 | 	exit(1) ;
 433 |     }
 434 | 
 435 |     # system output has more lines than gold standard
 436 |     if ((! defined $line_g) && (defined $line_s))
 437 |     {
 438 | 	printf STDERR "line mismatch, line %d:\n", $line_num ;
 439 | 	printf STDERR " gold: past end of file\n" ;
 440 | 	printf STDERR " sys : %s", $line_s ;
 441 | 	exit(1) ;
 442 |     }
 443 |     
 444 |     # end of file reached for both
 445 |     if ((! defined $line_g) && (! defined $line_s))
 446 |     {
 447 | 	return (1) ;
 448 |     }
 449 | 
 450 |     # one contains end of sentence but other one does not
 451 |     if (($line_g =~ /^\s+$/) != ($line_s =~ /^\s+$/))
 452 |     {
 453 |       printf STDERR "line mismatch, line %d:\n", $line_num ;
 454 |       printf STDERR " gold: %s", $line_g ;
 455 |       printf STDERR " sys : %s", $line_s ;
 456 |       exit(1) ;
 457 |     }
 458 | 
 459 |     # end of sentence reached
 460 |     if ($line_g =~ /^\s+$/)
 461 |     {
 462 | 	return(0) ;
 463 |     }
 464 | 
 465 |     # now both lines contain information
 466 | 
 467 |     if ($new_sent)
 468 |     {
 469 |       $new_sent = 0 ;
 470 |     }
 471 | 
 472 |     # 'official' column names
 473 |     # options.output = ['id','form','lemma','cpostag','postag',
 474 |     #                   'feats','head','deprel','phead','pdeprel']
 475 | 
 476 |     @fields_g{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_g))[1, 3, 6, 7] ;
 477 | 
 478 |     push @{$sent_gold}, { %fields_g } ;
 479 | 
 480 |     @fields_s{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_s))[1, 3, 6, 7] ;
 481 | 
 482 |     if (($fields_g{word} ne $fields_s{word})
 483 | 	||
 484 | 	($fields_g{pos} ne $fields_s{pos}))
 485 |     {
 486 |       printf STDERR "Word/pos mismatch, line %d:\n", $line_num ;
 487 |       printf STDERR " gold: %s", $line_g ;
 488 |       printf STDERR " sys : %s", $line_s ;
 489 |       #exit(1) ;
 490 |     }
 491 | 
 492 |     push @{$sent_sys}, { %fields_s } ;
 493 | 
 494 |   } # main reading loop
 495 |   
 496 | } # read_sent
 497 | 
 498 | ################################################################################
 499 | ###                                  main                                    ###
 500 | ################################################################################
 501 | 
 502 | our ($opt_g, $opt_s, $opt_o, $opt_h, $opt_v, $opt_q, $opt_p, $opt_b) ;
 503 | 
 504 | my ($sent_num, $eof, $word_num, @err_sent) ;
 505 | my (@sent_gold, @sent_sys, @starts) ;
 506 | my ($word, $pos, $wp, $head_g, $dep_g, $head_s, $dep_s) ;
 507 | my (%counts, $err_head, $err_dep, $con, $con1, $con_pos, $con_pos1, $thresh) ;
 508 | my ($head_err, $dep_err, @cur_err, %err_counts, $err_counter, $err_desc) ;
 509 | my ($loc_con, %loc_con_err_counts, %err_desc) ;
 510 | my ($head_aft_bef_g, $head_aft_bef_s, $head_aft_bef) ;
 511 | my ($con_bef, $con_aft, $con_bef_2, $con_aft_2, @bits, @e_bits, @v_con, @v_con_pos) ;
 512 | my ($con_pos_bef, $con_pos_aft, $con_pos_bef_2, $con_pos_aft_2) ;
 513 | my ($max_word_len, $max_pos_len, $max_con_len, $max_con_pos_len) ;
 514 | my ($max_word_spec_len, $max_con_bef_len, $max_con_aft_len) ;
 515 | my (%freq_err, $err) ;
 516 | 
 517 | my ($i, $j, $i_w, $l, $n_args) ;
 518 | my ($w_2, $w_1, $w1, $w2) ;
 519 | my ($wp_2, $wp_1, $wp1, $wp2) ;
 520 | my ($p_2, $p_1, $p1, $p2) ;
 521 | 
 522 | my ($short_output) ;
 523 | my ($score_on_punct) ;
 524 | $counts{punct} = 0; # initialize
 525 | 
 526 | getopts("g:o:s:qvhpb") ;
 527 | 
 528 | if (defined $opt_v)
 529 | {
 530 |     my $id = '$Id: eval.pl,v 1.9 2006/05/09 20:30:01 yuval Exp $';
 531 |     my @parts = split ' ',$id;
 532 |     print "Version $parts[2]\n";
 533 |     exit(0);
 534 | }
 535 | 
 536 | if ((defined $opt_h) || ((! defined $opt_g) && (! defined $opt_s)))
 537 | {
 538 |   die $usage ;
 539 | }
 540 | 
 541 | if (! defined $opt_g)
 542 | {
 543 |   die "Gold standard file (-g) missing\n" ;
 544 | }
 545 | 
 546 | if (! defined $opt_s)
 547 | {
 548 |   die "System output file (-s) missing\n" ;
 549 | }
 550 | 
 551 | if (! defined $opt_o)
 552 | {
 553 |   $opt_o = '-' ;
 554 | }
 555 | 
 556 | if (defined $opt_q)
 557 | {
 558 |     $short_output = 1 ;
 559 | } else {
 560 |     $short_output = 0 ;
 561 | }
 562 | 
 563 | if (defined $opt_p)
 564 | {
 565 |     $score_on_punct = 1 ;
 566 | } else {
 567 |     $score_on_punct = 0 ;
 568 | }
 569 | 
 570 | $line_num = 0 ;
 571 | $sent_num = 0 ;
 572 | $eof = 0 ;
 573 | 
 574 | @err_sent = () ;
 575 | @starts = () ;
 576 | 
 577 | %{$err_sent[0]} = () ;
 578 | 
 579 | $max_pos_len = length('CPOS') ;
 580 | 
 581 | ################################################################################
 582 | ###                              reading input                               ###
 583 | ################################################################################
 584 | 
 585 | open (GOLD, "<$opt_g") || die "Could not open gold standard file $opt_g\n" ;
 586 | open (SYS,  "<$opt_s") || die "Could not open system output file $opt_s\n" ;
 587 | open (OUT,  ">$opt_o") || die "Could not open output file $opt_o\n" ;
 588 | 
 589 | 
 590 | if (defined $opt_b) {  # produce output similar to evalb
 591 |     print OUT "     Sent.          Attachment      Correct        Scoring          \n";
 592 |     print OUT "    ID Tokens  -   Unlab. Lab.   HEAD HEAD+DEPREL   tokens   - - - -\n";
 593 |     print OUT "  ============================================================================\n";
 594 | }
 595 | 
 596 | 
 597 | while (! $eof)
 598 | { # main reading loop
 599 | 
 600 |   $starts[$sent_num] = $line_num+1 ;
 601 |   $eof = read_sent(\@sent_gold, \@sent_sys) ;
 602 | 
 603 |   $sent_num++ ;
 604 | 
 605 |   %{$err_sent[$sent_num]} = () ;
 606 |   $word_num = scalar @sent_gold ;
 607 | 
 608 |   # for accuracy per sentence
 609 |   my %sent_counts = ( tot      => 0,
 610 | 		      err_any  => 0,
 611 | 		      err_head => 0
 612 | 		      ); 
 613 | 
 614 |   # printf "$sent_num $word_num\n" ;
 615 | 
 616 |   my @frames_g = ('** '); # the initial frame for the virtual root
 617 |   my @frames_s = ('** '); # the initial frame for the virtual root
 618 |   foreach $i_w (0 .. $word_num-1)
 619 |   { # loop on words
 620 |       push @frames_g, ''; # initialize
 621 |       push @frames_s, ''; # initialize
 622 |   }
 623 | 
 624 |   foreach $i_w (0 .. $word_num-1)
 625 |   { # loop on words
 626 | 
 627 |     ($word, $pos, $head_g, $dep_g)
 628 |       = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ;
 629 |     $wp = $word.' / '.$pos ;
 630 | 
 631 |     # printf "%d: %s %s %s %s\n", $i_w,  $word, $pos, $head_g, $dep_g ;
 632 | 
 633 |     if ((! $score_on_punct) && is_uni_punct($word))
 634 |     {
 635 |       $counts{punct}++ ;
 636 |       # ignore punctuations
 637 |       next ;
 638 |     }
 639 | 
 640 |     if (length($pos) > $max_pos_len)
 641 |     {
 642 |       $max_pos_len = length($pos) ;
 643 |     }
 644 | 
 645 |     ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ;
 646 | 
 647 |     $counts{tot}++ ;
 648 |     $counts{word}{$wp}{tot}++ ;
 649 |     $counts{pos}{$pos}{tot}++ ;
 650 |     $counts{head}{$head_g-$i_w-1}{tot}++ ;
 651 | 
 652 |     # for frame confusions
 653 |     # add child to frame of parent
 654 |     $frames_g[$head_g] .= "$dep_g ";
 655 |     $frames_s[$head_s] .= "$dep_s ";
 656 |     # add to frame of token itself
 657 |     $frames_g[$i_w+1] .= "*$dep_g* "; # $i_w+1 because $i_w starts counting at zero
 658 |     $frames_s[$i_w+1] .= "*$dep_g* ";
 659 | 
 660 |     # for precision and recall of DEPREL
 661 |     $counts{dep}{$dep_g}{tot}++ ;     # counts for gold standard deprels
 662 |     $counts{dep2}{$dep_g}{$dep_s}++ ; # counts for confusions
 663 |     $counts{dep_s}{$dep_s}{tot}++ ;   # counts for system deprels
 664 |     $counts{all_dep}{$dep_g} = 1 ;    # list of all deprels that occur ...
 665 |     $counts{all_dep}{$dep_s} = 1 ;    # ... in either gold or system output
 666 | 
 667 |     # for precision and recall of HEAD direction
 668 |     my $dir_g;
 669 |     if ($head_g == 0) {
 670 | 	$dir_g = 'to_root';
 671 |     } elsif ($head_g < $i_w+1) { # $i_w+1 because $i_w starts counting at zero
 672 |                                  # also below
 673 | 	$dir_g = 'left';
 674 |     } elsif ($head_g > $i_w+1) {
 675 | 	$dir_g = 'right';
 676 |     } else {
 677 |         # token links to itself; should never happen in correct gold standard
 678 | 	$dir_g = 'self'; 
 679 |     }
 680 |     my $dir_s;
 681 |     if ($head_s == 0) {
 682 | 	$dir_s = 'to_root';
 683 |     } elsif ($head_s < $i_w+1) {
 684 | 	$dir_s = 'left';
 685 |     } elsif ($head_s > $i_w+1) {
 686 | 	$dir_s = 'right';
 687 |     } else {
 688 |         # token links to itself; should not happen in good system 
 689 |         # (but not forbidden in shared task)
 690 | 	$dir_s = 'self'; 
 691 |     }
 692 |     $counts{dir_g}{$dir_g}{tot}++ ;   # counts for gold standard head direction
 693 |     $counts{dir2}{$dir_g}{$dir_s}++ ; # counts for confusions
 694 |     $counts{dir_s}{$dir_s}{tot}++ ;   # counts for system head direction
 695 | 
 696 |     # for precision and recall of HEAD distance
 697 |     my $dist_g;
 698 |     if ($head_g == 0) {
 699 | 	$dist_g = 'to_root';
 700 |     } elsif ( abs($head_g - ($i_w+1)) <= 1 ) {
 701 | 	$dist_g = '1'; # includes the 'self' cases
 702 |     } elsif ( abs($head_g - ($i_w+1)) <= 2 ) {
 703 | 	$dist_g = '2';
 704 |     } elsif ( abs($head_g - ($i_w+1)) <= 6 ) {
 705 | 	$dist_g = '3-6';
 706 |     } else {
 707 | 	$dist_g = '7-...';
 708 |     }
 709 |     my $dist_s;
 710 |     if ($head_s == 0) {
 711 | 	$dist_s = 'to_root';
 712 |     } elsif ( abs($head_s - ($i_w+1)) <= 1 ) {
 713 | 	$dist_s = '1'; # includes the 'self' cases
 714 |     } elsif ( abs($head_s - ($i_w+1)) <= 2 ) {
 715 | 	$dist_s = '2';
 716 |     } elsif ( abs($head_s - ($i_w+1)) <= 6 ) {
 717 | 	$dist_s = '3-6';
 718 |     } else {
 719 | 	$dist_s = '7-...';
 720 |     }
 721 |     $counts{dist_g}{$dist_g}{tot}++ ;    # counts for gold standard head distance
 722 |     $counts{dist2}{$dist_g}{$dist_s}++ ; # counts for confusions
 723 |     $counts{dist_s}{$dist_s}{tot}++ ;    # counts for system head distance
 724 | 
 725 | 
 726 |     $err_head = ($head_g ne $head_s) ; # error in head
 727 |     $err_dep = ($dep_g ne $dep_s) ;    # error in deprel
 728 | 
 729 |     $head_err = '-' ;
 730 |     $dep_err = '-' ;
 731 | 
 732 |     # for accuracy per sentence
 733 |     $sent_counts{tot}++ ;
 734 |     if ($err_dep || $err_head) {
 735 | 	$sent_counts{err_any}++ ;
 736 |     }
 737 |     if ($err_head) {
 738 | 	$sent_counts{err_head}++ ;
 739 |     }
 740 | 
 741 |     # total counts and counts for CPOS involved in errors
 742 | 
 743 |     if ($head_g eq '0')
 744 |     {
 745 |       $head_aft_bef_g = '0' ;
 746 |     }
 747 |     elsif ($head_g eq $i_w+1)
 748 |     {
 749 |       $head_aft_bef_g = 'e' ;
 750 |     }
 751 |     else
 752 |     {
 753 |       $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ;
 754 |     }
 755 | 
 756 |     if ($head_s eq '0')
 757 |     {
 758 |       $head_aft_bef_s = '0' ;
 759 |     }
 760 |     elsif ($head_s eq $i_w+1)
 761 |     {
 762 |       $head_aft_bef_s = 'e' ;
 763 |     }
 764 |     else
 765 |     {
 766 |       $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ;
 767 |     }
 768 | 
 769 |     $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ;
 770 | 
 771 |     if ($err_head)
 772 |     {
 773 |       if ($head_aft_bef_s eq '0')
 774 |       {
 775 | 	$head_err = 0 ;
 776 |       }
 777 |       else
 778 |       {
 779 | 	$head_err = $head_s-$head_g ;
 780 |       }
 781 | 
 782 |       $err_sent[$sent_num]{head}++ ;
 783 |       $counts{err_head}{tot}++ ;
 784 |       $counts{err_head}{$head_err}++ ;
 785 | 
 786 |       $counts{word}{err_head}{$wp}++ ;
 787 |       $counts{pos}{$pos}{err_head}{tot}++ ;
 788 |       $counts{pos}{$pos}{err_head}{$head_err}++ ;
 789 |     }
 790 | 
 791 |     if ($err_dep)
 792 |     {
 793 |       $dep_err = $dep_g.'->'.$dep_s ;
 794 |       $err_sent[$sent_num]{dep}++ ;
 795 |       $counts{err_dep}{tot}++ ;
 796 |       $counts{err_dep}{$dep_err}++ ;
 797 | 
 798 |       $counts{word}{err_dep}{$wp}++ ;
 799 |       $counts{pos}{$pos}{err_dep}{tot}++ ;
 800 |       $counts{pos}{$pos}{err_dep}{$dep_err}++ ;
 801 | 
 802 |       if ($err_head)
 803 |       {
 804 | 	$counts{err_both}++ ;
 805 | 	$counts{pos}{$pos}{err_both}++ ;
 806 |       }
 807 |     }
 808 | 
 809 |     ### DEPREL + ATTACHMENT
 810 |     if ((!$err_dep) && ($err_head)) {
 811 | 	$counts{err_head_corr_dep}{tot}++ ;
 812 | 	$counts{err_head_corr_dep}{$dep_s}++ ;
 813 |     }
 814 |     ### DEPREL + ATTACHMENT
 815 | 
 816 |     # counts for words involved in errors
 817 | 
 818 |     if (! ($err_head || $err_dep))
 819 |     {
 820 |       next ;
 821 |     }
 822 | 
 823 |     $err_sent[$sent_num]{word}++ ;
 824 |     $counts{err_any}++ ;
 825 |     $counts{word}{err_any}{$wp}++ ;
 826 |     $counts{pos}{$pos}{err_any}++ ;
 827 | 
 828 |     ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ;
 829 | 
 830 |     if ($w_2 ne $START)
 831 |     {
 832 |       $wp_2 = $w_2.' / '.$p_2 ;
 833 |     }
 834 |     else
 835 |     {
 836 |       $wp_2 = $w_2 ;
 837 |     }
 838 | 
 839 |     if ($w_1 ne $START)
 840 |     {
 841 |       $wp_1 = $w_1.' / '.$p_1 ;
 842 |     }
 843 |     else
 844 |     {
 845 |       $wp_1 = $w_1 ;
 846 |     }
 847 | 
 848 |     if ($w1 ne $END)
 849 |     {
 850 |       $wp1 = $w1.' / '.$p1 ;
 851 |     }
 852 |     else
 853 |     {
 854 |       $wp1 = $w1 ;
 855 |     }
 856 | 
 857 |     if ($w2 ne $END)
 858 |     {
 859 |       $wp2 = $w2.' / '.$p2 ;
 860 |     }
 861 |     else
 862 |     {
 863 |       $wp2 = $w2 ;
 864 |     }
 865 | 
 866 |     $con_bef = $wp_1 ;
 867 |     $con_bef_2 = $wp_2.' + '.$wp_1 ;
 868 |     $con_aft = $wp1 ;
 869 |     $con_aft_2 = $wp1.' + '.$wp2 ;
 870 | 
 871 |     $con_pos_bef = $p_1 ;
 872 |     $con_pos_bef_2 = $p_2.'+'.$p_1 ;
 873 |     $con_pos_aft = $p1 ;
 874 |     $con_pos_aft_2 = $p1.'+'.$p2 ;
 875 | 
 876 |     if ($w_1 ne $START)
 877 |     {
 878 |       # do not count '.S' as a word context
 879 |       $counts{con_bef_2}{tot}{$con_bef_2}++ ;
 880 |       $counts{con_bef_2}{err_head}{$con_bef_2} += $err_head ;
 881 |       $counts{con_bef_2}{err_dep}{$con_bef_2} += $err_dep ;
 882 |       $counts{con_bef}{tot}{$con_bef}++ ;
 883 |       $counts{con_bef}{err_head}{$con_bef} += $err_head ;
 884 |       $counts{con_bef}{err_dep}{$con_bef} += $err_dep ;
 885 |     }
 886 | 
 887 |     if ($w1 ne $END)
 888 |     {
 889 |       # do not count '.E' as a word context
 890 |       $counts{con_aft_2}{tot}{$con_aft_2}++ ;
 891 |       $counts{con_aft_2}{err_head}{$con_aft_2} += $err_head ;
 892 |       $counts{con_aft_2}{err_dep}{$con_aft_2} += $err_dep ;
 893 |       $counts{con_aft}{tot}{$con_aft}++ ;
 894 |       $counts{con_aft}{err_head}{$con_aft} += $err_head ;
 895 |       $counts{con_aft}{err_dep}{$con_aft} += $err_dep ;
 896 |     }
 897 | 
 898 |     $counts{con_pos_bef_2}{tot}{$con_pos_bef_2}++ ;
 899 |     $counts{con_pos_bef_2}{err_head}{$con_pos_bef_2} += $err_head ;
 900 |     $counts{con_pos_bef_2}{err_dep}{$con_pos_bef_2} += $err_dep ;
 901 |     $counts{con_pos_bef}{tot}{$con_pos_bef}++ ;
 902 |     $counts{con_pos_bef}{err_head}{$con_pos_bef} += $err_head ;
 903 |     $counts{con_pos_bef}{err_dep}{$con_pos_bef} += $err_dep ;
 904 | 
 905 |     $counts{con_pos_aft_2}{tot}{$con_pos_aft_2}++ ;
 906 |     $counts{con_pos_aft_2}{err_head}{$con_pos_aft_2} += $err_head ;
 907 |     $counts{con_pos_aft_2}{err_dep}{$con_pos_aft_2} += $err_dep ;
 908 |     $counts{con_pos_aft}{tot}{$con_pos_aft}++ ;
 909 |     $counts{con_pos_aft}{err_head}{$con_pos_aft} += $err_head ;
 910 |     $counts{con_pos_aft}{err_dep}{$con_pos_aft} += $err_dep ;
 911 | 
 912 |     $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ;
 913 |     $freq_err{$err}++ ;
 914 | 
 915 |   } # loop on words
 916 | 
 917 |   foreach $i_w (0 .. $word_num) # including one for the virtual root
 918 |   { # loop on words
 919 |       if ($frames_g[$i_w] ne $frames_s[$i_w]) {
 920 | 	  $counts{frame2}{"$frames_g[$i_w]/ $frames_s[$i_w]"}++ ;
 921 |       }
 922 |   }
 923 | 
 924 |   if (defined $opt_b) { # produce output similar to evalb
 925 |       if ($word_num > 0) {
 926 | 	  my ($unlabeled,$labeled) = ('NaN', 'NaN');
 927 | 	  if ($sent_counts{tot} > 0) { # there are scoring tokens
 928 | 	      $unlabeled = 100-$sent_counts{err_head}*100.0/$sent_counts{tot};
 929 | 	      $labeled   = 100-$sent_counts{err_any} *100.0/$sent_counts{tot};
 930 | 	  }
 931 | 	  printf OUT "  %4d %4d    0  %6.2f %6.2f  %4d    %4d        %4d    0 0 0 0\n", 
 932 | 	  $sent_num, $word_num, 
 933 | 	  $unlabeled, $labeled, 
 934 | 	  $sent_counts{tot}-$sent_counts{err_head}, 
 935 | 	  $sent_counts{tot}-$sent_counts{err_any}, 
 936 | 	  $sent_counts{tot},;
 937 |       }
 938 |   }
 939 | 
 940 | } # main reading loop
 941 | 
 942 | ################################################################################
 943 | ###                             printing output                              ###
 944 | ################################################################################
 945 | 
 946 | if (defined $opt_b) {  # produce output similar to evalb
 947 |     print OUT "\n\n";
 948 | }
 949 | printf OUT "  Labeled   attachment score: %d / %d * 100 = %.2f %%\n", 
 950 |     $counts{tot}-$counts{err_any},      $counts{tot}, 100-$counts{err_any}*100.0/$counts{tot} ;
 951 | printf OUT "  Unlabeled attachment score: %d / %d * 100 = %.2f %%\n", 
 952 |     $counts{tot}-$counts{err_head}{tot}, $counts{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot} ;
 953 | printf OUT "  Label accuracy score:       %d / %d * 100 = %.2f %%\n", 
 954 |     $counts{tot}-$counts{err_dep}{tot}, $counts{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot} ;
 955 | 
 956 | if ($short_output)
 957 | {
 958 |     exit(0) ;
 959 | }
 960 | printf OUT "\n  %s\n\n", '=' x 80 ;
 961 | printf OUT "  Evaluation of the results in %s\n  vs. gold standard %s:\n\n", $opt_s, $opt_g ;
 962 | 
 963 | printf OUT "  Legend: '%s' - the beginning of a sentence, '%s' - the end of a sentence\n\n", $START, $END ;
 964 | 
 965 | printf OUT "  Number of non-scoring tokens: $counts{punct}\n\n";
 966 | 
 967 | printf OUT "  The overall accuracy and its distribution over CPOSTAGs\n\n" ;
 968 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
 969 | 
 970 | printf OUT "  %-10s | %-5s | %-5s |   %%  | %-5s |   %%  | %-5s |   %%\n",
 971 |   'Accuracy', 'words', 'right', 'right', 'both' ;
 972 | printf OUT "  %-10s | %-5s | %-5s |      | %-5s |      | %-5s |\n",
 973 |   ' ', ' ', 'head', ' dep', 'right' ;
 974 | 
 975 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
 976 | 
 977 | printf OUT "  %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n",
 978 |   'total', $counts{tot},
 979 |   $counts{tot}-$counts{err_head}{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot},
 980 |   $counts{tot}-$counts{err_dep}{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot},
 981 |   $counts{tot}-$counts{err_any}, 100-$counts{err_any}*100.0/$counts{tot} ;
 982 | 
 983 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
 984 | 
 985 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}})
 986 | {
 987 |     if (! defined($counts{pos}{$pos}{err_head}{tot}))
 988 |     {
 989 | 	$counts{pos}{$pos}{err_head}{tot} = 0 ;
 990 |     }
 991 |     if (! defined($counts{pos}{$pos}{err_dep}{tot}))
 992 |     {
 993 | 	$counts{pos}{$pos}{err_dep}{tot} = 0 ;
 994 |     }
 995 |     if (! defined($counts{pos}{$pos}{err_any}))
 996 |     {
 997 | 	$counts{pos}{$pos}{err_any} = 0 ;
 998 |     }
 999 | 
1000 |     printf OUT "  %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n",
1001 |     $pos, $counts{pos}{$pos}{tot},
1002 |     $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_head}{tot}, 100-$counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot},
1003 |     $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_dep}{tot}, 100-$counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot},
1004 |     $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_any}, 100-$counts{pos}{$pos}{err_any}*100.0/$counts{pos}{$pos}{tot} ;
1005 | }
1006 | 
1007 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
1008 | 
1009 | printf OUT "\n\n" ;
1010 | 
1011 | printf OUT "  The overall error rate and its distribution over CPOSTAGs\n\n" ;
1012 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
1013 | 
1014 | printf OUT "  %-10s | %-5s | %-5s |   %%  | %-5s |   %%  | %-5s |   %%\n",
1015 |   'Error', 'words', 'head', ' dep', 'both' ;
1016 | printf OUT "  %-10s | %-5s | %-5s |      | %-5s |      | %-5s |\n",
1017 | 
1018 |   'Rate', ' ', 'err', ' err', 'wrong' ;
1019 | 
1020 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
1021 | 
1022 | printf OUT "  %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n",
1023 |   'total', $counts{tot},
1024 |   $counts{err_head}{tot}, $counts{err_head}{tot}*100.0/$counts{tot},
1025 |   $counts{err_dep}{tot}, $counts{err_dep}{tot}*100.0/$counts{tot},
1026 |   $counts{err_both}, $counts{err_both}*100.0/$counts{tot} ;
1027 | 
1028 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
1029 | 
1030 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}})
1031 | {
1032 |     if (! defined($counts{pos}{$pos}{err_both}))
1033 |     {
1034 | 	$counts{pos}{$pos}{err_both} = 0 ;
1035 |     }
1036 | 
1037 |     printf OUT "  %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n",
1038 |     $pos, $counts{pos}{$pos}{tot},
1039 |     $counts{pos}{$pos}{err_head}{tot}, $counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot},
1040 |     $counts{pos}{$pos}{err_dep}{tot}, $counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot},
1041 |     $counts{pos}{$pos}{err_both}, $counts{pos}{$pos}{err_both}*100.0/$counts{pos}{$pos}{tot} ;
1042 |     
1043 | }
1044 | 
1045 | printf OUT "%s\n", "  -----------+-------+-------+------+-------+------+-------+-------" ;
1046 | 
1047 | ### added by Sabine Buchholz
1048 | printf OUT "\n\n";
1049 | printf OUT "  Precision and recall of DEPREL\n\n";
1050 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1051 | printf OUT "  deprel          | gold | correct | system | recall (%%) | precision (%%) \n";
1052 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1053 | foreach my $dep (sort keys %{$counts{all_dep}}) {
1054 |     # initialize
1055 |     my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN');
1056 | 
1057 |     if (defined($counts{dep2}{$dep}{$dep})) {
1058 | 	$tot_corr = $counts{dep2}{$dep}{$dep};
1059 |     } 
1060 |     if (defined($counts{dep}{$dep}{tot})) {
1061 |     	$tot_g = $counts{dep}{$dep}{tot};
1062 | 	$rec = sprintf("%.2f",$tot_corr / $tot_g * 100);
1063 |     }
1064 |     if (defined($counts{dep_s}{$dep}{tot})) {
1065 | 	$tot_s = $counts{dep_s}{$dep}{tot};
1066 | 	$prec = sprintf("%.2f",$tot_corr / $tot_s * 100);
1067 |     }
1068 |     printf OUT "  %-15s | %4d | %7d | %6d | %10s | %13s\n",
1069 |     $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec;
1070 | }
1071 | 
1072 | ### DEPREL + ATTACHMENT:
1073 | ### Same as Sabine's DEPREL apart from $tot_corr calculation
1074 | printf OUT "\n\n";
1075 | printf OUT "  Precision and recall of DEPREL + ATTACHMENT\n\n";
1076 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1077 | printf OUT "  deprel          | gold | correct | system | recall (%%) | precision (%%) \n";
1078 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1079 | foreach my $dep (sort keys %{$counts{all_dep}}) {
1080 |     # initialize
1081 |     my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN');
1082 | 
1083 |     if (defined($counts{dep2}{$dep}{$dep})) {
1084 | 	if (defined($counts{err_head_corr_dep}{$dep})) {
1085 | 	    $tot_corr = $counts{dep2}{$dep}{$dep} - $counts{err_head_corr_dep}{$dep};
1086 | 	} else {
1087 | 	    $tot_corr = $counts{dep2}{$dep}{$dep};
1088 | 	}
1089 |     } 
1090 |     if (defined($counts{dep}{$dep}{tot})) {
1091 |     	$tot_g = $counts{dep}{$dep}{tot};
1092 | 	$rec = sprintf("%.2f",$tot_corr / $tot_g * 100);
1093 |     }
1094 |     if (defined($counts{dep_s}{$dep}{tot})) {
1095 | 	$tot_s = $counts{dep_s}{$dep}{tot};
1096 | 	$prec = sprintf("%.2f",$tot_corr / $tot_s * 100);
1097 |     }
1098 |     printf OUT "  %-15s | %4d | %7d | %6d | %10s | %13s\n",
1099 |     $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec;
1100 | }
1101 | ### DEPREL + ATTACHMENT
1102 | 
1103 | printf OUT "\n\n";
1104 | printf OUT "  Precision and recall of binned HEAD direction\n\n";
1105 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1106 | printf OUT "  direction       | gold | correct | system | recall (%%) | precision (%%) \n";
1107 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1108 | foreach my $dir ('to_root', 'left', 'right', 'self') {
1109 |     # initialize
1110 |     my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN');
1111 | 
1112 |     if (defined($counts{dir2}{$dir}{$dir})) {
1113 | 	$tot_corr = $counts{dir2}{$dir}{$dir};
1114 |     } 
1115 |     if (defined($counts{dir_g}{$dir}{tot})) {
1116 |     	$tot_g = $counts{dir_g}{$dir}{tot};
1117 | 	$rec = sprintf("%.2f",$tot_corr / $tot_g * 100);
1118 |     }
1119 |     if (defined($counts{dir_s}{$dir}{tot})) {
1120 | 	$tot_s = $counts{dir_s}{$dir}{tot};
1121 | 	$prec = sprintf("%.2f",$tot_corr / $tot_s * 100);
1122 |     }
1123 |     printf OUT "  %-15s | %4d | %7d | %6d | %10s | %13s\n",
1124 |     $dir, $tot_g, $tot_corr, $tot_s, $rec, $prec;
1125 | }
1126 | 
1127 | printf OUT "\n\n";
1128 | printf OUT "  Precision and recall of binned HEAD distance\n\n";
1129 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1130 | printf OUT "  distance        | gold | correct | system | recall (%%) | precision (%%) \n";
1131 | printf OUT "  ----------------+------+---------+--------+------------+---------------\n";
1132 | foreach my $dist ('to_root', '1', '2', '3-6', '7-...') {
1133 |     # initialize
1134 |     my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN');
1135 | 
1136 |     if (defined($counts{dist2}{$dist}{$dist})) {
1137 | 	$tot_corr = $counts{dist2}{$dist}{$dist};
1138 |     } 
1139 |     if (defined($counts{dist_g}{$dist}{tot})) {
1140 |     	$tot_g = $counts{dist_g}{$dist}{tot};
1141 | 	$rec = sprintf("%.2f",$tot_corr / $tot_g * 100);
1142 |     }
1143 |     if (defined($counts{dist_s}{$dist}{tot})) {
1144 | 	$tot_s = $counts{dist_s}{$dist}{tot};
1145 | 	$prec = sprintf("%.2f",$tot_corr / $tot_s * 100);
1146 |     }
1147 |     printf OUT "  %-15s | %4d | %7d | %6d | %10s | %13s\n",
1148 |     $dist, $tot_g, $tot_corr, $tot_s, $rec, $prec;
1149 | }
1150 | 
1151 | printf OUT "\n\n";
1152 | printf OUT "  Frame confusions (gold versus system; *...* marks the head token)\n\n";
1153 | foreach my $frame (sort {$counts{frame2}{$b} <=> $counts{frame2}{$a}} keys %{$counts{frame2}})
1154 | {
1155 |     if ($counts{frame2}{$frame} >= 5) # (make 5 a changeable threshold later)
1156 |     {
1157 | 	printf OUT "  %3d  %s\n", $counts{frame2}{$frame}, $frame;
1158 |     }
1159 | }
1160 | ### end of: added by Sabine Buchholz
1161 | 
1162 | 
1163 | #
1164 | # Leave only the 5 words mostly involved in errors
1165 | #
1166 | 
1167 | 
1168 | $thresh = (sort {$b <=> $a} values %{$counts{word}{err_any}})[4] ;
1169 | 
1170 | # ensure enough space for title
1171 | $max_word_len = length('word') ;
1172 | 
1173 | foreach $word (keys %{$counts{word}{err_any}})
1174 | {
1175 |   if ($counts{word}{err_any}{$word} < $thresh)
1176 |   {
1177 |     delete $counts{word}{err_any}{$word} ;
1178 |     next ;
1179 |   }
1180 | 
1181 |   $l = uni_len($word) ;
1182 |   if ($l > $max_word_len)
1183 |   {
1184 |     $max_word_len = $l ;
1185 |   }
1186 | }
1187 | 
1188 | # filter a case when the difference between the error counts
1189 | # for 2-word and 1-word contexts is small
1190 | # (leave the 2-word context)
1191 | 
1192 | foreach $con (keys %{$counts{con_aft_2}{tot}})
1193 | {
1194 |   ($w1) = split(/\+/, $con) ;
1195 |   
1196 |   if (defined $counts{con_aft}{tot}{$w1} &&
1197 |       $counts{con_aft}{tot}{$w1}-$counts{con_aft_2}{tot}{$con} <= 1)
1198 |   {
1199 |     delete $counts{con_aft}{tot}{$w1} ;
1200 |   }
1201 | }
1202 | 
1203 | foreach $con (keys %{$counts{con_bef_2}{tot}})
1204 | {
1205 |   ($w_2, $w_1) = split(/\+/, $con) ;
1206 | 
1207 |   if (defined $counts{con_bef}{tot}{$w_1} &&
1208 |       $counts{con_bef}{tot}{$w_1}-$counts{con_bef_2}{tot}{$con} <= 1)
1209 |   {
1210 |     delete $counts{con_bef}{tot}{$w_1} ;
1211 |   }
1212 | }
1213 | 
1214 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}})
1215 | {
1216 |   ($p1) = split(/\+/, $con_pos) ;
1217 | 
1218 |   if (defined($counts{con_pos_aft}{tot}{$p1}) &&
1219 |       $counts{con_pos_aft}{tot}{$p1}-$counts{con_pos_aft_2}{tot}{$con_pos} <= 1)
1220 |   {
1221 |     delete $counts{con_pos_aft}{tot}{$p1} ;
1222 |   }
1223 | }
1224 | 
1225 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}})
1226 | {
1227 |   ($p_2, $p_1) = split(/\+/, $con_pos) ;
1228 | 
1229 |   if (defined($counts{con_pos_bef}{tot}{$p_1}) &&
1230 |       $counts{con_pos_bef}{tot}{$p_1}-$counts{con_pos_bef_2}{tot}{$con_pos} <= 1)
1231 |   {
1232 |     delete $counts{con_pos_bef}{tot}{$p_1} ;
1233 |   }
1234 | }
1235 | 
1236 | # for each context type, take the three contexts most involved in errors
1237 | 
1238 | $max_con_len = 0 ;
1239 | 
1240 | filter_context_counts($counts{con_bef_2}{tot}, $con_err_num, \$max_con_len) ;
1241 | 
1242 | filter_context_counts($counts{con_bef}{tot}, $con_err_num, \$max_con_len) ;
1243 | 
1244 | filter_context_counts($counts{con_aft}{tot}, $con_err_num, \$max_con_len) ;
1245 | 
1246 | filter_context_counts($counts{con_aft_2}{tot}, $con_err_num, \$max_con_len) ;
1247 | 
1248 | # for each CPOS context type, take the three CPOS contexts most involved in errors
1249 | 
1250 | $max_con_pos_len = 0 ;
1251 | 
1252 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef_2}{tot}})[$con_err_num-1] ;
1253 | 
1254 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}})
1255 | {
1256 |   if ($counts{con_pos_bef_2}{tot}{$con_pos} < $thresh)
1257 |   {
1258 |     delete $counts{con_pos_bef_2}{tot}{$con_pos} ;
1259 |     next ;
1260 |   }
1261 |   if (length($con_pos) > $max_con_pos_len)
1262 |   {
1263 |     $max_con_pos_len = length($con_pos) ;
1264 |   }
1265 | }
1266 | 
1267 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef}{tot}})[$con_err_num-1] ;
1268 | 
1269 | foreach $con_pos (keys %{$counts{con_pos_bef}{tot}})
1270 | {
1271 |   if ($counts{con_pos_bef}{tot}{$con_pos} < $thresh)
1272 |   {
1273 |     delete $counts{con_pos_bef}{tot}{$con_pos} ;
1274 |     next ;
1275 |   }
1276 |   if (length($con_pos) > $max_con_pos_len)
1277 |   {
1278 |     $max_con_pos_len = length($con_pos) ;
1279 |   }
1280 | }
1281 | 
1282 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft}{tot}})[$con_err_num-1] ;
1283 | 
1284 | foreach $con_pos (keys %{$counts{con_pos_aft}{tot}})
1285 | {
1286 |   if ($counts{con_pos_aft}{tot}{$con_pos} < $thresh)
1287 |   {
1288 |     delete $counts{con_pos_aft}{tot}{$con_pos} ;
1289 |     next ;
1290 |   }
1291 |   if (length($con_pos) > $max_con_pos_len)
1292 |   {
1293 |     $max_con_pos_len = length($con_pos) ;
1294 |   }
1295 | }
1296 | 
1297 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft_2}{tot}})[$con_err_num-1] ;
1298 | 
1299 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}})
1300 | {
1301 |   if ($counts{con_pos_aft_2}{tot}{$con_pos} < $thresh)
1302 |   {
1303 |     delete $counts{con_pos_aft_2}{tot}{$con_pos} ;
1304 |     next ;
1305 |   }
1306 |   if (length($con_pos) > $max_con_pos_len)
1307 |   {
1308 |     $max_con_pos_len = length($con_pos) ;
1309 |   }
1310 | }
1311 | 
1312 | # printing
1313 | 
1314 | # ------------- focus words
1315 | 
1316 | printf OUT "\n\n" ;
1317 | printf OUT "  %d focus words where most of the errors occur:\n\n", scalar keys %{$counts{word}{err_any}} ;
1318 | 
1319 | printf OUT "  %-*s | %-4s | %-4s | %-4s | %-4s\n", $max_word_len, ' ', 'any', 'head', 'dep', 'both' ;
1320 | printf OUT "  %s-+------+------+------+------\n", '-' x $max_word_len;
1321 | 
1322 | foreach $word (sort {$counts{word}{err_any}{$b} <=> $counts{word}{err_any}{$a}} keys %{$counts{word}{err_any}})
1323 | {
1324 |     if (!defined($counts{word}{err_head}{$word}))
1325 |     {
1326 | 	$counts{word}{err_head}{$word} = 0 ;
1327 |     }
1328 |     if (! defined($counts{word}{err_dep}{$word}))
1329 |     {
1330 | 	$counts{word}{err_dep}{$word} = 0 ;
1331 |     }
1332 |     if (! defined($counts{word}{err_any}{$word}))
1333 |     {
1334 | 	$counts{word}{err_any}{$word} = 0;
1335 |     }
1336 |     printf OUT "  %-*s | %4d | %4d | %4d | %4d\n",
1337 |     $max_word_len+length($word)-uni_len($word), $word, $counts{word}{err_any}{$word},
1338 |     $counts{word}{err_head}{$word},
1339 |     $counts{word}{err_dep}{$word},
1340 |     $counts{word}{err_dep}{$word}+$counts{word}{err_head}{$word}-$counts{word}{err_any}{$word} ;
1341 | }
1342 | 
1343 | printf OUT "  %s-+------+------+------+------\n", '-' x $max_word_len;
1344 | 
1345 | # ------------- contexts
1346 | 
1347 | printf OUT "\n\n" ;
1348 | 
1349 | printf OUT "  one-token preceeding contexts where most of the errors occur:\n\n" ;
1350 | 
1351 | print_context($counts{con_bef}, $counts{con_pos_bef}, $max_con_len, $max_con_pos_len) ;
1352 | 
1353 | printf OUT "  two-token preceeding contexts where most of the errors occur:\n\n" ;
1354 | 
1355 | print_context($counts{con_bef_2}, $counts{con_pos_bef_2}, $max_con_len, $max_con_pos_len) ;
1356 | 
1357 | printf OUT "  one-token following contexts where most of the errors occur:\n\n" ;
1358 | 
1359 | print_context($counts{con_aft}, $counts{con_pos_aft}, $max_con_len, $max_con_pos_len) ;
1360 | 
1361 | printf OUT "  two-token following contexts where most of the errors occur:\n\n" ;
1362 | 
1363 | print_context($counts{con_aft_2}, $counts{con_pos_aft_2}, $max_con_len, $max_con_pos_len) ;
1364 | 
1365 | # ------------- Sentences
1366 | 
1367 | printf OUT "  Sentence with the highest number of word errors:\n" ;
1368 | $i = (sort { (defined($err_sent[$b]{word}) && $err_sent[$b]{word})
1369 | 		 <=> (defined($err_sent[$a]{word}) && $err_sent[$a]{word}) } 1 .. $sent_num)[0] ;
1370 | printf OUT "   Sentence %d line %d, ", $i, $starts[$i-1] ;
1371 | printf OUT "%d head errors, %d dependency errors, %d word errors\n",
1372 |   $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ;
1373 | 
1374 | printf OUT "\n\n" ;
1375 | 
1376 | printf OUT "  Sentence with the highest number of head errors:\n" ;
1377 | $i = (sort { (defined($err_sent[$b]{head}) && $err_sent[$b]{head}) 
1378 | 		 <=> (defined($err_sent[$a]{head}) && $err_sent[$a]{head}) } 1 .. $sent_num)[0] ;
1379 | printf OUT "   Sentence %d line %d, ", $i, $starts[$i-1] ;
1380 | printf OUT "%d head errors, %d dependency errors, %d word errors\n",
1381 |   $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ;
1382 | 
1383 | printf OUT "\n\n" ;
1384 | 
1385 | printf OUT "  Sentence with the highest number of dependency errors:\n" ;
1386 | $i = (sort { (defined($err_sent[$b]{dep}) && $err_sent[$b]{dep}) 
1387 | 		 <=> (defined($err_sent[$a]{dep}) && $err_sent[$a]{dep}) } 1 .. $sent_num)[0] ;
1388 | printf OUT "   Sentence %d line %d, ", $i, $starts[$i-1] ;
1389 | printf OUT "%d head errors, %d dependency errors, %d word errors\n",
1390 |   $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ;
1391 | 
1392 | #
1393 | # Second pass, collect statistics of the frequent errors
1394 | #
1395 | 
1396 | # filter the errors, leave the most frequent $freq_err_num errors
1397 | 
1398 | $i = 0 ;
1399 | 
1400 | $thresh = (sort {$b <=> $a} values %freq_err)[$freq_err_num-1] ;
1401 | 
1402 | foreach $err (keys %freq_err)
1403 | {
1404 |   if ($freq_err{$err} < $thresh)
1405 |   {
1406 |     delete $freq_err{$err} ;
1407 |   }
1408 | }
1409 | 
1410 | # in case there are several errors with the threshold count
1411 | 
1412 | $freq_err_num = scalar keys %freq_err ;
1413 | 
1414 | %err_counts = () ;
1415 | 
1416 | $eof = 0 ;
1417 | 
1418 | seek (GOLD, 0, 0) ;
1419 | seek (SYS, 0, 0) ;
1420 | 
1421 | while (! $eof)
1422 | { # second reading loop
1423 | 
1424 |   $eof = read_sent(\@sent_gold, \@sent_sys) ;
1425 |   $sent_num++ ;
1426 | 
1427 |   $word_num = scalar @sent_gold ;
1428 | 
1429 |   # printf "$sent_num $word_num\n" ;
1430 |   
1431 |   foreach $i_w (0 .. $word_num-1)
1432 |   { # loop on words
1433 |     ($word, $pos, $head_g, $dep_g)
1434 |       = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ;
1435 | 
1436 |     # printf "%d: %s %s %s %s\n", $i_w,  $word, $pos, $head_g, $dep_g ;
1437 | 
1438 |     if ((! $score_on_punct) && is_uni_punct($word))
1439 |     {
1440 |       # ignore punctuations
1441 |       next ;
1442 |     }
1443 | 
1444 |     ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ;
1445 | 
1446 |     $err_head = ($head_g ne $head_s) ;
1447 |     $err_dep = ($dep_g ne $dep_s) ;
1448 | 
1449 |     $head_err = '-' ;
1450 |     $dep_err = '-' ;
1451 | 
1452 |     if ($head_g eq '0')
1453 |     {
1454 |       $head_aft_bef_g = '0' ;
1455 |     }
1456 |     elsif ($head_g eq $i_w+1)
1457 |     {
1458 |       $head_aft_bef_g = 'e' ;
1459 |     }
1460 |     else
1461 |     {
1462 |       $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ;
1463 |     }
1464 | 
1465 |     if ($head_s eq '0')
1466 |     {
1467 |       $head_aft_bef_s = '0' ;
1468 |     }
1469 |     elsif ($head_s eq $i_w+1)
1470 |     {
1471 |       $head_aft_bef_s = 'e' ;
1472 |     }
1473 |     else
1474 |     {
1475 |       $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ;
1476 |     }
1477 | 
1478 |     $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ;
1479 | 
1480 |     if ($err_head)
1481 |     {
1482 |       if ($head_aft_bef_s eq '0')
1483 |       {
1484 | 	$head_err = 0 ;
1485 |       }
1486 |       else
1487 |       {
1488 | 	$head_err = $head_s-$head_g ;
1489 |       }
1490 |     }
1491 | 
1492 |     if ($err_dep)
1493 |     {
1494 |       $dep_err = $dep_g.'->'.$dep_s ;
1495 |     }
1496 | 
1497 |     if (! ($err_head || $err_dep))
1498 |     {
1499 |       next ;
1500 |     }
1501 | 
1502 |     # handle only the most frequent errors
1503 | 
1504 |     $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ;
1505 | 
1506 |     if (! exists $freq_err{$err})
1507 |     {
1508 |       next ;
1509 |     }
1510 | 
1511 |     ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ;
1512 | 
1513 |     $con_bef = $w_1 ;
1514 |     $con_bef_2 = $w_2.' + '.$w_1 ;
1515 |     $con_aft = $w1 ;
1516 |     $con_aft_2 = $w1.' + '.$w2 ;
1517 | 
1518 |     $con_pos_bef = $p_1 ;
1519 |     $con_pos_bef_2 = $p_2.'+'.$p_1 ;
1520 |     $con_pos_aft = $p1 ;
1521 |     $con_pos_aft_2 = $p1.'+'.$p2 ;
1522 | 
1523 |     @cur_err = ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) ;
1524 | 
1525 |     # printf "# %-25s %-15s %-10s %-25s %-3s %-30s\n",
1526 |     #  $con_bef, $word, $pos, $con_aft, $head_err, $dep_err ;
1527 |     
1528 |     @bits = (0, 0, 0, 0, 0, 0) ;
1529 |     $j = 0 ;
1530 | 
1531 |     while ($j == 0)
1532 |     {
1533 |       for ($i = 0; $i <= $#bits; $i++)
1534 |       {
1535 | 	if ($bits[$i] == 0)
1536 | 	{
1537 | 	  $bits[$i] = 1 ;
1538 | 	  $j = 0 ;
1539 | 	  last ;
1540 | 	}
1541 | 	else
1542 | 	{
1543 | 	  $bits[$i] = 0 ;
1544 | 	  $j = 1 ;
1545 | 	}
1546 |       }
1547 | 
1548 |       @e_bits = @cur_err ;
1549 | 
1550 |       for ($i = 0; $i <= $#bits; $i++)
1551 |       {
1552 | 	if (! $bits[$i])
1553 | 	{
1554 | 	  $e_bits[$i] = '*' ;
1555 | 	}
1556 |       }
1557 | 
1558 |       # include also the last case which is the most general
1559 |       # (wildcards for everything)
1560 |       $err_counts{$err}{join($sep, @e_bits)}++ ;
1561 | 
1562 |     }
1563 | 
1564 |   } # loop on words
1565 | } # second reading loop
1566 | 
1567 | printf OUT "\n\n" ;
1568 | printf OUT "  Specific errors, %d most frequent errors:", $freq_err_num ;
1569 | printf OUT "\n  %s\n", '=' x 41 ;
1570 | 
1571 | 
1572 | # deleting local contexts which are too general
1573 | 
1574 | foreach $err (keys %err_counts)
1575 | {
1576 |   foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}}
1577 | 		    keys %{$err_counts{$err}})
1578 |   {
1579 |     @cur_err = split(/\Q$sep\E/, $loc_con) ;
1580 | 
1581 |     # In this loop, one or two elements of the local context are
1582 |     # replaced with '*' to make it more general. If the entry for
1583 |     # the general context has the same count it is removed.
1584 | 
1585 |     foreach $i (0 .. $#cur_err)
1586 |     {
1587 |       $w1 = $cur_err[$i] ;
1588 |       if ($cur_err[$i] eq '*')
1589 |       {
1590 | 	next ;
1591 |       }
1592 |       $cur_err[$i] = '*' ;
1593 |       $con1 = join($sep, @cur_err) ;
1594 |       if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con})
1595 | 	   && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con}))
1596 |       {
1597 | 	delete $err_counts{$err}{$con1} ;
1598 |       }
1599 |       for ($j = $i+1; $j <=$#cur_err; $j++)
1600 |       {
1601 | 	if ($cur_err[$j] eq '*')
1602 | 	{
1603 | 	  next ;
1604 | 	}
1605 | 	$w2 = $cur_err[$j] ;
1606 | 	$cur_err[$j] = '*' ;
1607 | 	$con1 = join($sep, @cur_err) ;
1608 | 	if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con})
1609 | 	     && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con}))
1610 | 	{
1611 | 	  delete $err_counts{$err}{$con1} ;
1612 | 	}
1613 | 	$cur_err[$j] = $w2 ;
1614 |       }
1615 |       $cur_err[$i] = $w1 ;
1616 |     }
1617 |   }
1618 | }
1619 | 
1620 | # Leaving only the topmost local contexts for each error
1621 | 
1622 | foreach $err (keys %err_counts)
1623 | {
1624 |   $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[$spec_err_loc_con-1] || 0 ;
1625 | 
1626 |   # of the threshold is too low, take the 2nd highest count
1627 |   # (the highest may be the total which is the generic case
1628 |   #   and not relevant for printing)
1629 | 
1630 |   if ($thresh < 5)
1631 |   {
1632 |     $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[1] ;
1633 |   }
1634 | 
1635 |   foreach $loc_con (keys %{$err_counts{$err}})
1636 |   {
1637 |     if ($err_counts{$err}{$loc_con} < $thresh)
1638 |     {
1639 |       delete $err_counts{$err}{$loc_con} ;
1640 |     }
1641 |     else
1642 |     {
1643 |       if ($loc_con ne join($sep, ('*', '*', '*', '*', '*', '*')))
1644 |       {
1645 | 	$loc_con_err_counts{$loc_con}{$err} = $err_counts{$err}{$loc_con} ;
1646 |       }
1647 |     }
1648 |   }
1649 | }
1650 | 
1651 | # printing an error summary
1652 | 
1653 | # calculating the context field length
1654 | 
1655 | $max_word_spec_len= length('word') ;
1656 | $max_con_aft_len = length('word') ;
1657 | $max_con_bef_len = length('word') ;
1658 | $max_con_pos_len = length('CPOS') ;
1659 | 
1660 | foreach $err (keys %err_counts)
1661 | {
1662 |   foreach $loc_con (sort keys %{$err_counts{$err}})
1663 |   {
1664 |     ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) =
1665 |       split(/\Q$sep\E/, $loc_con) ;
1666 | 
1667 |     $l = uni_len($word) ;
1668 |     if ($l > $max_word_spec_len)
1669 |     {
1670 |       $max_word_spec_len = $l ;
1671 |     }
1672 | 
1673 |     $l = uni_len($con_bef) ;
1674 |     if ($l > $max_con_bef_len)
1675 |     {
1676 |       $max_con_bef_len = $l ;
1677 |     }
1678 | 
1679 |     $l = uni_len($con_aft) ;
1680 |     if ($l > $max_con_aft_len)
1681 |     {
1682 |       $max_con_aft_len = $l ;
1683 |     }
1684 | 
1685 |     if (length($con_pos_aft) > $max_con_pos_len)
1686 |     {
1687 |       $max_con_pos_len = length($con_pos_aft) ;
1688 |     }
1689 | 
1690 |     if (length($con_pos_bef) > $max_con_pos_len)
1691 |     {
1692 |       $max_con_pos_len = length($con_pos_bef) ;
1693 |     }
1694 |   }
1695 | }
1696 | 
1697 | $err_counter = 0 ;
1698 | 
1699 | foreach $err (sort {$freq_err{$b} <=> $freq_err{$a}} keys %freq_err)
1700 | {
1701 | 
1702 |   ($head_err, $head_aft_bef, $dep_err) = split(/\Q$sep\E/, $err) ;
1703 | 
1704 |   $err_counter++ ;
1705 |   $err_desc{$err} = sprintf("%2d. ", $err_counter).
1706 |     describe_err($head_err, $head_aft_bef, $dep_err) ;
1707 |   
1708 |   # printf OUT "  %-3s %-30s %d\n", $head_err, $dep_err, $freq_err{$err} ;
1709 |   printf OUT "\n" ;
1710 |   printf OUT "  %s : %d times\n", $err_desc{$err}, $freq_err{$err} ;
1711 | 
1712 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n",
1713 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1714 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1715 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1716 | 
1717 |   printf OUT "  %-*s | %-*s | %-*s | %s\n",
1718 |       $max_con_pos_len+$max_con_bef_len+3, '  Before',
1719 | 	$max_word_spec_len+$max_pos_len+3, '   Focus',
1720 | 	  $max_con_pos_len+$max_con_aft_len+3, '  After',
1721 | 	    'Count' ;
1722 | 
1723 |   printf OUT "  %-*s   %-*s | %-*s   %-*s | %-*s   %-*s |\n",
1724 |     $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word',
1725 |        $max_pos_len, 'CPOS', $max_word_spec_len, 'word',
1726 | 	$max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ;
1727 |   
1728 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n",
1729 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1730 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1731 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1732 | 
1733 |   foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}}
1734 | 		    keys %{$err_counts{$err}})
1735 |   {
1736 |     if ($loc_con eq join($sep, ('*', '*', '*', '*', '*', '*')))
1737 |     {
1738 |       next ;
1739 |     }
1740 | 
1741 |     $con1 = $loc_con ;
1742 |     $con1 =~ s/\*/ /g ;
1743 | 
1744 |     ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) =
1745 |       split(/\Q$sep\E/, $con1) ;
1746 | 
1747 |     printf OUT "  %-*s | %-*s | %-*s | %-*s | %-*s | %-*s | %3d\n",
1748 |       $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef,
1749 | 	  $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word,
1750 | 	    $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft,
1751 | 	      $err_counts{$err}{$loc_con} ;
1752 |   }
1753 |   
1754 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n",
1755 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1756 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1757 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1758 | 
1759 | }
1760 | 
1761 | printf OUT "\n\n" ;
1762 | printf OUT "  Local contexts involved in several frequent errors:" ;
1763 | printf OUT "\n  %s\n", '=' x 51 ;
1764 | printf OUT "\n\n" ;
1765 | 
1766 | foreach $loc_con (sort {scalar keys %{$loc_con_err_counts{$b}} <=>
1767 | 			  scalar keys %{$loc_con_err_counts{$a}}}
1768 | 		  keys %loc_con_err_counts)
1769 | {
1770 | 
1771 |   if (scalar keys %{$loc_con_err_counts{$loc_con}} == 1)
1772 |   {
1773 |     next ;
1774 |   }
1775 |   
1776 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n",
1777 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1778 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1779 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1780 | 
1781 |   printf OUT "  %-*s | %-*s | %-*s \n",
1782 |       $max_con_pos_len+$max_con_bef_len+3, '  Before',
1783 | 	$max_word_spec_len+$max_pos_len+3, '   Focus',
1784 | 	  $max_con_pos_len+$max_con_aft_len+3, '  After' ;
1785 | 
1786 |   printf OUT "  %-*s   %-*s | %-*s   %-*s | %-*s   %-*s \n",
1787 |     $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word',
1788 |        $max_pos_len, 'CPOS', $max_word_spec_len, 'word',
1789 | 	$max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ;
1790 |   
1791 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n",
1792 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1793 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1794 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1795 | 
1796 |   $con1 = $loc_con ;
1797 |   $con1 =~ s/\*/ /g ;
1798 | 
1799 |   ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) =
1800 |       split(/\Q$sep\E/, $con1) ;
1801 | 
1802 |   printf OUT "  %-*s | %-*s | %-*s | %-*s | %-*s | %-*s \n",
1803 |     $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef,
1804 |       $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word,
1805 | 	$max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft ;
1806 | 	  
1807 |   printf OUT "  %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n",
1808 |     '-' x $max_con_pos_len, '-' x $max_con_bef_len,
1809 |        '-' x $max_pos_len, '-' x $max_word_spec_len,
1810 | 	'-' x $max_con_pos_len, '-' x $max_con_aft_len ;
1811 | 
1812 |   foreach $err (sort {$loc_con_err_counts{$loc_con}{$b} <=>
1813 | 			$loc_con_err_counts{$loc_con}{$a}}
1814 | 		keys %{$loc_con_err_counts{$loc_con}})
1815 |   {
1816 |     printf OUT "  %s : %d times\n", $err_desc{$err},
1817 |       $loc_con_err_counts{$loc_con}{$err} ;
1818 |   }
1819 | 
1820 |   printf OUT "\n" ;
1821 | }
1822 | 
1823 | close GOLD ;
1824 | close SYS ;
1825 | 
1826 | close OUT ;
1827 | 


--------------------------------------------------------------------------------
/bmstparser/src/utils/evaluation_script/conll17_ud_eval.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # CoNLL 2017 UD Parsing evaluation script.
  4 | #
  5 | # Compatible with Python 2.7 and 3.2+, can be used either as a module
  6 | # or a standalone executable.
  7 | #
  8 | # Copyright 2017 Institute of Formal and Applied Linguistics (UFAL),
  9 | # Faculty of Mathematics and Physics, Charles University, Czech Republic.
 10 | #
 11 | # Changelog:
 12 | # - [02 Jan 2017] Version 0.9: Initial release
 13 | # - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation
 14 | # - [10 Mar 2017] Version 1.0: Add documentation and test
 15 | #                              Compare HEADs correctly using aligned words
 16 | #                              Allow evaluation with errorneous spaces in forms
 17 | #                              Compare forms in LCS case insensitively
 18 | #                              Detect cycles and multiple root nodes
 19 | #                              Compute AlignedAccuracy
 20 | 
 21 | # Command line usage
 22 | # ------------------
 23 | # conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file
 24 | #
 25 | # - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metrics
 26 | #   is printed
 27 | # - if -v is given, several metrics are printed (as precision, recall, F1 score,
 28 | #   and in case the metric is computed on aligned words also accuracy on these):
 29 | #   - Tokens: how well do the gold tokens match system tokens
 30 | #   - Sentences: how well do the gold sentences match system sentences
 31 | #   - Words: how well can the gold words be aligned to system words
 32 | #   - UPOS: using aligned words, how well does UPOS match
 33 | #   - XPOS: using aligned words, how well does XPOS match
 34 | #   - Feats: using aligned words, how well does FEATS match
 35 | #   - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match
 36 | #   - Lemmas: using aligned words, how well does LEMMA match
 37 | #   - UAS: using aligned words, how well does HEAD match
 38 | #   - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match
 39 | # - if weights_file is given (with lines containing deprel-weight pairs),
 40 | #   one more metric is shown:
 41 | #   - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight
 42 | 
 43 | # API usage
 44 | # ---------
 45 | # - load_conllu(file)
 46 | #   - loads CoNLL-U file from given file object to an internal representation
 47 | #   - the file object should return str on both Python 2 and Python 3
 48 | #   - raises UDError exception if the given file cannot be loaded
 49 | # - evaluate(gold_ud, system_ud)
 50 | #   - evaluate the given gold and system CoNLL-U files (loaded with load_conllu)
 51 | #   - raises UDError if the concatenated tokens of gold and system file do not match
 52 | #   - returns a dictionary with the metrics described above, each metrics having
 53 | #     three fields: precision, recall and f1
 54 | 
 55 | # Description of token matching
 56 | # -----------------------------
 57 | # In order to match tokens of gold file and system file, we consider the text
 58 | # resulting from concatenation of gold tokens and text resulting from
 59 | # concatenation of system tokens. These texts should match -- if they do not,
 60 | # the evaluation fails.
 61 | #
 62 | # If the texts do match, every token is represented as a range in this original
 63 | # text, and tokens are equal only if their range is the same.
 64 | 
 65 | # Description of word matching
 66 | # ----------------------------
 67 | # When matching words of gold file and system file, we first match the tokens.
 68 | # The words which are also tokens are matched as tokens, but words in multi-word
 69 | # tokens have to be handled differently.
 70 | #
 71 | # To handle multi-word tokens, we start by finding "multi-word spans".
 72 | # Multi-word span is a span in the original text such that
 73 | # - it contains at least one multi-word token
 74 | # - all multi-word tokens in the span (considering both gold and system ones)
 75 | #   are completely inside the span (i.e., they do not "stick out")
 76 | # - the multi-word span is as small as possible
 77 | #
 78 | # For every multi-word span, we align the gold and system words completely
 79 | # inside this span using LCS on their FORMs. The words not intersecting
 80 | # (even partially) any multi-word span are then aligned as tokens.
 81 | 
 82 | 
 83 | from __future__ import division
 84 | from __future__ import print_function
 85 | 
 86 | import argparse
 87 | import io
 88 | import sys
 89 | import unittest
 90 | 
 91 | # CoNLL-U column names
 92 | ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)
 93 | 
 94 | # UD Error is used when raising exceptions in this module
 95 | class UDError(Exception):
 96 |     pass
 97 | 
 98 | # Load given CoNLL-U file into internal representation
 99 | def load_conllu(file):
100 |     # Internal representation classes
101 |     class UDRepresentation:
102 |         def __init__(self):
103 |             # Characters of all the tokens in the whole file.
104 |             # Whitespace between tokens is not included.
105 |             self.characters = []
106 |             # List of UDSpan instances with start&end indices into `characters`.
107 |             self.tokens = []
108 |             # List of UDWord instances.
109 |             self.words = []
110 |             # List of UDSpan instances with start&end indices into `characters`.
111 |             self.sentences = []
112 |     class UDSpan:
113 |         def __init__(self, start, end):
114 |             self.start = start
115 |             # Note that self.end marks the first position **after the end** of span,
116 |             # so we can use characters[start:end] or range(start, end).
117 |             self.end = end
118 |     class UDWord:
119 |         def __init__(self, span, columns, is_multiword):
120 |             # Span of this word (or MWT, see below) within ud_representation.characters.
121 |             self.span = span
122 |             # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,...
123 |             self.columns = columns
124 |             # is_multiword==True means that this word is part of a multi-word token.
125 |             # In that case, self.span marks the span of the whole multi-word token.
126 |             self.is_multiword = is_multiword
127 |             # Reference to the UDWord instance representing the HEAD (or None if root).
128 |             self.parent = None
129 |             # Let's ignore language-specific deprel subtypes.
130 |             self.columns[DEPREL] = columns[DEPREL].split(':')[0]
131 | 
132 |     ud = UDRepresentation()
133 | 
134 |     # Load the CoNLL-U file
135 |     index, sentence_start = 0, None
136 |     while True:
137 |         line = file.readline()
138 |         if not line:
139 |             break
140 |         line = line.rstrip("\r\n")
141 | 
142 |         # Handle sentence start boundaries
143 |         if sentence_start is None:
144 |             # Skip comments
145 |             if line.startswith("#"):
146 |                 continue
147 |             # Start a new sentence
148 |             ud.sentences.append(UDSpan(index, 0))
149 |             sentence_start = len(ud.words)
150 |         if not line:
151 |             # Add parent UDWord links and check there are no cycles
152 |             def process_word(word):
153 |                 if word.parent == "remapping":
154 |                     raise UDError("There is a cycle in a sentence")
155 |                 if word.parent is None:
156 |                     head = int(word.columns[HEAD])
157 |                     if head > len(ud.words) - sentence_start:
158 |                         raise UDError("HEAD '{}' points outside of the sentence".format(word.columns[HEAD]))
159 |                     if head:
160 |                         parent = ud.words[sentence_start + head - 1]
161 |                         word.parent = "remapping"
162 |                         process_word(parent)
163 |                         word.parent = parent
164 | 
165 |             for word in ud.words[sentence_start:]:
166 |                 process_word(word)
167 | 
168 |             # Check there is a single root node
169 | #            if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1:
170 | #                print([word.parent for word in ud.words[sentence_start:]])
171 | #                print([word.columns for word in ud.words[sentence_start:]])
172 | #                raise UDError("There are multiple roots in a sentence")
173 | 
174 |             # End the sentence
175 |             ud.sentences[-1].end = index
176 |             sentence_start = None
177 |             continue
178 | 
179 |         # Read next token/word
180 |         columns = line.split("\t")
181 |         if len(columns) != 10:
182 |             raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(line))
183 | 
184 |         # Skip empty nodes
185 |         if "." in columns[ID]:
186 |             continue
187 | 
188 |         # Delete spaces from FORM  so gold.characters == system.characters
189 |         # even if one of them tokenizes the space.
190 |         columns[FORM] = columns[FORM].replace(" ", "")
191 |         if not columns[FORM]:
192 |             raise UDError("There is an empty FORM in the CoNLL-U file")
193 | 
194 |         # Save token
195 |         ud.characters.extend(columns[FORM])
196 |         ud.tokens.append(UDSpan(index, index + len(columns[FORM])))
197 |         index += len(columns[FORM])
198 | 
199 |         # Handle multi-word tokens to save word(s)
200 |         if "-" in columns[ID]:
201 |             try:
202 |                 start, end = map(int, columns[ID].split("-"))
203 |             except:
204 |                 raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID]))
205 | 
206 |             for _ in range(start, end + 1):
207 |                 word_line = file.readline().rstrip("\r\n")
208 |                 word_columns = word_line.split("\t")
209 |                 if len(word_columns) != 10:
210 |                     raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(word_line))
211 |                 ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True))
212 |         # Basic tokens/words
213 |         else:
214 |             try:
215 |                 word_id = int(columns[ID])
216 |             except:
217 |                 raise UDError("Cannot parse word ID '{}'".format(columns[ID]))
218 |             if word_id != len(ud.words) - sentence_start + 1:
219 |                 raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1))
220 | 
221 |             try:
222 |                 head_id = int(columns[HEAD])
223 |             except:
224 |                 raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD]))
225 |             if head_id < 0:
226 |                 raise UDError("HEAD cannot be negative")
227 | 
228 |             ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False))
229 | 
230 |     if sentence_start is not None:
231 |         raise UDError("The CoNLL-U file does not end with empty line")
232 | 
233 |     return ud
234 | 
235 | # Evaluate the gold and system treebanks (loaded using load_conllu).
236 | def evaluate(gold_ud, system_ud, deprel_weights=None):
237 |     class Score:
238 |         def __init__(self, gold_total, system_total, correct, aligned_total=None):
239 |             self.precision = correct / system_total if system_total else 0.0
240 |             self.recall = correct / gold_total if gold_total else 0.0
241 |             self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0
242 |             self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total
243 |     class AlignmentWord:
244 |         def __init__(self, gold_word, system_word):
245 |             self.gold_word = gold_word
246 |             self.system_word = system_word
247 |             self.gold_parent = None
248 |             self.system_parent_gold_aligned = None
249 |     class Alignment:
250 |         def __init__(self, gold_words, system_words):
251 |             self.gold_words = gold_words
252 |             self.system_words = system_words
253 |             self.matched_words = []
254 |             self.matched_words_map = {}
255 |         def append_aligned_words(self, gold_word, system_word):
256 |             self.matched_words.append(AlignmentWord(gold_word, system_word))
257 |             self.matched_words_map[system_word] = gold_word
258 |         def fill_parents(self):
259 |             # We represent root parents in both gold and system data by '0'.
260 |             # For gold data, we represent non-root parent by corresponding gold word.
261 |             # For system data, we represent non-root parent by either gold word aligned
262 |             # to parent system nodes, or by None if no gold words is aligned to the parent.
263 |             for words in self.matched_words:
264 |                 words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0
265 |                 words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \
266 |                     if words.system_word.parent is not None else 0
267 | 
268 |     def lower(text):
269 |         if sys.version_info < (3, 0) and isinstance(text, str):
270 |             return text.decode("utf-8").lower()
271 |         return text.lower()
272 | 
273 |     def spans_score(gold_spans, system_spans):
274 |         correct, gi, si = 0, 0, 0
275 |         while gi < len(gold_spans) and si < len(system_spans):
276 |             if system_spans[si].start < gold_spans[gi].start:
277 |                 si += 1
278 |             elif gold_spans[gi].start < system_spans[si].start:
279 |                 gi += 1
280 |             else:
281 |                 correct += gold_spans[gi].end == system_spans[si].end
282 |                 si += 1
283 |                 gi += 1
284 | 
285 |         return Score(len(gold_spans), len(system_spans), correct)
286 | 
287 |     def alignment_score(alignment, key_fn, weight_fn=lambda w: 1):
288 |         gold, system, aligned, correct = 0, 0, 0, 0
289 | 
290 |         for word in alignment.gold_words:
291 |             gold += weight_fn(word)
292 | 
293 |         for word in alignment.system_words:
294 |             system += weight_fn(word)
295 | 
296 |         for words in alignment.matched_words:
297 |             aligned += weight_fn(words.gold_word)
298 | 
299 |         if key_fn is None:
300 |             # Return score for whole aligned words
301 |             return Score(gold, system, aligned)
302 | 
303 |         for words in alignment.matched_words:
304 |             if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned):
305 |                 correct += weight_fn(words.gold_word)
306 | 
307 |         return Score(gold, system, correct, aligned)
308 | 
309 |     def beyond_end(words, i, multiword_span_end):
310 |         if i >= len(words):
311 |             return True
312 |         if words[i].is_multiword:
313 |             return words[i].span.start >= multiword_span_end
314 |         return words[i].span.end > multiword_span_end
315 | 
316 |     def extend_end(word, multiword_span_end):
317 |         if word.is_multiword and word.span.end > multiword_span_end:
318 |             return word.span.end
319 |         return multiword_span_end
320 | 
321 |     def find_multiword_span(gold_words, system_words, gi, si):
322 |         # We know gold_words[gi].is_multiword or system_words[si].is_multiword.
323 |         # Find the start of the multiword span (gs, ss), so the multiword span is minimal.
324 |         # Initialize multiword_span_end characters index.
325 |         if gold_words[gi].is_multiword:
326 |             multiword_span_end = gold_words[gi].span.end
327 |             if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start:
328 |                 si += 1
329 |         else: # if system_words[si].is_multiword
330 |             multiword_span_end = system_words[si].span.end
331 |             if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start:
332 |                 gi += 1
333 |         gs, ss = gi, si
334 | 
335 |         # Find the end of the multiword span
336 |         # (so both gi and si are pointing to the word following the multiword span end).
337 |         while not beyond_end(gold_words, gi, multiword_span_end) or \
338 |               not beyond_end(system_words, si, multiword_span_end):
339 |             if gi < len(gold_words) and (si >= len(system_words) or
340 |                                          gold_words[gi].span.start <= system_words[si].span.start):
341 |                 multiword_span_end = extend_end(gold_words[gi], multiword_span_end)
342 |                 gi += 1
343 |             else:
344 |                 multiword_span_end = extend_end(system_words[si], multiword_span_end)
345 |                 si += 1
346 |         return gs, ss, gi, si
347 | 
348 |     def compute_lcs(gold_words, system_words, gi, si, gs, ss):
349 |         lcs = [[0] * (si - ss) for i in range(gi - gs)]
350 |         for g in reversed(range(gi - gs)):
351 |             for s in reversed(range(si - ss)):
352 |                 if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
353 |                     lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0)
354 |                 lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0)
355 |                 lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0)
356 |         return lcs
357 | 
358 |     def align_words(gold_words, system_words):
359 |         alignment = Alignment(gold_words, system_words)
360 | 
361 |         gi, si = 0, 0
362 |         while gi < len(gold_words) and si < len(system_words):
363 |             if gold_words[gi].is_multiword or system_words[si].is_multiword:
364 |                 # A: Multi-word tokens => align via LCS within the whole "multiword span".
365 |                 gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si)
366 | 
367 |                 if si > ss and gi > gs:
368 |                     lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss)
369 | 
370 |                     # Store aligned words
371 |                     s, g = 0, 0
372 |                     while g < gi - gs and s < si - ss:
373 |                         if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
374 |                             alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s])
375 |                             g += 1
376 |                             s += 1
377 |                         elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0):
378 |                             g += 1
379 |                         else:
380 |                             s += 1
381 |             else:
382 |                 # B: No multi-word token => align according to spans.
383 |                 if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end):
384 |                     alignment.append_aligned_words(gold_words[gi], system_words[si])
385 |                     gi += 1
386 |                     si += 1
387 |                 elif gold_words[gi].span.start <= system_words[si].span.start:
388 |                     gi += 1
389 |                 else:
390 |                     si += 1
391 | 
392 |         alignment.fill_parents()
393 | 
394 |         return alignment
395 | 
396 |     # Check that underlying character sequences do match
397 |     if gold_ud.characters != system_ud.characters:
398 |         index = 0
399 |         while gold_ud.characters[index] == system_ud.characters[index]:
400 |             index += 1
401 | 
402 |         raise UDError(
403 |             "The concatenation of tokens in gold file and in system file differ!\n" +
404 |             "First 20 differing characters in gold file: '{}' and system file: '{}'".format(
405 |                 "".join(gold_ud.characters[index:index + 20]),
406 |                 "".join(system_ud.characters[index:index + 20])
407 |             )
408 |         )
409 | 
410 |     # Align words
411 |     alignment = align_words(gold_ud.words, system_ud.words)
412 | 
413 |     # Compute the F1-scores
414 |     result = {
415 |         "Tokens": spans_score(gold_ud.tokens, system_ud.tokens),
416 |         "Sentences": spans_score(gold_ud.sentences, system_ud.sentences),
417 |         "Words": alignment_score(alignment, None),
418 |         "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]),
419 |         "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]),
420 |         "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]),
421 |         "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])),
422 |         "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]),
423 |         "UAS": alignment_score(alignment, lambda w, parent: parent),
424 |         "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])),
425 |     }
426 | 
427 |     # Add WeightedLAS if weights are given
428 |     if deprel_weights is not None:
429 |         def weighted_las(word):
430 |             return deprel_weights.get(word.columns[DEPREL], 1.0)
431 |         result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las)
432 | 
433 |     return result
434 | 
435 | def load_deprel_weights(weights_file):
436 |     if weights_file is None:
437 |         return None
438 | 
439 |     deprel_weights = {}
440 |     for line in weights_file:
441 |         # Ignore comments and empty lines
442 |         if line.startswith("#") or not line.strip():
443 |             continue
444 | 
445 |         columns = line.rstrip("\r\n").split()
446 |         if len(columns) != 2:
447 |             raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line))
448 | 
449 |         deprel_weights[columns[0]] = float(columns[1])
450 | 
451 |     return deprel_weights
452 | 
453 | def load_conllu_file(path):
454 |     _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {}))
455 |     return load_conllu(_file)
456 | 
457 | def evaluate_wrapper(args):
458 |     # Load CoNLL-U files
459 |     gold_ud = load_conllu_file(args.gold_file)
460 |     system_ud = load_conllu_file(args.system_file)
461 | 
462 |     # Load weights if requested
463 |     deprel_weights = load_deprel_weights(args.weights)
464 | 
465 |     return evaluate(gold_ud, system_ud, deprel_weights)
466 | 
467 | def main():
468 |     # Parse arguments
469 |     parser = argparse.ArgumentParser()
470 |     parser.add_argument("gold_file", type=str,
471 |                         help="Name of the CoNLL-U file with the gold data.")
472 |     parser.add_argument("system_file", type=str,
473 |                         help="Name of the CoNLL-U file with the predicted data.")
474 |     parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None,
475 |                         metavar="deprel_weights_file",
476 |                         help="Compute WeightedLAS using given weights for Universal Dependency Relations.")
477 |     parser.add_argument("--verbose", "-v", default=0, action="count",
478 |                         help="Print all metrics.")
479 |     args = parser.parse_args()
480 | 
481 |     # Use verbose if weights are supplied
482 |     if args.weights is not None and not args.verbose:
483 |         args.verbose = 1
484 | 
485 |     # Evaluate
486 |     evaluation = evaluate_wrapper(args)
487 | 
488 |     # Print the evaluation
489 |     if not args.verbose:
490 |         print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1))
491 |     else:
492 |         metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"]
493 |         if args.weights is not None:
494 |             metrics.append("WeightedLAS")
495 | 
496 |         print("Metrics    | Precision |    Recall |  F1 Score | AligndAcc")
497 |         print("-----------+-----------+-----------+-----------+-----------")
498 |         for metric in metrics:
499 |             print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format(
500 |                 metric,
501 |                 100 * evaluation[metric].precision,
502 |                 100 * evaluation[metric].recall,
503 |                 100 * evaluation[metric].f1,
504 |                 "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else ""
505 |             ))
506 | 
507 | if __name__ == "__main__":
508 |     main()
509 | 
510 | # Tests, which can be executed with `python -m unittest conll17_ud_eval`.
511 | class TestAlignment(unittest.TestCase):
512 |     @staticmethod
513 |     def _load_words(words):
514 |         """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors."""
515 |         lines, num_words = [], 0
516 |         for w in words:
517 |             parts = w.split(" ")
518 |             if len(parts) == 1:
519 |                 num_words += 1
520 |                 lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1)))
521 |             else:
522 |                 lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0]))
523 |                 for part in parts[1:]:
524 |                     num_words += 1
525 |                     lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1)))
526 |         return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"])))
527 | 
528 |     def _test_exception(self, gold, system):
529 |         self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system))
530 | 
531 |     def _test_ok(self, gold, system, correct):
532 |         metrics = evaluate(self._load_words(gold), self._load_words(system))
533 |         gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold))
534 |         system_words = sum((max(1, len(word.split(" ")) - 1) for word in system))
535 |         self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1),
536 |                          (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words)))
537 | 
538 |     def test_exception(self):
539 |         self._test_exception(["a"], ["b"])
540 | 
541 |     def test_equal(self):
542 |         self._test_ok(["a"], ["a"], 1)
543 |         self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3)
544 | 
545 |     def test_equal_with_multiword(self):
546 |         self._test_ok(["abc a b c"], ["a", "b", "c"], 3)
547 |         self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4)
548 |         self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4)
549 |         self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5)
550 | 
551 |     def test_alignment(self):
552 |         self._test_ok(["abcd"], ["a", "b", "c", "d"], 0)
553 |         self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1)
554 |         self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2)
555 |         self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2)
556 |         self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4)
557 |         self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2)
558 |         self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1)
559 | 


--------------------------------------------------------------------------------
/bmstparser/src/utils/evaluation_script/weights.clas:
--------------------------------------------------------------------------------
 1 | # Relations used to attach function words to content words
 2 | aux   0.1
 3 | case  0.1
 4 | cc    0.1
 5 | clf   0.1
 6 | cop   0.1
 7 | det   0.1
 8 | mark  0.1
 9 | 
10 | # Punctuation
11 | punct 0
12 | 


--------------------------------------------------------------------------------