├── .gitignore ├── LICENSE ├── README.md ├── bmstparser └── src │ ├── decoder.py │ ├── mstlstm.py │ ├── parser.py │ ├── utils.py │ └── utils │ ├── eval.pl │ └── evaluation_script │ ├── conll17_ud_eval.py │ └── weights.clas └── corpus ├── en-ud-dev.conllu ├── en-ud-test.conllu ├── en-ud-train.conllu ├── zh-ud-dev.conllu ├── zh-ud-test.conllu └── zh-ud-train.conllu /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # file for test 7 | test.py 8 | 9 | #editorial solution source code 10 | editorial/*.md 11 | 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | ## model 109 | model/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A PyTorch implementation of the BIST Parsers (graph-based parser only)
2 | This implementation is a simplified version that removes some unnecessary flags and uses PyTorch's `nn.LSTM` module instead of `LSTMCell` to construct the LSTM network. In addition, more tags are supported; see the option list for details.
3 | The techniques behind the parser are described in the paper [Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations](https://www.transacl.org/ojs/index.php/tacl/article/viewFile/885/198).
4 | 
5 | #### Required software
6 | 
7 | * Python 3.x interpreter
8 | * [PyTorch library](http://pytorch.org/)
9 | 
10 | 
11 | #### Data format:
12 | The software requires a `training.conll` and a `development.conll` file formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat), or a `training.conllu` and a `development.conllu` file formatted according to the [CoNLL-U data format](http://universaldependencies.org/format.html).
13 | 
14 | #### Train a parsing model
15 | 
16 |     python src/parser.py --outdir [results directory] --train training.conll --dev development.conll --epochs 30 --lstmdims 125 --lstmlayers 2 [--extrn extrn.vectors]
17 | 
18 | #### Parse data with your parsing model
19 | 
20 | The command for parsing a `test.conll` file formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat) with a previously trained model is:
21 | 
22 |     python src/parser.py --predict --outdir [results directory] --test test.conll [--extrn extrn.vectors] --model [trained model file] --params [parameter file generated during training]
23 | 
24 | The parser will store the resulting CoNLL file in the output directory (`--outdir`).
25 | 
26 | #### Some instructions
27 | 
28 | 1. The multiple-root check of the evaluation script is turned off (see [here](https://github.com/wddabc/bist-parser/blob/pytorch/bmstparser/src/utils/evaluation_script/conll17_ud_eval.py#L168-L172)), because the parser might generate trees with multiple roots. (See the discussion [here](https://github.com/elikip/bist-parser/issues/10).)
29 | 2. This version deletes some unnecessary flags and makes the bi-LSTM mandatory (two bi-LSTM layers).
30 | 3. Refer to the `forward` method of the MST-parser model for the dropout rates of the different components.
31 | 4. If you have any ideas that could improve performance, please contact me and discuss them.
--------------------------------------------------------------------------------
/bmstparser/src/decoder.py:
--------------------------------------------------------------------------------
1 | # This file contains routines from the Lisbon Machine Learning summer school.
2 | # The code is freely distributed under an MIT license. https://github.com/LxMLS/lxmls-toolkit/
3 | 
4 | import numpy as np
5 | 
6 | def parse_proj(scores, gold=None):
7 |     '''
8 |     Parse using Eisner's algorithm.
9 |     '''
10 |     nr, nc = np.shape(scores)
11 |     if nr != nc:
12 |         raise ValueError("scores must be a square matrix with nw+1 rows")
13 | 
14 |     N = nr - 1  # Number of words (excluding root).
15 | 
16 |     # Initialize CKY table.
17 |     complete = np.zeros([N + 1, N + 1, 2])  # s, t, direction (right=1).
18 |     incomplete = np.zeros([N + 1, N + 1, 2])  # s, t, direction (right=1).
19 |     complete_backtrack = -np.ones([N + 1, N + 1, 2], dtype=int)  # s, t, direction (right=1).
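    # Chart semantics: complete[s, t, d] holds the best score of a subtree spanning
    # s..t that is headed at the right end when d == 1 (at the left end when d == 0)
    # and can take no further dependents inside the span; incomplete[s, t, d] holds
    # the best score of a span still waiting for the arc between s and t to be added.
    # The *_backtrack tables record the argmax split points that backtrack_eisner
    # recovers below.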
20 | incomplete_backtrack = -np.ones([N + 1, N + 1, 2], dtype=int) # s, t, direction (right=1). 21 | 22 | incomplete[0, :, 0] -= np.inf 23 | 24 | # Loop from smaller items to larger items. 25 | for k in range(1, N + 1): 26 | for s in range(N - k + 1): 27 | t = s + k 28 | 29 | # First, create incomplete items. 30 | # left tree 31 | incomplete_vals0 = complete[s, s:t, 1] + complete[(s + 1):(t + 1), t, 0] + scores[t, s] + ( 32 | 0.0 if gold is not None and gold[s] == t else 1.0) 33 | incomplete[s, t, 0] = np.max(incomplete_vals0) 34 | incomplete_backtrack[s, t, 0] = s + np.argmax(incomplete_vals0) 35 | # right tree 36 | incomplete_vals1 = complete[s, s:t, 1] + complete[(s + 1):(t + 1), t, 0] + scores[s, t] + ( 37 | 0.0 if gold is not None and gold[t] == s else 1.0) 38 | incomplete[s, t, 1] = np.max(incomplete_vals1) 39 | incomplete_backtrack[s, t, 1] = s + np.argmax(incomplete_vals1) 40 | 41 | # Second, create complete items. 42 | # left tree 43 | complete_vals0 = complete[s, s:t, 0] + incomplete[s:t, t, 0] 44 | complete[s, t, 0] = np.max(complete_vals0) 45 | complete_backtrack[s, t, 0] = s + np.argmax(complete_vals0) 46 | # right tree 47 | complete_vals1 = incomplete[s, (s + 1):(t + 1), 1] + complete[(s + 1):(t + 1), t, 1] 48 | complete[s, t, 1] = np.max(complete_vals1) 49 | complete_backtrack[s, t, 1] = s + 1 + np.argmax(complete_vals1) 50 | 51 | value = complete[0][N][1] 52 | heads = [-1 for _ in range(N + 1)] # -np.ones(N+1, dtype=int) 53 | backtrack_eisner(incomplete_backtrack, complete_backtrack, 0, N, 1, 1, heads) 54 | 55 | value_proj = 0.0 56 | for m in range(1, N + 1): 57 | h = heads[m] 58 | value_proj += scores[h, m] 59 | 60 | return heads 61 | 62 | 63 | def backtrack_eisner(incomplete_backtrack, complete_backtrack, s, t, direction, complete, heads): 64 | ''' 65 | Backtracking step in Eisner's algorithm. 66 | - incomplete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position, 67 | an end position, and a direction flag (0 means left, 1 means right). This array contains 68 | the arg-maxes of each step in the Eisner algorithm when building *incomplete* spans. 69 | - complete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position, 70 | an end position, and a direction flag (0 means left, 1 means right). This array contains 71 | the arg-maxes of each step in the Eisner algorithm when building *complete* spans. 72 | - s is the current start of the span 73 | - t is the current end of the span 74 | - direction is 0 (left attachment) or 1 (right attachment) 75 | - complete is 1 if the current span is complete, and 0 otherwise 76 | - heads is a (NW+1)-sized numpy array of integers which is a placeholder for storing the 77 | head of each word. 
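    Note: heads is filled in place. Position 0 (the artificial root) is never
    assigned a head, so heads[0] keeps its initial value of -1.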
78 | ''' 79 | if s == t: 80 | return 81 | if complete: 82 | r = complete_backtrack[s][t][direction] 83 | if direction == 0: 84 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 0, 1, heads) 85 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 0, 0, heads) 86 | return 87 | else: 88 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 0, heads) 89 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 1, 1, heads) 90 | return 91 | else: 92 | r = incomplete_backtrack[s][t][direction] 93 | if direction == 0: 94 | heads[s] = t 95 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads) 96 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r + 1, t, 0, 1, heads) 97 | return 98 | else: 99 | heads[t] = s 100 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads) 101 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r + 1, t, 0, 1, heads) 102 | return 103 | -------------------------------------------------------------------------------- /bmstparser/src/mstlstm.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.init import * 5 | from torch import optim 6 | from utils import read_conll 7 | from operator import itemgetter 8 | import utils 9 | import time 10 | import random 11 | import decoder 12 | import numpy as np 13 | import torch.autograd as autograd 14 | import os 15 | 16 | use_gpu = True if torch.cuda.is_available() else False 17 | 18 | 19 | def get_data(variable): 20 | if use_gpu: 21 | return variable.data.cpu() 22 | else: 23 | return variable.data 24 | 25 | 26 | def Variable(inner): 27 | return torch.autograd.Variable(inner.cuda() if use_gpu else inner) 28 | 29 | 30 | def Parameter(shape=None, init=xavier_uniform): 31 | if hasattr(init, 'shape'): 32 | assert not shape 33 | return nn.Parameter(torch.Tensor(init)) 34 | shape = (1, shape) if type(shape) == int else shape 35 | return nn.Parameter(init(torch.Tensor(*shape))) 36 | 37 | 38 | def scalar(f): 39 | if type(f) == int: 40 | return Variable(torch.LongTensor([f])) 41 | if type(f) == float: 42 | return Variable(torch.FloatTensor([f])) 43 | 44 | 45 | def cat(l, dimension=-1): 46 | valid_l = [x for x in l if x is not None] 47 | if dimension < 0: 48 | dimension += len(valid_l[0].size()) 49 | return torch.cat(valid_l, dimension) 50 | 51 | 52 | class MSTParserLSTMModel(nn.Module): 53 | def __init__(self, vocab, pos, rels, enum_word, options, onto, cpos): 54 | super(MSTParserLSTMModel, self).__init__() 55 | random.seed(1) 56 | self.activations = {'tanh': F.tanh, 57 | 'sigmoid': F.sigmoid, 'relu': F.relu} 58 | self.activation = self.activations[options.activation] 59 | 60 | self.ldims = options.lstm_dims 61 | self.wdims = options.wembedding_dims 62 | self.pdims = options.pembedding_dims 63 | self.rdims = options.rembedding_dims 64 | self.odims = options.oembedding_dims 65 | self.cdims = options.cembedding_dims 66 | self.layers = options.lstm_layers 67 | self.wordsCount = vocab 68 | self.vocab = {word: ind + 3 for word, ind in enum_word.items()} 69 | self.pos = {word: ind + 3 for ind, word in enumerate(pos)} 70 | self.onto = {word: ind + 3 for ind, word in enumerate(onto)} 71 | self.cpos = {word: ind + 3 for ind, word in enumerate(cpos)} 72 | self.rels = {word: ind for ind, word in enumerate(rels)} 73 | self.rel_list = rels 74 | self.hidden_units = options.hidden_units 75 | 
self.hidden2_units = options.hidden2_units 76 | 77 | self.vocab['*PAD*'] = 1 78 | self.pos['*PAD*'] = 1 79 | self.onto['*PAD*'] = 1 80 | self.cpos['*PAD*'] = 1 81 | self.vocab['*INITIAL*'] = 2 82 | self.pos['*INITIAL*'] = 2 83 | self.onto['*INITIAL*'] = 2 84 | self.cpos['*INITIAL*'] = 2 85 | 86 | self.external_embedding, self.edim = None, 0 87 | 88 | if options.external_embedding is not None: 89 | external_embedding_fp = open(options.external_embedding, 'r') 90 | external_embedding_fp.readline() 91 | self.external_embedding = {line.split(' ')[0]: [float(f) for f in line.strip().split(' ')[1:]] for line in 92 | external_embedding_fp} 93 | external_embedding_fp.close() 94 | self.edim = len(list(self.external_embedding.values())[0]) 95 | self.extrnd = {word: i + 3 for i, 96 | word in enumerate(self.external_embedding)} 97 | np_emb = np.zeros((len(self.external_embedding) + 3, self.edim)) 98 | for word, i in self.extrnd.items(): 99 | np_emb[i] = self.external_embedding[word] 100 | self.elookup = nn.Embedding(*np_emb.shape) 101 | self.elookup.weight = Parameter(init=np_emb) 102 | self.extrnd['*PAD*'] = 1 103 | self.extrnd['*INITIAL*'] = 2 104 | print('Load external embedding. Vector dimensions', self.edim) 105 | 106 | # prepare LSTM 107 | self.lstm_for_1 = nn.LSTM( 108 | self.wdims + self.pdims + self.edim + self.odims + self.cdims, self.ldims) 109 | self.lstm_back_1 = nn.LSTM( 110 | self.wdims + self.pdims + self.edim + self.odims + self.cdims, self.ldims) 111 | self.lstm_for_2 = nn.LSTM(self.ldims * 2, self.ldims) 112 | self.lstm_back_2 = nn.LSTM(self.ldims * 2, self.ldims) 113 | self.hid_for_1, self.hid_back_1, self.hid_for_2, self.hid_back_2 = [ 114 | self.init_hidden(self.ldims) for _ in range(4)] 115 | 116 | self.wlookup = nn.Embedding(len(vocab) + 3, self.wdims) 117 | self.plookup = nn.Embedding(len(pos) + 3, self.pdims) 118 | self.rlookup = nn.Embedding(len(rels), self.rdims) 119 | self.olookup = nn.Embedding(len(onto) + 3, self.odims) 120 | self.clookup = nn.Embedding(len(cpos) + 3, self.cdims) 121 | 122 | self.hidLayerFOH = Parameter((self.ldims * 2, self.hidden_units)) 123 | self.hidLayerFOM = Parameter((self.ldims * 2, self.hidden_units)) 124 | self.hidBias = Parameter((self.hidden_units)) 125 | self.catBias = Parameter((self.hidden_units * 2)) 126 | self.rhidLayerFOH = Parameter((2 * self.ldims, self.hidden_units)) 127 | self.rhidLayerFOM = Parameter((2 * self.ldims, self.hidden_units)) 128 | self.rhidBias = Parameter((self.hidden_units)) 129 | self.rcatBias = Parameter((self.hidden_units * 2)) 130 | # 131 | if self.hidden2_units: 132 | self.hid2Layer = Parameter( 133 | (self.hidden_units * 2, self.hidden2_units)) 134 | self.hid2Bias = Parameter((self.hidden2_units)) 135 | self.rhid2Layer = Parameter( 136 | (self.hidden_units * 2, self.hidden2_units)) 137 | self.rhid2Bias = Parameter((self.hidden2_units)) 138 | 139 | self.outLayer = Parameter( 140 | (self.hidden2_units if self.hidden2_units > 0 else self.hidden_units, 1)) 141 | self.outBias = 0 # Parameter(1) 142 | self.routLayer = Parameter( 143 | (self.hidden2_units if self.hidden2_units > 0 else self.hidden_units, len(self.rel_list))) 144 | self.routBias = Parameter((len(self.rel_list))) 145 | 146 | def init_hidden(self, dim): 147 | return (autograd.Variable(torch.zeros(1, 1, dim).cuda() if use_gpu else torch.zeros(1, 1, dim)), 148 | autograd.Variable(torch.zeros(1, 1, dim).cuda() if use_gpu else torch.zeros(1, 1, dim))) 149 | 150 | def __getExpr(self, sentence, i, j, train): 151 | 152 | if sentence[i].headfov is None: 153 | 
sentence[i].headfov = torch.mm(cat([sentence[i].lstms[0], sentence[i].lstms[1]]), 154 | self.hidLayerFOH) 155 | 156 | if sentence[j].modfov is None: 157 | sentence[j].modfov = torch.mm(cat([sentence[j].lstms[0], sentence[j].lstms[1]]), 158 | self.hidLayerFOM) 159 | 160 | if self.hidden2_units > 0: 161 | output = torch.mm( 162 | self.activation( 163 | self.hid2Bias + 164 | torch.mm(self.activation(cat([sentence[i].headfov, sentence[j].modfov]) + self.catBias), 165 | self.hid2Layer) 166 | ), 167 | self.outLayer 168 | ) + self.outBias 169 | 170 | else: 171 | output = torch.mm( 172 | self.activation( 173 | sentence[i].headfov + sentence[j].modfov + self.hidBias), 174 | self.outLayer) + self.outBias 175 | return output 176 | 177 | def __evaluate(self, sentence, train): 178 | exprs = [[self.__getExpr(sentence, i, j, train) 179 | for j in range(len(sentence))] 180 | for i in range(len(sentence))] 181 | scores = np.array([[get_data(output).numpy()[0, 0] 182 | for output in exprsRow] for exprsRow in exprs]) 183 | return scores, exprs 184 | 185 | def __evaluateLabel(self, sentence, i, j): 186 | if sentence[i].rheadfov is None: 187 | sentence[i].rheadfov = torch.mm(cat([sentence[i].lstms[0], sentence[i].lstms[1]]), 188 | self.rhidLayerFOH) 189 | 190 | if sentence[j].rmodfov is None: 191 | sentence[j].rmodfov = torch.mm(cat([sentence[j].lstms[0], sentence[j].lstms[1]]), 192 | self.rhidLayerFOM) 193 | 194 | if self.hidden2_units > 0: 195 | output = torch.mm( 196 | self.activation( 197 | self.rhid2Bias + 198 | torch.mm( 199 | self.activation( 200 | cat([sentence[i].rheadfov, sentence[j].rmodfov]) + self.rcatBias), 201 | self.rhid2Layer 202 | )), 203 | self.routLayer 204 | ) + self.routBias 205 | 206 | else: 207 | output = torch.mm( 208 | self.activation(sentence[i].rheadfov + 209 | sentence[j].rmodfov + self.rhidBias), 210 | self.routLayer 211 | ) + self.routBias 212 | 213 | return get_data(output).numpy()[0], output[0] 214 | 215 | def predict(self, sentence): 216 | for entry in sentence: 217 | wordvec = self.wlookup( 218 | scalar(int(self.vocab.get(entry.norm, 0)))) if self.wdims > 0 else None 219 | posvec = self.plookup( 220 | scalar(int(self.pos[entry.pos]))) if self.pdims > 0 else None 221 | ontovec = self.olookup( 222 | scalar(int(self.onto[entry.onto]))) if self.odims > 0 else None 223 | cposvec = self.clookup( 224 | scalar(int(self.cpos[entry.cpos]))) if self.cdims > 0 else None 225 | evec = self.elookup(scalar(int(self.extrnd.get(entry.form, 226 | self.extrnd.get(entry.norm, 0))))) if self.external_embedding is not None else None 227 | entry.vec = cat([wordvec, posvec, ontovec, cposvec, evec]) 228 | 229 | entry.lstms = [entry.vec, entry.vec] 230 | entry.headfov = None 231 | entry.modfov = None 232 | 233 | entry.rheadfov = None 234 | entry.rmodfov = None 235 | 236 | num_vec = len(sentence) 237 | vec_for = torch.cat( 238 | [entry.vec for entry in sentence]).view(num_vec, 1, -1) 239 | vec_back = torch.cat( 240 | [entry.vec for entry in reversed(sentence)]).view(num_vec, 1, -1) 241 | res_for_1, self.hid_for_1 = self.lstm_for_1(vec_for, self.hid_for_1) 242 | res_back_1, self.hid_back_1 = self.lstm_back_1( 243 | vec_back, self.hid_back_1) 244 | 245 | vec_cat = [cat([res_for_1[i], res_back_1[num_vec - i - 1]]) 246 | for i in range(num_vec)] 247 | 248 | vec_for_2 = torch.cat(vec_cat).view(num_vec, 1, -1) 249 | vec_back_2 = torch.cat(list(reversed(vec_cat))).view(num_vec, 1, -1) 250 | res_for_2, self.hid_for_2 = self.lstm_for_2(vec_for_2, self.hid_for_2) 251 | res_back_2, self.hid_back_2 = 
self.lstm_back_2( 252 | vec_back_2, self.hid_back_2) 253 | 254 | for i in range(num_vec): 255 | sentence[i].lstms[0] = res_for_2[i] 256 | sentence[i].lstms[1] = res_back_2[num_vec - i - 1] 257 | 258 | scores, exprs = self.__evaluate(sentence, True) 259 | heads = decoder.parse_proj(scores) 260 | 261 | for entry, head in zip(sentence, heads): 262 | entry.pred_parent_id = head 263 | entry.pred_relation = '_' 264 | 265 | head_list = list(heads) 266 | for modifier, head in enumerate(head_list[1:]): 267 | scores, exprs = self.__evaluateLabel( 268 | sentence, head, modifier + 1) 269 | sentence[modifier + 1].pred_relation = self.rel_list[max( 270 | enumerate(scores), key=itemgetter(1))[0]] 271 | 272 | def forward(self, sentence, errs, lerrs): 273 | 274 | for entry in sentence: 275 | c = float(self.wordsCount.get(entry.norm, 0)) 276 | # dropFlag = (random.random() < (c / (0.33 + c))) 277 | dropFlag = (random.random() < (c / (0.25 + c))) 278 | wordvec = self.wlookup(scalar( 279 | int(self.vocab.get(entry.norm, 0)) if dropFlag else 0)) if self.wdims > 0 else None 280 | ontovec = self.olookup(scalar(int(self.onto[entry.onto]) if random.random( 281 | ) < 0.9 else 0)) if self.odims > 0 else None 282 | cposvec = self.clookup(scalar(int(self.cpos[entry.cpos]) if random.random( 283 | ) < 0.9 else 0)) if self.cdims > 0 else None 284 | posvec = self.plookup( 285 | scalar(int(self.pos[entry.pos]))) if self.pdims > 0 else None 286 | # posvec = self.plookup( 287 | # scalar(0 if dropFlag and random.random() < 0.1 else int(self.pos[entry.pos]))) if self.pdims > 0 else None 288 | evec = None 289 | if self.external_embedding is not None: 290 | evec = self.elookup(scalar(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)) if ( 291 | dropFlag or (random.random() < 0.5)) else 0)) 292 | 293 | entry.vec = cat([wordvec, posvec, ontovec, cposvec, evec]) 294 | entry.lstms = [entry.vec, entry.vec] 295 | entry.headfov = None 296 | entry.modfov = None 297 | 298 | entry.rheadfov = None 299 | entry.rmodfov = None 300 | 301 | num_vec = len(sentence) 302 | vec_for = torch.cat( 303 | [entry.vec for entry in sentence]).view(num_vec, 1, -1) 304 | vec_back = torch.cat( 305 | [entry.vec for entry in reversed(sentence)]).view(num_vec, 1, -1) 306 | res_for_1, self.hid_for_1 = self.lstm_for_1(vec_for, self.hid_for_1) 307 | res_back_1, self.hid_back_1 = self.lstm_back_1( 308 | vec_back, self.hid_back_1) 309 | 310 | vec_cat = [cat([res_for_1[i], res_back_1[num_vec - i - 1]]) 311 | for i in range(num_vec)] 312 | 313 | vec_for_2 = torch.cat(vec_cat).view(num_vec, 1, -1) 314 | vec_back_2 = torch.cat(list(reversed(vec_cat))).view(num_vec, 1, -1) 315 | res_for_2, self.hid_for_2 = self.lstm_for_2(vec_for_2, self.hid_for_2) 316 | res_back_2, self.hid_back_2 = self.lstm_back_2( 317 | vec_back_2, self.hid_back_2) 318 | 319 | for i in range(num_vec): 320 | sentence[i].lstms[0] = res_for_2[i] 321 | sentence[i].lstms[1] = res_back_2[num_vec - i - 1] 322 | 323 | scores, exprs = self.__evaluate(sentence, True) 324 | gold = [entry.parent_id for entry in sentence] 325 | heads = decoder.parse_proj(scores, gold) 326 | 327 | for modifier, head in enumerate(gold[1:]): 328 | rscores, rexprs = self.__evaluateLabel( 329 | sentence, head, modifier + 1) 330 | goldLabelInd = self.rels[sentence[modifier + 1].relation] 331 | wrongLabelInd = \ 332 | max(((l, scr) for l, scr in enumerate(rscores) 333 | if l != goldLabelInd), key=itemgetter(1))[0] 334 | if rscores[goldLabelInd] < rscores[wrongLabelInd] + 1: 335 | lerrs += [rexprs[wrongLabelInd] - 
rexprs[goldLabelInd]]
336 | 
337 |         e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
338 |         if e > 0:
339 |             errs += [(exprs[h][i] - exprs[g][i])[0]
340 |                      for i, (h, g) in enumerate(zip(heads, gold)) if h != g]
341 |         return e
342 | 
343 | 
344 | def get_optim(opt, parameters):
345 |     if opt.optim == 'sgd':
346 |         return optim.SGD(parameters, lr=opt.lr)  # the option is registered as dest="lr" in parser.py
347 |     elif opt.optim == 'adam':
348 |         return optim.Adam(parameters)
349 | 
350 | 
351 | class MSTParserLSTM:
352 |     def __init__(self, vocab, pos, rels, enum_word, options, onto, cpos):
353 |         model = MSTParserLSTMModel(
354 |             vocab, pos, rels, enum_word, options, onto, cpos)
355 |         self.model = model.cuda() if use_gpu else model
356 |         self.trainer = get_optim(options, self.model.parameters())
357 | 
358 |     def predict(self, conll_path):
359 |         with open(conll_path, 'r') as conllFP:
360 |             for iSentence, sentence in enumerate(read_conll(conllFP)):
361 |                 self.model.hid_for_1, self.model.hid_back_1, self.model.hid_for_2, self.model.hid_back_2 = [
362 |                     self.model.init_hidden(self.model.ldims) for _ in range(4)]
363 |                 conll_sentence = [entry for entry in sentence if isinstance(
364 |                     entry, utils.ConllEntry)]
365 |                 self.model.predict(conll_sentence)
366 |                 yield conll_sentence
367 | 
368 |     def save(self, fn):
369 |         tmp = fn + '.tmp'
370 |         torch.save(self.model.state_dict(), tmp)
371 |         shutil.move(tmp, fn)
372 | 
373 |     def load(self, fn):
374 |         self.model.load_state_dict(torch.load(fn))
375 | 
376 |     def train(self, conll_path):
377 |         print('pytorch version:', torch.__version__)
378 |         batch = 1
379 |         eloss = 0.0
380 |         mloss = 0.0
381 |         eerrors = 0
382 |         etotal = 0
383 |         iSentence = 0
384 |         start = time.time()
385 |         with open(conll_path, 'r') as conllFP:
386 |             shuffledData = list(read_conll(conllFP))
387 |             random.shuffle(shuffledData)
388 |             errs = []
389 |             lerrs = []
390 |             for iSentence, sentence in enumerate(shuffledData):
391 |                 self.model.hid_for_1, self.model.hid_back_1, self.model.hid_for_2, self.model.hid_back_2 = [
392 |                     self.model.init_hidden(self.model.ldims) for _ in range(4)]
393 |                 if iSentence % 100 == 0 and iSentence != 0:
394 |                     print('Processing sentence number:', iSentence,
395 |                           'Loss:', eloss / etotal,
396 |                           'Errors:', (float(eerrors)) / etotal,
397 |                           'Time', time.time() - start)
398 |                     start = time.time()
399 |                     eerrors = 0
400 |                     eloss = 0.0
401 |                     etotal = 0
402 | 
403 |                 conll_sentence = [entry for entry in sentence if isinstance(
404 |                     entry, utils.ConllEntry)]
405 |                 e = self.model.forward(conll_sentence, errs, lerrs)
406 |                 eerrors += e
407 |                 eloss += e
408 |                 mloss += e
409 |                 etotal += len(sentence)
410 |                 if iSentence % batch == 0 or len(errs) > 0 or len(lerrs) > 0:
411 |                     if len(errs) > 0 or len(lerrs) > 0:
412 |                         eerrs = torch.sum(cat(errs + lerrs))
413 |                         eerrs.backward()
414 |                         self.trainer.step()
415 |                         errs = []
416 |                         lerrs = []
417 |                     self.trainer.zero_grad()
418 |             if len(errs) > 0:
419 |                 eerrs = torch.sum(cat(errs + lerrs))  # concatenate before summing, as above
420 |                 eerrs.backward()
421 |                 self.trainer.step()
422 |                 self.trainer.zero_grad()
423 |         print("Loss: ", mloss / (iSentence + 1))
--------------------------------------------------------------------------------
/bmstparser/src/parser.py:
--------------------------------------------------------------------------------
1 | from optparse import OptionParser
2 | import pickle
3 | import utils
4 | import mstlstm
5 | import os
6 | import os.path
7 | import time
8 | import torch
9 | import multiprocessing
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     parser = OptionParser()
14 |     parser.add_option("--outdir",
type="string", 15 | dest="output", default="model") 16 | 17 | parser.add_option("--train", dest="conll_train", help="Annotated CONLL train file", metavar="FILE", 18 | default="corpus/train.conll") 19 | parser.add_option("--dev", dest="conll_dev", help="Annotated CONLL dev file", metavar="FILE", 20 | default="corpus/dev.conll") 21 | parser.add_option("--test", dest="conll_test", help="Annotated CONLL test file", metavar="FILE", 22 | default="corpus/test.conll") 23 | parser.add_option("--extrn", dest="external_embedding", help="External embeddings", metavar="FILE") 24 | parser.add_option("--params", dest="params", help="Parameters file", 25 | metavar="FILE", default="params.pickle") 26 | parser.add_option("--model", dest="model", help="Load/Save model file", metavar="FILE", 27 | default="model/neuralfirstorder.model") 28 | 29 | parser.add_option("--multi", dest="multi", help="Annotated CONLL multi-train file", metavar="FILE", 30 | default=False) 31 | # multi-task has been deleted for bloated code 32 | 33 | parser.add_option("--wembedding", type="int", 34 | dest="wembedding_dims", default=100) 35 | parser.add_option("--pembedding", type="int", 36 | dest="pembedding_dims", default=25) 37 | parser.add_option("--rembedding", type="int", 38 | dest="rembedding_dims", default=25) 39 | 40 | parser.add_option("--oembedding", type="int", dest="oembedding_dims", default=0) #ontology 41 | parser.add_option("--cembedding", type="int", dest="cembedding_dims", default=0) #cpos 42 | 43 | parser.add_option("--epochs", type="int", dest="epochs", default=30) 44 | parser.add_option("--numthread", type="int", dest="numthread", default=8) 45 | parser.add_option("--hidden", type="int", dest="hidden_units", default=100) 46 | parser.add_option("--hidden2", type="int", dest="hidden2_units", default=0) 47 | parser.add_option("--optim", type="string", dest="optim", default='adam') 48 | parser.add_option("--lr", type="float", dest="lr", default=0.1) 49 | parser.add_option("--activation", type="string", 50 | dest="activation", default="tanh") 51 | parser.add_option("--lstmlayers", type="int", 52 | dest="lstm_layers", default=2) 53 | parser.add_option("--lstmdims", type="int", dest="lstm_dims", default=125) 54 | parser.add_option("--predict", action="store_true", 55 | dest="predictFlag", default=False) 56 | 57 | (options, args) = parser.parse_args() 58 | max_thread = multiprocessing.cpu_count() 59 | active_thread = options.numthread if max_thread>options.numthread else max_thread 60 | torch.set_num_threads(active_thread) 61 | print(active_thread, "threads are in use") 62 | print('Using external embedding:', options.external_embedding) 63 | 64 | if options.predictFlag: 65 | with open(options.params, 'rb') as paramsfp: 66 | words, enum_word, pos, rels, onto, cpos, stored_opt = pickle.load(paramsfp) 67 | 68 | stored_opt.external_embedding = options.external_embedding 69 | 70 | print('Initializing lstm mstparser:') 71 | parser = mstlstm.MSTParserLSTM(words, pos, rels, enum_word, stored_opt, onto, cpos) 72 | parser.load(options.model) 73 | conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') 74 | testpath = os.path.join( 75 | options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') 76 | 77 | ts = time.time() 78 | test_res = list(parser.predict(options.conll_test)) 79 | te = time.time() 80 | print('Finished predicting test.', te - ts, 'seconds.') 81 | utils.write_conll(testpath, test_res) 82 | 83 | if not conllu: 84 | os.system('perl src/utils/eval.pl -g ' + options.conll_test + 85 | ' -s ' + 
testpath + ' > ' + testpath + '.txt') 86 | else: 87 | os.system( 88 | 'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + testpath + ' > ' + testpath + '.txt') 89 | with open(testpath + '.txt', 'r') as f: 90 | for l in f: 91 | if l.startswith('UAS'): 92 | print('UAS:%s' % l.strip().split()[-1]) 93 | elif l.startswith('LAS'): 94 | print('LAS:%s' % l.strip().split()[-1]) 95 | else: 96 | print('Preparing vocabulary table') 97 | words, enum_word, pos, rels, onto, cpos = list(utils.vocab(options.conll_train)) 98 | with open(os.path.join(options.output, options.params), 'wb') as paramsfp: 99 | pickle.dump((words, enum_word, pos, rels, onto, cpos, options), paramsfp) 100 | print('Finished collecting vocabulary') 101 | 102 | print('Initializing mst-parser:') 103 | parser = mstlstm.MSTParserLSTM(words, pos, rels, enum_word, options, onto, cpos) 104 | 105 | for epoch in range(options.epochs): 106 | print('Starting epoch', epoch) 107 | parser.train(options.conll_train) 108 | conllu = (os.path.splitext( 109 | options.conll_dev.lower())[1] == '.conllu') 110 | devpath = os.path.join(options.output, 111 | 'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu')) 112 | utils.write_conll(devpath, parser.predict(options.conll_dev)) 113 | parser.save(os.path.join(options.output, os.path.basename( 114 | options.model) + str(epoch + 1))) 115 | 116 | if not conllu: 117 | os.system( 118 | 'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') 119 | with open(devpath + '.txt', 'r') as f: 120 | for i in range(0, 3): 121 | print(f.readline()) 122 | else: 123 | os.system( 124 | 'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') 125 | with open(devpath + '.txt', 'r') as f: 126 | for l in f: 127 | if l.startswith('UAS'): 128 | print('UAS:%s' % l.strip().split()[-1]) 129 | elif l.startswith('LAS'): 130 | print('LAS:%s' % l.strip().split()[-1]) 131 | -------------------------------------------------------------------------------- /bmstparser/src/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import re 3 | 4 | numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+") 5 | 6 | 7 | def normalize(word): 8 | return 'NUM' if numberRegex.match(word) else word.lower() 9 | 10 | class ConllEntry: 11 | def __init__(self, id, form, lemma, pos, cpos, feats=None, parent_id=None, relation=None, deps=None, misc=None): 12 | self.id = id 13 | self.form = form 14 | self.norm = normalize(form) 15 | self.pos = pos 16 | self.cpos = cpos 17 | self.parent_id = parent_id 18 | self.relation = relation 19 | 20 | self.onto = lemma 21 | self.feats = feats 22 | self.deps = deps 23 | self.misc = misc 24 | 25 | self.pred_parent_id = None 26 | self.pred_relation = None 27 | 28 | def __str__(self): 29 | values = [str(self.id), self.form, self.onto, self.pos, self.cpos, self.feats, 30 | str(self.pred_parent_id) if self.pred_parent_id is not None else None, self.pred_relation, self.deps, self.misc] 31 | return '\t'.join(['_' if v is None else v for v in values]) 32 | 33 | 34 | def read_conll(conllFP): 35 | root = ConllEntry(0, '*root*', '*root*', 'ROOT-POS', 36 | 'ROOT-CPOS', '_', -1, 'rroot', '_', '_') 37 | tokens = [root] 38 | for line in conllFP: 39 | tok = line.strip().split('\t') 40 | if not tok or line.strip() 
== '':
41 |             if len(tokens) > 1:
42 |                 yield tokens
43 |             tokens = [root]
44 |         else:
45 |             if line[0] == '#' or '-' in tok[0] or '.' in tok[0]:
46 |                 tokens.append(line.strip())  # keep comment lines, multi-word tokens and empty nodes as raw strings
47 |             else:
48 |                 tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2], tok[3], tok[4], tok[5], int(
49 |                     tok[6]) if tok[6] != '_' else -1, tok[7], tok[8], tok[9]))
50 |     if len(tokens) > 1:
51 |         yield tokens
52 | 
53 | 
54 | def vocab(conll_path):
55 |     wordsCount = Counter()
56 |     posCount = Counter()
57 |     relCount = Counter()
58 |     ontoCount = Counter()
59 |     cposCount = Counter()
60 | 
61 |     with open(conll_path, 'r') as conllFP:
62 |         for sentence in read_conll(conllFP):
63 |             wordsCount.update(
64 |                 [node.norm for node in sentence if isinstance(node, ConllEntry)])
65 |             posCount.update(
66 |                 [node.pos for node in sentence if isinstance(node, ConllEntry)])
67 |             relCount.update(
68 |                 [node.relation for node in sentence if isinstance(node, ConllEntry)])
69 |             ontoCount.update(
70 |                 [node.onto for node in sentence if isinstance(node, ConllEntry)])
71 |             cposCount.update(
72 |                 [node.cpos for node in sentence if isinstance(node, ConllEntry)])
73 | 
74 |     print('number of distinct words, POS tags, relations, ontology tags and CPOS tags:',
75 |           len(wordsCount), len(posCount), len(relCount), len(ontoCount), len(cposCount))
76 |     return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, list(posCount.keys()), list(relCount.keys()), list(ontoCount.keys()), list(cposCount.keys()))
77 | 
78 | 
79 | def write_conll(fn, conll_gen):
80 |     with open(fn, 'w') as fh:
81 |         for sentence in conll_gen:
82 |             for entry in sentence[1:]:
83 |                 fh.write(str(entry) + '\n')
84 |             fh.write('\n')
85 | 
--------------------------------------------------------------------------------
/bmstparser/src/utils/eval.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | 
3 | # Author: Yuval Krymolowski
4 | # Addition of precision and recall
5 | # and of frame confusion list: Sabine Buchholz
6 | # Addition of DEPREL + ATTACHMENT:
7 | # Prokopis Prokopidis (prokopis at ilsp dot gr)
8 | # Acknowledgements:
9 | # to Markus Kuhn for suggesting the use of
10 | # the Unicode category property
11 | 
12 | if ($] < 5.008001)
13 | {
14 | printf STDERR < -s
39 | 
40 | This script evaluates a system output with respect to a gold standard.
41 | Both files should be in UTF-8 encoded CoNLL-X tabular format.
42 | 
43 | Punctuation tokens (those where all characters have the Unicode
44 | category property "Punctuation") are ignored for scoring (unless the
45 | -p flag is used).
46 | 
47 | The output breaks down the errors according to their type and context.
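For example (the file names here are only illustrative):

  eval.pl -g gold.conll -s system.conll -o results.txt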
48 | 49 | Optional parameters: 50 | -o FILE : output: print output to FILE (default is standard output) 51 | -q : quiet: only print overall performance, without the details 52 | -b : evalb: produce output in a format similar to evalb 53 | (http://nlp.cs.nyu.edu/evalb/); use together with -q 54 | -p : punctuation: also score on punctuation (default is not to score on it) 55 | -v : version: show the version number 56 | -h : help: print this help text and exit 57 | 58 | EOT 59 | ; 60 | 61 | my ($line_num) ; 62 | my ($sep) = '0x01' ; 63 | 64 | my ($START) = '.S' ; 65 | my ($END) = '.E' ; 66 | 67 | my ($con_err_num) = 3 ; 68 | my ($freq_err_num) = 10 ; 69 | my ($spec_err_loc_con) = 8 ; 70 | 71 | ################################################################################ 72 | ### subfunctions ### 73 | ################################################################################ 74 | 75 | # Whether a string consists entirely of characters with the Unicode 76 | # category property "Punctuation" (see "man perlunicode") 77 | sub is_uni_punct 78 | { 79 | my ($word) = @_ ; 80 | 81 | return scalar(Encode::decode_utf8($word)=~ /^\p{Punctuation}+$/) ; 82 | } 83 | 84 | # The length of a unicode string, excluding non-spacing marks 85 | # (for example vowel marks in Arabic) 86 | 87 | sub uni_len 88 | { 89 | my ($word) = @_ ; 90 | my ($ch, $l) ; 91 | 92 | $l = 0 ; 93 | foreach $ch (split(//, Encode::decode_utf8($word))) 94 | { 95 | if ($ch !~ /^\p{NonspacingMark}/) 96 | { 97 | $l++ ; 98 | } 99 | } 100 | 101 | return $l ; 102 | } 103 | 104 | sub filter_context_counts 105 | { # filter_context_counts 106 | 107 | my ($vec, $num, $max_len) = @_ ; 108 | my ($con, $l, $thresh) ; 109 | 110 | $thresh = (sort {$b <=> $a} values %{$vec})[$num-1] ; 111 | 112 | foreach $con (keys %{$vec}) 113 | { 114 | if (${$vec}{$con} < $thresh) 115 | { 116 | delete ${$vec}{$con} ; 117 | next ; 118 | } 119 | 120 | $l = uni_len($con) ; 121 | 122 | if ($l > ${$max_len}) 123 | { 124 | ${$max_len} = $l ; 125 | } 126 | } 127 | 128 | } # filter_context_counts 129 | 130 | sub print_context 131 | { # print_context 132 | 133 | my ($counts, $counts_pos, $max_con_len, $max_con_pos_len) = @_ ; 134 | my (@v_con, @v_con_pos, $con, $con_pos, $i, $n) ; 135 | 136 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_pos_len, 'CPOS', 'any', 'head', 'dep', 'both' ; 137 | printf OUT " ||" ; 138 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_len, 'word', 'any', 'head', 'dep', 'both' ; 139 | printf OUT "\n" ; 140 | printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; 141 | printf OUT "--++" ; 142 | printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; 143 | printf OUT "\n" ; 144 | 145 | @v_con = sort {${$counts}{tot}{$b} <=> ${$counts}{tot}{$a}} keys %{${$counts}{tot}} ; 146 | @v_con_pos = sort {${$counts_pos}{tot}{$b} <=> ${$counts_pos}{tot}{$a}} keys %{${$counts_pos}{tot}} ; 147 | 148 | $n = scalar @v_con ; 149 | if (scalar @v_con_pos > $n) 150 | { 151 | $n = scalar @v_con_pos ; 152 | } 153 | 154 | foreach $i (0 .. 
$n-1) 155 | { 156 | if (defined $v_con_pos[$i]) 157 | { 158 | $con_pos = $v_con_pos[$i] ; 159 | printf OUT " %-*s | %4d | %4d | %4d | %4d", 160 | $max_con_pos_len, $con_pos, ${$counts_pos}{tot}{$con_pos}, 161 | ${$counts_pos}{err_head}{$con_pos}, ${$counts_pos}{err_dep}{$con_pos}, 162 | ${$counts_pos}{err_dep}{$con_pos}+${$counts_pos}{err_head}{$con_pos}-${$counts_pos}{tot}{$con_pos} ; 163 | } 164 | else 165 | { 166 | printf OUT " %-*s | %4s | %4s | %4s | %4s", 167 | $max_con_pos_len, ' ', ' ', ' ', ' ', ' ' ; 168 | } 169 | 170 | printf OUT " ||" ; 171 | 172 | if (defined $v_con[$i]) 173 | { 174 | $con = $v_con[$i] ; 175 | printf OUT " %-*s | %4d | %4d | %4d | %4d", 176 | $max_con_len+length($con)-uni_len($con), $con, ${$counts}{tot}{$con}, 177 | ${$counts}{err_head}{$con}, ${$counts}{err_dep}{$con}, 178 | ${$counts}{err_dep}{$con}+${$counts}{err_head}{$con}-${$counts}{tot}{$con} ; 179 | } 180 | else 181 | { 182 | printf OUT " %-*s | %4s | %4s | %4s | %4s", 183 | $max_con_len, ' ', ' ', ' ', ' ', ' ' ; 184 | } 185 | 186 | printf OUT "\n" ; 187 | } 188 | 189 | printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; 190 | printf OUT "--++" ; 191 | printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; 192 | printf OUT "\n" ; 193 | 194 | printf OUT "\n\n" ; 195 | 196 | } # print_context 197 | 198 | sub num_as_word 199 | { 200 | my ($num) = @_ ; 201 | 202 | $num = abs($num) ; 203 | 204 | if ($num == 1) 205 | { 206 | return ('one word') ; 207 | } 208 | elsif ($num == 2) 209 | { 210 | return ('two words') ; 211 | } 212 | elsif ($num == 3) 213 | { 214 | return ('three words') ; 215 | } 216 | elsif ($num == 4) 217 | { 218 | return ('four words') ; 219 | } 220 | else 221 | { 222 | return ($num.' words') ; 223 | } 224 | } 225 | 226 | sub describe_err 227 | { # describe_err 228 | 229 | my ($head_err, $head_aft_bef, $dep_err) = @_ ; 230 | my ($dep_g, $dep_s, $desc) ; 231 | my ($head_aft_bef_g, $head_aft_bef_s) = split(//, $head_aft_bef) ; 232 | 233 | if ($head_err eq '-') 234 | { 235 | $desc = 'correct head' ; 236 | 237 | if ($head_aft_bef_s eq '0') 238 | { 239 | $desc .= ' (0)' ; 240 | } 241 | elsif ($head_aft_bef_s eq 'e') 242 | { 243 | $desc .= ' (the focus word)' ; 244 | } 245 | elsif ($head_aft_bef_s eq 'a') 246 | { 247 | $desc .= ' (after the focus word)' ; 248 | } 249 | elsif ($head_aft_bef_s eq 'b') 250 | { 251 | $desc .= ' (before the focus word)' ; 252 | } 253 | } 254 | elsif ($head_aft_bef_s eq '0') 255 | { 256 | $desc = 'head = 0 instead of ' ; 257 | if ($head_aft_bef_g eq 'a') 258 | { 259 | $desc.= 'after ' ; 260 | } 261 | if ($head_aft_bef_g eq 'b') 262 | { 263 | $desc.= 'before ' ; 264 | } 265 | $desc .= 'the focus word' ; 266 | } 267 | elsif ($head_aft_bef_g eq '0') 268 | { 269 | $desc = 'head is ' ; 270 | if ($head_aft_bef_g eq 'a') 271 | { 272 | $desc.= 'after ' ; 273 | } 274 | if ($head_aft_bef_g eq 'b') 275 | { 276 | $desc.= 'before ' ; 277 | } 278 | $desc .= 'the focus word instead of 0' ; 279 | } 280 | else 281 | { 282 | $desc = num_as_word($head_err) ; 283 | if ($head_err < 0) 284 | { 285 | $desc .= ' before' ; 286 | } 287 | else 288 | { 289 | $desc .= ' after' ; 290 | } 291 | 292 | $desc = 'head '.$desc.' 
the correct head ' ;
293 | 
294 |   if ($head_aft_bef_s eq '0')
295 |   {
296 |     $desc .= '(0' ;
297 |   }
298 |   elsif ($head_aft_bef_s eq 'e')
299 |   {
300 |     $desc .= '(the focus word' ;
301 |   }
302 |   elsif ($head_aft_bef_s eq 'a')
303 |   {
304 |     $desc .= '(after the focus word' ;
305 |   }
306 |   elsif ($head_aft_bef_s eq 'b')
307 |   {
308 |     $desc .= '(before the focus word' ;
309 |   }
310 | 
311 |   if ($head_aft_bef_g ne $head_aft_bef_s)
312 |   {
313 |     $desc .= ' instead of' ;
314 |     if ($head_aft_bef_s eq '0')
315 |     {
316 |       $desc .= '0' ;
317 |     }
318 |     elsif ($head_aft_bef_s eq 'e')
319 |     {
320 |       $desc .= 'the focus word' ;
321 |     }
322 |     elsif ($head_aft_bef_s eq 'a')
323 |     {
324 |       $desc .= 'after the focus word' ;
325 |     }
326 |     elsif ($head_aft_bef_s eq 'b')
327 |     {
328 |       $desc .= 'before the focus word' ;
329 |     }
330 |   }
331 | 
332 |   $desc .= ')' ;
333 | }
334 | 
335 | $desc .= ', ' ;
336 | 
337 | if ($dep_err eq '-')
338 | {
339 |   $desc .= 'correct dependency' ;
340 | }
341 | else
342 | {
343 |   ($dep_g, $dep_s) = ($dep_err =~ /^(.*)->(.*)$/) ;
344 |   $desc .= sprintf('dependency "%s" instead of "%s"', $dep_s, $dep_g) ;
345 | }
346 | 
347 | return($desc) ;
348 | 
349 | } # describe_err
350 | 
351 | sub get_context
352 | { # get_context
353 | 
354 |   my ($sent, $i_w) = @_ ;
355 |   my ($w_2, $w_1, $w1, $w2) ;
356 |   my ($p_2, $p_1, $p1, $p2) ;
357 | 
358 |   if ($i_w >= 2)
359 |   {
360 |     $w_2 = ${${$sent}[$i_w-2]}{word} ;
361 |     $p_2 = ${${$sent}[$i_w-2]}{pos} ;
362 |   }
363 |   else
364 |   {
365 |     $w_2 = $START ;
366 |     $p_2 = $START ;
367 |   }
368 | 
369 |   if ($i_w >= 1)
370 |   {
371 |     $w_1 = ${${$sent}[$i_w-1]}{word} ;
372 |     $p_1 = ${${$sent}[$i_w-1]}{pos} ;
373 |   }
374 |   else
375 |   {
376 |     $w_1 = $START ;
377 |     $p_1 = $START ;
378 |   }
379 | 
380 |   if ($i_w <= scalar @{$sent}-2)
381 |   {
382 |     $w1 = ${${$sent}[$i_w+1]}{word} ;
383 |     $p1 = ${${$sent}[$i_w+1]}{pos} ;
384 |   }
385 |   else
386 |   {
387 |     $w1 = $END ;
388 |     $p1 = $END ;
389 |   }
390 | 
391 |   if ($i_w <= scalar @{$sent}-3)
392 |   {
393 |     $w2 = ${${$sent}[$i_w+2]}{word} ;
394 |     $p2 = ${${$sent}[$i_w+2]}{pos} ;
395 |   }
396 |   else
397 |   {
398 |     $w2 = $END ;
399 |     $p2 = $END ;
400 |   }
401 | 
402 |   return ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) ;
403 | 
404 | } # get_context
405 | 
406 | sub read_sent
407 | { # read_sent
408 | 
409 |   my ($sent_gold, $sent_sys) = @_ ;
410 |   my ($line_g, $line_s, $new_sent) ;
411 |   my (%fields_g, %fields_s) ;
412 | 
413 |   $new_sent = 1 ;
414 | 
415 |   @{$sent_gold} = () ;
416 |   @{$sent_sys} = () ;
417 | 
418 |   while (1)
419 |   { # main reading loop
420 | 
421 |     $line_g = <GOLD> ;
422 |     $line_s = <SYS> ;
423 | 
424 |     $line_num++ ;
425 | 
426 |     # system output has fewer lines than gold standard
427 |     if ((defined $line_g) && (! defined $line_s))
428 |     {
429 |       printf STDERR "line mismatch, line %d:\n", $line_num ;
430 |       printf STDERR " gold: %s", $line_g ;
431 |       printf STDERR " sys : past end of file\n" ;
432 |       exit(1) ;
433 |     }
434 | 
435 |     # system output has more lines than gold standard
436 |     if ((! defined $line_g) && (defined $line_s))
437 |     {
438 |       printf STDERR "line mismatch, line %d:\n", $line_num ;
439 |       printf STDERR " gold: past end of file\n" ;
440 |       printf STDERR " sys : %s", $line_s ;
441 |       exit(1) ;
442 |     }
443 | 
444 |     # end of file reached for both
445 |     if ((! defined $line_g) && (!
defined $line_s)) 446 | { 447 | return (1) ; 448 | } 449 | 450 | # one contains end of sentence but other one does not 451 | if (($line_g =~ /^\s+$/) != ($line_s =~ /^\s+$/)) 452 | { 453 | printf STDERR "line mismatch, line %d:\n", $line_num ; 454 | printf STDERR " gold: %s", $line_g ; 455 | printf STDERR " sys : %s", $line_s ; 456 | exit(1) ; 457 | } 458 | 459 | # end of sentence reached 460 | if ($line_g =~ /^\s+$/) 461 | { 462 | return(0) ; 463 | } 464 | 465 | # now both lines contain information 466 | 467 | if ($new_sent) 468 | { 469 | $new_sent = 0 ; 470 | } 471 | 472 | # 'official' column names 473 | # options.output = ['id','form','lemma','cpostag','postag', 474 | # 'feats','head','deprel','phead','pdeprel'] 475 | 476 | @fields_g{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_g))[1, 3, 6, 7] ; 477 | 478 | push @{$sent_gold}, { %fields_g } ; 479 | 480 | @fields_s{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_s))[1, 3, 6, 7] ; 481 | 482 | if (($fields_g{word} ne $fields_s{word}) 483 | || 484 | ($fields_g{pos} ne $fields_s{pos})) 485 | { 486 | printf STDERR "Word/pos mismatch, line %d:\n", $line_num ; 487 | printf STDERR " gold: %s", $line_g ; 488 | printf STDERR " sys : %s", $line_s ; 489 | #exit(1) ; 490 | } 491 | 492 | push @{$sent_sys}, { %fields_s } ; 493 | 494 | } # main reading loop 495 | 496 | } # read_sent 497 | 498 | ################################################################################ 499 | ### main ### 500 | ################################################################################ 501 | 502 | our ($opt_g, $opt_s, $opt_o, $opt_h, $opt_v, $opt_q, $opt_p, $opt_b) ; 503 | 504 | my ($sent_num, $eof, $word_num, @err_sent) ; 505 | my (@sent_gold, @sent_sys, @starts) ; 506 | my ($word, $pos, $wp, $head_g, $dep_g, $head_s, $dep_s) ; 507 | my (%counts, $err_head, $err_dep, $con, $con1, $con_pos, $con_pos1, $thresh) ; 508 | my ($head_err, $dep_err, @cur_err, %err_counts, $err_counter, $err_desc) ; 509 | my ($loc_con, %loc_con_err_counts, %err_desc) ; 510 | my ($head_aft_bef_g, $head_aft_bef_s, $head_aft_bef) ; 511 | my ($con_bef, $con_aft, $con_bef_2, $con_aft_2, @bits, @e_bits, @v_con, @v_con_pos) ; 512 | my ($con_pos_bef, $con_pos_aft, $con_pos_bef_2, $con_pos_aft_2) ; 513 | my ($max_word_len, $max_pos_len, $max_con_len, $max_con_pos_len) ; 514 | my ($max_word_spec_len, $max_con_bef_len, $max_con_aft_len) ; 515 | my (%freq_err, $err) ; 516 | 517 | my ($i, $j, $i_w, $l, $n_args) ; 518 | my ($w_2, $w_1, $w1, $w2) ; 519 | my ($wp_2, $wp_1, $wp1, $wp2) ; 520 | my ($p_2, $p_1, $p1, $p2) ; 521 | 522 | my ($short_output) ; 523 | my ($score_on_punct) ; 524 | $counts{punct} = 0; # initialize 525 | 526 | getopts("g:o:s:qvhpb") ; 527 | 528 | if (defined $opt_v) 529 | { 530 | my $id = '$Id: eval.pl,v 1.9 2006/05/09 20:30:01 yuval Exp $'; 531 | my @parts = split ' ',$id; 532 | print "Version $parts[2]\n"; 533 | exit(0); 534 | } 535 | 536 | if ((defined $opt_h) || ((! defined $opt_g) && (! defined $opt_s))) 537 | { 538 | die $usage ; 539 | } 540 | 541 | if (! defined $opt_g) 542 | { 543 | die "Gold standard file (-g) missing\n" ; 544 | } 545 | 546 | if (! defined $opt_s) 547 | { 548 | die "System output file (-s) missing\n" ; 549 | } 550 | 551 | if (! 
defined $opt_o) 552 | { 553 | $opt_o = '-' ; 554 | } 555 | 556 | if (defined $opt_q) 557 | { 558 | $short_output = 1 ; 559 | } else { 560 | $short_output = 0 ; 561 | } 562 | 563 | if (defined $opt_p) 564 | { 565 | $score_on_punct = 1 ; 566 | } else { 567 | $score_on_punct = 0 ; 568 | } 569 | 570 | $line_num = 0 ; 571 | $sent_num = 0 ; 572 | $eof = 0 ; 573 | 574 | @err_sent = () ; 575 | @starts = () ; 576 | 577 | %{$err_sent[0]} = () ; 578 | 579 | $max_pos_len = length('CPOS') ; 580 | 581 | ################################################################################ 582 | ### reading input ### 583 | ################################################################################ 584 | 585 | open (GOLD, "<$opt_g") || die "Could not open gold standard file $opt_g\n" ; 586 | open (SYS, "<$opt_s") || die "Could not open system output file $opt_s\n" ; 587 | open (OUT, ">$opt_o") || die "Could not open output file $opt_o\n" ; 588 | 589 | 590 | if (defined $opt_b) { # produce output similar to evalb 591 | print OUT " Sent. Attachment Correct Scoring \n"; 592 | print OUT " ID Tokens - Unlab. Lab. HEAD HEAD+DEPREL tokens - - - -\n"; 593 | print OUT " ============================================================================\n"; 594 | } 595 | 596 | 597 | while (! $eof) 598 | { # main reading loop 599 | 600 | $starts[$sent_num] = $line_num+1 ; 601 | $eof = read_sent(\@sent_gold, \@sent_sys) ; 602 | 603 | $sent_num++ ; 604 | 605 | %{$err_sent[$sent_num]} = () ; 606 | $word_num = scalar @sent_gold ; 607 | 608 | # for accuracy per sentence 609 | my %sent_counts = ( tot => 0, 610 | err_any => 0, 611 | err_head => 0 612 | ); 613 | 614 | # printf "$sent_num $word_num\n" ; 615 | 616 | my @frames_g = ('** '); # the initial frame for the virtual root 617 | my @frames_s = ('** '); # the initial frame for the virtual root 618 | foreach $i_w (0 .. $word_num-1) 619 | { # loop on words 620 | push @frames_g, ''; # initialize 621 | push @frames_s, ''; # initialize 622 | } 623 | 624 | foreach $i_w (0 .. $word_num-1) 625 | { # loop on words 626 | 627 | ($word, $pos, $head_g, $dep_g) 628 | = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; 629 | $wp = $word.' / '.$pos ; 630 | 631 | # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; 632 | 633 | if ((! $score_on_punct) && is_uni_punct($word)) 634 | { 635 | $counts{punct}++ ; 636 | # ignore punctuations 637 | next ; 638 | } 639 | 640 | if (length($pos) > $max_pos_len) 641 | { 642 | $max_pos_len = length($pos) ; 643 | } 644 | 645 | ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; 646 | 647 | $counts{tot}++ ; 648 | $counts{word}{$wp}{tot}++ ; 649 | $counts{pos}{$pos}{tot}++ ; 650 | $counts{head}{$head_g-$i_w-1}{tot}++ ; 651 | 652 | # for frame confusions 653 | # add child to frame of parent 654 | $frames_g[$head_g] .= "$dep_g "; 655 | $frames_s[$head_s] .= "$dep_s "; 656 | # add to frame of token itself 657 | $frames_g[$i_w+1] .= "*$dep_g* "; # $i_w+1 because $i_w starts counting at zero 658 | $frames_s[$i_w+1] .= "*$dep_g* "; 659 | 660 | # for precision and recall of DEPREL 661 | $counts{dep}{$dep_g}{tot}++ ; # counts for gold standard deprels 662 | $counts{dep2}{$dep_g}{$dep_s}++ ; # counts for confusions 663 | $counts{dep_s}{$dep_s}{tot}++ ; # counts for system deprels 664 | $counts{all_dep}{$dep_g} = 1 ; # list of all deprels that occur ... 665 | $counts{all_dep}{$dep_s} = 1 ; # ... 
in either gold or system output 666 | 667 | # for precision and recall of HEAD direction 668 | my $dir_g; 669 | if ($head_g == 0) { 670 | $dir_g = 'to_root'; 671 | } elsif ($head_g < $i_w+1) { # $i_w+1 because $i_w starts counting at zero 672 | # also below 673 | $dir_g = 'left'; 674 | } elsif ($head_g > $i_w+1) { 675 | $dir_g = 'right'; 676 | } else { 677 | # token links to itself; should never happen in correct gold standard 678 | $dir_g = 'self'; 679 | } 680 | my $dir_s; 681 | if ($head_s == 0) { 682 | $dir_s = 'to_root'; 683 | } elsif ($head_s < $i_w+1) { 684 | $dir_s = 'left'; 685 | } elsif ($head_s > $i_w+1) { 686 | $dir_s = 'right'; 687 | } else { 688 | # token links to itself; should not happen in good system 689 | # (but not forbidden in shared task) 690 | $dir_s = 'self'; 691 | } 692 | $counts{dir_g}{$dir_g}{tot}++ ; # counts for gold standard head direction 693 | $counts{dir2}{$dir_g}{$dir_s}++ ; # counts for confusions 694 | $counts{dir_s}{$dir_s}{tot}++ ; # counts for system head direction 695 | 696 | # for precision and recall of HEAD distance 697 | my $dist_g; 698 | if ($head_g == 0) { 699 | $dist_g = 'to_root'; 700 | } elsif ( abs($head_g - ($i_w+1)) <= 1 ) { 701 | $dist_g = '1'; # includes the 'self' cases 702 | } elsif ( abs($head_g - ($i_w+1)) <= 2 ) { 703 | $dist_g = '2'; 704 | } elsif ( abs($head_g - ($i_w+1)) <= 6 ) { 705 | $dist_g = '3-6'; 706 | } else { 707 | $dist_g = '7-...'; 708 | } 709 | my $dist_s; 710 | if ($head_s == 0) { 711 | $dist_s = 'to_root'; 712 | } elsif ( abs($head_s - ($i_w+1)) <= 1 ) { 713 | $dist_s = '1'; # includes the 'self' cases 714 | } elsif ( abs($head_s - ($i_w+1)) <= 2 ) { 715 | $dist_s = '2'; 716 | } elsif ( abs($head_s - ($i_w+1)) <= 6 ) { 717 | $dist_s = '3-6'; 718 | } else { 719 | $dist_s = '7-...'; 720 | } 721 | $counts{dist_g}{$dist_g}{tot}++ ; # counts for gold standard head distance 722 | $counts{dist2}{$dist_g}{$dist_s}++ ; # counts for confusions 723 | $counts{dist_s}{$dist_s}{tot}++ ; # counts for system head distance 724 | 725 | 726 | $err_head = ($head_g ne $head_s) ; # error in head 727 | $err_dep = ($dep_g ne $dep_s) ; # error in deprel 728 | 729 | $head_err = '-' ; 730 | $dep_err = '-' ; 731 | 732 | # for accuracy per sentence 733 | $sent_counts{tot}++ ; 734 | if ($err_dep || $err_head) { 735 | $sent_counts{err_any}++ ; 736 | } 737 | if ($err_head) { 738 | $sent_counts{err_head}++ ; 739 | } 740 | 741 | # total counts and counts for CPOS involved in errors 742 | 743 | if ($head_g eq '0') 744 | { 745 | $head_aft_bef_g = '0' ; 746 | } 747 | elsif ($head_g eq $i_w+1) 748 | { 749 | $head_aft_bef_g = 'e' ; 750 | } 751 | else 752 | { 753 | $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; 754 | } 755 | 756 | if ($head_s eq '0') 757 | { 758 | $head_aft_bef_s = '0' ; 759 | } 760 | elsif ($head_s eq $i_w+1) 761 | { 762 | $head_aft_bef_s = 'e' ; 763 | } 764 | else 765 | { 766 | $head_aft_bef_s = ($head_s <= $i_w+1 ? 
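# --------------------------------------------------------------------------
# This ternary completes the one-letter position code for the system head,
# mirroring the gold code computed just above: '0' = attached to the
# artificial root, 'e' = head equals the focus word itself, 'b' = head lies
# before the focus word, 'a' = head lies after it. The gold and system
# letters are concatenated into $head_aft_bef and later rendered in prose by
# describe_err(). For a hypothetical token at position 5 with gold head 3
# and system head 7, $head_aft_bef is 'ba' and, further below, $head_err
# becomes 7 - 3 = 4 ("head four words after the correct head").
# --------------------------------------------------------------------------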
'b' : 'a') ; 767 | } 768 | 769 | $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; 770 | 771 | if ($err_head) 772 | { 773 | if ($head_aft_bef_s eq '0') 774 | { 775 | $head_err = 0 ; 776 | } 777 | else 778 | { 779 | $head_err = $head_s-$head_g ; 780 | } 781 | 782 | $err_sent[$sent_num]{head}++ ; 783 | $counts{err_head}{tot}++ ; 784 | $counts{err_head}{$head_err}++ ; 785 | 786 | $counts{word}{err_head}{$wp}++ ; 787 | $counts{pos}{$pos}{err_head}{tot}++ ; 788 | $counts{pos}{$pos}{err_head}{$head_err}++ ; 789 | } 790 | 791 | if ($err_dep) 792 | { 793 | $dep_err = $dep_g.'->'.$dep_s ; 794 | $err_sent[$sent_num]{dep}++ ; 795 | $counts{err_dep}{tot}++ ; 796 | $counts{err_dep}{$dep_err}++ ; 797 | 798 | $counts{word}{err_dep}{$wp}++ ; 799 | $counts{pos}{$pos}{err_dep}{tot}++ ; 800 | $counts{pos}{$pos}{err_dep}{$dep_err}++ ; 801 | 802 | if ($err_head) 803 | { 804 | $counts{err_both}++ ; 805 | $counts{pos}{$pos}{err_both}++ ; 806 | } 807 | } 808 | 809 | ### DEPREL + ATTACHMENT 810 | if ((!$err_dep) && ($err_head)) { 811 | $counts{err_head_corr_dep}{tot}++ ; 812 | $counts{err_head_corr_dep}{$dep_s}++ ; 813 | } 814 | ### DEPREL + ATTACHMENT 815 | 816 | # counts for words involved in errors 817 | 818 | if (! ($err_head || $err_dep)) 819 | { 820 | next ; 821 | } 822 | 823 | $err_sent[$sent_num]{word}++ ; 824 | $counts{err_any}++ ; 825 | $counts{word}{err_any}{$wp}++ ; 826 | $counts{pos}{$pos}{err_any}++ ; 827 | 828 | ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; 829 | 830 | if ($w_2 ne $START) 831 | { 832 | $wp_2 = $w_2.' / '.$p_2 ; 833 | } 834 | else 835 | { 836 | $wp_2 = $w_2 ; 837 | } 838 | 839 | if ($w_1 ne $START) 840 | { 841 | $wp_1 = $w_1.' / '.$p_1 ; 842 | } 843 | else 844 | { 845 | $wp_1 = $w_1 ; 846 | } 847 | 848 | if ($w1 ne $END) 849 | { 850 | $wp1 = $w1.' / '.$p1 ; 851 | } 852 | else 853 | { 854 | $wp1 = $w1 ; 855 | } 856 | 857 | if ($w2 ne $END) 858 | { 859 | $wp2 = $w2.' / '.$p2 ; 860 | } 861 | else 862 | { 863 | $wp2 = $w2 ; 864 | } 865 | 866 | $con_bef = $wp_1 ; 867 | $con_bef_2 = $wp_2.' + '.$wp_1 ; 868 | $con_aft = $wp1 ; 869 | $con_aft_2 = $wp1.' 
+ '.$wp2 ; 870 | 871 | $con_pos_bef = $p_1 ; 872 | $con_pos_bef_2 = $p_2.'+'.$p_1 ; 873 | $con_pos_aft = $p1 ; 874 | $con_pos_aft_2 = $p1.'+'.$p2 ; 875 | 876 | if ($w_1 ne $START) 877 | { 878 | # do not count '.S' as a word context 879 | $counts{con_bef_2}{tot}{$con_bef_2}++ ; 880 | $counts{con_bef_2}{err_head}{$con_bef_2} += $err_head ; 881 | $counts{con_bef_2}{err_dep}{$con_bef_2} += $err_dep ; 882 | $counts{con_bef}{tot}{$con_bef}++ ; 883 | $counts{con_bef}{err_head}{$con_bef} += $err_head ; 884 | $counts{con_bef}{err_dep}{$con_bef} += $err_dep ; 885 | } 886 | 887 | if ($w1 ne $END) 888 | { 889 | # do not count '.E' as a word context 890 | $counts{con_aft_2}{tot}{$con_aft_2}++ ; 891 | $counts{con_aft_2}{err_head}{$con_aft_2} += $err_head ; 892 | $counts{con_aft_2}{err_dep}{$con_aft_2} += $err_dep ; 893 | $counts{con_aft}{tot}{$con_aft}++ ; 894 | $counts{con_aft}{err_head}{$con_aft} += $err_head ; 895 | $counts{con_aft}{err_dep}{$con_aft} += $err_dep ; 896 | } 897 | 898 | $counts{con_pos_bef_2}{tot}{$con_pos_bef_2}++ ; 899 | $counts{con_pos_bef_2}{err_head}{$con_pos_bef_2} += $err_head ; 900 | $counts{con_pos_bef_2}{err_dep}{$con_pos_bef_2} += $err_dep ; 901 | $counts{con_pos_bef}{tot}{$con_pos_bef}++ ; 902 | $counts{con_pos_bef}{err_head}{$con_pos_bef} += $err_head ; 903 | $counts{con_pos_bef}{err_dep}{$con_pos_bef} += $err_dep ; 904 | 905 | $counts{con_pos_aft_2}{tot}{$con_pos_aft_2}++ ; 906 | $counts{con_pos_aft_2}{err_head}{$con_pos_aft_2} += $err_head ; 907 | $counts{con_pos_aft_2}{err_dep}{$con_pos_aft_2} += $err_dep ; 908 | $counts{con_pos_aft}{tot}{$con_pos_aft}++ ; 909 | $counts{con_pos_aft}{err_head}{$con_pos_aft} += $err_head ; 910 | $counts{con_pos_aft}{err_dep}{$con_pos_aft} += $err_dep ; 911 | 912 | $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; 913 | $freq_err{$err}++ ; 914 | 915 | } # loop on words 916 | 917 | foreach $i_w (0 .. 
$word_num) # including one for the virtual root 918 | { # loop on words 919 | if ($frames_g[$i_w] ne $frames_s[$i_w]) { 920 | $counts{frame2}{"$frames_g[$i_w]/ $frames_s[$i_w]"}++ ; 921 | } 922 | } 923 | 924 | if (defined $opt_b) { # produce output similar to evalb 925 | if ($word_num > 0) { 926 | my ($unlabeled,$labeled) = ('NaN', 'NaN'); 927 | if ($sent_counts{tot} > 0) { # there are scoring tokens 928 | $unlabeled = 100-$sent_counts{err_head}*100.0/$sent_counts{tot}; 929 | $labeled = 100-$sent_counts{err_any} *100.0/$sent_counts{tot}; 930 | } 931 | printf OUT " %4d %4d 0 %6.2f %6.2f %4d %4d %4d 0 0 0 0\n", 932 | $sent_num, $word_num, 933 | $unlabeled, $labeled, 934 | $sent_counts{tot}-$sent_counts{err_head}, 935 | $sent_counts{tot}-$sent_counts{err_any}, 936 | $sent_counts{tot},; 937 | } 938 | } 939 | 940 | } # main reading loop 941 | 942 | ################################################################################ 943 | ### printing output ### 944 | ################################################################################ 945 | 946 | if (defined $opt_b) { # produce output similar to evalb 947 | print OUT "\n\n"; 948 | } 949 | printf OUT " Labeled attachment score: %d / %d * 100 = %.2f %%\n", 950 | $counts{tot}-$counts{err_any}, $counts{tot}, 100-$counts{err_any}*100.0/$counts{tot} ; 951 | printf OUT " Unlabeled attachment score: %d / %d * 100 = %.2f %%\n", 952 | $counts{tot}-$counts{err_head}{tot}, $counts{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot} ; 953 | printf OUT " Label accuracy score: %d / %d * 100 = %.2f %%\n", 954 | $counts{tot}-$counts{err_dep}{tot}, $counts{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot} ; 955 | 956 | if ($short_output) 957 | { 958 | exit(0) ; 959 | } 960 | printf OUT "\n %s\n\n", '=' x 80 ; 961 | printf OUT " Evaluation of the results in %s\n vs. gold standard %s:\n\n", $opt_s, $opt_g ; 962 | 963 | printf OUT " Legend: '%s' - the beginning of a sentence, '%s' - the end of a sentence\n\n", $START, $END ; 964 | 965 | printf OUT " Number of non-scoring tokens: $counts{punct}\n\n"; 966 | 967 | printf OUT " The overall accuracy and its distribution over CPOSTAGs\n\n" ; 968 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 969 | 970 | printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", 971 | 'Accuracy', 'words', 'right', 'right', 'both' ; 972 | printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", 973 | ' ', ' ', 'head', ' dep', 'right' ; 974 | 975 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 976 | 977 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 978 | 'total', $counts{tot}, 979 | $counts{tot}-$counts{err_head}{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot}, 980 | $counts{tot}-$counts{err_dep}{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot}, 981 | $counts{tot}-$counts{err_any}, 100-$counts{err_any}*100.0/$counts{tot} ; 982 | 983 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 984 | 985 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) 986 | { 987 | if (! defined($counts{pos}{$pos}{err_head}{tot})) 988 | { 989 | $counts{pos}{$pos}{err_head}{tot} = 0 ; 990 | } 991 | if (! defined($counts{pos}{$pos}{err_dep}{tot})) 992 | { 993 | $counts{pos}{$pos}{err_dep}{tot} = 0 ; 994 | } 995 | if (! 
defined($counts{pos}{$pos}{err_any})) 996 | { 997 | $counts{pos}{$pos}{err_any} = 0 ; 998 | } 999 | 1000 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1001 | $pos, $counts{pos}{$pos}{tot}, 1002 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_head}{tot}, 100-$counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, 1003 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_dep}{tot}, 100-$counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, 1004 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_any}, 100-$counts{pos}{$pos}{err_any}*100.0/$counts{pos}{$pos}{tot} ; 1005 | } 1006 | 1007 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1008 | 1009 | printf OUT "\n\n" ; 1010 | 1011 | printf OUT " The overall error rate and its distribution over CPOSTAGs\n\n" ; 1012 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1013 | 1014 | printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", 1015 | 'Error', 'words', 'head', ' dep', 'both' ; 1016 | printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", 1017 | 1018 | 'Rate', ' ', 'err', ' err', 'wrong' ; 1019 | 1020 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1021 | 1022 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1023 | 'total', $counts{tot}, 1024 | $counts{err_head}{tot}, $counts{err_head}{tot}*100.0/$counts{tot}, 1025 | $counts{err_dep}{tot}, $counts{err_dep}{tot}*100.0/$counts{tot}, 1026 | $counts{err_both}, $counts{err_both}*100.0/$counts{tot} ; 1027 | 1028 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1029 | 1030 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) 1031 | { 1032 | if (! 
defined($counts{pos}{$pos}{err_both})) 1033 | { 1034 | $counts{pos}{$pos}{err_both} = 0 ; 1035 | } 1036 | 1037 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1038 | $pos, $counts{pos}{$pos}{tot}, 1039 | $counts{pos}{$pos}{err_head}{tot}, $counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, 1040 | $counts{pos}{$pos}{err_dep}{tot}, $counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, 1041 | $counts{pos}{$pos}{err_both}, $counts{pos}{$pos}{err_both}*100.0/$counts{pos}{$pos}{tot} ; 1042 | 1043 | } 1044 | 1045 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1046 | 1047 | ### added by Sabine Buchholz 1048 | printf OUT "\n\n"; 1049 | printf OUT " Precision and recall of DEPREL\n\n"; 1050 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1051 | printf OUT " deprel | gold | correct | system | recall (%%) | precision (%%) \n"; 1052 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1053 | foreach my $dep (sort keys %{$counts{all_dep}}) { 1054 | # initialize 1055 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1056 | 1057 | if (defined($counts{dep2}{$dep}{$dep})) { 1058 | $tot_corr = $counts{dep2}{$dep}{$dep}; 1059 | } 1060 | if (defined($counts{dep}{$dep}{tot})) { 1061 | $tot_g = $counts{dep}{$dep}{tot}; 1062 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1063 | } 1064 | if (defined($counts{dep_s}{$dep}{tot})) { 1065 | $tot_s = $counts{dep_s}{$dep}{tot}; 1066 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1067 | } 1068 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1069 | $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1070 | } 1071 | 1072 | ### DEPREL + ATTACHMENT: 1073 | ### Same as Sabine's DEPREL apart from $tot_corr calculation 1074 | printf OUT "\n\n"; 1075 | printf OUT " Precision and recall of DEPREL + ATTACHMENT\n\n"; 1076 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1077 | printf OUT " deprel | gold | correct | system | recall (%%) | precision (%%) \n"; 1078 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1079 | foreach my $dep (sort keys %{$counts{all_dep}}) { 1080 | # initialize 1081 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1082 | 1083 | if (defined($counts{dep2}{$dep}{$dep})) { 1084 | if (defined($counts{err_head_corr_dep}{$dep})) { 1085 | $tot_corr = $counts{dep2}{$dep}{$dep} - $counts{err_head_corr_dep}{$dep}; 1086 | } else { 1087 | $tot_corr = $counts{dep2}{$dep}{$dep}; 1088 | } 1089 | } 1090 | if (defined($counts{dep}{$dep}{tot})) { 1091 | $tot_g = $counts{dep}{$dep}{tot}; 1092 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1093 | } 1094 | if (defined($counts{dep_s}{$dep}{tot})) { 1095 | $tot_s = $counts{dep_s}{$dep}{tot}; 1096 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1097 | } 1098 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1099 | $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1100 | } 1101 | ### DEPREL + ATTACHMENT 1102 | 1103 | printf OUT "\n\n"; 1104 | printf OUT " Precision and recall of binned HEAD direction\n\n"; 1105 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1106 | printf OUT " direction | gold | correct | system | recall (%%) | precision (%%) \n"; 1107 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1108 | foreach my 
$dir ('to_root', 'left', 'right', 'self') { 1109 | # initialize 1110 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1111 | 1112 | if (defined($counts{dir2}{$dir}{$dir})) { 1113 | $tot_corr = $counts{dir2}{$dir}{$dir}; 1114 | } 1115 | if (defined($counts{dir_g}{$dir}{tot})) { 1116 | $tot_g = $counts{dir_g}{$dir}{tot}; 1117 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1118 | } 1119 | if (defined($counts{dir_s}{$dir}{tot})) { 1120 | $tot_s = $counts{dir_s}{$dir}{tot}; 1121 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1122 | } 1123 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1124 | $dir, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1125 | } 1126 | 1127 | printf OUT "\n\n"; 1128 | printf OUT " Precision and recall of binned HEAD distance\n\n"; 1129 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1130 | printf OUT " distance | gold | correct | system | recall (%%) | precision (%%) \n"; 1131 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1132 | foreach my $dist ('to_root', '1', '2', '3-6', '7-...') { 1133 | # initialize 1134 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1135 | 1136 | if (defined($counts{dist2}{$dist}{$dist})) { 1137 | $tot_corr = $counts{dist2}{$dist}{$dist}; 1138 | } 1139 | if (defined($counts{dist_g}{$dist}{tot})) { 1140 | $tot_g = $counts{dist_g}{$dist}{tot}; 1141 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1142 | } 1143 | if (defined($counts{dist_s}{$dist}{tot})) { 1144 | $tot_s = $counts{dist_s}{$dist}{tot}; 1145 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1146 | } 1147 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1148 | $dist, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1149 | } 1150 | 1151 | printf OUT "\n\n"; 1152 | printf OUT " Frame confusions (gold versus system; *...* marks the head token)\n\n"; 1153 | foreach my $frame (sort {$counts{frame2}{$b} <=> $counts{frame2}{$a}} keys %{$counts{frame2}}) 1154 | { 1155 | if ($counts{frame2}{$frame} >= 5) # (make 5 a changeable threshold later) 1156 | { 1157 | printf OUT " %3d %s\n", $counts{frame2}{$frame}, $frame; 1158 | } 1159 | } 1160 | ### end of: added by Sabine Buchholz 1161 | 1162 | 1163 | # 1164 | # Leave only the 5 words most involved in errors 1165 | # 1166 | 1167 | 1168 | $thresh = (sort {$b <=> $a} values %{$counts{word}{err_any}})[4] ; 1169 | 1170 | # ensure enough space for title 1171 | $max_word_len = length('word') ; 1172 | 1173 | foreach $word (keys %{$counts{word}{err_any}}) 1174 | { 1175 | if ($counts{word}{err_any}{$word} < $thresh) 1176 | { 1177 | delete $counts{word}{err_any}{$word} ; 1178 | next ; 1179 | } 1180 | 1181 | $l = uni_len($word) ; 1182 | if ($l > $max_word_len) 1183 | { 1184 | $max_word_len = $l ; 1185 | } 1186 | } 1187 | 1188 | # filter a case when the difference between the error counts 1189 | # for 2-word and 1-word contexts is small 1190 | # (leave the 2-word context) 1191 | 1192 | foreach $con (keys %{$counts{con_aft_2}{tot}}) 1193 | { 1194 | ($w1) = split(/ \+ /, $con) ; 1195 | 1196 | if (defined $counts{con_aft}{tot}{$w1} && 1197 | $counts{con_aft}{tot}{$w1}-$counts{con_aft_2}{tot}{$con} <= 1) 1198 | { 1199 | delete $counts{con_aft}{tot}{$w1} ; 1200 | } 1201 | } 1202 | 1203 | foreach $con (keys %{$counts{con_bef_2}{tot}}) 1204 | { 1205 | ($w_2, $w_1) = split(/ \+ /, $con) ; 1206 | 1207 | if (defined $counts{con_bef}{tot}{$w_1} && 1208 | $counts{con_bef}{tot}{$w_1}-$counts{con_bef_2}{tot}{$con} <= 1) 1209 | { 
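# --------------------------------------------------------------------------
# A hypothetical illustration of this filter: if the 2-token context
# "in / ADP + the / DET" covers 12 of the error words and the 1-token
# context "the / DET" covers 13, the shorter context explains at most one
# extra case, so it is deleted here and only the more specific 2-token
# entry survives into the printed report.
# --------------------------------------------------------------------------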
1210 | delete $counts{con_bef}{tot}{$w_1} ; 1211 | } 1212 | } 1213 | 1214 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) 1215 | { 1216 | ($p1) = split(/\+/, $con_pos) ; 1217 | 1218 | if (defined($counts{con_pos_aft}{tot}{$p1}) && 1219 | $counts{con_pos_aft}{tot}{$p1}-$counts{con_pos_aft_2}{tot}{$con_pos} <= 1) 1220 | { 1221 | delete $counts{con_pos_aft}{tot}{$p1} ; 1222 | } 1223 | } 1224 | 1225 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) 1226 | { 1227 | ($p_2, $p_1) = split(/\+/, $con_pos) ; 1228 | 1229 | if (defined($counts{con_pos_bef}{tot}{$p_1}) && 1230 | $counts{con_pos_bef}{tot}{$p_1}-$counts{con_pos_bef_2}{tot}{$con_pos} <= 1) 1231 | { 1232 | delete $counts{con_pos_bef}{tot}{$p_1} ; 1233 | } 1234 | } 1235 | 1236 | # for each context type, take the three contexts most involved in errors 1237 | 1238 | $max_con_len = 0 ; 1239 | 1240 | filter_context_counts($counts{con_bef_2}{tot}, $con_err_num, \$max_con_len) ; 1241 | 1242 | filter_context_counts($counts{con_bef}{tot}, $con_err_num, \$max_con_len) ; 1243 | 1244 | filter_context_counts($counts{con_aft}{tot}, $con_err_num, \$max_con_len) ; 1245 | 1246 | filter_context_counts($counts{con_aft_2}{tot}, $con_err_num, \$max_con_len) ; 1247 | 1248 | # for each CPOS context type, take the three CPOS contexts most involved in errors 1249 | 1250 | $max_con_pos_len = 0 ; 1251 | 1252 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef_2}{tot}})[$con_err_num-1] ; 1253 | 1254 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) 1255 | { 1256 | if ($counts{con_pos_bef_2}{tot}{$con_pos} < $thresh) 1257 | { 1258 | delete $counts{con_pos_bef_2}{tot}{$con_pos} ; 1259 | next ; 1260 | } 1261 | if (length($con_pos) > $max_con_pos_len) 1262 | { 1263 | $max_con_pos_len = length($con_pos) ; 1264 | } 1265 | } 1266 | 1267 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef}{tot}})[$con_err_num-1] ; 1268 | 1269 | foreach $con_pos (keys %{$counts{con_pos_bef}{tot}}) 1270 | { 1271 | if ($counts{con_pos_bef}{tot}{$con_pos} < $thresh) 1272 | { 1273 | delete $counts{con_pos_bef}{tot}{$con_pos} ; 1274 | next ; 1275 | } 1276 | if (length($con_pos) > $max_con_pos_len) 1277 | { 1278 | $max_con_pos_len = length($con_pos) ; 1279 | } 1280 | } 1281 | 1282 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft}{tot}})[$con_err_num-1] ; 1283 | 1284 | foreach $con_pos (keys %{$counts{con_pos_aft}{tot}}) 1285 | { 1286 | if ($counts{con_pos_aft}{tot}{$con_pos} < $thresh) 1287 | { 1288 | delete $counts{con_pos_aft}{tot}{$con_pos} ; 1289 | next ; 1290 | } 1291 | if (length($con_pos) > $max_con_pos_len) 1292 | { 1293 | $max_con_pos_len = length($con_pos) ; 1294 | } 1295 | } 1296 | 1297 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft_2}{tot}})[$con_err_num-1] ; 1298 | 1299 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) 1300 | { 1301 | if ($counts{con_pos_aft_2}{tot}{$con_pos} < $thresh) 1302 | { 1303 | delete $counts{con_pos_aft_2}{tot}{$con_pos} ; 1304 | next ; 1305 | } 1306 | if (length($con_pos) > $max_con_pos_len) 1307 | { 1308 | $max_con_pos_len = length($con_pos) ; 1309 | } 1310 | } 1311 | 1312 | # printing 1313 | 1314 | # ------------- focus words 1315 | 1316 | printf OUT "\n\n" ; 1317 | printf OUT " %d focus words where most of the errors occur:\n\n", scalar keys %{$counts{word}{err_any}} ; 1318 | 1319 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s\n", $max_word_len, ' ', 'any', 'head', 'dep', 'both' ; 1320 | printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; 1321 | 1322 | foreach $word (sort 
{$counts{word}{err_any}{$b} <=> $counts{word}{err_any}{$a}} keys %{$counts{word}{err_any}}) 1323 | { 1324 | if (!defined($counts{word}{err_head}{$word})) 1325 | { 1326 | $counts{word}{err_head}{$word} = 0 ; 1327 | } 1328 | if (! defined($counts{word}{err_dep}{$word})) 1329 | { 1330 | $counts{word}{err_dep}{$word} = 0 ; 1331 | } 1332 | if (! defined($counts{word}{err_any}{$word})) 1333 | { 1334 | $counts{word}{err_any}{$word} = 0; 1335 | } 1336 | printf OUT " %-*s | %4d | %4d | %4d | %4d\n", 1337 | $max_word_len+length($word)-uni_len($word), $word, $counts{word}{err_any}{$word}, 1338 | $counts{word}{err_head}{$word}, 1339 | $counts{word}{err_dep}{$word}, 1340 | $counts{word}{err_dep}{$word}+$counts{word}{err_head}{$word}-$counts{word}{err_any}{$word} ; 1341 | } 1342 | 1343 | printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; 1344 | 1345 | # ------------- contexts 1346 | 1347 | printf OUT "\n\n" ; 1348 | 1349 | printf OUT " one-token preceding contexts where most of the errors occur:\n\n" ; 1350 | 1351 | print_context($counts{con_bef}, $counts{con_pos_bef}, $max_con_len, $max_con_pos_len) ; 1352 | 1353 | printf OUT " two-token preceding contexts where most of the errors occur:\n\n" ; 1354 | 1355 | print_context($counts{con_bef_2}, $counts{con_pos_bef_2}, $max_con_len, $max_con_pos_len) ; 1356 | 1357 | printf OUT " one-token following contexts where most of the errors occur:\n\n" ; 1358 | 1359 | print_context($counts{con_aft}, $counts{con_pos_aft}, $max_con_len, $max_con_pos_len) ; 1360 | 1361 | printf OUT " two-token following contexts where most of the errors occur:\n\n" ; 1362 | 1363 | print_context($counts{con_aft_2}, $counts{con_pos_aft_2}, $max_con_len, $max_con_pos_len) ; 1364 | 1365 | # ------------- Sentences 1366 | 1367 | printf OUT " Sentence with the highest number of word errors:\n" ; 1368 | $i = (sort { (defined($err_sent[$b]{word}) && $err_sent[$b]{word}) 1369 | <=> (defined($err_sent[$a]{word}) && $err_sent[$a]{word}) } 1 .. $sent_num)[0] ; 1370 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1371 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1372 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1373 | 1374 | printf OUT "\n\n" ; 1375 | 1376 | printf OUT " Sentence with the highest number of head errors:\n" ; 1377 | $i = (sort { (defined($err_sent[$b]{head}) && $err_sent[$b]{head}) 1378 | <=> (defined($err_sent[$a]{head}) && $err_sent[$a]{head}) } 1 .. $sent_num)[0] ; 1379 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1380 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1381 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1382 | 1383 | printf OUT "\n\n" ; 1384 | 1385 | printf OUT " Sentence with the highest number of dependency errors:\n" ; 1386 | $i = (sort { (defined($err_sent[$b]{dep}) && $err_sent[$b]{dep}) 1387 | <=> (defined($err_sent[$a]{dep}) && $err_sent[$a]{dep}) } 1 .. 
$sent_num)[0] ; 1388 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1389 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1390 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1391 | 1392 | # 1393 | # Second pass, collect statistics of the frequent errors 1394 | # 1395 | 1396 | # filter the errors, leave the most frequent $freq_err_num errors 1397 | 1398 | $i = 0 ; 1399 | 1400 | $thresh = (sort {$b <=> $a} values %freq_err)[$freq_err_num-1] ; 1401 | 1402 | foreach $err (keys %freq_err) 1403 | { 1404 | if ($freq_err{$err} < $thresh) 1405 | { 1406 | delete $freq_err{$err} ; 1407 | } 1408 | } 1409 | 1410 | # in case there are several errors with the threshold count 1411 | 1412 | $freq_err_num = scalar keys %freq_err ; 1413 | 1414 | %err_counts = () ; 1415 | 1416 | $eof = 0 ; 1417 | 1418 | seek (GOLD, 0, 0) ; 1419 | seek (SYS, 0, 0) ; 1420 | 1421 | while (! $eof) 1422 | { # second reading loop 1423 | 1424 | $eof = read_sent(\@sent_gold, \@sent_sys) ; 1425 | $sent_num++ ; 1426 | 1427 | $word_num = scalar @sent_gold ; 1428 | 1429 | # printf "$sent_num $word_num\n" ; 1430 | 1431 | foreach $i_w (0 .. $word_num-1) 1432 | { # loop on words 1433 | ($word, $pos, $head_g, $dep_g) 1434 | = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; 1435 | 1436 | # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; 1437 | 1438 | if ((! $score_on_punct) && is_uni_punct($word)) 1439 | { 1440 | # ignore punctuations 1441 | next ; 1442 | } 1443 | 1444 | ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; 1445 | 1446 | $err_head = ($head_g ne $head_s) ; 1447 | $err_dep = ($dep_g ne $dep_s) ; 1448 | 1449 | $head_err = '-' ; 1450 | $dep_err = '-' ; 1451 | 1452 | if ($head_g eq '0') 1453 | { 1454 | $head_aft_bef_g = '0' ; 1455 | } 1456 | elsif ($head_g eq $i_w+1) 1457 | { 1458 | $head_aft_bef_g = 'e' ; 1459 | } 1460 | else 1461 | { 1462 | $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; 1463 | } 1464 | 1465 | if ($head_s eq '0') 1466 | { 1467 | $head_aft_bef_s = '0' ; 1468 | } 1469 | elsif ($head_s eq $i_w+1) 1470 | { 1471 | $head_aft_bef_s = 'e' ; 1472 | } 1473 | else 1474 | { 1475 | $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ; 1476 | } 1477 | 1478 | $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; 1479 | 1480 | if ($err_head) 1481 | { 1482 | if ($head_aft_bef_s eq '0') 1483 | { 1484 | $head_err = 0 ; 1485 | } 1486 | else 1487 | { 1488 | $head_err = $head_s-$head_g ; 1489 | } 1490 | } 1491 | 1492 | if ($err_dep) 1493 | { 1494 | $dep_err = $dep_g.'->'.$dep_s ; 1495 | } 1496 | 1497 | if (! ($err_head || $err_dep)) 1498 | { 1499 | next ; 1500 | } 1501 | 1502 | # handle only the most frequent errors 1503 | 1504 | $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; 1505 | 1506 | if (! exists $freq_err{$err}) 1507 | { 1508 | next ; 1509 | } 1510 | 1511 | ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; 1512 | 1513 | $con_bef = $w_1 ; 1514 | $con_bef_2 = $w_2.' + '.$w_1 ; 1515 | $con_aft = $w1 ; 1516 | $con_aft_2 = $w1.' 
+ '.$w2 ; 1517 | 1518 | $con_pos_bef = $p_1 ; 1519 | $con_pos_bef_2 = $p_2.'+'.$p_1 ; 1520 | $con_pos_aft = $p1 ; 1521 | $con_pos_aft_2 = $p1.'+'.$p2 ; 1522 | 1523 | @cur_err = ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) ; 1524 | 1525 | # printf "# %-25s %-15s %-10s %-25s %-3s %-30s\n", 1526 | # $con_bef, $word, $pos, $con_aft, $head_err, $dep_err ; 1527 | 1528 | @bits = (0, 0, 0, 0, 0, 0) ; 1529 | $j = 0 ; 1530 | 1531 | while ($j == 0) 1532 | { 1533 | for ($i = 0; $i <= $#bits; $i++) 1534 | { 1535 | if ($bits[$i] == 0) 1536 | { 1537 | $bits[$i] = 1 ; 1538 | $j = 0 ; 1539 | last ; 1540 | } 1541 | else 1542 | { 1543 | $bits[$i] = 0 ; 1544 | $j = 1 ; 1545 | } 1546 | } 1547 | 1548 | @e_bits = @cur_err ; 1549 | 1550 | for ($i = 0; $i <= $#bits; $i++) 1551 | { 1552 | if (! $bits[$i]) 1553 | { 1554 | $e_bits[$i] = '*' ; 1555 | } 1556 | } 1557 | 1558 | # include also the last case which is the most general 1559 | # (wildcards for everything) 1560 | $err_counts{$err}{join($sep, @e_bits)}++ ; 1561 | 1562 | } 1563 | 1564 | } # loop on words 1565 | } # second reading loop 1566 | 1567 | printf OUT "\n\n" ; 1568 | printf OUT " Specific errors, %d most frequent errors:", $freq_err_num ; 1569 | printf OUT "\n %s\n", '=' x 41 ; 1570 | 1571 | 1572 | # deleting local contexts which are too general 1573 | 1574 | foreach $err (keys %err_counts) 1575 | { 1576 | foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} 1577 | keys %{$err_counts{$err}}) 1578 | { 1579 | @cur_err = split(/\Q$sep\E/, $loc_con) ; 1580 | 1581 | # In this loop, one or two elements of the local context are 1582 | # replaced with '*' to make it more general. If the entry for 1583 | # the general context has the same count it is removed. 1584 | 1585 | foreach $i (0 .. 
$#cur_err) 1586 | { 1587 | $w1 = $cur_err[$i] ; 1588 | if ($cur_err[$i] eq '*') 1589 | { 1590 | next ; 1591 | } 1592 | $cur_err[$i] = '*' ; 1593 | $con1 = join($sep, @cur_err) ; 1594 | if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) 1595 | && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) 1596 | { 1597 | delete $err_counts{$err}{$con1} ; 1598 | } 1599 | for ($j = $i+1; $j <=$#cur_err; $j++) 1600 | { 1601 | if ($cur_err[$j] eq '*') 1602 | { 1603 | next ; 1604 | } 1605 | $w2 = $cur_err[$j] ; 1606 | $cur_err[$j] = '*' ; 1607 | $con1 = join($sep, @cur_err) ; 1608 | if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) 1609 | && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) 1610 | { 1611 | delete $err_counts{$err}{$con1} ; 1612 | } 1613 | $cur_err[$j] = $w2 ; 1614 | } 1615 | $cur_err[$i] = $w1 ; 1616 | } 1617 | } 1618 | } 1619 | 1620 | # Leaving only the topmost local contexts for each error 1621 | 1622 | foreach $err (keys %err_counts) 1623 | { 1624 | $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[$spec_err_loc_con-1] || 0 ; 1625 | 1626 | # of the threshold is too low, take the 2nd highest count 1627 | # (the highest may be the total which is the generic case 1628 | # and not relevant for printing) 1629 | 1630 | if ($thresh < 5) 1631 | { 1632 | $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[1] ; 1633 | } 1634 | 1635 | foreach $loc_con (keys %{$err_counts{$err}}) 1636 | { 1637 | if ($err_counts{$err}{$loc_con} < $thresh) 1638 | { 1639 | delete $err_counts{$err}{$loc_con} ; 1640 | } 1641 | else 1642 | { 1643 | if ($loc_con ne join($sep, ('*', '*', '*', '*', '*', '*'))) 1644 | { 1645 | $loc_con_err_counts{$loc_con}{$err} = $err_counts{$err}{$loc_con} ; 1646 | } 1647 | } 1648 | } 1649 | } 1650 | 1651 | # printing an error summary 1652 | 1653 | # calculating the context field length 1654 | 1655 | $max_word_spec_len= length('word') ; 1656 | $max_con_aft_len = length('word') ; 1657 | $max_con_bef_len = length('word') ; 1658 | $max_con_pos_len = length('CPOS') ; 1659 | 1660 | foreach $err (keys %err_counts) 1661 | { 1662 | foreach $loc_con (sort keys %{$err_counts{$err}}) 1663 | { 1664 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1665 | split(/\Q$sep\E/, $loc_con) ; 1666 | 1667 | $l = uni_len($word) ; 1668 | if ($l > $max_word_spec_len) 1669 | { 1670 | $max_word_spec_len = $l ; 1671 | } 1672 | 1673 | $l = uni_len($con_bef) ; 1674 | if ($l > $max_con_bef_len) 1675 | { 1676 | $max_con_bef_len = $l ; 1677 | } 1678 | 1679 | $l = uni_len($con_aft) ; 1680 | if ($l > $max_con_aft_len) 1681 | { 1682 | $max_con_aft_len = $l ; 1683 | } 1684 | 1685 | if (length($con_pos_aft) > $max_con_pos_len) 1686 | { 1687 | $max_con_pos_len = length($con_pos_aft) ; 1688 | } 1689 | 1690 | if (length($con_pos_bef) > $max_con_pos_len) 1691 | { 1692 | $max_con_pos_len = length($con_pos_bef) ; 1693 | } 1694 | } 1695 | } 1696 | 1697 | $err_counter = 0 ; 1698 | 1699 | foreach $err (sort {$freq_err{$b} <=> $freq_err{$a}} keys %freq_err) 1700 | { 1701 | 1702 | ($head_err, $head_aft_bef, $dep_err) = split(/\Q$sep\E/, $err) ; 1703 | 1704 | $err_counter++ ; 1705 | $err_desc{$err} = sprintf("%2d. ", $err_counter). 
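# --------------------------------------------------------------------------
# describe_err(), defined earlier in this file, renders the machine-readable
# error key -- head offset, two-letter gold/system position code, and the
# "gold->system" deprel pair -- as prose; the sprintf above merely prefixes
# a running number. A hypothetical key built from $head_err = 2,
# $head_aft_bef = 'ba' and $dep_err = 'NMOD->OBJ' comes out roughly as:
#   head two words after the correct head (after the focus word instead
#   of before the focus word), dependency "OBJ" instead of "NMOD"
# --------------------------------------------------------------------------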
1706 | describe_err($head_err, $head_aft_bef, $dep_err) ; 1707 | 1708 | # printf OUT " %-3s %-30s %d\n", $head_err, $dep_err, $freq_err{$err} ; 1709 | printf OUT "\n" ; 1710 | printf OUT " %s : %d times\n", $err_desc{$err}, $freq_err{$err} ; 1711 | 1712 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1713 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1714 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1715 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1716 | 1717 | printf OUT " %-*s | %-*s | %-*s | %s\n", 1718 | $max_con_pos_len+$max_con_bef_len+3, ' Before', 1719 | $max_word_spec_len+$max_pos_len+3, ' Focus', 1720 | $max_con_pos_len+$max_con_aft_len+3, ' After', 1721 | 'Count' ; 1722 | 1723 | printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s |\n", 1724 | $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', 1725 | $max_pos_len, 'CPOS', $max_word_spec_len, 'word', 1726 | $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; 1727 | 1728 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1729 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1730 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1731 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1732 | 1733 | foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} 1734 | keys %{$err_counts{$err}}) 1735 | { 1736 | if ($loc_con eq join($sep, ('*', '*', '*', '*', '*', '*'))) 1737 | { 1738 | next ; 1739 | } 1740 | 1741 | $con1 = $loc_con ; 1742 | $con1 =~ s/\*/ /g ; 1743 | 1744 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1745 | split(/\Q$sep\E/, $con1) ; 1746 | 1747 | printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s | %3d\n", 1748 | $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, 1749 | $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, 1750 | $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft, 1751 | $err_counts{$err}{$loc_con} ; 1752 | } 1753 | 1754 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1755 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1756 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1757 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1758 | 1759 | } 1760 | 1761 | printf OUT "\n\n" ; 1762 | printf OUT " Local contexts involved in several frequent errors:" ; 1763 | printf OUT "\n %s\n", '=' x 51 ; 1764 | printf OUT "\n\n" ; 1765 | 1766 | foreach $loc_con (sort {scalar keys %{$loc_con_err_counts{$b}} <=> 1767 | scalar keys %{$loc_con_err_counts{$a}}} 1768 | keys %loc_con_err_counts) 1769 | { 1770 | 1771 | if (scalar keys %{$loc_con_err_counts{$loc_con}} == 1) 1772 | { 1773 | next ; 1774 | } 1775 | 1776 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1777 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1778 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1779 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1780 | 1781 | printf OUT " %-*s | %-*s | %-*s \n", 1782 | $max_con_pos_len+$max_con_bef_len+3, ' Before', 1783 | $max_word_spec_len+$max_pos_len+3, ' Focus', 1784 | $max_con_pos_len+$max_con_aft_len+3, ' After' ; 1785 | 1786 | printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s \n", 1787 | $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', 1788 | $max_pos_len, 'CPOS', $max_word_spec_len, 'word', 1789 | $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; 1790 | 1791 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1792 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1793 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1794 | '-' x 
$max_con_pos_len, '-' x $max_con_aft_len ; 1795 | 1796 | $con1 = $loc_con ; 1797 | $con1 =~ s/\*/ /g ; 1798 | 1799 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1800 | split(/\Q$sep\E/, $con1) ; 1801 | 1802 | printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s \n", 1803 | $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, 1804 | $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, 1805 | $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft ; 1806 | 1807 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1808 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1809 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1810 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1811 | 1812 | foreach $err (sort {$loc_con_err_counts{$loc_con}{$b} <=> 1813 | $loc_con_err_counts{$loc_con}{$a}} 1814 | keys %{$loc_con_err_counts{$loc_con}}) 1815 | { 1816 | printf OUT " %s : %d times\n", $err_desc{$err}, 1817 | $loc_con_err_counts{$loc_con}{$err} ; 1818 | } 1819 | 1820 | printf OUT "\n" ; 1821 | } 1822 | 1823 | close GOLD ; 1824 | close SYS ; 1825 | 1826 | close OUT ; 1827 | -------------------------------------------------------------------------------- /bmstparser/src/utils/evaluation_script/conll17_ud_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # CoNLL 2017 UD Parsing evaluation script. 4 | # 5 | # Compatible with Python 2.7 and 3.2+, can be used either as a module 6 | # or a standalone executable. 7 | # 8 | # Copyright 2017 Institute of Formal and Applied Linguistics (UFAL), 9 | # Faculty of Mathematics and Physics, Charles University, Czech Republic. 10 | # 11 | # Changelog: 12 | # - [02 Jan 2017] Version 0.9: Initial release 13 | # - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation 14 | # - [10 Mar 2017] Version 1.0: Add documentation and test 15 | # Compare HEADs correctly using aligned words 16 | # Allow evaluation with erroneous spaces in forms 17 | # Compare forms in LCS case insensitively 18 | # Detect cycles and multiple root nodes 19 | # Compute AlignedAccuracy 20 | 21 | # Command line usage 22 | # ------------------ 23 | # conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file 24 | # 25 | # - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metric 26 | # is printed 27 | # - if -v is given, several metrics are printed (as precision, recall, F1 score, 28 | # and in case the metric is computed on aligned words also accuracy on these): 29 | # - Tokens: how well do the gold tokens match system tokens 30 | # - Sentences: how well do the gold sentences match system sentences 31 | # - Words: how well can the gold words be aligned to system words 32 | # - UPOS: using aligned words, how well does UPOS match 33 | # - XPOS: using aligned words, how well does XPOS match 34 | # - Feats: using aligned words, how well does FEATS match 35 | # - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match 36 | # - Lemmas: using aligned words, how well does LEMMA match 37 | # - UAS: using aligned words, how well does HEAD match 38 | # - LAS: using aligned words, how well does HEAD+DEPREL (ignoring subtypes) match 39 | # - if weights_file is given (with lines containing deprel-weight pairs), 40 | # one more metric is shown: 41 | # - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight 42 | 43 | # API usage 44 | # --------- 45 | 
# - load_conllu(file) 46 | # - loads CoNLL-U file from given file object to an internal representation 47 | # - the file object should return str on both Python 2 and Python 3 48 | # - raises UDError exception if the given file cannot be loaded 49 | # - evaluate(gold_ud, system_ud) 50 | # - evaluate the given gold and system CoNLL-U files (loaded with load_conllu) 51 | # - raises UDError if the concatenated tokens of gold and system file do not match 52 | # - returns a dictionary with the metrics described above, each metrics having 53 | # three fields: precision, recall and f1 54 | 55 | # Description of token matching 56 | # ----------------------------- 57 | # In order to match tokens of gold file and system file, we consider the text 58 | # resulting from concatenation of gold tokens and text resulting from 59 | # concatenation of system tokens. These texts should match -- if they do not, 60 | # the evaluation fails. 61 | # 62 | # If the texts do match, every token is represented as a range in this original 63 | # text, and tokens are equal only if their range is the same. 64 | 65 | # Description of word matching 66 | # ---------------------------- 67 | # When matching words of gold file and system file, we first match the tokens. 68 | # The words which are also tokens are matched as tokens, but words in multi-word 69 | # tokens have to be handled differently. 70 | # 71 | # To handle multi-word tokens, we start by finding "multi-word spans". 72 | # Multi-word span is a span in the original text such that 73 | # - it contains at least one multi-word token 74 | # - all multi-word tokens in the span (considering both gold and system ones) 75 | # are completely inside the span (i.e., they do not "stick out") 76 | # - the multi-word span is as small as possible 77 | # 78 | # For every multi-word span, we align the gold and system words completely 79 | # inside this span using LCS on their FORMs. The words not intersecting 80 | # (even partially) any multi-word span are then aligned as tokens. 81 | 82 | 83 | from __future__ import division 84 | from __future__ import print_function 85 | 86 | import argparse 87 | import io 88 | import sys 89 | import unittest 90 | 91 | # CoNLL-U column names 92 | ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10) 93 | 94 | # UD Error is used when raising exceptions in this module 95 | class UDError(Exception): 96 | pass 97 | 98 | # Load given CoNLL-U file into internal representation 99 | def load_conllu(file): 100 | # Internal representation classes 101 | class UDRepresentation: 102 | def __init__(self): 103 | # Characters of all the tokens in the whole file. 104 | # Whitespace between tokens is not included. 105 | self.characters = [] 106 | # List of UDSpan instances with start&end indices into `characters`. 107 | self.tokens = [] 108 | # List of UDWord instances. 109 | self.words = [] 110 | # List of UDSpan instances with start&end indices into `characters`. 111 | self.sentences = [] 112 | class UDSpan: 113 | def __init__(self, start, end): 114 | self.start = start 115 | # Note that self.end marks the first position **after the end** of span, 116 | # so we can use characters[start:end] or range(start, end). 117 | self.end = end 118 | class UDWord: 119 | def __init__(self, span, columns, is_multiword): 120 | # Span of this word (or MWT, see below) within ud_representation.characters. 121 | self.span = span 122 | # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,... 
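# (A hypothetical illustration of how these fields interact: for a
# multi-word token "It's" analysed as the words "It" + "is", both UDWord
# objects are created with the span of the whole token "It's" and
# is_multiword=True, while `columns` keeps each word's own FORM, HEAD,
# DEPREL, etc.; `parent` stays None until the sentence is complete and
# process_word() resolves HEAD indices to UDWord references.)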
123 | self.columns = columns 124 | # is_multiword==True means that this word is part of a multi-word token. 125 | # In that case, self.span marks the span of the whole multi-word token. 126 | self.is_multiword = is_multiword 127 | # Reference to the UDWord instance representing the HEAD (or None if root). 128 | self.parent = None 129 | # Let's ignore language-specific deprel subtypes. 130 | self.columns[DEPREL] = columns[DEPREL].split(':')[0] 131 | 132 | ud = UDRepresentation() 133 | 134 | # Load the CoNLL-U file 135 | index, sentence_start = 0, None 136 | while True: 137 | line = file.readline() 138 | if not line: 139 | break 140 | line = line.rstrip("\r\n") 141 | 142 | # Handle sentence start boundaries 143 | if sentence_start is None: 144 | # Skip comments 145 | if line.startswith("#"): 146 | continue 147 | # Start a new sentence 148 | ud.sentences.append(UDSpan(index, 0)) 149 | sentence_start = len(ud.words) 150 | if not line: 151 | # Add parent UDWord links and check there are no cycles 152 | def process_word(word): 153 | if word.parent == "remapping": 154 | raise UDError("There is a cycle in a sentence") 155 | if word.parent is None: 156 | head = int(word.columns[HEAD]) 157 | if head > len(ud.words) - sentence_start: 158 | raise UDError("HEAD '{}' points outside of the sentence".format(word.columns[HEAD])) 159 | if head: 160 | parent = ud.words[sentence_start + head - 1] 161 | word.parent = "remapping" 162 | process_word(parent) 163 | word.parent = parent 164 | 165 | for word in ud.words[sentence_start:]: 166 | process_word(word) 167 | 168 | # Check there is a single root node 169 | # if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1: 170 | # print([word.parent for word in ud.words[sentence_start:]]) 171 | # print([word.columns for word in ud.words[sentence_start:]]) 172 | # raise UDError("There are multiple roots in a sentence") 173 | 174 | # End the sentence 175 | ud.sentences[-1].end = index 176 | sentence_start = None 177 | continue 178 | 179 | # Read next token/word 180 | columns = line.split("\t") 181 | if len(columns) != 10: 182 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(line)) 183 | 184 | # Skip empty nodes 185 | if "." in columns[ID]: 186 | continue 187 | 188 | # Delete spaces from FORM so gold.characters == system.characters 189 | # even if one of them tokenizes the space. 
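# (E.g., hypothetically, a gold FORM "vice versa" written with an internal
# space versus a system FORM "viceversa": after the replace() below, both
# sides contribute the identical character stream "viceversa", so the
# later sanity check that gold and system characters match does not abort
# the evaluation because of a stray space.)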
190 | columns[FORM] = columns[FORM].replace(" ", "") 191 | if not columns[FORM]: 192 | raise UDError("There is an empty FORM in the CoNLL-U file") 193 | 194 | # Save token 195 | ud.characters.extend(columns[FORM]) 196 | ud.tokens.append(UDSpan(index, index + len(columns[FORM]))) 197 | index += len(columns[FORM]) 198 | 199 | # Handle multi-word tokens to save word(s) 200 | if "-" in columns[ID]: 201 | try: 202 | start, end = map(int, columns[ID].split("-")) 203 | except: 204 | raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID])) 205 | 206 | for _ in range(start, end + 1): 207 | word_line = file.readline().rstrip("\r\n") 208 | word_columns = word_line.split("\t") 209 | if len(word_columns) != 10: 210 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(word_line)) 211 | ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) 212 | # Basic tokens/words 213 | else: 214 | try: 215 | word_id = int(columns[ID]) 216 | except: 217 | raise UDError("Cannot parse word ID '{}'".format(columns[ID])) 218 | if word_id != len(ud.words) - sentence_start + 1: 219 | raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1)) 220 | 221 | try: 222 | head_id = int(columns[HEAD]) 223 | except: 224 | raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD])) 225 | if head_id < 0: 226 | raise UDError("HEAD cannot be negative") 227 | 228 | ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False)) 229 | 230 | if sentence_start is not None: 231 | raise UDError("The CoNLL-U file does not end with empty line") 232 | 233 | return ud 234 | 235 | # Evaluate the gold and system treebanks (loaded using load_conllu). 236 | def evaluate(gold_ud, system_ud, deprel_weights=None): 237 | class Score: 238 | def __init__(self, gold_total, system_total, correct, aligned_total=None): 239 | self.precision = correct / system_total if system_total else 0.0 240 | self.recall = correct / gold_total if gold_total else 0.0 241 | self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 242 | self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total 243 | class AlignmentWord: 244 | def __init__(self, gold_word, system_word): 245 | self.gold_word = gold_word 246 | self.system_word = system_word 247 | self.gold_parent = None 248 | self.system_parent_gold_aligned = None 249 | class Alignment: 250 | def __init__(self, gold_words, system_words): 251 | self.gold_words = gold_words 252 | self.system_words = system_words 253 | self.matched_words = [] 254 | self.matched_words_map = {} 255 | def append_aligned_words(self, gold_word, system_word): 256 | self.matched_words.append(AlignmentWord(gold_word, system_word)) 257 | self.matched_words_map[system_word] = gold_word 258 | def fill_parents(self): 259 | # We represent root parents in both gold and system data by '0'. 260 | # For gold data, we represent non-root parent by corresponding gold word. 261 | # For system data, we represent non-root parent by either gold word aligned 262 | # to parent system nodes, or by None if no gold words is aligned to the parent. 
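# Consequently the UAS/LAS scores below reduce to comparing, for every
# aligned word pair, gold_parent against system_parent_gold_aligned: both
# are 0 for root attachments, and a system arc counts as correct only if
# its parent token is aligned to exactly the gold parent word. A system
# parent aligned to no gold word becomes None and can never compare equal,
# which is the intended behaviour.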
263 | for words in self.matched_words: 264 | words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0 265 | words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \ 266 | if words.system_word.parent is not None else 0 267 | 268 | def lower(text): 269 | if sys.version_info < (3, 0) and isinstance(text, str): 270 | return text.decode("utf-8").lower() 271 | return text.lower() 272 | 273 | def spans_score(gold_spans, system_spans): 274 | correct, gi, si = 0, 0, 0 275 | while gi < len(gold_spans) and si < len(system_spans): 276 | if system_spans[si].start < gold_spans[gi].start: 277 | si += 1 278 | elif gold_spans[gi].start < system_spans[si].start: 279 | gi += 1 280 | else: 281 | correct += gold_spans[gi].end == system_spans[si].end 282 | si += 1 283 | gi += 1 284 | 285 | return Score(len(gold_spans), len(system_spans), correct) 286 | 287 | def alignment_score(alignment, key_fn, weight_fn=lambda w: 1): 288 | gold, system, aligned, correct = 0, 0, 0, 0 289 | 290 | for word in alignment.gold_words: 291 | gold += weight_fn(word) 292 | 293 | for word in alignment.system_words: 294 | system += weight_fn(word) 295 | 296 | for words in alignment.matched_words: 297 | aligned += weight_fn(words.gold_word) 298 | 299 | if key_fn is None: 300 | # Return score for whole aligned words 301 | return Score(gold, system, aligned) 302 | 303 | for words in alignment.matched_words: 304 | if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned): 305 | correct += weight_fn(words.gold_word) 306 | 307 | return Score(gold, system, correct, aligned) 308 | 309 | def beyond_end(words, i, multiword_span_end): 310 | if i >= len(words): 311 | return True 312 | if words[i].is_multiword: 313 | return words[i].span.start >= multiword_span_end 314 | return words[i].span.end > multiword_span_end 315 | 316 | def extend_end(word, multiword_span_end): 317 | if word.is_multiword and word.span.end > multiword_span_end: 318 | return word.span.end 319 | return multiword_span_end 320 | 321 | def find_multiword_span(gold_words, system_words, gi, si): 322 | # We know gold_words[gi].is_multiword or system_words[si].is_multiword. 323 | # Find the start of the multiword span (gs, ss), so the multiword span is minimal. 324 | # Initialize multiword_span_end characters index. 325 | if gold_words[gi].is_multiword: 326 | multiword_span_end = gold_words[gi].span.end 327 | if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start: 328 | si += 1 329 | else: # if system_words[si].is_multiword 330 | multiword_span_end = system_words[si].span.end 331 | if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start: 332 | gi += 1 333 | gs, ss = gi, si 334 | 335 | # Find the end of the multiword span 336 | # (so both gi and si are pointing to the word following the multiword span end). 
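# A hypothetical walk-through: the gold side has a multi-word token "del"
# (character span 10-13) split into the words "de" + "el", while the system
# kept one plain word "del". multiword_span_end starts at 13; the loop
# below repeatedly advances whichever side still has a word starting inside
# the span (extending the end if that word is itself a longer multi-word
# token) and stops with gs/ss at the span start and gi/si on the first word
# after it. The words inside the span are then aligned by the
# case-insensitive LCS over their FORMs.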
337 |         while not beyond_end(gold_words, gi, multiword_span_end) or \
338 |                 not beyond_end(system_words, si, multiword_span_end):
339 |             if gi < len(gold_words) and (si >= len(system_words) or
340 |                                          gold_words[gi].span.start <= system_words[si].span.start):
341 |                 multiword_span_end = extend_end(gold_words[gi], multiword_span_end)
342 |                 gi += 1
343 |             else:
344 |                 multiword_span_end = extend_end(system_words[si], multiword_span_end)
345 |                 si += 1
346 |         return gs, ss, gi, si
347 | 
348 |     def compute_lcs(gold_words, system_words, gi, si, gs, ss):
349 |         lcs = [[0] * (si - ss) for _ in range(gi - gs)]  # longest-common-subsequence DP table over lowercased FORMs
350 |         for g in reversed(range(gi - gs)):
351 |             for s in reversed(range(si - ss)):
352 |                 if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
353 |                     lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0)
354 |                 lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0)
355 |                 lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0)
356 |         return lcs
357 | 
358 |     def align_words(gold_words, system_words):
359 |         alignment = Alignment(gold_words, system_words)
360 | 
361 |         gi, si = 0, 0
362 |         while gi < len(gold_words) and si < len(system_words):
363 |             if gold_words[gi].is_multiword or system_words[si].is_multiword:
364 |                 # A: Multi-word tokens => align via LCS within the whole "multiword span".
365 |                 gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si)
366 | 
367 |                 if si > ss and gi > gs:
368 |                     lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss)
369 | 
370 |                     # Store aligned words
371 |                     s, g = 0, 0
372 |                     while g < gi - gs and s < si - ss:
373 |                         if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
374 |                             alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s])
375 |                             g += 1
376 |                             s += 1
377 |                         elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0):
378 |                             g += 1
379 |                         else:
380 |                             s += 1
381 |             else:
382 |                 # B: No multi-word token => align according to spans.
383 |                 if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end):
384 |                     alignment.append_aligned_words(gold_words[gi], system_words[si])
385 |                     gi += 1
386 |                     si += 1
387 |                 elif gold_words[gi].span.start <= system_words[si].span.start:
388 |                     gi += 1
389 |                 else:
390 |                     si += 1
391 | 
392 |         alignment.fill_parents()
393 | 
394 |         return alignment
395 | 
396 |     # Check that the underlying character sequences match
397 |     if gold_ud.characters != system_ud.characters:
398 |         index = 0  # find the first differing position, stopping at the end of the shorter sequence
399 |         while index < min(len(gold_ud.characters), len(system_ud.characters)) and gold_ud.characters[index] == system_ud.characters[index]:
400 |             index += 1
401 | 
402 |         raise UDError(
403 |             "The concatenations of tokens in the gold and system files differ!\n" +
404 |             "First 20 differing characters in the gold file: '{}' and the system file: '{}'".format(
405 |                 "".join(gold_ud.characters[index:index + 20]),
406 |                 "".join(system_ud.characters[index:index + 20])
407 |             )
408 |         )
409 | 
410 |     # Align words
411 |     alignment = align_words(gold_ud.words, system_ud.words)
412 | 
413 |     # Compute the F1-scores
414 |     result = {
415 |         "Tokens": spans_score(gold_ud.tokens, system_ud.tokens),
416 |         "Sentences": spans_score(gold_ud.sentences, system_ud.sentences),
417 |         "Words": alignment_score(alignment, None),
418 |         "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]),
419 |         "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]),
420 |         "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]),
421 |         "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])),
422 |         "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]),
423 |         "UAS": alignment_score(alignment, lambda w, parent: parent),
424 |         "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])),
425 |     }
426 | 
427 |     # Add WeightedLAS if weights are given
428 |     if deprel_weights is not None:
429 |         def weighted_las(word):
430 |             return deprel_weights.get(word.columns[DEPREL], 1.0)
431 |         result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las)
432 | 
433 |     return result
434 | 
435 | def load_deprel_weights(weights_file):
436 |     if weights_file is None:
437 |         return None
438 | 
439 |     deprel_weights = {}
440 |     for line in weights_file:
441 |         # Ignore comments and empty lines
442 |         if line.startswith("#") or not line.strip():
443 |             continue
444 | 
445 |         columns = line.rstrip("\r\n").split()
446 |         if len(columns) != 2:
447 |             raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line))
448 | 
449 |         deprel_weights[columns[0]] = float(columns[1])
450 | 
451 |     return deprel_weights
452 | 
453 | def load_conllu_file(path):
454 |     _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {}))
455 |     return load_conllu(_file)
456 | 
457 | def evaluate_wrapper(args):
458 |     # Load CoNLL-U files
459 |     gold_ud = load_conllu_file(args.gold_file)
460 |     system_ud = load_conllu_file(args.system_file)
461 | 
462 |     # Load weights if requested
463 |     deprel_weights = load_deprel_weights(args.weights)
464 | 
465 |     return evaluate(gold_ud, system_ud, deprel_weights)
466 | 
467 | def main():
468 |     # Parse arguments
469 |     parser = argparse.ArgumentParser()
470 |     parser.add_argument("gold_file", type=str,
471 |                         help="Name of the CoNLL-U file with the gold data.")
472 |     parser.add_argument("system_file", type=str,
473 |                         help="Name of the CoNLL-U file with the predicted data.")
parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None, 475 | metavar="deprel_weights_file", 476 | help="Compute WeightedLAS using given weights for Universal Dependency Relations.") 477 | parser.add_argument("--verbose", "-v", default=0, action="count", 478 | help="Print all metrics.") 479 | args = parser.parse_args() 480 | 481 | # Use verbose if weights are supplied 482 | if args.weights is not None and not args.verbose: 483 | args.verbose = 1 484 | 485 | # Evaluate 486 | evaluation = evaluate_wrapper(args) 487 | 488 | # Print the evaluation 489 | if not args.verbose: 490 | print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1)) 491 | else: 492 | metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"] 493 | if args.weights is not None: 494 | metrics.append("WeightedLAS") 495 | 496 | print("Metrics | Precision | Recall | F1 Score | AligndAcc") 497 | print("-----------+-----------+-----------+-----------+-----------") 498 | for metric in metrics: 499 | print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format( 500 | metric, 501 | 100 * evaluation[metric].precision, 502 | 100 * evaluation[metric].recall, 503 | 100 * evaluation[metric].f1, 504 | "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else "" 505 | )) 506 | 507 | if __name__ == "__main__": 508 | main() 509 | 510 | # Tests, which can be executed with `python -m unittest conll17_ud_eval`. 511 | class TestAlignment(unittest.TestCase): 512 | @staticmethod 513 | def _load_words(words): 514 | """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.""" 515 | lines, num_words = [], 0 516 | for w in words: 517 | parts = w.split(" ") 518 | if len(parts) == 1: 519 | num_words += 1 520 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1))) 521 | else: 522 | lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0])) 523 | for part in parts[1:]: 524 | num_words += 1 525 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1))) 526 | return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"]))) 527 | 528 | def _test_exception(self, gold, system): 529 | self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system)) 530 | 531 | def _test_ok(self, gold, system, correct): 532 | metrics = evaluate(self._load_words(gold), self._load_words(system)) 533 | gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold)) 534 | system_words = sum((max(1, len(word.split(" ")) - 1) for word in system)) 535 | self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1), 536 | (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words))) 537 | 538 | def test_exception(self): 539 | self._test_exception(["a"], ["b"]) 540 | 541 | def test_equal(self): 542 | self._test_ok(["a"], ["a"], 1) 543 | self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3) 544 | 545 | def test_equal_with_multiword(self): 546 | self._test_ok(["abc a b c"], ["a", "b", "c"], 3) 547 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4) 548 | self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4) 549 | self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5) 550 | 551 | def test_alignment(self): 552 | self._test_ok(["abcd"], ["a", "b", "c", "d"], 0) 553 | 
self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1) 554 | self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2) 555 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2) 556 | self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4) 557 | self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2) 558 | self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1) 559 | -------------------------------------------------------------------------------- /bmstparser/src/utils/evaluation_script/weights.clas: -------------------------------------------------------------------------------- 1 | # Relations used to attach function words to content words 2 | aux 0.1 3 | case 0.1 4 | cc 0.1 5 | clf 0.1 6 | cop 0.1 7 | det 0.1 8 | mark 0.1 9 | 10 | # Punctuation 11 | punct 0 12 | --------------------------------------------------------------------------------