├── .gitignore ├── example.png ├── requirements.txt ├── js ├── cls_modal_div.js ├── cls_chain_popup.js ├── cls_scrolling.js ├── cls_common.js ├── cls_exporter.js ├── cls_colors.js ├── cls_sacr_parser.js ├── cls_link.js ├── cls_text.js ├── cls_chain.js ├── cls_property.js └── cls_data_loader.js ├── autoannotations.py ├── README.md ├── style.css ├── index.html └── ontonotes.py /.gitignore: -------------------------------------------------------------------------------- 1 | private_mistakes/ 2 | model2.tar.gz 3 | out.txt 4 | temp.txt -------------------------------------------------------------------------------- /example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gleb-skobinsky/RuCoref-inference/HEAD/example.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | allennlp==2.2.0 2 | allennlp-models==2.2.0 3 | pytorch-transformers==1.1.0 4 | 5 | torch==1.8.1 #also compatible: torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html 6 | 7 | transformers==4.4.2 8 | click==7.0 -------------------------------------------------------------------------------- /js/cls_modal_div.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v. 2.0 for more details. 
/*
 * (License header of js/cls_modal_div.js, continued from the previous dump
 * line.)
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

/**
 * A full-window overlay that shows a heading, an optional "Cancel" button and
 * a caller-supplied content element on top of the page.
 *
 * The overlay element is built lazily on the first call to show() and reused
 * on later calls.
 */
class ModalDiv {

  /**
   * @param title - text of the heading shown at the top of the overlay.
   * @param contentDiv - DOM element appended below the heading.
   * @param hideCancel - when truthy, no "Cancel" button is added.
   */
  constructor(title, contentDiv, hideCancel) {
    this.title = title;
    this.div = null;
    this.contentDiv = contentDiv;
    this.hideCancel = hideCancel;
    this.isCancelled = false;
    this.hasBeenShown = false;
  }

  /**
   * Attaches the overlay to document.body, building it first if this is the
   * first time it is shown.
   */
  show() {
    if (!this.hasBeenShown) {
      const heading = document.createElement("h1");
      heading.style.margin = "10px";
      heading.appendChild(document.createTextNode(this.title));

      const overlay = document.createElement("DIV");
      overlay.style.overflowY = "scroll";
      overlay.style.backgroundColor = "white";
      overlay.style.position = "fixed";
      overlay.style.top = "0px";
      overlay.style.left = "0px";
      overlay.style.height = "100%";
      overlay.style.width = "100%";
      overlay.appendChild(heading);

      if (!this.hideCancel) {
        const cancelButton = document.createElement("input");
        cancelButton.type = "button";
        cancelButton.value = "Cancel";
        cancelButton.style.position = "absolute";
        cancelButton.style.right = "20px";
        cancelButton.style.top = "20px";
        cancelButton.onclick = () => {
          this.isCancelled = true;
          this.close();
        };
        overlay.appendChild(cancelButton);
      }

      overlay.appendChild(this.contentDiv);
      this.div = overlay;
      this.hasBeenShown = true;
    }
    document.body.appendChild(this.div);
  }

  /**
   * Detaches the overlay from the page.
   *
   * Safe to call before show() or several times in a row: the overlay is only
   * removed when it is currently attached to a parent node. (The previous
   * version called document.body.removeChild() unconditionally and threw in
   * those situations.)
   */
  close() {
    const parent = this.div ? this.div.parentNode : null;
    if (parent) {
      parent.removeChild(this.div);
    }
  }

}
# --------------------------------------------------------------------------------
# /autoannotations.py:
# --------------------------------------------------------------------------------
"""Command-line coreference annotation for Russian text.

The script asks for a text, writes it to TEMP_FILE as a one-document CoNLL
file, runs ``allennlp evaluate`` on it and prints the text with every
predicted mention wrapped as ``{M<k>: tokens }``.
"""
import argparse
import os
import subprocess
import sys

TEMP_FILE = r'temp.txt'
OUT_FILE = r'out.txt'


def build_conll(sentences):
    """Serialise tokenised sentences as a single CoNLL document.

    :param sentences: list of sentences, each a list of token strings.
    :return: the document text; every sentence is preceded by an empty line
        and token indices restart at 0 in each sentence.
    """
    body = ''
    for tokens in sentences:
        body += '\n'
        for token_index, token in enumerate(tokens):
            body += f'book0\t0\t{token_index}\t{token}\t_\t_\t_\t_\t_\t_\t-\n'
    return '#begin document (book0); part 0' + body + '\n' + '#end document'


def get_mention(begin, text_list, all_cluster_tokens, cluster_uids=None):
    """Render the first mention that starts at token index ``begin``.

    :param begin: index (into ``text_list``) of the first token of the mention.
    :param text_list: flat list of document tokens.
    :param all_cluster_tokens: list whose first item is the list of clusters;
        each cluster is a list of ``[start, end]`` inclusive token spans.
    :param cluster_uids: optional labels, one per cluster; defaults to
        ``M1``, ``M2``, ... in cluster order.
    :return: ``'{<uid>: tok tok }'`` or ``None`` when no mention starts there.
    """
    for cluster_id, cluster in enumerate(all_cluster_tokens[0]):
        for token_set in cluster:
            if begin != token_set[0]:
                continue
            uid = cluster_uids[cluster_id] if cluster_uids else f'M{cluster_id + 1}'
            # Every token is followed by a space, including the last one.
            mention_string = ''.join(
                text_list[offset] + ' '
                for offset in range(token_set[0], token_set[-1] + 1)
            )
            return '{' + uid + ': ' + mention_string + '}'
    return None


def annotate_tokens(text_list, clusters):
    """Return the token stream with coreferent mentions wrapped in braces.

    Tokens outside any mention are emitted as is; a mention is emitted once,
    at its first token, and its remaining tokens are skipped. Every emitted
    piece is followed by a single space.

    :param text_list: flat list of document tokens.
    :param clusters: list of clusters of ``[start, end]`` inclusive spans.
    """
    # Sets make the per-token membership tests O(1).
    covered = {
        offset
        for cluster in clusters
        for mention in cluster
        for offset in range(mention[0], mention[1] + 1)
    }
    beginnings = {mention[0] for cluster in clusters for mention in cluster}

    pieces = []
    for idx, token in enumerate(text_list):
        if idx not in covered:
            pieces.append(token)
        elif idx in beginnings:
            pieces.append(get_mention(idx, text_list, [clusters]))
    return ''.join(piece + ' ' for piece in pieces)


def read_clusters(path):
    """Return the clusters of the first prediction in a JSON-lines file.

    Returns an empty list when the file holds no prediction, so the caller
    prints the plain text instead of crashing.
    """
    # Imported here so the pure helpers above can be used without jsonlines.
    import jsonlines

    with jsonlines.open(path) as predicted_file:
        for line in predicted_file.iter():
            return line['clusters'][0]
    return []


def run_model(model):
    """Run ``allennlp evaluate`` on TEMP_FILE and write predictions to OUT_FILE.

    Exits with status 1 when the command fails. OUT_FILE is deleted first so
    that a failed run can never be mistaken for a fresh prediction.
    """
    if os.path.exists(OUT_FILE):
        os.remove(OUT_FILE)
    p = subprocess.run(
        ["allennlp", "evaluate", f"{model}", f"{TEMP_FILE}",
         "--predictions-output-file", f"{OUT_FILE}"],
        capture_output=True,
    )
    if p.returncode != 0 or not os.path.exists(OUT_FILE):
        sys.stderr.write(p.stderr.decode('utf-8', errors='replace'))
        sys.exit(1)


def main():
    """Entry point: read a text, run the model, print the annotated text."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True, help="Path to the coref model")
    args = parser.parse_args()

    # Imported after argument parsing so that ``--help`` works without nltk.
    import nltk.tokenize as tk
    from nltk.tokenize import wordpunct_tokenize

    text = input("Введите текст: ")
    sentences = [
        wordpunct_tokenize(sentence)
        for sentence in tk.sent_tokenize(text, language='russian')
    ]
    if not any(sentences):
        sys.exit("No tokens found in the input text.")

    with open(TEMP_FILE, 'w', encoding='utf-8') as temp_file:
        temp_file.write(build_conll(sentences))

    print('Loading and building the model...')
    run_model(args.model)

    # Use exactly the tokens that were written to the CoNLL file, so the
    # predicted offsets index the same sequence the model saw.
    text_list = [token for sentence in sentences for token in sentence]
    print(annotate_tokens(text_list, read_clusters(OUT_FILE)), end='')


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# 1 | # Очень простой инференс разрешения кореферентности для русского языка
# 2 |
# 3 | ## 1. Установите зависимости
# 4 |
# 5 | Создайте виртуальную среду python 3.7, активируйте ее и установите зависимости (`pip install -r requirements.txt`).
6 | Модель обучена и инференсится при помощи пакета `allennlp==2.2.0`, поэтому проверьте его нормальное функционирование: `allennlp evaluate --help`.
7 | 8 | ## 2. Замените датаридер 9 | 10 | Чтобы модель читала вводимые данные правильно, замените файл в dist-packages виртуальной среды с allennlp_models/common/ontonotes.py на ontonotes.py, который лежит в корне этого проекта. 11 | 12 | ## 3. Скачайте и запустите модель 13 | 14 | Скачайте веса модели отсюда: https://disk.yandex.ru/d/0TKZcXkaBCbq3Q 15 | 16 | И запустите инференс командой: `python autoannotations.py --model [ПУТЬ К МОДЕЛИ]`.
17 | Например: `python autoannotations.py --model model2.tar.gz`.
18 | Команда предложит ввести текст и выделит в нем кореферентные цепочки, например:
19 | `Джобс и Возняк сумели разработать первый по-настоящему персональный компьютер Apple I, который на тот момент выглядел как деревянная шкатулка, и поставлялся без монитора. Частота процессора компьютера достигала 1 МГц, а размер оперативной памяти составлял 4 килобайта. Apple I сразу продался партией в 50 машин и дал повод для разработки продолжения. В апреле 1977 года Джобс и Возняк провели официальную презентацию их следующего компьютера Apple II. В 70-х он стал самым массовым и удачно продаваемым персональным компьютером, с более чем 5 миллионами проданных копий по всему миру. На тот момент компьютер предлагал покупателям интегрированную клавиатуру, цветную графику, звук, пластиковый корпус, и два слота для дискет.`
20 | Вывод модели:
21 | `{M2: Джобс } и {M3: Возняк } сумели разработать {M1: первый по - настоящему персональный компьютер Apple I } , который на тот момент выглядел как деревянная шкатулка , и поставлялся без монитора . Частота процессора {M1: компьютера } достигала 1 МГц , а размер оперативной памяти составлял 4 килобайта . {M1: Apple I } сразу продался партией в 50 машин и дал повод для разработки продолжения . В апреле 1977 года {M2: Джобс } и {M3: Возняк } провели официальную презентацию {M4: их следующего компьютера Apple II } . В 70 - х {M4: он } стал самым массовым и удачно продаваемым персональным компьютером , с более чем 5 миллионами проданных копий по всему миру . На тот момент {M4: компьютер } предлагал покупателям интегрированную клавиатуру , цветную графику , звук , пластиковый корпус , и два слота для дискет .`

22 | Комментарии: При первом запуске transformers скачает и распакует RuBERT от Deeppavlov в кэш. Это может занять некоторое время, но привычный прогресс-бар скачивания не отобразится. Обратите внимание, что скрипт выше тестировался на Windows, на Linux что-то может пойти не так. Чем длиннее документ, тем больше размерность тензора, которую модель попытается посчитать, поэтому лучше не загружать длинные документы, если объем ОЗУ на устройстве меньше 10 ГБ. 23 | 24 | ## 4. Визуализируйте результат 25 | 26 | Цепочки можно визуализировать и редактировать, открыв index.html в браузере и вставив полученный выше текст с разметкой в текстовое поле. Не забудьте нажать на кнопку "распарсить документ". Получится вот так: 27 | 28 | ![build the coreference chains](example.png) 29 | 30 | ## Благодарности 31 | 32 | Красивый редактор из последнего пункта - от Bruno Oberle (https://github.com/boberle/sacr), слегка измененный и переведенный на русский. 33 | 34 | ## TODO: 35 | 36 | Планирую добавить поддержку вложенных упоминаний в разметке командной строки. 37 | 38 | 39 | -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v. 2.0 for more details. 14 | * 15 | * Some questions about the license may have been answered at 16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/. 17 | * 18 | * If you have any question, contact me at boberle.com. 19 | * 20 | * The source code can be found at boberle.com. 
/*
 * (End of the license header of style.css, which opens on the previous dump
 * line.)
 */

/*
 * Style sheet of the annotation editor: the text area (#divText), the link
 * boxes drawn around mentions, the property panel and the chain popup.
 */

/* NOTE: the 'and' selector in CSS is .class1.class2 (WITHOUT space) */

/* div text */

body {
   /*background: black;*/
}

div#divText {
   /*padding-bottom: 800px;*/
}

div#divText p {
   font-size: 20pt;
   /*color: antiquewhite;*/
}

div#divText p.paragraph {
   line-height: 2.75em;
   padding: 30px;
   /* generic fallback for machines without the Gentium font */
   font-family: Gentium, serif;
}

div#divText span.parNumber {
   /* */
}

div#divText p.comment {
   /* `mono' is not a CSS generic family; `monospace' is the portable one */
   font-family: mono, monospace;
   font-size: 15pt;
   background-color: antiquewhite;
   padding: 10px;
   margin: 0px;
}

div#divText p.heading.level1 {
   font-weight: bold;
   font-size: 24pt;
   margin-left: 0pt;
}

div#divText p.heading.level2 {
   font-weight: bold;
   font-size: 22pt;
   margin-left: 20pt;
}

div#divText p.heading.level3 {
   font-weight: bold;
   font-size: 20pt;
   margin-left: 40pt;
}

div#divText p.heading.level4 {
   font-weight: bold;
   font-size: 18pt;
   margin-left: 40pt;
}

div#divText p.heading.level5 {
   font-weight: bold;
   font-size: 16pt;
   margin-left: 40pt;
}

div#divText p.heading.level6 {
   font-weight: bold;
   font-size: 14pt;
   margin-left: 40pt;
}



/* link */

div#divText span.link {
   border: solid 2px black;
}

/* note: must be `span.link.hidden' and not only `span.hidden' for the
 * `padding' to work (but `span.hidden' is sufficient for `font-size' for
 * example...).
 */
div#divText span.link.hidden {
   padding-left: 7px;
}

/* don't forget the `>', otherwise all nested links of a hidden link will be
 * hidden!
 */
div#divText span.hidden>span.metadata {
   display: none;
}

/* the deeper a link is nested, the smaller its padding, so that the borders
 * of nested links do not overlap */
div#divText span.link { padding: 11px; padding-left: 0px; }
div#divText span>span.link { padding: 8px; padding-left: 0px; }
div#divText span>span>span.link { padding: 5px; padding-left: 0px; }
div#divText span>span>span>span.link { padding: 0px; padding-left: 0px; }
div#divText span.link>span.metadata { padding: 11px; padding-bottom: 12px; padding-left: 0px; }
div#divText span>span.link>span.metadata { padding: 8px; padding-bottom: 9px; padding-left: 0px; }
div#divText span>span>span.link>span.metadata { padding: 5px; padding-bottom: 6px; padding-left: 0px; }
div#divText span>span>span>span.link>span.metadata { padding: 0px; padding-bottom: 1px; padding-left: 0px; }
div#divText span.metadata {
   margin-right: 3px;
   padding-right: 2px;
   font-family: mono, monospace;
}

div#divText span.link.selected {
   border-style: dotted;
}

/* referring expressions (token) */

div#divText a.token {
   text-decoration: none;
   color: inherit;
}

div#divText a.token.selected {
   text-decoration: underline;
}

div#divLinkPropertyAnchor {
   background-color: #FEF0C9;
   display: none;
   position: fixed;
   bottom: 0px;
   left: 0px;
   /*height: 30%;*/
   width: 100%;
   padding: 10px;
   margin: 0px;
}

/* divWhiteSpaceAtTheEnd */

div#divWhiteSpaceAtTheEnd {
   /*background-color: green;*/
}

/* misc */

a {
   cursor: pointer;
}

/* chain popup */

div#divChainPopup p {
   font-family: Gentium, serif;
   font-size: 15pt;
   padding: 0px;
   margin: 0px;
}

div.chainPopupChainDiv {
   padding: 10px;
   margin: 10px;
   /*border: 1px solid black;*/
}

p.chainPopupChainName {
   font-weight: bold;
}

p.chainPopupChainName.selected a {
   border: 2px black solid;
}

div.chainPopupLinkDiv {
   padding: 0px;
   margin: 0px;
   margin-left: 30px;
   /*border: 1px solid black;*/
}

div.chainPopupLinkDiv a.selected {
   /*text-decoration: underline;*/
   background: black;
   color: white;
}

/*
 * --------------------------------------------------------------------------------
 * /js/cls_chain_popup.js:
 * --------------------------------------------------------------------------------
 */
/*
 *
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

/* Issues with stylesheets. You can specify a <link>
 * thing with JS, but you need a absolute path for href.
 *    var link = this.win.document.createElement("LINK");
 *    link.href = "file:///foo/bar/style.css";
 *    link.type = "text/css";
 *    link.rel = "stylesheet";
 *    this.win.document.head.appendChild(link)
 * Otherwise, you need to use the document.stylesheets objects, which
 * list all the style sheets. But by default, there is none.
 * (These notes continue on the next dump line.)
 */
/*
 * (Notes on style sheets in popup windows, continued from the previous dump
 * line.)
 * To create one, use:
 *    var styleElement = win.document.createElement('style');
 *    win.document.head.appendChild(styleElement);
 * Now, you have a stylesheet that you can get:
 *    var styleSheet = win.document.styleSheets[0];
 * or
 *    var styleSheet = styleElement.styleSheet
 * Then use can set the text:
 *    styleSheet.cssText = "you text"
 * or
 *    stylesheet.insertRule('p.linkParagraph { padding-left: 15px; }',
 *       index);
 * Note that in the last example, you need to specify an index (the
 * last one if you want the rule to be inserted at the end of the
 * stylesheet: use something with nextIndex++).
 */

/**
 * Separate browser window that displays the chains and their links.
 *
 * The window is opened on demand by show(); its size and position are
 * remembered when it is closed and reused the next time it is opened.
 */
class ChainPopup {

  /**
   * @param chainDiv - element holding the chain list; it is moved into the
   *    popup document when the window is opened.
   */
  constructor(chainDiv) {
    this.visible = false;
    this.chainDiv = chainDiv;
    // the window
    this.win = undefined;
    this.winWidth = '350';
    this.winHeight = '400';
    this.winTop = '100';
    this.winLeft = '100';
    // the elements
    this.h1 = document.createElement('H1');
    this.h1.appendChild(document.createTextNode('Chains and Links'));
  }

  /**
   * Opens the popup, or focuses it when it is already open.
   *
   * If the browser refuses to open the window, the user is alerted and
   * `visible` is reset to false (it used to stay true in that case).
   */
  show() {
    // `visible` is set before the refresh call, in the same order as the
    // original code.
    this.visible = true;
    gText.chainColl.sortChainsAndUpdatePopupDiv();

    // if the window is already open, just focus...
    if (this.win && !this.win.closed) {
      this.win.focus();
      return;
    }

    const features = "status=0,width=" + this.winWidth
      + ",height=" + this.winHeight
      + ",top=" + this.winTop
      + ",left=" + this.winLeft
      + ",toolbar=0,menubar=0,resizable=1,scrollbars=1";
    this.win = window.open("", "_blank", features);
    if (!this.win) {
      this.visible = false;
      alert("I can't create the popup!");
      return;
    }

    // remember the geometry so the next popup opens at the same place
    this.win.onbeforeunload = () => {
      this.winWidth = this.win.outerWidth;
      this.winHeight = this.win.outerHeight;
      this.winLeft = this.win.screenX;
      this.winTop = this.win.screenY;
      this.visible = false;
      // The chain div is not detached from the closing window here: per the
      // original author's note, this is unnecessary because the elements of
      // the div are removed and re-created on each refresh.
      return null;
    };

    // style sheet: reuse the first style sheet of the main page, if it has
    // one that was loaded from a URL
    const mainSheet = document.styleSheets[0];
    if (mainSheet && mainSheet.href) {
      const link = this.win.document.createElement("LINK");
      link.href = mainSheet.href;
      link.type = "text/css";
      link.rel = "stylesheet";
      this.win.document.head.appendChild(link);
    }

    // get elements
    this.win.document.title = "Chains and Links";
    this.win.document.body.appendChild(this.h1);
    this.win.document.body.appendChild(this.chainDiv);

    // shortcuts: same key handler as the main window
    this.win.document.body.addEventListener('keydown', gKeyDownHandler);
  }

}

/*
 * --------------------------------------------------------------------------------
 * /js/cls_scrolling.js:
 * --------------------------------------------------------------------------------
 */
/*
 *
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

/**
 * Helpers to scroll a window so that a given DOM element is visible.
 *
 * Scrolling is done with window.scroll(x, y) (window.scrollTo() is the same
 * method), see https://developer.mozilla.org/en-US/docs/Web/API/Window/scroll.
 * The y coordinate is obtained by summing offsetTop along the offsetParent
 * chain, as described at
 * http://stackoverflow.com/questions/5007530/how-do-i-scroll-to-an-element-using-javascript#5007606
 */
class Scrolling {

  /*
   * Returns the vertical position of an object, in pixels: the sum of
   * offsetTop over the object and all its offset parents. Returns 0 when the
   * object has no offsetParent.
   */
  static findPosition(obj) {
    let top = 0;
    if (!obj.offsetParent) {
      return top;
    }
    for (let node = obj; node; node = node.offsetParent) {
      top += node.offsetTop;
    }
    return top;
  }


  /*
   * Scroll the window to show the object (a DOM object).
   * @param evenIfNotNeeded: scroll even if the object is already visible.
   * @param win: the window to scroll; defaults to the current window.
   */
  static scrollTo(obj, evenIfNotNeeded, win) {
    if (!win) {
      win = window;
    }
    // scrollOffset is the minimum amount of space (in pixels) to leave at
    // the top of the window, so the obj is not directly at the margin.
    const scrollOffset = win.innerHeight / 5;
    const pos = Math.max(0, Scrolling.findPosition(obj) - scrollOffset);
    if (evenIfNotNeeded || !Scrolling.isVisible(obj, win)) {
      win.scroll(0, pos);
    }
  }

  /*
   * Returns the computed height of the given object as an integer number of
   * pixels, or 0 when the computed height has no leading digits to read
   * (e.g. "auto").
   */
  static findHeight(obj) {
    const styleObject = getComputedStyle(obj);
    if (styleObject && styleObject.height) {
      const value = styleObject.height.match(/\d+/); // ex.: 40px
      if (value) {
        return parseInt(value[0], 10); // match returns an array
      }
    }
    return 0;
  }

  /*
   * Returns true if the given object is entirely inside the visible part of
   * the window, not counting the area covered by the link property panel
   * (gDivLinkPropertyAnchor) at the bottom.
   */
  static isVisible(obj, win) {
    if (!win) {
      win = window;
    }
    // the object is above the visible part of the screen
    const pos = Scrolling.findPosition(obj);
    if (pos < win.scrollY) {
      return false;
    }
    // note that spans don't seem to have any computable height. We use one
    // sixth of the window as a rule of thumb
    const height = Scrolling.findHeight(obj) || win.innerHeight / 6;
    // the object (meaning: its bottom) is below the visible part of the
    // screen
    const bottomPos = pos + height;
    const bottomLimit = win.scrollY + win.innerHeight
      - Scrolling.findHeight(gDivLinkPropertyAnchor);
    return bottomPos <= bottomLimit;
  }

}

// --------------------------------------------------------------------------------
// /js/cls_common.js:
// --------------------------------------------------------------------------------
// (The license header of js/cls_common.js opens here and continues on the
// next dump line.)
/*
 * (License header of js/cls_common.js, continued from the previous dump
 * line.)
 *
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

// Lower-case accented letters and ligatures mapped to their ASCII
// replacement. Kept as a `var` so the name stays a property of the global
// object in a classic script, as before. (It used to be an Array used as a
// dictionary, with 'à' and 'é' assigned twice.)
var diacriticsMap = {
  'ß': 'ss',
  'à': 'a',
  'á': 'a',
  'â': 'a',
  'ã': 'a',
  'ä': 'a',
  'å': 'a',
  'æ': 'a',
  'ç': 'c',
  'è': 'e',
  'é': 'e',
  'ê': 'e',
  'ë': 'e',
  'ì': 'i',
  'í': 'i',
  'î': 'i',
  'ï': 'i',
  'ð': 'd',
  'ñ': 'n',
  'ò': 'o',
  'ó': 'o',
  'ô': 'o',
  'õ': 'o',
  'ö': 'o',
  'ø': 'o',
  'ù': 'u',
  'ú': 'u',
  'û': 'u',
  'ü': 'u',
  'ý': 'y',
  'þ': 'f',
  'ÿ': 'y',
  'œ': 'oe',
};


/**
 * Static helpers shared by the editor: chain-name generation and parsing of
 * `key=value` property lists.
 */
class CommonFunctions {

  /*
   * Reduces a text to [-A-Za-z0-9]: characters found in diacriticsMap are
   * replaced by their ASCII equivalent, every other character outside that
   * set is dropped. Then each hyphen followed by a character is replaced by
   * that character in upper case ("jean-pierre" -> "jeanPierre").
   */
  static removeDiacritics(text) {
    // some ideas:: http://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
    const ascii = text.replace(/[^-A-Za-z0-9]/g, (ch) =>
      (Object.prototype.hasOwnProperty.call(diacriticsMap, ch) ? diacriticsMap[ch] : ''));
    return ascii.replace(/-./g, (pair) => pair.substring(1).toUpperCase());
  }

  /*
   * Builds a chain-name suggestion from the words of a mention. The result
   * always starts with an upper-case letter.
   * @param words: array of strings
   */
  static offerNameForChain(words) {
    let result = '';
    if (words.length == 1) {
      result = CommonFunctions.removeDiacritics(words[0]);
    } else {
      // NOTE(review): the loop header and the lines between it and the first
      // condition were destroyed when this file was extracted (the source
      // reads `for (var i=0; i0 && result == '') {`). The header, the
      // word limit below and the `i > 0` test are a reconstruction; only the
      // two branch bodies and the `else if (i>0)` test are original. Restore
      // the exact code from the upstream SACR sources before relying on the
      // multi-word behaviour.
      const maxWords = 2; // assumption, see NOTE(review) above
      let c = 0;
      for (let i = 0; i < words.length && c < maxWords; i++) {
        let text = words[i];
        if (i > 0 && result == '') {
          result += CommonFunctions.removeDiacritics(text);
          c++;
        } else if (i > 0) {
          text = CommonFunctions.removeDiacritics(text);
          result += text.substring(0, 1).toUpperCase() + text.substring(1);
          c++;
        }
      } // for
    } // if
    return result.substring(0, 1).toUpperCase() + result.substring(1);
  }

  /*
   * @param chainColl: used to check the name validity (chainColl.checkName)
   * @param askUser: boolean, if false, get a default name (M1, etc.)
   * @param defaultName: if evaluates to false, don't propose a name; if a
   * string, propose that string (e.g. the current name of the chain); if an
   * array (of strings), propose a default name based on the strings
   * @return: the chosen name, or undefined if the user cancelled the prompt
   */
  static getChainName(chainColl, askUser, defaultName) {
    let name = undefined;
    if (!askUser) {
      // first free name among M1, M2, ...
      let count = 0;
      do {
        count++;
        name = "M" + count.toString();
      } while (!chainColl.checkName(name));
      return name;
    }

    if (!defaultName) {
      defaultName = "";
    } else if (typeof defaultName !== "string") {
      defaultName = CommonFunctions.offerNameForChain(defaultName);
    }
    // ask again until the name is accepted or the prompt is cancelled
    while (!name) {
      name = prompt("Enter a name:", defaultName);
      if (name == null) { // cancel
        return undefined;
      }
      if (!name || !chainColl.checkName(name)) {
        alert("Bad name!");
        name = undefined;
      }
    }
    return name;
  }


  /*
   * Parses a list of `key=value` or `key="quoted value"` pairs, separated by
   * `,` or `;`, that starts with a `:` located exactly at startIndex. If
   * there is no `:` at that position nothing is consumed.
   * @return: {startIndex:INT, dic:{opt1: val1, etc.}} where startIndex is the
   * index of the first character that was not consumed
   */
  static parseValues(text, startIndex) {
    const result = {};
    if (text.indexOf(':', startIndex) != startIndex) {
      return {startIndex: startIndex, dic: result};
    }
    const textLen = text.length;
    startIndex++;
    while (startIndex < textLen) {
      const rest = text.substring(startIndex);
      // a bare word value is tried first, then a double-quoted value in
      // which `\"` does not end the string
      const pair = rest.match(/^(\w+)=(\w+)/)
        || rest.match(/^(\w+)="((?:\\"|[^"])*)"/);
      if (pair == null) {
        break;
      }
      result[pair[1]] = pair[2];
      startIndex += pair[0].length;
      if (text.substring(startIndex).match(/^[,;]/) == null) {
        break;
      }
      startIndex++;
    } // while
    return {startIndex: startIndex, dic: result};
  }

}

// --------------------------------------------------------------------------------
// /js/cls_exporter.js:
// --------------------------------------------------------------------------------
// (The license header of js/cls_exporter.js opens here, "SACR (Script
// d'Annotation de Chaînes de Référence): a", and continues on the next dump
// line.)
/*
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

/*
 * Exporter: serialises the annotated text held by the global `gText` and
 * offers it to the user as a downloadable file.
 *
 * The static members are pure helpers (base64, download, file naming). The
 * instance members read the globals `gText`, `CLASS_PAR_NUMBER`,
 * `CLASS_METADATA`, `CLASS_LINK`, `CLASS_PARAGRAPH` and `CLASS_COMMENT`.
 */
class Exporter {

   /* FUNCTIONS TO EXPORT TO A FILE */

   /* Encode a (possibly non-Latin-1) string as base64.
    * see:
    * - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/btoa
    * - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_.22Unicode_Problem.22
    *
    * @param str: the string to encode
    * @return: the base64 representation of the UTF-8 bytes of `str`
    */
   static utf8_to_b64(str) {
      // encodeURIComponent + unescape turns the string into a "binary
      // string" of UTF-8 bytes, which is what btoa() accepts
      return window.btoa(unescape(encodeURIComponent(str)));
   }

   /* Trigger a download of `text` under the name `filename`, by clicking a
    * temporary anchor that carries the text as a data URI.
    * source: http://stackoverflow.com/questions/5552540/save-as-text-file-javascript
    *
    * @param text: the content of the file
    * @param filename: the name proposed to the user
    */
   static writeToFile(text, filename) {
      var anchor = document.createElement('a');
      anchor.href = 'data:text/plain;charset=UTF-8;base64,'
         + Exporter.utf8_to_b64(text);
      anchor.textContent = 'download';
      anchor.download = filename;
      // the anchor must be in the document for click() to work (this is not
      // in the original source of the snippet)
      document.body.appendChild(anchor);
      anchor.click();
      document.body.removeChild(anchor);
   }


   /* FUNCTIONS FOR FILENAME */

   /* Left-pad a one-character value with a zero.
    *
    * @param text: a number or a string
    * @return: a string ("5" -> "05", "12" -> "12")
    */
   static datePadding(text) {
      var str = text + "";
      return (str.length == 1) ? "0" + str : str;
   }

   /* @return: the current local date and time as "YYYYMMDD-HHMMSS" */
   static getDateString() {
      var d = new Date();
      return d.getFullYear()
         + Exporter.datePadding(d.getMonth() + 1) // getMonth() is 0-based
         + Exporter.datePadding(d.getDate())
         + "-"
         + Exporter.datePadding(d.getHours())
         + Exporter.datePadding(d.getMinutes())
         + Exporter.datePadding(d.getSeconds());
   }

   /* Build a file name carrying the current timestamp:
    * - if the name already contains a "YYYYMMDD-HHMMSS" stamp, replace it;
    * - otherwise, if it has an extension, insert "_stamp" before it;
    * - otherwise append "_stamp".
    *
    * @param originalFilename: the current name (falsy -> "default")
    * @return: the new name
    */
   static computeNewFilename(originalFilename) {
      var dateString = Exporter.getDateString();
      if (!originalFilename) {
         originalFilename = "default";
      }
      var stampRegex = /\d{8}-\d{6}/;
      var extensionRegex = /(\.[^.]+)$/;
      // the presence of a stamp is tested explicitly: comparing the string
      // before and after the replacement fails when the old stamp happens to
      // equal the new one
      if (stampRegex.test(originalFilename)) {
         return originalFilename.replace(stampRegex, dateString);
      }
      if (extensionRegex.test(originalFilename)) {
         return originalFilename.replace(extensionRegex,
            "_" + dateString + "$1");
      }
      return originalFilename + "_" + dateString;
   }

   /* FUNCTIONS TO EXPORT TO TEXT */

   /* Serialise one DOM element (a paragraph or a link span), recursively.
    *
    * the general structure is as follows:
    * - paragraph -> series of texts and spans for links. Each link span is
    *   as follows:
    *    - span (CLASS_LINK):
    *       - span (with the name) (CLASS_METADATA)
    *       - anchor (with the name)
    *
    * A link is written as "{" + name [+ ":" + properties] + content + "}".
    * NOTE: there is no separator between the name and the content.
    *
    * @param element: the DOM element to serialise
    * @param complete: passed on to link.properties.getString()
    * @return: the text
    */
   convertElementToText (element, complete) {
      var result = '';
      for (var e of element.childNodes) {
         if (e.nodeType == 3) { // text
            result += e.textContent;
         } else if (e.nodeType == 1) { // DOM element
            if (e.tagName == 'SPAN') {
               if (e.classList.contains(CLASS_PAR_NUMBER)) {
                  // nothing: paragraph numbers are not exported
               } else if (e.classList.contains(CLASS_METADATA)) {
                  var link = gText.chainColl.getLinkBySpan(e.parentElement);
                  if (!link) {
                     alert("One of the link span is not in the dictionary");
                  } else {
                     result += link.name;
                     if (!gText.schema.isEmpty) {
                        if (complete) {
                           result += ':' + link.properties.getString(true,
                              link.text);
                        } else {
                           result += ':' + link.properties.getString();
                        }
                     }
                  }
               } else if (e.classList.contains(CLASS_LINK)) {
                  result += '{' + this.convertElementToText(e, complete) + '}';
               } else {
                  // report the offending element itself
                  alert("Found a 'span' which is neither a link nor a metadata (className: '"+e.className+"')...");
               }
            } else if (e.tagName == 'A') {
               result += e.textContent;
            } else if (e.tagName == 'BR') {
               // nothing: line breaks are not exported
            } else {
               alert("Found a <"+e.tagName+">...");
            }
         } else {
            alert("Found a element of node type: "+e.nodeType+"...");
         }
      }
      return result;
   }

   /* Serialise all the children of gText.div: text paragraphs are converted
    * with convertElementToText(), comment paragraphs are copied as is. The
    * paragraphs are concatenated without any separator.
    *
    * @param complete: passed on to convertElementToText()
    * @return: the text
    */
   convertDomToString(complete) {
      var result = '';
      for (var par of gText.div.childNodes) {
         if (par.tagName == 'P') {
            if (par.classList.contains(CLASS_PARAGRAPH)) {
               result += this.convertElementToText(par, complete);
            } else if (par.classList.contains(CLASS_COMMENT)) {
               result += par.textContent;
            } else {
               alert("Found a 'p' which is neither a text nor an info: `"+par.textContent+"'.");
            }
         } else {
            // report the offending node itself
            alert("A child of the div 'text' is not a paragraph (node type: "+par.nodeType+").");
         }
      }
      return result;
   }

   /* @return: one "#COLOR:name=color" line per true chain */
   getColors() {
      var result = '';
      for (var chain of gText.chainColl.chains) {
         if (chain.isTrueChain) {
            result += "#COLOR:" + chain.name + "=" + chain.color.string + "\n";
         }
      }
      return result;
   }

   /* @return: the "#TOKENIZATION-TYPE:n" line */
   getTokenizationType() {
      return "#TOKENIZATION-TYPE:"+gText.tokenizationType.toString()+"\n";
   }


   /* Build the exported document: a "#begin document" line, one
    * tab-separated row per whitespace-separated token (document id, part,
    * token index, form, six "_" columns, coreference column) and an
    * "#end document" line.
    *
    * The coreference column is "(c)" for a one-token mention, "(c" for the
    * first token of a mention, "c)" for its last token and "-" otherwise.
    *
    * NOTE(review): the cluster id is the single character at index 2 of the
    * token (e.g. the "1" of "{M1word"), and the form starts at index 3: this
    * assumes chain names made of one letter and one digit. Names and content
    * are concatenated without separator by convertElementToText(), so longer
    * names cannot be told apart from the content here -- to be confirmed
    * with the consumer of the file.
    * NOTE(review): the token index of the first row is 2, as in the previous
    * version -- confirm whether the consumer expects this.
    *
    * @param complete: passed on to convertDomToString()
    * @return: the document
    */
   computeText(complete) {
      var tokens = this.convertDomToString(complete).split(" ")
         .filter(element => element != "");
      const genRanHex = size => [...Array(size)].map(() => Math.floor(Math.random() * 16).toString(16)).join('');
      const bookid = 'book_' + genRanHex(6);
      // remove the closing brace itself, wherever it is: a punctuation mark
      // may follow it ("word},"), and must not be dropped in its place
      const stripClosingBrace = function(str) {
         var index = str.lastIndexOf("}");
         return str.substring(0, index) + str.substring(index + 1);
      };
      var rows = ['#begin document (' + bookid + '); part 0'];
      // clusters of the mentions that are currently open (innermost last), so
      // that a closing token is attached to the mention it really closes
      var openClusters = [];
      var token_index = 1;
      for (var token of tokens) {
         var opens = token.includes("{");
         var closes = token.includes("}");
         var form;
         var coref;
         if (opens && closes) { // one-token mention
            form = stripClosingBrace(token.substring(3));
            coref = '(' + token.substring(2, 3) + ')';
         } else if (opens) { // first token of a mention
            openClusters.push(token.substring(2, 3));
            form = token.substring(3);
            coref = '(' + token.substring(2, 3);
         } else if (closes && openClusters.length) { // last token
            form = stripClosingBrace(token);
            coref = openClusters.pop() + ')';
         } else { // plain text (or a stray "}" that closes nothing)
            form = token;
            coref = '-';
         }
         token_index += 1;
         rows.push(bookid + '\t0\t' + String(token_index) + '\t' + form
            + '\t_\t_\t_\t_\t_\t_\t' + coref);
      }
      return rows.join('\n') + '\n\n#end document';
   }

   /* @return: the schema, as it has been given by the user */
   computeSchema() {
      return gText.raw_schema;
   }

   /* Download the annotated text.
    *
    * @param complete: passed on to computeText()
    */
   exportText(complete) {
      var filename = Exporter.computeNewFilename(gText.textFilename);
      var text = this.computeText(complete);
      Exporter.writeToFile(text, filename);
   }

   /* Download the schema. */
   exportSchema() {
      // same fallback as computeNewFilename(), to avoid "undefined-schema"
      var baseName = gText.textFilename || "default";
      var filename = Exporter.computeNewFilename(baseName+"-schema");
      var text = this.computeSchema();
      Exporter.writeToFile(text, filename);
   }

   /* Show the schema in an alert box. */
   showSchema() {
      alert(this.computeSchema());
   }

}

// --------------------------------------------------------------------------
// /js/cls_colors.js:
// --------------------------------------------------------------------------
/*
 *
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */
/*
 * Color utilities (end of the header of /js/cls_colors.js):
 * - ColorBuilder: builds the palette;
 * - Color: one HSL color and its derived CSS strings;
 * - ColorManager: attributes the colors of the palette to the chains;
 * - ColorChooserDialog: lets the user choose a color.
 */

class ColorBuilder {

   /* Sanitise a step: anything that is not a finite, strictly positive
    * number (undefined, null, 0, "", NaN, negative values...) is replaced by
    * `fallback`. Numeric strings (e.g. values read from a form) are
    * converted to numbers, otherwise `h += step` would concatenate.
    */
   static _normalizeStep(step, fallback) {
      var value = Number(step);
      return (Number.isFinite(value) && value > 0) ? value : fallback;
   }

   /* Number of colors that buildColors() returns for the same arguments.
    *
    * @param hueStep, saturationStep, lightnessStep: see buildColors()
    * @return: the number of colors
    */
   static computeNbOfColors(hueStep, saturationStep, lightnessStep) {
      hueStep = ColorBuilder._normalizeStep(hueStep, 25);
      saturationStep = ColorBuilder._normalizeStep(saturationStep, 25);
      lightnessStep = ColorBuilder._normalizeStep(lightnessStep, 10);
      var hue = Math.ceil(360/hueStep);
      var saturation = Math.ceil(100/saturationStep);
      var lightness = Math.ceil(70/lightnessStep); // because ]10;80]
      return hue * saturation * lightness;
   }

   /* Build the palette: saturation from 100 down to ]0, lightness from 80
    * down to ]10, hue from 0 up to [360.
    *
    * @param hueStep: step for the hue (default: 25)
    * @param saturationStep: step for the saturation (default: 25)
    * @param lightnessStep: step for the lightness (default: 10)
    * @return: an array of Color
    */
   static buildColors(hueStep, saturationStep, lightnessStep) {
      hueStep = ColorBuilder._normalizeStep(hueStep, 25);
      saturationStep = ColorBuilder._normalizeStep(saturationStep, 25);
      lightnessStep = ColorBuilder._normalizeStep(lightnessStep, 10);
      var colors = [];
      for (var s=100; s>0; s-=saturationStep) {
         for (var l=80; l>10; l-=lightnessStep) {
            for (var h=0; h<360; h+=hueStep) {
               colors.push(new Color(h, s, l));
            }
         }
      }
      console.log("number of colors: "+colors.length.toString());
      return colors;
   }

}



class Color {

   /* @param rgb: {r, g, b}
    * @return: {y, u, v}, each component clamped to an integer in [0;255]
    */
   static rgb2yuv(rgb) {
      var y = Color.clamp(rgb.r *  0.29900 + rgb.g *  0.587   + rgb.b * 0.114);
      var u = Color.clamp(rgb.r * -0.16874 + rgb.g * -0.33126 + rgb.b *  0.50000 + 128);
      var v = Color.clamp(rgb.r *  0.50000 + rgb.g * -0.41869 + rgb.b * -0.08131 + 128);
      return {y:y, u:u, v:v};
   }

   /* @return: n floored, and limited to [0;255] */
   static clamp(n){
      if (n<0) { return 0;}
      if (n>255) { return 255;}
      return Math.floor(n);
   }


   /* @param yuv: {y, u, v}
    * @return: {r, g, b}, each component clamped to an integer in [0;255]
    */
   static yuv2rgb(yuv){
      var y = yuv.y;
      var u = yuv.u;
      var v = yuv.v;
      var r = Color.clamp(y + (v - 128) *  1.40200);
      var g = Color.clamp(y + (u - 128) * -0.34414 + (v - 128) * -0.71414);
      var b = Color.clamp(y + (u - 128) *  1.77200);
      return {r:r,g:g,b:b};
   }

   /**
    * adapted from https://stackoverflow.com/questions/36721830/convert-hsl-to-rgb-and-hex
    * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
    * Assumes h in [0;360], s/l in [0;100]
    *
    * @return: {r, g, b}, each component an integer in [0;255]
    */
   static hsl2rgb(h, s, l) {
      var r, g, b;
      h = h/360;
      s = s/100;
      l = l/100;
      if(s == 0){
         r = g = b = l; // achromatic
      }else{
         var hue2rgb = function hue2rgb(p, q, t){
            if(t < 0) t += 1;
            if(t > 1) t -= 1;
            if(t < 1/6) return p + (q - p) * 6 * t;
            if(t < 1/2) return q;
            if(t < 2/3) return p + (q - p) * (2/3 - t) * 6;
            return p;
         };
         var q = l < 0.5 ? l * (1 + s) : l + s - l * s;
         var p = 2 * l - q;
         r = hue2rgb(p, q, h + 1/3);
         g = hue2rgb(p, q, h);
         b = hue2rgb(p, q, h - 1/3);
      }
      return {r:Math.round(r * 255),
         g:Math.round(g * 255), b:Math.round(b * 255)};
   }

   /* Compute a color that is readable on `rgb`: the luminance is shifted by
    * a fixed amount, downwards for light colors and upwards for dark ones.
    * adapted from https://stackoverflow.com/questions/9600295/automatically-change-text-color-to-assure-readability
    *
    * @param rgb: {r, g, b}
    * @return: {r, g, b}
    */
   static invertColor(rgb) {
      var yuv = Color.rgb2yuv(rgb);
      var factor = 180;
      var threshold = 100;
      yuv.y = Color.clamp(yuv.y + (yuv.y > threshold ? -factor : factor));
      return Color.yuv2rgb(yuv);
   }

   /* Parse a string like "hsl(25, 100%, 80%)".
    *
    * @return: a Color, or null if the string can't be parsed
    */
   static parseString(str) {
      var re = /hsl\((\d+), *(\d+)%, *(\d+)%\)/;
      var result = re.exec(str);
      if (result) {
         // the captures are strings: convert them, so that the components
         // are numbers (as in the palette) and so that "hsl(025, ...)"
         // compares equal to "hsl(25, ...)"
         return new Color(Number(result[1]), Number(result[2]),
            Number(result[3]));
      }
      return null;
   }

   /* @param h: hue, in [0;360]
    * @param s: saturation, in [0;100]
    * @param l: lightness, in [0;100]
    */
   constructor(h, s, l) {
      this.h = h;
      this.s = s;
      this.l = l;
      this._string = "hsl("+h+", "+s+"%, "+l+"%)";
      var rgb = Color.hsl2rgb(h, s, l);
      this._transparentString = 'rgba('+rgb.r+','+rgb.g+','+rgb.b+',0.4)';
      var invertedRgb = Color.invertColor(rgb);
      this._invertedString = "rgb("+invertedRgb.r+","+invertedRgb.g+","
         +invertedRgb.b+")";
   }

   /* the color as a css "hsl(h, s%, l%)" string */
   get string() {
      return this._string;
   }

   /* a contrasting color, as a css "rgb(r,g,b)" string */
   get invertedString() {
      return this._invertedString;
   }

   /* the color with an opacity of 0.4, as a css "rgba(r,g,b,0.4)" string */
   get transparentString() {
      return this._transparentString;
   }

   equalsString(str) {
      return str === this._string;
   }

   equalsColor(color) {
      return color.string === this._string;
   }

}

// declared explicitly: an assignment to an undeclared name is an error in
// strict mode (and in modules)
var _defaultColor = null;

class ColorManager {

   /* @return: the color used for the links that are not in a true chain
    * (light gray, unless changed with changeDefaultColor())
    */
   static getDefaultColor() {
      if (!_defaultColor) {
         _defaultColor = new Color(0, 0, 83);
      }
      return _defaultColor;
   }

   /* The parameters are passed on to ColorBuilder.buildColors(). */
   constructor(hueStep, saturationStep, lightnessStep) {
      this.colors = ColorBuilder.buildColors(hueStep, saturationStep,
         lightnessStep);
   }

   /* @return: true if `color` is in the palette */
   doesThisColorExist(color) {
      for (var c of this.colors) {
         if (c.equalsColor(color)) {
            return true;
         }
      }
      return false;
   }

   /* @return: true if `color` is neither the default color nor the color of
    * one of the `chains`
    */
   isThisColorFree(color, chains) {
      if (color.equalsColor(ColorManager.getDefaultColor())) {
         return false;
      }
      for (var chain of chains) {
         if (color.equalsColor(chain.color)) {
            return false;
         }
      }
      return true;
   }


   /* If there is no more available colors, return the default color
    */
   getNextAvailableColor(chains) {
      for (var color of this.colors) {
         if (this.isThisColorFree(color, chains)) {
            return color;
         }
      }
      alert("There is no more color available.  Try to export your "
         +"annotations, reload the script and define more color on the start "
         +"page.  In the meantime, I'm using default color (gray).");
      return ColorManager.getDefaultColor(); // if there is no more color
   }

   /* @return: the colors of the palette that are free (see
    * isThisColorFree())
    */
   getAvailableColors(chains) {
      var that = this;
      return this.colors.filter(
         function(c) { return that.isThisColorFree(c, chains); });
   }

   /* Ask the user for a new default color. */
   changeDefaultColor(chains) {
      var colors = this.getAvailableColors(chains);
      // the dialog shows itself when constructed
      new ColorChooserDialog(colors, function(color) {
         _defaultColor = color;
         // TODO redraw links and link list
      });
   }

   /* Ask the user for a new color for `chain`. */
   changeChainColor(chain, chains) {
      var colors = this.getAvailableColors(chains);
      // the dialog shows itself when constructed
      new ColorChooserDialog(colors, function(color) {
         chain.color = color;
         // TODO redraw links and link list
      });
   }

}


class ColorChooserDialog {

   /* Build and show a modal dialog with one line per color.
    *
    * @param colors: array of Color
    * @param callback: function called with the chosen Color (the dialog is
    * closed afterwards)
    */
   constructor(colors, callback) {
      this.callback = callback;
      var div = document.createElement("div");
      div.style.padding = "20px";
      this.modalDiv = new ModalDiv("Color chooser", div);
      var that = this;
      for (var color of colors) {
         var par = document.createElement("p");
         par.style.padding = "7px";
         par.style.backgroundColor = color.string;
         par.style.color = color.invertedString;
         // NOTE(review): "anchor" is not an html element name ("a" was
         // probably intended); kept as is, because switching to a real <a>
         // would make the style sheet rules for links apply
         var anchor = document.createElement("anchor");
         anchor.style.cursor = "pointer";
         // the color is stored on the element, and read back through `this`
         // in the handler (`color` is shared by all the iterations)
         anchor.color = color;
         anchor.onclick = function(e) {
            that.callback(this.color);
            that.modalDiv.close();
         };
         var textNode = document.createTextNode("Choose this color!");
         anchor.appendChild(textNode);
         par.appendChild(anchor);
         div.appendChild(par);
      }
      div.style['overflow-y'] = "scroll";
      this.modalDiv.show();
   }

}

// --------------------------------------------------------------------------
// /js/cls_sacr_parser.js:
// --------------------------------------------------------------------------
/*
 *
 * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
 * annotation tool.
 *
 * Copyright 2017 Bruno Oberlé.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
 * License, v. 2.0 for more details.
 *
 * Some questions about the license may have been answered at
 * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
 *
 * If you have any question, contact me at boberle.com.
 *
 * The source code can be found at boberle.com.
 *
 */

var TOKENIZATION_WORD = 1;
var TOKENIZATION_WORD_N_PUNCT = 2;
var TOKENIZATION_CHARACTER = 3;

/**********************************************************************
 * ParsedLink
 *********************************************************************/

/* A link ({name:properties text}) found by the parser, waiting to be
 * imported into the text.
 */
class ParsedLink {
   constructor(name) {
      this.name = name;
      this.properties = {}; // it's just a dictionary
      this.startAnchor = null;
      this.endAnchor = null; // same as startAnchor if only one token
   }
}


/**********************************************************************
 * SacrParser
 *********************************************************************/

class SacrParser {

   /* preprocessing of the input text: each line = one paragraph, no empty
    * line, etc.  NOTE: no need to worry about \r\n because the text is taken
    * from a textarea, which returns only \n
    */
   static normalizeText(text) {
      text = text.replace(/\n(\s+\n)+/g, "\n\n");
      text = text.replace(/\\n\n/g, "\\n");
      text = text.replace(/\n\s*(#[^\n]+)\n/g, "\n\n$1\n\n");
      text = text.replace(/([^\n])[ \t]*\n[ \t]*(?!#|\n+)/g, "$1 ");
      text = text.replace(/[ \t]*\n\n+[ \t]*/g, "\n");
      text = text.replace(/^\n+/g, "");
      text = text.replace(/\n+$/g, "");
      return text;
   }

   /* Build the (anchored, case insensitive) regex that matches one token at
    * the beginning of a string.
    *
    * @param tokenizationType: one of the TOKENIZATION_* constants (any other
    * value means character tokenization)
    * @param additionnalTokens: array of strings matched before the generic
    * patterns.  It is sorted in situ (longest first, so that "°C" wins over
    * "°").  NOTE(review): the strings are inserted in the regex as is (they
    * are not escaped), so regex metacharacters are interpreted -- confirm
    * that this is intended.
    * @return: a RegExp
    */
   static makeTokenRegex(tokenizationType, additionnalTokens) {
      additionnalTokens.sort(function(a,b) {
         if (b.length === a.length) { return 0; }
         else if (b.length > a.length) { return 1; }
         else { return -1; }
      });
      var additionnalTokenString = additionnalTokens.join('|');
      var wordPattern =
         "[а-яa-zёßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿœα-ω]+'?|[-+±]?[.,]?[0-9]+";
      var tokenization_string;
      if (tokenizationType == TOKENIZATION_WORD) {
         tokenization_string = wordPattern;
      } else if (tokenizationType == TOKENIZATION_WORD_N_PUNCT) {
         tokenization_string = wordPattern + "|[.,;:!?()\\[\\]]|-+";
      } else {
         // characters
         tokenization_string = "[^{}]";
      }
      return new RegExp("^(" + additionnalTokenString + "|"
         + tokenization_string + ")", 'i');
   }

   /* @param div: the element in which the paragraphs are created
    * @param text: the text to parse
    * @param tokenizationType: one of the TOKENIZATION_* constants
    * @param showPropertyWarnings: stored, not used by the parser itself
    */
   constructor(div, text, tokenizationType, showPropertyWarnings) {
      this.div = div;
      this.text = text;
      this.tokenizationType = tokenizationType;
      this.showPropertyWarnings = showPropertyWarnings;
   }

   /* Parse this.text (which is replaced by its normalized form), create the
    * paragraphs in this.div, then import the links into the global `gText`
    * and apply the colors declared with #COLOR lines.
    *
    * Throws a string (not an Error) when the text can't be parsed; this is
    * kept as is, since the callers are not visible from here.
    */
   parseText() {

      var tmp;
      var additionnalTokens = new Array('%', '‰', '°', '°C', '°F');
      var tokenRegex = SacrParser.makeTokenRegex(this.tokenizationType,
         additionnalTokens);

      var parIsHeading = 0; // 0 = no, 1 = level 1, etc.
      // variables for storing actions for creating links and chains
      var parsedLinks = new Array(); // array of ParsedLink
                                     // (see the class ParsedLink for details)
      // NOTE: each link found ({name:values text}) is stored in the
      // filoLinks.  When the closing } is encountered, it is popped out from
      // the filoLinks array and stored permanently in the parsedLinks array.
      var filoLinks = new Array(); // array of ParsedLink
      var colors = {}; // dictionary: chain name -> Color

      // preprocessing
      this.text = SacrParser.normalizeText(this.text);
      var lines = this.text.split(/\n/);

      var textTitle = ''; // for the document.title
      var textId = ''; // idem

      var paragraph_counter = 1;

      // loop
      for (var line of lines) {

         // a line of #'s is a separator, like a line of *'s
         if (line.match(/^#+$/)) {
            line = line.replace(/#/g, '*');
         }

         if (((tmp = line.match(/^#COLOR\s*:\s*([^ =]+)\s*=\s*(.+)$/)) != null)) {
            var chainName = tmp[1];
            var tmp_color = Color.parseString(tmp[2]); // returns null if
                                                       // can't parse
            if (tmp_color) {
               colors[chainName] = tmp_color;
            } else {
               console.log("can't parse color: "+tmp[2]);
            }

         } else if (line.match(/^#DEFAULTCOLOR\s*:\s*(\S+)$/) != null) {
            // nothing (the default color is not imported)

         } else if (line.match(/^#TOKENIZATION-TYPE/) != null) {
            // nothing

         } else if (((tmp = line.match(/^#.*$/)) != null)
               || ((tmp = line.match(/^\*+$/)) != null)) {
            // comment or separator
            var par = document.createElement('P');
            if (line.match(/^\*+$/)) {
               // the text is kept (hidden), so that it can be exported
               var hiddenSpan = document.createElement('SPAN');
               hiddenSpan.style.display = 'none';
               hiddenSpan.appendChild(document.createTextNode(tmp[0]));
               par.appendChild(hiddenSpan);
               par.appendChild(document.createElement('HR'));
            } else {
               var tmp2;
               if ((tmp2 = line.match(/^\s*#title\s*:\s*(.+)$/)) != null) {
                  textTitle = tmp2[1];
               } else if ((tmp2 = line.match(/^\s*#textid\s*:\s*(.+)$/)) != null) {
                  textId = tmp2[1];
               } else if ((tmp2 = line.match(/^\s*#additionnaltoken\s*:\s*(.+)$/)) != null) {
                  additionnalTokens.push(tmp2[1]);
                  tokenRegex = SacrParser.makeTokenRegex(this.tokenizationType,
                     additionnalTokens);
               }
               par.appendChild(document.createTextNode(tmp[0]));
            }
            par.className = CLASS_COMMENT;
            this.div.appendChild(par);
            if (line.match(/^#part-heading:/) != null) {
               // 13 is the index of the colon of "#part-heading:"
               var response = CommonFunctions.parseValues(line, 13);
               if (response.startIndex != line.length) {
                  throw "Can't parse line: "+line+" (error when reading option values)";
               }
               parIsHeading = 1;
               if ('level' in response.dic) {
                  parIsHeading = response.dic.level;
               }
            }

         } else if (line.length) {
            // paragraph of text
            var startIndex = 0;
            var textLen = line.length;
            var lastAnchor;
            var thereIsSomeText = '';
            var par = document.createElement('P');
            par.className = CLASS_PARAGRAPH;
            if (parIsHeading) {
               par.classList.add(CLASS_HEADING);
               par.classList.add("level"+parIsHeading);
            }
            var par_number = document.createElement('SPAN');
            par_number.className = CLASS_PAR_NUMBER;
            par_number.appendChild(document.createTextNode('[#'+paragraph_counter+'] '));
            paragraph_counter++;
            par.appendChild(par_number);
            parIsHeading = 0; // a heading is only one paragraph
            this.div.appendChild(par);
            while(startIndex < textLen) {
               var rest = line.substring(startIndex);
               if ((tmp = rest.match(tokenRegex)) != null) {
                  var anchor = gText.createTokenAnchor(tmp[0]);
                  // WARNING!!! sometimes, there are consecutive
                  // opening tag ({foo {bar A Word}}), and so you must
                  // set the anchor for ALL these tags!!!  So, just go
                  // through the array, and complete if something is
                  // undefined.
                  for (var link of filoLinks) {
                     if (!link.startAnchor) {
                        link.startAnchor = anchor;
                     }
                  }
                  lastAnchor = anchor;
                  par.appendChild(anchor);
                  startIndex += tmp[0].length;
                  thereIsSomeText = true;
               } else if ((tmp = rest.match(/^{([-a-zA-Z0-9_]+)/)) != null) {
                  var chainName = tmp[1];
                  var response = CommonFunctions.parseValues(line, startIndex+chainName.length+1);
                  startIndex = response.startIndex;
                  // a space is required between the name (or the properties)
                  // and the text
                  if (line.substring(startIndex).search(/^\s/) != 0) {
                     throw "Can't parse line: "+line+" (error when reading property values).";
                  }
                  var parsedLink = new ParsedLink(chainName);
                  parsedLink.properties = response.dic;
                  filoLinks.push(parsedLink);
                  startIndex++; // skip the space
                  thereIsSomeText = false;
               } else if (rest.match(/^}/) != null) {
                  if (!thereIsSomeText) {
                     alert("Warning: an annotation has no text!");
                     filoLinks.pop();
                  } else {
                     if (filoLinks.length == 0 || !lastAnchor) {
                        throw "Syntax error in the file (too much }'s).";
                     }
                     filoLinks[filoLinks.length-1].endAnchor = lastAnchor;
                     parsedLinks.push(filoLinks.pop());
                  }
                  startIndex++;
               } else if (rest.match(/^\\n/) != null) {
                  // a literal backslash followed by "n" is a line break
                  par.appendChild(document.createElement('BR'));
                  startIndex += 2;
               } else {
                  // any other character is copied as text.  No regex here:
                  // /^./ does not match some characters (e.g. U+2028), and
                  // the loop would never end
                  par.appendChild(document.createTextNode(
                     line.charAt(startIndex)));
                  startIndex++;
               }
            } // while
            if (startIndex != textLen) {
               throw "The parser has stopped to early!";
            }
            if (filoLinks.length) {
               throw "Syntax error in the file (not enough }'s).";
            }
            par.normalize();

         } else {
            throw "Can't parse line: "+line;

         }

      } // for each line

      // set the document title (best effort)
      try {
         if (textId && textTitle) {
            document.title = "SACR: "+textId+", "+textTitle;
         } else if (textId) {
            document.title = "SACR: "+textId;
         } else if (textTitle) {
            document.title = "SACR: "+textTitle;
         }
      } catch (err) {
         // buuuuh!
      }

      /* creation of links and chains */

      for (var parsedLink of parsedLinks) {
         gText.importLink(parsedLink.startAnchor, parsedLink.endAnchor,
            parsedLink.name, parsedLink.properties);
      }

      /* colors */

      // a color is applied only if it is in the palette, if it is still free
      // and if the chain exists and is a true chain
      for (var chainName in colors) {
         var color = colors[chainName];
         if (gText.colorManager.doesThisColorExist(color) &&
               gText.colorManager.isThisColorFree(color,
               gText.chainColl.chains)) {
            var chain = gText.chainColl.getChainByName(chainName);
            if (chain && chain.isTrueChain) {
               chain.color = color;
            }
         }
      }

   }

}

// --------------------------------------------------------------------------
// /index.html: (the markup is not visible in this view)
// title: Аннотация корефрентности
// --------------------------------------------------------------------------

SACR RU

35 | 36 |
37 | 38 |
39 | 40 | 41 | 42 | 43 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 110 | 111 | 317 | 318 | 319 | 320 | 321 | -------------------------------------------------------------------------------- /js/cls_link.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v. 2.0 for more details. 14 | * 15 | * Some questions about the license may have been answered at 16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/. 17 | * 18 | * If you have any question, contact me at boberle.com. 19 | * 20 | * The source code can be found at boberle.com. 21 | * 22 | */ 23 | 24 | _selectionCount = 0; 25 | _linkIdCounter = 0; 26 | 27 | class Link { 28 | 29 | static getNextSelectionCount() { 30 | return _selectionCount++; 31 | } 32 | 33 | /* links is sorted in situ */ 34 | static sortLinks(links) { 35 | if (gLoadingTime) { 36 | return; 37 | } 38 | links.sort(function(a,b) { 39 | // a is after b 40 | if (a.span.compareDocumentPosition(b.span) & 2) { 41 | return 1; 42 | // a is before b 43 | } else if (a.span.compareDocumentPosition(b.span) & 4) { 44 | return -1; 45 | } 46 | return 0; }); 47 | } 48 | 49 | /* @param elements: The first and last elements of the link. They may be 50 | * anchors or spans. 
51 | */ 52 | constructor(elements, initialProperties) { 53 | this.id = _linkIdCounter++; 54 | this._name = undefined; // set up by setChain() 55 | this._color = ColorManager.getDefaultColor(); // set up by setChain() 56 | this.selectionCount = -1; 57 | // elements 58 | this.span = document.createElement('SPAN'); 59 | this.span.classList.add(CLASS_LINK); 60 | this.nameSpan = document.createElement('SPAN'); 61 | this.span.appendChild(this.nameSpan); 62 | this.nameSpan.className = CLASS_METADATA; 63 | // move all the elements in the span 64 | var toBeMoved = new Array(); 65 | var cur = elements[0]; 66 | while (true) { 67 | toBeMoved.push(cur); 68 | if (elements.length == 1 || cur === elements[1]) { 69 | break; 70 | } 71 | cur = cur.nextSibling; 72 | } 73 | elements[0].parentNode.replaceChild(this.span, elements[0]); 74 | for (var e of toBeMoved) { 75 | this.span.appendChild(e); 76 | } 77 | // name 78 | this.nameAnchor = document.createElement('A'); 79 | this.nameSpan.appendChild(this.nameAnchor); 80 | // elements for the chains popup (after moving all the elements into the 81 | // span, because we use this.text) 82 | this.popupPar = document.createElement('P'); 83 | this.popupAnchor = document.createElement('A'); 84 | this.popupAnchor.appendChild(document.createTextNode(this.text)); 85 | this.popupPar.appendChild(this.popupAnchor); 86 | // add events to all the elements 87 | this._addEvents(); 88 | // misc 89 | this._isSelected = false; 90 | this._isHidden = false; 91 | this.redraw(); 92 | // properties (at the end because of the head property and this.words) 93 | if (gText.schema.isEmpty) { 94 | this.properties = null; 95 | if (gText.showPropertyWarnings && initialProperties 96 | && Object.keys(initialProperties).length) { 97 | alert("No schema has been defined, yet there are some properties " 98 | + "in the file."); 99 | } 100 | } else { 101 | if (!initialProperties) { 102 | initialProperties = {}; 103 | } 104 | this.properties 105 | = 
gText.schema.buildLinkProperties(initialProperties); 106 | this.properties.resetHeadProperty(this); 107 | } 108 | } 109 | 110 | _addEvents() { 111 | var that = this; 112 | this.span.onclick = function(e) { 113 | if (e.ctrlKey && e.shiftKey) { // attach to new chain (ask for name) 114 | that.select(); 115 | var name = CommonFunctions.getChainName(gText.chainColl, true, 116 | that.name); 117 | if (name) { 118 | var chain = new Chain(name); 119 | gText.chainColl.addChain(chain); 120 | gText.chainColl.transferLink(that, chain); 121 | } 122 | } else if (e.ctrlKey) { // attach to last selected chain 123 | var lastSelectedChain = gText.chainColl.getLastSelectedChain(); 124 | that.select(); 125 | if (lastSelectedChain) { 126 | gText.chainColl.transferLink(that, lastSelectedChain); 127 | } 128 | } else if (e.shiftKey) { // attach to new chain (default name) 129 | that.select(); 130 | var name = CommonFunctions.getChainName(gText.chainColl, false); 131 | if (name) { 132 | var chain = new Chain(name); 133 | gText.chainColl.addChain(chain); 134 | gText.chainColl.transferLink(that, chain); 135 | } 136 | } else { 137 | if (that.isSelected) { 138 | that.deselect(); 139 | } else { 140 | that.select(); 141 | } 142 | } 143 | e.stopPropagation(); 144 | return false; 145 | }; 146 | this.popupAnchor.onclick = this.span.onclick; 147 | // drag and drop 148 | this.span.draggable = true; 149 | this.span.ondragstart = function(e) { 150 | e.stopPropagation(); // when overlapping span (link inside link) 151 | e.dataTransfer.setData("text", that.id.toString()); 152 | // e.target is the source element (ie the element that is dragged) 153 | //useless: e.dataTransfer.effectAllowed = 'all'; 154 | }; 155 | this.span.ondragover = function(e) { 156 | e.preventDefault(); // allow the drop (blocked by default) 157 | }; 158 | this.span.ondrop = function(e) { 159 | // NOTE: e.target is the target element (ie the element on which an 160 | // element is dropped): don't use it, but use `this/that' 161 | 
e.stopPropagation(); // when overlapping span (link inside link) 162 | //console.log(e.target); 163 | //console.log(this); 164 | e.preventDefault(); 165 | var linkId = parseInt(e.dataTransfer.getData("text")); 166 | var sourceLink = gText.chainColl.getLinkById(linkId); 167 | if (!sourceLink) return; 168 | var targetLink = that; 169 | var shiftKey = e.shiftKey // doesn't seem to work on FF 54 (only 55) 170 | || e.dataTransfer.dropEffect == "link"; // ctrl+shift, for FF54 171 | //console.log(e); 172 | //console.log(shiftKey); 173 | if (shiftKey) { 174 | gText.substituteLink(sourceLink, targetLink); 175 | } else { 176 | if (sourceLink === targetLink) return; 177 | var sourceChain = gText.chainColl.getChainByLink(sourceLink); 178 | var targetChain = gText.chainColl.getChainByLink(targetLink); 179 | if (sourceChain === targetChain) return; 180 | var ctrlKey = e.ctrlKey // doesn't seem to work on FF 54 (only 55), and doesn't work on chromium 181 | || e.dataTransfer.dropEffect == "copy"; // works on all versions of FF, and on chromium if the key is pressed before beginning the d&d 182 | // for chrome, see: https://stackoverflow.com/questions/19010257/event-datatransfer-dropeffect-in-chrome 183 | //console.log(e.dataTransfer.dropEffect); 184 | if (sourceChain.count == 1) { 185 | if (targetChain.count == 1) { 186 | //targetLink.setChain(sourceChain); 187 | gText.chainColl.transferLink(targetLink, sourceChain); 188 | } else { 189 | if (ctrlKey) { 190 | //targetLink.setChain(sourceChain); 191 | gText.chainColl.transferLink(targetLink, sourceChain); 192 | } else { 193 | //sourceLink.setChain(targetChain); 194 | gText.chainColl.transferLink(sourceLink, targetChain); 195 | } 196 | } 197 | } else { 198 | if (targetChain.count == 1) { 199 | //targetLink.setChain(sourceChain); 200 | gText.chainColl.transferLink(targetLink, sourceChain); 201 | } else { 202 | if (ctrlKey) { 203 | //targetLink.setChain(sourceChain); 204 | gText.chainColl.transferLink(targetLink, sourceChain); 205 | } 
else { 206 | if (confirm("Do you want to merge?")) { 207 | while (targetChain.count) { 208 | //targetChain.links[0].setChain(sourceChain); 209 | gText.chainColl.transferLink(targetChain.links[0], sourceChain); 210 | } 211 | } 212 | } 213 | } 214 | } 215 | } // no shift key 216 | }; // this.span.ondrop 217 | this.popupAnchor.draggable = true; 218 | this.popupAnchor.ondragstart = this.span.ondragstart; 219 | this.popupAnchor.ondragover = this.span.ondragover; 220 | this.popupAnchor.ondrop = this.span.ondrop; 221 | } 222 | 223 | get words() { 224 | var wds = new Array(); 225 | for (var anchor of this.span.getElementsByClassName(CLASS_TOKEN)) { 226 | wds.push(anchor.textContent); 227 | } 228 | return wds; 229 | } 230 | 231 | get text() { 232 | //var text = this.span.textContent; 233 | //return text.substr(text.indexOf(" ")+1); 234 | var clone = this.span.cloneNode(true); 235 | var badguys = clone.getElementsByClassName(CLASS_METADATA); 236 | for (var i=badguys.length-1; i>=0; i--) { 237 | badguys[i].parentNode.removeChild(badguys[i]); 238 | } 239 | return clone.textContent; 240 | } 241 | 242 | setChain(chain) { 243 | this._name = chain.name; 244 | this._color = chain.color; 245 | } 246 | 247 | get name() { 248 | return this._name; 249 | } 250 | 251 | get contentIsEmptySet() { 252 | return this.text === "Ø"; 253 | } 254 | 255 | show() { 256 | if (this._isHidden) { 257 | this._isHidden = false; 258 | this.redraw(); 259 | } 260 | } 261 | 262 | hide() { 263 | if (!this._isHidden) { 264 | this._isHidden = true; 265 | this.redraw(); 266 | } 267 | } 268 | 269 | get isHidden() { 270 | return this._isHidden; 271 | } 272 | 273 | get isVisible() { 274 | return !this._isHidden; 275 | } 276 | 277 | select() { 278 | if (!this._isSelected) { 279 | gText.deselectAllTokensAndLinks(); 280 | gText.chainColl.selectChain(this); 281 | this._isSelected = true; 282 | this.redraw(); 283 | this.selectionCount = Link.getNextSelectionCount(); 284 | } 285 | } 286 | 287 | deselect() { 288 | if 
(this._isSelected) { 289 | gText.chainColl.deselectChain(this); 290 | this._isSelected = false; 291 | this.redraw(); 292 | } 293 | } 294 | 295 | get isSelected() { 296 | return this._isSelected; 297 | } 298 | 299 | destroy() { 300 | this.deselect(); // remove the div from the property panel 301 | this.span.removeChild(this.nameSpan); 302 | while (this.span.childNodes.length) { 303 | this.span.parentNode.insertBefore(this.span.firstChild, this.span); 304 | } 305 | var parentNode = this.span.parentNode; 306 | parentNode.removeChild(this.span); 307 | parentNode.normalize(); 308 | } 309 | 310 | redraw() { 311 | this.span.style.borderColor = this._color.string; 312 | this.nameSpan.style.borderColor = this._color.string; 313 | this.nameSpan.style.backgroundColor = this._color.string; 314 | this.nameAnchor.style.color = this._color.invertedString; 315 | this.nameAnchor.innerHTML = this._name; 316 | if (this.isHidden) { 317 | this.span.classList.add(CLASS_HIDDEN); 318 | } else { 319 | this.span.classList.remove(CLASS_HIDDEN); 320 | } 321 | if (this.isSelected) { 322 | this.span.classList.add(CLASS_SELECTED); 323 | this.span.style.backgroundColor = this._color.transparentString; 324 | this.popupAnchor.classList.add(CLASS_SELECTED); 325 | } else { 326 | this.span.classList.remove(CLASS_SELECTED); 327 | this.span.style.backgroundColor = "rgba(0,0,0,0)"; // transparent 328 | // (for selection of a link that has nested links) 329 | this.popupAnchor.classList.remove(CLASS_SELECTED); 330 | } 331 | if (!gText.schema.isEmpty) { 332 | if (this.isSelected) { 333 | this.properties.div.insertBefore(gText.schema.button, 334 | this.properties.div.firstChild); 335 | gDivLinkPropertyAnchor.appendChild(this.properties.div); 336 | gDivLinkPropertyAnchor.style.display = 'block'; 337 | } else { 338 | if (gDivLinkPropertyAnchor.childNodes.length 339 | && gDivLinkPropertyAnchor.childNodes[0] == this.properties.div) { 340 | gDivLinkPropertyAnchor.removeChild(this.properties.div); 341 | 
gDivLinkPropertyAnchor.style.display = 'none'; 342 | } 343 | } 344 | } 345 | } 346 | 347 | scrollTo(win) { 348 | var obj; 349 | if (!win || win == window) { 350 | obj = this.span; 351 | } else { 352 | obj = this.popupPar; 353 | } 354 | Scrolling.scrollTo(obj, true, win); 355 | } 356 | 357 | isEqualTo(name, searchedValue, reversed) { 358 | var val = this.properties.getPropertyByName(name).value == searchedValue; 359 | if (reversed) { 360 | return !val; 361 | } 362 | return val; 363 | } 364 | 365 | matches(name, pattern, reversed) { 366 | //console.log(pattern); 367 | var val = pattern.exec(this.properties.getPropertyByName(name).value); 368 | if (reversed) { 369 | return !val; 370 | } 371 | return val; 372 | } 373 | 374 | } 375 | 376 | -------------------------------------------------------------------------------- /js/cls_text.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v. 2.0 for more details. 14 | * 15 | * Some questions about the license may have been answered at 16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/. 17 | * 18 | * If you have any question, contact me at boberle.com. 19 | * 20 | * The source code can be found at boberle.com. 
21 | * 22 | */ 23 | 24 | 25 | class Text { 26 | 27 | constructor(autocomplete) { 28 | this.div = document.createElement("DIV"); 29 | this.div.id = "divText"; // for CSS 30 | document.body.appendChild(this.div); 31 | this.chainColl = new ChainCollection(); 32 | this.chainPopup = new ChainPopup(this.chainColl.popupDiv); 33 | this.colorManager = null; 34 | this.searchDialog = null; 35 | var that = this; 36 | this.dataLoader = new DataLoader(function(dataLoader) { 37 | that.textFilename = dataLoader.textFilename; 38 | that.raw_schema = dataLoader.schema; 39 | that.raw_text = dataLoader.text; 40 | that.minLinks = dataLoader.minLinks; 41 | that.showPropertyWarnings = dataLoader.showPropertyWarnings; 42 | that.tokenizationType = dataLoader.tokenizationType; 43 | try { 44 | that.schema = new Schema(that.raw_schema) 45 | that.colorManager = new ColorManager(dataLoader.hueStep, 46 | dataLoader.saturationStep, dataLoader.lightnessStep); 47 | var parser = new SacrParser(that.div, that.raw_text, 48 | that.tokenizationType, false); 49 | parser.parseText(); 50 | } catch(error) { 51 | var errText = "

"+error.name+": "+error.message+"

"; 52 | if (error.fileName) errText += "

File: "+error.fileName+"

"; 53 | if (error.lineNumber) errText += "

Line number: "+error.lineNumber+"

"; 54 | if (error.stack) errText += "

Stack: "+error.stack+"

"; 55 | that.div.innerHTML = "

An error occurred:

" + errText; 56 | return; 57 | } 58 | gLoadingTime = false; 59 | that.chainColl.sortLinksOfAllChains(); 60 | }, autocomplete); 61 | // parse the sacr code 62 | //new SacrParser(this, this.propertyColl); 63 | } 64 | 65 | clickOnTheParseButton() { 66 | this.dataLoader.clickOnTheParseButton(); 67 | } 68 | 69 | 70 | createTokenAnchor(textContent) { 71 | var that = this; 72 | var anchor = document.createElement('A'); 73 | anchor.appendChild(document.createTextNode(textContent)); 74 | anchor.className = CLASS_TOKEN; 75 | var func = function(obj, e, dblClick) { 76 | // the next line will remove all selection of text made by the shift 77 | // key 78 | window.getSelection().removeAllRanges(); 79 | e.stopPropagation(); 80 | var selected = that.getSelectedTokens(); 81 | // if no other token is selected, we (de)select the current token 82 | if (!dblClick && selected[0] === obj) { 83 | obj.classList.remove(CLASS_SELECTED); 84 | } else if (!dblClick && selected.length == 0) { 85 | that.chainColl.deselectAllLinks(); 86 | obj.classList.add(CLASS_SELECTED); 87 | // otherwise, we create a link 88 | } else if (dblClick || selected.length == 1) { 89 | obj.classList.add(CLASS_SELECTED); 90 | // shift: ask for a name 91 | if (e.shiftKey && !e.ctrlKey) { 92 | that.createLinkAndChain(true); 93 | // ctrl: attach to previous chain 94 | } else if (e.ctrlKey && !e.shiftKey) { 95 | that.createLinkAndAttachItToLastSelectedChain(); 96 | // otherwise, default name 97 | } else { 98 | that.createLinkAndChain(false); 99 | } 100 | // if we are here, there is a problem somewhere 101 | } else { 102 | alert("Too many words are selected!"); 103 | obj.deselectAllTokensAndLinks(); 104 | return; 105 | } 106 | }; 107 | anchor.onclick = function(e){ func(this, e, false); }; 108 | anchor.ondblclick = function(e){ func(this, e, true); }; 109 | return anchor; 110 | } 111 | 112 | deselectAllTokensAndLinks() { 113 | // this is for words 114 | var selected = 
Array.from(document.getElementsByClassName(CLASS_SELECTED)); 115 | for (var e of selected) { 116 | if (e.tagName == 'A') { 117 | e.classList.remove(CLASS_SELECTED); 118 | } 119 | } 120 | // this is for links 121 | this.chainColl.deselectAllLinks(); 122 | } 123 | 124 | /* @param includeAll: If false, include only the first and last selected. 125 | */ 126 | getSelectedTokens(includeAll) { 127 | var anchors = document.getElementsByClassName(CLASS_TOKEN); 128 | var start = -1; 129 | var end = -1; 130 | for (var i=0; i size = (size * 72.0 / 96.0); 468 | var pars = this.div.childNodes; 469 | var step = 1; 470 | for (var par of pars) { 471 | var size = parseInt(getComputedStyle(par).fontSize.match(/\d+/)[0]); 472 | if (plus) { 473 | size += step; 474 | } else { 475 | if (size - step > 1) { 476 | size -= step; 477 | } 478 | } 479 | par.style.fontSize = size + "px"; 480 | } 481 | } 482 | 483 | showStatistics() { 484 | var tokenCount = this.div.getElementsByClassName(CLASS_TOKEN).length; 485 | var chainCount = this.chainColl.chains.length; 486 | var linkCount = this.div.getElementsByClassName(CLASS_LINK).length; 487 | var trueChainCount = 0; 488 | var trueLinkCount = 0; 489 | for (var c of this.chainColl.chains) { 490 | if (c.isTrueChain) { 491 | trueChainCount++; 492 | trueLinkCount += c.links.length; 493 | } 494 | } 495 | var mean = trueLinkCount / trueChainCount; 496 | var msg = ''; 497 | msg += "Number of tokens: "+tokenCount+"\n"; 498 | msg += "Number of referents : "+chainCount+"\n"; 499 | msg += "Number of chains: "+trueChainCount+"\n"; 500 | msg += "Number of referring expressions: "+linkCount+"\n"; 501 | msg += "Number of links: "+trueLinkCount+"\n"; 502 | msg += "Average number of links per chain: "+mean+"\n"; 503 | alert(msg); 504 | } 505 | 506 | } 507 | -------------------------------------------------------------------------------- /js/cls_chain.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script 
d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v. 2.0 for more details. 14 | * 15 | * Some questions about the license may have been answered at 16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/. 17 | * 18 | * If you have any question, contact me at boberle.com. 19 | * 20 | * The source code can be found at boberle.com. 21 | * 22 | */ 23 | 24 | class ChainCollection { 25 | 26 | constructor() { 27 | this.chains = new Array(); 28 | this.popupDiv = document.createElement('DIV'); 29 | this.popupDiv.id = CHAIN_POPUP_MAIN_DIV_ID; // for CSS 30 | this.showNonTrueChainsInPopup = true; 31 | } 32 | 33 | /* Because of the updating of the popupDiv, you should insert in the 34 | * collection only empty chain (and add links after). 35 | */ 36 | addChain(chain) { 37 | if (chain.count) { 38 | alert("DEBUG WARNING: newly added chain is not empty"); 39 | } 40 | this.chains.push(chain); 41 | } 42 | 43 | removeChain(chain) { 44 | for (var i=0; i 0; }); 115 | for (var c of this.chains) { 116 | c.redraw(); 117 | } 118 | } 119 | 120 | getChainByLink(link) { 121 | for (var c of this.chains) { 122 | for (var l of c.links) { 123 | if (l === link) { 124 | return c; 125 | } 126 | } 127 | } 128 | return null; 129 | } 130 | 131 | getChainByName(name) { 132 | for (var c of this.chains) { 133 | if (c.name == name) { 134 | return c; 135 | } 136 | } 137 | return null; 138 | } 139 | 140 | deselectAllLinks() { 141 | for (var c of this.chains) { 142 | for (var l of c.links) { 143 | l.deselect(); 144 | } 145 | } 146 | } 147 | 148 | getLastSelectedChain() { 149 | // do we have a link currently selected? 
150 | for (var c of this.chains) { 151 | for (var l of c.links) { 152 | if (l.isSelected) { 153 | return c; 154 | } 155 | } 156 | } 157 | // look for the last selected link 158 | // NOTE: if there is no chain, return null. 159 | var index = 0; 160 | var chain = null; 161 | for (var c of this.chains) { 162 | for (var l of c.links) { 163 | if (l.selectionCount >= index) { // yeah: >= and not > 164 | index = l.selectionCount; 165 | chain = c; 166 | } 167 | } 168 | } 169 | return chain; 170 | } 171 | 172 | getSelectedLink() { 173 | for (var c of this.chains) { 174 | for (var l of c.links) { 175 | if (l.isSelected) { 176 | return l; 177 | } 178 | } 179 | } 180 | return null; 181 | } 182 | 183 | getLinkById(id) { 184 | for (var c of this.chains) { 185 | for (var l of c.links) { 186 | if (l.id == id) { 187 | return l; 188 | } 189 | } 190 | } 191 | return null; 192 | } 193 | 194 | getLinkBySpan(span) { 195 | for (var c of this.chains) { 196 | for (var l of c.links) { 197 | if (l.span == span) { 198 | return l; 199 | } 200 | } 201 | } 202 | return null; 203 | } 204 | 205 | transferLink(link, targetChain) { 206 | var sourceChain = this.getChainByLink(link); 207 | if (sourceChain === targetChain) { 208 | return false; 209 | } 210 | link.setChain(targetChain); 211 | sourceChain.removeLink(link); 212 | if (sourceChain.count == 0) { 213 | this.removeChain(sourceChain); 214 | } 215 | targetChain.addLink(link); 216 | } 217 | 218 | getLinks() { 219 | var links = new Array(); 220 | for (var c of this.chains) { 221 | for (var l of c.links) { 222 | links.push(l); 223 | } 224 | } 225 | return links; 226 | } 227 | 228 | /* Returns null if no more link. 
*/ 229 | getNextLink(refLink, backward, onlyVisible) { 230 | var links = this.getLinks(); 231 | Link.sortLinks(links); 232 | var index = undefined; 233 | for (var i=0; i=0; i--) { 244 | if (!onlyVisible || links[i].isVisible) { 245 | return links[i]; 246 | } 247 | } 248 | } else { 249 | for (var i=index+1; i dic[b]) return 1; 352 | return 0; 353 | }); 354 | for (var key of keys) { 355 | res += key + ": " + dic[key] + "\n"; 356 | } 357 | alert(res); 358 | } 359 | } 360 | 361 | /*********************************************************************/ 362 | 363 | class Chain { 364 | 365 | /* links is sorted in situ */ 366 | static sortChains(chains) { 367 | if (gLoadingTime) { 368 | return; 369 | } 370 | chains.sort(function(a,b) { 371 | /*if (!a.count) { 372 | return -1; 373 | } 374 | if (!b.count) { 375 | return 1; 376 | }*/ 377 | // a is after b 378 | if (a.firstLink.span.compareDocumentPosition(b.firstLink.span) & 2) { 379 | return 1; 380 | // a is before b 381 | } else if (a.firstLink.span.compareDocumentPosition(b.firstLink.span) & 4) { 382 | return -1; 383 | } 384 | return 0; }); 385 | } 386 | 387 | constructor(name) { 388 | this._name = name; 389 | this._color = undefined; 390 | this.links = new Array(); 391 | // elements for the popup of all chains and links 392 | this.popupDiv = document.createElement('DIV'); 393 | this.popupDiv.classList.add(CLASS_CHAIN_POPUP_CHAIN_DIV); 394 | this.popupDivHeadingParagraph = document.createElement('P'); 395 | this.popupDivHeadingParagraph.classList.add(CLASS_CHAIN_POPUP_CHAIN_NAME); 396 | this.popupDivHeading = document.createElement('A'); 397 | this.popupDivHeadingParagraph.appendChild(this.popupDivHeading); 398 | this.popupDivHeading.textContent = this._name; 399 | var that = this; 400 | this.popupDivHeading.onclick = function(e) { 401 | if (e.ctrlKey) { 402 | that.firstLink.select(); 403 | } else { 404 | if (that.popupLinkDiv.style.display == 'none') { 405 | that.popupLinkDiv.style.display = 'block'; 406 | } else { 407 | 
that.popupLinkDiv.style.display = 'none'; 408 | } 409 | } 410 | }; 411 | this.popupDiv.appendChild(this.popupDivHeadingParagraph); 412 | this.popupLinkDiv = document.createElement('DIV'); 413 | this.popupLinkDiv.classList.add(CLASS_CHAIN_POPUP_LINK_DIV); 414 | this.popupLinkDiv.style.display = 'block'; 415 | this.popupDiv.appendChild(this.popupLinkDiv); 416 | // set the color 417 | this.color = ColorManager.getDefaultColor(); 418 | } 419 | 420 | /* note: chains are sorted every time a link is added, so no need to sort 421 | * them here. 422 | */ 423 | get firstLink() { 424 | return this.links[0]; 425 | } 426 | 427 | get isTrueChain() { 428 | return (this.links.length >= gText.minLinks) 429 | || (name.indexOf('_') == 0); 430 | } 431 | 432 | get name() { 433 | return this._name; 434 | } 435 | 436 | // assume the name is valid: use checkName() 437 | set name(val) { 438 | if (val == this._name) { 439 | return; 440 | } 441 | this._name = val; 442 | this.popupDivHeading.textContent = this._name; 443 | for (var link of this.links) { 444 | link.setChain(this); 445 | } 446 | this.redraw(); 447 | } 448 | 449 | get color() { 450 | return this._color; 451 | } 452 | 453 | // assume the color (an object) is valid (ie. 
not used by another chain) 454 | set color(val) { 455 | this._color = val; 456 | this.popupDiv.style.color = this._color.invertedString; 457 | this.popupDiv.style.backgroundColor = this._color.string; 458 | for (var link of this.links) { 459 | link.setChain(this); 460 | } 461 | this.redraw(); 462 | } 463 | 464 | get count() { 465 | return this.links.length; 466 | } 467 | 468 | upgradeToTrueChain() { 469 | this.color = gText.colorManager.getNextAvailableColor(gText.chainColl.chains); 470 | } 471 | 472 | downgradeToNotTrueChain() { 473 | this.color = ColorManager.getDefaultColor(); 474 | } 475 | 476 | /* Before of the updating of ChainCollection.popupDiv, which is done here 477 | * and not when adding a chain to the collection, you should add only empty 478 | * chain to the collection, and (for this function) add only link to a 479 | * chain already in the collection. 480 | */ 481 | addLink(link) { 482 | if (!gText.chainColl.isThisChainInCollection(this)) { 483 | alert("DEBUG WARNING: before adding link to chain, you should " 484 | +"add the chain to the collection."); 485 | } 486 | var wasTrueChain = this.isTrueChain; 487 | this.links.push(link); 488 | link.setChain(this); 489 | if (!wasTrueChain && this.isTrueChain) { 490 | this.upgradeToTrueChain(); 491 | } else { 492 | this.redraw(); 493 | } 494 | Link.sortLinks(this.links); 495 | this.updatePopupLinkDiv(); 496 | gText.chainColl.sortChainsAndUpdatePopupDiv(); 497 | this.addDraggableEventsToTheChainHeading(); 498 | } 499 | 500 | // NOTE: this will not remove the chain from the chain collection if there 501 | // is no more link left! 
502 | removeLink(link) { 503 | var wasTrueChain = this.isTrueChain; 504 | for (var i=0; i=0; i--) { 562 | if (!onlyVisible || this.links[i].isVisible) { 563 | return this.links[i]; 564 | } 565 | } 566 | } else { 567 | for (var i=index+1; i None: 70 | 71 | self.document_id = document_id 72 | self.sentence_id = sentence_id 73 | self.words = words 74 | self.pos_tags = pos_tags 75 | self.parse_tree = parse_tree 76 | self.predicate_lemmas = predicate_lemmas 77 | self.predicate_framenet_ids = predicate_framenet_ids 78 | self.word_senses = word_senses 79 | self.speakers = speakers 80 | self.named_entities = named_entities 81 | self.srl_frames = srl_frames 82 | self.coref_spans = coref_spans 83 | 84 | 85 | class Ontonotes: 86 | """ 87 | This `DatasetReader` is designed to read in the English OntoNotes v5.0 data 88 | in the format used by the CoNLL 2011/2012 shared tasks. In order to use this 89 | Reader, you must follow the instructions provided [here (v12 release):] 90 | (https://cemantix.org/data/ontonotes.html), which will allow you to download 91 | the CoNLL style annotations for the OntoNotes v5.0 release -- LDC2013T19.tgz 92 | obtained from LDC. 
93 | 94 | Once you have run the scripts on the extracted data, you will have a folder 95 | structured as follows: 96 | 97 | ``` 98 | conll-formatted-ontonotes-5.0/ 99 | ── data 100 | ├── development 101 | └── data 102 | └── english 103 | └── annotations 104 | ├── bc 105 | ├── bn 106 | ├── mz 107 | ├── nw 108 | ├── pt 109 | ├── tc 110 | └── wb 111 | ├── test 112 | └── data 113 | └── english 114 | └── annotations 115 | ├── bc 116 | ├── bn 117 | ├── mz 118 | ├── nw 119 | ├── pt 120 | ├── tc 121 | └── wb 122 | └── train 123 | └── data 124 | └── english 125 | └── annotations 126 | ├── bc 127 | ├── bn 128 | ├── mz 129 | ├── nw 130 | ├── pt 131 | ├── tc 132 | └── wb 133 | ``` 134 | 135 | The file path provided to this class can then be any of the train, test or development 136 | directories(or the top level data directory, if you are not utilizing the splits). 137 | 138 | The data has the following format, ordered by column. 139 | 140 | 1. Document ID : `str` 141 | This is a variation on the document filename 142 | 2. Part number : `int` 143 | Some files are divided into multiple parts numbered as 000, 001, 002, ... etc. 144 | 3. Word number : `int` 145 | This is the word index of the word in that sentence. 146 | 4. Word : `str` 147 | This is the token as segmented/tokenized in the Treebank. Initially the `*_skel` file 148 | contain the placeholder [WORD] which gets replaced by the actual token from the 149 | Treebank which is part of the OntoNotes release. 150 | 5. POS Tag : `str` 151 | This is the Penn Treebank style part of speech. When parse information is missing, 152 | all part of speeches except the one for which there is some sense or proposition 153 | annotation are marked with a XX tag. The verb is marked with just a VERB tag. 154 | 6. Parse bit : `str` 155 | This is the bracketed structure broken before the first open parenthesis in the parse, 156 | and the word/part-of-speech leaf replaced with a `*`. 
When the parse information is 157 | missing, the first word of a sentence is tagged as `(TOP*` and the last word is tagged 158 | as `*)` and all intermediate words are tagged with a `*`. 159 | 7. Predicate lemma : `str` 160 | The predicate lemma is mentioned for the rows for which we have semantic role 161 | information or word sense information. All other rows are marked with a "-". 162 | 8. Predicate Frameset ID : `int` 163 | The PropBank frameset ID of the predicate in Column 7. 164 | 9. Word sense : `float` 165 | This is the word sense of the word in Column 3. 166 | 10. Speaker/Author : `str` 167 | This is the speaker or author name where available. Mostly in Broadcast Conversation 168 | and Web Log data. When not available the rows are marked with an "-". 169 | 11. Named Entities : `str` 170 | These columns identifies the spans representing various named entities. For documents 171 | which do not have named entity annotation, each line is represented with an `*`. 172 | 12. Predicate Arguments : `str` 173 | There is one column each of predicate argument structure information for the predicate 174 | mentioned in Column 7. If there are no predicates tagged in a sentence this is a 175 | single column with all rows marked with an `*`. 176 | -1. Co-reference : `str` 177 | Co-reference chain information encoded in a parenthesis structure. For documents that do 178 | not have co-reference annotations, each line is represented with a "-". 179 | """ 180 | 181 | def dataset_iterator(self, file_path: str) -> Iterator[OntonotesSentence]: 182 | """ 183 | An iterator over the entire dataset, yielding all sentences processed. 184 | """ 185 | for conll_file in self.dataset_path_iterator(file_path): 186 | yield from self.sentence_iterator(conll_file) 187 | 188 | @staticmethod 189 | def dataset_path_iterator(file_path: str) -> Iterator[str]: 190 | """ 191 | An iterator returning file_paths in a directory 192 | containing CONLL-formatted files. 
193 | """ 194 | logger.info("Reading CONLL sentences from dataset files at: %s", file_path) 195 | for root, _, files in list(os.walk(file_path)): 196 | for data_file in files: 197 | # These are a relic of the dataset pre-processing. Every 198 | # file will be duplicated - one file called filename.gold_skel 199 | # and one generated from the preprocessing called filename.gold_conll. 200 | if not data_file.endswith("gold_conll"): 201 | continue 202 | 203 | yield os.path.join(root, data_file) 204 | 205 | def dataset_document_iterator(self, file_path: str) -> Iterator[List[OntonotesSentence]]: 206 | """ 207 | An iterator over CONLL formatted files which yields documents, regardless 208 | of the number of document annotations in a particular file. This is useful 209 | for conll data which has been preprocessed, such as the preprocessing which 210 | takes place for the 2012 CONLL Coreference Resolution task. 211 | """ 212 | with codecs.open(file_path, "r", encoding="utf8") as open_file: 213 | conll_rows = [] 214 | document: List[OntonotesSentence] = [] 215 | for line in open_file: 216 | line = line.strip() 217 | if line != "" and not line.startswith("#"): 218 | # Non-empty line. Collect the annotation. 219 | conll_rows.append(line) 220 | else: 221 | if conll_rows: 222 | document.append(self._conll_rows_to_sentence(conll_rows)) 223 | conll_rows = [] 224 | if line.startswith("#end document"): 225 | yield document 226 | document = [] 227 | if document: 228 | # Collect any stragglers or files which might not 229 | # have the '#end document' format for the end of the file. 230 | yield document 231 | 232 | def sentence_iterator(self, file_path: str) -> Iterator[OntonotesSentence]: 233 | """ 234 | An iterator over the sentences in an individual CONLL formatted file. 
235 | """ 236 | for document in self.dataset_document_iterator(file_path): 237 | for sentence in document: 238 | yield sentence 239 | 240 | def _conll_rows_to_sentence(self, conll_rows: List[str]) -> OntonotesSentence: 241 | document_id: str = None 242 | sentence_id: int = None 243 | # The words in the sentence. 244 | sentence: List[str] = [] 245 | # The pos tags of the words in the sentence. 246 | pos_tags: List[str] = [] 247 | # the pieces of the parse tree. 248 | parse_pieces: List[str] = [] 249 | # The lemmatised form of the words in the sentence which 250 | # have SRL or word sense information. 251 | predicate_lemmas: List[str] = [] 252 | # The FrameNet ID of the predicate. 253 | predicate_framenet_ids: List[str] = [] 254 | # The sense of the word, if available. 255 | word_senses: List[float] = [] 256 | # The current speaker, if available. 257 | speakers: List[str] = [] 258 | 259 | verbal_predicates: List[str] = [] 260 | span_labels: List[List[str]] = [] 261 | current_span_labels: List[str] = [] 262 | 263 | # Cluster id -> List of (start_index, end_index) spans. 264 | clusters: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list) 265 | # Cluster id -> List of start_indices which are open for this id. 266 | coref_stacks: DefaultDict[int, List[int]] = defaultdict(list) 267 | 268 | for index, row in enumerate(conll_rows): 269 | conll_components = row.split() 270 | 271 | document_id = conll_components[0] 272 | sentence_id = int(conll_components[1]) 273 | word = conll_components[3] 274 | pos_tag = conll_components[4] 275 | parse_piece = conll_components[5] 276 | 277 | # Replace brackets in text and pos tags 278 | # with a different token for parse trees. 
279 | if pos_tag != "XX" and word != "XX": 280 | if word == "(": 281 | parse_word = "-LRB-" 282 | elif word == ")": 283 | parse_word = "-RRB-" 284 | else: 285 | parse_word = word 286 | if pos_tag == "(": 287 | pos_tag = "-LRB-" 288 | if pos_tag == ")": 289 | pos_tag = "-RRB-" 290 | (left_brackets, right_hand_side) = ('_', '_') 291 | # only keep ')' if there are nested brackets with nothing in them. 292 | right_brackets = right_hand_side.count(")") * ")" 293 | parse_piece = f"{left_brackets} ({pos_tag} {parse_word}) {right_brackets}" 294 | else: 295 | # There are some bad annotations in the CONLL data. 296 | # They contain no information, so to make this explicit, 297 | # we just set the parse piece to be None which will result 298 | # in the overall parse tree being None. 299 | parse_piece = None 300 | 301 | lemmatised_word = conll_components[6] 302 | framenet_id = conll_components[7] 303 | word_sense = conll_components[8] 304 | speaker = conll_components[9] 305 | 306 | if not span_labels: 307 | # If this is the first word in the sentence, create 308 | # empty lists to collect the NER and SRL BIO labels. 309 | # We can't do this upfront, because we don't know how many 310 | # components we are collecting, as a sentence can have 311 | # variable numbers of SRL frames. 312 | span_labels = [[] for _ in conll_components[10:-1]] 313 | # Create variables representing the current label for each label 314 | # sequence we are collecting. 315 | current_span_labels = [None for _ in conll_components[10:-1]] 316 | 317 | self._process_span_annotations_for_word( 318 | conll_components[10:-1], span_labels, current_span_labels 319 | ) 320 | 321 | # If any annotation marks this word as a verb predicate, 322 | # we need to record its index. This also has the side effect 323 | # of ordering the verbal predicates by their location in the 324 | # sentence, automatically aligning them with the annotations. 
325 | word_is_verbal_predicate = any("(V" in x for x in conll_components[11:-1]) 326 | if word_is_verbal_predicate: 327 | verbal_predicates.append(word) 328 | 329 | self._process_coref_span_annotations_for_word( 330 | conll_components[10], index, clusters, coref_stacks 331 | ) 332 | 333 | sentence.append(word) 334 | pos_tags.append(pos_tag) 335 | parse_pieces.append(parse_piece) 336 | predicate_lemmas.append(lemmatised_word if lemmatised_word != "_" else None) 337 | predicate_framenet_ids.append(framenet_id if framenet_id != "_" else None) 338 | word_senses.append(float(word_sense) if word_sense != "_" else None) 339 | speakers.append(speaker if speaker != "_" else None) 340 | 341 | named_entities = '_' 342 | srl_frames = 0 343 | 344 | 345 | parse_tree = None 346 | coref_span_tuples: Set[TypedSpan] = { 347 | (cluster_id, span) for cluster_id, span_list in clusters.items() for span in span_list 348 | } 349 | return OntonotesSentence( 350 | document_id, 351 | sentence_id, 352 | sentence, 353 | pos_tags, 354 | parse_tree, 355 | predicate_lemmas, 356 | predicate_framenet_ids, 357 | word_senses, 358 | speakers, 359 | named_entities, 360 | srl_frames, 361 | coref_span_tuples, 362 | ) 363 | 364 | @staticmethod 365 | def _process_coref_span_annotations_for_word( 366 | label: str, 367 | word_index: int, 368 | clusters: DefaultDict[int, List[Tuple[int, int]]], 369 | coref_stacks: DefaultDict[int, List[int]], 370 | ) -> None: 371 | """ 372 | For a given coref label, add it to a currently open span(s), complete a span(s) or 373 | ignore it, if it is outside of all spans. This method mutates the clusters and coref_stacks 374 | dictionaries. A label equal to `-` is skipped; any other label is split on `|`. A segment that starts with `(` opens a span for the cluster id it carries, `(N)` is recorded at once as a single-word span, and any other segment is read as a closing `N)`. A closing segment whose cluster id has no open span on its stack is ignored. 375 | 376 | # Parameters 377 | 378 | label : `str` 379 | The coref label for this word. 380 | word_index : `int` 381 | The word index into the sentence. 382 | clusters : `DefaultDict[int, List[Tuple[int, int]]]` 383 | A dictionary mapping cluster ids to lists of inclusive spans into the 384 | sentence.
385 | coref_stacks : `DefaultDict[int, List[int]]` 386 | Stacks for each cluster id to hold the start indices of active spans (spans 387 | which we are inside of when processing a given word). Spans with the same id 388 | can be nested, which is why we collect these opening spans on a stack, e.g: 389 | 390 | [Greg, the baker who referred to [himself]_ID1 as 'the bread man']_ID1 391 | """ 392 | if label != "-": 393 | for segment in label.split("|"): 394 | # The conll representation of coref spans allows spans to 395 | # overlap. If spans end or begin at the same word, they are 396 | # separated by a "|". 397 | if segment[0] == "(": 398 | # The span begins at this word. 399 | if segment[-1] == ")": 400 | # The span begins and ends at this word (single word span). 401 | cluster_id = int(segment[1:-1]) 402 | clusters[cluster_id].append((word_index, word_index)) 403 | else: 404 | # The span is starting, so we record the index of the word. 405 | cluster_id = int(segment[1:]) 406 | coref_stacks[cluster_id].append(word_index) 407 | else: 408 | # The span for this id is ending, but didn't start at this word. 409 | # Retrieve the start index from the document state and 410 | # add the span to the clusters for this id. 411 | cluster_id = int(segment[:-1]) 412 | if coref_stacks[cluster_id]: 413 | start = coref_stacks[cluster_id].pop() 414 | clusters[cluster_id].append((start, word_index)) 415 | 416 | @staticmethod 417 | def _process_span_annotations_for_word( 418 | annotations: List[str], 419 | span_labels: List[List[str]], 420 | current_span_labels: List[Optional[str]], 421 | ) -> None: 422 | """ 423 | Given a sequence of different label types for a single word and the current 424 | span label we are inside, compute the BIO tag for each label and append to a list. Exactly one tag is appended to each `span_labels[i]` per call: `B-` plus the label when the annotation contains `(`, `I-` plus the open label while a span is open for that annotation, otherwise `O`. An annotation containing `)` clears the open span after the tag has been appended. Both list arguments are updated in place and nothing is returned. 425 | 426 | # Parameters 427 | 428 | annotations : `List[str]` 429 | A list of labels to compute BIO tags for.
430 | span_labels : `List[List[str]]` 431 | A list of lists, one for each annotation, to incrementally collect 432 | the BIO tags for a sequence. 433 | current_span_labels : `List[Optional[str]]` 434 | The currently open span per annotation type, or `None` if there is no open span. 435 | """ 436 | for annotation_index, annotation in enumerate(annotations): 437 | # strip all bracketing information to 438 | # get the actual propbank label. 439 | label = annotation.strip("()*") 440 | 441 | if "(" in annotation: 442 | # Entering into a span for a particular semantic role label. 443 | # We append the label and set the current span for this annotation. 444 | bio_label = "B-" + label 445 | span_labels[annotation_index].append(bio_label) 446 | current_span_labels[annotation_index] = label 447 | elif current_span_labels[annotation_index] is not None: 448 | # If there's no '(' token, but the current_span_label is not None, 449 | # then we are inside a span. 450 | bio_label = "I-" + current_span_labels[annotation_index] 451 | span_labels[annotation_index].append(bio_label) 452 | else: 453 | # We're outside a span. 454 | span_labels[annotation_index].append("O") 455 | # Exiting a span, so we reset the current span label for this annotation. 456 | if ")" in annotation: 457 | current_span_labels[annotation_index] = None 458 | -------------------------------------------------------------------------------- /js/cls_property.js: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain 4 | * annotation tool. 5 | * 6 | * Copyright 2017 Bruno Oberlé. 7 | * 8 | * This Source Code Form is subject to the terms of the Mozilla Public License, 9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can 10 | * obtain one at http://mozilla.org/MPL/2.0/. 11 | * 12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public 13 | * License, v.
2.0 for more details. 14 | * 15 | * Some questions about the license may have been answered at 16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/. 17 | * 18 | * If you have any question, contact me at boberle.com. 19 | * 20 | * The source code can be found at boberle.com. 21 | * 22 | */ 23 | /* Edit-mode codes stored in Schema.editMode. They are assigned without a declaration keyword. */ 24 | EDIT_NORMAL = 0; 25 | EDIT_TAB = 1; 26 | EDIT_AUTO = 2; 27 | /** Holds the property schema parsed from a text description, builds LinkProperties from it, and owns the 'Edit Mode' button together with the settings chosen in its dialog. */ 28 | class Schema { 29 | /** @param code Schema text, passed to _parse(). _parse() runs before listOfProperties is initialised; in the visible code it only fills _rawProperties and clears isEmpty. */ 30 | constructor(code) { 31 | this.isEmpty = true; 32 | this._rawProperties = {}; // name:{dic found in the code} 33 | this._parse(code); 34 | this.listOfProperties = {}; // name:{type:"TYPE", values:[VALUES]} 35 | // (values is empty for textbox and head) 36 | this.editMode = EDIT_NORMAL; 37 | this.editInChain = false; 38 | this.editOnlyVisible = false; 39 | this._button = null; 40 | var that = this; 41 | this._editModeDialog = new EditModeDialog( 42 | function(editMode, inChain, onlyVisible) { 43 | that.editMode = editMode; 44 | that.editInChain = inChain; 45 | that.editOnlyVisible = onlyVisible; 46 | }); 47 | } 48 | /** Reads the schema text line by line (split on runs of newlines). Blank lines and lines whose first non-blank character is # are skipped. A line starting with PROP: registers the property collected so far and starts a new one whose options are read by CommonFunctions.parseValues(line, 4). Any other line is added, without its leading whitespace, to the values of the current property ($$$ stands for the empty string). Problems are reported with alert(). */ 49 | _parse(text) { 50 | var tmp; 51 | var lines = text.split(/\n+/); 52 | var cur = null; 53 | for (var line of lines) { 54 | if ((line.match(/^\s*(?:#.*)?$/)) != null) { 55 | // nothing: it's a comment 56 | } else if (((tmp = line.match(/^PROP:/)) != null)) { 57 | if (cur) { 58 | this.addPrototypeProperty(cur); 59 | } 60 | var response = CommonFunctions.parseValues(line, 4); 61 | if (response.startIndex != line.length) { 62 | alert("Can't parse line: "+line+" (error when reading option values)"); 63 | } 64 | cur = response.dic; 65 | } else if (((tmp = line.match(/^\s*(.+)$/)) != null)) { 66 | if (cur) { 67 | if (!('values' in cur)) { 68 | cur.values = new Array(); 69 | } 70 | cur.values.push(tmp[1] == '$$$' ?
'' : tmp[1]); 71 | } else { 72 | alert("Can't parse line: "+line); 73 | } 74 | } else { 75 | alert("Can't parse line: "+line); 76 | } 77 | } // for 78 | if (cur) { 79 | this.addPrototypeProperty(cur); 80 | } 81 | } 82 | /** Stores dic in _rawProperties under dic['name'] and marks the schema as non-empty; alerts when dic has no 'name' key. */ 83 | addPrototypeProperty(dic) { 84 | if ('name' in dic) { 85 | this._rawProperties[dic['name']] = dic; 86 | this.isEmpty = false; 87 | //console.log(dic) 88 | } else { 89 | alert("error in the schema: a property doesn't have any name"); 90 | } 91 | } 92 | 93 | /* @param givenValues: A dictionary (keys are property names (like 94 | * `gramfunction'), values are property values (like `subject')). Give an 95 | * empty dictionary to get a default property list. Builds one LinkProperty per schema entry, applying the newline, showname, textboxsize, type, values and addShortcuts options when they are present, and refreshes this.listOfProperties (type and values per property) as a side effect. When gText.showPropertyWarnings is set, mismatches between givenValues and the schema are reported with alert(). @returns a new LinkProperties wrapping the properties that were built. 96 | */ 97 | buildLinkProperties(givenValues) { 98 | var properties = []; 99 | for (var name in this._rawProperties) { 100 | this.listOfProperties[name] = {}; 101 | this.listOfProperties[name]['type'] = 'normal'; // default 102 | this.listOfProperties[name]['values'] = {}; // default 103 | var setup = this._rawProperties[name]; 104 | var initialValue = ""; 105 | if (name in givenValues) { 106 | initialValue = givenValues[name]; 107 | } else if (gText.showPropertyWarnings 108 | && Object.keys(givenValues).length) { 109 | alert("Property `"+name+"' not found in the file."); 110 | } 111 | var prop = new LinkProperty(name, initialValue); 112 | if ('newline' in setup) 113 | prop.newLineAfter = (setup['newline'] == 'true'); 114 | if ('showname' in setup) 115 | prop.showName = (setup['showname'] == 'true'); 116 | if ('textboxsize' in setup) 117 | prop.textboxSize = setup['textboxsize']; 118 | if ('type' in setup) 119 | prop.type = setup['type']; 120 | this.listOfProperties[name]['type'] = prop.type; 121 | if ('values' in setup) 122 | prop.values = setup['values']; 123 | this.listOfProperties[name]['values'] = prop.values; 124 | if ('addShortcuts' in setup) 125 | prop.addShortcuts = setup['addShortcuts']; 126 | properties.push(prop); 127 | } 128 | if (gText.showPropertyWarnings) { 129 | for (var name in
givenValues) { 130 | if (!(name in this._rawProperties)) { 131 | alert("Property `"+name+"' found in the file, but not in the schema."); 132 | } 133 | } 134 | } 135 | return new LinkProperties(properties); 136 | } 137 | /** Creates the 'Edit Mode' INPUT button on first access and returns the same element afterwards; clicking it shows the edit-mode dialog. */ 138 | get button() { 139 | if (!this._button) { 140 | this._button = document.createElement('INPUT'); 141 | this._button.type = 'BUTTON'; 142 | this._button.value = 'Edit Mode'; 143 | var that = this; 144 | this._button.onclick = function() { 145 | that._editModeDialog.show(); 146 | }; 147 | } 148 | return this._button; 149 | } 150 | 151 | } 152 | 153 | 154 | class LinkProperties { 155 | /** Keeps the given property list and builds this.div, which receives for each property an optional "name: " text node (when showName is set) followed by property.element. */ 156 | constructor(properties) { 157 | this.properties = properties; 158 | this.div = document.createElement('DIV'); 159 | for (var property of properties) { 160 | if (property.showName) { 161 | this.div.appendChild(document.createTextNode(property.name+": ")); 162 | } 163 | this.div.appendChild(property.element); 164 | } 165 | } 166 | /** Calls resetHead(link) on every property whose type is 'head'. */ 167 | resetHeadProperty(link) { 168 | for (var prop of this.properties) { 169 | if (prop.type == 'head') { 170 | prop.resetHead(link); 171 | } 172 | } 173 | } 174 | /** Serialises the properties as comma-joined key="value" pairs sorted by key. Adds head_text (taken from a 'head' property) when includeHeadText is truthy, and content when a truthy content is given. */ 175 | getString(includeHeadText, content) { 176 | var props = {}; 177 | for (var property of this.properties) { 178 | props[property.name] = property.value; 179 | } 180 | if (includeHeadText) { 181 | for (var property of this.properties) { 182 | if (property.type == 'head') { 183 | props['head_text'] = property.headText; 184 | } 185 | } 186 | } 187 | if (content) { 188 | props['content'] = content; 189 | } 190 | var keys = new Array(); 191 | for (var key in props) { 192 | keys.push(key); 193 | } 194 | keys.sort(); 195 | var strings = new Array(); 196 | for (var key of keys) { 197 | //console.log(key); /* NOTE(review): the replacement argument on the next line reads as three double quotes, which is not a valid string literal; presumably an HTML entity such as &quot; was decoded when this text was extracted - confirm against the upstream file. With a string pattern, replace() substitutes only the first occurrence. */ 198 | var esc = props[key].replace('"', """); 199 | strings.push(key + '="' + esc + '"'); 200 | } 201 | return strings.join(','); 202 | } 203 | 204 | copyPropertiesFrom(properties) { 205 | for (var i=0; i 0) { 522 | equalValue.remove(0); 523 | } 524 | for (var value of values)
{ 525 | var option = document.createElement('option'); 526 | option.value = value; 527 | option.text = value; 528 | equalValue.appendChild(option); 529 | } 530 | }; 531 | equalProperty.onchange(); 532 | // controls for matchPar 533 | var matchProperty = document.createElement('select'); 534 | matchPar.appendChild(matchProperty); 535 | for (var propName in schema.listOfProperties) { 536 | var option = document.createElement('option'); 537 | option.value = propName; 538 | option.text = propName; 539 | matchProperty.appendChild(option); 540 | } 541 | var matchOperator = document.createElement('select'); 542 | matchPar.appendChild(matchOperator); 543 | for (var op of ['matches', 'does not match']) { 544 | var option = document.createElement('option'); 545 | option.value = op; 546 | option.text = op; 547 | matchOperator.appendChild(option); 548 | } 549 | var matchValue = document.createElement('input'); 550 | matchValue.type = "text"; 551 | matchPar.appendChild(matchValue); 552 | // search button 553 | var buttonPar = document.createElement('P'); 554 | div.appendChild(buttonPar); 555 | var button = document.createElement('input'); 556 | button.type = "button"; 557 | button.value = "search"; 558 | buttonPar.appendChild(button); 559 | buttonPar.onclick = function(e) { 560 | var name, searchedValue, reversed; 561 | if (checkUseRegex.checked) { 562 | name = matchProperty.options.item(matchProperty.selectedIndex).value; 563 | op = matchOperator.options.item(matchOperator.selectedIndex).value; 564 | if (op == 'matches') { 565 | reversed = false; 566 | } else { 567 | reversed = true; 568 | } 569 | var value = matchValue.value; 570 | if (!value) { 571 | value = "^$"; 572 | } 573 | try { 574 | searchedValue = new RegExp(value); 575 | } catch(err) { 576 | searchedValue = null; 577 | alert("Invalid regular expression."); 578 | } 579 | } else { 580 | name = equalProperty.options.item(equalProperty.selectedIndex).value; 581 | op = 
equalOperator.options.item(equalOperator.selectedIndex).value; 582 | if (op == 'is equal to') { 583 | reversed = false; 584 | } else { 585 | reversed = true; 586 | } 587 | searchedValue = 588 | equalValue.options.item(equalValue.selectedIndex).value; 589 | } 590 | if (searchedValue !== null) { // see the try/catch above 591 | that.callback(name, searchedValue, reversed); // if searchedValue 592 | // is a string, it will be used with an `equal to' function, 593 | // otherwise with a regex function 594 | that.modalDiv.close(); 595 | } 596 | }; 597 | div.style['overflow-y'] = "scroll"; 598 | } 599 | 600 | show() { 601 | this.modalDiv.show(); 602 | } 603 | 604 | } 605 | 606 | 607 | class EditModeDialog { 608 | 609 | constructor(callback) { 610 | this.callback = callback; 611 | var div = document.createElement("div"); 612 | div.style.padding = "20px"; 613 | this.modalDiv = new ModalDiv("Edit mode", div); 614 | var that = this; 615 | var par = document.createElement('P'); 616 | div.appendChild(par); 617 | par.appendChild(document.createTextNode("Choose the edit mode: ")); 618 | // combo 619 | var comboEditMode = document.createElement('select'); 620 | par.appendChild(comboEditMode); 621 | var modes = ["normal: html elements behave normally", 622 | "tab-mode: use tab on a property control to go to the next link", 623 | "auto-mode: setting a property automatically put you on the next link"]; 624 | var first = true; 625 | for (var i=0; iFirefox, " 95 | + "или хотя бы " 96 | + "Chromium " 97 | + "или Google Chrome!"; 98 | //p.style.color = 'red'; 99 | p.style.fontStyle = 'italic'; 100 | p = document.createElement('P'); 101 | div.appendChild(p); 102 | p.innerHTML = "Вы можете ознакомиться с руководством пользователя " 103 | + "здесь" 104 | + ", и видеотуториалами (на французском) " 105 | + "здесь" 106 | + "."; 107 | p.style.fontStyle = 'italic'; 108 | div.appendChild(document.createElement('HR')); 109 | // text 110 | var textareaText = document.createElement('TEXTAREA'); 111 
| textareaText.cols = 90; 112 | textareaText.rows = 20; 113 | p = document.createElement('P'); 114 | div.appendChild(p); 115 | p.innerHTML = "Впишите или вставьте текст в поле ввода ниже, либо используйте одну из представленных опций:"; 116 | ul = document.createElement('UL'); 117 | p.appendChild(ul); 118 | li = document.createElement('LI'); 119 | ul.appendChild(li); 120 | t = document.createTextNode("Загрузите файл: "); 121 | li.appendChild(t); 122 | var inputText = document.createElement('INPUT'); 123 | li.appendChild(inputText); 124 | inputText.type = 'file'; 125 | inputText.onchange = function(){ 126 | var reader = new FileReader(); 127 | reader.onload = function(e) { 128 | textareaText.value = e.target.result; 129 | }; 130 | reader.readAsText(this.files[0]); 131 | that.textFilename = this.files[0].name; 132 | }; 133 | li = document.createElement('LI'); 134 | ul.appendChild(li); 135 | t = document.createTextNode("Используйте текст басни Эзопа (фр. яз.): "); 136 | li.appendChild(t); 137 | input = document.createElement('INPUT'); 138 | li.appendChild(input); 139 | input.type = 'button'; 140 | input.value = "with annotations"; 141 | input.onclick = function() { 142 | textareaText.value = DataLoader.getSampleTextWithAnnotations(); 143 | } 144 | if (go == 'withAnnotations') { 145 | textareaText.value = DataLoader.getSampleTextWithAnnotations(); 146 | } 147 | t = document.createTextNode(" или "); 148 | li.appendChild(t); 149 | input = document.createElement('INPUT'); 150 | li.appendChild(input); 151 | input.type = 'button'; 152 | input.value = "without annotation"; 153 | input.onclick = function() { 154 | textareaText.value = DataLoader.getSampleTextWithoutAnnotations(); 155 | } 156 | if (go == 'withoutAnnotations') { 157 | textareaText.value = DataLoader.getSampleTextWithoutAnnotations(); 158 | } 159 | div.appendChild(textareaText); 160 | // properties 161 | var textareaProperties = document.createElement('TEXTAREA'); 162 | textareaProperties.cols = 90; 163 | 
textareaProperties.rows = 20; 164 | p = document.createElement('P'); 165 | div.appendChild(p); 166 | p.innerHTML = "Впишите или вставьте параметры в поле ввода ниже, либо используйте одну из представленных опций:"; 167 | ul = document.createElement('UL'); 168 | p.appendChild(ul); 169 | li = document.createElement('LI'); 170 | ul.appendChild(li); 171 | t = document.createTextNode("Загрузите файл: "); 172 | li.appendChild(t); 173 | var inputSchema = document.createElement('INPUT'); 174 | li.appendChild(inputSchema); 175 | inputSchema.type = 'file'; 176 | inputSchema.onchange = function(){ 177 | var reader = new FileReader(); 178 | reader.onload = function(e) { 179 | textareaProperties.value = e.target.result; 180 | }; 181 | reader.readAsText(this.files[0]); 182 | }; 183 | li = document.createElement('LI'); 184 | ul.appendChild(li); 185 | t = document.createTextNode("Используйте "); 186 | li.appendChild(t); 187 | input = document.createElement('INPUT'); 188 | li.appendChild(input); 189 | input.type = 'button'; 190 | input.value = "схему по умолчанию (фр. 
яз.)"; 191 | input.onclick = function() { 192 | textareaProperties.value = DataLoader.getSampleSchema(); 193 | } 194 | if (go == 'withAnnotations') { 195 | textareaProperties.value = DataLoader.getSampleSchema(); 196 | } 197 | div.appendChild(textareaProperties); 198 | // number of link 199 | p = document.createElement('P'); 200 | div.appendChild(p); 201 | p.innerHTML = "Введите минимальное количество связей в кореферентной цепи:"; 202 | input = document.createElement('INPUT'); 203 | p.appendChild(input); 204 | input.type = 'number'; 205 | input.min = '1'; 206 | input.max = '50'; 207 | input.value = '2'; 208 | input.style.width = '70px'; // size attribute doesn't work for `number' 209 | input.onchange = function() { 210 | that.minLinks = this.value; 211 | }; 212 | // number of colors 213 | p = document.createElement('P'); 214 | div.appendChild(p); 215 | p.appendChild(document.createTextNode("Вы можете настроить количество цветов, если вам нужно больше цветов: ")); 216 | var colorSpan = document.createElement('SPAN'); 217 | p.appendChild(colorSpan); 218 | var ul = document.createElement('UL'); 219 | div.appendChild(ul); 220 | var li = document.createElement('LI'); 221 | ul.appendChild(li) 222 | // hue 223 | li.appendChild(document.createTextNode("Оттенок: ")); 224 | input = document.createElement('INPUT'); 225 | li.appendChild(input); 226 | input.type = 'number'; 227 | input.min = '10'; 228 | input.max = '50'; 229 | input.value = this.hueStep; 230 | input.style.width = '70px'; // size attribute doesn't work for `number' 231 | input.onchange = function() { 232 | that.hueStep = parseInt(this.value); 233 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)"; 234 | }; 235 | // saturation 236 | li.appendChild(document.createTextNode(" Насыщенность: ")); 237 | input = document.createElement('INPUT'); 238 | li.appendChild(input); 239 | input.type = 'number'; 240 | input.min = '10'; 241 | input.max = '50'; 242 | input.value = this.saturationStep; 243 | 
input.style.width = '70px'; // size attribute doesn't work for `number' 244 | input.onchange = function() { 245 | that.saturationStep = parseInt(this.value); 246 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)"; 247 | }; 248 | // lightness 249 | li.appendChild(document.createTextNode(" Яркость: ")); 250 | input = document.createElement('INPUT'); 251 | li.appendChild(input); 252 | input.type = 'number'; 253 | input.min = '5'; 254 | input.max = '25'; 255 | input.value = this.ligthnessStep; 256 | input.style.width = '70px'; // size attribute doesn't work for `number' 257 | input.onchange = function() { 258 | that.ligthnessStep = parseInt(this.value); 259 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)"; 260 | }; 261 | input.onchange(); 262 | // tokenization type 263 | p = document.createElement('P'); 264 | div.appendChild(p); 265 | p.innerHTML = "Выберите тип токенизации: "; 266 | var select = document.createElement('SELECT'); 267 | p.appendChild(select); 268 | for (var type of new Array('word', 'word and punctuation', 'character')) { 269 | var option = document.createElement('OPTION'); 270 | option.text = type; 271 | select.appendChild(option); 272 | } 273 | select.selectedIndex = 0; // default 274 | this.tokenizationType = TOKENIZATION_WORD; // default 275 | select.onchange = function() { 276 | if (this.selectedIndex == 0) { 277 | that.tokenizationType = TOKENIZATION_WORD; 278 | } else if (this.selectedIndex == 1) { 279 | that.tokenizationType = TOKENIZATION_WORD_N_PUNCT; 280 | } else { 281 | that.tokenizationType = TOKENIZATION_CHARACTER; 282 | } 283 | } 284 | // show property warnings 285 | p = document.createElement('P'); 286 | div.appendChild(p); 287 | p.innerHTML = "Показать предупреждения: "; 288 | var checkPropertyWarnings = document.createElement('INPUT'); 289 | checkPropertyWarnings.type = 'CHECKBOX'; 290 | checkPropertyWarnings.checked = true; 291 | p.appendChild(checkPropertyWarnings); 292 | // parse the 
data 293 | p = document.createElement('P'); 294 | div.appendChild(p); 295 | p.innerHTML = "Затем нажмите кнопку, чтобы "; 296 | input = document.createElement('INPUT'); 297 | p.appendChild(input); 298 | input.type = 'button'; 299 | input.value = "распарсить документ"; 300 | input.onclick = function() { 301 | if (!textareaText.value) { 302 | alert("No text!"); 303 | } else { 304 | // check if there is a tokenization type defined in the metadata 305 | // of the text 306 | var tmp; 307 | if ((tmp = textareaText.value.match(/^\s*#\s*TOKENIZATION-TYPE\s*:\s*(\d)/mi)) != null) { 308 | if (parseInt(tmp[1]) != that.tokenizationType) { 309 | alert("Tokenization type of the text doesn't match the value of the list box!"); 310 | return; 311 | } 312 | } 313 | that.schema = textareaProperties.value; 314 | that.text = textareaText.value; 315 | that.showPropertyWarnings = checkPropertyWarnings.checked; 316 | that.callback(that); 317 | that.modalDiv.close(); 318 | } 319 | }; 320 | this.parseButton = input; 321 | // license 322 | p = document.createElement('P'); 323 | div.appendChild(p); 324 | p.innerHTML = "SACR -- (C) 2017 Bruno Oberlé. This program " 325 | +"is distributed under the terms of the Mozilla Public License, v.2.0. " 326 | +"This program comes with ABSOLUTELY NO WARRANTY, see the license for more details. " 327 | +"Source code may be found at boberle.com."; 328 | } 329 | /** Returns the result of ColorBuilder.computeNbOfColors() for the current hueStep, saturationStep and ligthnessStep (the property name is spelled that way in the code). */ 330 | computeNbOfColors() { 331 | return ColorBuilder.computeNbOfColors(this.hueStep, this.saturationStep, 332 | this.ligthnessStep); 333 | } 334 | /** Triggers a click on the parse button that the constructor stored in this.parseButton, running its onclick handler. */ 335 | clickOnTheParseButton() { 336 | this.parseButton.click(); 337 | } 338 | 339 | } 340 | 341 | 342 | --------------------------------------------------------------------------------