├── .gitignore
├── example.png
├── requirements.txt
├── js
├── cls_modal_div.js
├── cls_chain_popup.js
├── cls_scrolling.js
├── cls_common.js
├── cls_exporter.js
├── cls_colors.js
├── cls_sacr_parser.js
├── cls_link.js
├── cls_text.js
├── cls_chain.js
├── cls_property.js
└── cls_data_loader.js
├── autoannotations.py
├── README.md
├── style.css
├── index.html
└── ontonotes.py
/.gitignore:
--------------------------------------------------------------------------------
1 | private_mistakes/
2 | model2.tar.gz
3 | out.txt
4 | temp.txt
--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gleb-skobinsky/RuCoref-inference/HEAD/example.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | allennlp==2.2.0
2 | allennlp-models==2.2.0
3 | pytorch-transformers==1.1.0
4 |
5 | torch==1.8.1 #also compatible: torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
6 |
7 | transformers==4.4.2
8 | click==7.0
--------------------------------------------------------------------------------
/js/cls_modal_div.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
/**
 * Full-window overlay made of a heading, an optional "Cancel" button and a
 * caller-supplied content element.
 *
 * The overlay element is built lazily on the first call to show() and is
 * re-attached as-is on later calls.
 */
class ModalDiv {

   /**
    * @param title: text displayed in the heading.
    * @param contentDiv: DOM node appended below the heading.
    * @param hideCancel: if it evaluates to true, no "Cancel" button is added.
    */
   constructor(title, contentDiv, hideCancel) {
      this.title = title;
      this.div = null; // created by the first show()
      this.contentDiv = contentDiv;
      this.hideCancel = hideCancel;
      this.isCancelled = false; // set to true by the "Cancel" button
      this.hasBeenShown = false;
   }

   /**
    * Attaches the overlay to document.body, building it on the first call.
    */
   show() {
      if (this.hasBeenShown) {
         // already built: just put it back in the page
         document.body.appendChild(this.div);
         return;
      }
      // creating the elements
      const heading = document.createElement("h1");
      heading.style.margin = "10px";
      heading.appendChild(document.createTextNode(this.title));
      this.div = document.createElement("DIV");
      this.div.style['overflow-y'] = "scroll";
      this.div.style.backgroundColor = "white";
      this.div.style.position = "fixed";
      this.div.style.top = "0px";
      this.div.style.left = "0px";
      this.div.style.height = "100%";
      this.div.style.width = "100%";
      const cancelButton = document.createElement("input");
      cancelButton.type = "button";
      cancelButton.value = "Cancel";
      cancelButton.style.position = "absolute";
      cancelButton.style.right = "20px";
      cancelButton.style.top = "20px";
      cancelButton.onclick = () => {
         this.isCancelled = true;
         this.close();
      };
      this.div.appendChild(heading);
      if (!this.hideCancel) {
         this.div.appendChild(cancelButton);
      }
      this.div.appendChild(this.contentDiv);
      document.body.appendChild(this.div);
      this.hasBeenShown = true;
   }

   /**
    * Detaches the overlay from document.body.
    *
    * Does nothing when the overlay has never been shown or is already
    * detached, so calling it early or twice no longer throws.
    */
   close() {
      if (this.div && this.div.parentNode === document.body) {
         document.body.removeChild(this.div);
      }
   }

}
78 |
79 |
--------------------------------------------------------------------------------
/autoannotations.py:
--------------------------------------------------------------------------------
1 | import nltk.tokenize as tk
2 | from nltk.tokenize import wordpunct_tokenize
3 | import subprocess
4 | import jsonlines
5 | import sys
6 | import os
7 | import argparse
8 | import subprocess
9 |
# --- Command line and working files ------------------------------------------
parser = argparse.ArgumentParser()
# The model path is mandatory: without it the string "None" would be handed to
# allennlp and the failure would only surface later.
parser.add_argument("--model", required=True, help="Path to the coref model")
args = parser.parse_args()
MODEL = args.model
TEMP_FILE = r'temp.txt'  # token file handed to allennlp
OUT_FILE = r'out.txt'    # predictions written by allennlp

# --- Read and tokenize the text ----------------------------------------------
text = input("Введите текст: ")
sentences = tk.sent_tokenize(text, language='russian')
templist = [wordpunct_tokenize(sentence) for sentence in sentences]

# --- Build the model input ---------------------------------------------------
# One tab-separated line per token; the token index restarts at 0 for every
# sentence and sentences are separated by an empty line.
tempstring = ''
for tokenized_sent in templist:
    tempstring += '\n'
    for token_index, token in enumerate(tokenized_sent):
        tempstring += 'book0\t0\t' + str(token_index) + '\t' + token + '\t' + '_\t_\t_\t_\t_\t_\t-\n'
tempstring = '#begin document (book0); part 0' + tempstring + '\n' + '#end document'

# The `with` block closes the file; no explicit close() is needed.
with open(TEMP_FILE, 'w', encoding='utf-8') as tempfile:
    tempfile.write(tempstring)

# --- Run the model -----------------------------------------------------------
print('Loading and building the model...')
p = subprocess.run(["allennlp", "evaluate", f"{MODEL}", f"{TEMP_FILE}", "--predictions-output-file", f"{OUT_FILE}"], capture_output=True)
# Output is captured, so a failure would otherwise be invisible and a stale
# OUT_FILE from a previous run could be read instead of fresh predictions.
if p.returncode != 0:
    sys.stderr.write(p.stderr.decode('utf-8', errors='replace'))
    sys.exit(f'allennlp evaluate failed with exit code {p.returncode}')
41 |
def get_mention(begin, text_list, all_cluster_tokens):
    """Format the mention that starts at token index ``begin``.

    Args:
        begin: index (into ``text_list``) of the first token of the mention.
        text_list: list of token strings of the whole text.
        all_cluster_tokens: list whose first element is the list of clusters;
            each cluster is a list of mentions and each mention is a sequence
            whose first and last items are the inclusive start and end token
            indices.

    Returns:
        ``'{M<k>: tok1 tok2 }'`` where ``k`` is the 1-based position of the
        cluster containing the mention, or ``None`` if no mention starts at
        ``begin``. When several mentions start at ``begin`` the first one
        found (in cluster order) is returned.
    """
    for cluster_index, cluster in enumerate(all_cluster_tokens[0]):
        for token_set in cluster:
            if begin != token_set[0]:
                continue
            # Inclusive span; a one-token mention is simply a slice of length 1.
            words = text_list[token_set[0]:token_set[-1] + 1]
            mention_string = ''.join(word + ' ' for word in words)
            # The label is derived from the cluster position directly, so the
            # function no longer depends on a module-level list of ids.
            return '{' + f'M{cluster_index + 1}' + ': ' + mention_string + '}'
    return None
56 |
# --- Read the predictions ----------------------------------------------------
# Each line of OUT_FILE is a JSON object; the first item of its 'clusters'
# entry is kept.
cluster_offsets = []
with jsonlines.open(OUT_FILE) as predicted_file:
    for line in predicted_file.iter():
        cluster_offsets.append(line['clusters'][0])

if not cluster_offsets:
    # Without this guard the code below fails with a bare IndexError.
    sys.exit(f'No predictions found in {OUT_FILE}')

# Every token index covered by some mention (inclusive spans). A set keeps the
# membership test in the printing loop constant-time.
all_indices = set()
for cluster_unit in cluster_offsets:
    for cluster in cluster_unit:
        for mention in cluster:
            all_indices.update(range(mention[0], mention[1] + 1))

# One label per cluster: M1, M2, ...
number = len(cluster_offsets[0])
cluster_uids = [f'M{i+1}' for i in range(number)]

# Token indices at which a mention starts.
beginnings = set()
for cluster in cluster_offsets[0]:
    beginnings.update(i[0] for i in cluster)

# --- Print the annotated text ------------------------------------------------
# Tokens outside any mention are printed as-is; at the first token of a
# mention the whole bracketed mention is printed; the remaining tokens of a
# mention are skipped because they were already emitted with it.
text_list = wordpunct_tokenize(text)
for idx, token in enumerate(text_list):
    if idx not in all_indices:
        print(token, end=' ')
    elif idx in beginnings:
        mention = get_mention(idx, text_list, cluster_offsets)
        print(mention, end=' ')
88 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Очень простой инференс разрешения кореферентности для русского языка
2 |
3 | ## 1. Установите зависимости
4 |
5 | Создайте виртуальную среду python 3.7, активируйте ее и установите зависимости (`pip install -r requirements.txt`).
6 | Модель обучена и инференсится при помощи пакета `allennlp==2.2.0`, поэтому проверьте его нормальное функционирование: `allennlp evaluate --help`.
7 |
8 | ## 2. Замените датаридер
9 |
10 | Чтобы модель читала вводимые данные правильно, замените файл в dist-packages виртуальной среды с allennlp_models/common/ontonotes.py на ontonotes.py, который лежит в корне этого проекта.
11 |
12 | ## 3. Скачайте и запустите модель
13 |
14 | Скачайте веса модели отсюда: https://disk.yandex.ru/d/0TKZcXkaBCbq3Q
15 |
16 | И запустите инференс командой: `python autoannotations.py --model [ПУТЬ К МОДЕЛИ]`.
17 | Например: `python autoannotations.py --model model2.tar.gz`.
18 | Команда предложит ввести текст и выделит в нем кореферентные цепочки, например:
19 | `Джобс и Возняк сумели разработать первый по-настоящему персональный компьютер Apple I, который на тот момент выглядел как деревянная шкатулка, и поставлялся без монитора. Частота процессора компьютера достигала 1 МГц, а размер оперативной памяти составлял 4 килобайта. Apple I сразу продался партией в 50 машин и дал повод для разработки продолжения. В апреле 1977 года Джобс и Возняк провели официальную презентацию их следующего компьютера Apple II. В 70-х он стал самым массовым и удачно продаваемым персональным компьютером, с более чем 5 миллионами проданных копий по всему миру. На тот момент компьютер предлагал покупателям интегрированную клавиатуру, цветную графику, звук, пластиковый корпус, и два слота для дискет.`
20 | Вывод модели:
21 | `{M2: Джобс } и {M3: Возняк } сумели разработать {M1: первый по - настоящему персональный компьютер Apple I } , который на тот момент выглядел как деревянная шкатулка , и поставлялся без монитора . Частота процессора {M1: компьютера } достигала 1 МГц , а размер оперативной памяти составлял 4 килобайта . {M1: Apple I } сразу продался партией в 50 машин и дал повод для разработки продолжения . В апреле 1977 года {M2: Джобс } и {M3: Возняк } провели официальную презентацию {M4: их следующего компьютера Apple II } . В 70 - х {M4: он } стал самым массовым и удачно продаваемым персональным компьютером , с более чем 5 миллионами проданных копий по всему миру . На тот момент {M4: компьютер } предлагал покупателям интегрированную клавиатуру , цветную графику , звук , пластиковый корпус , и два слота для дискет .`
22 | Комментарии: При первом запуске transformers скачает и распакует RuBERT от Deeppavlov в кэш. Это может занять некоторое время, но привычный прогресс-бар скачивания не отобразится. Обратите внимание, что скрипт выше тестировался на Windows, на Linux что-то может пойти не так. Чем длиннее документ, тем больше размерность тензора, которую модель попытается посчитать, поэтому лучше не загружать длинные документы, если объем ОЗУ на устройстве меньше 10 ГБ.
23 |
24 | ## 4. Визуализируйте результат
25 |
26 | Цепочки можно визуализировать и редактировать, открыв index.html в браузере и вставив полученный выше текст с разметкой в текстовое поле. Не забудьте нажать на кнопку "распарсить документ". Получится вот так:
27 |
28 | 
29 |
30 | ## Благодарности
31 |
32 | Красивый редактор из последнего пункта - от Bruno Oberle (https://github.com/boberle/sacr), слегка измененный и переведенный на русский.
33 |
34 | ## TODO:
35 |
36 | Планирую добавить поддержку вложенных упоминаний в разметке командной строки.
37 |
38 |
39 |
--------------------------------------------------------------------------------
/style.css:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
24 |
25 |
26 | /* NOTE: the 'and' selector in CSS is .class1.class2 (WITHOUT space) */
27 |
28 | /* div text */
29 |
30 | body {
31 | /*background: black;*/
32 | }
33 |
34 | div#divText {
35 | /*padding-bottom: 800px;*/
36 | }
37 |
38 | div#divText p {
39 | font-size: 20pt;
40 | /*color: antiquewhite;*/
41 | }
42 |
43 | div#divText p.paragraph {
44 | line-height: 2.75em;
45 | padding: 30px;
46 | font-family: Gentium;
47 | }
48 |
49 | div#divText span.parNumber {
50 | /* */
51 | }
52 |
div#divText p.comment {
   /* `mono` is not a CSS generic family: it only matches a font literally
    * named "mono". Keep it first and fall back to the generic `monospace`. */
   font-family: mono, monospace;
   font-size: 15pt;
   background-color: antiquewhite;
   padding: 10px;
   margin: 0px;
}
60 |
61 | div#divText p.heading.level1 {
62 | font-weight: bold;
63 | font-size: 24pt;
64 | margin-left: 0pt;
65 | }
66 |
67 | div#divText p.heading.level2 {
68 | font-weight: bold;
69 | font-size: 22pt;
70 | margin-left: 20pt;
71 | }
72 |
73 | div#divText p.heading.level3 {
74 | font-weight: bold;
75 | font-size: 20pt;
76 | margin-left: 40pt;
77 | }
78 |
79 | div#divText p.heading.level4 {
80 | font-weight: bold;
81 | font-size: 18pt;
82 | margin-left: 40pt;
83 | }
84 |
85 | div#divText p.heading.level5 {
86 | font-weight: bold;
87 | font-size: 16pt;
88 | margin-left: 40pt;
89 | }
90 |
91 | div#divText p.heading.level6 {
92 | font-weight: bold;
93 | font-size: 14pt;
94 | margin-left: 40pt;
95 | }
96 |
97 |
98 |
99 | /* link */
100 |
101 | div#divText span.link {
102 | border: solid 2px black;
103 | }
104 |
105 | /* note: must be `span.link.hidden' and not only `span.hidden' for the
106 | * `padding' to work (but `span.hidden' is sufficient for `font-size' for
107 | * example...).
108 | */
109 | div#divText span.link.hidden {
110 | padding-left: 7px;
111 | }
112 |
113 | /* don't forget the `>', otherwise all nested links of a hidden link will be
114 | * hidden!
115 | */
116 | div#divText span.hidden>span.metadata {
117 | display: none;
118 | }
119 |
120 | div#divText span.link { padding: 11px; padding-left: 0px; }
121 | div#divText span>span.link { padding: 8px; padding-left: 0px; }
122 | div#divText span>span>span.link { padding: 5px; padding-left: 0px; }
123 | div#divText span>span>span>span.link { padding: 0px; padding-left: 0px; }
124 | div#divText span.link>span.metadata { padding: 11px; padding-bottom: 12px; padding-left: 0px; }
125 | div#divText span>span.link>span.metadata { padding: 8px; padding-bottom: 9px; padding-left: 0px; }
126 | div#divText span>span>span.link>span.metadata { padding: 5px; padding-bottom: 6px; padding-left: 0px; }
127 | div#divText span>span>span>span.link>span.metadata { padding: 0px; padding-bottom: 1px; padding-left: 0px; }
div#divText span.metadata {
   margin-right: 3px;
   padding-right: 2px;
   /* `mono` is not a CSS generic family: it only matches a font literally
    * named "mono". Keep it first and fall back to the generic `monospace`. */
   font-family: mono, monospace;
}
133 |
134 | div#divText span.link.selected {
135 | border-style: dotted;
136 | }
137 |
138 | /* referring expressions (token) */
139 |
140 | div#divText a.token {
141 | text-decoration: none;
142 | color: inherit;
143 | }
144 |
145 | div#divText a.token.selected {
146 | text-decoration: underline;
147 | }
148 |
149 | div#divLinkPropertyAnchor {
150 | background-color: #FEF0C9;
151 | display: none;
152 | position: fixed;
153 | bottom: 0px;
154 | left: 0px;
155 | /*height: 30%;*/
156 | width: 100%;
157 | padding: 10px;
158 | margin: 0px;
159 | }
160 |
161 | /* divWhiteSpaceAtTheEnd */
162 |
163 | div#divWhiteSpaceAtTheEnd {
164 | /*background-color: green;*/
165 | }
166 |
167 | /* misc */
168 |
169 | a {
170 | cursor: pointer;
171 | }
172 |
173 | /* chain popup */
174 |
175 | div#divChainPopup p {
176 | font-family: Gentium;
177 | font-size: 15pt;
178 | padding: 0px;
179 | margin: 0px;
180 | }
181 |
182 | div.chainPopupChainDiv {
183 | padding: 10px;
184 | margin: 10px;
185 | /*border: 1px solid black;*/
186 | }
187 |
188 | p.chainPopupChainName {
189 | font-weight: bold;
190 | }
191 |
192 | p.chainPopupChainName.selected a {
193 | border: 2px black solid;
194 | }
195 |
196 | div.chainPopupLinkDiv {
197 | padding: 0px;
198 | margin: 0px;
199 | margin-left: 30px;
200 | /*border: 1px solid black;*/
201 | }
202 |
203 | div.chainPopupLinkDiv a.selected {
204 | /*text-decoration: underline;*/
205 | background: black;
206 | color: white;
207 | }
208 |
209 |
--------------------------------------------------------------------------------
/js/cls_chain_popup.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
24 | /* Issues with stylesheets. You can specify a
25 | * thing with JS, but you need a absolute path for href.
26 | * var link = this.win.document.createElement("LINK");
27 | * link.href = "file:///foo/bar/style.css";
28 | * link.type = "text/css";
29 | * link.rel = "stylesheet";
30 | * this.win.document.head.appendChild(link)
31 | * Otherwise, you need to use the document.stylesheets objects, which
32 | * list all the style sheets. But by default, there is none. To
33 | * create one, use:
34 | * var styleElement = win.document.createElement('style');
35 | * win.document.head.appendChild(styleElement);
36 | * Now, you have a stylesheet that you can get:
37 | * var styleSheet = win.document.styleSheets[0];
38 | * or
39 | * var styleSheet = styleElement.styleSheet
40 | * Then use can set the text:
41 | * styleSheet.cssText = "you text"
42 | * or
43 | * stylesheet.insertRule('p.linkParagraph { padding-left: 15px; }',
44 | * index);
45 | * Note that in the last example, you need to specify an index (the
46 | * last one if you want the rule to be inserted at the end of the
47 | * stylesheet: use something with nextIndex++).
48 | */
49 |
50 |
51 |
/**
 * Separate browser window listing the chains and their links.
 *
 * The window is opened by show(); its size and position are remembered when
 * it is unloaded so that the next show() reopens it at the same place.
 */
class ChainPopup {

   /**
    * @param chainDiv: the element appended to the popup body after the
    * heading.
    */
   constructor(chainDiv) {
      this.visible = false;
      this.chainDiv = chainDiv;
      // the window (created by show())
      this.win = undefined;
      // initial geometry, replaced by the real values when the window unloads
      this.winWidth = '350';
      this.winHeight = '400';
      this.winTop = '100';
      this.winLeft = '100';
      // the heading element
      this.h1 = document.createElement('H1');
      this.h1.appendChild(document.createTextNode('Chains and Links'));
   }

   /**
    * Marks the popup visible, asks the chain collection to refresh the popup
    * content, then focuses the existing window or opens a new one.
    * Alerts and returns if the browser refuses to open the window.
    */
   show() {
      this.visible = true;
      gText.chainColl.sortChainsAndUpdatePopupDiv();

      // already open: nothing to build, just bring it to the front
      if (this.win && !this.win.closed) {
         this.win.focus();
         return;
      }

      const features = [
         "status=0",
         "width=" + this.winWidth,
         "height=" + this.winHeight,
         "top=" + this.winTop,
         "left=" + this.winLeft,
         "toolbar=0",
         "menubar=0",
         "resizable=1",
         "scrollbars=1",
      ].join(",");
      this.win = window.open("", "_blank", features);
      if (!this.win) {
         alert("I can't create the popup!");
         return;
      }

      // remember the geometry for the next opening
      this.win.onbeforeunload = (e) => {
         this.winWidth = this.win.outerWidth;
         this.winHeight = this.win.outerHeight;
         this.winLeft = this.win.screenX;
         this.winTop = this.win.screenY;
         this.visible = false;
         return null;
      };

      const popupDoc = this.win.document;

      // reuse the first style sheet of the main document
      const styleLink = popupDoc.createElement("LINK");
      styleLink.href = document.styleSheets[0].href;
      styleLink.type = "text/css";
      styleLink.rel = "stylesheet";
      popupDoc.head.appendChild(styleLink);

      // content
      popupDoc.title = "Chains and Links";
      popupDoc.body.appendChild(this.h1);
      popupDoc.body.appendChild(this.chainDiv);

      // shortcuts: same key handler as the one referenced by gKeyDownHandler
      popupDoc.body.addEventListener('keydown', gKeyDownHandler);
   }

}
133 |
134 |
--------------------------------------------------------------------------------
/js/cls_scrolling.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
24 |
/**
 * Static helpers to scroll a window to a DOM object.
 *
 * The vertical position of an object is obtained by summing `offsetTop`
 * along the `offsetParent` chain; the result is then passed to
 * `win.scroll(0, position)`.
 */
class Scrolling {

   /*
    * Returns the vertical position of an object, in pixels: the sum of
    * `offsetTop` for the object and each of its `offsetParent` ancestors.
    * Returns 0 when the object has no `offsetParent`.
    */
   static findPosition(obj) {
      let total = 0;
      if (!obj.offsetParent) {
         return total;
      }
      for (let node = obj; node; node = node.offsetParent) {
         total += node.offsetTop;
      }
      return total;
   }


   /*
    * Scrolls the window to show the object (a DOM object), leaving one fifth
    * of the window height above it.
    * @param evenIfNotNeeded: scroll even if the object is already visible.
    * @param win: the window to scroll (defaults to `window`).
    */
   static scrollTo(obj, evenIfNotNeeded, win) {
      const target = win ? win : window;
      // space kept above the object so it does not stick to the top margin
      const topMargin = target.innerHeight / 5;
      const wanted = Scrolling.findPosition(obj) - topMargin;
      const pos = wanted < 0 ? 0 : wanted;
      if (evenIfNotNeeded || !Scrolling.isVisible(obj, target)) {
         target.scroll(0, pos);
      }
   }

   /*
    * Returns the computed height of the given object as an integer (the
    * first run of digits of the computed `height`, e.g. 40 for "40px"), or 0
    * when no number can be read.
    */
   static findHeight(obj) {
      const computed = getComputedStyle(obj);
      if (!computed || !computed.height) {
         return 0;
      }
      const digits = computed.height.match(/\d+/);
      return digits ? Number.parseInt(digits[0], 10) : 0;
   }

   /*
    * Returns true if the object lies between the top of the visible area and
    * the bottom of the window minus the height of gDivLinkPropertyAnchor.
    * @param win: the window to test against (defaults to `window`).
    */
   static isVisible(obj, win) {
      const view = win ? win : window;
      const top = Scrolling.findPosition(obj);
      // the object is above the visible part of the screen
      if (top < view.scrollY) {
         return false;
      }
      // when no height can be computed (e.g. for spans), one sixth of the
      // window is used as a rule of thumb
      const height = Scrolling.findHeight(obj) || view.innerHeight / 6;
      const bottom = top + height;
      const bottomLimit = view.scrollY + view.innerHeight
         - Scrolling.findHeight(gDivLinkPropertyAnchor);
      // the bottom of the object must not be below the visible part
      return !(bottom > bottomLimit);
   }

}
160 |
--------------------------------------------------------------------------------
/js/cls_common.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
/*
 * Maps a lowercase accented character (or ligature) to its ASCII
 * replacement. Looked up with the `in` operator and by key.
 *
 * A plain object is used rather than an Array: the keys are strings, not
 * indices. `var` is kept so the name stays a global of the page.
 */
var diacriticsMap = {
   'ß': 'ss',
   'à': 'a',
   'á': 'a',
   'â': 'a',
   'ã': 'a',
   'ä': 'a',
   'å': 'a',
   'æ': 'a',
   'ç': 'c',
   'è': 'e',
   'é': 'e',
   'ê': 'e',
   'ë': 'e',
   'ì': 'i',
   'í': 'i',
   'î': 'i',
   'ï': 'i',
   'ð': 'd',
   'ñ': 'n',
   'ò': 'o',
   'ó': 'o',
   'ô': 'o',
   'õ': 'o',
   'ö': 'o',
   'ø': 'o',
   'ù': 'u',
   'ú': 'u',
   'û': 'u',
   'ü': 'u',
   'ý': 'y',
   'þ': 'f',
   'ÿ': 'y',
   'œ': 'oe',
};
60 |
61 |
62 | class CommonFunctions {
63 |
64 | static removeDiacritics(text) {
65 | // some ideas:: http://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
66 | text = text.replace(/[^-A-Za-z0-9]/g, function(a){ return a in diacriticsMap ? diacriticsMap[a] : ''});
67 | return text.replace(/-./g, function(a){ return a.substring(1).toUpperCase(); });
68 | }
69 |
70 | static offerNameForChain(words) {
71 | var result = '';
72 | if (words.length == 1) {
73 | result = CommonFunctions.removeDiacritics(words[0]);
74 | } else {
75 | var c = 0;
76 | for (var i=0; i0 && result == '') {
82 | result += CommonFunctions.removeDiacritics(text);
83 | c++;
84 | } else if (i>0) {
85 | text = CommonFunctions.removeDiacritics(text);
86 | result += text.substring(0, 1).toUpperCase()+text.substring(1);
87 | c++;
88 | }
89 | } // for
90 | } // if
91 | result = result.substring(0, 1).toUpperCase()+result.substring(1);
92 | return result;
93 | }
94 |
95 | /*
96 | * @param chainColl: used to check the name validity
97 | * @param askUser: boolean, if false, get a default name (M1, etc.)
98 | * @param defaultName: if evaluates to false, don't propose a name; if a
99 | * string, propose that string (e.g. the current name of the chain); if an
100 | * array (of strings), propose a default name based on the strings
101 | */
102 | static getChainName(chainColl, askUser, defaultName) {
103 | var name = undefined;
104 | if (askUser) {
105 | if (!defaultName) {
106 | defaultName = "";
107 | } else if (typeof(defaultName) == "string") {
108 | // nothing
109 | } else {
110 | defaultName = CommonFunctions.offerNameForChain(defaultName);
111 | }
112 | while (!name) {
113 | name = prompt("Enter a name:", defaultName);
114 | if (name == null) { // cancel
115 | return undefined;
116 | }
117 | if (!name || !chainColl.checkName(name)) {
118 | alert("Bad name!");
119 | name = undefined;
120 | }
121 | }
122 | } else {
123 | var count = 0;
124 | do {
125 | count++;
126 | name = "M" + count.toString();
127 | } while (!chainColl.checkName(name));
128 | }
129 | return name;
130 | }
131 |
132 |
133 | /*
134 | * @return: {startIndex:INT, values:{opt1: val1, etc.}}
135 | */
136 | static parseValues(text, startIndex) {
137 | var result = {};
138 | if (text.indexOf(':', startIndex) != startIndex) {
139 | return {startIndex:startIndex, dic:result};
140 | }
141 | var textLen = text.length;
142 | startIndex++;
143 | var tmp;
144 | while(startIndex < textLen) {
145 | if (((tmp = text.substring(startIndex).match(/^(\w+)=(\w+)/)) != null)
146 | || ((tmp = text.substring(startIndex).match(/^(\w+)="((?:\\"|[^"])*)"/)) != null)) {
147 | result[tmp[1]] = tmp[2];
148 | startIndex += tmp[0].length;
149 | if (text.substring(startIndex).match(/^[,;]/) != null) {
150 | startIndex++;
151 | } else {
152 | break;
153 | }
154 | } else {
155 | break;
156 | }
157 | } // while
158 | return {startIndex:startIndex, dic:result};
159 | };
160 |
161 | };
162 |
163 |
--------------------------------------------------------------------------------
/js/cls_exporter.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
/**
 * Exports the annotations: walks the annotation DOM held by the global
 * `gText`, turns it into a string, converts that string into a tab-separated
 * column format (one token per row, "#begin document" / "#end document"
 * markers) and offers the result as a file download.
 */
class Exporter {

    /* FUNCTIONS TO EXPORT TO A FILE */

    /**
     * Base64-encode a string that may contain characters outside Latin-1.
     * encodeURIComponent + unescape first rewrite the string as its UTF-8
     * byte sequence, which is what btoa can handle.
     *
     * see:
     * - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/btoa
     * - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_.22Unicode_Problem.22
     *
     * @param str: the text to encode
     * @return: the base64 string
     */
    static utf8_to_b64(str) {
        return window.btoa(unescape(encodeURIComponent(str)));
    }

    /**
     * Trigger a download of `text` as a file named `filename`, by clicking
     * a temporary anchor whose href is a data URI.
     *
     * source: http://stackoverflow.com/questions/5552540/save-as-text-file-javascript
     */
    static writeToFile(text, filename) {
        const anchor = document.createElement('a');
        // data uri scheme
        anchor.href = 'data:text/plain;charset=UTF-8;base64,'
            + Exporter.utf8_to_b64(text);
        anchor.innerHTML = 'download';
        anchor.download = filename;
        // the anchor must be attached to the document for click() to work
        // (this step is not in the stackoverflow source)
        document.body.appendChild(anchor);
        anchor.click();
        document.body.removeChild(anchor);
    }


    /* FUNCTIONS FOR FILENAME */

    /**
     * Left-pad a one-character value with "0"; anything else is returned
     * unchanged, as a string.
     */
    static datePadding(text) {
        const str = text + "";
        return str.length == 1 ? "0" + text : str;
    }

    /**
     * @return: the current local date and time as "YYYYMMDD-hhmmss"
     */
    static getDateString() {
        const d = new Date();
        return d.getFullYear()
            + Exporter.datePadding(d.getMonth() + 1) // getMonth() is 0-based
            + Exporter.datePadding(d.getDate())
            + "-"
            + Exporter.datePadding(d.getHours())
            + Exporter.datePadding(d.getMinutes())
            + Exporter.datePadding(d.getSeconds());
    }

    /**
     * Build the filename of the export from the name of the loaded file:
     * - an existing "YYYYMMDD-hhmmss" stamp is replaced by the current one,
     * - otherwise "_<stamp>" is inserted before the extension,
     * - otherwise (no extension) "_<stamp>" is appended.
     *
     * @param originalFilename: may be empty/undefined ("default" is used)
     * @return: the new filename
     */
    static computeNewFilename(originalFilename) {
        const dateString = Exporter.getDateString();
        if (!originalFilename) {
            originalFilename = "default";
        }
        let newFilename = originalFilename.replace(/\d{8}-\d{6}/, dateString);
        if (newFilename == originalFilename) {
            newFilename = originalFilename.replace(/(\.[^.]+)$/,
                "_" + dateString + "$1");
            if (newFilename == originalFilename) {
                newFilename = originalFilename + "_" + dateString;
            }
        }
        return newFilename;
    }

    /* FUNCTIONS TO EXPORT TO TEXT */

    /**
     * Recursively serialise the children of `element`.
     *
     * the general structure is as follows:
     * - paragraph -> series of texts and spans for links.  Each link span is
     *   as follows:
     *    - span (CLASS_LINK):
     *       - span (with the name) (CLASS_METADATA)
     *       - anchor (with the name)
     *
     * A link is written as "{" + name [+ ":" + properties] + content + "}".
     * Paragraph-number spans and <br> elements produce nothing.  Anything
     * unexpected is reported with alert() and skipped.
     *
     * @param element: the DOM element whose children are serialised
     * @param complete: passed on to the properties' getString()
     * @return: the serialised string
     */
    convertElementToText (element, complete) {
        let result = '';
        for (const e of element.childNodes) {
            if (e.nodeType == 3) { // text
                result += e.textContent;
            } else if (e.nodeType == 1) { // DOM element
                if (e.tagName == 'SPAN') {
                    if (e.classList.contains(CLASS_PAR_NUMBER)) {
                        // nothing
                    } else if (e.classList.contains(CLASS_METADATA)) {
                        // the metadata span sits directly inside the link span
                        const link = gText.chainColl.getLinkBySpan(e.parentElement);
                        if (!link) {
                            alert("One of the link span is not in the dictionary");
                        } else {
                            result += link.name;
                            if (!gText.schema.isEmpty) {
                                if (complete) {
                                    result += ':' + link.properties.getString(true,
                                        link.text);
                                } else {
                                    result += ':' + link.properties.getString();
                                }
                            }
                        }
                    } else if (e.classList.contains(CLASS_LINK)) {
                        result += '{' + this.convertElementToText(e, complete) + '}';
                    } else {
                        // report the offending span itself (the previous code
                        // read an undefined `elements[i]` and crashed here)
                        alert("Found a 'span' which is neither a link nor a metadata (className: '"+e.className+"')...");
                    }
                } else if (e.tagName == 'A') {
                    result += e.textContent;
                } else if (e.tagName == 'BR') {
                    // line breaks are not exported
                } else {
                    alert("Found a <"+e.tagName+">...");
                }
            } else {
                alert("Found a element of node type: "+e.nodeType+"...");
            }
        }
        return result;
    }

    /**
     * Serialise every paragraph of gText.div.  Text paragraphs go through
     * convertElementToText(), comment paragraphs are copied verbatim.
     * Paragraphs are concatenated without any separator.
     *
     * @param complete: passed on to convertElementToText()
     * @return: the serialised document
     */
    convertDomToString(complete) {
        let result = '';
        for (const par of gText.div.childNodes) {
            if (par.tagName == 'P') {
                if (par.classList.contains(CLASS_PARAGRAPH)) {
                    result += this.convertElementToText(par, complete);
                } else if (par.classList.contains(CLASS_COMMENT)) {
                    result += par.textContent;
                } else {
                    alert("Found a 'p' which is neither a text nor an info: `"+par.textContent+"'.");
                }
            } else {
                // report the offending node itself (the previous code read
                // an undefined `pars[i]` and crashed here)
                alert("A child of the div 'text' is not a paragraph (node type: "+par.nodeType+").");
            }
        }
        return result;
    }

    /**
     * @return: one "#COLOR:name=color" line per true chain
     */
    getColors() {
        let result = '';
        for (const chain of gText.chainColl.chains) {
            if (chain.isTrueChain) {
                result += "#COLOR:" + chain.name + "=" + chain.color.string + "\n";
            }
        }
        return result;
    }

    /**
     * @return: the "#TOKENIZATION-TYPE:..." line for the current text
     */
    getTokenizationType() {
        return "#TOKENIZATION-TYPE:"+gText.tokenizationType.toString()+"\n";
    }


    /**
     * Convert the serialised document into the column format.
     *
     * The serialised string is split on spaces; each non-empty piece gives
     * one row: document id, "0", token index, token, six "_" columns and a
     * coreference column ("(c)", "(c", "c)" or "-").  The document id is
     * "book_" followed by 6 random hexadecimal digits.  The first row gets
     * the index 2 (the counter starts at 1 and is incremented before use).
     *
     * NOTE(review): the cluster id is the single character at index 2 of
     * the piece and the token starts at index 3, i.e. chain names are
     * assumed to be exactly two characters long (like "M1") and no schema
     * properties are expected.  Longer names (e.g. "M10") are cut in the
     * wrong place; since the name and the token are written without a
     * separator, this cannot be repaired reliably here -- confirm.
     *
     * @param complete: passed on to convertDomToString()
     * @return: the text of the exported file
     */
    computeText(complete) {
        const pieces = this.convertDomToString(complete)
            .split(" ")
            .filter(piece => piece != "");
        const genRanHex = size => [...Array(size)].map(() => Math.floor(Math.random() * 16).toString(16)).join('');
        const bookid = 'book_' + genRanHex(6);
        // one row of the output, with its leading newline
        const row = (index, token, coref) =>
            '\n' + bookid + '\t0\t' + String(index) + '\t' + token
            + '\t_\t_\t_\t_\t_\t_\t' + coref;
        let resulting = '#begin document (' + bookid + '); part 0';
        let tokenIndex = 1;
        // cluster of the last opened mention; it is reused when the
        // closing brace comes in a later piece
        let currentCluster;
        for (const e of pieces) {
            tokenIndex += 1;
            if (e.includes("{") && e.includes("}")) {
                // mention made of a single token
                currentCluster = e.substring(2, 3);
                resulting += row(tokenIndex, e.substring(3, e.length - 1),
                    '(' + currentCluster + ')');
            } else if (e.includes("{")) {
                // first token of a mention
                currentCluster = e.substring(2, 3);
                resulting += row(tokenIndex, e.substring(3),
                    '(' + currentCluster);
            } else if (e.includes("}")) {
                // last token of a mention
                resulting += row(tokenIndex, e.substring(0, e.length - 1),
                    currentCluster + ')');
            } else {
                resulting += row(tokenIndex, e, '-');
            }
        }
        resulting += '\n\n#end document';
        return resulting;
    }

    /**
     * @return: the schema, as it was given by the user
     */
    computeSchema() {
        return gText.raw_schema;
    }

    /**
     * Export the annotated text to a file.
     */
    exportText(complete) {
        const filename = Exporter.computeNewFilename(gText.textFilename);
        const text = this.computeText(complete);
        Exporter.writeToFile(text, filename);
    }

    /**
     * Export the schema to a file.
     */
    exportSchema() {
        // same fallback as computeNewFilename(), so that a missing filename
        // does not give "undefined-schema"
        const baseName = gText.textFilename || "default";
        const filename = Exporter.computeNewFilename(baseName+"-schema");
        const text = this.computeSchema();
        Exporter.writeToFile(text, filename);
    }

    /**
     * Show the schema in an alert box.
     */
    showSchema() {
        alert(this.computeSchema());
    }

}
239 |
240 |
--------------------------------------------------------------------------------
/js/cls_colors.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
/**
 * Builds the palette of colors that can be given to the chains.
 */
class ColorBuilder {

    /**
     * Number of colors that buildColors() produces for the given steps:
     * hue covers [0;360[, saturation ]0;100] and lightness ]10;80].
     *
     * @return: the number of colors
     */
    static computeNbOfColors(hueStep, saturationStep, lightnessStep) {
        const ranges = [
            [360, hueStep],
            [100, saturationStep],
            [70, lightnessStep], // because ]10;80]
        ];
        return ranges.reduce(
            (count, [width, step]) => count * Math.ceil(width / step), 1);
    }

    /**
     * Build the palette.  Missing (or zero) steps fall back to 25 for the
     * hue, 25 for the saturation and 10 for the lightness.  The most
     * saturated and lightest colors come first; the hue varies fastest.
     * The size of the palette is written to the console.
     *
     * @return: an array of Color
     */
    static buildColors(hueStep, saturationStep, lightnessStep) {
        const hueIncrement = hueStep || 25;
        const saturationDecrement = saturationStep || 25;
        const lightnessDecrement = lightnessStep || 10;
        const palette = [];
        for (let sat = 100; sat > 0; sat -= saturationDecrement) {
            for (let light = 80; light > 10; light -= lightnessDecrement) {
                for (let hue = 0; hue < 360; hue += hueIncrement) {
                    palette.push(new Color(hue, sat, light));
                }
            }
        }
        console.log("number of colors: "+palette.length.toString());
        return palette;
    }

}
71 |
72 |
73 |
/**
 * An HSL color, with precomputed CSS strings: the color itself
 * ("hsl(h, s%, l%)"), a 40% opaque version and a contrasting color.
 */
class Color {

    /**
     * Convert {r,g,b} (each in [0;255]) to {y,u,v}, each clamped to
     * [0;255] and floored.
     */
    static rgb2yuv(rgb) {
        var y = Color.clamp(rgb.r *  0.29900 + rgb.g *  0.587   + rgb.b * 0.114);
        var u = Color.clamp(rgb.r * -0.16874 + rgb.g * -0.33126 + rgb.b * 0.50000 + 128);
        var v = Color.clamp(rgb.r *  0.50000 + rgb.g * -0.41869 + rgb.b * -0.08131 + 128);
        return {y:y, u:u, v:v};
    }

    /**
     * Restrict n to [0;255]; values inside the range are floored.
     */
    static clamp(n){
        if (n<0) { return 0;}
        if (n>255) { return 255;}
        return Math.floor(n);
    }


    /**
     * Convert {y,u,v} back to {r,g,b}, each clamped to [0;255] and floored.
     */
    static yuv2rgb(yuv){
        var y = yuv.y;
        var u = yuv.u;
        var v = yuv.v;
        var r = Color.clamp(y + (v - 128) *  1.40200);
        var g = Color.clamp(y + (u - 128) * -0.34414 + (v - 128) * -0.71414);
        var b = Color.clamp(y + (u - 128) *  1.77200);
        return {r:r,g:g,b:b};
    }

    /**
     * adapted from https://stackoverflow.com/questions/36721830/convert-hsl-to-rgb-and-hex
     * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
     * Assumes h in [0;360], s/l in [0;100]
     *
     * @return: {r, g, b}, each rounded to an integer in [0;255]
     */
    static hsl2rgb(h, s, l) {
        var r, g, b;
        h = h/360;
        s = s/100;
        l = l/100;
        if(s == 0){
            r = g = b = l; // achromatic
        }else{
            var hue2rgb = function hue2rgb(p, q, t){
                if(t < 0) t += 1;
                if(t > 1) t -= 1;
                if(t < 1/6) return p + (q - p) * 6 * t;
                if(t < 1/2) return q;
                if(t < 2/3) return p + (q - p) * (2/3 - t) * 6;
                return p;
            }
            var q = l < 0.5 ? l * (1 + s) : l + s - l * s;
            var p = 2 * l - q;
            r = hue2rgb(p, q, h + 1/3);
            g = hue2rgb(p, q, h);
            b = hue2rgb(p, q, h - 1/3);
        }
        return {r:Math.round(r * 255),
            g:Math.round(g * 255), b:Math.round(b * 255)};
    }

    /**
     * Compute a color that is readable on top of `rgb`: the luma is
     * lowered by 180 when it is above 100, raised by 180 otherwise.
     *
     * adapted from https://stackoverflow.com/questions/9600295/automatically-change-text-color-to-assure-readability
     */
    static invertColor(rgb) {
        var yuv = Color.rgb2yuv(rgb);
        var factor = 180;
        var threshold = 100;
        yuv.y = Color.clamp(yuv.y + (yuv.y > threshold ? -factor : factor));
        return Color.yuv2rgb(yuv);
    }

    /**
     * Find the first "hsl(H, S%, L%)" (integer components) in `str`.
     *
     * @return: a Color, or null if there is no such pattern
     */
    static parseString(str) {
        var re = /hsl\((\d+), *(\d+)%, *(\d+)%\)/;
        var result = re.exec(str);
        if (result) {
            // The captures are strings: convert them, so that h/s/l are
            // numbers like for the colors of the palette, and so that
            // "hsl(025, ...)" gives the same string as "hsl(25, ...)".
            return new Color(Number(result[1]), Number(result[2]),
                Number(result[3]));
        }
        return null;
    }

    /**
     * @param h: hue, in [0;360]
     * @param s: saturation, in [0;100]
     * @param l: lightness, in [0;100]
     */
    constructor(h, s, l) {
        this.h = h;
        this.s = s;
        this.l = l;
        this._string = "hsl("+h+", "+s+"%, "+l+"%)";
        var rgb = Color.hsl2rgb(h, s, l);
        this._transparentString = 'rgba('+rgb.r+','+rgb.g+','+rgb.b+',0.4)';
        var invertedRgb = Color.invertColor(rgb);
        this._invertedString = "rgb("+invertedRgb.r+","+invertedRgb.g+","
            +invertedRgb.b+")";
    }

    /* the color, as "hsl(h, s%, l%)" */
    get string() {
        return this._string;
    }

    /* the contrasting color (see invertColor()), as "rgb(r,g,b)" */
    get invertedString() {
        return this._invertedString;
    }

    /* the color with an opacity of 0.4, as "rgba(r,g,b,0.4)" */
    get transparentString() {
        return this._transparentString;
    }

    /* @return: true if `str` is exactly the hsl string of this color */
    equalsString(str) {
        return str === this._string;
    }

    /* @return: true if `color` has the same hsl string as this color */
    equalsColor(color) {
        return color.string === this._string;
    }

}
182 |
// Default color, shared by all the links that are not in a true chain.
// It is created lazily by ColorManager.getDefaultColor().
// Declared with `var`: a bare assignment to an undeclared name throws a
// ReferenceError in strict mode (and in modules).
var _defaultColor = null;
184 |
/**
 * Holds the palette and tells which colors are still free, given the
 * chains that already exist.
 */
class ColorManager {

    /**
     * @return: the default color; it is created (hsl(0, 0%, 83%)) on the
     * first call if none has been set yet
     */
    static getDefaultColor() {
        if (_defaultColor) {
            return _defaultColor;
        }
        _defaultColor = new Color(0, 0, 83);
        return _defaultColor;
    }

    /**
     * The steps are handed over to ColorBuilder.buildColors().
     */
    constructor(hueStep, saturationStep, lightnessStep) {
        this.colors = ColorBuilder.buildColors(hueStep, saturationStep,
            lightnessStep);
    }

    /**
     * @return: true if `color` is one of the colors of the palette
     */
    doesThisColorExist(color) {
        return this.colors.some((candidate) => candidate.equalsColor(color));
    }

    /**
     * @return: true if `color` is neither the default color nor the color
     * of one of the `chains`
     */
    isThisColorFree(color, chains) {
        let free = !color.equalsColor(ColorManager.getDefaultColor());
        if (free) {
            for (const chain of chains) {
                if (color.equalsColor(chain.color)) {
                    free = false;
                    break;
                }
            }
        }
        return free;
    }


    /**
     * @return: the first free color of the palette.  If there is no more
     * available colors, the user is warned and the default color is
     * returned.
     */
    getNextAvailableColor(chains) {
        const firstFree = this.colors.find(
            (candidate) => this.isThisColorFree(candidate, chains));
        if (firstFree !== undefined) {
            return firstFree;
        }
        alert("There is no more color available.  Try to export your "
            +"annotations, reload the script and define more color on the start "
            +"page.  In the meantime, I'm using default color (gray).");
        return ColorManager.getDefaultColor();
    }

    /**
     * @return: all the free colors of the palette, in palette order
     */
    getAvailableColors(chains) {
        return this.colors.filter(
            (candidate) => this.isThisColorFree(candidate, chains));
    }

    /**
     * Let the user pick a new default color among the free ones.
     */
    changeDefaultColor(chains) {
        const choices = this.getAvailableColors(chains);
        new ColorChooserDialog(choices, (picked) => {
            _defaultColor = picked;
            // TODO redraw links and link list
        });
    }

    /**
     * Let the user pick a new color for `chain` among the free ones.
     */
    changeChainColor(chain, chains) {
        const choices = this.getAvailableColors(chains);
        new ColorChooserDialog(choices, (picked) => {
            chain.color = picked;
            // TODO redraw links and link list
        });
    }

}
258 |
259 |
/**
 * A modal dialog showing one clickable line per color.  When a line is
 * clicked, the callback receives the color and the dialog is closed.
 */
class ColorChooserDialog {

    /**
     * Build the dialog and show it at once.
     *
     * @param colors: the colors to choose from
     * @param callback: function(color), called with the chosen color
     */
    constructor(colors, callback) {
        this.callback = callback;
        const container = document.createElement("div");
        container.style.padding = "20px";
        this.modalDiv = new ModalDiv("Color chooser", container);
        for (const color of colors) {
            // one paragraph per color, painted with this color
            const line = document.createElement("p");
            line.style.padding = "7px";
            line.style.backgroundColor = color.string;
            line.style.color = color.invertedString;
            // NOTE(review): "anchor" is not a standard HTML tag ("a" is);
            // kept as it is because changing the element could change how
            // the dialog is styled -- confirm against style.css.
            const anchor = document.createElement("anchor");
            anchor.style.cursor = "pointer";
            anchor.color = color; // the handler reads the color back from the element
            anchor.onclick = () => {
                this.callback(anchor.color);
                this.modalDiv.close();
            };
            anchor.appendChild(document.createTextNode("Choose this color!"));
            line.appendChild(anchor);
            container.appendChild(line);
        }
        container.style['overflow-y'] = "scroll";
        this.modalDiv.show();
    }

}
290 |
291 |
--------------------------------------------------------------------------------
/js/cls_sacr_parser.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
// Tokenization types understood by SacrParser.makeTokenRegex():
// - TOKENIZATION_WORD (1): words and numbers,
// - TOKENIZATION_WORD_N_PUNCT (2): words, numbers and punctuation marks,
// - TOKENIZATION_CHARACTER (3): every character except the braces.
var [
    TOKENIZATION_WORD,
    TOKENIZATION_WORD_N_PUNCT,
    TOKENIZATION_CHARACTER,
] = [1, 2, 3];
27 |
28 | /**********************************************************************
29 | * ParsedLink
30 | *********************************************************************/
31 |
/**
 * A link (annotation) as read by the parser, before the real link is
 * created in the text.
 */
class ParsedLink {
    /**
     * @param name: the name of the chain the link belongs to
     */
    constructor(name) {
        Object.assign(this, {
            name: name,
            properties: {},    // it's just a dictionary
            startAnchor: null, // anchor of the first token
            endAnchor: null,   // same as startAnchor if only one token
        });
    }
}
40 |
41 |
42 | /**********************************************************************
43 | * SacrParser
44 | *********************************************************************/
45 |
46 | class SacrParser {
47 |
48 | /* preprocessing of the input text: each line = one paragraph, no empty
49 | * line, etc. NOTE: no need to worry about \r\n because the text is taken
50 | * from a textarea, which returns only \n
51 | */
52 | static normalizeText(text) {
53 | text = text.replace(/\n(\s+\n)+/g, "\n\n");
54 | text = text.replace(/\\n\n/g, "\\n");
55 | text = text.replace(/\n\s*(#[^\n]+)\n/g, "\n\n$1\n\n");
56 | text = text.replace(/([^\n])[ \t]*\n[ \t]*(?!#|\n+)/g, "$1 ");
57 | text = text.replace(/[ \t]*\n\n+[ \t]*/g, "\n");
58 | text = text.replace(/^\n+/g, "");
59 | text = text.replace(/\n+$/g, "");
60 | return text;
61 | }
62 |
63 | static makeTokenRegex(tokenizationType, additionnalTokens) {
64 | additionnalTokens.sort(function(a,b) {
65 | if (b.length === a.length) { return 0; }
66 | else if (b.length > a.length) { return 1; }
67 | else { return -1; }; });
68 | var additionnalTokenString = additionnalTokens.join('|');
69 | var tokenization_string = undefined;
70 | if (tokenizationType == TOKENIZATION_WORD) {
71 | //alert('tokenization: words');
72 | tokenization_string =
73 | "[а-яa-zёßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿœα-ω]+'?|[-+±]?[.,]?[0-9]+";
74 | } else if (tokenizationType == TOKENIZATION_WORD_N_PUNCT) {
75 | //alert('tokenization: word and punct');
76 | tokenization_string =
77 | tokenization_string =
78 | "[а-яa-zёßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿœα-ω]+'?|[-+±]?[.,]?[0-9]+"
79 | + "|[.,;:!?()\\[\\]]|-+";
80 | } else {
81 | //alert('tokenization: characters');
82 | tokenization_string = "[^{}]";
83 | }
84 | return new RegExp("^(" + additionnalTokenString + "|"
85 | + tokenization_string + ")", 'i');
86 | }
87 |
88 | constructor(div, text, tokenizationType, showPropertyWarnings) {
89 | this.div = div;
90 | this.text = text;
91 | this.tokenizationType = tokenizationType;
92 | this.showPropertyWarnings = showPropertyWarnings;
93 | }
94 |
95 | parseText() {
96 |
97 | var tmp;
98 | var additionnalTokens = new Array('%', '‰', '°', '°C', '°F');
99 | var tokenRegex = SacrParser.makeTokenRegex(this.tokenizationType,
100 | additionnalTokens);
101 |
102 | var parIsHeading = 0; // 0 = no, 1 = level 1, etc.
103 | // variables for storing actions for creating links and chains
104 | var parsedLinks = new Array(); // array of ParsedLink
105 | // (see the class ParsedLink for details)
106 | // NOTE: each link found ({name:values text}) is stored in the
107 | // filoLinks. When the closing } is encountered, it is popped out from
108 | // the filoLinks array and stored permanently in the parsedLinks array.
109 | var filoLinks = new Array(); // array of ParsedLink
110 | var colors = {};
111 | // array of {"chain name":]
112 |
113 | // preprocessing
114 | this.text = SacrParser.normalizeText(this.text);
115 | var lines = this.text.split(/\n/);
116 |
117 | var textTitle = ''; // for the document.title
118 | var textId = ''; // idem
119 |
120 | var paragraph_counter = 1;
121 |
122 | // loop
123 | for (var line of lines) {
124 |
125 | if (line.match(/^#+$/)) {
126 | line = line.replace(/#/g, '*');
127 | }
128 |
129 | if (((tmp = line.match(/^#COLOR\s*:\s*([^ =]+)\s*=\s*(.+)$/)) != null)) {
130 | var chainName = tmp[1];
131 | var tmp_color = Color.parseString(tmp[2]); // returns null if
132 | // can't parse
133 | if (tmp_color) {
134 | //console.log("parsed color: "+tmp[2]);
135 | colors[chainName] = tmp_color;
136 | //console.log(chainName + ": " + tmp_color.string);
137 | } else {
138 | console.log("can't parse color: "+tmp[2]);
139 | }
140 |
141 | } else if (((tmp = line.match(/^#DEFAULTCOLOR\s*:\s*(\S+)$/)) != null)) {
142 | //var tmp_color = Color.parseString(tmp[1]);
143 | //if (tmp_color) {
144 | // ColorManager.setDefaultColor(tmp_color);
145 | //}
146 | } else if (((tmp = line.match(/^#TOKENIZATION-TYPE/)) != null)) {
147 | // nothing
148 |
149 | } else if (((tmp = line.match(/^#.*$/)) != null)
150 | || ((tmp = line.match(/^\*+$/)) != null)) {
151 | var par = document.createElement('P');
152 | if (line.match(/^\*+$/)) {
153 | var hiddenSpan = document.createElement('SPAN');
154 | hiddenSpan.style.display = 'none';
155 | hiddenSpan.appendChild(document.createTextNode(tmp[0]));
156 | par.appendChild(hiddenSpan);
157 | par.appendChild(document.createElement('HR'));
158 | } else {
159 | var tmp2;
160 | if ((tmp2 = line.match(/^\s*#title\s*:\s*(.+)$/)) != null) {
161 | textTitle = tmp2[1];
162 | } else if ((tmp2 = line.match(/^\s*#textid\s*:\s*(.+)$/)) != null) {
163 | textId = tmp2[1];
164 | } else if ((tmp2 = line.match(/^\s*#additionnaltoken\s*:\s*(.+)$/)) != null) {
165 | additionnalTokens.push(tmp2[1]);
166 | tokenRegex = SacrParser.makeTokenRegex(this.tokenizationType,
167 | additionnalTokens);
168 | }
169 | par.appendChild(document.createTextNode(tmp[0]));
170 | }
171 | par.className = CLASS_COMMENT;
172 | this.div.appendChild(par);
173 | if (((tmp = line.match(/^#part-heading:/)) != null)) {
174 | var response = CommonFunctions.parseValues(line, 13);
175 | if (response.startIndex != line.length) {
176 | throw "Can't parse line: "+line+" (error when reading option values)";
177 | }
178 | parIsHeading = 1;
179 | if ('level' in response.dic) {
180 | parIsHeading = response.dic.level;
181 | }
182 | }
183 |
184 | } else if (line.length) {
185 | var startIndex = 0;
186 | var textLen = line.length;
187 | var lastAnchor;
188 | var lastTokenType = '';
189 | var thereIsSomeText = '';
190 | var par = document.createElement('P');
191 | par.className = CLASS_PARAGRAPH;
192 | if (parIsHeading) {
193 | par.classList.add(CLASS_HEADING);
194 | par.classList.add("level"+parIsHeading);
195 | }
196 | var par_number = document.createElement('SPAN');
197 | par_number.className = CLASS_PAR_NUMBER;
198 | par_number.appendChild(document.createTextNode('[#'+paragraph_counter+'] '));
199 | paragraph_counter++;
200 | par.appendChild(par_number);
201 | parIsHeading = 0;
202 | this.div.appendChild(par);
203 | while(startIndex < textLen) {
204 | if ((tmp = line.substring(startIndex).match(tokenRegex)) != null) {
205 | var anchor = gText.createTokenAnchor(tmp[0]);
206 | // WARNING!!! sometimes, there are consecutive
207 | // opening tag ({foo {bar A Word}}), and so you must
208 | // set the anchor for ALL these tags!!! So, just go
209 | // through the array, and complete if something is
210 | // undefined.
211 | for (var link of filoLinks) {
212 | if (!link.startAnchor) {
213 | link.startAnchor = anchor;
214 | }
215 | }
216 | lastAnchor = anchor;
217 | par.appendChild(anchor);
218 | startIndex += tmp[0].length;
219 | lastTokenType = 'text';
220 | thereIsSomeText = true;
221 | } else if ((tmp = line.substring(startIndex).match(/^{([-a-zA-Z0-9_]+)/)) != null) {
222 | var chainName = tmp[1];
223 | var response = CommonFunctions.parseValues(line, startIndex+chainName.length+1);
224 | startIndex = response.startIndex;
225 | if (line.substring(startIndex).search(/^\s/) != 0) {
226 | throw "Can't parse line: "+line+" (error when reading property values).";
227 | }
228 | var parsedLink = new ParsedLink(chainName);
229 | parsedLink.properties = response.dic;
230 | filoLinks.push(parsedLink);
231 | startIndex++;
232 | lastTokenType = 'open';
233 | thereIsSomeText = false;
234 | } else if ((tmp = line.substring(startIndex).match(/^}/)) != null) {
235 | if (!thereIsSomeText) {
236 | alert("Warning: an annotation has no text!");
237 | filoLinks.pop();
238 | } else {
239 | if (filoLinks.length == 0 || !lastAnchor) {
240 | throw "Syntax error in the file (too much }'s).";
241 | }
242 | filoLinks[filoLinks.length-1].endAnchor = lastAnchor;
243 | parsedLinks.push(filoLinks.pop());
244 | }
245 | startIndex++;
246 | lastTokenType = 'close';
247 | } else if ((tmp = line.substring(startIndex).match(/^\\n/)) != null) {
248 | par.appendChild(document.createElement('BR'));
249 | startIndex += 2;
250 | } else if ((tmp = line.substring(startIndex).match(/^./)) != null) {
251 | par.appendChild(document.createTextNode(tmp[0]));
252 | startIndex++;
253 | lastTokenType = 'symbol';
254 | }
255 | //console.log(lastTokenType);
256 | } // while
257 | if (startIndex != textLen) {
258 | throw "The parser has stopped to early!";
259 | }
260 | if (filoLinks.length) {
261 | throw "Syntax error in the file (not enough }'s).";
262 | }
263 | par.normalize();
264 |
265 | } else {
266 | throw "Can't parse line: "+line;
267 |
268 | }
269 |
270 | } // for each line
271 |
272 | // set the document title
273 | try {
274 | if (textId && textTitle) {
275 | document.title = "SACR: "+textId+", "+textTitle;
276 | } else if (textId) {
277 | document.title = "SACR: "+textId;
278 | } else if (textTitle) {
279 | document.title = "SACR: "+textTitle;
280 | }
281 | } catch (err) {
282 | // buuuuh!
283 | }
284 |
285 | /* creation of links and chains */
286 |
287 | for (var parsedLink of parsedLinks) {
288 | gText.importLink(parsedLink.startAnchor, parsedLink.endAnchor,
289 | parsedLink.name, parsedLink.properties);
290 | }
291 |
292 | /* colors */
293 |
294 | //console.log(colors);
295 | for (var chainName in colors) {
296 | //console.log(chainName);
297 | var color = colors[chainName];
298 | //console.log('color: '+color.string);
299 | //console.log('exists: '+gText.colorManager.doesThisColorExist(color).toString());
300 | if (gText.colorManager.doesThisColorExist(color) &&
301 | gText.colorManager.isThisColorFree(color,
302 | gText.chainColl.chains)) {
303 | var chain = gText.chainColl.getChainByName(chainName);
304 | if (chain && chain.isTrueChain) {
305 | chain.color = color;
306 | //console.log('set color:'+color.string);
307 | }
308 | }
309 | }
310 |
311 | }
312 |
313 | }
314 |
315 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
25 |
26 |
27 |
28 |
29 | Аннотация корефрентности
30 |
31 |
32 |
33 |
34 |
SACR RU
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
110 |
111 |
317 |
318 |
319 |
320 |
321 |
--------------------------------------------------------------------------------
/js/cls_link.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
// Counters shared by all the links.  Declared with `var`: a bare
// assignment to an undeclared name throws a ReferenceError in strict mode
// (and in modules).
var _selectionCount = 0; // next value returned by Link.getNextSelectionCount()
var _linkIdCounter = 0; // id of the next Link that will be created
26 |
27 | class Link {
28 |
29 | static getNextSelectionCount() {
30 | return _selectionCount++;
31 | }
32 |
33 | /* links is sorted in situ */
34 | static sortLinks(links) {
35 | if (gLoadingTime) {
36 | return;
37 | }
38 | links.sort(function(a,b) {
39 | // a is after b
40 | if (a.span.compareDocumentPosition(b.span) & 2) {
41 | return 1;
42 | // a is before b
43 | } else if (a.span.compareDocumentPosition(b.span) & 4) {
44 | return -1;
45 | }
46 | return 0; });
47 | }
48 |
49 | /* @param elements: The first and last elements of the link. They may be
50 | * anchors or spans.
51 | */
52 | constructor(elements, initialProperties) {
53 | this.id = _linkIdCounter++;
54 | this._name = undefined; // set up by setChain()
55 | this._color = ColorManager.getDefaultColor(); // set up by setChain()
56 | this.selectionCount = -1;
57 | // elements
58 | this.span = document.createElement('SPAN');
59 | this.span.classList.add(CLASS_LINK);
60 | this.nameSpan = document.createElement('SPAN');
61 | this.span.appendChild(this.nameSpan);
62 | this.nameSpan.className = CLASS_METADATA;
63 | // move all the elements in the span
64 | var toBeMoved = new Array();
65 | var cur = elements[0];
66 | while (true) {
67 | toBeMoved.push(cur);
68 | if (elements.length == 1 || cur === elements[1]) {
69 | break;
70 | }
71 | cur = cur.nextSibling;
72 | }
73 | elements[0].parentNode.replaceChild(this.span, elements[0]);
74 | for (var e of toBeMoved) {
75 | this.span.appendChild(e);
76 | }
77 | // name
78 | this.nameAnchor = document.createElement('A');
79 | this.nameSpan.appendChild(this.nameAnchor);
80 | // elements for the chains popup (after moving all the elements into the
81 | // span, because we use this.text)
82 | this.popupPar = document.createElement('P');
83 | this.popupAnchor = document.createElement('A');
84 | this.popupAnchor.appendChild(document.createTextNode(this.text));
85 | this.popupPar.appendChild(this.popupAnchor);
86 | // add events to all the elements
87 | this._addEvents();
88 | // misc
89 | this._isSelected = false;
90 | this._isHidden = false;
91 | this.redraw();
92 | // properties (at the end because of the head property and this.words)
93 | if (gText.schema.isEmpty) {
94 | this.properties = null;
95 | if (gText.showPropertyWarnings && initialProperties
96 | && Object.keys(initialProperties).length) {
97 | alert("No schema has been defined, yet there are some properties "
98 | + "in the file.");
99 | }
100 | } else {
101 | if (!initialProperties) {
102 | initialProperties = {};
103 | }
104 | this.properties
105 | = gText.schema.buildLinkProperties(initialProperties);
106 | this.properties.resetHeadProperty(this);
107 | }
108 | }
109 |
110 | _addEvents() {
111 | var that = this;
112 | this.span.onclick = function(e) {
113 | if (e.ctrlKey && e.shiftKey) { // attach to new chain (ask for name)
114 | that.select();
115 | var name = CommonFunctions.getChainName(gText.chainColl, true,
116 | that.name);
117 | if (name) {
118 | var chain = new Chain(name);
119 | gText.chainColl.addChain(chain);
120 | gText.chainColl.transferLink(that, chain);
121 | }
122 | } else if (e.ctrlKey) { // attach to last selected chain
123 | var lastSelectedChain = gText.chainColl.getLastSelectedChain();
124 | that.select();
125 | if (lastSelectedChain) {
126 | gText.chainColl.transferLink(that, lastSelectedChain);
127 | }
128 | } else if (e.shiftKey) { // attach to new chain (default name)
129 | that.select();
130 | var name = CommonFunctions.getChainName(gText.chainColl, false);
131 | if (name) {
132 | var chain = new Chain(name);
133 | gText.chainColl.addChain(chain);
134 | gText.chainColl.transferLink(that, chain);
135 | }
136 | } else {
137 | if (that.isSelected) {
138 | that.deselect();
139 | } else {
140 | that.select();
141 | }
142 | }
143 | e.stopPropagation();
144 | return false;
145 | };
146 | this.popupAnchor.onclick = this.span.onclick;
147 | // drag and drop
148 | this.span.draggable = true;
149 | this.span.ondragstart = function(e) {
150 | e.stopPropagation(); // when overlapping span (link inside link)
151 | e.dataTransfer.setData("text", that.id.toString());
152 | // e.target is the source element (ie the element that is dragged)
153 | //useless: e.dataTransfer.effectAllowed = 'all';
154 | };
155 | this.span.ondragover = function(e) {
156 | e.preventDefault(); // allow the drop (blocked by default)
157 | };
158 | this.span.ondrop = function(e) {
159 | // NOTE: e.target is the target element (ie the element on which an
160 | // element is dropped): don't use it, but use `this/that'
161 | e.stopPropagation(); // when overlapping span (link inside link)
162 | //console.log(e.target);
163 | //console.log(this);
164 | e.preventDefault();
165 | var linkId = parseInt(e.dataTransfer.getData("text"));
166 | var sourceLink = gText.chainColl.getLinkById(linkId);
167 | if (!sourceLink) return;
168 | var targetLink = that;
169 | var shiftKey = e.shiftKey // doesn't seem to work on FF 54 (only 55)
170 | || e.dataTransfer.dropEffect == "link"; // ctrl+shift, for FF54
171 | //console.log(e);
172 | //console.log(shiftKey);
173 | if (shiftKey) {
174 | gText.substituteLink(sourceLink, targetLink);
175 | } else {
176 | if (sourceLink === targetLink) return;
177 | var sourceChain = gText.chainColl.getChainByLink(sourceLink);
178 | var targetChain = gText.chainColl.getChainByLink(targetLink);
179 | if (sourceChain === targetChain) return;
180 | var ctrlKey = e.ctrlKey // doesn't seem to work on FF 54 (only 55), and doesn't work on chromium
181 | || e.dataTransfer.dropEffect == "copy"; // works on all versions of FF, and on chromium if the key is pressed before beginning the d&d
182 | // for chrome, see: https://stackoverflow.com/questions/19010257/event-datatransfer-dropeffect-in-chrome
183 | //console.log(e.dataTransfer.dropEffect);
184 | if (sourceChain.count == 1) {
185 | if (targetChain.count == 1) {
186 | //targetLink.setChain(sourceChain);
187 | gText.chainColl.transferLink(targetLink, sourceChain);
188 | } else {
189 | if (ctrlKey) {
190 | //targetLink.setChain(sourceChain);
191 | gText.chainColl.transferLink(targetLink, sourceChain);
192 | } else {
193 | //sourceLink.setChain(targetChain);
194 | gText.chainColl.transferLink(sourceLink, targetChain);
195 | }
196 | }
197 | } else {
198 | if (targetChain.count == 1) {
199 | //targetLink.setChain(sourceChain);
200 | gText.chainColl.transferLink(targetLink, sourceChain);
201 | } else {
202 | if (ctrlKey) {
203 | //targetLink.setChain(sourceChain);
204 | gText.chainColl.transferLink(targetLink, sourceChain);
205 | } else {
206 | if (confirm("Do you want to merge?")) {
207 | while (targetChain.count) {
208 | //targetChain.links[0].setChain(sourceChain);
209 | gText.chainColl.transferLink(targetChain.links[0], sourceChain);
210 | }
211 | }
212 | }
213 | }
214 | }
215 | } // no shift key
216 | }; // this.span.ondrop
217 | this.popupAnchor.draggable = true;
218 | this.popupAnchor.ondragstart = this.span.ondragstart;
219 | this.popupAnchor.ondragover = this.span.ondragover;
220 | this.popupAnchor.ondrop = this.span.ondrop;
221 | }
222 |
223 | get words() {
224 | var wds = new Array();
225 | for (var anchor of this.span.getElementsByClassName(CLASS_TOKEN)) {
226 | wds.push(anchor.textContent);
227 | }
228 | return wds;
229 | }
230 |
231 | get text() {
232 | //var text = this.span.textContent;
233 | //return text.substr(text.indexOf(" ")+1);
234 | var clone = this.span.cloneNode(true);
235 | var badguys = clone.getElementsByClassName(CLASS_METADATA);
236 | for (var i=badguys.length-1; i>=0; i--) {
237 | badguys[i].parentNode.removeChild(badguys[i]);
238 | }
239 | return clone.textContent;
240 | }
241 |
242 | setChain(chain) {
243 | this._name = chain.name;
244 | this._color = chain.color;
245 | }
246 |
247 | get name() {
248 | return this._name;
249 | }
250 |
251 | get contentIsEmptySet() {
252 | return this.text === "Ø";
253 | }
254 |
255 | show() {
256 | if (this._isHidden) {
257 | this._isHidden = false;
258 | this.redraw();
259 | }
260 | }
261 |
262 | hide() {
263 | if (!this._isHidden) {
264 | this._isHidden = true;
265 | this.redraw();
266 | }
267 | }
268 |
269 | get isHidden() {
270 | return this._isHidden;
271 | }
272 |
273 | get isVisible() {
274 | return !this._isHidden;
275 | }
276 |
277 | select() {
278 | if (!this._isSelected) {
279 | gText.deselectAllTokensAndLinks();
280 | gText.chainColl.selectChain(this);
281 | this._isSelected = true;
282 | this.redraw();
283 | this.selectionCount = Link.getNextSelectionCount();
284 | }
285 | }
286 |
287 | deselect() {
288 | if (this._isSelected) {
289 | gText.chainColl.deselectChain(this);
290 | this._isSelected = false;
291 | this.redraw();
292 | }
293 | }
294 |
295 | get isSelected() {
296 | return this._isSelected;
297 | }
298 |
299 | destroy() {
300 | this.deselect(); // remove the div from the property panel
301 | this.span.removeChild(this.nameSpan);
302 | while (this.span.childNodes.length) {
303 | this.span.parentNode.insertBefore(this.span.firstChild, this.span);
304 | }
305 | var parentNode = this.span.parentNode;
306 | parentNode.removeChild(this.span);
307 | parentNode.normalize();
308 | }
309 |
310 | redraw() {
311 | this.span.style.borderColor = this._color.string;
312 | this.nameSpan.style.borderColor = this._color.string;
313 | this.nameSpan.style.backgroundColor = this._color.string;
314 | this.nameAnchor.style.color = this._color.invertedString;
315 | this.nameAnchor.innerHTML = this._name;
316 | if (this.isHidden) {
317 | this.span.classList.add(CLASS_HIDDEN);
318 | } else {
319 | this.span.classList.remove(CLASS_HIDDEN);
320 | }
321 | if (this.isSelected) {
322 | this.span.classList.add(CLASS_SELECTED);
323 | this.span.style.backgroundColor = this._color.transparentString;
324 | this.popupAnchor.classList.add(CLASS_SELECTED);
325 | } else {
326 | this.span.classList.remove(CLASS_SELECTED);
327 | this.span.style.backgroundColor = "rgba(0,0,0,0)"; // transparent
328 | // (for selection of a link that has nested links)
329 | this.popupAnchor.classList.remove(CLASS_SELECTED);
330 | }
331 | if (!gText.schema.isEmpty) {
332 | if (this.isSelected) {
333 | this.properties.div.insertBefore(gText.schema.button,
334 | this.properties.div.firstChild);
335 | gDivLinkPropertyAnchor.appendChild(this.properties.div);
336 | gDivLinkPropertyAnchor.style.display = 'block';
337 | } else {
338 | if (gDivLinkPropertyAnchor.childNodes.length
339 | && gDivLinkPropertyAnchor.childNodes[0] == this.properties.div) {
340 | gDivLinkPropertyAnchor.removeChild(this.properties.div);
341 | gDivLinkPropertyAnchor.style.display = 'none';
342 | }
343 | }
344 | }
345 | }
346 |
347 | scrollTo(win) {
348 | var obj;
349 | if (!win || win == window) {
350 | obj = this.span;
351 | } else {
352 | obj = this.popupPar;
353 | }
354 | Scrolling.scrollTo(obj, true, win);
355 | }
356 |
357 | isEqualTo(name, searchedValue, reversed) {
358 | var val = this.properties.getPropertyByName(name).value == searchedValue;
359 | if (reversed) {
360 | return !val;
361 | }
362 | return val;
363 | }
364 |
365 | matches(name, pattern, reversed) {
366 | //console.log(pattern);
367 | var val = pattern.exec(this.properties.getPropertyByName(name).value);
368 | if (reversed) {
369 | return !val;
370 | }
371 | return val;
372 | }
373 |
374 | }
375 |
376 |
--------------------------------------------------------------------------------
/js/cls_text.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
24 |
25 | class Text {
26 |
27 | constructor(autocomplete) {
28 | this.div = document.createElement("DIV");
29 | this.div.id = "divText"; // for CSS
30 | document.body.appendChild(this.div);
31 | this.chainColl = new ChainCollection();
32 | this.chainPopup = new ChainPopup(this.chainColl.popupDiv);
33 | this.colorManager = null;
34 | this.searchDialog = null;
35 | var that = this;
36 | this.dataLoader = new DataLoader(function(dataLoader) {
37 | that.textFilename = dataLoader.textFilename;
38 | that.raw_schema = dataLoader.schema;
39 | that.raw_text = dataLoader.text;
40 | that.minLinks = dataLoader.minLinks;
41 | that.showPropertyWarnings = dataLoader.showPropertyWarnings;
42 | that.tokenizationType = dataLoader.tokenizationType;
43 | try {
44 | that.schema = new Schema(that.raw_schema)
45 | that.colorManager = new ColorManager(dataLoader.hueStep,
46 | dataLoader.saturationStep, dataLoader.lightnessStep);
47 | var parser = new SacrParser(that.div, that.raw_text,
48 | that.tokenizationType, false);
49 | parser.parseText();
50 | } catch(error) {
51 | var errText = "
"+error.name+": "+error.message+"
";
52 | if (error.fileName) errText += "
File: "+error.fileName+"
";
53 | if (error.lineNumber) errText += "
Line number: "+error.lineNumber+"
";
54 | if (error.stack) errText += "
Stack: "+error.stack+"
";
55 | that.div.innerHTML = "
An error occured:
" + errText;
56 | return;
57 | }
58 | gLoadingTime = false;
59 | that.chainColl.sortLinksOfAllChains();
60 | }, autocomplete);
61 | // parse the sacr code
62 | //new SacrParser(this, this.propertyColl);
63 | }
64 |
65 | clickOnTheParseButton() {
66 | this.dataLoader.clickOnTheParseButton();
67 | }
68 |
69 |
70 | createTokenAnchor(textContent) {
71 | var that = this;
72 | var anchor = document.createElement('A');
73 | anchor.appendChild(document.createTextNode(textContent));
74 | anchor.className = CLASS_TOKEN;
75 | var func = function(obj, e, dblClick) {
76 | // the next line will remove all selection of text made by the shift
77 | // key
78 | window.getSelection().removeAllRanges();
79 | e.stopPropagation();
80 | var selected = that.getSelectedTokens();
81 | // if no other token is selected, we (de)select the current token
82 | if (!dblClick && selected[0] === obj) {
83 | obj.classList.remove(CLASS_SELECTED);
84 | } else if (!dblClick && selected.length == 0) {
85 | that.chainColl.deselectAllLinks();
86 | obj.classList.add(CLASS_SELECTED);
87 | // otherwise, we create a link
88 | } else if (dblClick || selected.length == 1) {
89 | obj.classList.add(CLASS_SELECTED);
90 | // shift: ask for a name
91 | if (e.shiftKey && !e.ctrlKey) {
92 | that.createLinkAndChain(true);
93 | // ctrl: attach to previous chain
94 | } else if (e.ctrlKey && !e.shiftKey) {
95 | that.createLinkAndAttachItToLastSelectedChain();
96 | // otherwise, default name
97 | } else {
98 | that.createLinkAndChain(false);
99 | }
100 | // if we are here, there is a problem somewhere
101 | } else {
102 | alert("Too many words are selected!");
103 | obj.deselectAllTokensAndLinks();
104 | return;
105 | }
106 | };
107 | anchor.onclick = function(e){ func(this, e, false); };
108 | anchor.ondblclick = function(e){ func(this, e, true); };
109 | return anchor;
110 | }
111 |
112 | deselectAllTokensAndLinks() {
113 | // this is for words
114 | var selected = Array.from(document.getElementsByClassName(CLASS_SELECTED));
115 | for (var e of selected) {
116 | if (e.tagName == 'A') {
117 | e.classList.remove(CLASS_SELECTED);
118 | }
119 | }
120 | // this is for links
121 | this.chainColl.deselectAllLinks();
122 | }
123 |
124 | /* @param includeAll: If false, include only the first and last selected.
125 | */
126 | getSelectedTokens(includeAll) {
127 | var anchors = document.getElementsByClassName(CLASS_TOKEN);
128 | var start = -1;
129 | var end = -1;
130 | for (var i=0; i size = (size * 72.0 / 96.0);
468 | var pars = this.div.childNodes;
469 | var step = 1;
470 | for (var par of pars) {
471 | var size = parseInt(getComputedStyle(par).fontSize.match(/\d+/)[0]);
472 | if (plus) {
473 | size += step;
474 | } else {
475 | if (size - step > 1) {
476 | size -= step;
477 | }
478 | }
479 | par.style.fontSize = size + "px";
480 | }
481 | }
482 |
483 | showStatistics() {
484 | var tokenCount = this.div.getElementsByClassName(CLASS_TOKEN).length;
485 | var chainCount = this.chainColl.chains.length;
486 | var linkCount = this.div.getElementsByClassName(CLASS_LINK).length;
487 | var trueChainCount = 0;
488 | var trueLinkCount = 0;
489 | for (var c of this.chainColl.chains) {
490 | if (c.isTrueChain) {
491 | trueChainCount++;
492 | trueLinkCount += c.links.length;
493 | }
494 | }
495 | var mean = trueLinkCount / trueChainCount;
496 | var msg = '';
497 | msg += "Number of tokens: "+tokenCount+"\n";
498 | msg += "Number of referents : "+chainCount+"\n";
499 | msg += "Number of chains: "+trueChainCount+"\n";
500 | msg += "Number of referring expressions: "+linkCount+"\n";
501 | msg += "Number of links: "+trueLinkCount+"\n";
502 | msg += "Average number of links per chain: "+mean+"\n";
503 | alert(msg);
504 | }
505 |
506 | }
507 |
--------------------------------------------------------------------------------
/js/cls_chain.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
24 | class ChainCollection {
25 |
26 | constructor() {
27 | this.chains = new Array();
28 | this.popupDiv = document.createElement('DIV');
29 | this.popupDiv.id = CHAIN_POPUP_MAIN_DIV_ID; // for CSS
30 | this.showNonTrueChainsInPopup = true;
31 | }
32 |
33 | /* Because of the updating of the popupDiv, you should insert in the
34 | * collection only empty chain (and add links after).
35 | */
36 | addChain(chain) {
37 | if (chain.count) {
38 | alert("DEBUG WARNING: newly added chain is not empty");
39 | }
40 | this.chains.push(chain);
41 | }
42 |
43 | removeChain(chain) {
44 | for (var i=0; i 0; });
115 | for (var c of this.chains) {
116 | c.redraw();
117 | }
118 | }
119 |
120 | getChainByLink(link) {
121 | for (var c of this.chains) {
122 | for (var l of c.links) {
123 | if (l === link) {
124 | return c;
125 | }
126 | }
127 | }
128 | return null;
129 | }
130 |
131 | getChainByName(name) {
132 | for (var c of this.chains) {
133 | if (c.name == name) {
134 | return c;
135 | }
136 | }
137 | return null;
138 | }
139 |
140 | deselectAllLinks() {
141 | for (var c of this.chains) {
142 | for (var l of c.links) {
143 | l.deselect();
144 | }
145 | }
146 | }
147 |
148 | getLastSelectedChain() {
149 | // do we have a link currently selected?
150 | for (var c of this.chains) {
151 | for (var l of c.links) {
152 | if (l.isSelected) {
153 | return c;
154 | }
155 | }
156 | }
157 | // look for the last selected link
158 | // NOTE: if there is no chain, return null.
159 | var index = 0;
160 | var chain = null;
161 | for (var c of this.chains) {
162 | for (var l of c.links) {
163 | if (l.selectionCount >= index) { // yeah: >= and not >
164 | index = l.selectionCount;
165 | chain = c;
166 | }
167 | }
168 | }
169 | return chain;
170 | }
171 |
172 | getSelectedLink() {
173 | for (var c of this.chains) {
174 | for (var l of c.links) {
175 | if (l.isSelected) {
176 | return l;
177 | }
178 | }
179 | }
180 | return null;
181 | }
182 |
183 | getLinkById(id) {
184 | for (var c of this.chains) {
185 | for (var l of c.links) {
186 | if (l.id == id) {
187 | return l;
188 | }
189 | }
190 | }
191 | return null;
192 | }
193 |
194 | getLinkBySpan(span) {
195 | for (var c of this.chains) {
196 | for (var l of c.links) {
197 | if (l.span == span) {
198 | return l;
199 | }
200 | }
201 | }
202 | return null;
203 | }
204 |
205 | transferLink(link, targetChain) {
206 | var sourceChain = this.getChainByLink(link);
207 | if (sourceChain === targetChain) {
208 | return false;
209 | }
210 | link.setChain(targetChain);
211 | sourceChain.removeLink(link);
212 | if (sourceChain.count == 0) {
213 | this.removeChain(sourceChain);
214 | }
215 | targetChain.addLink(link);
216 | }
217 |
218 | getLinks() {
219 | var links = new Array();
220 | for (var c of this.chains) {
221 | for (var l of c.links) {
222 | links.push(l);
223 | }
224 | }
225 | return links;
226 | }
227 |
228 | /* Returns null if no more link. */
229 | getNextLink(refLink, backward, onlyVisible) {
230 | var links = this.getLinks();
231 | Link.sortLinks(links);
232 | var index = undefined;
233 | for (var i=0; i=0; i--) {
244 | if (!onlyVisible || links[i].isVisible) {
245 | return links[i];
246 | }
247 | }
248 | } else {
249 | for (var i=index+1; i dic[b]) return 1;
352 | return 0;
353 | });
354 | for (var key of keys) {
355 | res += key + ": " + dic[key] + "\n";
356 | }
357 | alert(res);
358 | }
359 | }
360 |
361 | /*********************************************************************/
362 |
363 | class Chain {
364 |
365 | /* links is sorted in situ */
366 | static sortChains(chains) {
367 | if (gLoadingTime) {
368 | return;
369 | }
370 | chains.sort(function(a,b) {
371 | /*if (!a.count) {
372 | return -1;
373 | }
374 | if (!b.count) {
375 | return 1;
376 | }*/
377 | // a is after b
378 | if (a.firstLink.span.compareDocumentPosition(b.firstLink.span) & 2) {
379 | return 1;
380 | // a is before b
381 | } else if (a.firstLink.span.compareDocumentPosition(b.firstLink.span) & 4) {
382 | return -1;
383 | }
384 | return 0; });
385 | }
386 |
387 | constructor(name) {
388 | this._name = name;
389 | this._color = undefined;
390 | this.links = new Array();
391 | // elements for the popup of all chains and links
392 | this.popupDiv = document.createElement('DIV');
393 | this.popupDiv.classList.add(CLASS_CHAIN_POPUP_CHAIN_DIV);
394 | this.popupDivHeadingParagraph = document.createElement('P');
395 | this.popupDivHeadingParagraph.classList.add(CLASS_CHAIN_POPUP_CHAIN_NAME);
396 | this.popupDivHeading = document.createElement('A');
397 | this.popupDivHeadingParagraph.appendChild(this.popupDivHeading);
398 | this.popupDivHeading.textContent = this._name;
399 | var that = this;
400 | this.popupDivHeading.onclick = function(e) {
401 | if (e.ctrlKey) {
402 | that.firstLink.select();
403 | } else {
404 | if (that.popupLinkDiv.style.display == 'none') {
405 | that.popupLinkDiv.style.display = 'block';
406 | } else {
407 | that.popupLinkDiv.style.display = 'none';
408 | }
409 | }
410 | };
411 | this.popupDiv.appendChild(this.popupDivHeadingParagraph);
412 | this.popupLinkDiv = document.createElement('DIV');
413 | this.popupLinkDiv.classList.add(CLASS_CHAIN_POPUP_LINK_DIV);
414 | this.popupLinkDiv.style.display = 'block';
415 | this.popupDiv.appendChild(this.popupLinkDiv);
416 | // set the color
417 | this.color = ColorManager.getDefaultColor();
418 | }
419 |
420 | /* note: chains are sorted every time a link is added, so no need to sort
421 | * them here.
422 | */
423 | get firstLink() {
424 | return this.links[0];
425 | }
426 |
427 | get isTrueChain() {
428 | return (this.links.length >= gText.minLinks)
429 | || (name.indexOf('_') == 0);
430 | }
431 |
432 | get name() {
433 | return this._name;
434 | }
435 |
436 | // assume the name is valid: use checkName()
437 | set name(val) {
438 | if (val == this._name) {
439 | return;
440 | }
441 | this._name = val;
442 | this.popupDivHeading.textContent = this._name;
443 | for (var link of this.links) {
444 | link.setChain(this);
445 | }
446 | this.redraw();
447 | }
448 |
449 | get color() {
450 | return this._color;
451 | }
452 |
453 | // assume the color (an object) is valid (ie. not used by another chain)
454 | set color(val) {
455 | this._color = val;
456 | this.popupDiv.style.color = this._color.invertedString;
457 | this.popupDiv.style.backgroundColor = this._color.string;
458 | for (var link of this.links) {
459 | link.setChain(this);
460 | }
461 | this.redraw();
462 | }
463 |
464 | get count() {
465 | return this.links.length;
466 | }
467 |
468 | upgradeToTrueChain() {
469 | this.color = gText.colorManager.getNextAvailableColor(gText.chainColl.chains);
470 | }
471 |
472 | downgradeToNotTrueChain() {
473 | this.color = ColorManager.getDefaultColor();
474 | }
475 |
476 | /* Before of the updating of ChainCollection.popupDiv, which is done here
477 | * and not when adding a chain to the collection, you should add only empty
478 | * chain to the collection, and (for this function) add only link to a
479 | * chain already in the collection.
480 | */
481 | addLink(link) {
482 | if (!gText.chainColl.isThisChainInCollection(this)) {
483 | alert("DEBUG WARNING: before adding link to chain, you should "
484 | +"add the chain to the collection.");
485 | }
486 | var wasTrueChain = this.isTrueChain;
487 | this.links.push(link);
488 | link.setChain(this);
489 | if (!wasTrueChain && this.isTrueChain) {
490 | this.upgradeToTrueChain();
491 | } else {
492 | this.redraw();
493 | }
494 | Link.sortLinks(this.links);
495 | this.updatePopupLinkDiv();
496 | gText.chainColl.sortChainsAndUpdatePopupDiv();
497 | this.addDraggableEventsToTheChainHeading();
498 | }
499 |
500 | // NOTE: this will not remove the chain from the chain collection if there
501 | // is no more link left!
502 | removeLink(link) {
503 | var wasTrueChain = this.isTrueChain;
504 | for (var i=0; i=0; i--) {
562 | if (!onlyVisible || this.links[i].isVisible) {
563 | return this.links[i];
564 | }
565 | }
566 | } else {
567 | for (var i=index+1; i None:
70 |
71 | self.document_id = document_id
72 | self.sentence_id = sentence_id
73 | self.words = words
74 | self.pos_tags = pos_tags
75 | self.parse_tree = parse_tree
76 | self.predicate_lemmas = predicate_lemmas
77 | self.predicate_framenet_ids = predicate_framenet_ids
78 | self.word_senses = word_senses
79 | self.speakers = speakers
80 | self.named_entities = named_entities
81 | self.srl_frames = srl_frames
82 | self.coref_spans = coref_spans
83 |
84 |
85 | class Ontonotes:
86 | """
87 | This `DatasetReader` is designed to read in the English OntoNotes v5.0 data
88 | in the format used by the CoNLL 2011/2012 shared tasks. In order to use this
89 | Reader, you must follow the instructions provided [here (v12 release):]
90 | (https://cemantix.org/data/ontonotes.html), which will allow you to download
91 | the CoNLL style annotations for the OntoNotes v5.0 release -- LDC2013T19.tgz
92 | obtained from LDC.
93 |
94 | Once you have run the scripts on the extracted data, you will have a folder
95 | structured as follows:
96 |
97 | ```
98 | conll-formatted-ontonotes-5.0/
99 | ── data
100 | ├── development
101 | └── data
102 | └── english
103 | └── annotations
104 | ├── bc
105 | ├── bn
106 | ├── mz
107 | ├── nw
108 | ├── pt
109 | ├── tc
110 | └── wb
111 | ├── test
112 | └── data
113 | └── english
114 | └── annotations
115 | ├── bc
116 | ├── bn
117 | ├── mz
118 | ├── nw
119 | ├── pt
120 | ├── tc
121 | └── wb
122 | └── train
123 | └── data
124 | └── english
125 | └── annotations
126 | ├── bc
127 | ├── bn
128 | ├── mz
129 | ├── nw
130 | ├── pt
131 | ├── tc
132 | └── wb
133 | ```
134 |
135 | The file path provided to this class can then be any of the train, test or development
136 | directories(or the top level data directory, if you are not utilizing the splits).
137 |
138 | The data has the following format, ordered by column.
139 |
140 | 1. Document ID : `str`
141 | This is a variation on the document filename
142 | 2. Part number : `int`
143 | Some files are divided into multiple parts numbered as 000, 001, 002, ... etc.
144 | 3. Word number : `int`
145 | This is the word index of the word in that sentence.
146 | 4. Word : `str`
147 | This is the token as segmented/tokenized in the Treebank. Initially the `*_skel` file
148 | contain the placeholder [WORD] which gets replaced by the actual token from the
149 | Treebank which is part of the OntoNotes release.
150 | 5. POS Tag : `str`
151 | This is the Penn Treebank style part of speech. When parse information is missing,
152 | all part of speeches except the one for which there is some sense or proposition
153 | annotation are marked with a XX tag. The verb is marked with just a VERB tag.
154 | 6. Parse bit : `str`
155 | This is the bracketed structure broken before the first open parenthesis in the parse,
156 | and the word/part-of-speech leaf replaced with a `*`. When the parse information is
157 | missing, the first word of a sentence is tagged as `(TOP*` and the last word is tagged
158 | as `*)` and all intermediate words are tagged with a `*`.
159 | 7. Predicate lemma : `str`
160 | The predicate lemma is mentioned for the rows for which we have semantic role
161 | information or word sense information. All other rows are marked with a "-".
162 | 8. Predicate Frameset ID : `int`
163 | The PropBank frameset ID of the predicate in Column 7.
164 | 9. Word sense : `float`
165 | This is the word sense of the word in Column 3.
166 | 10. Speaker/Author : `str`
167 | This is the speaker or author name where available. Mostly in Broadcast Conversation
168 | and Web Log data. When not available the rows are marked with an "-".
169 | 11. Named Entities : `str`
170 | These columns identifies the spans representing various named entities. For documents
171 | which do not have named entity annotation, each line is represented with an `*`.
172 | 12. Predicate Arguments : `str`
173 | There is one column each of predicate argument structure information for the predicate
174 | mentioned in Column 7. If there are no predicates tagged in a sentence this is a
175 | single column with all rows marked with an `*`.
176 | -1. Co-reference : `str`
177 | Co-reference chain information encoded in a parenthesis structure. For documents that do
178 | not have co-reference annotations, each line is represented with a "-".
179 | """
180 |
def dataset_iterator(self, file_path: str) -> Iterator[OntonotesSentence]:
    """
    Lazily yield every sentence of every file reported by
    `dataset_path_iterator(file_path)`, one file after the other.
    """
    for conll_path in self.dataset_path_iterator(file_path):
        for sentence in self.sentence_iterator(conll_path):
            yield sentence
187 |
@staticmethod
def dataset_path_iterator(file_path: str) -> Iterator[str]:
    """
    Walk the directory tree rooted at `file_path` and yield the path of every
    file whose name ends with "gold_conll".
    """
    logger.info("Reading CONLL sentences from dataset files at: %s", file_path)
    # The tree is listed completely before anything is yielded.
    for directory, _subdirectories, file_names in list(os.walk(file_path)):
        for file_name in file_names:
            # Files not ending in "gold_conll" are a relic of the dataset
            # pre-processing: every file is duplicated - one file called
            # filename.gold_skel and one generated from the preprocessing
            # called filename.gold_conll. Only the latter is wanted.
            if file_name.endswith("gold_conll"):
                yield os.path.join(directory, file_name)
204 |
def dataset_document_iterator(self, file_path: str) -> Iterator[List[OntonotesSentence]]:
    """
    An iterator over CONLL formatted files which yields documents, regardless
    of the number of document annotations in a particular file. This is useful
    for conll data which has been preprocessed, such as the preprocessing which
    takes place for the 2012 CONLL Coreference Resolution task.

    A sentence is a run of consecutive lines that are neither blank nor
    comments (lines starting with "#"); each run is converted with
    `_conll_rows_to_sentence`. A document is closed by a line starting with
    "#end document" (it is yielded even if empty) or by the end of the file.
    Rows still pending at the end of the file (no trailing blank line and no
    "#end document") are converted too instead of being dropped.
    """
    with codecs.open(file_path, "r", encoding="utf8") as open_file:
        conll_rows = []
        document: List[OntonotesSentence] = []
        for line in open_file:
            line = line.strip()
            if line != "" and not line.startswith("#"):
                # Non-empty line. Collect the annotation.
                conll_rows.append(line)
            else:
                # Blank line or comment: the current sentence is over.
                if conll_rows:
                    document.append(self._conll_rows_to_sentence(conll_rows))
                    conll_rows = []
            if line.startswith("#end document"):
                yield document
                document = []
        if conll_rows:
            # The file ended in the middle of a sentence: keep it.
            document.append(self._conll_rows_to_sentence(conll_rows))
        if document:
            # Collect any stragglers or files which might not
            # have the '#end document' format for the end of the file.
            yield document
231 |
def sentence_iterator(self, file_path: str) -> Iterator[OntonotesSentence]:
    """
    Yield, one by one, the sentences of every document that
    `dataset_document_iterator` finds in the CONLL formatted file `file_path`.
    """
    for sentences_of_document in self.dataset_document_iterator(file_path):
        yield from sentences_of_document
239 |
240 | def _conll_rows_to_sentence(self, conll_rows: List[str]) -> OntonotesSentence:
241 | document_id: str = None
242 | sentence_id: int = None
243 | # The words in the sentence.
244 | sentence: List[str] = []
245 | # The pos tags of the words in the sentence.
246 | pos_tags: List[str] = []
247 | # the pieces of the parse tree.
248 | parse_pieces: List[str] = []
249 | # The lemmatised form of the words in the sentence which
250 | # have SRL or word sense information.
251 | predicate_lemmas: List[str] = []
252 | # The FrameNet ID of the predicate.
253 | predicate_framenet_ids: List[str] = []
254 | # The sense of the word, if available.
255 | word_senses: List[float] = []
256 | # The current speaker, if available.
257 | speakers: List[str] = []
258 |
259 | verbal_predicates: List[str] = []
260 | span_labels: List[List[str]] = []
261 | current_span_labels: List[str] = []
262 |
263 | # Cluster id -> List of (start_index, end_index) spans.
264 | clusters: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list)
265 | # Cluster id -> List of start_indices which are open for this id.
266 | coref_stacks: DefaultDict[int, List[int]] = defaultdict(list)
267 |
268 | for index, row in enumerate(conll_rows):
269 | conll_components = row.split()
270 |
271 | document_id = conll_components[0]
272 | sentence_id = int(conll_components[1])
273 | word = conll_components[3]
274 | pos_tag = conll_components[4]
275 | parse_piece = conll_components[5]
276 |
277 | # Replace brackets in text and pos tags
278 | # with a different token for parse trees.
279 | if pos_tag != "XX" and word != "XX":
280 | if word == "(":
281 | parse_word = "-LRB-"
282 | elif word == ")":
283 | parse_word = "-RRB-"
284 | else:
285 | parse_word = word
286 | if pos_tag == "(":
287 | pos_tag = "-LRB-"
288 | if pos_tag == ")":
289 | pos_tag = "-RRB-"
290 | (left_brackets, right_hand_side) = ('_', '_')
291 | # only keep ')' if there are nested brackets with nothing in them.
292 | right_brackets = right_hand_side.count(")") * ")"
293 | parse_piece = f"{left_brackets} ({pos_tag} {parse_word}) {right_brackets}"
294 | else:
295 | # There are some bad annotations in the CONLL data.
296 | # They contain no information, so to make this explicit,
297 | # we just set the parse piece to be None which will result
298 | # in the overall parse tree being None.
299 | parse_piece = None
300 |
301 | lemmatised_word = conll_components[6]
302 | framenet_id = conll_components[7]
303 | word_sense = conll_components[8]
304 | speaker = conll_components[9]
305 |
306 | if not span_labels:
307 | # If this is the first word in the sentence, create
308 | # empty lists to collect the NER and SRL BIO labels.
309 | # We can't do this upfront, because we don't know how many
310 | # components we are collecting, as a sentence can have
311 | # variable numbers of SRL frames.
312 | span_labels = [[] for _ in conll_components[10:-1]]
313 | # Create variables representing the current label for each label
314 | # sequence we are collecting.
315 | current_span_labels = [None for _ in conll_components[10:-1]]
316 |
317 | self._process_span_annotations_for_word(
318 | conll_components[10:-1], span_labels, current_span_labels
319 | )
320 |
321 | # If any annotation marks this word as a verb predicate,
322 | # we need to record its index. This also has the side effect
323 | # of ordering the verbal predicates by their location in the
324 | # sentence, automatically aligning them with the annotations.
325 | word_is_verbal_predicate = any("(V" in x for x in conll_components[11:-1])
326 | if word_is_verbal_predicate:
327 | verbal_predicates.append(word)
328 |
329 | self._process_coref_span_annotations_for_word(
330 | conll_components[10], index, clusters, coref_stacks
331 | )
332 |
333 | sentence.append(word)
334 | pos_tags.append(pos_tag)
335 | parse_pieces.append(parse_piece)
336 | predicate_lemmas.append(lemmatised_word if lemmatised_word != "_" else None)
337 | predicate_framenet_ids.append(framenet_id if framenet_id != "_" else None)
338 | word_senses.append(float(word_sense) if word_sense != "_" else None)
339 | speakers.append(speaker if speaker != "_" else None)
340 |
341 | named_entities = '_'
342 | srl_frames = 0
343 |
344 |
345 | parse_tree = None
346 | coref_span_tuples: Set[TypedSpan] = {
347 | (cluster_id, span) for cluster_id, span_list in clusters.items() for span in span_list
348 | }
349 | return OntonotesSentence(
350 | document_id,
351 | sentence_id,
352 | sentence,
353 | pos_tags,
354 | parse_tree,
355 | predicate_lemmas,
356 | predicate_framenet_ids,
357 | word_senses,
358 | speakers,
359 | named_entities,
360 | srl_frames,
361 | coref_span_tuples,
362 | )
363 |
364 | @staticmethod
365 | def _process_coref_span_annotations_for_word(
366 | label: str,
367 | word_index: int,
368 | clusters: DefaultDict[int, List[Tuple[int, int]]],
369 | coref_stacks: DefaultDict[int, List[int]],
370 | ) -> None:
371 | """
372 | For a given coref label, add it to a currently open span(s), complete a span(s) or
373 | ignore it, if it is outside of all spans. This method mutates the clusters and coref_stacks
374 | dictionaries.
375 |
376 | # Parameters
377 |
378 | label : `str`
379 | The coref label for this word.
380 | word_index : `int`
381 | The word index into the sentence.
382 | clusters : `DefaultDict[int, List[Tuple[int, int]]]`
383 | A dictionary mapping cluster ids to lists of inclusive spans into the
384 | sentence.
385 | coref_stacks : `DefaultDict[int, List[int]]`
386 | Stacks for each cluster id to hold the start indices of active spans (spans
387 | which we are inside of when processing a given word). Spans with the same id
388 | can be nested, which is why we collect these opening spans on a stack, e.g:
389 |
390 | [Greg, the baker who referred to [himself]_ID1 as 'the bread man']_ID1
391 | """
392 | if label != "-":
393 | for segment in label.split("|"):
394 | # The conll representation of coref spans allows spans to
395 | # overlap. If spans end or begin at the same word, they are
396 | # separated by a "|".
397 | if segment[0] == "(":
398 | # The span begins at this word.
399 | if segment[-1] == ")":
400 | # The span begins and ends at this word (single word span).
401 | cluster_id = int(segment[1:-1])
402 | clusters[cluster_id].append((word_index, word_index))
403 | else:
404 | # The span is starting, so we record the index of the word.
405 | cluster_id = int(segment[1:])
406 | coref_stacks[cluster_id].append(word_index)
407 | else:
408 | # The span for this id is ending, but didn't start at this word.
409 | # Retrieve the start index from the document state and
410 | # add the span to the clusters for this id.
411 | cluster_id = int(segment[:-1])
412 | if coref_stacks[cluster_id]:
413 | start = coref_stacks[cluster_id].pop()
414 | clusters[cluster_id].append((start, word_index))
415 |
416 | @staticmethod
417 | def _process_span_annotations_for_word(
418 | annotations: List[str],
419 | span_labels: List[List[str]],
420 | current_span_labels: List[Optional[str]],
421 | ) -> None:
422 | """
423 | Given a sequence of different label types for a single word and the current
424 | span label we are inside, compute the BIO tag for each label and append to a list.
425 |
426 | # Parameters
427 |
428 | annotations : `List[str]`
429 | A list of labels to compute BIO tags for.
430 | span_labels : `List[List[str]]`
431 | A list of lists, one for each annotation, to incrementally collect
432 | the BIO tags for a sequence.
433 | current_span_labels : `List[Optional[str]]`
434 | The currently open span per annotation type, or `None` if there is no open span.
435 | """
436 | for annotation_index, annotation in enumerate(annotations):
437 | # strip all bracketing information to
438 | # get the actual propbank label.
439 | label = annotation.strip("()*")
440 |
441 | if "(" in annotation:
442 | # Entering into a span for a particular semantic role label.
443 | # We append the label and set the current span for this annotation.
444 | bio_label = "B-" + label
445 | span_labels[annotation_index].append(bio_label)
446 | current_span_labels[annotation_index] = label
447 | elif current_span_labels[annotation_index] is not None:
448 | # If there's no '(' token, but the current_span_label is not None,
449 | # then we are inside a span.
450 | bio_label = "I-" + current_span_labels[annotation_index]
451 | span_labels[annotation_index].append(bio_label)
452 | else:
453 | # We're outside a span.
454 | span_labels[annotation_index].append("O")
455 | # Exiting a span, so we reset the current span label for this annotation.
456 | if ")" in annotation:
457 | current_span_labels[annotation_index] = None
458 |
--------------------------------------------------------------------------------
/js/cls_property.js:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * SACR (Script d'Annotation de Chaînes de Référence): a coreference chain
4 | * annotation tool.
5 | *
6 | * Copyright 2017 Bruno Oberlé.
7 | *
8 | * This Source Code Form is subject to the terms of the Mozilla Public License,
9 | * v. 2.0. If a copy of the MPL was not distributed with this file, You can
10 | * obtain one at http://mozilla.org/MPL/2.0/.
11 | *
12 | * This program comes with ABSOLUTELY NO WARRANTY. See the Mozilla Public
13 | * License, v. 2.0 for more details.
14 | *
15 | * Some questions about the license may have been answered at
16 | * https://www.mozilla.org/en-US/MPL/2.0/FAQ/.
17 | *
18 | * If you have any question, contact me at boberle.com.
19 | *
20 | * The source code can be found at boberle.com.
21 | *
22 | */
23 |
// Edit-mode identifiers; Schema.editMode starts at EDIT_NORMAL.
// They are declared with `var` rather than assigned to undeclared names:
// an assignment to an undeclared identifier throws a ReferenceError in
// strict mode and in ES modules. `var` also keeps them reachable as global
// properties when this file is loaded as a classic script.
// NOTE(review): presumably these correspond to the normal / tab-mode /
// auto-mode choices offered by EditModeDialog -- confirm against that class.
var EDIT_NORMAL = 0;
var EDIT_TAB = 1;
var EDIT_AUTO = 2;
27 |
/**
 * Property schema of an annotation project: the property definitions read
 * from the schema text, together with the current edit-mode settings and the
 * button that opens the edit-mode dialog.
 */
class Schema {

  /**
   * @param {string} code - Schema text; it is parsed right away.
   */
  constructor(code) {
    this.isEmpty = true;
    // property name -> dictionary read from the schema text
    this._rawProperties = {};
    this._parse(code);
    // property name -> {type: ..., values: ...}, filled by
    // buildLinkProperties() (original note: values is empty for textbox
    // and head)
    this.listOfProperties = {};
    this.editMode = EDIT_NORMAL;
    this.editInChain = false;
    this.editOnlyVisible = false;
    this._button = null; // created lazily by the `button` getter
    // The dialog reports the user's choice through this callback.
    this._editModeDialog = new EditModeDialog(
      (editMode, inChain, onlyVisible) => {
        this.editMode = editMode;
        this.editInChain = inChain;
        this.editOnlyVisible = onlyVisible;
      });
  }

  /**
   * Read the schema text line by line and register each property found.
   * Lines that are blank or start with `#` are skipped, a `PROP:` line
   * starts a new property, and any other line is one value of the property
   * being read (`$$$` stands for the empty value). Unparseable lines are
   * reported with alert().
   *
   * @param {string} text - Schema text.
   */
  _parse(text) {
    let pending = null; // property being read, not registered yet
    for (const line of text.split(/\n+/)) {
      if (/^\s*(?:#.*)?$/.test(line)) {
        continue; // blank line or comment
      }
      if (/^PROP:/.test(line)) {
        if (pending) {
          this.addPrototypeProperty(pending);
        }
        const parsed = CommonFunctions.parseValues(line, 4);
        // the options must have been consumed up to the end of the line
        if (parsed.startIndex != line.length) {
          alert(`Can't parse line: ${line} (error when reading option values)`);
        }
        pending = parsed.dic;
        continue;
      }
      const valueMatch = line.match(/^\s*(.+)$/);
      if (valueMatch === null || !pending) {
        // either not a value line, or a value with no property before it
        alert(`Can't parse line: ${line}`);
        continue;
      }
      if (!('values' in pending)) {
        pending.values = [];
      }
      pending.values.push(valueMatch[1] === '$$$' ? '' : valueMatch[1]);
    }
    if (pending) {
      this.addPrototypeProperty(pending);
    }
  }

  /**
   * Register one parsed property under its `name`, and mark the schema as
   * non-empty. A dictionary without a `name` is rejected with an alert.
   *
   * @param {Object} dic - Dictionary describing the property.
   */
  addPrototypeProperty(dic) {
    if (!('name' in dic)) {
      alert("error in the schema: a property doesn't have any name");
      return;
    }
    this._rawProperties[dic['name']] = dic;
    this.isEmpty = false;
  }

  /**
   * Build one LinkProperty per property of the schema and wrap them in a
   * LinkProperties object. Also refreshes `listOfProperties`.
   *
   * @param givenValues: A dictionary (keys are property names (like
   * `gramfunction'), values are property values (like `subject')). Give an
   * empty dictionary to get a default property list.
   * @returns {LinkProperties} The properties, in schema order.
   */
  buildLinkProperties(givenValues) {
    const built = [];
    for (const propName in this._rawProperties) {
      const summary = { type: 'normal', values: {} }; // defaults
      this.listOfProperties[propName] = summary;
      const setup = this._rawProperties[propName];

      const isGiven = propName in givenValues;
      // Only warn about a missing property when some values were given.
      if (!isGiven && gText.showPropertyWarnings
          && Object.keys(givenValues).length) {
        alert("Property `"+propName+"' not found in the file.");
      }

      const prop = new LinkProperty(propName,
        isGiven ? givenValues[propName] : "");
      if ('newline' in setup) {
        prop.newLineAfter = (setup['newline'] == 'true');
      }
      if ('showname' in setup) {
        prop.showName = (setup['showname'] == 'true');
      }
      if ('textboxsize' in setup) {
        prop.textboxSize = setup['textboxsize'];
      }
      if ('type' in setup) {
        prop.type = setup['type'];
      }
      summary.type = prop.type;
      if ('values' in setup) {
        prop.values = setup['values'];
      }
      summary.values = prop.values;
      if ('addShortcuts' in setup) {
        prop.addShortcuts = setup['addShortcuts'];
      }
      built.push(prop);
    }
    if (gText.showPropertyWarnings) {
      for (const givenName in givenValues) {
        if (!(givenName in this._rawProperties)) {
          alert("Property `"+givenName+"' found in the file, but not in the schema.");
        }
      }
    }
    return new LinkProperties(built);
  }

  /**
   * The `Edit Mode' button, created on first access and reused afterwards.
   * Clicking it shows the edit-mode dialog.
   */
  get button() {
    if (this._button) {
      return this._button;
    }
    const btn = document.createElement('INPUT');
    btn.type = 'BUTTON';
    btn.value = 'Edit Mode';
    btn.onclick = () => {
      this._editModeDialog.show();
    };
    this._button = btn;
    return btn;
  }

}
152 |
153 |
154 | class LinkProperties {
155 |
156 | constructor(properties) {
157 | this.properties = properties;
158 | this.div = document.createElement('DIV');
159 | for (var property of properties) {
160 | if (property.showName) {
161 | this.div.appendChild(document.createTextNode(property.name+": "));
162 | }
163 | this.div.appendChild(property.element);
164 | }
165 | }
166 |
167 | resetHeadProperty(link) {
168 | for (var prop of this.properties) {
169 | if (prop.type == 'head') {
170 | prop.resetHead(link);
171 | }
172 | }
173 | }
174 |
175 | getString(includeHeadText, content) {
176 | var props = {};
177 | for (var property of this.properties) {
178 | props[property.name] = property.value;
179 | }
180 | if (includeHeadText) {
181 | for (var property of this.properties) {
182 | if (property.type == 'head') {
183 | props['head_text'] = property.headText;
184 | }
185 | }
186 | }
187 | if (content) {
188 | props['content'] = content;
189 | }
190 | var keys = new Array();
191 | for (var key in props) {
192 | keys.push(key);
193 | }
194 | keys.sort();
195 | var strings = new Array();
196 | for (var key of keys) {
197 | //console.log(key);
198 | var esc = props[key].replace('"', """);
199 | strings.push(key + '="' + esc + '"');
200 | }
201 | return strings.join(',');
202 | }
203 |
204 | copyPropertiesFrom(properties) {
205 | for (var i=0; i 0) {
522 | equalValue.remove(0);
523 | }
524 | for (var value of values) {
525 | var option = document.createElement('option');
526 | option.value = value;
527 | option.text = value;
528 | equalValue.appendChild(option);
529 | }
530 | };
531 | equalProperty.onchange();
532 | // controls for matchPar
533 | var matchProperty = document.createElement('select');
534 | matchPar.appendChild(matchProperty);
535 | for (var propName in schema.listOfProperties) {
536 | var option = document.createElement('option');
537 | option.value = propName;
538 | option.text = propName;
539 | matchProperty.appendChild(option);
540 | }
541 | var matchOperator = document.createElement('select');
542 | matchPar.appendChild(matchOperator);
543 | for (var op of ['matches', 'does not match']) {
544 | var option = document.createElement('option');
545 | option.value = op;
546 | option.text = op;
547 | matchOperator.appendChild(option);
548 | }
549 | var matchValue = document.createElement('input');
550 | matchValue.type = "text";
551 | matchPar.appendChild(matchValue);
552 | // search button
553 | var buttonPar = document.createElement('P');
554 | div.appendChild(buttonPar);
555 | var button = document.createElement('input');
556 | button.type = "button";
557 | button.value = "search";
558 | buttonPar.appendChild(button);
559 | buttonPar.onclick = function(e) {
560 | var name, searchedValue, reversed;
561 | if (checkUseRegex.checked) {
562 | name = matchProperty.options.item(matchProperty.selectedIndex).value;
563 | op = matchOperator.options.item(matchOperator.selectedIndex).value;
564 | if (op == 'matches') {
565 | reversed = false;
566 | } else {
567 | reversed = true;
568 | }
569 | var value = matchValue.value;
570 | if (!value) {
571 | value = "^$";
572 | }
573 | try {
574 | searchedValue = new RegExp(value);
575 | } catch(err) {
576 | searchedValue = null;
577 | alert("Invalid regular expression.");
578 | }
579 | } else {
580 | name = equalProperty.options.item(equalProperty.selectedIndex).value;
581 | op = equalOperator.options.item(equalOperator.selectedIndex).value;
582 | if (op == 'is equal to') {
583 | reversed = false;
584 | } else {
585 | reversed = true;
586 | }
587 | searchedValue =
588 | equalValue.options.item(equalValue.selectedIndex).value;
589 | }
590 | if (searchedValue !== null) { // see the try/catch above
591 | that.callback(name, searchedValue, reversed); // if searchedValue
592 | // is a string, it will be used with an `equal to' function,
593 | // otherwise with a regex function
594 | that.modalDiv.close();
595 | }
596 | };
597 | div.style['overflow-y'] = "scroll";
598 | }
599 |
600 | show() {
601 | this.modalDiv.show();
602 | }
603 |
604 | }
605 |
606 |
607 | class EditModeDialog {
608 |
609 | constructor(callback) {
610 | this.callback = callback;
611 | var div = document.createElement("div");
612 | div.style.padding = "20px";
613 | this.modalDiv = new ModalDiv("Edit mode", div);
614 | var that = this;
615 | var par = document.createElement('P');
616 | div.appendChild(par);
617 | par.appendChild(document.createTextNode("Choose the edit mode: "));
618 | // combo
619 | var comboEditMode = document.createElement('select');
620 | par.appendChild(comboEditMode);
621 | var modes = ["normal: html elements behave normally",
622 | "tab-mode: use tab on a property control to go to the next link",
623 | "auto-mode: setting a property automatically put you on the next link"];
624 | var first = true;
625 | for (var i=0; iFirefox, "
95 | + "или хотя бы "
96 | + "Chromium "
97 | + "или Google Chrome!";
98 | //p.style.color = 'red';
99 | p.style.fontStyle = 'italic';
100 | p = document.createElement('P');
101 | div.appendChild(p);
102 | p.innerHTML = "Вы можете ознакомиться с руководством пользователя "
103 | + "здесь"
104 | + ", и видеотуториалами (на французском) "
105 | + "здесь"
106 | + ".";
107 | p.style.fontStyle = 'italic';
108 | div.appendChild(document.createElement('HR'));
109 | // text
110 | var textareaText = document.createElement('TEXTAREA');
111 | textareaText.cols = 90;
112 | textareaText.rows = 20;
113 | p = document.createElement('P');
114 | div.appendChild(p);
115 | p.innerHTML = "Впишите или вставьте текст в поле ввода ниже, либо используйте одну из представленных опций:";
116 | ul = document.createElement('UL');
117 | p.appendChild(ul);
118 | li = document.createElement('LI');
119 | ul.appendChild(li);
120 | t = document.createTextNode("Загрузите файл: ");
121 | li.appendChild(t);
122 | var inputText = document.createElement('INPUT');
123 | li.appendChild(inputText);
124 | inputText.type = 'file';
125 | inputText.onchange = function(){
126 | var reader = new FileReader();
127 | reader.onload = function(e) {
128 | textareaText.value = e.target.result;
129 | };
130 | reader.readAsText(this.files[0]);
131 | that.textFilename = this.files[0].name;
132 | };
133 | li = document.createElement('LI');
134 | ul.appendChild(li);
135 | t = document.createTextNode("Используйте текст басни Эзопа (фр. яз.): ");
136 | li.appendChild(t);
137 | input = document.createElement('INPUT');
138 | li.appendChild(input);
139 | input.type = 'button';
140 | input.value = "with annotations";
141 | input.onclick = function() {
142 | textareaText.value = DataLoader.getSampleTextWithAnnotations();
143 | }
144 | if (go == 'withAnnotations') {
145 | textareaText.value = DataLoader.getSampleTextWithAnnotations();
146 | }
147 | t = document.createTextNode(" или ");
148 | li.appendChild(t);
149 | input = document.createElement('INPUT');
150 | li.appendChild(input);
151 | input.type = 'button';
152 | input.value = "without annotation";
153 | input.onclick = function() {
154 | textareaText.value = DataLoader.getSampleTextWithoutAnnotations();
155 | }
156 | if (go == 'withoutAnnotations') {
157 | textareaText.value = DataLoader.getSampleTextWithoutAnnotations();
158 | }
159 | div.appendChild(textareaText);
160 | // properties
161 | var textareaProperties = document.createElement('TEXTAREA');
162 | textareaProperties.cols = 90;
163 | textareaProperties.rows = 20;
164 | p = document.createElement('P');
165 | div.appendChild(p);
166 | p.innerHTML = "Впишите или вставьте параметры в поле ввода ниже, либо используйте одну из представленных опций:";
167 | ul = document.createElement('UL');
168 | p.appendChild(ul);
169 | li = document.createElement('LI');
170 | ul.appendChild(li);
171 | t = document.createTextNode("Загрузите файл: ");
172 | li.appendChild(t);
173 | var inputSchema = document.createElement('INPUT');
174 | li.appendChild(inputSchema);
175 | inputSchema.type = 'file';
176 | inputSchema.onchange = function(){
177 | var reader = new FileReader();
178 | reader.onload = function(e) {
179 | textareaProperties.value = e.target.result;
180 | };
181 | reader.readAsText(this.files[0]);
182 | };
183 | li = document.createElement('LI');
184 | ul.appendChild(li);
185 | t = document.createTextNode("Используйте ");
186 | li.appendChild(t);
187 | input = document.createElement('INPUT');
188 | li.appendChild(input);
189 | input.type = 'button';
190 | input.value = "схему по умолчанию (фр. яз.)";
191 | input.onclick = function() {
192 | textareaProperties.value = DataLoader.getSampleSchema();
193 | }
194 | if (go == 'withAnnotations') {
195 | textareaProperties.value = DataLoader.getSampleSchema();
196 | }
197 | div.appendChild(textareaProperties);
198 | // number of link
199 | p = document.createElement('P');
200 | div.appendChild(p);
201 | p.innerHTML = "Введите минимальное количество связей в кореферентной цепи:";
202 | input = document.createElement('INPUT');
203 | p.appendChild(input);
204 | input.type = 'number';
205 | input.min = '1';
206 | input.max = '50';
207 | input.value = '2';
208 | input.style.width = '70px'; // size attribute doesn't work for `number'
209 | input.onchange = function() {
210 | that.minLinks = this.value;
211 | };
212 | // number of colors
213 | p = document.createElement('P');
214 | div.appendChild(p);
215 | p.appendChild(document.createTextNode("Вы можете настроить количество цветов, если вам нужно больше цветов: "));
216 | var colorSpan = document.createElement('SPAN');
217 | p.appendChild(colorSpan);
218 | var ul = document.createElement('UL');
219 | div.appendChild(ul);
220 | var li = document.createElement('LI');
221 | ul.appendChild(li)
222 | // hue
223 | li.appendChild(document.createTextNode("Оттенок: "));
224 | input = document.createElement('INPUT');
225 | li.appendChild(input);
226 | input.type = 'number';
227 | input.min = '10';
228 | input.max = '50';
229 | input.value = this.hueStep;
230 | input.style.width = '70px'; // size attribute doesn't work for `number'
231 | input.onchange = function() {
232 | that.hueStep = parseInt(this.value);
233 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)";
234 | };
235 | // saturation
236 | li.appendChild(document.createTextNode(" Насыщенность: "));
237 | input = document.createElement('INPUT');
238 | li.appendChild(input);
239 | input.type = 'number';
240 | input.min = '10';
241 | input.max = '50';
242 | input.value = this.saturationStep;
243 | input.style.width = '70px'; // size attribute doesn't work for `number'
244 | input.onchange = function() {
245 | that.saturationStep = parseInt(this.value);
246 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)";
247 | };
248 | // lightness
249 | li.appendChild(document.createTextNode(" Яркость: "));
250 | input = document.createElement('INPUT');
251 | li.appendChild(input);
252 | input.type = 'number';
253 | input.min = '5';
254 | input.max = '25';
255 | input.value = this.ligthnessStep;
256 | input.style.width = '70px'; // size attribute doesn't work for `number'
257 | input.onchange = function() {
258 | that.ligthnessStep = parseInt(this.value);
259 | colorSpan.innerHTML = "("+that.computeNbOfColors().toString()+" цветов)";
260 | };
261 | input.onchange();
262 | // tokenization type
263 | p = document.createElement('P');
264 | div.appendChild(p);
265 | p.innerHTML = "Выберите тип токенизации: ";
266 | var select = document.createElement('SELECT');
267 | p.appendChild(select);
268 | for (var type of new Array('word', 'word and punctuation', 'character')) {
269 | var option = document.createElement('OPTION');
270 | option.text = type;
271 | select.appendChild(option);
272 | }
273 | select.selectedIndex = 0; // default
274 | this.tokenizationType = TOKENIZATION_WORD; // default
275 | select.onchange = function() {
276 | if (this.selectedIndex == 0) {
277 | that.tokenizationType = TOKENIZATION_WORD;
278 | } else if (this.selectedIndex == 1) {
279 | that.tokenizationType = TOKENIZATION_WORD_N_PUNCT;
280 | } else {
281 | that.tokenizationType = TOKENIZATION_CHARACTER;
282 | }
283 | }
284 | // show property warnings
285 | p = document.createElement('P');
286 | div.appendChild(p);
287 | p.innerHTML = "Показать предупреждения: ";
288 | var checkPropertyWarnings = document.createElement('INPUT');
289 | checkPropertyWarnings.type = 'CHECKBOX';
290 | checkPropertyWarnings.checked = true;
291 | p.appendChild(checkPropertyWarnings);
292 | // parse the data
293 | p = document.createElement('P');
294 | div.appendChild(p);
295 | p.innerHTML = "Затем нажмите кнопку, чтобы ";
296 | input = document.createElement('INPUT');
297 | p.appendChild(input);
298 | input.type = 'button';
299 | input.value = "распарсить документ";
300 | input.onclick = function() {
301 | if (!textareaText.value) {
302 | alert("No text!");
303 | } else {
304 | // check if there is a tokenization type defined in the metadata
305 | // of the texte
306 | var tmp;
307 | if ((tmp = textareaText.value.match(/^\s*#\s*TOKENIZATION-TYPE\s*:\s*(\d)/mi)) != null) {
308 | if (parseInt(tmp[1]) != that.tokenizationType) {
309 | alert("Tokenization type of the text doesn't match the value of the list box!");
310 | return;
311 | }
312 | }
313 | that.schema = textareaProperties.value;
314 | that.text = textareaText.value;
315 | that.showPropertyWarnings = checkPropertyWarnings.checked;
316 | that.callback(that);
317 | that.modalDiv.close();
318 | }
319 | };
320 | this.parseButton = input;
321 | // license
322 | p = document.createElement('P');
323 | div.appendChild(p);
324 | p.innerHTML = "SACR -- (C) 2017 Bruno Oberlé. This program "
325 | +"is distributed under the terms of the Mozilla Public License, v.2.0. "
326 | +"This program comes with ABSOLUTELY NO WARRANTY, see the license for more details. "
327 | +"Source code may be found at boberle.com.";
328 | }
329 |
330 | computeNbOfColors() {
331 | return ColorBuilder.computeNbOfColors(this.hueStep, this.saturationStep,
332 | this.ligthnessStep);
333 | }
334 |
335 | clickOnTheParseButton() {
336 | this.parseButton.click();
337 | }
338 |
339 | }
340 |
341 |
342 |
--------------------------------------------------------------------------------