├── .gitignore
├── README.md
├── chrome
    ├── background.js
    ├── content.js
    ├── content_script.js
    ├── loader.js
    ├── manifest.json
    ├── options.html
    ├── options.js
    ├── owl_128.png
    ├── owl_16.png
    ├── owl_48.png
    ├── popup.html
    ├── popup.js
    ├── quick_search.js
    └── shared.js
├── conf
    └── config.json
├── docs
    ├── examples.md
    ├── icon.png
    ├── index.md
    ├── Тарзан.png
    ├── зачем.png
    ├── крупнейший.png
    ├── лиса.png
    ├── причем.png
    ├── продано.png
    ├── проще.png
    ├── свет.png
    ├── сумела.png
    ├── творог.png
    └── форматы.png
└── scripts
    ├── Pipfile
    ├── build-indexes.py
    ├── download-pages.py
    ├── download-resources.py
    ├── package-extension.sh
    └── parse-pages.py


/.gitignore:
--------------------------------------------------------------------------------
1 | generated/
2 | build/
3 | apicache*/
4 | throttle.ctrl
5 | Pipfile.lock
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # About
 2 | 
 3 | This Chrome extension adds an accent mark to show the stress on Russian words. Hovering over a word brings up a popover with its definition(s) from Wiktionary.
 4 | 
 5 | The extension contains an index of all Russian words from the English Wiktionary site, together with all their grammatical forms and stress positions.
 6 | 
 7 | For more information and to install, visit the [extension page](https://chrome.google.com/webstore/detail/slava-russian-dictionary/bcbcmhmpbggnljoapclfcagammaapghi).
 8 | 
 9 | # How to build
10 | 
11 | ```bash
12 | 
13 | pip3 install pipenv
14 | cd scripts
15 | pipenv install
16 | 
17 | pipenv run python3 download-resources.py
18 | pipenv run python3 download-pages.py
19 | pipenv run python3 parse-pages.py
20 | pipenv run python3 build-indexes.py
21 | ./package-extension.sh
22 | 
23 | ```
24 | 
25 | # License
26 | 
27 | This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 Unported License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/3.0/.
28 | 
29 | Icon originally by [karthikeyan](https://openclipart.org/detail/owl-by-karthikeyan), via [Wikimedia Commons](https://commons.wikimedia.org/wiki/File:Clipart_owl.png).
30 | 


--------------------------------------------------------------------------------
/chrome/background.js:
--------------------------------------------------------------------------------
  1 | // Kicks off loading of the two dictionary files and holds the per-tab enabled state.
  2 | console.log("loading dictionary data");
  3 | var forms_q = $.getJSON(chrome.extension.getURL('generated/resources/ru/forms.json')); // pending request for forms.json
  4 | var lemmas_q = $.getJSON(chrome.extension.getURL('generated/resources/ru/words.json')); // pending request for words.json
  5 | var active_tabs = {} // tab id -> payload of the last "set-enabled" message handled for that tab
  6 | // For the active tab: injects the Slava scripts and stylesheet if the tab is enabled; otherwise, when `unload` is truthy, reloads the page.
  7 | function load(unload) {
  8 |   apply_to_tab(function (tab) {
  9 |     if (active_tabs[tab.id]) {
 10 |       chrome.tabs.executeScript(null, { file: "generated/underscore.js" }); // null tab id: Chrome's default target (see chrome.tabs docs)
 11 |       chrome.tabs.executeScript(null, { file: "generated/jquery.js" });
 12 |       chrome.tabs.executeScript(null, { file: "generated/bootstrap.js" });
 13 |       chrome.tabs.executeScript(null, { file: "generated/slavaConfig.js" });
 14 |       chrome.tabs.executeScript(null, { file: "shared.js" });
 15 |       chrome.tabs.executeScript(null, { file: "content_script.js" });
 16 |       chrome.tabs.insertCSS(null, { file: "generated/bootstrap.css" });
 17 |     }
 18 |     else if (unload) {
 19 |       chrome.tabs.executeScript(null, { code: "location.reload()" }); // reloading the page discards the injected markup
 20 |     }
 21 |   });
 22 | }
 23 | // Calls f(tab) for every tab returned by the query below (the active tab of the current window).
 24 | function apply_to_tab(f) {
 25 |   chrome.tabs.query({
 26 |     "currentWindow": true,
 27 |     "active": true //Add any parameters you want
 28 |   }, function (tabs) {//It returns an array
 29 |     $.each(tabs, function (i, tab) {
 30 |       f(tab);
 31 |     });
 32 |   });
 33 | }
 34 | 
 35 | 
 36 | $.when(forms_q, lemmas_q).done(function (forms_r, lemmas_r) {
 37 |   console.log("loaded dictionary data");
 38 |   var forms = forms_r[0];
 39 |   var lemmas = lemmas_r[0];
 40 | 
 41 |   chrome.runtime.onMessage.addListener(
 42 |     function (request, sender, sendResponse) {
 43 |       if (request.type == "resolve") {
 44 |         var retval = {};
 45 |         $.each(request.payload, function (entry_i, entry) {
 46 |           var forms_for_entry = forms[entry];
 47 |           if (forms_for_entry) {
 48 |             var return_entries = Array();
 49 |             $.each(forms_for_entry, function (form_i, form) {
 50 |               return_entries.push([lemmas[form[0]], form[1], form[2], 0]);
 51 | 
 52 |               // e.g. шедшая -> шедший -> идти
 53 |               //      про́ще -> простой -> простоя́ть
 54 |               //      расчлененные -> расчленённый -> расчленить
 55 |               var forms_root = forms[normalize(lemmas[form[0]][0])];
 56 |               if (forms_root) {
 57 |                 $.each(forms_root, function (root_i, root) {
 58 |                   return_entries.push([lemmas[root[0]], [], [], 1]);
 59 |                 });
 60 |               }
 61 | 
 62 | 
 63 | 
 64 |             });
 65 |             retval[entry] = return_entries;
 66 |           }
 67 |         });
 68 |         sendResponse({ payload: { forms: retval } });
 69 |       }
 70 |       else if (request.type == "get-enabled") {
 71 |         apply_to_tab(function (tab) {
 72 |           sendResponse(active_tabs[tab.id])
 73 |         });
 74 |         return true; // mark message response as async
 75 |       }
 76 |       else if (request.type == "set-enabled") {
 77 |         apply_to_tab(function (tab) {
 78 |           active_tabs[tab.id] = request.payload
 79 |         });
 80 |         load(true);
 81 | 
 82 |       }
 83 |       else if (request.type == "load") {
 84 |         load(false);
 85 |       }
 86 |       else if (request.type == "set-language_pref") {
 87 |         chrome.storage.sync.set({ 'language_pref': request.payload });
 88 |       }
 89 |       else if (request.type == "get-language_pref") {
 90 |         chrome.storage.sync.get('language_pref', function (response) {
 91 |           language_pref = response.language_pref || [];
 92 |           for (var key in slavaConfig.wiktionary) {
 93 |             if (!language_pref.includes(key)) {
 94 |               language_pref.push(key);
 95 |             }
 96 |           }
 97 |           sendResponse(language_pref);
 98 |         });
 99 |         return true; // mark message response as async
100 |       }
101 |     });
102 | 
103 | });
104 | 


--------------------------------------------------------------------------------
/chrome/content.js:
--------------------------------------------------------------------------------
1 | (function () {
2 |     'use strict';
3 |     // NOTE(review): mark_words is not defined in this file; content_script.js declares it inside its own function scope — confirm this file is still used.
4 |     $(document).ready(mark_words)
5 | 
6 | })(); //outer function
7 | 


--------------------------------------------------------------------------------
/chrome/content_script.js:
--------------------------------------------------------------------------------
  1 | (function () {
  2 |     'use strict';
  3 |     // Characters replaced by escapeHtml, mapped to their HTML entities.
  4 |     var entityMap = {
  5 |         '&': '&amp;',
  6 |         '<': '&lt;',
  7 |         '>': '&gt;',
  8 |         '"': '&quot;',
  9 |         "'": '&#39;',
 10 |         '/': '&#x2F;',
 11 |         '`': '&#x60;',
 12 |         '=': '&#x3D;',
 13 |     };
 14 | 
 15 |     // Regexp for matching Russian words
 16 |     var re = /[А-яЁё\-\u0301]+/g; // runs of Cyrillic letters, hyphens and U+0301 accents; global, so exec/replace visit every match
 17 | 
 18 |     // parse document without loading images. See https://stackoverflow.com/questions/15113910
 19 |     var virtualDocument = document.implementation.createHTMLDocument('virtual');
 20 |     // Returns String(string) with each of & < > " ' ` = / replaced by its entity from entityMap.
 21 |     function escapeHtml(string) {
 22 |         return String(string).replace(/[&<>"'`=\/]/g, function (s) {
 23 |             return entityMap[s];
 24 |         });
 25 |     }
 26 |     // Returns the text nodes below `node` (depth-first, in child order), skipping SCRIPT/STYLE/NOSCRIPT subtrees; whitespace-only nodes are kept only when includeWhitespaceNodes is truthy.
 27 |     function getTextNodesIn(node, includeWhitespaceNodes) {
 28 |         var textNodes = [], nonWhitespaceMatcher = /\S/;
 29 | 
 30 |         function getTextNodes(node) {
 31 |             if (node.nodeType == Node.TEXT_NODE) {
 32 |                 if (includeWhitespaceNodes || nonWhitespaceMatcher.test(node.nodeValue)) {
 33 |                     textNodes.push(node);
 34 |                 }
 35 |             } else if (!["SCRIPT", "STYLE", "NOSCRIPT"].includes(node.nodeName)) {
 36 |                 for (var i = 0, len = node.childNodes.length; i < len; ++i) {
 37 |                     getTextNodes(node.childNodes[i]);
 38 |                 }
 39 |             }
 40 |         }
 41 | 
 42 |         getTextNodes(node);
 43 |         return textNodes;
 44 |     }
 45 | 
 46 |     function parse_table(table) {
 47 |         var rows = table.children('tbody').children('tr');
 48 |         var t = [];
 49 |         for (var i = 0; i < rows.length; i++) {
 50 |             var r = [];
 51 |             var row = rows[i];
 52 |             var td = $(row).children('td,th');
 53 |             for (var j = 0; j < td.length; j++) {
 54 |                 var c = td.get(j);
 55 |                 // apply colspan
 56 |                 for (var j2 = 0; j2 < c.colSpan; j2++) {
 57 |                     r.push([c, c.rowSpan, $(c).text()]);
 58 |                 }
 59 |             }
 60 |             t.push(r);
 61 |         }
 62 |         for (var i = 0; i < t.length; i++) {
 63 |             var r = t[i];
 64 |             for (var j = 0; j < r.length; j++) {
 65 |                 var c = r[j];
 66 |                 var c0 = $(c[0]);
 67 |                 // apply rowspan
 68 |                 if (c[1] > 1) {
 69 |                     t[i + 1].splice(j, 0, [c[0], c[1] - 1, c[2]]);
 70 |                 }
 71 |                 // remove span e.g. animate / inanimate in Владимир
 72 |                 if (c[0].tagName == 'TH') {
 73 |                     c0.children('span[style]').remove();
 74 |                 }
 75 |             }
 76 |         }
 77 |         return t;
 78 |     }
 79 |     // Appends the trimmed text of `element` to the `grammar` array unless that text is already in it.
 80 |     function add_grammar(grammar, element) {
 81 |         var text = $(element).text().trim();
 82 |         if (!grammar.includes(text)) {
 83 |             grammar.push(text);
 84 |         }
 85 |     }
 86 | 
 87 |     function grammar_from_table(table, element, cases) {
 88 |         var t = parse_table(table);
 89 | 
 90 |         for (var i = 0; i < t.length; i++) {
 91 |             for (var j = 0; j < t[i].length; j++) {
 92 |                 var c = t[i][j];
 93 |                 //multiple elements can match because of colspan
 94 |                 if (c[0] != element) { continue; }
 95 | 
 96 |                 var grammar_tokens = [];
 97 |                 var in_th = false;
 98 |                 for (var i2 = i - 1; i2 >= 0; i2--) {
 99 |                     if (t[i2][j][0].tagName == 'TH') {
100 |                         add_grammar(grammar_tokens, t[i2][j][0]);
101 |                         in_th = true;
102 |                     }
103 |                     else if (in_th) {
104 |                         break;
105 |                     }
106 |                 }
107 |                 in_th = false;
108 |                 for (var j2 = j - 1; j2 >= 0; j2--) {
109 |                     if (t[i][j2][0].tagName == 'TH') {
110 |                         add_grammar(grammar_tokens, t[i][j2][0]);
111 |                         in_th = true;
112 |                     }
113 |                     else if (in_th) {
114 |                         break;
115 |                     }
116 |                 }
117 |                 if (grammar_tokens) {
118 |                     var grammarText = grammar_tokens.reverse().join(" ");
119 |                     if (!cases.includes(grammarText)) {
120 |                         cases.push(grammarText);
121 |                     }
122 |                 }
123 |             }
124 |         }
125 |     }
126 |     // Returns a string made of every character from charA to charZ inclusive,
127 |     // stepping through UTF-16 code units (only the first code unit of each argument is read).
128 |     function genCharArray(charA, charZ) {
129 |         var a = [], i = charA.charCodeAt(0), j = charZ.charCodeAt(0);
130 |         for (; i <= j; ++i) {
131 |             a.push(String.fromCharCode(i));
132 |         }
133 |         return a.join("");
134 |     }
135 |     // Evaluates the XPath `expr` once per element of the jQuery set (as context node) and returns all matched nodes in a single array.
136 |     function xpath_list(jquery_elements, expr) {
137 |         // An unordered iterator is requested, so no node order within one evaluation is relied upon.
138 |         var nodes = [];
139 |         $.each(jquery_elements.get(), function (i, e) {
140 |             var iterator = document.evaluate(expr, e, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
141 |             var thisNode = iterator.iterateNext();
142 |             while (thisNode) {
143 |                 nodes.push(thisNode);
144 |                 thisNode = iterator.iterateNext();
145 |             }
146 |         });
147 |         return nodes;
148 |     }
149 | 
150 |     function parse_wiki(dom, word, lemma, freq, src_lang, lang_pair) {
151 |         var page_url = 'https://' + src_lang + '.wiktionary.org/wiki/' + lemma;
152 |         var lang_span_id = lang_pair.lang_span_id; // FIXME may be _1
153 |         var lang_conf = slavaConfig.wiktionary[src_lang];
154 |         var language_heading = lang_conf.language_heading;
155 |         var langspan = dom.find(language_heading + " > span#" + lang_span_id + ".mw-headline");
156 |         var langsection = langspan.parent().nextUntil(language_heading);
157 | 
158 |         var wordClasses = _.object(_.map(lang_conf.definition_headings, function (v) { return [v, 1]; }));
159 |         //Words may have multiple classes, e.g. под
160 |         //Will be h3, or h4 if multiple etymologies, e.g. погрузиться
161 |         var wordClassHeadings = langsection.find("span.mw-headline").filter(function () { return wordClasses[$(this).text().trim()]; });
162 | 
163 |         // Add word class within definition (since word class heading is removed)
164 |         // Add frequency within definition
165 |         if (freq) {
166 |             var freq_span = ' <span class="slava-wordfreq">' + freq + '</span>';
167 |             wordClassHeadings.each(function () {
168 |                 var s = $('<span class="slava-wordclass">' + $(this).text() + '</span>');
169 |                 if (lang_conf.heading_is_class) {
170 |                     $(this).parent().next().children(':first-child').after(s).after(' ');
171 |                     s.after(freq_span);
172 |                 }
173 |                 else {
174 |                     $(this).parent().next().prepend(freq_span);
175 |                 }
176 |             });
177 |         }
178 | 
179 | 
180 |         var defn = wordClassHeadings.parent().nextUntil('hr,h1,h2,h3,h4,h5'); // e.g. with hr: после
181 |         var full_def = wordClassHeadings.parent().nextUntil(wordClassHeadings.prop('tagName'));
182 | 
183 | 
184 | 
185 |         var upper = genCharArray('A', 'Z') + genCharArray('А', 'Я') + 'Ë';
186 |         var lower = upper.toLowerCase();
187 |         var expr1 = '//td/span[@lang="ru"]';
188 |         var expr2s = ['', '/a']; // свое́й under свой is once not full content of the cell
189 |         var expr3 = '[translate(.,"' + upper + UNICODE_COMBINING_ACUTE_ACCENT + '", "' + lower + '")=translate("' + escapeHtml(word) + '","' + upper + UNICODE_COMBINING_ACUTE_ACCENT + '", "' + lower + '")]/ancestor::td[1]';
190 | 
191 |         var cases = [];
192 |         $.each(expr2s, function (i, expr2) {
193 |             var nodes = xpath_list(full_def, expr1 + expr2 + expr3);
194 | 
195 |             $.each(nodes, function (j, element) {
196 |                 grammar_from_table($(element).closest('table'), element, cases);
197 |             });
198 |         });
199 | 
200 |         var comparatives = xpath_list(full_def, "b[@lang='ru' and preceding-sibling::*[1][name()='i' and text()='comparative']]");
201 |         $.each(comparatives, function (i, element) {
202 |             var comparative = element.textContent;
203 |             var prefix = "по";
204 |             var test;
205 |             if (comparative.startsWith("(" + prefix + ")")) {
206 |                 comparative = comparative.slice(prefix.length + 2);
207 |                 test = [comparative, prefix + comparative];
208 |             }
209 |             else {
210 |                 test = [comparative];
211 |             }
212 |             for (var i = 0; i < test.length; i++) {
213 |                 if (test[i] == word) {
214 |                     cases.push("comparative");
215 |                     break;
216 |                 }
217 |             }
218 |         });
219 | 
220 |         defn = defn.filter(":not(table.flextable)");
221 | 
222 |         // Remove transliterations
223 | 
224 |         defn.find("a[title='Wiktionary:Russian transliteration']").remove();
225 | 
226 |         var translit = defn.find("span.tr, i.tr"); //e.g. with <i>: свет
227 | 
228 |         // Remove parentheses / dashes before / after transliteration
229 |         $.each(translit.get(), function (i, e) {
230 |             var prev = e.previousSibling;
231 |             var next = e.nextSibling;
232 |             if (prev && next) {
233 |                 if (prev.textContent.slice(-1) == "(" && next.textContent.slice(0, 1) == ")") {
234 |                     prev.textContent = prev.textContent.slice(0, -1);
235 |                     next.textContent = next.textContent.slice(1);
236 |                 }
237 |                 if (prev.textContent.trim() == "―") { //e.g. свет
238 |                     prev.textContent = "";
239 |                 }
240 |                 if (next.textContent.slice(0, 2) == ", ") { //e.g. погрузиться
241 |                     next.textContent = next.textContent.slice(2);
242 |                 }
243 |             }
244 |         });
245 | 
246 | 
247 |         translit.remove();
248 | 
249 | 
250 |         // Add hyperlink to original wiktionary page
251 |         // NB e.g. не#Prefix has no headword
252 |         var page_link = document.createElement('a');
253 |         page_link.href = page_url;
254 |         var headword = defn.find("strong.headword");
255 |         if (headword.length) {
256 |             headword.wrap(page_link);
257 |         } else {
258 |             page_link.innerText = lemma;
259 |             defn.prepend('<br/>').prepend(page_link);
260 |         }
261 | 
262 | 
263 |         // Change relative hyperlinks to absolute
264 |         var page_base = page_link.protocol + "//" + page_link.host; // e.g. "https://en.wiktionary.org"
265 |         defn.find('a:not([href*="://"],[href^="mailto:"])').each(function () {
266 |             $(this).attr('href', function (index, value) {
267 |                 if (!value) {
268 |                     return value;
269 |                 }
270 |                 if (value.slice(0, 1) == "#") {
271 |                     return null;
272 |                 }
273 |                 if (value.slice(0, 1) == "/") {
274 |                     return page_base + value;
275 |                 }
276 |                 return page_base + page_link.path + value;
277 |             });
278 | 
279 |             return defn;
280 |         });
281 | 
282 |         // Remove images
283 |         defn.find('img').remove();
284 | 
285 |         // Add cases
286 |         var casesdiv = $("<div class='slava-cases'/>");
287 |         $.each(cases, function (i, e) {
288 |             var casediv = $("<div class='slava-case'/>");
289 |             casediv.append(e);
290 |             casesdiv.append(casediv);
291 |         });
292 | 
293 |         // Build output structure
294 |         var res = $("<div class='slava-res'/>");
295 |         res.append(defn);
296 |         res.append(casesdiv);
297 |         return res;
298 | 
299 |     }
300 | 
301 | 
302 |     function generate_popup(target, lemmas, langs) {
303 |         document.body.style.cursor = "progress";
304 |         get_entries(target, lemmas, langs, function (target, items) {
305 |             if (!target.attr("data-popover_on")) {
306 |                 return;
307 |             }
308 |             var odom = $('<div class="slava-popover"/>');
309 |             $.each(items, function () {
310 |                 odom.append($(this));
311 |             });
312 |             document.body.style.cursor = "auto";
313 | 
314 |             var placement = 'bottom';
315 |             if ((target.offset().top - $(window).scrollTop()) / window.innerHeight > .5)
316 |                 placement = 'top';
317 | 
318 |             target.popover({
319 |                 trigger: 'manual',
320 |                 content: odom,
321 |                 container: 'body',
322 |                 placement: placement,
323 |                 html: true
324 |             });
325 |             target.popover("show");
326 |         });
327 |     }
328 | 
329 |     function get_entries(target, lemmas, langs, callback) {
330 |         var src_lang = langs[0];
331 |         var word = target.text();
332 |         var target_lang = 'ru';
333 |         var lang_pair = slavaConfig.langpairs[src_lang][target_lang];
334 |         var ajax_queries = $.map(_.keys(lemmas), function (lemma) {
335 |             var url = 'https://' + src_lang + '.wiktionary.org/w/api.php?action=parse&format=json&page=' + lemma + '&prop=text&origin=*';
336 |             return $.getJSON(url);
337 |         });
338 | 
339 |         $.when.apply($, ajax_queries).done(function () {
340 |             var odom = Array();
341 | 
342 |             var res = arguments;
343 |             if (ajax_queries.length < 2) {
344 |                 res = [arguments];
345 |             }
346 |             $.each(res, function (i, a1) {
347 |                 var parsed = a1[0].parse;
348 |                 if (parsed) {
349 |                     var html = parsed.text['*'];
350 |                     var dom = $(html, virtualDocument);
351 |                     var freq = lemmas[parsed.title];
352 |                     dom = parse_wiki(dom, word, parsed.title, freq, src_lang, lang_pair);
353 |                     if (dom.children().children().length) {
354 |                         odom.push(dom);
355 |                     }
356 | 
357 |                 }
358 |             });
359 |             if (odom.length || langs.length <= 1) {
360 |                 callback(target, odom);
361 |             }
362 |             else {
363 |                 get_entries(target, lemmas, langs.slice(1), callback);
364 |             }
365 |         });
366 | 
367 |     }
368 | 
369 |     function gen_popover(item) {
370 |         var lemmas = JSON.parse(item.attr("data-lemmas"));
371 |         if (lemmas) {
372 |             chrome.runtime.sendMessage({ type: "get-language_pref" }, function (response) {
373 | 
374 |                 if (response) {
375 |                     generate_popup(item, lemmas, response);
376 |                 }
377 |                 else {
378 |                     console.log("No response to get-language_pref");
379 |                 }
380 |             });
381 |         }
382 |     }
383 |     // Hover handler: hides any visible popovers, flags the word as hovered and, if it is still flagged 100 ms later, starts building its popover.
384 |     function slava_mouseenter(event) {
385 |         $(".popover").css("display", "none");
386 |         event.target.setAttribute("data-popover_on", "1");
387 |         setTimeout(function () {
388 |             if (!event.target.getAttribute("data-popover_on")) {
389 |                 return;
390 |             }
391 |             gen_popover($(event.target));
392 | 
393 |         }, 100);
394 | 
395 |     }
396 |     // Un-hover handler: clears the hover flag at once (so a pending popup is skipped) and hides this word's popover 10 seconds later.
397 |     function slava_mouseleave(event) {
398 |         event.target.removeAttribute("data-popover_on");
399 |         setTimeout(function () { $(event.target).popover("hide"); }, 10000);
400 |     }
401 | 
402 |     function mark_word(word_item, callback) {
403 |         var t1 = word_item.text();
404 | 
405 |         var allWords = Array();
406 |         var match;
407 |         while (match = re.exec(t1)) {
408 |             allWords.push(normalize(match[0]));
409 |         }
410 | 
411 |         chrome.runtime.sendMessage({ type: "resolve", payload: _.unique(allWords) }, function (response) {
412 |             var forms = response.payload.forms;
413 | 
414 |             var str = t1.replace(re, function (match, group) {
415 |                 var normalized_word = normalize(match);
416 |                 var ref = match;
417 |                 var lemmasf = {};
418 |                 if (forms[normalized_word]) {
419 |                     var entry0 = forms[normalized_word];
420 |                     var stress_chars = Array();
421 |                     var spellings = {};
422 |                     $.each(entry0, function (i, entry) {
423 |                         var lemma_entry = entry[0];
424 |                         stress_chars = stress_chars.concat(entry[1]);
425 |                         lemmasf[lemma_entry[0]] = lemma_entry[1];
426 |                         var is_derived = entry[3];
427 |                         if (!is_derived) {
428 |                             var spelling = entry[2].length ? entry[2][0] : normalized_word;
429 |                             spellings[spelling] = 1;
430 |                         }
431 |                     });
432 |                     var matchn = match.replace(UNICODE_COMBINING_ACUTE_ACCENT, '');
433 |                     if (!spellings || spellings[matchn]) { ref = matchn }
434 |                     else {
435 |                         ref = _.keys(spellings)[0];
436 | 
437 |                         // match capitalization
438 |                         if (match[0].toLowerCase() != match[0]) {
439 |                             if (match.length > 1 && match[1].toLowerCase() != match[1]) {
440 |                                 ref = ref.toUpperCase();
441 |                             }
442 |                             else {
443 |                                 ref = ref.charAt(0).toUpperCase() + ref.slice(1);
444 |                             }
445 |                         }
446 |                     }
447 | 
448 |                     // mark stress with accent character
449 |                     if (stress_chars) {
450 |                         var stress_pos = _.uniq(stress_chars).sort();
451 |                         var chars = ref;
452 |                         var accented = "";
453 |                         var s_pos = 0;
454 |                         $.each(stress_pos, function (i, stress_char) {
455 |                             accented += chars.slice(s_pos, stress_char) + UNICODE_COMBINING_ACUTE_ACCENT;
456 |                             s_pos = stress_char;
457 |                         });
458 |                         accented += chars.slice(s_pos);
459 |                         ref = accented;
460 |                     }
461 |                 }
462 |                 else {
463 |                     if (match.length > 3 && match[0] === match[0].toLowerCase()) {
464 |                         console.log("[Slava] No match:" + match);
465 |                     }
466 |                     lemmasf[match] = 0;
467 |                 }
468 |                 var slemmas = JSON.stringify(lemmasf);
469 |                 return "</span>" + '<span class="slava-pop" data-lemmas="' + escapeHtml(slemmas) + '">' + ref + '</span><span>';
470 |             }); // replace
471 |             str = "<span>" + str + "</span>";
472 |             var span = $(str);
473 |             word_item.replaceWith(span);
474 |             if (callback)
475 |                 callback(span);
476 |         });
477 |     }
478 |     // Injects the Slava stylesheet into the page head and marks every text node under document.body.
479 |     function mark_words() {
480 |         // The long .slava-popover rule starts with "all: initial" and then sets each property explicitly.
481 | 
482 |         $('head').append('<style>div.h-usage-example {font-size:80%} .mw-empty-elt{display:none} .slava-popover { all: initial; align-content: normal ; align-items: normal ; align-self: auto ; alignment-baseline: auto ; all: ; animation-delay: 0s ; animation-direction: normal ; animation-duration: 0s ; animation-fill-mode: none ; animation-iteration-count: 1 ; animation-name: none ; animation-play-state: running ; animation-timing-function: ease ; backface-visibility: visible ; background-attachment: scroll ; background-blend-mode: normal ; background-clip: border-box ; background-color: rgba(0, 0, 0, 0) ; background-image: none ; background-origin: padding-box ; background-position-x: 0% ; background-position-y: 0% ; background-repeat-x: ; background-repeat-y: ; background-size: auto ; baseline-shift: 0px ; border-bottom-color: rgb(51, 51, 51) ; border-bottom-left-radius: 0px ; border-bottom-right-radius: 0px ; border-bottom-style: none ; border-bottom-width: 0px ; border-collapse: separate ; border-image-outset: 0px ; border-image-repeat: stretch ; border-image-slice: 100% ; border-image-source: none ; border-image-width: 1 ; border-left-color: rgb(51, 51, 51) ; border-left-style: none ; border-left-width: 0px ; border-right-color: rgb(51, 51, 51) ; border-right-style: none ; border-right-width: 0px ; border-top-color: rgb(51, 51, 51) ; border-top-left-radius: 0px ; border-top-right-radius: 0px ; border-top-style: none ; border-top-width: 0px ; bottom: auto ; box-shadow: none ; box-sizing: border-box ; break-after: auto ; break-before: auto ; break-inside: auto ; buffered-rendering: auto ; caption-side: top ; caret-color: rgb(51, 51, 51) ; clear: none ; clip: auto ; clip-path: none ; clip-rule: nonzero ; color: rgb(51, 51, 51) ; color-interpolation: sRGB ; color-interpolation-filters: linearRGB ; color-rendering: auto ; column-count: auto ; column-fill: balance ; column-gap: normal ; column-rule-color: rgb(51, 51, 51) ; column-rule-style: none ; 
column-rule-width: 0px ; column-span: none ; column-width: auto ; contain: none ; content: ; counter-increment: none ; counter-reset: none ; cursor: auto ; cx: 0px ; cy: 0px ; d: none ; direction: ltr ; display: block ; dominant-baseline: auto ; empty-cells: show ; fill: rgb(0, 0, 0) ; fill-opacity: 1 ; fill-rule: nonzero ; filter: none ; flex-basis: auto ; flex-direction: row ; flex-grow: 0 ; flex-shrink: 1 ; flex-wrap: nowrap ; float: none ; flood-color: rgb(0, 0, 0) ; flood-opacity: 1 ; font-family: "Helvetica Neue", Helvetica, Arial, sans-serif ; font-feature-settings: normal ; font-kerning: auto ; font-size: 14px ; font-stretch: 100% ; font-style: normal ; font-variant-caps: normal ; font-variant-east-asian: normal ; font-variant-ligatures: normal ; font-variant-numeric: normal ; font-variation-settings: normal ; font-weight: 400 ; grid-auto-columns: auto ; grid-auto-flow: row ; grid-auto-rows: auto ; grid-column-end: auto ; grid-column-gap: 0px ; grid-column-start: auto ; grid-row-end: auto ; grid-row-gap: 0px ; grid-row-start: auto ; grid-template-areas: none ; grid-template-columns: none ; grid-template-rows: none ; hyphens: manual ; image-rendering: auto ; isolation: auto ; justify-content: normal ; justify-items: normal ; justify-self: auto ; left: auto ; letter-spacing: normal ; lighting-color: rgb(255, 255, 255) ; line-break: auto ; line-height: 20px ; list-style-image: none ; list-style-position: outside ; list-style-type: disc ; margin-bottom: 0px ; margin-left: 0px ; margin-right: 0px ; margin-top: 0px ; marker-end: none ; marker-mid: none ; marker-start: none ; mask: none ; mask-type: luminance ; max-block-size: none ; max-height: 95vh ; max-inline-size: none ; max-width: none ; min-block-size: 0px ; min-height: 0px ; min-inline-size: 0px ; min-width: 0px ; mix-blend-mode: normal ; object-fit: fill ; object-position: 50% 50% ; offset-distance: 0px ; offset-path: none ; offset-rotate: auto 0deg ; opacity: 1 ; order: 0 ; orphans: 2 ; outline-color: 
rgb(51, 51, 51) ; outline-offset: 0px ; outline-style: none ; outline-width: 0px ; overflow-anchor: auto ; overflow-wrap: normal ; overflow-x: visible ; overflow-y: auto ; overscroll-behavior-x: auto ; overscroll-behavior-y: auto ; padding-bottom: 0px ; padding-left: 0px ; padding-right: 0px ; padding-top: 0px ; page: ; paint-order: fill stroke markers ; perspective: none ; pointer-events: auto ; position: static ; quotes: ; r: 0px ; resize: none ; right: auto ; rx: auto ; ry: auto ; scroll-behavior: auto ; shape-image-threshold: 0 ; shape-margin: 0px ; shape-outside: none ; shape-rendering: auto ; size: ; speak: normal ; stop-color: rgb(0, 0, 0) ; stop-opacity: 1 ; stroke: none ; stroke-dasharray: none ; stroke-dashoffset: 0px ; stroke-linecap: butt ; stroke-linejoin: miter ; stroke-miterlimit: 4 ; stroke-opacity: 1 ; stroke-width: 1px ; tab-size: 8 ; table-layout: auto ; text-align: start ; text-align-last: auto ; text-anchor: start ; text-combine-upright: none ; text-decoration-color: rgb(51, 51, 51) ; text-decoration-line: none ; text-decoration-skip-ink: auto ; text-decoration-style: solid ; text-indent: 0px ; text-orientation: mixed ; text-overflow: clip ; text-rendering: auto ; text-shadow: none ; text-size-adjust: 100% ; text-transform: none ; text-underline-position: auto ; top: auto ; touch-action: auto ; transform: none ; transform-box: view-box ; transform-style: flat ; transition-delay: 0s ; transition-duration: 0s ; transition-property: all ; transition-timing-function: ease ; unicode-bidi: normal ; user-select: auto ; vector-effect: none ; vertical-align: baseline ; visibility: visible ; white-space: normal ; widows: 2 ; will-change: auto ; word-break: normal ; word-spacing: 0px ; word-wrap: normal ; writing-mode: horizontal-tb ; x: 0px ; y: 0px ; z-index: auto ; zoom: 1 ; } span.slava-wordclass{font-variant:small-caps} span.slava-wordfreq{font-size:70%} .slava-cases{font-size:70%; color:gray} .slava-pop {color:inherit; text-decoration: none;} 
.slava-pop:hover { text-decoration: none; border-bottom: #666666; border-width: 0px 0px 1px 0px; border-style: none none dotted none;}</style>');
483 | 
484 |         var v = getTextNodesIn(document.body); // whitespace-only text nodes are skipped (second argument omitted)
485 | 
486 |         $.each(v, function () {
487 |             mark_word($(this), null);
488 |         });
489 | 
490 | 
491 |     }
492 | 
493 | 
494 |     $(document).ready(mark_words)
495 | 
496 |     $("body").on("mouseenter", ".slava-pop", slava_mouseenter);
497 |     $("body").on("mouseleave", ".slava-pop", slava_mouseleave);
498 | 
499 |     $("body").append('<div id="slava-quick-input" style="z-index: 2147483647; visibility:hidden; position: fixed; top:0; right:0; background-color:#FAFAFA"><input id="slava-try" type="text"></input><div id="slava-try-res"></div><div>&nbsp;</div></div>');
500 | 
501 |     $('#slava-try').on("input", function () {
502 |         $('#slava-try-res').text($(this).val());
503 |         $.each($('#slava-try-res').contents(), function () {
504 |             mark_word($(this), function (obj) {
505 |                 var target = obj.filter('.slava-pop');
506 |                 target.attr("data-popover_on", "1");
507 |                 gen_popover(target);
508 |             }
509 |             );
510 |         });
511 |     });
512 | 
513 | })(); //outer function
514 | 


--------------------------------------------------------------------------------
/chrome/loader.js:
--------------------------------------------------------------------------------
// Sends a "load" message to the extension runtime. What happens next is
// decided by whichever script listens for it (presumably background.js).
function load_slava() {
    var request = { type: "load" };
    chrome.runtime.sendMessage(request);
}

// Fire the request as soon as this content script is evaluated.
load_slava();
5 | 


--------------------------------------------------------------------------------
/chrome/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "manifest_version": 2,
 3 |   "name": "Slava Russian Dictionary",
 4 |   "description": "This extension writes accents on Russian words, and adds a popup to each word with its English definition.",
 5 |   "version": "0.0.0.4",
 6 |   "browser_action": {
 7 |     "default_title": "Enrich text with accents and dictionary popups",
 8 |     "default_icon": "owl_16.png",
 9 |     "default_popup": "popup.html"
10 |   },
11 |   "options_ui": {
12 |     "page": "options.html",
13 |     "chrome_style": true
14 |   },
15 |   "icons": {
16 |     "16": "owl_16.png",
17 |     "48": "owl_48.png",
18 |     "128": "owl_128.png"
19 |   },
20 |   "permissions": [
21 |     "storage",
22 |     "activeTab",
23 |     "<all_urls>"
24 |   ],
25 |   "background": {
26 |     "scripts": [
27 |       "generated/jquery.js",
28 |       "generated/slavaConfig.js",
29 |       "shared.js",
30 |       "background.js"
31 |     ],
32 |     "persistent": false
33 |   },
34 |   "content_scripts": [
35 |     {
36 |       "matches": ["<all_urls>"],
37 |       "js": ["loader.js"]
38 |     }
39 |   ],
40 |   "web_accessible_resources": [
41 |     "generated/resources/*"
42 |   ]
43 | }
44 | 


--------------------------------------------------------------------------------
/chrome/options.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <link href="generated/bootstrap.css" rel="stylesheet">
 5 |   <script src="generated/slavaConfig.js" charset="UTF-8"></script>
 6 |   <script src="generated/jquery.js"></script>
 7 |   <script src="generated/bootstrap.js"></script>
 8 |   <style type="text/css">
 9 |       body {
10 |         margin: 10px;
11 |         white-space: nowrap;
12 |       }
13 | 
14 |       h1 {
15 |         font-size: 15px;
16 |       }
17 | 
18 |   </style>
19 |   <script src="generated/Sortable.js"></script>
20 |   <script src="options.js"></script>
21 |   <title></title>
22 | </head>
23 | <body>
24 |   <h1>Slava Translator</h1>
25 | Preference order of definition language (drag to change)
26 |       <div id="slava-langs" class="list-group"></div>
27 | </body>
28 | </html>
29 | 


--------------------------------------------------------------------------------
/chrome/options.js:
--------------------------------------------------------------------------------
 1 | document.addEventListener('DOMContentLoaded', () => {
 2 | 
 3 |   chrome.runtime.sendMessage({ type: "get-language_pref" }, function (lang_pref) {
 4 |     var langs = $('#slava-langs');
 5 |     $.each(lang_pref, function (i, lang) {
 6 |       text = slavaConfig.wiktionary[lang].name;
 7 |       $('<div class="list-group-item"/>').appendTo(langs).text(text).attr("data-lang", lang);
 8 |     });
 9 | 
10 |     var sortable = Sortable.create(langs.get(0), {
11 |       animation: 150,
12 |       onSort: function (evt) {
13 |         langs = $.map($(evt.to).children('div'), function (el) {
14 |           return $(el).attr("data-lang");
15 |         });
16 |         chrome.runtime.sendMessage({ type: "set-language_pref", payload: langs });
17 |       }
18 |     });
19 | 
20 |   });
21 | 
22 | })
23 | 


--------------------------------------------------------------------------------
/chrome/owl_128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/chrome/owl_128.png


--------------------------------------------------------------------------------
/chrome/owl_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/chrome/owl_16.png


--------------------------------------------------------------------------------
/chrome/owl_48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/chrome/owl_48.png


--------------------------------------------------------------------------------
/chrome/popup.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <!--
 3 |  This page is shown when the extension button is clicked, because the
 4 |  "browser_action" field in manifest.json contains the "default_popup" key with
 5 |  value "popup.html".
 6 |  -->
 7 | <html>
 8 | <head>
 9 |   <link href="generated/bootstrap.css" rel="stylesheet">
10 |   <link href="generated/bootstrap-toggle.css" rel="stylesheet">
11 |   <script src="generated/jquery.js"></script>
12 |   <script src="generated/bootstrap.js"></script>
13 |   <script src="generated/bootstrap-toggle.js"></script>
14 |   <style type="text/css">
15 |       body {
16 |         margin: 10px;
17 |         white-space: nowrap;
18 |       }
19 | 
20 |       h1 {
21 |         font-size: 15px;
22 |       }
23 | 
24 |       #container {
25 |         align-items: center;
26 |         display: flex;
27 |         justify-content: space-between;
28 |       }
29 |   </style>
30 |   <script src="popup.js"></script>
31 |   <title></title>
32 | </head>
33 | <body>
34 |   <h1>Slava Translator</h1>
35 |   <div id="container">
36 |     <div class="checkbox">
37 |       <label><input type="checkbox" id="slava-enable" data-toggle=
38 |       "toggle"> Enable for this tab</label>
39 |       <div id="slava-disable-reload" style="visibility:hidden">
40 |         Page reloaded with Slava disabled.
41 |       </div>
42 |     </div>
43 |   </div>
44 | <button id="go-to-options">Options</button>
45 | <button id="go-to-search" style="visibility:hidden">Search word</button>
46 | </body>
47 | </html>
48 | 


--------------------------------------------------------------------------------
/chrome/popup.js:
--------------------------------------------------------------------------------
 1 | document.addEventListener('DOMContentLoaded', () => {
 2 |   chrome.runtime.sendMessage({ type: "get-enabled" }, function (response) {
 3 |     if (response) {
 4 |       $('#slava-enable').bootstrapToggle('on');
 5 |       $('#go-to-search').css('visibility', 'visible');
 6 |     }
 7 |     $('#slava-enable').change(function () {
 8 |       var checked = $(this).prop('checked');
 9 |       chrome.runtime.sendMessage({ type: "set-enabled", payload: checked });
10 |       $('#slava-disable-reload').css('visibility', checked ? 'hidden' : 'visible');
11 |       $('#go-to-search').css('visibility', (!checked) ? 'hidden' : 'visible');
12 | 
13 |     });
14 |   });
15 | 
16 |   $('#go-to-options').click(function () {
17 |     chrome.runtime.openOptionsPage();
18 |   });
19 | 
20 |   $('#go-to-search').click(function () {
21 |     chrome.tabs.executeScript(null, { file: "quick_search.js" });
22 |   });
23 | })
24 | 


--------------------------------------------------------------------------------
/chrome/quick_search.js:
--------------------------------------------------------------------------------
// Reveals the quick-search box with id "slava-quick-input".
// The box may be missing (e.g. the content script has not run in this tab),
// so guard the lookup instead of throwing a TypeError. The function wrapper
// keeps the local out of the page's script scope on repeated injection.
(function () {
  var quickInput = document.getElementById("slava-quick-input");
  if (quickInput) {
    quickInput.style.visibility = "visible";
  }
})();
2 | 


--------------------------------------------------------------------------------
/chrome/shared.js:
--------------------------------------------------------------------------------
 1 | 
 2 |     // Unicode COMBINING ACUTE ACCENT character, used to mark stress on Russian words
 3 |     UNICODE_COMBINING_ACUTE_ACCENT = '\u0301';
 4 | 
 5 | console.log("loading shared");
 6 |     function normalize(str) {
 7 |         str = str.replace(UNICODE_COMBINING_ACUTE_ACCENT, '');
 8 |         str = str.toLowerCase();
 9 |         str = str.replace('ё', 'е');
10 |         return str;
11 |     }
12 | 


--------------------------------------------------------------------------------
/conf/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "langpairs": {
  3 |     "en": {
  4 |       "ru": {
  5 |         "lang_span_name": "Russian",
  6 |         "lang_span_id": "Russian",
  7 |         "include": [
  8 |           {
  9 |             "category": "Russian lemmas",
 10 |             "recurse": 1
 11 |           },
 12 |           {
 13 |             "category": "Russian proper nouns"
 14 |           },
 15 |           {
 16 |             "category": "Russian participles"
 17 |           },
 18 |           {
 19 |             "category": "Russian adjective superlative forms"
 20 |           }
 21 |         ],
 22 |         "exclude": [
 23 |           {
 24 |             "category": "Russian spellings with е instead of ё"
 25 |           },
 26 |           {
 27 |             "category": "Russian phrases"
 28 |           },
 29 |           {
 30 |             "category": "Russian proverbs"
 31 |           },
 32 |           {
 33 |             "category": "Russian obsolete forms"
 34 |           }
 35 |         ]
 36 |       }
 37 |     },
 38 |     "ru": {
 39 |       "ru": {
 40 |         "lang_span_name": "Русский",
 41 |         "lang_span_id": "Русский"
 42 |       }
 43 |     },
 44 |     "fr": {
 45 |       "ru": {
 46 |         "lang_span_name": "Russe",
 47 |         "lang_span_id": "Russe"
 48 |       }
 49 |     }
 50 |   },
 51 |   "languages": {
 52 |     "ru": {
 53 |       "frequency_file": "https://github.com/Baksalyar/mc.hertzbeat.ru-Frequency-Dictionaries/raw/master/mc.hertzbeat.ru_frequency_dict.txt"
 54 |     }
 55 |   },
 56 |   "wiktionary": {
 57 |     "en": {
 58 |       "name": "English",
 59 |       "language_heading": "h2",
 60 |       "heading_is_class": true,
 61 |       "definition_headings": [
 62 |         "Circumfix",
 63 |         "Interfix",
 64 |         "Prefix",
 65 |         "Affix",
 66 |         "Suffix",
 67 |         "Abbreviation",
 68 |         "Adjective",
 69 |         "Adverb",
 70 |         "Conjunction",
 71 |         "Combining form",
 72 |         "Diacritical mark",
 73 |         "Determiner",
 74 |         "Interjection",
 75 |         "Idiom",
 76 |         "Morpheme",
 77 |         "Letter",
 78 |         "Noun",
 79 |         "Numeral",
 80 |         "Particle",
 81 |         "Participle",
 82 |         "Phrase",
 83 |         "Predicative",
 84 |         "Preposition",
 85 |         "Prepositional phrase",
 86 |         "Pronoun",
 87 |         "Proper noun",
 88 |         "Proverb",
 89 |         "Symbol",
 90 |         "Verb"
 91 |       ]
 92 |     },
 93 |     "ru": {
 94 |       "name": "Russian (русский)",
 95 |       "language_heading": "h1",
 96 |       "heading_is_class": false,
 97 |       "definition_headings": [
 98 |         "Значение"
 99 |       ]
100 |     },
101 |     "fr": {
102 |       "name": "French (français)",
103 |       "language_heading": "h2",
104 |       "heading_is_class": true,
105 |       "definition_headings": [
106 |         "Circonfixe",
107 |         "Interfixe",
108 |         "Préfixe",
109 |         "Affixe",
110 |         "Suffixe",
111 |         "Abréviation",
112 |         "Adjectif",
113 |         "Adverbe",
114 |         "Conjonction",
115 |         "Déterminant",
116 |         "Interjection",
117 |         "Idiome",
118 |         "Morphème",
119 |         "Lettre",
120 |         "Nom commun",
121 |         "Adjectif numéral",
122 |         "Particule",
123 |         "Participe",
124 |         "Phrase",
125 |         "Prédicatif",
126 |         "Préposition",
127 |         "Pronom",
128 |         "Nom propre",
129 |         "Proverbe",
130 |         "Symbole",
131 |         "Verbe"
132 |       ]
133 |     }
134 |   }
135 | }
136 | 


--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
 1 | | Example word | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Example&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; | Feature |
 2 | | ---         |     ---      |          --- |
 3 | | зачем | ![зачем](зачем.png) | Marks accent position in the original word. When cursor is moved over the word, pops up a definition including word base form (here *заче́м*), class (here *Adverb*), frequency rank (here *1624*, meaning it is the 1624th most commonly used word) and a definition. Click on the word base form to navigate to the Wiktionary entry. The frequency rank is useful for language learners, who should focus on learning first the most common 1000 words, then the most common 5000 etc. |
 4 | | свет | ![свет](свет.png) | Removes Latin transcription of Russian text from Wiktionary entries. |
 5 | | причем, четырехсот | ![причем](причем.png) | Restores *ё* letter in original word when spelled *е*. |
 6 | | форматы | ![форматы](форматы.png) | For nouns, indicates the gender (here *m*=masculine) and kind (here *inan*=inanimate). In the popup, indicates the declension(s) matched by the original word (here *genitive singular* and *nominative plural*). |
 7 | | сумела | ![сумела](сумела.png) | For verbs, indicates the aspect (here *pf*=perfective) and gives the paired verb (here *уме́ть*). In the popup, indicates the conjugation(s) matched by the original word (here *feminine (я/ты/она́) singular*). |
 8 | | продано, расчлененные | ![продано](продано.png) | Recognizes declined forms of verb participles. |
 9 | | проще, попроще | ![проще](проще.png) | Recognizes comparative forms. |
10 | | крупнейший, крупнейшего | ![крупнейший](крупнейший.png) | Recognizes superlative forms. |
11 | | творог, свекла | ![творог](творог.png) | Marks all accents where several accent positions are accepted. The reader must stress only one of the positions. |
12 | | лиса | ![лиса](лиса.png) | Marks all accents where several accent positions are possible depending on grammar. The reader must stress the correct position based on grammar. |
13 | | Тарзан | ![Тарзан](Тарзан.png) | When a word is missing from English Wiktionary, displays entry from Russian Wiktionary instead. The order of language preferences is configurable in the extension options. |
14 | 


--------------------------------------------------------------------------------
/docs/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/icon.png


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # About
 2 | 
 3 | This Chrome extension adds an accent to mark the stress on words in Russian. Hovering over a word brings up a popover with its definition(s) from Wiktionary.
 4 | 
 5 | The extension contains an index of all Russian words from the English Wiktionary site with all their grammatical forms and accent position.
 6 | 
 7 | # How to use
 8 | 
 9 | After the extension is installed, click on this icon in the Chrome toolbar:
10 | 
11 | ![icon](icon.png)
12 | 
13 | Toggle the switch to enable the extension for the current tab.
14 | 
15 | # Details
16 | 
17 | See the [examples](examples.md) page for more information.
18 | 
19 | 
20 | ### Reporting Issues and Feedback
21 | 
22 | If you encounter any bugs, please file an issue in the [Issues](https://github.com/algattik/SlavaTranslator/issues) section of our GitHub repo.
23 | 
24 | # More information
25 | 
26 | Source code and license information can be found at [our GitHub repository](https://github.com/algattik/SlavaTranslator/).
27 | 


--------------------------------------------------------------------------------
/docs/Тарзан.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/Тарзан.png


--------------------------------------------------------------------------------
/docs/зачем.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/зачем.png


--------------------------------------------------------------------------------
/docs/крупнейший.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/крупнейший.png


--------------------------------------------------------------------------------
/docs/лиса.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/лиса.png


--------------------------------------------------------------------------------
/docs/причем.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/причем.png


--------------------------------------------------------------------------------
/docs/продано.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/продано.png


--------------------------------------------------------------------------------
/docs/проще.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/проще.png


--------------------------------------------------------------------------------
/docs/свет.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/свет.png


--------------------------------------------------------------------------------
/docs/сумела.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/сумела.png


--------------------------------------------------------------------------------
/docs/творог.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/творог.png


--------------------------------------------------------------------------------
/docs/форматы.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/algattik/SlavaTranslator/61eb2eed5c6cc3ee29805662606f63ffd3672b46/docs/форматы.png


--------------------------------------------------------------------------------
/scripts/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | 
 3 | url = "https://pypi.python.org/simple"
 4 | verify_ssl = true
 5 | name = "pypi"
 6 | 
 7 | 
 8 | [packages]
 9 | 
10 | pywikibot = "*"
11 | lxml = "*"
12 | progressbar2 = "*"
13 | 
14 | 
15 | [dev-packages]
16 | 
17 | 


--------------------------------------------------------------------------------
/scripts/build-indexes.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import json
  3 | from progressbar import progressbar
  4 | from collections import defaultdict
  5 | 
  6 | parsed_top_dir = Path("../build/parsed")
  7 | index_top_dir = Path("../build/index")
  8 | resources_dir = Path("../build/resources")
  9 | 
 10 | config = json.load(open("../conf/config.json"))
 11 | 
 12 | # ranking, adapted from https://stackoverflow.com/a/30801799
 13 | 
 14 | def rank_simple(vector, reverse):
 15 |     return sorted(range(len(vector)), key=vector.__getitem__, reverse=reverse)
 16 | 
 17 | def rankdata(a, method='average', reverse=False):
 18 |     n = len(a)
 19 |     ivec=rank_simple(a, reverse)
 20 |     svec=[a[rank] for rank in ivec]
 21 |     sumranks = 0
 22 |     dupcount = 0
 23 |     newarray = [0]*n
 24 |     for i in range(n):
 25 |         sumranks += i
 26 |         dupcount += 1
 27 |         if i==n-1 or svec[i] != svec[i+1]:
 28 |             for j in range(i-dupcount+1,i+1):
 29 |                 if method=='average':
 30 |                     averank = sumranks / float(dupcount) + 1
 31 |                     newarray[ivec[j]] = averank
 32 |                 elif method=='max':
 33 |                     newarray[ivec[j]] = i+1
 34 |                 elif method=='min':
 35 |                     newarray[ivec[j]] = i+1 -dupcount+1
 36 |                 else:
 37 |                     raise NameError('Unsupported method')
 38 | 
 39 |             sumranks = 0
 40 |             dupcount = 0
 41 | 
 42 | 
 43 |     return newarray
 44 | 
 45 | 
 46 | for src_lang, targets in config["langpairs"].items():
 47 |     for target_lang, langpair in targets.items():
 48 | 
 49 |         print("%s => %s" % (src_lang, target_lang))
 50 |         index_dir=Path(index_top_dir, src_lang, target_lang)
 51 |         parsed_dir=Path(parsed_top_dir, src_lang, target_lang)
 52 |         index_dir.mkdir(parents=True, exist_ok=True)
 53 |         words = dict()
 54 |         forms = defaultdict(lambda : defaultdict(lambda : [set(), set()]))
 55 | 
 56 |         freqfile = Path(resources_dir, target_lang + ".freq.txt")
 57 |         freq2 = defaultdict(lambda : 0)
 58 |         with open(freqfile) as p:
 59 |             for line in p:
 60 |                 (form, count) = line.rstrip('\n').split(' ')
 61 |                 c = int(count)
 62 |                 freq2[form] = freq2[form] + c
 63 | 
 64 |         print("Listing files...")
 65 |         files = sorted(parsed_dir.glob('*.dat'))
 66 | 
 67 |         print("Parsing files...")
 68 |         word_counter = 0
 69 |         for parsed in progressbar(files):
 70 |             with open(parsed) as p:
 71 |                 for line in p:
 72 |                     s = line.rstrip('\n')
 73 |                     (declined, base, stress, canonical) = s.split('\t')
 74 |                     if not base in words:
 75 |                         words[base] = [word_counter, 0]
 76 |                         word_counter = word_counter + 1
 77 |                     b = words[base]
 78 |                     word_i = b[0]
 79 |                     if stress != "0":
 80 |                         forms[declined][word_i][0].add(int(stress))
 81 |                     forms[declined][word_i][1].add(canonical)
 82 |                     if declined in freq2:
 83 |                         b[1] = b[1] + freq2[declined]
 84 | 
 85 |         print("Assembling words...")
 86 |         words_arr = sorted(list(words.items()), key = lambda e: e[1][0])
 87 | 
 88 |         print("Computing frequency ranks...")
 89 |         freq_ranks = rankdata([f[1] for f in words.values()], method='min', reverse=True)
 90 |         words_with_freq = list(zip([w[0] for w in words_arr], freq_ranks))
 91 | 
 92 |         print("Assembling forms...")
 93 |         for declined, d in progressbar(forms.items()):
 94 |             words_new = []
 95 |             for word_i, entry in d.items():
 96 |                 (stresses, canonicals) = entry
 97 |                 if len(canonicals) == 1 and next(iter(canonicals)) == declined:
 98 |                     canonicals = []
 99 |                 words_new.append([word_i, list(stresses), list(canonicals)])
100 |             forms[declined] = words_new
101 | 
102 |         print("Writing output in [%s]..." % index_dir)
103 |         with open(Path(index_dir, "words.json"), "w") as f:
104 |             json.dump(words_with_freq, f, ensure_ascii=False, separators=(',', ':'))
105 |         with open(Path(index_dir, "forms.json"), "w") as f:
106 |             json.dump(forms, f, ensure_ascii=False, separators=(',', ':'))
107 | 
108 | print("Completed.")
109 | 


--------------------------------------------------------------------------------
/scripts/download-pages.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import json
 3 | import os
 4 | import re
 5 | from collections import defaultdict
 6 | import progressbar
 7 | 
 8 | os.environ['PYWIKIBOT2_NO_USER_CONFIG']='1'
 9 | import pywikibot
10 | from pywikibot import pagegenerators
11 | 
12 | download_dir = Path("../build/download")
13 | config = json.load(open("../conf/config.json"))
14 | 
# Per source language, gather the category specs to include and to exclude
# across all of its target languages. A language only gets an entry when its
# configuration actually has the corresponding key.
includes = defaultdict(list)
excludes = defaultdict(list)
for src_lang, targets in config["langpairs"].items():
    for target_lang, langpair in targets.items():
        for key, bucket in (("include", includes), ("exclude", excludes)):
            if key in langpair:
                bucket[src_lang].extend(langpair[key])
23 | 
def toHex(x):
    """Encode ``x`` as the concatenated lowercase hex code points of its
    characters, each zero-padded to at least four digits."""
    return "".join(format(ord(ch), "04x") for ch in x)
26 | 
def download_cat(site, cat, callback):
    """Call ``callback(page)`` for every main-namespace page of a category.

    ``cat`` is a category spec with a "category" name and an optional
    "recurse" value that is handed to the page generator. A progress bar
    sized from the category's reported page count tracks the iteration.
    """
    category = pywikibot.Category(site, cat["category"])
    recurse = cat.get("recurse")
    bar = progressbar.ProgressBar(max_value=category.categoryinfo['pages'])
    pages = pagegenerators.CategorizedPageGenerator(category, recurse=recurse, namespaces="0")
    for count, page in enumerate(pages, start=1):
        bar.update(count)
        callback(page)
    bar.finish()
38 | 
39 | 
# For each source-language Wiktionary: collect the titles to skip, then
# download every page of the included categories that is not cached yet.
for src_lang, incl in includes.items():
    print("Language: %s" % src_lang)
    site = pywikibot.Site(code=src_lang, fam='wiktionary')

    download_lang_dir = Path(download_dir, src_lang)
    download_lang_dir.mkdir(parents=True, exist_ok=True)

    excluded_pages = set()

    def exclude_page(page):
        """Remember the page title so it is skipped when downloading."""
        excluded_pages.add(page.title())

    for excluded_cat in excludes[src_lang]:
        print("Excluding pages from category [%s]" % excluded_cat['category'])
        download_cat(site, excluded_cat, exclude_page)

    def download_page(page):
        """Store wikitext and rendered HTML of ``page`` as one JSON file."""
        title = page.title()

        # Skip excluded pages and titles longer than 63 characters
        if title in excluded_pages or len(title) > 63:
            return

        # One file per title; an existing file means it was fetched earlier
        my_file = Path(download_lang_dir, toHex(title) + ".json")
        if my_file.is_file():
            return

        # Strip HTML comments from the rendered page before saving
        html = re.sub("<!--.*?-->", "", site.get_parsed_page(title), flags=re.DOTALL)

        payload = {'title':title, 'text':page.text, 'html':html}
        my_file.write_text(json.dumps(payload, ensure_ascii=False), 'utf-8')

    for included_cat in incl:
        print("Including pages from category [%s]" % included_cat['category'])
        download_cat(site, included_cat, download_page)
74 | 
75 | 


--------------------------------------------------------------------------------
/scripts/download-resources.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import json
 3 | import requests
 4 | 
 5 | config = json.load(open("../conf/config.json"))
 6 | 
 7 | download_dir = Path("../build/resources")
 8 | download_dir.mkdir(parents=True, exist_ok=True)
 9 | 
10 | for lang, conf in config["languages"].items():
11 |     r = requests.get(conf["frequency_file"])
12 |     file = Path(download_dir, lang + ".freq.txt")
13 |     print(conf["frequency_file"])
14 |     with open(file, 'wb') as f:  
15 |         f.write(r.content)
16 | 


--------------------------------------------------------------------------------
/scripts/package-extension.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -euo pipefail
 3 | mkdir -p ../chrome/generated/resources/ru
 4 | cd ../chrome
 5 | cp ../build/index/en/ru/words.json generated/resources/ru
 6 | cp ../build/index/en/ru/forms.json generated/resources/ru
 7 | wget -qO generated/underscore.js "https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"
 8 | wget -qO generated/jquery.js "https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"
 9 | wget -qO generated/bootstrap.js "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"
10 | wget -qO generated/bootstrap.css "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"
11 | wget -qO generated/bootstrap-toggle.css "https://gitcdn.github.io/bootstrap-toggle/2.2.2/css/bootstrap-toggle.min.css"
12 | wget -qO generated/bootstrap-toggle.js "https://gitcdn.github.io/bootstrap-toggle/2.2.2/js/bootstrap-toggle.min.js"
13 | wget -qO generated/Sortable.js "https://cdnjs.cloudflare.com/ajax/libs/Sortable/1.6.0/Sortable.min.js"
14 | (echo "slavaConfig = "; cat ../conf/config.json) > generated/slavaConfig.js
15 | rm -f ../build/slava-package.zip && zip -r ../build/slava-package.zip .
16 | echo Downloads complete.
17 | 


--------------------------------------------------------------------------------
/scripts/parse-pages.py:
--------------------------------------------------------------------------------
  1 | from lxml import etree
  2 | import unicodedata
  3 | from pathlib import Path
  4 | from progressbar import progressbar
  5 | import json
  6 | 
  7 | config = json.load(open("../conf/config.json"))
  8 | 
  9 | download_dir = Path("../build/download")
 10 | parsed_dir = Path("../build/parsed")
 11 | normalize_char_map = {'ё':'е', 'Ё':'Е'}
 12 | vowels = 'аэыуояеёюи'
 13 | 
 14 | def normalize_string(s):
 15 |    norms = unicodedata.normalize('NFC', s)
 16 |    noacc =        [c
 17 |                   for c in norms
 18 |                   if unicodedata.category(c) != 'Mn' #'Mark, Nonspacing' = accents
 19 |                   and (
 20 |                       not unicodedata.category(c).startswith('P') #Punctuation
 21 |                       or c == '-'
 22 |                   )]
 23 |    normalized = ''.join(normalize_char_map[c] if c in normalize_char_map else c
 24 |                   for c in noacc).lower()
 25 |    stress=None
 26 |    if vowel_count(normalized) > 1:
 27 |        for p, c in enumerate(norms):
 28 |            if unicodedata.category(c) == 'Mn':  # 'Mark, Nonspacing' = accents
 29 |                stress = p
 30 |                break
 31 | 
 32 |    return tuple([normalized, stress, ''.join(noacc)])
 33 | 
 34 | def add_norm(forms, html, xpath, optional_prefix=None, prefix_norm=None):
 35 |     matches = html.xpath(xpath)
 36 |     for match in matches:
 37 |         match_text = match.xpath("string(.)")
 38 | 
 39 |         #Cell may contain multiple forms, e.g. свой -> свое́й, свое́ю (https://en.wiktionary.org/wiki/%D1%81%D0%B2%D0%BE%D0%B9)
 40 |         for match_form in match_text.split(", "):
 41 | 
 42 |             # Add comparative with and without comparative suffix, e.g. попроще and проще
 43 |             if optional_prefix and match_form.startswith(optional_prefix):
 44 |                 suffix = match_form[len(optional_prefix):]
 45 |                 forms.add(normalize_string(prefix_norm + suffix))
 46 |                 forms.add(normalize_string(suffix))
 47 |             else:
 48 |                 forms.add(normalize_string(match_form))
 49 | 
 50 | def vowel_count(txt):
 51 |     count = 0
 52 |     txt = txt.lower()
 53 |     for vowel in vowels:
 54 |         count = count + txt.count(vowel)
 55 |     return count
 56 | 
 57 | def parse_file(f, src_lang, destdir):
 58 | 
 59 |     pageJson=json.load(open(str(f)))
 60 |     of=pageJson['html']
 61 |     title = pageJson['title']
 62 |     html = etree.fromstring(of)
 63 | 
 64 |     for target_lang, langpair in config["langpairs"][src_lang].items():
 65 |         lang_name = langpair["lang_span_name"]
 66 |         langs = html.xpath("//h2/span[text()='%s' and contains(@class,'mw-headline')]" % lang_name)
 67 |         if not langs: #does not work for Serbo-Croatian
 68 |             continue
 69 |         forms = set()
 70 | 
 71 |         span_selector = "//*[preceding-sibling::h2[1]/span[text()='%s']]" % lang_name
 72 |         td_selector_template = "%s//table[contains(@class,'inflection-table')]/%s/tr/td//span[@lang='%s']"
 73 |         for tbody_selector in ['tbody', '.']:
 74 |             td_selector = td_selector_template % (span_selector, tbody_selector, target_lang)
 75 |             add_norm(forms, html, td_selector)
 76 |         add_norm(forms, html, "%s//strong[contains(@class,'headword') and @lang='%s']" % (span_selector, target_lang))
 77 | 
 78 |         # Parse comparative. NB тёплый has two variants
 79 |         comp_select = "//b[@lang='%s' and preceding-sibling::*[name()='i' and text()='comparative']]" % target_lang
 80 |         add_norm(forms, html, comp_select, "(по)", "по")
 81 | 
 82 |         dir = Path(destdir, target_lang)
 83 |         dir.mkdir(parents=True, exist_ok=True)
 84 |         file = Path(dir, Path(f).with_suffix('.dat').name)
 85 |         s = ''.join(["%s\t%s\t%s\t%s\n" % (form[0], title, form[1] if form[1] else 0, form[2]) for form in forms])
 86 |         file.write_text(s, encoding='utf8')
 87 | 
 88 |         marker.write_bytes(b'')
 89 | 
 90 | 
 91 | for src_lang, targets in config["langpairs"].items():
 92 |     lang_dir = Path(download_dir, src_lang)
 93 | 
 94 |     destdir = Path(parsed_dir, src_lang)
 95 |     marker_dir = Path(destdir, "_done")
 96 |     marker_dir.mkdir(parents=True, exist_ok=True)
 97 | 
 98 |     print("Source language: [%s]" % src_lang)
 99 |     print("Listing files...")
100 |     files = sorted(lang_dir.glob("*.json"))
101 | 
102 |     new_pages = 0
103 | 
104 |     print("Parsing files...")
105 |     for f in progressbar(files):
106 | 
107 |         marker = Path(marker_dir, Path(f).name)
108 |         if marker.is_file():
109 |             continue
110 | 
111 |         new_pages = new_pages + 1
112 | 
113 |         parse_file(f, src_lang, destdir)
114 | 
115 |     print("Parsed %d new pages out of %d total pages." % (new_pages, len(files)))
116 | 
117 | 


--------------------------------------------------------------------------------