├── .babelrc ├── .clang-format ├── .gitignore ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── gulpfile.js ├── index.html ├── package-lock.json ├── package.json ├── resources ├── logos │ └── README.md ├── poster │ ├── Makefile │ ├── poster.pdf │ └── poster.svg └── screenshot.png ├── scripts ├── compile-ejs.js ├── compress-fa.sh ├── install.sh └── rebuild-tree.sh ├── src ├── client │ ├── app.ts │ ├── collaboration │ │ ├── index.ts │ │ └── user.ts │ ├── config.ts │ ├── corpus.ts │ ├── favicon.png │ ├── graph │ │ ├── config.ts │ │ ├── index.ts │ │ ├── tree.ts │ │ ├── utils.ts │ │ └── visualiser.ts │ ├── gui │ │ ├── chat.ts │ │ ├── config.ts │ │ ├── graph-menu.ts │ │ ├── index.ts │ │ ├── keyboard.ts │ │ ├── labeler.ts │ │ ├── menu.ts │ │ ├── modals │ │ │ ├── help.ts │ │ │ ├── index.ts │ │ │ ├── upload-file.ts │ │ │ └── upload-url.ts │ │ ├── selfcomplete.ts │ │ ├── status.ts │ │ ├── table.ts │ │ └── textarea.ts │ ├── index.ts │ ├── server.ts │ ├── socket.ts │ ├── undo-manager.ts │ └── utils │ │ ├── export.ts │ │ ├── funcs.ts │ │ ├── index.ts │ │ ├── local-storage.ts │ │ └── validate.ts ├── notatrix │ ├── README.md │ ├── base.ts │ ├── detector.ts │ ├── formats │ │ ├── apertium-stream │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ ├── parser.ts │ │ │ └── splitter.ts │ │ ├── brackets │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ └── parser.ts │ │ ├── cg3 │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ └── parser.ts │ │ ├── conllu │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ └── parser.ts │ │ ├── default-splitter.ts │ │ ├── index.ts │ │ ├── notatrix-serial │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ ├── parser.ts │ │ │ └── splitter.ts │ │ ├── params │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ ├── parser.ts │ │ │ └── splitter.ts │ │ ├── plain-text │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ ├── parser.ts │ │ │ └── splitter.ts │ │ └── sd │ │ │ ├── detector.ts │ │ │ ├── fields.ts │ │ │ ├── generator.ts │ │ │ ├── get-loss.ts │ │ │ ├── index.ts │ │ │ └── parser.ts │ ├── generator.ts │ ├── index.ts │ ├── nx │ │ ├── analysis.ts │ │ ├── base-class.ts │ │ ├── base-token.ts │ │ ├── comment.ts │ │ ├── corpus.ts │ │ ├── label.ts │ │ ├── labeler.ts │ │ ├── options.ts │ │ ├── relation-set.ts │ │ ├── root-token.ts │ │ ├── sentence.ts │ │ ├── sub-token.ts │ │ └── token.ts │ ├── parser.ts │ ├── splitter.ts │ └── utils │ │ ├── constants.ts │ │ ├── errors.ts │ │ ├── funcs.ts │ │ ├── index.ts │ │ └── regex.ts └── server │ ├── app.ts │ ├── config.ts │ ├── errors.ts │ ├── list-treebanks.ts │ ├── models │ ├── corpus-json.ts │ ├── sanitize.ts │ ├── treebank.ts │ └── users.ts │ ├── public │ ├── css │ │ ├── bootstrap.min.css │ │ ├── bootstrap.min.css.map │ │ ├── fontawesome.all.min.css │ │ ├── jquery.autocomplete.css │ │ └── style.css │ ├── favicon.png │ ├── fonts │ │ ├── Liberation_Sans-Regular.ttf │ │ └── PT_Sans-Caption-Web-Regular.ttf │ ├── html │ │ ├── annotatrix.html │ │ ├── help.html │ │ ├── index.html │ │ └── webfonts │ │ │ ├── fa-brands-400.eot │ │ │ ├── fa-brands-400.svg │ │ │ ├── fa-brands-400.ttf │ │ │ ├── fa-brands-400.woff │ │ │ ├── fa-brands-400.woff2 │ │ │ ├── fa-regular-400.eot │ │ │ ├── fa-regular-400.svg │ │ │ ├── fa-regular-400.ttf │ │ │ ├── fa-regular-400.woff │ │ │ ├── fa-regular-400.woff2 │ │ │ ├── fa-solid-900.eot │ │ │ ├── fa-solid-900.svg │ │ │ ├── fa-solid-900.ttf │ │ │ ├── fa-solid-900.woff │ │ │ └── fa-solid-900.woff2 │ ├── js │ │ ├── bootstrap.bundle.js │ │ ├── bootstrap.min.js │ │ ├── bundle.js │ │ ├── jquery.js │ │ ├── jquery.min.js │ │ ├── jquery.min.map │ │ └── main.js │ ├── logo.png │ └── logo.svg │ ├── room.ts │ ├── routes.ts │ ├── sockets.ts │ ├── upload.ts │ └── views │ ├── annotatrix.ejs │ ├── help.ejs │ ├── index.ejs │ ├── modals │ ├── github-fork.ejs │ ├── help.ejs │ ├── upload-file.ejs │ └── upload-url.ejs │ └── settings.ejs ├── test └── notatrix │ ├── combine.js │ ├── data │ ├── cg3.js │ ├── conllu.js │ └── index.js │ ├── detector.js │ ├── enhance.js │ ├── examples.js │ ├── generator.js │ ├── index.js │ ├── loss.js │ ├── merge.js │ ├── parser.js │ ├── relation-sets.js │ ├── sentence.js │ ├── setEmpty.js │ ├── split.js │ ├── splitter.js │ └── utils.js └── tsconfig.json /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["@babel/preset-env"], 3 | "plugins": ["transform-es5-property-mutators"] 4 | } 5 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # vim: syntax=yaml 2 | Language: JavaScript 3 | BasedOnStyle: LLVM 4 | TabWidth: 2 5 | IndentWidth: 2 6 | UseTab: Never 7 | SpacesInContainerLiterals: false 8 | IndentCaseLabels: false 9 | JavaScriptQuotes: Double 10 | JavaScriptWrapImports: true 11 | AlignAfterOpenBracket: true 12 | ColumnLimit: 120 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # environment stuff 2 | .env 3 | ud-env 4 | 5 | # python compiled files 6 | __pycache__ 7 | *.pyc 8 | 9 | # db files 10 | corpora 11 | .users.db 12 | 13 | # log stuff 14 | logs 15 | 16 | # vim swap files 17 | *.swp 18 | 19 | # node stuff 20 | node_modules 21 | build 22 | .nyc_output 23 | 24 | # ubuntu process files 25 | .nfs* 26 | 27 | # OS files 28 | .DS_Store 29 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Original UD annotatrix by Jonathan North Washington 2 | jonorthwash.github.io/visualise.html 3 | Google Summer of Code (2017, 2018, 2019) 4 | Mariya Sheyanova (@maryszmary) 5 | Kevin Murphy (@keggsmurph21) 6 | Alyaxey Yaskevich (@yaskevich) 7 | GCI 2016 and 2017 students 8 | Grzegorz Stark (@gstark0) 9 | Jonathan Pan (@JPJPJPOPOP) 10 | Suresh Michael Peiris (@tsuresh) 11 | Diogo Fernandes (@diogoscf) 12 | Robin Richtsfeld (@Androbin) 13 | Ethan Yang (@thatprogrammer1) 14 | Other contributors 15 | Francis Tyers (@ftyers) 16 | Sushain Cherivirala (@sushain97) 17 | Kevin Brubeck Unhammer (@unhammer) 18 | Tai Vongsathorn Warner (@midasdoas) 19 | Daniel Swanson (@mr-martian) 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Greetings! 2 | 3 | Hi, if you're reading this file you probably like Annotatrix and want to contribute. 4 | That's great! We are always looking for people who want to join us in making a great 5 | tool for annotating Universal Dependencies. In this file we'll give some advice 6 | for how to contribute and some norms in the project. 7 | 8 | # General stuff 9 | 10 | * Find a bug? Submit an issue! We'll try and get to it as quickly as possible. This also 11 | goes for feature requests. 12 | * Pull requests wanted! If you fork the repo and want to make a pull request, that's great, 13 | just do it, no need to ask. 14 | * Getting direct commit access... if you think direct commit access would be easier, then 15 | contact us, we're very open to new contributions. 16 | 17 | # Coding style 18 | 19 | Right now the code is a bit... diverse, but in the future we're planning to follow these 20 | guidelines. 21 | 22 | * We use four-space indentation 23 | * Global variables are all in capitals, with underscores for spacing 24 | * For other variables and functions we use camelCase 25 | * Spaces between binary operators and operands 26 | * Each function should be documented in JSDoc style 27 | * Braces go on the same line 28 | * Use semicolons at the end of statements 29 | * Use var for local variables 30 | -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | const gulp = require("gulp"); 2 | const browserify = require("browserify"); 3 | const source = require("vinyl-source-stream"); 4 | const buffer = require("vinyl-buffer"); 5 | const babelify = require("babelify"); 6 | const rename = require("gulp-rename"); 7 | const sourcemaps = require("gulp-sourcemaps"); 8 | const compileEJS = require("./scripts/compile-ejs"); 9 | const terser = require("gulp-terser"); 10 | const size = require("gulp-size"); 11 | const tsify = require("tsify"); 12 | require("dotenv").config(); 13 | 14 | const gulp_mode = process.env.GULP_ENV || "development"; 15 | console.log("Gulp runs in " + gulp_mode + " mode"); 16 | 17 | gulp.task("js", function() { 18 | let stream = browserify({entries: ["src/client/index.ts"], debug: true}) 19 | .plugin(tsify) 20 | .transform(babelify, {global: true}) 21 | .bundle() 22 | .on("error", function(err) { console.error(err); }) 23 | .pipe(source("bundle.js")) 24 | .pipe(buffer()) 25 | .pipe(size()); 26 | 27 | if (gulp_mode == "production") { 28 | stream = stream.pipe(terser({keep_fnames: true, mangle: false, compress: {drop_console: true}})).pipe(size()) 29 | } 30 | return stream.pipe(gulp.dest("src/server/public/js")); 31 | }); 32 | 33 | gulp.task("html", function(done) { 34 | compileEJS(); 35 | done(); 36 | }); 37 | 38 | gulp.task("ico", function() { return gulp.src(["./src/client/favicon.png"]).pipe(gulp.dest("./src/server/public")); }); 39 | 40 | gulp.task("watch", () => { 41 | gulp.watch(["src/**/*.ts", "src/server/views/**/*.ejs"], gulp.parallel("js", "html")); 42 | }); 43 | 44 | gulp.task("default", gulp.parallel("js", "html", "ico")); 45 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | UD Annotatrix 7 | 8 | 9 | 10 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ud-annotatrix", 3 | "version": "1.0.0", 4 | "description": "a client-side browser-only tool for editing dependency trees in CoNLL-U and VISL formats", 5 | "dependencies": { 6 | "body-parser": "^1.19.0", 7 | "cookie-parser": "^1.4.4", 8 | "d3": "^5.16.0", 9 | "dotenv": "^7.0.0", 10 | "ejs": "^2.6.2", 11 | "express": "^4.16.4", 12 | "express-fileupload": "^1.1.4", 13 | "express-session": "^1.16.2", 14 | "jquery": "^3.5.0", 15 | "mkdirp": "^0.5.1", 16 | "moment": "^2.24.0", 17 | "morgan": "^1.9.1", 18 | "nocache": "^2.0.0", 19 | "request": "^2.88.0", 20 | "socket.io": "^2.2.0", 21 | "socket.io-cookie-parser": "^1.0.0", 22 | "tsify": "^5.0.4", 23 | "underscore": "^1.9.1", 24 | "uuid": "^3.3.2" 25 | }, 26 | "devDependencies": { 27 | "@babel/core": "^7.4.3", 28 | "@babel/preset-env": "^7.4.5", 29 | "@babel/register": "^7.4.0", 30 | "@types/body-parser": "^1.19.0", 31 | "@types/cookie-parser": "^1.4.3", 32 | "@types/d3": "^5.16.0", 33 | "@types/express-fileupload": "^1.1.4", 34 | "@types/express-session": "^1.16.2", 35 | "@types/jquery": "^3.5.0", 36 | "@types/mkdirp": "^0.5.1", 37 | "@types/morgan": "^1.9.1", 38 | "@types/request": "^2.48.8", 39 | "@types/socket.io": "^2.1.13", 40 | "@types/socket.io-parser": "^3.0.0", 41 | "@types/sqlite3": "^3.1.8", 42 | "@types/uuid": "^3.3.2", 43 | "babel-plugin-transform-es5-property-mutators": "^6.24.1", 44 | "babel-polyfill": "^6.26.0", 45 | "babel-preset-env": "^1.7.0", 46 | "babelify": "^10.0.0", 47 | "browserify": "^16.2.3", 48 | "chai": "^4.3.4", 49 | "gulp": "^4.0.2", 50 | "gulp-babel": "^8.0.0-beta.2", 51 | "gulp-concat": "^2.6.1", 52 | "gulp-rename": "^1.4.0", 53 | "gulp-size": "^3.0.0", 54 | "gulp-sourcemaps": "^2.6.5", 55 | "gulp-terser": "^1.2.0", 56 | "istanbul": "^0.4.5", 57 | "mocha": "^5.2.0", 58 | "nodemon": "^1.18.11", 59 | "nyc": "^12.0.2", 60 | "sinon": "^5.1.1", 61 | "socket.io-parser": "^4.2.1", 62 | "ts-node": "^10.8.2", 63 | "typescript": "^4.7.4", 64 | "vinyl-buffer": "^1.0.1", 65 | "vinyl-source-stream": "^2.0.0" 66 | }, 67 | "scripts": { 68 | "build": "gulp", 69 | "build-watch": "gulp && gulp watch", 70 | "test": "mocha -r ts-node/register --max-old-space-size=8192 test/notatrix/index.js", 71 | "coverage": "nyc mocha -r ts-node/register --max-old-space-size=8192 test/notatrix/index.js" 72 | }, 73 | "repository": { 74 | "type": "git", 75 | "url": "git+https://github.com/jonorthwash/ud-annotatrix.git" 76 | }, 77 | "contributors": [ 78 | { 79 | "name": "Jonathan Washington", 80 | "url": "https://github.com/jonorthwash" 81 | }, 82 | { 83 | "name": "Francis Tyers", 84 | "url": "https://github.com/ftyers" 85 | }, 86 | { 87 | "name": "Mariya Sheyanova", 88 | "url": "https://github.com/maryszmary" 89 | }, 90 | { 91 | "name": "Kevin Murphy", 92 | "url": "https://github.com/keggsmurph21" 93 | }, 94 | { 95 | "name": "Sushain Cherivirala", 96 | "url": "https://github.com/sushain97" 97 | }, 98 | { 99 | "name": "Kevin Brubeck Unhammer", 100 | "url": "https://gjthub.com/unhammer" 101 | }, 102 | { 103 | "name": "Tai Vongsathorn Warner", 104 | "url": "https://github.com/midasdoas" 105 | }, 106 | { 107 | "name": "Daniel Swanson", 108 | "url": "https://github.com/mr-martian" 109 | }, 110 | { 111 | "name": "Grzegorz Stark", 112 | "url": "https://github.com/gstark0" 113 | }, 114 | { 115 | "name": "Jonathan Pan", 116 | "url": "https://github.com/JPJPJPOPOP" 117 | }, 118 | { 119 | "name": "Suresh Michael Peires", 120 | "url": "https://github.com/tsuresh" 121 | }, 122 | { 123 | "name": "Diogo Fernandes", 124 | "url": "https://github.com/diogoscf" 125 | }, 126 | { 127 | "name": "Robin Richtsfeld", 128 | "url": "https://github.com/androbin" 129 | }, 130 | { 131 | "name": "Ethan Yang", 132 | "url": "https://github.com/thatprogrammer1" 133 | } 134 | ], 135 | "license": "GPL-3.0", 136 | "bugs": { 137 | "url": "https://github.com/jonorthwash/ud-annotatrix/issues" 138 | }, 139 | "homepage": "https://github.com/jonorthwash/ud-annotatrix#readme" 140 | } 141 | -------------------------------------------------------------------------------- /resources/logos/README.md: -------------------------------------------------------------------------------- 1 | This directory houses logos for consideration to use for UD Annotatrix. 2 | -------------------------------------------------------------------------------- /resources/poster/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | qrencode -o qrcode.png -l M "https://maryszmary.github.io/ud-annotatrix/" 3 | -------------------------------------------------------------------------------- /resources/poster/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/resources/poster/poster.pdf -------------------------------------------------------------------------------- /resources/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/resources/screenshot.png -------------------------------------------------------------------------------- /scripts/compile-ejs.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /* 4 | * Kevin Murphy 5 | * 7/25/18 6 | * 7 | * This script handles compiling the EJS templates into plain HTML files to 8 | * allow clients to open the UD Annotatrix without a server. 9 | */ 10 | 11 | "use strict"; 12 | 13 | const ejs = require("ejs"); 14 | const fs = require("fs"); 15 | const path = require("path"); 16 | 17 | const mkdirp = require("mkdirp"); 18 | const html_base_path = path.join("src", "server", "public", "html"); 19 | 20 | function render(filename, args) { 21 | const ejs_path = path.join("src", "server", "views", `${filename}.ejs`); 22 | const html_path = path.join("src", "server", "public", "html", `${filename}.html`); 23 | 24 | fs.readFile(ejs_path, (err, contents) => { 25 | if (err) 26 | throw err; 27 | 28 | contents = contents.toString(); 29 | const html = ejs.render(contents, args); 30 | 31 | fs.writeFile(html_path, html, err => { 32 | if (err) 33 | throw err; 34 | }); 35 | }); 36 | } 37 | 38 | function render_all() { 39 | 40 | mkdirp(html_base_path); 41 | 42 | render("annotatrix", { 43 | // `src/server/views/modals` 44 | modalPath: path.join(__dirname, "../", "src", "server", "views", "modals"), 45 | github_configured: false, 46 | username: null, 47 | path: path 48 | }); 49 | render("help", {}); 50 | render("index", {base: null, error: null, treebanks: []}); 51 | } 52 | 53 | if (require.main === module) 54 | render_all(); 55 | 56 | module.exports = render_all; 57 | -------------------------------------------------------------------------------- /scripts/compress-fa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Kevin Murphy 5 | # 8/15/18 6 | # 7 | # This script makes compressing the font-awesome woff font into a base64-encoded 8 | # string in the css more replicable. 9 | # 10 | 11 | usage() { 12 | echo "usage: $ $0 /path/to/fontawesome" 13 | exit 1 14 | } 15 | 16 | if [ -z $1 ]; then 17 | usage 18 | fi 19 | 20 | OUTPUT=src/server/public/css/font-awesome-base64.min.css 21 | FA_PATH=$1 22 | SOLID_CSS=`tail -n 1 $FA_PATH/css/solid.min.css` 23 | BRANDS_CSS=`tail -n 1 $FA_PATH/css/brands.min.css` 24 | 25 | echo $SOLID_CSS | sed s/src:.*$// > $OUTPUT 26 | printf 'src:url(data:application/font-woff;base64,' >> $OUTPUT 27 | base64 $FA_PATH/webfonts/fa-solid-900.woff >> $OUTPUT 28 | printf ')' >> $OUTPUT 29 | echo $SOLID_CSS | sed s/^.*\)// >> $OUTPUT 30 | 31 | echo $BRANDS_CSS | sed s/src:.*$// >> $OUTPUT 32 | printf 'src:url(data:application/font-woff;base64,' >> $OUTPUT 33 | base64 $FA_PATH/webfonts/fa-brands-400.woff >> $OUTPUT 34 | printf ')' >> $OUTPUT 35 | echo $BRANDS_CSS | sed s/^.*\)// >> $OUTPUT 36 | 37 | tail -n 1 $FA_PATH/css/fontawesome.min.css >> $OUTPUT 38 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # 4 | # Kevin Murphy 5 | # 8/27/18 6 | # 7 | # This script helps install the UD-Annotatrix tool to run in server mode. 8 | # 9 | 10 | read_to_env() { 11 | 12 | echo $1 13 | echo `echo \$$1` 14 | 15 | if [ -z ${!1+NULL} ]; then 16 | return 17 | fi 18 | 19 | echo setting 20 | 21 | printf "(OPTIONAL) client secret for GitHub integration: " 22 | read ENVVAR 23 | 24 | if [ ! -z $ENVVAR ]; then 25 | echo hi 26 | echo "$1=$ENVVAR" >> .env 27 | fi 28 | 29 | # if [ -z $1 ]; 30 | } 31 | 32 | # get the most recent versions 33 | #git checkout master 34 | #git pull 35 | 36 | # install recent version of Node dependencies 37 | #npm install 38 | 39 | # read environment configuration 40 | if [ -f .env ]; then 41 | while read line; do 42 | export $line; 43 | done < .env 44 | fi 45 | 46 | #if [ -z $ANNOTATRIX_GH_CLIENT_SECRET ]; then 47 | # printf "(OPTIONAL) client secret for GitHub integration: " 48 | # read ANNOTATRIX_GH_CLIENT_SECRET 49 | # 50 | # if [ ! -z $ANNOTATRIX_GH_CLIENT_SECRET ]; then 51 | # echo hi 52 | # echo ANNOTATRIX_GH_CLIENT_SECRET=$ANNOTATRIX_GH_CLIENT_SECRET >> .env 53 | # fi 54 | #fi 55 | 56 | read_to_env hello || true 57 | echo 'AFTER THE FUNCTION' 58 | if [ -f .env ]; then cat .env; fi 59 | echo 60 | echo $hello 61 | -------------------------------------------------------------------------------- /scripts/rebuild-tree.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # 4 | # Kevin Murphy 5 | # 5/30/18 6 | # 7 | # This script makes editing the "tree" layout extension for cytoscape 8 | # much simpler and easier. Simply edit the version of "client/tree.js" in the 9 | # current repository, and run this script before viewing your changes 10 | # in the browser. (It takes care of copying and building stuff.) 11 | # 12 | 13 | cytoscape_git_url=https://github.com/cytoscape/cytoscape.js.git 14 | this_dir=`pwd` 15 | cytoscape_dir=${CYTOSCAPE:=../cytoscape.js} 16 | layout_dir=$cytoscape_dir/src/extensions/layout 17 | 18 | if [ "$1" == "auto-install" ]; then 19 | cd /tmp 20 | if [ ! -d cytoscape.js ]; then 21 | git clone $cytoscape_git_url 22 | fi 23 | cd cytoscape.js 24 | git checkout master 25 | git pull 26 | cytoscape_dir=/tmp/cytoscape.js 27 | layout_dir=$cytoscape_dir/src/extensions/layout 28 | elif [ ! -d $layout_dir ]; then 29 | echo "Error: unable to automatically locate $layout_dir; try running" 30 | echo " $ cd .." 31 | echo " $ git clone $cytoscape_git_url" 32 | echo " $ cd cytoscape.js" 33 | echo " $ npm install" 34 | echo "" 35 | echo "OR to install automatically, try running" 36 | echo " $ $0 auto-install" 37 | echo "" 38 | echo "OR if it's installed elsewhere, try running" 39 | echo " $ CYTOSCAPE=/path/to/cytoscape.js $0" 40 | exit 1 41 | fi 42 | 43 | tree_path=$this_dir/client/graph/tree.js 44 | 45 | if [ ! -f $tree_path ]; then 46 | echo "Error: unable to locate $tree_path (aborting)" 47 | exit 1 48 | fi 49 | 50 | cp $tree_path $layout_dir 51 | 52 | # add the implementation to this layout index 53 | if ! grep "name:\W*tree\W*impl:\W*require(\W*tree\W*)" $layout_dir/index.js >/dev/null; then 54 | sed -i .backup $'s/}$/},\\\n { name: \'tree\', impl: require( \'.\/tree\' ) }/g' $layout_dir/index.js 55 | fi 56 | 57 | cd $cytoscape_dir 58 | 59 | if [ ! -d node_modules ]; then 60 | npm install 61 | fi 62 | 63 | npm run build 64 | cp build/cytoscape.* $this_dir/client/graph/cytoscape 65 | cd $this_dir 66 | npm run build 67 | 68 | echo "successfully copied built files into client/cytoscape/" 69 | 70 | exit 0 71 | -------------------------------------------------------------------------------- /src/client/app.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import * as nx from "../notatrix"; 4 | import {storage} from "./utils"; 5 | import {download} from "./utils/funcs"; 6 | 7 | import {CollaborationInterface} from "./collaboration"; 8 | import {_config as config} from "./config"; 9 | import {Corpus} from "./corpus"; 10 | import {Graph} from "./graph"; 11 | import {GUI} from "./gui"; 12 | import {Server} from "./server"; 13 | import {Socket} from "./socket"; 14 | import {UndoManager} from "./undo-manager"; 15 | 16 | interface SaveMessage { 17 | type: unknown; 18 | indices: unknown; 19 | } 20 | 21 | /** 22 | * Wrapper class to hold references to all of our actual client objects (e.g. 23 | * CollaborationInterface, Corpus, GUI, Graph, Server, Socket, UndoManager). 24 | * This class should be instantiated at the beginning of a session. 25 | */ 26 | export class App { 27 | private config: typeof config; 28 | public online: boolean; 29 | public initialized: boolean; 30 | public undoer: UndoManager; 31 | public server: Server; 32 | public socket: Socket; 33 | public gui: GUI; 34 | public collab: CollaborationInterface; 35 | public corpus: Corpus; 36 | public graph: Graph; 37 | 38 | constructor(online: boolean) { 39 | 40 | this.config = config; 41 | this.online = online; 42 | this.initialized = false; 43 | this.undoer = new UndoManager(this); 44 | this.server = new Server(this); 45 | this.socket = new Socket(this); 46 | this.gui = new GUI(this); 47 | this.collab = new CollaborationInterface(this); 48 | this.corpus = new Corpus(this); 49 | this.graph = new Graph(this); 50 | this.initialized = true; 51 | 52 | console.log("mode:", this.online ? "online" : "offline"); 53 | 54 | // jump to sentence from frag id 55 | setTimeout(() => { 56 | const hash = window.location.hash.substring(1); 57 | this.corpus.index = parseInt(hash) - 1; 58 | }, 500); 59 | if (this.online) { 60 | this.server.connect(); 61 | this.socket.connect(); 62 | } else { 63 | let backup = storage.restore(); 64 | if (!$.isEmptyObject(backup)) { 65 | console.log("backup", backup); 66 | this.corpus = new Corpus(this, backup); 67 | } 68 | } 69 | this.gui.refresh(); 70 | } 71 | 72 | /** 73 | * Save all current corpus- and meta-data, either to the server or to 74 | * localStorage. 75 | */ 76 | save(message?: SaveMessage) { 77 | 78 | if (!this.initialized || this.undoer.active) 79 | return; 80 | 81 | this.gui.status.normal("saving..."); 82 | 83 | // save local preference stuff 84 | this.gui.save(); 85 | this.graph.save(); 86 | 87 | // serialize the corpus 88 | let serial = this.corpus.serialize(); 89 | console.log("this.corpus.serialize", serial); 90 | // add it to the undo/redo stack if it's an actual change 91 | this.undoer.push(serial) 92 | 93 | if (message && this.online) { 94 | this.socket.broadcast("modify corpus", { 95 | type: message.type, 96 | indices: message.indices, 97 | serial: serial, 98 | }); 99 | } 100 | 101 | // save it to server/local 102 | if (this.server.is_running) { 103 | this.server.save(serial); 104 | } else { 105 | storage.save(serial); 106 | } 107 | 108 | // refresh the gui stuff 109 | this.gui.refresh(); 110 | } 111 | 112 | /** 113 | * Load a corpus from a serial string. 114 | */ 115 | load(serial: nx.CorpusSerial) { 116 | // this.gui.status.normal('loading...') 117 | this.corpus = new Corpus(this, serial); 118 | this.gui.refresh(); 119 | } 120 | 121 | /** 122 | * Load a fresh/new corpus and overwrite an existing one. 123 | */ 124 | discard() { 125 | 126 | this.corpus = new Corpus(this); 127 | this.save(); 128 | this.gui.menu.is_visible = false; 129 | this.gui.refresh(); 130 | } 131 | 132 | /** 133 | * Download the contents of an application instance. 134 | */ 135 | download() { 136 | 137 | const contents = this.corpus._corpus._sentences 138 | .map((sent, i) => { 139 | try { 140 | const format = this.corpus.format || "plain text"; 141 | return sent.to(format).output; 142 | } catch (e) { 143 | console.error(e); 144 | return `[Unable to generate sentence #${i + 1} in "${this.corpus.format}" format]`; 145 | } 146 | }) 147 | .join("\n\n"); 148 | 149 | download(`${this.corpus.filename}.conllu`, "text/plain", contents); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/client/collaboration/index.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {thin} from "../utils/funcs"; 4 | import type {App} from "../app"; 5 | import type {Chat} from "../gui/chat"; 6 | import {MousePosition, User, UserData} from "./user"; 7 | 8 | export interface SetSelfData extends UserData { 9 | room: {users: UserData[]}; 10 | } 11 | 12 | export interface MouseNode { 13 | id: string; 14 | position: MousePosition; 15 | color: string; 16 | } 17 | 18 | /** 19 | * Abstraction to help with handling multiple users collaborating on a document. 20 | * This module takes care of maintaining: 21 | * - the current user 22 | * - a list of all current users on this document 23 | * - methods for getting the mice and locks for those users 24 | */ 25 | export class CollaborationInterface { 26 | private app: App; 27 | public self: User|null; 28 | private chat: Chat; 29 | private _users: {[id: string]: User}; 30 | 31 | constructor(app: App) { 32 | 33 | this.app = app; 34 | 35 | // pointer to data about the current user 36 | this.self = null; 37 | 38 | // cache a pointer to the chat (since we use it often) 39 | this.chat = app.gui.chat; 40 | 41 | // a list of users on this document 42 | this._users = {}; 43 | } 44 | 45 | /** 46 | * Return the number of online users. 47 | */ 48 | get size() { return Object.keys(this._users).length; } 49 | 50 | /** 51 | * Save data about the current user. This method is called after we establish 52 | * a connection with our socket server. 53 | */ 54 | setSelf(data: SetSelfData) { 55 | 56 | // make a User object from the data 57 | const self = new User(data); 58 | self.name = self.name === "anonymous" ? "me" : self.name; 59 | 60 | // don't overwrite if already set 61 | if (JSON.stringify(self) === JSON.stringify(this.self)) 62 | return; 63 | 64 | // iterate over all the users in the room and add them (this way, even 65 | // connections that aren't the first will have an accurate list) 66 | _.each(data.room.users, user => { this.addUser(user, user.id !== self.id); }); 67 | 68 | // save the reference 69 | this.self = self; 70 | 71 | // log it to the chat 72 | this.chat.alert(`you are logged in as %u`, [self]); 73 | 74 | // draw the mice and locks for everyone in the room 75 | this.app.graph.drawMice(); 76 | this.app.graph.setLocks(); 77 | } 78 | 79 | /** 80 | * Get a User object by . 81 | */ 82 | getUser(id: string) { return this._users[id]; } 83 | 84 | /** 85 | * Add a User to our list. 86 | */ 87 | addUser(data: UserData, notifyChat: boolean = true) { 88 | 89 | const user = new User(data); 90 | this._users[data.id] = user; 91 | 92 | if (notifyChat) 93 | this.chat.alert(`%u connected from ${user.ip}`, [user]); 94 | 95 | this.chat.refresh(); 96 | } 97 | 98 | /** 99 | * Remove a User from our list. 100 | */ 101 | removeUser(data: UserData, notifyChat: boolean = true) { 102 | 103 | const user = this.getUser(data.id); 104 | delete this._users[data.id]; 105 | 106 | if (alert) 107 | this.chat.alert(`%u disconnected from ${user.ip}`, [user]); 108 | 109 | this.chat.refresh(); 110 | } 111 | 112 | /** 113 | * Get a list of mouse nodes (each with a user id, position (x & y coords), and 114 | * hex color code), at most one per user. Mice are only shown for users on 115 | * the same page (i.e. same corpus index) as this.self. 116 | */ 117 | getMouseNodes(): MouseNode[] { 118 | 119 | // map over the users 120 | return _ 121 | .map(this._users, 122 | user => { 123 | // if not self and on same index 124 | if (user.id !== this.self.id && user._viewing === this.app.corpus.index) 125 | 126 | // return some info 127 | return { 128 | id: user.id, 129 | position: user.mouse, 130 | color: user.color, 131 | }; 132 | 133 | // filter out things that didn't match our condition 134 | return null; 135 | }) 136 | .filter(thin); 137 | } 138 | 139 | /** 140 | * Get a list of node locks (each with a user id, cytoscape selector, and 141 | * hex color code), at most one per user. Locks are only shown for users on 142 | * the same page (i.e. same corpus index) as this.self. 143 | */ 144 | getLocks() { 145 | 146 | // map over the users 147 | return _ 148 | .map(this._users, 149 | user => { 150 | // if not self and on same index and locking something 151 | if (user.id !== this.self.id && user._viewing === this.app.corpus.index && user.locked) 152 | 153 | // return some info 154 | return { 155 | id: user.id, 156 | locked: user.locked, 157 | color: user.color, 158 | }; 159 | 160 | // filter out things that didn't match our condition 161 | return null; 162 | }) 163 | .filter(thin); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/client/collaboration/user.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import * as nx from "../../notatrix"; 4 | 5 | export interface MousePosition { 6 | x: number; 7 | y: number; 8 | } 9 | 10 | export interface UserData { 11 | username: string|null; 12 | id: string; 13 | address: string; 14 | index: number|null; 15 | mouse: MousePosition|null; 16 | locked: string|null; // cytoscape selector to locate the node currently being edited 17 | } 18 | 19 | /** 20 | * Data structure to keep track of state and methods associated with a particular 21 | * socket connection. 22 | */ 23 | export class User { 24 | public name: string; 25 | public id: string; 26 | public ip: string; 27 | public color: string; 28 | public _viewing: number|null; 29 | public mouse: MousePosition|null; 30 | public locked: string|null; // cytoscape selector to locate the node currently being edited 31 | 32 | constructor(data: UserData) { 33 | this.name = data.username || "anonymous"; 34 | this.id = data.id; 35 | this.ip = data.address; 36 | this.color = nx.funcs.hashStringToHex(data.id); 37 | this._viewing = data.index; 38 | this.mouse = data.mouse; 39 | this.locked = data.locked; 40 | } 41 | 42 | /** 43 | * Helper function for `this::dom`, gives the index-part associated with a 44 | * user in #chat. 45 | */ 46 | get viewing(): string { 47 | return this._viewing === null ? "" : ` (${this._viewing + 1}) `; 48 | } 49 | 50 | /** 51 | * Wrapper for setting the corpus index of the user. Sanitizes input. 52 | */ 53 | set viewing(index: string) { 54 | let parsedIndex = parseInt(index); 55 | this._viewing = isNaN(parsedIndex) ? null : parsedIndex; 56 | } 57 | 58 | /** 59 | * Wrapper for setting the mosue position of the user. Sanitizes input. 60 | */ 61 | setMouse(pos: {x: number|null, y: number|null}) { 62 | // if x and y not both given, don't save it 63 | this.mouse = (pos.x == null && pos.y == null) ? null : pos; 64 | } 65 | 66 | /** 67 | * Get a DOM object containing some of the user's data (this gets rendered in #chat) 68 | * 69 | * NB: this looks a bit messy, but it should have this structure: 70 | * 71 | * 72 | * 73 | * { name } 74 | * 75 | * 76 | * { viewing } 77 | * 78 | * 79 | */ 80 | dom(): JQuery { 81 | return $("") 82 | .addClass("message-sender-info") 83 | .attr("name", this.id) 84 | .append($("").addClass("message-color-blob fa fa-circle").css("color", "#" + this.color)) 85 | .append($("").addClass("message-sender-name").text(this.name).attr("title", "IP Address: " + this.ip)) 86 | .append($("").addClass("message-sender-viewing").text(this.viewing).attr("title", "Currently viewing")); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/client/config.ts: -------------------------------------------------------------------------------- 1 | import {storage} from "./utils"; 2 | import {getTreebankId} from "./utils/funcs"; 3 | 4 | export const _config = { 5 | 6 | version: "0.0.0", 7 | treebank_id: getTreebankId(), 8 | 9 | graph: require("./graph/config"), 10 | gui: require("./gui/config"), 11 | 12 | }; 13 | -------------------------------------------------------------------------------- /src/client/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/client/favicon.png -------------------------------------------------------------------------------- /src/client/graph/config.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import type {MousePosition} from "../collaboration/user"; 4 | 5 | export interface GraphConfig { 6 | pan: MousePosition; 7 | zoom: number; 8 | drawn_sentence: boolean; 9 | 10 | edge_height: number; 11 | edge_coeff: number; 12 | 13 | mouse_move_delay: number; 14 | 15 | locked_index: number|null; 16 | locked_id: string|null; 17 | locked_classes: string|null; 18 | 19 | set: (params: {[key in keyof GraphConfig]: string}) => void; 20 | } 21 | 22 | export const _graph: GraphConfig = { 23 | 24 | // placeholders (get overwritten on first graph draw) 25 | pan: {x: 0, y: 0}, 26 | zoom: 1, 27 | drawn_sentence: false, 28 | 29 | // affect relative heights of the cytoscape graph edges 30 | edge_height: 40, 31 | edge_coeff: 1, 32 | 33 | // how frequently to send mouse-move updates (msecs) 34 | mouse_move_delay: 100, 35 | 36 | // persist info about user locks in between graph draws 37 | locked_index: null, 38 | locked_id: null, 39 | locked_classes: null, 40 | 41 | set: params => _.each(params, 42 | (value, key) => { 43 | if ((_graph as any)[key] !== undefined) 44 | (_graph as any)[key] = value; 45 | }), 46 | 47 | }; 48 | -------------------------------------------------------------------------------- /src/client/gui/chat.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {CollaborationInterface} from "../collaboration"; 4 | import type {User} from "../collaboration/user"; 5 | import type {GUI} from "."; 6 | 7 | /** 8 | * Abstraction to deal with interaction with #chat element and its descendents. 9 | * Handles outgoing and incoming messages and alerts, event callbacks, and 10 | * updating user spans. 11 | */ 12 | export class Chat { 13 | private gui: GUI; 14 | public is_visible: boolean; 15 | private is_minimized: boolean; 16 | 17 | constructor(gui: GUI) { 18 | this.gui = gui; 19 | this.is_visible = false; 20 | this.is_minimized = true; 21 | } 22 | 23 | /** 24 | * Add an "alert" to the #chat. Alert messages show up centrally aligned in 25 | * the #chat. This method also escapes '%u' strings in the message and 26 | * replaces them with the `User::dom`. 27 | */ 28 | alert(message: string, users: User[] = []) { 29 | 30 | const messages = $("#chat-messages"), alert = $("").addClass("message message-alert"); 31 | 32 | // do the interleaving 33 | message.split("%u").forEach((chunk, i) => { 34 | if (i && users[i - 1]) 35 | alert.append(users[i - 1].dom()) 36 | 37 | if (chunk) 38 | alert.append($("").addClass("content").text(chunk)); 39 | }); 40 | 41 | messages.append(alert).closest("div").scrollTop(messages.prop("scrollHeight")); 42 | } 43 | 44 | /** 45 | * Send a message from the current user to the chat. Also broadcasts the 46 | * message to the other users. 47 | */ 48 | sendMessage(collab: CollaborationInterface) { 49 | 50 | // get the message 51 | const input = $("#chat-input"); 52 | const message = (input.val() as string).trim(); 53 | 54 | // don't send just whitespace 55 | if (!message) 56 | return; 57 | 58 | // broadcast 59 | const self = collab.self; 60 | this.gui.app.socket.broadcast("new message", { 61 | id: self.id, 62 | message: message, 63 | }); 64 | 65 | // add it to #chat 66 | this.newMessage(self, message, true); 67 | 68 | // reset the input 69 | input.val(""); 70 | } 71 | 72 | /** 73 | * Add a message to #chat with content `text` from `user`. If `self == true`, 74 | * then the message will be right-aligned. Otherwise, it will be left- 75 | * aligned. 76 | */ 77 | newMessage(user: User, text: string, self: boolean = false) { 78 | 79 | const messages = $("#chat-messages"); 80 | const dom = 81 | $("
  • ") 82 | .addClass("message") 83 | .addClass(self ? "self" : "other") 84 | .append($("
    ") 85 | .addClass("message-content") 86 | .append($("
    ").addClass("message-text").text(text)) 87 | .append($("").addClass("message-timestamp meta").text((new Date()).toLocaleTimeString()))) 88 | .append($("
    ").addClass("message-sender meta").html(user.dom() as any)); 89 | 90 | messages.append(dom).closest("div").scrollTop(messages.prop("scrollHeight")); 91 | } 92 | 93 | /** 94 | * Scan through #chat and update each `.message-sender-info` span for the given 95 | * `user` to use the most recent values of `user.name` and `user.viewing`. 96 | */ 97 | updateUser(user: User) { 98 | 99 | const dom = $(`.message-sender-info[name="${user.id}"]`); 100 | dom.find(".message-sender-name").text(user.name); 101 | dom.find(".message-sender-viewing").text(user.viewing); 102 | } 103 | 104 | /** 105 | * Force to redraw #chat based on our internal state. Called every time there 106 | * is a change. 107 | */ 108 | refresh() { 109 | 110 | $("#chat").css("display", this.is_visible ? "flex" : "none"); 111 | 112 | $("#chat-expand").css("display", this.is_minimized ? "none" : "flex"); 113 | 114 | $("#chat-minimize i") 115 | .removeClass("fa-window-maximize fa-window-minimize") 116 | .addClass(this.is_minimized ? "fa-window-maximize" : "fa-window-minimize"); 117 | 118 | $("#chat-available").removeClass("red green").addClass(this.gui.app.socket.initialized ? "green" : "red"); 119 | 120 | $("#currently-online-number").text(this.gui.app.collab.size); 121 | } 122 | 123 | /** 124 | * Bind callbacks. 125 | */ 126 | bind() { 127 | 128 | const self = this; 129 | 130 | $("#chat-send").click(e => self.sendMessage(self.gui.app.collab)); 131 | 132 | $("#chat-persist *, #chat-persist").click(e => { 133 | if ($(e.target).is("#chat-close")) 134 | return; 135 | 136 | self.is_minimized = !self.is_minimized; 137 | self.refresh(); 138 | }); 139 | 140 | $("#chat-close").click(e => { 141 | self.is_visible = false; 142 | self.refresh(); 143 | }); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/client/gui/config.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {check_if_browser} from "../utils/funcs"; 4 | 5 | export interface GuiConfig { 6 | is_browser: boolean; 7 | 8 | pinned_menu_items: Set; 9 | is_textarea_visible: boolean; 10 | is_table_visible: boolean; 11 | is_label_bar_visible: boolean; 12 | column_visibilities: boolean[]; 13 | textarea_height: string; 14 | autoparsing: boolean; 15 | 16 | statusNormalFadeout: number; 17 | statusErrorFadeout: number; 18 | 19 | set: (params: {[key in keyof GuiConfig]: string}) => void; 20 | } 21 | 22 | export const _gui: GuiConfig = { 23 | 24 | is_browser: check_if_browser(), 25 | 26 | pinned_menu_items: new Set(["discard-corpus", "show-help", "go-home"]), 27 | is_textarea_visible: true, 28 | is_table_visible: false, 29 | is_label_bar_visible: true, 30 | column_visibilities: new Array(10).fill(true), 31 | textarea_height: "238px", 32 | autoparsing: true, 33 | 34 | statusNormalFadeout: 3000, 35 | statusErrorFadeout: 5000, 36 | 37 | set: params => _.each(params, 38 | (value, key) => { 39 | if ((_gui as any)[key] !== undefined) 40 | (_gui as any)[key] = value; 41 | }), 42 | }; 43 | -------------------------------------------------------------------------------- /src/client/gui/graph-menu.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {GUI} from "."; 4 | 5 | export class GraphMenu { 6 | private gui: GUI; 7 | constructor(gui: GUI) { this.gui = gui; } 8 | 9 | bind() { 10 | 11 | const self = this; 12 | 13 | $("#LTR").click(e => { 14 | const corpus = self.gui.app.corpus; 15 | 16 | corpus.is_ltr = !corpus.is_ltr; 17 | self.gui.app.save({ 18 | type: "set", 19 | indices: [corpus.index], 20 | }); 21 | self.gui.refresh(); 22 | }); 23 | 24 | $("#vertical").click(e => { 25 | const corpus = self.gui.app.corpus; 26 | 27 | corpus.is_vertical = !corpus.is_vertical; 28 | self.gui.app.save({ 29 | type: "set", 30 | indices: [corpus.index], 31 | }); 32 | self.gui.refresh(); 33 | }); 34 | 35 | $("#enhanced").click(e => { 36 | const corpus = self.gui.app.corpus; 37 | 38 | if (corpus.is_enhanced) { 39 | corpus.current.unenhance(); 40 | } else { 41 | corpus.current.enhance(); 42 | } 43 | 44 | self.gui.app.save({ 45 | type: "set", 46 | indices: [corpus.index], 47 | }); 48 | self.gui.refresh(); 49 | }); 50 | } 51 | 52 | refresh() { 53 | 54 | const corpus = this.gui.app.corpus; 55 | 56 | $("#LTR .fa") 57 | .removeClass("fa-align-left fa-align-right") 58 | .addClass(corpus.is_ltr ? "fa-align-left" : "fa-align-right"); 59 | 60 | $("#vertical .fa").toggleClass("fa-rotate-90", corpus.is_vertical); 61 | 62 | $("#enhanced .fa").removeClass("fa-tree fa-magic").addClass(corpus.is_enhanced ? "fa-magic" : "fa-tree"); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/client/gui/modals/help.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {GUI} from ".."; 4 | 5 | var _gui: GUI|null = null; 6 | 7 | /** 8 | * Show the modal. 9 | */ 10 | function show() { 11 | console.log("show") 12 | console.trace(); 13 | $("#help-modal").show(); 14 | } 15 | 16 | /** 17 | * Hide the modal. 18 | */ 19 | function hide() { 20 | $("#help-modal").hide(); 21 | } 22 | 23 | /** 24 | * Bind the click-handler. 25 | */ 26 | function bind() { $("#help-modal").find("[name=\"close\"]").click(hide); } 27 | 28 | export function helpInfo(gui: GUI) { 29 | _gui = gui; 30 | bind(); 31 | return { 32 | hide, 33 | show, 34 | }; 35 | } 36 | -------------------------------------------------------------------------------- /src/client/gui/modals/index.ts: -------------------------------------------------------------------------------- 1 | export {helpInfo} from "./help"; 2 | export {uploadFile} from "./upload-file"; 3 | export {uploadURL} from "./upload-url"; 4 | -------------------------------------------------------------------------------- /src/client/gui/modals/upload-file.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {GUI} from ".."; 4 | 5 | var _gui: GUI|null = null; 6 | 7 | /** 8 | * Show the modal. 9 | */ 10 | function show() { 11 | console.log("show") 12 | console.trace(); 13 | $("#upload-file-modal").show().find("[type=\"submit\"]").prop("disabled", !_gui.app.server.is_running); 14 | } 15 | 16 | /** 17 | * Hide the modal. 18 | */ 19 | function hide() { 20 | $("#upload-file-modal").hide(); 21 | $("#upload-filename").val(null); 22 | } 23 | 24 | /** 25 | * Bind the click-handler. 26 | */ 27 | function bind() { $("#upload-file-modal").find("[name=\"close\"]").click(hide); } 28 | 29 | export function uploadFile(gui: GUI) { 30 | _gui = gui; 31 | bind(); 32 | return { 33 | hide, 34 | show, 35 | }; 36 | } 37 | -------------------------------------------------------------------------------- /src/client/gui/modals/upload-url.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {GUI} from ".."; 4 | 5 | var _gui: GUI|null = null; 6 | 7 | /** 8 | * Show the modal. 9 | */ 10 | function show() { 11 | $("#upload-url-modal").show().find("[type=\"submit\"]").prop("disabled", !_gui.app.server.is_running); 12 | } 13 | 14 | /** 15 | * Hide the modal. 16 | */ 17 | function hide() { $("#upload-url-modal").hide(); } 18 | 19 | /** 20 | * Bind the click-handler. 21 | */ 22 | function bind() { $("#upload-url-modal").find("[name=\"close\"]").click(hide); } 23 | 24 | export function uploadURL(gui: GUI) { 25 | _gui = gui; 26 | bind(); 27 | return { 28 | hide, 29 | show, 30 | }; 31 | } 32 | -------------------------------------------------------------------------------- /src/client/gui/status.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import type {GUI} from "."; 4 | 5 | export class Status { 6 | private gui: GUI; 7 | constructor(gui: GUI) { this.gui = gui; } 8 | 9 | bind() { 10 | 11 | const self = this; 12 | 13 | // turn off autoparsing 14 | $("#parse-status").click(e => { 15 | const gui = self.gui; 16 | gui.config.autoparsing = !gui.config.autoparsing; 17 | 18 | if (gui.config.autoparsing) { 19 | self.gui.app.corpus.parse($("#text-data").val() as string); 20 | } else { 21 | self.gui.app.corpus.current.input = $("#text-data").val() as string; 22 | self.gui.app.corpus.format = null; 23 | } 24 | 25 | gui.save(); 26 | gui.refresh(); 27 | }); 28 | } 29 | 30 | refresh() { 31 | 32 | const corpus = this.gui.app.corpus, graph = this.gui.app.graph, gui = this.gui; 33 | 34 | $("#parse-status") 35 | .removeClass("red green") 36 | .addClass(gui.config.autoparsing ? "green" : "red") 37 | .text(gui.config.autoparsing ? "on" : "off"); 38 | 39 | let graphStatus; 40 | if (!corpus.isParsed) { 41 | 42 | graphStatus = "blocked"; 43 | 44 | } else if (!graph.eles.length) { 45 | 46 | graphStatus = "uninitialised"; 47 | 48 | } else if ($(".splitting").length) { 49 | 50 | graphStatus = "splitting node"; 51 | 52 | } else if ($(".merge-source").length) { 53 | 54 | graphStatus = "merging tokens"; 55 | 56 | } else if ($(".combine-source").length) { 57 | 58 | graphStatus = "forming multiword token"; 59 | 60 | } else if (graph.editing) { 61 | 62 | graphStatus = "editing " + graph.editing.data("name"); 63 | 64 | } else { 65 | 66 | graphStatus = "viewing"; 67 | } 68 | 69 | $("#graph-status").removeClass("red green").addClass(corpus.isParsed ? "green" : "red").text(graphStatus); 70 | } 71 | 72 | normal(message: string) { 73 | 74 | const config = this.gui.config; 75 | 76 | if (!config.is_browser) 77 | return; 78 | 79 | const div = $("
    ").addClass("status normal").text(message).fadeOut(config.statusNormalFadeout); 80 | 81 | $("#status-container .flowing").prepend(div); 82 | 83 | setTimeout(() => div.detach(), config.statusNormalFadeout); 84 | } 85 | 86 | error(message: string) { 87 | 88 | const config = this.gui.config; 89 | 90 | if (!config.is_browser) 91 | return; 92 | 93 | const div = $("
    ").addClass("status error").text(`Error: ${message}`).fadeOut(config.statusErrorFadeout); 94 | 95 | $("#status-container .flowing").prepend(div); 96 | 97 | setTimeout(() => div.detach(), config.statusErrorFadeout); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/client/gui/textarea.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import * as nx from "../../notatrix"; 4 | import {forEachFormat} from "../utils/funcs"; 5 | import {_gui as config} from "./config"; 6 | import type {GUI} from "."; 7 | 8 | export class Textarea { 9 | private gui: GUI; 10 | constructor(gui: GUI) { this.gui = gui; } 11 | 12 | bind() { 13 | 14 | const self = this; 15 | 16 | // textarea resizing 17 | $("#text-data").mouseup(e => { 18 | config.textarea_height = $(e.target).css("height"); 19 | self.gui.app.graph.draw(); 20 | self.gui.save(); 21 | }); 22 | } 23 | 24 | refresh() { 25 | 26 | const corpus = this.gui.app.corpus; 27 | 28 | // show the data 29 | if (config.is_textarea_visible) { 30 | 31 | if (corpus.format !== "CoNLL-U") 32 | config.is_table_visible = false; 33 | 34 | if (config.is_table_visible) { 35 | 36 | $("#table-data").show(); 37 | $("#text-data").hide(); 38 | this.gui.table.rebuild(); 39 | 40 | } else { 41 | 42 | $("#table-data").hide(); 43 | $("#text-data").val(corpus.textdata).css("height", config.textarea_height).show(); 44 | } 45 | } 46 | 47 | // show errors and warnings 48 | $(".format-tab").removeClass("disabled").find(".tab-warning, .tab-error").hide(); 49 | forEachFormat(format => { 50 | if (corpus.current.isParsed) { 51 | 52 | if (corpus.format === format) { 53 | 54 | const loss = corpus.current.to(format).loss; 55 | if (loss.length) 56 | $(`.format-tab[name="${format}"] .tab-warning`).show().attr("title", `Unable to encode ${loss.join(", ")}`); 57 | 58 | } else { 59 | try { 60 | 61 | corpus.current.to(format); 62 | 63 | } catch (e) { 64 | 65 | console.log("error", format); 66 | $(`.format-tab[name="${format}"]`).addClass("disabled").find(`.tab-error`).show().attr("title", e.message); 67 | } 68 | } 69 | } else { 70 | 71 | const s = new nx.Sentence(corpus.textdata, {interpretAs: format}); 72 | if (s.Error) 73 | $(`.format-tab[name="${format}"]`) 74 | .addClass("disabled") 75 | .find(`.tab-error`) 76 | .show() 77 | .attr("title", s.Error.message); 78 | } 79 | }); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/client/index.ts: -------------------------------------------------------------------------------- 1 | require("babel-polyfill"); 2 | 3 | import * as $ from "jquery"; 4 | 5 | import {App} from "./app"; 6 | 7 | // on ready 8 | $(() => { 9 | (window as any).app = new App(location.protocol !== "file:"); 10 | }); 11 | -------------------------------------------------------------------------------- /src/client/server.ts: -------------------------------------------------------------------------------- 1 | import * as $ from "jquery"; 2 | 3 | import {storage} from "./utils"; 4 | import {getTreebankId} from "./utils/funcs"; 5 | import type {App} from "./app"; 6 | 7 | /** 8 | * Abstraction over an AJAX connection. Handles sending and receiving large 9 | * packets from a server. Usually, this means the initial loading of the 10 | * corpus, as well as saving (including (de)serialization). 11 | */ 12 | export class Server { 13 | private app: App; 14 | public is_running: boolean; 15 | private treebank_id: string; 16 | 17 | constructor(app: App) { 18 | 19 | // save a reference to the parent 20 | this.app = app; 21 | this.is_running = false; 22 | this.treebank_id = getTreebankId(); 23 | } 24 | 25 | /** 26 | * Attempt to connect to the server via AJAX. This function updates the 27 | * Server.is_running<\tt> attribute, which is checked by other functions. 28 | */ 29 | connect() { 30 | try { 31 | $.ajax({ 32 | type: "GET", 33 | url: "/running", 34 | success: data => { 35 | console.info("AJAX connect success with response:", data); 36 | 37 | this.is_running = true; 38 | this.app.gui.status.normal("connected to server"); 39 | this.load(); 40 | }, 41 | error: data => { 42 | console.info("AJAX connect failed with response:", data); 43 | this.app.gui.status.error("unable to connect to server"); 44 | 45 | const serial = storage.load(); 46 | if (serial) 47 | this.app.load(serial); 48 | } 49 | }); 50 | } catch (e) { 51 | 52 | console.info("AJAX connected failed with response:", e.message); 53 | this.app.gui.status.error("unable to connect to server"); 54 | 55 | const serial = storage.load(); 56 | if (serial) 57 | this.app.load(serial); 58 | } 59 | } 60 | 61 | /** 62 | * Save a JSON object containing a serial representation of the corpus to the 63 | * server (if running). 64 | */ 65 | save(obj: any) { 66 | if (!this.is_running) 67 | return; 68 | 69 | try { 70 | 71 | const serial = JSON.stringify(obj); 72 | 73 | $.ajax({ 74 | type: "POST", 75 | url: `/save?treebank_id=${this.treebank_id}`, 76 | contentType: "application/json; charset=utf-8", 77 | data: serial, 78 | dataType: "json", 79 | success: data => { 80 | if (data.error) { 81 | console.log(this.treebank_id); 82 | console.info("AJAX save failed with response:", data); 83 | this.app.gui.status.error("unable to save to server"); 84 | 85 | } else { 86 | console.info("AJAX save success with response:", data); 87 | } 88 | }, 89 | error: data => { 90 | console.info("AJAX save failed with response:", data); 91 | this.app.gui.status.error("unable to save to server"); 92 | } 93 | }); 94 | } catch (e) { 95 | 96 | console.info("AJAX save failed with response:", e); 97 | this.app.gui.status.error("unable to save to server"); 98 | } 99 | } 100 | 101 | /** 102 | * Attempt to load a serial representation of the corpus from the server. 103 | */ 104 | load() { 105 | 106 | if (!this.is_running) 107 | return; 108 | 109 | try { 110 | $.ajax({ 111 | type: "GET", 112 | url: `/load?treebank_id=${this.treebank_id}`, 113 | success: data => { 114 | if (data.error) { 115 | 116 | console.info("AJAX load failed with response:", data); 117 | this.app.gui.status.error("unable to load from server"); 118 | 119 | const serial = storage.load(); 120 | if (serial) 121 | this.app.load(serial); 122 | 123 | } else { 124 | 125 | // console.info('AJAX load success with response:', data); 126 | data = JSON.parse(data); 127 | this.app.load(data); 128 | } 129 | }, 130 | error: data => { 131 | console.info("AJAX load failed with response:", data); 132 | this.app.gui.status.error("unable to load from server"); 133 | 134 | const serial = storage.load(); 135 | if (serial) 136 | this.app.load(serial); 137 | } 138 | }); 139 | } catch (e) { 140 | 141 | console.info("AJAX load failed with response:", e); 142 | this.app.gui.status.error("unable to load from server"); 143 | 144 | const serial = storage.load(); 145 | if (serial) 146 | this.app.load(serial); 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/client/socket.ts: -------------------------------------------------------------------------------- 1 | // @ts-ignore: We don't have access to SocketIO types unless we upgrade to v3 :^) 2 | import * as _Socket from "socket.io-client"; 3 | 4 | interface SocketIoClient { 5 | on(channel: string, callback: (data: any) => void): void; 6 | emit(channel: string, data: any): void; 7 | } 8 | 9 | import {check_if_browser} from "./utils/funcs"; 10 | import type {App} from "./app"; 11 | 12 | /** 13 | * Abstraction over a SocketIO connection. Handles sending and receiving small 14 | * packets from a server. 15 | * 16 | * NB: this handles all server communication except for the (de)serialization of 17 | * the corpus (this is handled via AJAX calls). 18 | */ 19 | export class Socket { 20 | private app: App; 21 | private _socket: SocketIoClient|null; 22 | public initialized: boolean; 23 | private isOpen: boolean; 24 | 25 | constructor(app: App) { 26 | this.app = app; 27 | 28 | // save some internal state to avoid loops and errors 29 | this._socket = null; 30 | this.initialized = false; 31 | this.isOpen = false; 32 | } 33 | 34 | /** 35 | * Make a connection to the server and set callbacks for the various messages 36 | * we expect to receive. 37 | */ 38 | connect() { 39 | 40 | // we shouldn't try to connect if we're just testing 41 | if (!check_if_browser() || !this.app.online) 42 | return; 43 | 44 | // cache this access 45 | const collab = this.app.collab, corpus = this.app.corpus, graph = this.app.graph, gui = this.app.gui; 46 | 47 | // request a server connection 48 | this._socket = new _Socket() as SocketIoClient; 49 | 50 | // handle server approving our request for connection 51 | this._socket.on("initialization", data => { 52 | // internals 53 | this.initialized = true; 54 | this.isOpen = true; 55 | 56 | // make a note of our id, name, etc 57 | collab.setSelf(data); 58 | }); 59 | 60 | // another user connected to the document 61 | this._socket.on("connection", d => collab.addUser(d)); 62 | 63 | // a user diconnected from the document 64 | this._socket.on("disconnection", d => collab.removeUser(d)); 65 | 66 | // a user modified the corpus 67 | this._socket.on("modify corpus", data => { 68 | const user = collab.getUser(data.id); 69 | 70 | let index = corpus.index; 71 | 72 | // check whether we need to change our corpus index 73 | switch (data.type) { 74 | case ("insert"): 75 | if (data.indices[0] <= index) 76 | index++; 77 | break; 78 | 79 | case ("remove"): 80 | if (data.indices[0] < index) 81 | index--; 82 | break; 83 | 84 | case ("redo"): 85 | case ("undo"): 86 | index = data.serial.index; 87 | break; 88 | 89 | case ("set"): 90 | break; 91 | 92 | case ("parse"): 93 | if (data.indices[0] < index) 94 | index += data.indices.length - 1; 95 | break; 96 | } 97 | 98 | // send a chat alert 99 | gui.chat.alert(`%u: '${data.type}' index ${data.indices[0]}`, [user]); 100 | 101 | // update the undo stack 102 | this.app.undoer.push(data.serial); 103 | 104 | // save the current index 105 | const currentIndex = this.app.corpus.index; 106 | 107 | // load the newest serialization 108 | this.app.load(data.serial); 109 | 110 | // navigate to the correct index 111 | this.app.corpus.index = currentIndex; 112 | }); 113 | 114 | // a user modified their current index 115 | this._socket.on("modify index", data => { 116 | const user = collab.getUser(data.id); 117 | user.viewing = data.index; 118 | gui.chat.updateUser(user); 119 | }); 120 | 121 | // a user clicked on a graph node 122 | this._socket.on("lock graph", data => { 123 | const user = collab.getUser(data.id); 124 | user.locked = data.locked; 125 | graph.setLocks(); 126 | }); 127 | 128 | // a user clicked off of a graph node 129 | this._socket.on("unlock graph", data => { 130 | const user = collab.getUser(data.id); 131 | user.locked = data.locked; 132 | graph.setLocks(); 133 | }); 134 | 135 | // a user moved their mouse in the graph area 136 | this._socket.on("move mouse", data => { 137 | const user = collab.getUser(data.id); 138 | user.mouse = data.mouse; 139 | graph.drawMice(); 140 | }); 141 | 142 | // a user sent a chat message 143 | this._socket.on("new message", data => { 144 | const user = collab.getUser(data.id); 145 | gui.chat.newMessage(user, data.message, false); 146 | }); 147 | } 148 | 149 | /** 150 | * Broadcast (/emit) a packet of type with arguments to the server. 151 | */ 152 | broadcast(name: string, data?: any) { 153 | 154 | // debugging 155 | // console.log('broadcast', name, data); 156 | 157 | // do the work 158 | this._socket.emit(name, data); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/client/undo-manager.ts: -------------------------------------------------------------------------------- 1 | import * as nx from "../notatrix"; 2 | import {Corpus} from "./corpus"; 3 | import {storage} from "./utils"; 4 | import type {App} from "./app"; 5 | 6 | class Stack { 7 | private _items: T[]; 8 | constructor() { this._items = []; } 9 | 10 | get length() { return this._items.length; } 11 | 12 | push(item: T) { this._items.push(item); } 13 | 14 | pop() { return this._items.pop() } 15 | 16 | peek() { return this._items.slice(-1)[0]; } 17 | 18 | clear() { this._items = []; } 19 | } 20 | 21 | export class UndoManager { 22 | private app: App; 23 | public active: boolean; 24 | public current: any|null; 25 | private undoStack: Stack; 26 | private redoStack: Stack; 27 | 28 | constructor(app: App) { 29 | this.app = app; 30 | this.active = false; 31 | this.current = null; 32 | this.undoStack = new Stack(); 33 | this.redoStack = new Stack(); 34 | } 35 | 36 | hasUndo() { return !!this.undoStack.length; } 37 | 38 | hasRedo() { return !!this.redoStack.length; } 39 | 40 | push(serial: any) { 41 | 42 | if (this.active) 43 | return false; 44 | 45 | // do some comparisons here to change for changes 46 | console.log("serial", serial) // updated 47 | storage.backup(serial); 48 | console 49 | .log("current", this.current) 50 | // if (JSON.stringify(serial) === JSON.stringify(this.current)) 51 | // return false; 52 | 53 | this.undoStack.push(this.current); 54 | this.redoStack.clear(); 55 | this.current = serial; 56 | 57 | return true; 58 | } 59 | 60 | undo() { 61 | 62 | if (!this.hasUndo()) 63 | return false; 64 | 65 | this.active = true; 66 | let current = this.app.corpus.serialize(); 67 | this.redoStack.push(this.current); 68 | this.current = current; 69 | 70 | let undo = this.undoStack.pop(); 71 | this.app.corpus = new Corpus(this.app, undo); 72 | this.app.socket.broadcast("modify corpus", { 73 | type: "undo", 74 | serial: undo, 75 | }); 76 | this.app.save(); 77 | this.app.gui.refresh(); 78 | this.active = false; 79 | 80 | return true; 81 | } 82 | 83 | redo() { 84 | 85 | if (!this.hasRedo()) 86 | return false; 87 | 88 | this.active = true; 89 | let current = this.app.corpus.serialize(); 90 | this.undoStack.push(this.current); 91 | this.current = current; 92 | 93 | let redo = this.redoStack.pop(); 94 | this.app.corpus = new Corpus(this.app, redo); 95 | this.app.socket.broadcast("modify corpus", { 96 | type: "redo", 97 | serial: redo, 98 | }); 99 | this.app.save(); 100 | this.app.gui.refresh(); 101 | this.active = false; 102 | 103 | return true; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/client/utils/funcs.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import * as $ from "jquery"; 3 | import { v4 as uuidv4 } from "uuid"; 4 | 5 | export function check_if_browser() { 6 | try { 7 | return !!window; 8 | } catch (e) { 9 | return false; 10 | } 11 | } 12 | 13 | export function download(filename: string, mimetype: string, uriComponent: string) { 14 | const link = $("") 15 | .attr("download", filename) 16 | .attr("href", `data:${mimetype}; charset=utf-8,${encodeURIComponent(uriComponent)}`); 17 | $("body").append(link); 18 | link[0].click(); 19 | } 20 | 21 | export function getTreebankId(): string { 22 | if (!check_if_browser()) 23 | return null; 24 | 25 | const match = location.href.match(/treebank_id=([0-9a-f-]{36})(#|\/|$|&)/); 26 | return match ? match[1] : uuidv4(); 27 | } 28 | 29 | export function getRootPath() { 30 | let pageURL = window.location.href; 31 | return pageURL.substr(0, pageURL.lastIndexOf("/") + 1); 32 | } 33 | 34 | export function link(href: string, target: string = "_blank") { 35 | const link = $("").attr("href", href).attr("target", target); 36 | $("body").append(link); 37 | console.log(href, target); 38 | link[0].click(); 39 | } 40 | 41 | export function noop(arg: T): T { 42 | return arg; 43 | } 44 | 45 | export function thin(arg: T): T|undefined { 46 | return !!arg ? arg : undefined; 47 | } 48 | 49 | export function forEachFormat(callback: (format: string) => void) { 50 | ["Brackets", "CG3", "CoNLL-U", "plain text", "SD"].forEach(callback); 51 | } 52 | -------------------------------------------------------------------------------- /src/client/utils/index.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | export * as export_ from "./export"; 4 | export * as storage from "./local-storage"; 5 | export * as validate from "./validate"; 6 | export * as funcs from "./funcs"; 7 | -------------------------------------------------------------------------------- /src/client/utils/local-storage.ts: -------------------------------------------------------------------------------- 1 | import {getTreebankId} from "./funcs"; 2 | 3 | const KEY = "__ud_annotatrix_prefs_"; 4 | 5 | function isAvailable() { 6 | 7 | try { 8 | localStorage; 9 | } catch (e) { 10 | return false; 11 | } 12 | 13 | // Taken from https://developer.mozilla.org/en-US/docs/Web/API/Web_Storage_API/Using_the_Web_Storage_API 14 | 15 | try { 16 | const x = "__storage_test__"; 17 | 18 | localStorage.setItem(x, x); 19 | localStorage.removeItem(x); 20 | return true; 21 | 22 | } catch (e) { 23 | 24 | return e instanceof DOMException && 25 | (e.code === 1014 // Firefox 26 | || e.code === 22 // everything else 27 | 28 | // test name field too, because code might not be present 29 | || e.name === "NS_ERROR_DOM_QUOTA_REACHED" // Firefox 30 | || e.name === "QuotaExceededError") // everything else 31 | 32 | // acknowledge QuotaExceededError only if there's something already stored 33 | && localStorage.length !== 0; 34 | } 35 | } 36 | 37 | function formatUploadSize(fileSize: number) { 38 | if (fileSize < 1024) 39 | return `${fileSize} B`; 40 | 41 | if (fileSize < 1048576) 42 | return `${(fileSize / 1024).toFixed(1)} kB`; 43 | 44 | return `${(fileSize / 1048576).toFixed(1)} mB`; 45 | } 46 | 47 | export function backup(value: any): void { 48 | if (!isAvailable()) 49 | return; 50 | const serial = JSON.stringify(value); 51 | localStorage.setItem(KEY + "backup", serial); 52 | } 53 | 54 | export function restore(): any { 55 | if (!isAvailable()) 56 | return null; 57 | let serial = localStorage.getItem(KEY + "backup"); 58 | return JSON.parse(serial); 59 | } 60 | 61 | export function save(value: any): void { 62 | if (!isAvailable()) 63 | return; 64 | const serial = JSON.stringify(value); 65 | localStorage.setItem(getTreebankId(), serial); 66 | } 67 | 68 | export function load(): any { 69 | if (!isAvailable()) 70 | return null; 71 | let serial = localStorage.getItem(getTreebankId()); 72 | return JSON.parse(serial); 73 | } 74 | 75 | export function clear(): void { 76 | if (!isAvailable()) 77 | return; 78 | localStorage.removeItem(getTreebankId()); 79 | } 80 | 81 | export function setPrefs(item: string, prefs: string): void { 82 | if (!isAvailable() || !item) 83 | return; 84 | localStorage.setItem(KEY + item, prefs); 85 | } 86 | 87 | export function getPrefs(item: string): string { 88 | if (!isAvailable() || !item) 89 | return null; 90 | return localStorage.getItem(KEY + item); 91 | } 92 | -------------------------------------------------------------------------------- /src/notatrix/base.ts: -------------------------------------------------------------------------------- 1 | import type {Options} from "./nx/options"; 2 | import type {Sentence, SentenceSerial} from "./nx/sentence"; 3 | 4 | export type Input = string|SentenceSerial|any[]; // :^( 5 | 6 | export type DetectOutput = string; 7 | export type Detect = (input: Input, options: Options) => DetectOutput; 8 | export type DetectByName = {[name: string]: Detect}; 9 | 10 | export type SplitOutput = string[]|void; 11 | export type Split = (text: string, options: Options) => SplitOutput; 12 | export type SplitByName = {[name: string]: Split}; 13 | 14 | export type ParseOutput = SentenceSerial|void; 15 | export type Parse = (input: Input, options: Options) => ParseOutput; 16 | export type ParseByName = {[name: string]: Parse}; 17 | 18 | export interface ParamsOutput { 19 | form: string; 20 | lemma: string; 21 | upostag: string; 22 | xpostag: string; 23 | feats: string|null|undefined; 24 | misc: string|null|undefined; 25 | head: string; 26 | } 27 | export interface GenerateResult { 28 | output: Output; 29 | loss: string[]; 30 | } 31 | export type GenerateOutput = GenerateResult|GenerateResult|GenerateResult[]>|void; // :^( 32 | export type Generate = (sent: Sentence, options: Options) => GenerateOutput; 33 | export type GenerateByName = {[name: string]: Generate}; 34 | 35 | interface Format { 36 | name: string; 37 | fields: { 38 | FIELDS: string[]; 39 | HAS_COMMENTS: boolean; 40 | }; 41 | split: Split; 42 | detect: Detect; 43 | parse: Parse; 44 | generate: Generate; 45 | } 46 | export type FormatByName = {[name: string]: Format}; 47 | -------------------------------------------------------------------------------- /src/notatrix/detector.ts: -------------------------------------------------------------------------------- 1 | import {FORMAT_BY_NAME} from "./formats"; 2 | import {DetectorError} from "./utils/errors"; 3 | import {formats} from "./utils/constants"; 4 | import {thin} from "./utils/funcs"; 5 | import type {DetectByName, DetectOutput, Input} from "./base"; 6 | import type {Options} from "./nx/options"; 7 | 8 | export const DETECT_BY_NAME: DetectByName = { 9 | "apertium stream": FORMAT_BY_NAME.apertiumStream.detect, 10 | apertiumStream: FORMAT_BY_NAME.apertiumStream.detect, 11 | Brackets: FORMAT_BY_NAME.brackets.detect, 12 | brackets: FORMAT_BY_NAME.brackets.detect, 13 | CG3: FORMAT_BY_NAME.cg3.detect, 14 | cg3: FORMAT_BY_NAME.cg3.detect, 15 | "CoNLL-U": FORMAT_BY_NAME.conllu.detect, 16 | conllu: FORMAT_BY_NAME.conllu.detect, 17 | "notatrix serial": FORMAT_BY_NAME.notatrixSerial.detect, 18 | notatrixSerial: FORMAT_BY_NAME.notatrixSerial.detect, 19 | Params: FORMAT_BY_NAME.params.detect, 20 | params: FORMAT_BY_NAME.params.detect, 21 | "plain text": FORMAT_BY_NAME.plainText.detect, 22 | plainText: FORMAT_BY_NAME.plainText.detect, 23 | SD: FORMAT_BY_NAME.sd.detect, 24 | sd: FORMAT_BY_NAME.sd.detect, 25 | }; 26 | 27 | export function detect(input: Input, options?: Options): DetectOutput|DetectOutput[] { 28 | options = { 29 | suppressDetectorErrors: true, 30 | returnAllMatches: true, 31 | requireOneMatch: false, 32 | ...options, 33 | }; 34 | 35 | const matches = formats 36 | .map(format => { 37 | const detect = FORMAT_BY_NAME[format].detect; 38 | try { 39 | return detect(input, options); 40 | } catch (e) { 41 | if (e instanceof DetectorError) 42 | return undefined; 43 | 44 | throw e; 45 | } 46 | }) 47 | .filter(thin); 48 | 49 | if (!matches.length && !options.suppressDetectorErrors) 50 | throw new DetectorError("Unable to detect format", input, options); 51 | 52 | if (matches.length > 1 && !options.suppressDetectorErrors && 53 | options.requireOneMatch) 54 | throw new DetectorError("Detected multiple formats", input, options); 55 | 56 | return options.returnAllMatches ? matches : matches[0]; 57 | } 58 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/detector.ts: -------------------------------------------------------------------------------- 1 | import {DetectorError} from "../../utils/errors"; 2 | import type {Options} from "../../nx/options"; 3 | 4 | export function detect(text: string, options: Options): string { 5 | throw new DetectorError("not implemented", text, options); 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS: string[] = []; // TODO: Implement this? 2 | export const HAS_COMMENTS = false; // TODO: Implement this? 3 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/generator.ts: -------------------------------------------------------------------------------- 1 | import type {Options} from "../../nx/options"; 2 | import type {Sentence} from "../../nx/sentence"; 3 | 4 | export function generate(sent: Sentence, options: Options): void { 5 | // throw new GeneratorError('not implemented'); 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS, HAS_COMMENTS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | throw new Error("not implemented"); 9 | const serial = sent.serialize(); 10 | 11 | let losses: Set = new Set(); 12 | 13 | if (!HAS_COMMENTS && serial.comments.length) 14 | losses.add("comments"); 15 | 16 | serial.tokens.forEach(token => { 17 | Object.keys(_.omit(token, FIELDS)).forEach(field => { 18 | switch (field) { 19 | case ("index"): 20 | break; 21 | 22 | default: 23 | losses.add(field); 24 | } 25 | }); 26 | }); 27 | 28 | return Array.from(losses); 29 | } 30 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "apertium stream"; 2 | export * as fields from "./fields"; 3 | export {split} from "./splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/parser.ts: -------------------------------------------------------------------------------- 1 | import type {Options} from "../../nx/options"; 2 | import type {SentenceSerial} from "../../nx/sentence"; 3 | 4 | export function parse(text: string, options: Options): void { 5 | // throw new ParserError('not implemented'); 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/apertium-stream/splitter.ts: -------------------------------------------------------------------------------- 1 | import {Options} from "../../nx/options"; 2 | 3 | export function split(text: string, options: Options): void { 4 | // throw new SplitterError('not implemented', text, options); 5 | }; 6 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/detector.ts: -------------------------------------------------------------------------------- 1 | import * as re from "../../utils/regex"; 2 | import {DetectorError} from "../../utils/errors"; 3 | import {isJSONSerializable} from "../../utils/funcs"; 4 | import type {Options} from "../../nx/options"; 5 | 6 | export function detect(text: string, options: Options): string { 7 | options = { 8 | allowEmptyString: false, 9 | allowTrailingWhitespace: true, 10 | allowLeadingWhitespace: true, 11 | allowNoDependencies: false, 12 | allowNewlines: false, 13 | ...options, 14 | }; 15 | 16 | if (!text && !options.allowEmptyString) 17 | throw new DetectorError("Illegal Brackets: empty string", text, options); 18 | 19 | if (isJSONSerializable(text)) 20 | throw new DetectorError("Illegal Brackets: JSON object", text, options); 21 | 22 | if (/\n/.test(text) && !options.allowNewlines) 23 | throw new DetectorError("Illegal Brackets: contains newlines", text, 24 | options); 25 | 26 | // internal stuff 27 | let parsing: string|null = null; 28 | let depth = 0; 29 | let sawBracket = false; 30 | 31 | text.split("").forEach((char, i) => { 32 | switch (char) { 33 | case ("["): 34 | if (parsing === "]") 35 | throw new DetectorError("Illegal Brackets: invalid sequence \"][\"", 36 | text, options); 37 | 38 | sawBracket = true; 39 | depth += 1; 40 | break; 41 | 42 | case ("]"): 43 | if (parsing === "[") 44 | throw new DetectorError("Illegal Brackets: invalid sequence \"[]\"", 45 | text, options); 46 | 47 | sawBracket = true; 48 | depth -= 1; 49 | break; 50 | 51 | case (" "): 52 | case ("\t"): 53 | case ("\n"): 54 | 55 | if (!options.allowLeadingWhitespace) { 56 | if (parsing !== null && !re.whitespace.test(parsing)) 57 | throw new DetectorError( 58 | "Illegal Brackets: contains leading whitespace", text, options); 59 | } 60 | break; 61 | } 62 | 63 | parsing = char; 64 | }); 65 | 66 | if (!sawBracket && !options.allowNoDependencies) 67 | throw new DetectorError("Illegal Brackets: contains no dependencies", text, 68 | options); 69 | 70 | if (depth !== 0) 71 | throw new DetectorError("Illegal Brackets: bracket mismatch", text, 72 | options); 73 | 74 | if (re.whitespace.test(parsing) && !options.allowTrailingWhitespace) 75 | throw new DetectorError("Illegal Brackets: contains trailing whitespace", 76 | text, options); 77 | 78 | return "Brackets"; 79 | } 80 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "form", 3 | "heads", 4 | ]; 5 | export const HAS_COMMENTS = false; 6 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {GeneratorError} from "../../utils/errors"; 3 | import {getLoss} from "./get-loss"; 4 | import type {GenerateResult} from "../../base"; 5 | import type {Options} from "../../nx/options"; 6 | import type {Sentence} from "../../nx/sentence"; 7 | import type {RelationItem} from "../../nx/relation-set"; 8 | 9 | interface BracketsNode extends RelationItem { 10 | deps: BracketsNode[]; 11 | } 12 | 13 | export function generate(sent: Sentence, options: Options): GenerateResult { 14 | if (!sent.isParsed) 15 | return { 16 | output: null, 17 | loss: undefined, 18 | }; 19 | 20 | if (!sent || sent.name !== "Sentence") 21 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 22 | options); 23 | 24 | options = _.defaults(options, sent.options, 25 | { 26 | 27 | }); 28 | 29 | sent.index(); 30 | 31 | if (!sent.root) 32 | throw new GeneratorError("Unable to generate, could not find root", sent, options); 33 | 34 | // build the tree structure 35 | let seen = new Set([sent.root]); 36 | let root: BracketsNode = { 37 | token: sent.root, 38 | deprel: null, 39 | deps: [], 40 | }; 41 | 42 | const visit = (node: BracketsNode) => { 43 | node.token.mapDependents((dep: BracketsNode) => { 44 | if (seen.has(dep.token)) 45 | throw new GeneratorError( 46 | "Unable to generate, dependency structure non-linear", sent, options); 47 | 48 | dep.deps = []; 49 | node.deps.push(dep); 50 | seen.add(dep.token); 51 | visit(dep); 52 | }); 53 | }; 54 | visit(root); 55 | 56 | // console.log(root); 57 | 58 | if (seen.size < sent.size + 1) 59 | throw new GeneratorError( 60 | "Unable to generate, sentence not fully connected", sent, options); 61 | 62 | // parse the tree into a string 63 | let output = ""; 64 | const walk = (node: BracketsNode) => { 65 | output += "[" + (node.deprel || "_") + " "; 66 | 67 | node.deps.forEach(dep => { 68 | if (dep.token.indices.absolute < node.token.indices.absolute) 69 | walk(dep); 70 | }); 71 | 72 | output += " " + node.token.form + " "; 73 | 74 | node.deps.forEach((dep: BracketsNode) => { 75 | if (dep.token.indices.absolute > node.token.indices.absolute) 76 | walk(dep); 77 | }); 78 | 79 | output += " ] "; 80 | }; 81 | root.deps.forEach(dep => walk(dep)); 82 | 83 | // clean up the output 84 | output = output.replace(/\s+/g, " ") 85 | .replace(/ \]/g, "]") 86 | .replace(/\[ /g, "[") 87 | .replace(/(\w)_(\w)/, "$1 $2") 88 | .trim(); 89 | 90 | // console.log(output); 91 | 92 | return { 93 | output: output, 94 | loss: getLoss(sent), 95 | }; 96 | } 97 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | const serial = sent.serialize(); 9 | let losses: Set = new Set(); 10 | 11 | if (serial.comments.length) 12 | losses.add("comments"); 13 | 14 | serial.tokens.forEach( 15 | token => {Object.keys(_.omit(token, FIELDS)).forEach(field => { 16 | switch (field) { 17 | case ("uuid"): 18 | case ("index"): 19 | case ("deps"): 20 | break; 21 | 22 | case ("heads"): 23 | if (token.heads.length > 1) 24 | losses.add(field); 25 | break; 26 | 27 | case ("feats"): 28 | case ("misc"): 29 | if (token[field] && token[field].length) 30 | losses.add(field); 31 | break; 32 | 33 | default: 34 | if (token[field as keyof TokenSerial]) 35 | losses.add(field); 36 | } 37 | }) 38 | 39 | }); 40 | 41 | return Array.from(losses); 42 | } 43 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "Brackets"; 2 | export * as fields from "./fields"; 3 | export {split} from "../default-splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/brackets/parser.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {detect} from "./detector"; 4 | import {DetectorError, ParserError} from "../../utils/errors"; 5 | import type {Options} from "../../nx/options"; 6 | import type {SentenceSerial} from "../../nx/sentence"; 7 | import type {TokenSerial} from "../../nx/base-token"; 8 | 9 | export function parse(text: string, options: Options): SentenceSerial { 10 | // console.log(); 11 | // console.log(text); 12 | 13 | options = { 14 | allowEmptyString: false, 15 | ...options, 16 | }; 17 | 18 | try { 19 | detect(text, options); 20 | } catch (e) { 21 | if (e instanceof DetectorError) 22 | throw new ParserError(e.message, text, options); 23 | 24 | throw e; 25 | } 26 | 27 | class _Sentence { 28 | input: string; 29 | options: Options; 30 | parent: _Token|null; 31 | root: _Token|null; 32 | comments: string[]; 33 | 34 | constructor(text: string, options: Options) { 35 | this.input = text; 36 | this.options = options; 37 | this.parent = null; 38 | this.root = null; 39 | this.comments = []; 40 | } 41 | 42 | serialize(): SentenceSerial { 43 | this.root.index(0); 44 | 45 | return { 46 | input: this.input, 47 | options: this.options, 48 | comments: this.comments, 49 | tokens: this.root.serialize([]) 50 | }; 51 | } 52 | 53 | push(token: _Token) { this.root = token; } 54 | } 55 | 56 | class _Token { 57 | parent: _Token; 58 | deprel: string|null; 59 | before: _Token[]; 60 | words: string[]; 61 | after: _Token[]; 62 | num: number|undefined; 63 | 64 | constructor(parent: _Token) { 65 | this.parent = parent; 66 | this.deprel = null; 67 | this.before = []; 68 | this.words = []; 69 | this.after = []; 70 | } 71 | 72 | eachBefore(callback: (token: _Token, index: number) => void): void { 73 | for (let i = 0; i < this.before.length; i++) { 74 | callback(this.before[i], i); 75 | } 76 | } 77 | 78 | eachAfter(callback: (token: _Token, index: number) => void): void { 79 | for (let i = 0; i < this.after.length; i++) { 80 | callback(this.after[i], i); 81 | } 82 | } 83 | 84 | index(num: number): number { 85 | this.eachBefore(before => { num = before.index(num); }); 86 | this.num = ++num; 87 | this.eachAfter(after => {num = after.index(num)}); 88 | 89 | return num; 90 | } 91 | 92 | serialize(tokens: TokenSerial[]): TokenSerial[] { 93 | this.eachBefore(before => { before.serialize(tokens); }); 94 | 95 | tokens.push({ 96 | form: this.form, 97 | heads: [{ 98 | index: this.parent.num || 0, 99 | deprel: this.deprel, 100 | }], 101 | index: this.num, 102 | }); 103 | 104 | this.eachAfter(after => { after.serialize(tokens); }); 105 | 106 | return tokens; 107 | } 108 | 109 | get form(): string { return this.words.join("_"); } 110 | 111 | push(token: _Token): void { 112 | if (this.words.length) { 113 | this.after.push(token); 114 | } else { 115 | this.before.push(token); 116 | } 117 | } 118 | 119 | addWord(word: string): void { 120 | if (!word) 121 | return; 122 | 123 | if (this.deprel) { 124 | this.words.push(word); 125 | } else { 126 | this.deprel = word; 127 | } 128 | } 129 | } 130 | 131 | let sent = new _Sentence(text, options); 132 | let parsing: _Sentence|_Token = sent; 133 | let parent: _Sentence|_Token|null = null; 134 | let word = ""; 135 | 136 | _.each(text, char => { 137 | switch (char) { 138 | case ("["): 139 | parent = parsing; 140 | parsing = new _Token(parent as _Token); 141 | if (parent && parent.push) 142 | parent.push(parsing) 143 | word = ""; 144 | break; 145 | 146 | case ("]"): 147 | if ((parsing as _Token).addWord) 148 | (parsing as _Token).addWord(word); 149 | parsing = parsing.parent; 150 | parent = parsing.parent; 151 | word = ""; 152 | break; 153 | 154 | case (" "): 155 | if ((parsing as _Token).addWord) 156 | (parsing as _Token).addWord(word); 157 | word = ""; 158 | break; 159 | 160 | default: 161 | word += char; 162 | break; 163 | } 164 | }); 165 | 166 | // console.log(sent.serialize()) 167 | return sent.serialize(); 168 | } 169 | -------------------------------------------------------------------------------- /src/notatrix/formats/cg3/detector.ts: -------------------------------------------------------------------------------- 1 | import * as re from "../../utils/regex"; 2 | import {DetectorError} from "../../utils/errors"; 3 | import {isJSONSerializable} from "../../utils/funcs"; 4 | import type {Options} from "../../nx/options"; 5 | 6 | export function detect(text: string, options: Options): string { 7 | options = { 8 | allowEmptyString: false, 9 | allowTrailingWhitespace: true, 10 | allowLeadingWhitespace: true, 11 | ...options, 12 | }; 13 | 14 | if (!text && !options.allowEmptyString) 15 | throw new DetectorError("Illegal CG3: empty string", text, options); 16 | 17 | if (isJSONSerializable(text)) 18 | throw new DetectorError("Illegal CG3: JSON object", text, options); 19 | 20 | // internal stuff 21 | let parsing: string|null = null; 22 | 23 | // iterate over the lines and check each one 24 | text.split(/\n/).forEach(line => { 25 | if (re.whiteline.test(line)) { 26 | if (parsing === null) { 27 | if (!options.allowLeadingWhitespace) 28 | throw new DetectorError("Illegal CG3: contains leading whitespace", 29 | text, options); 30 | 31 | } else { 32 | if (parsing !== "token-body" || !options.allowTrailingWhitespace) 33 | throw new DetectorError("Illegal CG3: contains trailing whitespace", 34 | text, options); 35 | } 36 | 37 | parsing = "whitespace"; 38 | 39 | } else if (re.comment.test(line)) { 40 | if (parsing === "token-start" || parsing === "token-body") 41 | throw new DetectorError( 42 | `Illegal CG3: invalid sequence ${parsing}=>comment`, text, options); 43 | 44 | parsing = "comment"; 45 | 46 | } else if (re.cg3TokenStart.test(line)) { 47 | if (parsing === "token-start") 48 | throw new DetectorError( 49 | `Illegal CG3: invalid sequence ${parsing}=>token-start`, text, 50 | options); 51 | 52 | parsing = "token-start"; 53 | 54 | } else if (re.cg3TokenContent.test(line)) { 55 | if (parsing === "comment" || parsing === "whitespace") 56 | throw new DetectorError( 57 | `Illegal CG3: invalid sequence ${parsing}=>token-body`, text, 58 | options); 59 | 60 | parsing = "token-body"; 61 | 62 | } else { 63 | throw new DetectorError(`Illegal CG3: unmatched line`, text, options); 64 | } 65 | }); 66 | 67 | return "CG3"; 68 | } 69 | -------------------------------------------------------------------------------- /src/notatrix/formats/cg3/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "semicolon", 3 | "index", 4 | "form", 5 | "lemma", 6 | "heads", 7 | "xpostag", 8 | "other", 9 | "analyses", 10 | ]; 11 | export const HAS_COMMENTS = true; 12 | -------------------------------------------------------------------------------- /src/notatrix/formats/cg3/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {fallback} from "../../utils/constants"; 3 | import {GeneratorError} from "../../utils/errors"; 4 | import {getLoss} from "./get-loss"; 5 | import {thin} from "../../utils/funcs"; 6 | import type {GenerateResult} from "../../base"; 7 | import type {Options} from "../../nx/options"; 8 | import type {Sentence} from "../../nx/sentence"; 9 | import type {BaseToken} from "../../nx/base-token"; 10 | 11 | export function generate(sent: Sentence, options: Options): GenerateResult { 12 | if (!sent.isParsed) 13 | return { 14 | output: null, 15 | loss: undefined, 16 | }; 17 | 18 | if (!sent || sent.name !== "Sentence") 19 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 20 | options); 21 | 22 | options = _.defaults(options, sent.options, { 23 | omitIndices: false, 24 | allowMissingLemma: true, 25 | }); 26 | 27 | sent.index(); 28 | 29 | let lines: string[] = []; 30 | sent.comments.forEach(comment => lines.push("# " + comment.body)); 31 | sent.tokens.forEach(token => { 32 | const isSet = 33 | (value: string) => { return value && value !== fallback ? value : null; }; 34 | 35 | const push = (token: BaseToken, indentLevel: number) => { 36 | if (!token.lemma && !options.allowMissingLemma) 37 | throw new GeneratorError(`Unable to generate, token has no lemma`, sent, 38 | options); 39 | 40 | const indent = (token.semicolon ? ";" : "") + "\t".repeat(indentLevel); 41 | 42 | const head = token.heads.first; 43 | const dependency = 44 | options.omitIndices 45 | ? null 46 | : "#" + token.indices.cg3 + "->" + 47 | (head == undefined ? "" : head.token.indices.cg3); 48 | 49 | let lineParts = 50 | [`"${isSet(token.lemma) || isSet(token.form) || fallback}"`] 51 | .concat(isSet(token.xpostag) || isSet(token.upostag)) 52 | .concat((token._feats || []).join(" ")) 53 | .concat((token._misc || []).join(" ")) 54 | .concat(head && isSet(head.deprel) ? "@" + head.deprel : null) 55 | .concat(dependency); 56 | 57 | const line = indent + lineParts.filter(thin).join(" "); 58 | lines.push(line); 59 | }; 60 | 61 | lines.push(`"<${token.form || fallback}>"`); 62 | 63 | if (token._analyses && token._analyses.length) { 64 | token._analyses.forEach(analysis => { 65 | analysis.subTokens.forEach((subToken, i) => { push(subToken, i + 1); }); 66 | }); 67 | 68 | } else { 69 | push(token, 1); 70 | } 71 | }); 72 | 73 | return { 74 | output: lines.join("\n"), 75 | loss: getLoss(sent), 76 | }; 77 | }; 78 | -------------------------------------------------------------------------------- /src/notatrix/formats/cg3/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | const serial = sent.serialize(); 9 | let losses: Set = new Set(); 10 | 11 | const tokenCalcLoss = (token: TokenSerial) => { 12 | if (token.heads && token.heads.length > 1) 13 | losses.add("enhanced dependencies"); 14 | 15 | Object.keys(_.omit(token, FIELDS)).forEach(field => { 16 | switch (field) { 17 | case ("uuid"): 18 | case ("index"): 19 | case ("deps"): 20 | case ("feats"): 21 | case ("misc"): 22 | break; 23 | 24 | case ("upostag"): 25 | if (token.xpostag && token.upostag) 26 | losses.add(field); 27 | break; 28 | 29 | case ("isEmpty"): 30 | if (token.isEmpty) 31 | losses.add(field); 32 | break; 33 | 34 | default: 35 | losses.add(field); 36 | } 37 | }); 38 | }; 39 | 40 | serial.tokens.map(token => { 41 | tokenCalcLoss(token); 42 | 43 | (token.analyses || []).forEach(analysis => { 44 | const analysisKeys = Object.keys(analysis); 45 | if (analysisKeys.length > 1 || analysisKeys[0] !== "subTokens") { 46 | losses.add("analyses"); 47 | } else { 48 | analysis.subTokens.map(subToken => { 49 | tokenCalcLoss(subToken); 50 | 51 | if (subToken.form != undefined) 52 | losses.add("form"); 53 | }); 54 | } 55 | }); 56 | }); 57 | 58 | return Array.from(losses); 59 | } 60 | -------------------------------------------------------------------------------- /src/notatrix/formats/cg3/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "cg3"; 2 | export * as fields from "./fields"; 3 | export {split} from "../default-splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/conllu/detector.ts: -------------------------------------------------------------------------------- 1 | import * as re from "../../utils/regex"; 2 | import {DetectorError} from "../../utils/errors"; 3 | import {isJSONSerializable} from "../../utils/funcs"; 4 | import type {Options} from "../../nx/options"; 5 | 6 | export function detect(text: string, options: Options): string { 7 | options = { 8 | allowEmptyString: false, 9 | requireTenParams: false, 10 | allowTrailingWhitespace: true, 11 | ...options, 12 | }; 13 | 14 | if (!text && !options.allowEmptyString) 15 | throw new DetectorError(`Illegal CoNLL-U: empty string`, text, options); 16 | 17 | if (isJSONSerializable(text)) 18 | throw new DetectorError(`Illegal CoNLL-U: JSON object`, text, options); 19 | 20 | // be more or less strict about the fields we require being set 21 | const tokenLine = options.requireTenParams ? re.conlluTokenLineTenParams 22 | : re.conlluTokenLine; 23 | 24 | // internal stuff 25 | let doneComments = false; 26 | let doneContent = false; 27 | 28 | // iterate over the lines and check each one 29 | const lines = text.split(/\n/); 30 | lines.forEach((line, i) => { 31 | if (re.comment.test(line)) { 32 | // can only have comments at the beginning 33 | if (doneComments) 34 | throw new DetectorError(`Illegal CoNLL-U: misplaced comment`, text, 35 | options); 36 | 37 | } else { 38 | // done parsing comments 39 | doneComments = true; 40 | 41 | if (line) { 42 | if (!tokenLine.test(line)) 43 | throw new DetectorError(`Illegal CoNLL-U: unmatched line`, text, 44 | options); 45 | 46 | if (doneContent) 47 | throw new DetectorError(`Illegal CoNLL-U: misplaced whitespace`, text, 48 | options); 49 | 50 | } else { 51 | // only allow empty lines after we've looked at all the content 52 | if (!options.allowTrailingWhitespace) 53 | throw new DetectorError( 54 | `Illegal CoNLL-U: contains trailing whitespace`, text, options); 55 | 56 | doneContent = true; 57 | } 58 | } 59 | }); 60 | 61 | return "CoNLL-U"; 62 | } 63 | -------------------------------------------------------------------------------- /src/notatrix/formats/conllu/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "isEmpty", 3 | "index", 4 | "form", 5 | "lemma", 6 | "upostag", 7 | "xpostag", 8 | "feats", 9 | "heads", 10 | "misc", 11 | "subTokens", 12 | ]; 13 | export const HAS_COMMENTS = true; 14 | -------------------------------------------------------------------------------- /src/notatrix/formats/conllu/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {fallback} from "../../utils/constants"; 3 | import {GeneratorError} from "../../utils/errors"; 4 | import {getLoss} from "./get-loss"; 5 | import type {GenerateResult} from "../../base"; 6 | import type {Options} from "../../nx/options"; 7 | import type {Sentence} from "../../nx/sentence"; 8 | import type {BaseToken} from "../../nx/base-token"; 9 | 10 | export function generate(sent: Sentence, options: Options): GenerateResult { 11 | if (!sent.isParsed) 12 | return { 13 | output: null, 14 | loss: undefined, 15 | }; 16 | 17 | if (!sent || sent.name !== "Sentence") 18 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 19 | options); 20 | 21 | options = _.defaults(options, sent.options, 22 | { 23 | 24 | }); 25 | 26 | sent.index(); 27 | 28 | let lines: string[] = []; 29 | sent.comments.forEach(comment => { lines.push("# " + comment.body); }); 30 | sent.tokens.forEach(token => { 31 | const toString = (token: BaseToken) => { 32 | const head = !token.isEmpty && token.heads.first; 33 | 34 | return [ 35 | 36 | token.indices.conllu, 37 | token.form || fallback, 38 | token.lemma || fallback, 39 | token.upostag || fallback, 40 | token.xpostag || fallback, 41 | token.feats || fallback, 42 | head ? head.token.indices.conllu : fallback, 43 | head && head.deprel ? head.deprel : fallback, 44 | token._getDeps("CoNLL-U").join("|") || fallback, 45 | token.misc || fallback, 46 | 47 | ].join("\t"); 48 | }; 49 | 50 | lines.push(toString(token)); 51 | token.subTokens.forEach(subToken => { lines.push(toString(subToken)); }); 52 | }); 53 | 54 | return { 55 | output: lines.join("\n"), 56 | loss: getLoss(sent), 57 | }; 58 | }; 59 | -------------------------------------------------------------------------------- /src/notatrix/formats/conllu/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | const serial = sent.serialize(); 9 | let losses: Set = new Set(); 10 | 11 | const tokenCalcLoss = (token: TokenSerial) => { 12 | if (token.heads.length > 1 && !sent.options.enhanced) 13 | losses.add("enhanced dependencies"); 14 | 15 | Object.keys(_.omit(token, FIELDS)).forEach(field => { 16 | switch (field) { 17 | case ("uuid"): 18 | case ("index"): 19 | case ("other"): 20 | break; 21 | 22 | case ("analyses"): 23 | if (token.analyses.length > 1) { 24 | losses.add("analyses"); 25 | } else { 26 | const analysis = token.analyses[0], 27 | analysisKeys = Object.keys(analysis); 28 | 29 | if (analysisKeys.length > 1 || analysisKeys[0] !== "subTokens") { 30 | losses.add("analyses"); 31 | } else { 32 | analysis.subTokens.map(tokenCalcLoss); 33 | } 34 | } 35 | break; 36 | 37 | default: 38 | losses.add(field); 39 | } 40 | }); 41 | }; 42 | 43 | serial.tokens.map(tokenCalcLoss); 44 | 45 | return Array.from(losses); 46 | } 47 | -------------------------------------------------------------------------------- /src/notatrix/formats/conllu/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "CoNLL-U"; 2 | export * as fields from "./fields"; 3 | export {split} from "../default-splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/default-splitter.ts: -------------------------------------------------------------------------------- 1 | import {Options} from "../nx/options"; 2 | import * as re from "../utils/regex"; 3 | import {thin} from "../utils/funcs"; 4 | 5 | export function split(text: string, options: Options): string[] { 6 | options = {trimChunks: true, ...options}; 7 | 8 | return text.split(re.multiNewlines) 9 | .map(chunk => { 10 | if (options.trimChunks) { 11 | return chunk.trim(); 12 | } else { 13 | return chunk; 14 | } 15 | }) 16 | .filter(thin); 17 | } 18 | -------------------------------------------------------------------------------- /src/notatrix/formats/index.ts: -------------------------------------------------------------------------------- 1 | import * as apertiumStream from "./apertium-stream"; 2 | import * as brackets from "./brackets"; 3 | import * as cg3 from "./cg3"; 4 | import * as conllu from "./conllu"; 5 | import * as notatrixSerial from "./notatrix-serial"; 6 | import * as params from "./params"; 7 | import * as plainText from "./plain-text"; 8 | import * as sd from "./sd"; 9 | 10 | import type {FormatByName} from "../base"; 11 | 12 | export const FORMAT_BY_NAME: FormatByName = { 13 | "apertium stream": apertiumStream, 14 | apertiumStream: apertiumStream, 15 | Brackets: brackets, 16 | brackets: brackets, 17 | CG3: cg3, 18 | cg3: cg3, 19 | "CoNLL-U": conllu, 20 | conllu: conllu, 21 | "notatrix serial": notatrixSerial, 22 | notatrixSerial: notatrixSerial, 23 | Params: params, 24 | params: params, 25 | "plain text": plainText, 26 | plainText: plainText, 27 | SD: sd, 28 | sd: sd, 29 | }; 30 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/detector.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {DetectorError} from "../../utils/errors"; 4 | import {isJSONSerializable} from "../../utils/funcs"; 5 | import {nxSentenceFields, nxSentenceTokensFields} from "../../utils/constants"; 6 | import type {Input} from "../../base"; 7 | import type {Options} from "../../nx/options"; 8 | import type {SentenceSerial} from "../../nx/sentence"; 9 | import type {TokenSerial} from "../../nx/base-token"; 10 | 11 | export function detect(textOrSerial: string|SentenceSerial, options: Options): string { 12 | options = { 13 | allowZeroTokens: true, 14 | allowZeroFields: true, 15 | ...options, 16 | }; 17 | 18 | function restrict(obj: O, fields: {[fieldName: string]: string}, allowUndefined: boolean = false): void { 19 | if (obj === undefined) 20 | throw new DetectorError(`Illegal notatrix serial: missing field`, obj as unknown as Input, 21 | options); 22 | 23 | // @ts-ignore: This is (probably) never true, since `omit()` returns an Object, not an Array. 24 | if (_.omit(obj, Object.keys(fields)).length) 25 | throw new DetectorError(`Illegal notatrix serial: unexpected field`, obj as unknown as Input, 26 | options); 27 | 28 | _.each(fields, (fieldType: string, fieldName: string) => { 29 | const value = obj[fieldName as keyof O]; 30 | 31 | switch (fieldType) { 32 | case ("number"): 33 | if (value !== undefined || !allowUndefined) 34 | if (isNaN(parseFloat(value as unknown as string))) 35 | throw new DetectorError( 36 | `Illegal notatrix serial: could not parse ${value} as float`, 37 | obj as unknown as Input, options); 38 | break; 39 | 40 | case ("string"): 41 | if (value !== undefined || !allowUndefined) 42 | if (typeof value !== "string") 43 | throw new DetectorError( 44 | `Illegal notatrix serial: expected 'string', got ${ 45 | typeof value}`, 46 | obj as unknown as Input, options); 47 | break; 48 | 49 | case ("string*"): 50 | if (value !== undefined || !allowUndefined) 51 | if (value !== null && typeof value !== "string") 52 | throw new DetectorError( 53 | `Illegal notatrix serial: expected 'string', got ${ 54 | typeof value}`, 55 | obj as unknown as Input, options); 56 | break; 57 | 58 | case ("object"): 59 | // pass 60 | break; 61 | 62 | case ("array"): 63 | if (value != undefined || !allowUndefined) 64 | if (!Array.isArray(value)) 65 | throw new DetectorError( 66 | `Illegal notatrix serial: expected Array, got ${typeof value}`, 67 | obj as unknown as Input, options); 68 | break; 69 | } 70 | }); 71 | } 72 | 73 | if (!isJSONSerializable(textOrSerial)) 74 | throw new DetectorError(`Illegal notatrix serial: not JSON object`, textOrSerial, 75 | options); 76 | 77 | const obj: SentenceSerial = typeof textOrSerial === "string" ? JSON.parse(textOrSerial) : textOrSerial; 78 | 79 | restrict(obj, nxSentenceFields); 80 | _.each(obj.comments, comment => { 81 | if (typeof comment !== "string") 82 | throw new DetectorError( 83 | `Illegal notatrix serial: comments should be strings`, obj, options); 84 | }); 85 | _.each(obj.tokens, 86 | token => { restrict(token, nxSentenceTokensFields, true); }); 87 | if (obj.tokens.length === 0 && !options.allowZeroTokens) 88 | throw new DetectorError( 89 | `Illegal notatrix serial: cannot have empty token list`, obj, options); 90 | 91 | _.each(obj.tokens, token => { 92 | if (Object.keys(token).length === 0 && !options.allowZeroFields) 93 | throw new DetectorError( 94 | `Illegal notatrix serial: cannot have token without fields`, obj, 95 | options); 96 | 97 | if (token.analyses) 98 | _.each(token.analyses, analysis => { 99 | const analysisKeys = Object.keys(analysis); 100 | if (analysisKeys.length !== 1 || analysisKeys[0] !== "subTokens") 101 | throw new DetectorError( 102 | `Illegal notatrix serial: got unexpected analyses field`, obj, 103 | options); 104 | 105 | _.each(analysis.subTokens, subToken => { 106 | restrict(subToken, nxSentenceTokensFields, true); 107 | if (subToken.analyses !== undefined) 108 | throw new DetectorError( 109 | `Illegal notatrix serial: subTokens can only have one analysis`, 110 | obj, options); 111 | }); 112 | }); 113 | }); 114 | 115 | return "notatrix serial"; 116 | } 117 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS: string[] = []; // TODO: Implement this? 2 | export const HAS_COMMENTS = true; 3 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {GeneratorError} from "../../utils/errors"; 3 | import {getLoss} from "./get-loss"; 4 | import type {GenerateResult} from "../../base"; 5 | import type {Options} from "../../nx/options"; 6 | import type {Sentence, SentenceSerial} from "../../nx/sentence"; 7 | 8 | export function generate(sent: Sentence, options: Options): GenerateResult { 9 | if (!sent.isParsed) 10 | return { 11 | output: null, 12 | loss: undefined, 13 | }; 14 | 15 | if (!sent || sent.name !== "Sentence") 16 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 17 | options); 18 | 19 | options = _.defaults(options, sent.options, 20 | { 21 | 22 | }); 23 | 24 | sent.index(); 25 | 26 | return { 27 | output: sent.serialize(), 28 | loss: getLoss(sent), 29 | }; 30 | }; 31 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/get-loss.ts: -------------------------------------------------------------------------------- 1 | import type {Sentence} from "../../nx/sentence"; 2 | 3 | export function getLoss(sent: Sentence): string[] { 4 | // do nothing, can't lose info on this one 5 | return []; 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "notatrix serial"; 2 | export * as fields from "./fields"; 3 | export {split} from "./splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/parser.ts: -------------------------------------------------------------------------------- 1 | import {detect} from "./detector"; 2 | import {DetectorError, ParserError} from "../../utils/errors"; 3 | import type {Options} from "../../nx/options"; 4 | import type {SentenceSerial} from "../../nx/sentence"; 5 | 6 | export function parse(textOrSerial: string|SentenceSerial, options: Options): SentenceSerial { 7 | try { 8 | detect(textOrSerial, options); 9 | } catch (e) { 10 | if (e instanceof DetectorError) 11 | throw new ParserError(e.message, textOrSerial, options); 12 | 13 | throw e; 14 | } 15 | 16 | return textOrSerial as SentenceSerial; 17 | } 18 | -------------------------------------------------------------------------------- /src/notatrix/formats/notatrix-serial/splitter.ts: -------------------------------------------------------------------------------- 1 | import {Options} from "../../nx/options"; 2 | import {SplitterError} from "../../utils/errors"; 3 | 4 | export function split(text: string, options: Options): void { 5 | throw new SplitterError("Can't split notatrix serial", text, options); 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/detector.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {DetectorError} from "../../utils/errors"; 4 | import {isJSONSerializable} from "../../utils/funcs"; 5 | import {fields} from "../../utils/constants"; 6 | import type {Options} from "../../nx/options"; 7 | 8 | export function detect(textOrArray: string|any[], options: Options): string { 9 | options = { 10 | allowEmptyList: false, 11 | allowTrailingWhitespace: true, 12 | allowLeadingWhitespace: true, 13 | ...options, 14 | }; 15 | 16 | if (!isJSONSerializable(textOrArray)) 17 | throw new DetectorError(`Illegal Params: not JSON object`, textOrArray, options); 18 | 19 | const obj: any[] = typeof textOrArray === "string" ? JSON.parse(textOrArray) : textOrArray; 20 | 21 | if (Array.isArray(obj)) { 22 | if (!obj.length && !options.allowEmptyList) 23 | throw new DetectorError(`Illegal Params: contains no tokens`, obj, 24 | options); 25 | 26 | obj.forEach(obj => { 27 | const omitted = Object.keys(_.omit(obj, fields)); 28 | if (omitted.length) 29 | throw new DetectorError( 30 | `Illegal Params: contains illegal keys (${omitted.join(", ")})`, 31 | obj, options); 32 | 33 | const picked = Object.keys(_.pick(obj, fields)); 34 | if (!picked.length) 35 | throw new DetectorError(`Illegal Params: missing required keys`, obj, 36 | options); 37 | }); 38 | 39 | } else { 40 | throw new DetectorError( 41 | `Illegal Params: expected array of parameters, got ${typeof obj}`, obj, 42 | options) 43 | } 44 | 45 | return "Params"; 46 | } 47 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "isEmpty", 3 | "index", 4 | "form", 5 | "lemma", 6 | "upostag", 7 | "xpostag", 8 | "feats", 9 | "head", 10 | "deprel", 11 | "deps", 12 | "misc", 13 | ]; 14 | export const HAS_COMMENTS = false; 15 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {GeneratorError} from "../../utils/errors"; 3 | import {getLoss} from "./get-loss"; 4 | import type {GenerateResult, ParamsOutput} from "../../base"; 5 | import type {Options} from "../../nx/options"; 6 | import type {Sentence} from "../../nx/sentence"; 7 | 8 | export function generate(sent: Sentence, options: Options): GenerateResult[]> { 9 | if (!sent.isParsed) 10 | return { 11 | output: null, 12 | loss: undefined, 13 | }; 14 | 15 | if (!sent || sent.name !== "Sentence") 16 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 17 | options); 18 | 19 | options = _.defaults(options, sent.options, 20 | { 21 | 22 | }); 23 | 24 | sent.index(); 25 | 26 | const output = sent.tokens.map(token => { 27 | if (token.analysis) 28 | throw new GeneratorError( 29 | "Unable to generate, contains ambiguous analyses or multiword tokens", sent, options); 30 | 31 | let params: ParamsOutput = { 32 | form: token.form, 33 | lemma: token.lemma, 34 | upostag: token.upostag, 35 | xpostag: token.xpostag, 36 | feats: token.feats, 37 | misc: token.misc, 38 | head: token.getHead(), 39 | }; 40 | 41 | return _.pick(params, value => value != undefined); 42 | }); 43 | 44 | return { 45 | output: output, 46 | loss: getLoss(sent), 47 | }; 48 | } 49 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | 6 | export function getLoss(sent: Sentence): string[] { 7 | const serial = sent.serialize(); 8 | let losses: Set = new Set(); 9 | 10 | if (serial.comments.length) 11 | losses.add("comments"); 12 | 13 | serial.tokens.forEach( 14 | token => {Object.keys(_.omit(token, FIELDS)).forEach(field => { 15 | switch (field) { 16 | case ("uuid"): 17 | case ("index"): 18 | break; 19 | 20 | case ("heads"): 21 | if (token.heads.length > 1) 22 | losses.add("enhanced dependencies"); 23 | break; 24 | 25 | default: 26 | losses.add(field); 27 | } 28 | })}); 29 | 30 | return Array.from(losses); 31 | } 32 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "Params"; 2 | export * as fields from "./fields"; 3 | export {split} from "./splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/parser.ts: -------------------------------------------------------------------------------- 1 | import {detect} from "./detector"; 2 | import {DetectorError, ParserError} from "../../utils/errors"; 3 | import type {Options} from "../../nx/options"; 4 | import type {SentenceSerial} from "../../nx/sentence"; 5 | 6 | export function parse(textOrArray: string|any[], options: Options): SentenceSerial { 7 | try { 8 | detect(textOrArray, options); 9 | } catch (e) { 10 | if (e instanceof DetectorError) 11 | throw new ParserError(e.message, textOrArray, options); 12 | 13 | throw e; 14 | } 15 | 16 | return { 17 | input: JSON.stringify(textOrArray), 18 | options: options, 19 | comments: [], 20 | tokens: (textOrArray as any[]).map((token, i) => { 21 | token.index = `${i}`; 22 | return token; 23 | }), 24 | }; 25 | } 26 | -------------------------------------------------------------------------------- /src/notatrix/formats/params/splitter.ts: -------------------------------------------------------------------------------- 1 | import {Options} from "../../nx/options"; 2 | import {SplitterError} from "../../utils/errors"; 3 | 4 | export function split(text: string, options: Options): void { 5 | throw new SplitterError("Can't split Params", text, options); 6 | } 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/detector.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import * as re from "../../utils/regex"; 4 | import {DetectorError} from "../../utils/errors"; 5 | import {isJSONSerializable} from "../../utils/funcs"; 6 | import type {Options} from "../../nx/options"; 7 | 8 | export function detect(text: string, options: Options): string { 9 | options = { 10 | allowEmptyString: true, 11 | allowNewlines: false, 12 | bracketsAllowanceTreshold: 0.2, // set to <0 or >1 to avoid 13 | ...options, 14 | }; 15 | 16 | /* 17 | if (!text && !options.allowEmptyString) 18 | throw new DetectorError(`Illegal plain text: empty string`, text, 19 | options); 20 | */ 21 | 22 | if (isJSONSerializable(text)) 23 | throw new DetectorError(`Illegal plain text: JSON object`, text, options); 24 | 25 | if (/\n/.test(text) && !options.allowNewlines) 26 | throw new DetectorError(`Illegal plain text: contains newlines`, text, 27 | options); 28 | 29 | if (options.bracketsAllowanceTreshold >= 0) { 30 | const numWords = text.split(re.whitespace).length; 31 | const numBrackets = (text.match(/[\[\]]/g) || []).length; 32 | const ratio = numBrackets / numWords; 33 | 34 | if (ratio > options.bracketsAllowanceTreshold) 35 | throw new DetectorError( 36 | `Illegal plain text: contains too many brackets (${ratio})`, text, 37 | options); 38 | } 39 | 40 | return "plain text"; 41 | } 42 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "form", 3 | ]; 4 | export const HAS_COMMENTS = false; 5 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import * as re from "../../utils/regex"; 3 | import {GeneratorError} from "../../utils/errors"; 4 | import {getLoss} from "./get-loss"; 5 | import type {GenerateResult} from "../../base"; 6 | import type {Options} from "../../nx/options"; 7 | import type {Sentence} from "../../nx/sentence"; 8 | 9 | export function generate(sent: Sentence, options: Options): GenerateResult { 10 | if (!sent.isParsed) 11 | return { 12 | output: null, 13 | loss: undefined, 14 | }; 15 | 16 | if (!sent || sent.name !== "Sentence") 17 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 18 | options); 19 | 20 | options = _.defaults(options, sent.options, 21 | { 22 | 23 | }); 24 | 25 | sent.index(); 26 | 27 | const output = 28 | sent.tokens 29 | .map(token => { 30 | return token.isSuperToken 31 | ? token.subTokens.map(subToken => subToken.value) 32 | .join(" ") 33 | : token.form; 34 | }) 35 | .join(" ") 36 | .replace(re.spaceBeforePunctuation, "$1"); 37 | 38 | return { 39 | output: output, 40 | loss: getLoss(sent), 41 | }; 42 | } 43 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | const serial = sent.serialize(); 9 | let losses: Set = new Set(); 10 | 11 | if (serial.comments.length) 12 | losses.add("comments"); 13 | 14 | serial.tokens.forEach( 15 | token => {Object.keys(_.omit(token, FIELDS)).forEach(field => { 16 | switch (field) { 17 | case ("uuid"): 18 | case ("index"): 19 | break; 20 | 21 | case ("feats"): 22 | case ("misc"): 23 | if (token[field] && token[field].length) 24 | losses.add(field); 25 | break; 26 | 27 | case ("heads"): 28 | if (token.heads.length) 29 | losses.add("heads"); 30 | break; 31 | 32 | default: 33 | if (token[field as keyof TokenSerial]) 34 | losses.add(field); 35 | } 36 | })}); 37 | 38 | return Array.from(losses); 39 | } 40 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "plain text"; 2 | export * as fields from "./fields"; 3 | export {split} from "./splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/parser.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import * as re from "../../utils/regex"; 4 | import {detect} from "./detector"; 5 | import {DetectorError, ParserError} from "../../utils/errors"; 6 | import {thin} from "../../utils/funcs"; 7 | import type {Options} from "../../nx/options"; 8 | import type {SentenceSerial} from "../../nx/sentence"; 9 | 10 | export function parse(text: string|undefined, options: Options): SentenceSerial { 11 | options = { 12 | allowEmptyString: true, 13 | ...options, 14 | }; 15 | 16 | text = text || ""; 17 | 18 | try { 19 | detect(text, options); 20 | } catch (e) { 21 | if (e instanceof DetectorError) 22 | throw new ParserError(e.message, text, options); 23 | 24 | throw e; 25 | } 26 | 27 | // console.log(); 28 | // console.log(text); 29 | 30 | let chunks = []; 31 | let word = ""; 32 | 33 | _.each(text, (char, i) => { 34 | if (re.whitespace.test(char)) { 35 | chunks.push(word); 36 | word = ""; 37 | 38 | } else if (re.punctuation.test(char)) { 39 | if (!re.allPunctuation.test(word)) { 40 | chunks.push(word); 41 | word = ""; 42 | } 43 | word += char; 44 | 45 | } else { 46 | word += char; 47 | } 48 | }); 49 | 50 | chunks.push(word); 51 | 52 | // console.log(chunks); 53 | 54 | let tokens = chunks.filter(thin).map((chunk, i) => { 55 | return { 56 | form: chunk, 57 | index: i, 58 | }; 59 | }); 60 | 61 | // console.log(comments); 62 | // console.log(tokens); 63 | 64 | return { 65 | input: text, 66 | options: options, 67 | comments: [], 68 | tokens: tokens, 69 | }; 70 | } 71 | -------------------------------------------------------------------------------- /src/notatrix/formats/plain-text/splitter.ts: -------------------------------------------------------------------------------- 1 | import {Options} from "../../nx/options"; 2 | import * as re from "../../utils/regex"; 3 | import {thin} from "../../utils/funcs"; 4 | 5 | export function split(text: string, options: Options): string[] { 6 | options = {trimChunks: true, ...options}; 7 | 8 | return text.split(re.sentenceThenPunctuation) 9 | .map(chunk => { 10 | if (options.trimChunks) { 11 | return chunk.trim(); 12 | } else { 13 | return chunk; 14 | } 15 | }) 16 | .filter(thin); 17 | } 18 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/detector.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import * as re from "../../utils/regex"; 4 | import {DetectorError} from "../../utils/errors"; 5 | import {isJSONSerializable} from "../../utils/funcs"; 6 | import type {Options} from "../../nx/options"; 7 | 8 | export function detect(text: string, options: Options): string { 9 | options = { 10 | allowEmptyString: false, 11 | allowLeadingWhitespace: true, 12 | allowBookendWhitespace: true, 13 | allowTrailingWhitespace: true, 14 | allowNoDependencies: false, 15 | ...options, 16 | }; 17 | 18 | if (!text && !options.allowEmptyString) 19 | throw new DetectorError(`Illegal SD: empty string`, text, options); 20 | 21 | if (isJSONSerializable(text)) 22 | throw new DetectorError(`Illegal SD: JSON object`, text, options); 23 | 24 | // be more or less strict about whitespace 25 | const dependencyRegex = options.allowBookendWhitespace 26 | ? re.sdDependency 27 | : re.sdDependencyNoWhitespace; 28 | 29 | // internal stuff 30 | let parsingDeps = false; 31 | let parsingWhitespace = false; 32 | let parsedDeps = 0; 33 | 34 | const lines = text.split(/\n/); 35 | lines.forEach((line, i) => { 36 | if (re.whiteline.test(line)) { 37 | if (parsingDeps) { 38 | if (!options.allowTrailingWhitespace) 39 | throw new DetectorError(`Illegal SD: contains trailing whitespace`, 40 | text, options); 41 | 42 | } else { 43 | if (!options.allowLeadingWhitespace) 44 | throw new DetectorError(`Illegal SD: contains leading whitespace`, 45 | text, options); 46 | } 47 | } 48 | 49 | if (re.comment.test(line)) { 50 | } else if (!parsingDeps) { 51 | if (dependencyRegex.test(line)) 52 | throw new DetectorError(`Illegal SD: missing text line`, text, options); 53 | 54 | parsingDeps = true; 55 | 56 | } else if (!dependencyRegex.test(line)) { 57 | throw new DetectorError(`Illegal SD: expected dependency line`, text, 58 | options); 59 | 60 | } else { 61 | parsedDeps += 1; 62 | } 63 | }); 64 | 65 | if (parsedDeps === 0 && !options.allowNoDependencies) 66 | throw new DetectorError(`Illegal SD: contains no dependencies`, text, 67 | options); 68 | 69 | return "SD"; 70 | } 71 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/fields.ts: -------------------------------------------------------------------------------- 1 | export const FIELDS = [ 2 | "form", 3 | "heads", 4 | ]; 5 | export const HAS_COMMENTS = true; 6 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/generator.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import {generate as generateText} from "../plain-text/generator"; 3 | import {GeneratorError} from "../../utils/errors"; 4 | import {getLoss} from "./get-loss"; 5 | import type {GenerateResult} from "../../base"; 6 | import type {Options} from "../../nx/options"; 7 | import type {Sentence} from "../../nx/sentence"; 8 | 9 | export function generate(sent: Sentence, options: Options): GenerateResult { 10 | if (!sent.isParsed) 11 | return { 12 | output: null, 13 | loss: undefined, 14 | }; 15 | 16 | if (!sent || sent.name !== "Sentence") 17 | throw new GeneratorError(`Unable to generate, input not a Sentence`, sent, 18 | options); 19 | 20 | options = _.defaults(options, sent.options, 21 | { 22 | 23 | }); 24 | 25 | sent.index(); 26 | 27 | let lines = []; 28 | sent.comments.forEach(comment => { lines.push("# " + comment.body); }); 29 | 30 | lines.push(generateText(sent, {}).output); 31 | 32 | [sent.root].concat(sent.tokens).forEach(token => { 33 | token.mapDependents(dependent => { 34 | lines.push( 35 | `${dependent.deprel || "_"}(${token.form}, ${dependent.token.form})`); 36 | }); 37 | }); 38 | 39 | /* 40 | sent.root.mapDependents(dependent => lines.push(`${dependent.deprel}(${})`)) 41 | if (sent.root) 42 | lines.push(`root(ROOT, ${sent.root.form})`); 43 | 44 | sent.tokens.forEach(token => { 45 | 46 | if (token._head && token.deprel && token._head.name !== 'RootToken') 47 | lines.push(`${token.deprel}(${token._head.form}, ${token.form})`); 48 | 49 | }); 50 | */ 51 | 52 | return { 53 | output: lines.join("\n"), 54 | loss: getLoss(sent), 55 | }; 56 | } 57 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/get-loss.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import {FIELDS} from "./fields"; 4 | import type {Sentence} from "../../nx/sentence"; 5 | import type {TokenSerial} from "../../nx/base-token"; 6 | 7 | export function getLoss(sent: Sentence): string[] { 8 | const serial = sent.serialize(); 9 | let losses: Set = new Set(); 10 | 11 | serial.tokens.forEach(token => { 12 | if (token.heads && token.heads.length > 1) 13 | losses.add("enhanced dependencies"); 14 | 15 | Object.keys(_.omit(token, FIELDS)).forEach(field => { 16 | switch (field) { 17 | case ("uuid"): 18 | case ("index"): 19 | case ("deps"): 20 | break; 21 | 22 | case ("heads"): 23 | if (token.heads.length > 1) 24 | losses.add(field); 25 | break; 26 | 27 | case ("feats"): 28 | case ("misc"): 29 | if (token[field] && token[field].length) 30 | losses.add(field); 31 | break; 32 | 33 | default: 34 | if (token[field as keyof TokenSerial]) 35 | losses.add(field); 36 | } 37 | }) 38 | }); 39 | 40 | return Array.from(losses); 41 | } 42 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/index.ts: -------------------------------------------------------------------------------- 1 | export const name = "SD"; 2 | export * as fields from "./fields"; 3 | export {split} from "../default-splitter"; 4 | export {detect} from "./detector"; 5 | export {parse} from "./parser"; 6 | export {generate} from "./generator"; 7 | -------------------------------------------------------------------------------- /src/notatrix/formats/sd/parser.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import * as re from "../../utils/regex"; 4 | import {detect} from "./detector"; 5 | import {DetectorError, ParserError} from "../../utils/errors"; 6 | import {parse as parseText} from "../plain-text/parser"; 7 | import type {Options} from "../../nx/options"; 8 | import type {SentenceSerial} from "../../nx/sentence"; 9 | import type {TokenSerial} from "../../nx/base-token"; 10 | 11 | interface CommentChunk { 12 | type: "comment"; 13 | body: string; 14 | } 15 | 16 | interface DependencyChunk { 17 | type: "dependency"; 18 | deprel: string; 19 | head: string; 20 | dep: string; 21 | } 22 | 23 | interface TextChunk { 24 | type: "text"; 25 | body: string; 26 | } 27 | 28 | type Chunk = CommentChunk|DependencyChunk|TextChunk; 29 | 30 | export function parse(text: string|undefined, options: Options): SentenceSerial { 31 | function getTokenIndexFromString(tokens: TokenSerial[], token: string): number|null { 32 | for (let i = 0; i < tokens.length; i++) { 33 | if (tokens[i].form.toLowerCase() === token.toLowerCase()) 34 | return i; 35 | } 36 | 37 | return null; 38 | } 39 | 40 | // console.log(); 41 | // console.log(text); 42 | 43 | options = { 44 | allowEmptyString: false, 45 | allowBookendWhitespace: true, 46 | allowWhiteLines: true, 47 | ...options, 48 | }; 49 | 50 | try { 51 | detect(text, options); 52 | } catch (e) { 53 | if (e instanceof DetectorError) 54 | throw new ParserError(e.message, text, options); 55 | 56 | throw e; 57 | } 58 | 59 | const lines = text.split("\n"); 60 | const depRegex = options.allowBookendWhitespace 61 | ? re.sdDependencyNoWhitespace 62 | : re.sdDependency; 63 | 64 | let chunks: Chunk[] = []; 65 | lines.forEach(line => { 66 | const whiteline = line.match(re.whiteline), 67 | comment = line.match(re.comment), dep = line.match(depRegex); 68 | 69 | if (whiteline) { 70 | } else if (comment) { 71 | chunks.push({type: "comment", body: comment[2]}); 72 | 73 | } else if (dep) { 74 | chunks.push( 75 | {type: "dependency", deprel: dep[1], head: dep[2], dep: dep[3]}); 76 | 77 | } else { 78 | chunks.push({ 79 | type: "text", 80 | body: line, 81 | }); 82 | } 83 | }); 84 | 85 | // console.log(chunks); 86 | 87 | let tokens: TokenSerial[]; 88 | let comments: string[] = []; 89 | let expecting = ["comment", "text"]; 90 | 91 | chunks.forEach(chunk => { 92 | if (expecting.indexOf(chunk.type) === -1) 93 | throw new ParserError( 94 | `expecting ${expecting.join("|")}, got ${chunk.type}`, text, options); 95 | 96 | if (chunk.type === "comment") { 97 | comments.push(chunk.body); 98 | expecting = ["comment", "text"]; 99 | 100 | } else if (chunk.type === "text") { 101 | tokens = parseText(chunk.body, options).tokens; 102 | expecting = ["dependency"]; 103 | 104 | } else if (chunk.type === "dependency") { 105 | let index = getTokenIndexFromString(tokens, chunk.dep); 106 | if (index === null) 107 | throw new ParserError(`unable to find token with form ${chunk.dep}`, 108 | text, options); 109 | 110 | tokens[index].heads = [{ 111 | index: getTokenIndexFromString(tokens, chunk.head), 112 | deprel: chunk.deprel, 113 | }]; 114 | expecting = ["dependency"]; 115 | } 116 | }); 117 | 118 | // console.log(comments); 119 | // console.log(tokens); 120 | 121 | return { 122 | input: text, 123 | options: options, 124 | comments: comments, 125 | tokens: tokens, 126 | }; 127 | } 128 | -------------------------------------------------------------------------------- /src/notatrix/generator.ts: -------------------------------------------------------------------------------- 1 | import {FORMAT_BY_NAME} from "./formats"; 2 | import type {GenerateByName} from "./base"; 3 | 4 | export const GENERATE_BY_NAME: GenerateByName = { 5 | "apertium stream": FORMAT_BY_NAME.apertiumStream.generate, 6 | apertiumStream: FORMAT_BY_NAME.apertiumStream.generate, 7 | Brackets: FORMAT_BY_NAME.brackets.generate, 8 | brackets: FORMAT_BY_NAME.brackets.generate, 9 | CG3: FORMAT_BY_NAME.cg3.generate, 10 | cg3: FORMAT_BY_NAME.cg3.generate, 11 | "CoNLL-U": FORMAT_BY_NAME.conllu.generate, 12 | conllu: FORMAT_BY_NAME.conllu.generate, 13 | "notatrix serial": FORMAT_BY_NAME.notatrixSerial.generate, 14 | notatrixSerial: FORMAT_BY_NAME.notatrixSerial.generate, 15 | Params: FORMAT_BY_NAME.params.generate, 16 | params: FORMAT_BY_NAME.params.generate, 17 | "plain text": FORMAT_BY_NAME.plainText.generate, 18 | plainText: FORMAT_BY_NAME.plainText.generate, 19 | SD: FORMAT_BY_NAME.sd.generate, 20 | sd: FORMAT_BY_NAME.sd.generate, 21 | }; 22 | -------------------------------------------------------------------------------- /src/notatrix/index.ts: -------------------------------------------------------------------------------- 1 | export {ConverterError} from "./utils/errors"; 2 | export {DBError} from "./utils/errors"; 3 | export {DetectorError} from "./utils/errors"; 4 | export {GeneratorError} from "./utils/errors"; 5 | export {NotatrixError} from "./utils/errors"; 6 | export {NxError} from "./utils/errors"; 7 | export {ParserError} from "./utils/errors"; 8 | export {SplitterError} from "./utils/errors"; 9 | export {ToolError} from "./utils/errors"; 10 | 11 | export {Analysis} from "./nx/analysis"; 12 | export {BaseToken, ConlluIndex, Cg3Index} from "./nx/base-token"; 13 | export {Comment} from "./nx/comment"; 14 | export {Corpus, CorpusSerial} from "./nx/corpus"; 15 | export {Labeler} from "./nx/labeler"; 16 | export {NxBaseClass} from "./nx/base-class"; 17 | export {RelationItem, RelationSet} from "./nx/relation-set"; 18 | export {RootToken} from "./nx/root-token"; 19 | export {Sentence, SentenceSerial} from "./nx/sentence"; 20 | export {SubToken} from "./nx/sub-token"; 21 | export {Token} from "./nx/token"; 22 | 23 | export * as constants from "./utils/constants"; 24 | export * as errors from "./utils/errors"; 25 | export * as formats from "./formats"; 26 | export * as funcs from "./utils/funcs"; 27 | export * as regex from "./utils/regex"; 28 | export {detect, DETECT_BY_NAME as detectAs} from "./detector"; 29 | export {GENERATE_BY_NAME as generate} from "./generator"; 30 | export {parse, PARSE_BY_NAME as parseAs} from "./parser"; 31 | export {split} from "./splitter"; 32 | -------------------------------------------------------------------------------- /src/notatrix/nx/analysis.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import {NxBaseClass} from "./base-class"; 4 | import {SubToken} from "./sub-token"; 5 | import {TokenSerial} from "./base-token"; 6 | import type {Sentence} from "./sentence"; 7 | 8 | export interface AnalysisSerial { 9 | subTokens: TokenSerial[]; 10 | } 11 | 12 | /** 13 | * Abstraction over a CG3 analysis. Most sentences have just one of these for 14 | * each token. 15 | */ 16 | export class Analysis extends NxBaseClass { 17 | _subTokens: SubToken[]; 18 | 19 | constructor(sent: Sentence, serial: AnalysisSerial) { 20 | super("Analysis"); 21 | this._subTokens = 22 | (serial.subTokens || []).map(sub => new SubToken(sent, sub)); 23 | } 24 | 25 | get subTokens(): SubToken[] { return this._subTokens; } 26 | } 27 | -------------------------------------------------------------------------------- /src/notatrix/nx/base-class.ts: -------------------------------------------------------------------------------- 1 | export class NxBaseClass { 2 | name: string; 3 | constructor(name: string) { this.name = name; } 4 | }; 5 | -------------------------------------------------------------------------------- /src/notatrix/nx/comment.ts: -------------------------------------------------------------------------------- 1 | import * as re from "../utils/regex"; 2 | import {NxBaseClass} from "./base-class"; 3 | import {Label} from "./label"; 4 | import type {Sentence} from "./sentence"; 5 | 6 | /** 7 | * Abstraction over a CoNLL-U or CG3 comment, allows us to extract and then 8 | * manipulate data in some useful ways across a Corpus. 9 | */ 10 | export class Comment extends NxBaseClass { 11 | id: string|undefined; 12 | type: string; 13 | body: string; 14 | labels: string[]|undefined; 15 | 16 | constructor(sent: Sentence, body: string) { 17 | super("Comment"); 18 | 19 | this.type = "normal"; 20 | this.body = body; 21 | 22 | const label = body.match(re.commentLabel), 23 | sentId = body.match(re.commentSentId); 24 | 25 | if (label) { 26 | let labels: string[] = []; 27 | label[3].split(/\s/).forEach(label => { 28 | if (label && labels.indexOf(label) === -1) 29 | labels.push(label) 30 | }); 31 | 32 | this.type = "label"; 33 | this.labels = labels; 34 | 35 | } else if (sentId) { 36 | this.type = "sent-id"; 37 | this.id = sentId[2]; 38 | } 39 | } 40 | 41 | serialize(): string { return this.body; } 42 | } 43 | -------------------------------------------------------------------------------- /src/notatrix/nx/options.ts: -------------------------------------------------------------------------------- 1 | export interface Options { 2 | allowBookendWhitespace?: boolean; 3 | allowEmptyList?: boolean; 4 | allowEmptyString?: boolean; 5 | allowLeadingWhitespace?: boolean; 6 | allowMissingIndices?: boolean; 7 | allowMissingLemma?: boolean; 8 | allowNewlines?: boolean; 9 | allowNoDependencies?: boolean; 10 | allowTrailingWhitespace?: boolean; 11 | allowWhiteLines?: boolean; 12 | allowZeroFields?: boolean; 13 | allowZeroTokens?: boolean; 14 | bracketsAllowanceTreshold?: number; 15 | coerceMultipleSpacesAfterSemicolonToTab?: boolean; 16 | enhanced?: boolean; 17 | equalizeWhitespace?: boolean; 18 | indentString?: string|RegExp|null, 19 | interpretAs?: string; 20 | omitIndices?: boolean; 21 | requireOne?: boolean; 22 | requireOneMatch?: boolean; 23 | requireTenParams?: boolean; 24 | returnAllMatches?: boolean; 25 | returnAllPossibilities?: boolean; 26 | spacesPerTab?: null, 27 | suppressDetectorErrors?: boolean; 28 | suppressParserErrors?: boolean; 29 | trimChunks?: boolean; 30 | useTabIndent?: boolean; 31 | } 32 | -------------------------------------------------------------------------------- /src/notatrix/nx/relation-set.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import {NxBaseClass} from "./base-class"; 4 | import type {BaseToken} from "./base-token"; 5 | 6 | export interface RelationItem { 7 | token: BaseToken; 8 | deprel: string; 9 | } 10 | 11 | type PartnerKind = "heads"|"dependents"; 12 | 13 | export class RelationSet extends NxBaseClass { 14 | token: BaseToken; 15 | partner: PartnerKind; 16 | _items: RelationItem[]; 17 | 18 | constructor(token: BaseToken, partner: PartnerKind) { 19 | super("RelationSet"); 20 | this.token = token; 21 | this.partner = partner; 22 | this._items = []; 23 | } 24 | 25 | get length(): number { return this._items.length; } 26 | 27 | get first(): RelationItem|null { return this._items[0] || null; } 28 | 29 | map(callback: (item: RelationItem, index?: number) => T): T[] { return this._items.map(callback); } 30 | 31 | has(token: BaseToken): boolean { 32 | let has = false; 33 | this.map(item => { 34 | if (item.token === token) 35 | has = true; 36 | }); 37 | 38 | return has; 39 | } 40 | 41 | add(token: BaseToken, deprel: string, origin: boolean = true): boolean { 42 | if (this.has(token)) { 43 | this.modify(token, deprel); 44 | return false; 45 | } 46 | 47 | this._items.push({ 48 | token: token, 49 | deprel: deprel, 50 | }); 51 | 52 | if (origin) 53 | (token[this.partner] as RelationSet).add(this.token, deprel, false); 54 | 55 | return true; 56 | } 57 | 58 | modify(token: BaseToken, deprel: string, origin: boolean = true): boolean { 59 | if (!this.has(token)) 60 | return false; 61 | 62 | let ret; 63 | this.map(item => { 64 | if (item.token === token) { 65 | ret = item.deprel !== deprel; 66 | item.deprel = deprel; 67 | } 68 | }); 69 | 70 | if (origin) 71 | token[this.partner].modify(this.token, deprel, false); 72 | 73 | return ret; 74 | } 75 | 76 | remove(token: BaseToken, origin: boolean = true): RelationItem|null { 77 | let at = -1; 78 | 79 | this.map((item, i) => { 80 | if (item.token === token) 81 | at = i; 82 | }); 83 | 84 | if (at === -1) 85 | return null; 86 | 87 | const removed = this._items.splice(at, 1)[0]; 88 | 89 | if (origin) 90 | token[this.partner].remove(this.token); 91 | 92 | return removed || null; 93 | } 94 | 95 | clear(origin: boolean = true): void { 96 | this.map(item => { 97 | if (origin) 98 | item.token[this.partner].remove(this.token) 99 | }); 100 | this._items = []; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/notatrix/nx/root-token.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import {BaseToken} from "./base-token"; 4 | import type {Sentence} from "./sentence"; 5 | 6 | export class RootToken extends BaseToken { 7 | constructor(sent: Sentence) { 8 | super(sent, "RootToken"); 9 | 10 | this.form = "ROOT"; 11 | this.indices = { 12 | absolute: 0, 13 | conllu: 0, 14 | cg3: 0, 15 | cytoscape: 0, 16 | }; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/notatrix/nx/sub-token.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import {BaseToken, TokenSerial} from "./base-token"; 4 | import type {Sentence} from "./sentence"; 5 | 6 | export class SubToken extends BaseToken { 7 | constructor(sent: Sentence, serial: TokenSerial) { super(sent, "SubToken", serial); } 8 | } 9 | -------------------------------------------------------------------------------- /src/notatrix/nx/token.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import {Analysis} from "./analysis"; 4 | import {BaseToken, TokenSerial} from "./base-token"; 5 | import {SubToken} from "./sub-token"; 6 | import type {Sentence} from "./sentence"; 7 | 8 | export class Token extends BaseToken { 9 | _i: number|null; 10 | 11 | constructor(sent: Sentence, serial: TokenSerial) { 12 | super(sent, "Token", serial); 13 | 14 | this._analyses = 15 | (serial.analyses || []).map(ana => new Analysis(sent, ana)); 16 | this._i = (this._analyses.length ? 0 : null); 17 | } 18 | 19 | get analysis(): Analysis|null { 20 | if (this._i === null) 21 | return null; 22 | 23 | return this._analyses[this._i]; 24 | } 25 | 26 | get subTokens(): SubToken[] { return this.analysis ? this.analysis.subTokens : []; } 27 | } 28 | -------------------------------------------------------------------------------- /src/notatrix/parser.ts: -------------------------------------------------------------------------------- 1 | import {FORMAT_BY_NAME} from "./formats"; 2 | import {formats} from "./utils/constants"; 3 | import {ParserError} from "./utils/errors"; 4 | import {thin} from "./utils/funcs"; 5 | import type {Input, ParseByName, ParseOutput} from "./base"; 6 | import type {Options} from "./nx/options"; 7 | 8 | export const PARSE_BY_NAME: ParseByName = { 9 | "apertium stream": FORMAT_BY_NAME.apertiumStream.parse, 10 | apertiumStream: FORMAT_BY_NAME.apertiumStream.parse, 11 | Brackets: FORMAT_BY_NAME.brackets.parse, 12 | brackets: FORMAT_BY_NAME.brackets.parse, 13 | CG3: FORMAT_BY_NAME.cg3.parse, 14 | cg3: FORMAT_BY_NAME.cg3.parse, 15 | "CoNLL-U": FORMAT_BY_NAME.conllu.parse, 16 | conllu: FORMAT_BY_NAME.conllu.parse, 17 | "notatrix serial": FORMAT_BY_NAME.notatrixSerial.parse, 18 | notatrixSerial: FORMAT_BY_NAME.notatrixSerial.parse, 19 | Params: FORMAT_BY_NAME.params.parse, 20 | params: FORMAT_BY_NAME.params.parse, 21 | "plain text": FORMAT_BY_NAME.plainText.parse, 22 | plainText: FORMAT_BY_NAME.plainText.parse, 23 | SD: FORMAT_BY_NAME.sd.parse, 24 | sd: FORMAT_BY_NAME.sd.parse, 25 | }; 26 | 27 | export function parse(input: Input, options: Options): ParseOutput|ParseOutput[] { 28 | options = { 29 | suppressDetectorErrors: true, 30 | suppressParserErrors: true, 31 | returnAllPossibilities: true, 32 | requireOne: false, 33 | ...options, 34 | }; 35 | 36 | const possibilities = formats 37 | .map(format => { 38 | const parse = FORMAT_BY_NAME[format].parse; 39 | try { 40 | return parse(input, options); 41 | } catch (e) { 42 | if (e instanceof ParserError && options.suppressParserErrors) 43 | return undefined; 44 | throw e; 45 | } 46 | }) 47 | .filter(thin); 48 | 49 | if (!possibilities.length && !options.suppressDetectorErrors) 50 | throw new ParserError("Unable to detect format", input, options); 51 | 52 | if (options.requireOne && possibilities.length > 1) 53 | throw new ParserError("Unable to detect, ambiguous input", input, options); 54 | 55 | return options.returnAllPossibilities ? possibilities : possibilities[0]; 56 | } 57 | -------------------------------------------------------------------------------- /src/notatrix/splitter.ts: -------------------------------------------------------------------------------- 1 | import {detect} from "./detector"; 2 | import {FORMAT_BY_NAME} from "./formats"; 3 | import {split as defaultSplit} from "./formats/default-splitter"; 4 | import type {Input, SplitByName, SplitOutput} from "./base"; 5 | import type {Options} from "./nx/options"; 6 | 7 | export const SPLIT_BY_NAME: SplitByName = { 8 | "apertium stream": FORMAT_BY_NAME.apertiumStream.split, 9 | apertiumStream: FORMAT_BY_NAME.apertiumStream.split, 10 | Brackets: FORMAT_BY_NAME.brackets.split, 11 | brackets: FORMAT_BY_NAME.brackets.split, 12 | CG3: FORMAT_BY_NAME.cg3.split, 13 | cg3: FORMAT_BY_NAME.cg3.split, 14 | "CoNLL-U": FORMAT_BY_NAME.conllu.split, 15 | conllu: FORMAT_BY_NAME.conllu.split, 16 | "notatrix serial": FORMAT_BY_NAME.notatrixSerial.split, 17 | notatrixSerial: FORMAT_BY_NAME.notatrixSerial.split, 18 | Params: FORMAT_BY_NAME.params.split, 19 | params: FORMAT_BY_NAME.params.split, 20 | "plain text": FORMAT_BY_NAME.plainText.split, 21 | plainText: FORMAT_BY_NAME.plainText.split, 22 | SD: FORMAT_BY_NAME.sd.split, 23 | sd: FORMAT_BY_NAME.sd.split, 24 | }; 25 | 26 | export function split(input: Input, options: Options): SplitOutput { 27 | let fromDefault = new Set(); 28 | const splitAsDefault = defaultSplit(input as string, options); 29 | splitAsDefault.forEach(line => { 30 | const detected = detect(line, options); 31 | const detecteds = Array.isArray(detected) ? detected : [detected]; 32 | detecteds.forEach(format => fromDefault.add(format)); 33 | }); 34 | 35 | let fromPlainText = new Set(); 36 | const splitAsPlainText = SPLIT_BY_NAME.plainText(input as string, options); 37 | (splitAsPlainText || []).forEach(line => { 38 | const detected = detect(line, options); 39 | const detecteds = Array.isArray(detected) ? detected : [detected]; 40 | detecteds.forEach(format => fromPlainText.add(format)); 41 | }); 42 | 43 | if (fromDefault.size !== 1 && fromPlainText.size === 1 && 44 | fromPlainText.has("plain text")) 45 | return splitAsPlainText; 46 | 47 | return splitAsDefault; 48 | } 49 | -------------------------------------------------------------------------------- /src/notatrix/utils/constants.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import type {SentenceSerial} from "../nx/sentence"; 4 | 5 | export const fields = [ 6 | "index", "form", "lemma", "upostag", "xpostag", "feats", "head", "deprel", 7 | "deps", "misc" 8 | ]; 9 | 10 | export const formats = [ 11 | //'apertium stream', 12 | "Brackets", "CG3", "CoNLL-U", "notatrix serial", "Params", "plain text", 13 | "SD" 14 | ]; 15 | 16 | type _SentenceFields = {[k in keyof Partial]: string}; 17 | export const nxSentenceFields: _SentenceFields = { 18 | input: "string", 19 | options: "object", 20 | comments: "array", 21 | tokens: "array", 22 | }; 23 | 24 | export const nxSentenceTokensFields = { 25 | semicolon: "boolean", 26 | isEmpty: "boolean", 27 | index: "number", 28 | form: "string*", 29 | lemma: "string*", 30 | upostag: "string*", 31 | xpostag: "string*", 32 | feats: "array", 33 | heads: "array", 34 | analyses: "array", 35 | }; 36 | 37 | export const nxAllOptions = { 38 | 39 | }; 40 | 41 | export const fallback = "_"; 42 | 43 | export const hexConstant = 0xffffff; 44 | -------------------------------------------------------------------------------- /src/notatrix/utils/errors.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import type {Options} from "../nx/options"; 4 | import type {Sentence, SentenceSerial} from "../nx/sentence"; 5 | import type {TokenSerial} from "../nx/base-token"; 6 | import type {Input} from "../base"; 7 | 8 | export class NotatrixError extends Error {} 9 | 10 | export class ToolError extends NotatrixError {} 11 | 12 | export class SplitterError extends ToolError { 13 | text: string; 14 | options: Options; 15 | 16 | constructor(message: string, text: string, options: Options) { 17 | super(message); 18 | 19 | this.name = "SplitterError"; 20 | this.text = text; 21 | this.options = options; 22 | 23 | Object.setPrototypeOf(this, SplitterError.prototype); 24 | } 25 | } 26 | 27 | export class DetectorError extends ToolError { 28 | input: Input; 29 | options: Options; 30 | 31 | constructor(message: string, input: Input, options: Options) { 32 | super(message); 33 | 34 | this.name = "DetectorError"; 35 | this.input = input; 36 | this.options = options; 37 | 38 | Object.setPrototypeOf(this, DetectorError.prototype); 39 | } 40 | } 41 | 42 | export class ParserError extends ToolError { 43 | input: Input; 44 | options: Options; 45 | 46 | constructor(message: string, input: Input, options: Options) { 47 | super(message); 48 | 49 | this.name = "ParserError"; 50 | this.input = input; 51 | this.options = options; 52 | 53 | Object.setPrototypeOf(this, ParserError.prototype); 54 | } 55 | } 56 | 57 | export class GeneratorError extends ToolError { 58 | nx: Sentence; 59 | options: Options; 60 | 61 | constructor(message: string, nx: Sentence, options: Options) { 62 | super(message); 63 | 64 | this.name = "GeneratorError"; 65 | this.nx = nx; 66 | this.options = options; 67 | 68 | Object.setPrototypeOf(this, GeneratorError.prototype); 69 | } 70 | } 71 | 72 | export class ConverterError extends ToolError { 73 | constructor(message: string) { 74 | super(message); 75 | 76 | this.name = "ConverterError"; 77 | 78 | Object.setPrototypeOf(this, ConverterError.prototype); 79 | } 80 | } 81 | 82 | export class NxError extends NotatrixError { 83 | constructor(message: string) { 84 | super(message); 85 | this.name = "NxError"; 86 | 87 | Object.setPrototypeOf(this, NxError.prototype); 88 | } 89 | } 90 | 91 | export class DBError extends NotatrixError {} 92 | -------------------------------------------------------------------------------- /src/notatrix/utils/funcs.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import * as _ from "underscore"; 4 | import * as constants from "./constants"; 5 | import * as re from "./regex"; 6 | 7 | export function combine(arr: T[], k: number): T[][] { 8 | if (k > arr.length || k <= 0) 9 | return []; 10 | 11 | if (k === arr.length) 12 | return [arr]; 13 | 14 | if (k === 1) 15 | return arr.map(e => [e]); 16 | 17 | let combs: T[][] = []; 18 | for (let i = 0; i < arr.length - k + 1; i++) { 19 | const head = arr.slice(i, i + 1); 20 | const tailCombs = combine(arr.slice(i + 1), k - 1); 21 | tailCombs.forEach(tailComb => { combs.push(head.concat(tailComb)); }); 22 | } 23 | return combs; 24 | } 25 | 26 | export function hexToRGB(hex: string): [number, number, number]|undefined { 27 | const match = hex.match(re.hexColor); 28 | 29 | if (match) 30 | return [ 31 | parseInt(match[1], 16), parseInt(match[2], 16), parseInt(match[3], 16) 32 | ]; 33 | 34 | return undefined; 35 | } 36 | 37 | export function isJSONSerializable(obj: any): boolean { 38 | if (typeof obj === "string") { 39 | try { 40 | JSON.parse(obj); 41 | } catch (e) { 42 | return false; 43 | } 44 | 45 | } else { 46 | try { 47 | JSON.stringify(obj); 48 | } catch (e) { 49 | return false; 50 | } 51 | } 52 | 53 | return true; 54 | } 55 | 56 | export function noop(t: T): T { 57 | return t; 58 | } 59 | 60 | export function thin(t: T): T|undefined { 61 | return !!t ? t : undefined; 62 | } 63 | 64 | export function hashStringToHex(s: string): string { 65 | let hash = 0; 66 | for (let i = 0; i < s.length; i++) { 67 | hash = s.charCodeAt(i) + ((hash << 5) - hash); 68 | } 69 | 70 | let hex = ""; 71 | for (let i = 0; i < 3; i++) { 72 | const value = (hash >> (i * 8)) & 0xFF; 73 | hex += ("00" + value.toString(16)).substr(-2); 74 | } 75 | return hex; 76 | } 77 | 78 | export function getRandomHexColor(): string { 79 | let color = ""; 80 | do { 81 | color = Math.floor(Math.random() * constants.hexConstant).toString(16); 82 | } while (color.length !== 7); 83 | 84 | return color; 85 | } 86 | 87 | export function getContrastingColor(background: string): string { 88 | let color = "ffffff"; 89 | 90 | const rgb = hexToRGB(background); 91 | if (!rgb) 92 | return color; 93 | 94 | const [r, g, b] = rgb; 95 | if ((r ** 2 + g ** 2 + b ** 2) > 96 | ((255 - r) ** 2 + (255 - g) ** 2 + (255 - b) ** 2)) 97 | color = "000000"; 98 | 99 | return color; 100 | } 101 | -------------------------------------------------------------------------------- /src/notatrix/utils/index.ts: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | import * as _ from "underscore"; 4 | 5 | import * as constants from "./constants"; 6 | import * as errors from "./errors"; 7 | import * as funcs from "./funcs"; 8 | import * as regex from "./regex"; 9 | 10 | module.exports = _.extend({re: regex}, errors, constants, funcs); 11 | -------------------------------------------------------------------------------- /src/notatrix/utils/regex.ts: -------------------------------------------------------------------------------- 1 | export const multiNewlines = /\n{2,}/g; 2 | export const punctuation = /[.,!?;]+/g; 3 | export const allPunctuation = /^[.,!?;]+$/; 4 | export const sentenceThenPunctuation = /([^.!?]*[.!?]*)/g; 5 | export const spaceBeforePunctuation = /\s+([.,!?;]+)/g; 6 | export const comment = /^(#\s*(.*))(\n|$)/; 7 | export const conlluTokenLine = /^((\d+(\.\d+)?)(\-(\d+(\.\d+)?))?)(.+)/; 8 | export const conlluTokenLineTenParams = /^((\d+(\.\d+)?)(\-(\d+(\.\d+)?))?)((\s+\S+){8,9})/; 9 | export const conlluEmptyIndex = /^(\d+)(\.\d+)?/; 10 | export const cg3TokenStart = /^["']<((.|\\")*)>["']/; 11 | export const cg3TokenContent = /^(;?)(\s+)"((.|\\")*)"(([ \t]+\S+)*)/; 12 | export const cg3Dependency = /#?\d+(->\d*)?$/; 13 | export const cg3Head = /#\d+->(\d*)$/; 14 | export const cg3Index = /#(\d+)/; 15 | export const cg3Deprel = /\s@([\w:]*)/; 16 | export const cg3Other = /([^;].*(:.+)?)/; 17 | export const whitespace = /(\s+)/; 18 | export const whitespaceLine = /^(\s*)$/; 19 | export const whiteline = /^(\s*)(\n|$)/; 20 | export const sdDependency = /^\s*([\w.]+)\(([\w.]+),\s*([\w.]+)\)\s*$/; 21 | export const sdDependencyNoWhitespace = /^([\w.]+)\(([\w.]+),\s*([\w.]+)\)$/; 22 | export const fallback = /^_$/; 23 | export const commentLabel = /(\s*)(labels?|tags?)\s*=\s*(\w.*)/; 24 | export const commentSentId = /(\s*)sent.?id\s*=\s*(\w*)/i; 25 | 26 | export const hexColor = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i; 27 | export const hexColorSixDigit = /^#?([a-f\d]{6})/i; 28 | -------------------------------------------------------------------------------- /src/server/app.ts: -------------------------------------------------------------------------------- 1 | import * as bodyParser from "body-parser"; 2 | import * as cookieParser from "cookie-parser"; 3 | import * as express from "express"; 4 | import * as fileUpload from "express-fileupload"; 5 | import * as http from "http"; 6 | import * as morgan from "morgan"; 7 | import * as nocache from "nocache"; 8 | import * as session from "express-session"; 9 | import * as socketIO from "socket.io"; 10 | 11 | // @ts-ignore 12 | import * as socketIOCookieParser from "socket.io-cookie-parser"; 13 | 14 | import {cfg} from "./config"; 15 | import {configureRoutes} from "./routes"; 16 | import {configureSocketIO} from "./sockets"; 17 | 18 | const app = express(); 19 | const MemoryStore = new session.MemoryStore(); 20 | app.use(morgan(cfg.environment === "development" ? "dev" : "tiny")); 21 | app.use(bodyParser.json({limit: "500mb"})); 22 | app.use(bodyParser.urlencoded({extended: true})); 23 | app.use(cookieParser()); 24 | app.use(fileUpload()); 25 | app.use(session({ 26 | store: MemoryStore, 27 | secret: cfg.secret, 28 | key: "express.sid", 29 | saveUninitialized: true, 30 | resave: false 31 | } as session.SessionOptions)); 32 | if (cfg.environment === "development") 33 | app.use(nocache()); 34 | app.set("view engine", "ejs"); 35 | app.set("views", "src/server/views"); 36 | 37 | // routes 38 | configureRoutes(app); 39 | app.use(express.static("src/server/public")); 40 | 41 | // run server 42 | const server = http.createServer(app).listen(cfg.port, () => { 43 | console.log(`Express server listening at ${cfg.protocol}://${cfg.host}:${cfg.port}`); 44 | console.log("ENV:", cfg.environment); 45 | if (cfg.environment === "development") 46 | console.dir(cfg); 47 | }); 48 | 49 | // set up sockets 50 | const sio = socketIO.listen(server); 51 | sio.use((socketIOCookieParser as () => ((socket: socketIO.Socket) => void))()); 52 | configureSocketIO(sio, MemoryStore); 53 | -------------------------------------------------------------------------------- /src/server/config.ts: -------------------------------------------------------------------------------- 1 | import * as crypto from "crypto"; 2 | import * as mkdirp from "mkdirp"; 3 | 4 | // @ts-ignore 5 | import {config as configureDotenv} from "dotenv"; 6 | 7 | import {ConfigError} from "./errors"; 8 | import {UsersDB} from "./models/users"; 9 | 10 | configureDotenv(); 11 | 12 | interface GithubConfig { 13 | readonly client_id: string; 14 | readonly client_secret: string; 15 | readonly login_uri: string; 16 | readonly callback_uri: string; 17 | readonly state: string; 18 | } 19 | 20 | interface Config { 21 | readonly port: number|string; 22 | readonly host: string; 23 | readonly protocol: string; 24 | readonly corpora_path: string; 25 | readonly secret: string; 26 | readonly environment: string; 27 | readonly github: GithubConfig; 28 | readonly users_db_path: string; 29 | readonly users: UsersDB; 30 | } 31 | 32 | // basic app config 33 | const port = process.env.ANNOTATRIX_PORT || process.env.PORT || 5316; 34 | const host = process.env.ANNOTATRIX_HOST || process.env.HOST || "localhost"; 35 | const protocol = process.env.ANNOTATRIX_PROTOCOL || process.env.PROTOCOL || "http"; 36 | const corpora_path = 37 | process.env.ANNOTATRIX_CORPORA_PATH || process.env.CORPORA_PATH || process.env.PATH_TO_CORPORA || "corpora"; 38 | const secret = process.env.ANNOTATRIX_SECRET || process.env.SECRET || "dev secret"; 39 | const environment = process.env.ANNOTATRIX_ENV || process.env.NODE_ENV || "development"; 40 | 41 | // oauth config 42 | let github = { 43 | client_id: process.env.ANNOTATRIX_GH_CLIENT_ID || process.env.GH_CLIENT_ID || 44 | "298b7a22eb8bc53567d1", // keggsmurph21 'UD-Annotatrix test 2' 45 | client_secret: process.env.ANNOTATRIX_GH_CLIENT_SECRET || process.env.GH_CLIENT_SECRET, 46 | login_uri: `${protocol}://${host}:${port}/oauth/login`, 47 | callback_uri: `${protocol}://${host}:${port}/oauth/callback`, 48 | state: crypto.randomBytes(8).toString("hex") 49 | }; 50 | if (!github.client_secret) { 51 | new ConfigError("Please provide ANNOTATRIX_GH_CLIENT_SECRET"); 52 | github = null; 53 | } 54 | 55 | // database config 56 | mkdirp(corpora_path, () => {}); // TODO: We should use the callback here! 57 | const users_db_path = process.env.ANNOTATRIX_USERS_DB_PATH || ".users"; 58 | const users = UsersDB.create(users_db_path); 59 | 60 | export const cfg: Config = { 61 | port, 62 | host, 63 | protocol, 64 | corpora_path, 65 | secret, 66 | environment, 67 | github, 68 | users_db_path, 69 | users, 70 | }; 71 | -------------------------------------------------------------------------------- /src/server/errors.ts: -------------------------------------------------------------------------------- 1 | export class UDAnnotatrixError extends Error { 2 | constructor(message: string) { 3 | super(message); 4 | 5 | Object.setPrototypeOf(this, UDAnnotatrixError.prototype); 6 | } 7 | } 8 | 9 | export class ConfigError extends UDAnnotatrixError { 10 | constructor(message: string) { 11 | console.log("ConfigError:", message); 12 | super(message); 13 | 14 | Object.setPrototypeOf(this, ConfigError.prototype); 15 | } 16 | } 17 | 18 | export class DBError extends UDAnnotatrixError { 19 | constructor(message: string) { 20 | console.log("DBError:", message); 21 | super(message); 22 | 23 | Object.setPrototypeOf(this, DBError.prototype); 24 | } 25 | } 26 | 27 | export class UploadError extends UDAnnotatrixError { 28 | constructor(message: string) { 29 | console.log("UploadError:", message); 30 | super(message); 31 | 32 | Object.setPrototypeOf(this, UploadError.prototype); 33 | } 34 | } 35 | 36 | export class SocketError extends UDAnnotatrixError { 37 | constructor(message: string) { 38 | console.log("SocketError:", message); 39 | super(message); 40 | 41 | Object.setPrototypeOf(this, SocketError.prototype); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/server/list-treebanks.ts: -------------------------------------------------------------------------------- 1 | import * as fs from "fs"; 2 | import * as moment from "moment"; 3 | import * as path from "path"; 4 | 5 | import * as nx from "../notatrix"; 6 | import {cfg} from "./config"; 7 | import {Treebank} from "./models/treebank"; 8 | 9 | interface Corpus { 10 | path: string; 11 | modified: unknown; 12 | } 13 | 14 | export function listTreebanks(next: (err: Error|null, treebanks: Treebank[]) => void) { 15 | fs.readdir(cfg.corpora_path, (err, dirs) => { 16 | if (err || (!dirs || !dirs.length)) 17 | return next(err, []); 18 | 19 | let corpora: Corpus[] = []; 20 | let touched = 0; 21 | 22 | dirs.forEach(dir => { 23 | const filepath = path.join(cfg.corpora_path, dir); 24 | 25 | console.log(filepath) 26 | fs.lstat(filepath, (err, stat) => { 27 | touched++; 28 | 29 | if (err) 30 | throw err; 31 | 32 | if (filepath.endsWith(".json")) 33 | corpora.push({ 34 | path: filepath, 35 | modified: stat.mtime, 36 | }); 37 | 38 | if (touched === dirs.length) { 39 | 40 | const treebanks = corpora 41 | .sort((x, y) => { 42 | if (x.modified < y.modified) 43 | return 1; 44 | if (x.modified > y.modified) 45 | return -1; 46 | return 0; 47 | }) 48 | .map(info => { 49 | try { 50 | const buffer = fs.readFileSync(info.path); 51 | const serial = buffer.toString(); 52 | const parsed = JSON.parse(serial); 53 | const snapshot = nx.Corpus.deserialize(parsed).snapshot; 54 | 55 | return { 56 | id: path.basename(info.path).slice(0, -5), 57 | modified: info.modified, 58 | modified_ago: moment(info.modified).fromNow(), 59 | filename: snapshot.filename, 60 | sentences: snapshot.sentences, 61 | errors: snapshot.errors, 62 | labels: snapshot.labels.slice(0, 3), 63 | }; 64 | } catch (e) { 65 | console.warn(`Failed to parse '${info.path}': ${e}`); 66 | return null; 67 | } 68 | }).filter(corpus => !!corpus); 69 | 70 | next(null, treebanks); 71 | } 72 | }); 73 | }); 74 | }); 75 | } 76 | -------------------------------------------------------------------------------- /src/server/models/corpus-json.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import * as path from "path"; 3 | import * as fs from "fs"; 4 | 5 | import {cfg} from "../config"; 6 | import {DBError} from "../errors"; 7 | 8 | export class CorpusDB { 9 | private path: string; 10 | 11 | private constructor(filename: string) { 12 | if (!filename) 13 | throw new DBError("Missing required argument: filename"); 14 | 15 | this.path = path.join(cfg.corpora_path, filename + ".json"); 16 | } 17 | 18 | public static create(filename: string): CorpusDB { 19 | return new CorpusDB(filename); 20 | } 21 | 22 | load(next: (err: Error, loaded?: string) => void) { 23 | fs.readFile(this.path, (err, data) => { 24 | if (err) 25 | return next(err); 26 | 27 | next(null, data.toString()); 28 | }); 29 | } 30 | 31 | save(filename: string, state: any, next: (_: null, contents: string, err: Error|null) => void) { 32 | state.meta = _.defaults(state.meta, { 33 | filename: filename, 34 | }); 35 | state = JSON.stringify(state, null, 2); 36 | fs.writeFile(this.path, state, next as () => void); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/server/models/sanitize.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | import * as nx from "../../notatrix"; 4 | 5 | function sanitize(obj: any, keys: string[]): any { 6 | obj = _.pick(obj, keys); 7 | _.each(obj, (value, key, obj) => { obj[key] = JSON.stringify(value); }); 8 | 9 | return obj; 10 | } 11 | 12 | interface Meta { 13 | current_index: unknown; 14 | owner: unknown; 15 | github_url: unknown; 16 | gui: unknown; 17 | labeler: unknown; 18 | permissions: unknown; 19 | editors: unknown; 20 | } 21 | 22 | interface SanitizedMeta { 23 | current_index: string; 24 | owner: string; 25 | github_url: string; 26 | gui: string; 27 | labeler: string; 28 | permissions: string; 29 | editors: string; 30 | } 31 | 32 | export function sanitizeMeta(meta: Meta): SanitizedMeta { 33 | return sanitize( 34 | meta, 35 | [ 36 | "current_index", 37 | "owner", 38 | "github_url", 39 | "gui", 40 | "labeler", 41 | "permissions", 42 | "editors", 43 | ] 44 | ); 45 | } 46 | 47 | interface Sentence { 48 | column_visibilities: unknown; 49 | format: unknown; 50 | is_table_view: unknown; 51 | nx_initialized: unknown; 52 | nx: nx.Sentence; 53 | } 54 | 55 | interface SanitizedSentence { 56 | column_visibilities: string; 57 | format: string; 58 | is_table_view: string; 59 | nx_initialized: string; 60 | nx: string; 61 | } 62 | 63 | export function sanitizeSentence(sentence: Sentence): SanitizedSentence { 64 | return sanitize( 65 | sentence, 66 | [ 67 | "column_visibilities", 68 | "format", 69 | "is_table_view", 70 | "nx_initialized", 71 | "nx", 72 | ] 73 | ); 74 | } 75 | -------------------------------------------------------------------------------- /src/server/models/treebank.ts: -------------------------------------------------------------------------------- 1 | export interface Treebank { 2 | id: string; 3 | modified: unknown; 4 | modified_ago: unknown; 5 | filename: string; 6 | sentences: unknown; 7 | errors: unknown; 8 | labels: unknown[]; 9 | } 10 | -------------------------------------------------------------------------------- /src/server/models/users.ts: -------------------------------------------------------------------------------- 1 | import * as sqlite3 from "sqlite3"; 2 | import * as fs from "fs"; 3 | import {DBError} from "../errors"; 4 | 5 | function open(filename: string, next: (err: Error|null, db: sqlite3.Database) => void) { 6 | if (fs.existsSync(filename)) { 7 | 8 | next(null, new sqlite3.Database(filename)); 9 | 10 | } else { 11 | 12 | const db = new sqlite3.Database(filename); 13 | db.run(` 14 | CREATE TABLE users ( 15 | id INTEGER PRIMARY KEY, 16 | username UNIQUE, 17 | token UNIQUE 18 | )`, 19 | err => next(err, db)); 20 | } 21 | } 22 | 23 | interface Params { 24 | id?: unknown; 25 | username: unknown; 26 | token?: unknown; 27 | } 28 | 29 | interface UpdateValues { 30 | username?: unknown; 31 | token: unknown; 32 | } 33 | 34 | export class UsersDB { 35 | private path: string; 36 | 37 | private constructor(filename: string) { 38 | if (!filename) 39 | throw new DBError("Missing required argument: filename"); 40 | 41 | this.path = filename + ".db"; 42 | } 43 | 44 | insert(params: Params, next: (err: Error|null, data: {id: string, changes: unknown}) => void) { 45 | open(this.path, (err, db) => { 46 | if (err) 47 | return next(new DBError(err.toString()), null); 48 | 49 | if (!params || (!params.username && !params.token)) 50 | return next(new DBError("Missing required param: username OR token"), null); 51 | 52 | db.run("INSERT INTO users (username, token) VALUES (?, ?)", params.username || null, params.token || null, 53 | function(err: Error) { // don't use an anonymous function b/c we need this-binding 54 | if (err) 55 | return next(new DBError(err.toString()), null); 56 | 57 | if (isNaN(parseInt(this.lastID))) 58 | return next(new DBError("Unable to insert"), null); 59 | 60 | next(null, {id: this.lastID, changes: this.changes}); 61 | }); 62 | }); 63 | } 64 | 65 | query(params: Params, next: (err: Error|null, data: unknown) => void) { 66 | open(this.path, (err, db) => { 67 | if (err) 68 | return next(new DBError(err.toString()), null); 69 | 70 | if (params) { 71 | 72 | db.get("SELECT * FROM users WHERE username = (?) or token = (?)", params.username, params.token, 73 | (err: Error|null, data: unknown) => { 74 | if (err) 75 | return next(new DBError(err.toString()), null); 76 | 77 | next(null, data); 78 | }); 79 | 80 | } else { 81 | 82 | db.all("SELECT * FROM users", (err, data) => { 83 | if (err) 84 | return next(new DBError(err.toString()), null); 85 | 86 | next(null, data); 87 | }); 88 | } 89 | }); 90 | } 91 | 92 | update(params: Params, values: UpdateValues|null, next: (err: Error|null, data: {id: string, changes: unknown}) => void) { 93 | open(this.path, (err, db) => { 94 | if (err) 95 | return next(new DBError(err.toString()), null); 96 | 97 | if (!params || !values) 98 | return next(new DBError("Missing required arguments: params AND values"), null); 99 | 100 | this.query(params, (err, data) => { 101 | if (err) 102 | return next(new DBError(err.toString()), null); 103 | 104 | if (data) { // it already exists, overwrite 105 | 106 | db.run(` 107 | UPDATE users 108 | SET username=IFNULL(?, username), token=IFNULL(?, username) 109 | WHERE id=(?) OR username=(?) OR token=(?)`, 110 | values.username, values.token, params.id, params.username, params.token, function(err: Error|null) { 111 | if (err) 112 | return next(new DBError(err.toString()), null); 113 | 114 | next(null, {id: this.lastID, changes: this.changes}); 115 | }); 116 | 117 | } else { // insert new 118 | this.insert({username: values.username || params.username, token: values.token || params.token}, next); 119 | } 120 | }); 121 | }); 122 | } 123 | 124 | remove(params: Params, next: (err: Error|null, data: {id: string, changes: unknown}) => void) { 125 | open(this.path, (err, db) => { 126 | if (err) 127 | return next(new DBError(err.toString()), null); 128 | 129 | if (!params) 130 | return next(new DBError("Missing required argument: params"), null); 131 | 132 | db.run(` 133 | DELETE FROM users 134 | WHERE id=(?) OR username=(?) OR token=(?)`, 135 | params.id, params.username, params.token, function(err: Error|null) { 136 | if (err) 137 | return next(new DBError(err.toString()), null); 138 | 139 | next(null, {id: this.lastID, changes: this.changes}); 140 | }); 141 | }); 142 | } 143 | 144 | public static create(filename: string): UsersDB { 145 | return new UsersDB(filename); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/server/public/css/jquery.autocomplete.css: -------------------------------------------------------------------------------- 1 | .xdsoft_autocomplete, 2 | .xdsoft_autocomplete div, 3 | .xdsoft_autocomplete span{ 4 | /* -moz-box-sizing: border-box !important; 5 | box-sizing: border-box !important;*/ 6 | } 7 | 8 | .xdsoft_autocomplete{ 9 | display:inline; 10 | position:relative; 11 | word-spacing: normal; 12 | text-transform: none; 13 | text-indent: 0px; 14 | text-shadow: none; 15 | text-align: start; 16 | } 17 | 18 | .xdsoft_autocomplete .xdsoft_input{ 19 | position:relative; 20 | z-index:2; 21 | } 22 | .xdsoft_autocomplete .xdsoft_autocomplete_dropdown{ 23 | position:absolute; 24 | border: 1px solid #ccc; 25 | border-top-color: #d9d9d9; 26 | box-shadow: 0 2px 4px rgba(0,0,0,0.2); 27 | -webkit-box-shadow: 0 2px 4px rgba(0,0,0,0.2); 28 | cursor: default; 29 | display:none; 30 | z-index: 1001; 31 | margin-top:-1px; 32 | background-color:#fff; 33 | min-width:100%; 34 | overflow:auto; 35 | } 36 | .xdsoft_autocomplete .xdsoft_autocomplete_hint{ 37 | position:absolute; 38 | z-index:1; 39 | color:#ccc !important; 40 | -webkit-text-fill-color:#ccc !important; 41 | text-fill-color:#ccc !important; 42 | overflow:hidden !important; 43 | white-space: pre !important; 44 | } 45 | 46 | .xdsoft_autocomplete .xdsoft_autocomplete_hint span{ 47 | color:transparent; 48 | opacity: 0.0; 49 | } 50 | 51 | .xdsoft_autocomplete .xdsoft_autocomplete_dropdown > .xdsoft_autocomplete_copyright{ 52 | color:#ddd; 53 | font-size:10px; 54 | text-decoration:none; 55 | right:5px; 56 | position:absolute; 57 | margin-top:-15px; 58 | z-index:1002; 59 | } 60 | .xdsoft_autocomplete .xdsoft_autocomplete_dropdown > div{ 61 | background:#fff; 62 | white-space: nowrap; 63 | cursor: pointer; 64 | line-height: 1.5em; 65 | padding: 2px 0px 2px 0px; 66 | } 67 | .xdsoft_autocomplete .xdsoft_autocomplete_dropdown > div.active{ 68 | background: #0097CF; 69 | color: #FFFFFF; 70 | } 71 | -------------------------------------------------------------------------------- /src/server/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/favicon.png -------------------------------------------------------------------------------- /src/server/public/fonts/Liberation_Sans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/fonts/Liberation_Sans-Regular.ttf -------------------------------------------------------------------------------- /src/server/public/fonts/PT_Sans-Caption-Web-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/fonts/PT_Sans-Caption-Web-Regular.ttf -------------------------------------------------------------------------------- /src/server/public/html/help.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | UD Annotatrix / Help 7 | 8 | 9 | 10 | 11 | 12 | 13 |
    14 |
    15 |

    User guide

    16 |

    About

    17 | 18 |

    19 | UD Annotatrix is a client-side, browser only, tool for editing dependency trees in CoNLL-U and VISL formsts. The point of this is to make manual editing of dependency corpora quicker. The aim of this project is to create an easy-to-use, quick and interactive interface tool for Universal Dependencies annotation, which would work both online and offline and allow the user to edit the annotation in both graphical and text modes. 20 |

    21 | 22 |

    Editing functionality

    23 | 24 |

    Dependency relations (deprels)

    25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
    Create new deprelLeft click on head, then left click on dependent.
    Delete a deprelSelect all the deprels to be removed with right click, then press either del or backspace. If you want to unselect all the arcs you've just selected, press esc.
    Change a deprel labelRight click on the arc with the left button (a textbox completed with the previous deprel should appear), then edit the label and press enter
    39 | 40 |

    Editing tokens

    41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
    Edit a tokenRight click on the token node (a textbox will appear), navigate with arrows, edit the token, then press enter
    Split a tokenRight click on the token node (a textbox will appear), navigate with arrows, insert a space where you need to split the token. Press enter.
    Merge two tokens into a single tokenLeft click on the main token (the token you want to take the markup from), then press m, then select with an arrow, which neighbour you want to append to the main token.
    Merge two tokens into a supertokenLeft click on a token, then press c, then select with left/right arrow key or by clicking, which neighbour you want to merge with it.
    59 | 60 |

    Editing POS labels

    61 | 62 | 63 | 64 | 65 | 66 |
    Edit a POS labelRight click on the POS node (a textbox will appear), navigate with arrows, edit the POS label, then press enter
    67 | 68 |

    Visualisation settings

    69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
    Vertical alignmentUse vertical alignment, when the sentence is too long (i.e. the tree is too big) that the labels cannot be distinguished.
    RTL (right to left)Use RTL alignment for languages like Hebrew or Arabic.
    80 |
    81 |
    82 | 83 | 84 | -------------------------------------------------------------------------------- /src/server/public/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | UD Annotatrix 9 | 10 | 11 | 12 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 80 | 81 | 82 |
    83 |
    84 | 85 | 86 | 87 | 91 | 92 | 96 | 97 | 106 |
    107 | Loading... 108 |
    109 |
    110 | 111 |

    Welcome to UD Annotatrix!

    112 |
    113 |

    You can make a corpus by downloading Github file or by uploading local one!

    114 |
    115 | 116 |
    117 | 118 |
    119 |
    120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-brands-400.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-brands-400.eot -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-brands-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-brands-400.woff -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-brands-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-regular-400.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-regular-400.eot -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-regular-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-regular-400.woff -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-solid-900.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-solid-900.eot -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-solid-900.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-solid-900.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-solid-900.woff -------------------------------------------------------------------------------- /src/server/public/html/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/html/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /src/server/public/js/main.js: -------------------------------------------------------------------------------- 1 | $(document).on("change", ".custom-file-input", 2 | function(event) { $(this).next(".custom-file-label").html(event.target.files[0].name); }); 3 | $(document).ready(function() { 4 | if (location.protocol === "file:") { 5 | window.location.href = "./annotatrix.html"; 6 | } else { 7 | $.ajax({ 8 | type: "GET", 9 | url: "/running", 10 | success: function(data) { console.info("AJAX connect success with response:", data); }, 11 | error: function(data) { 12 | console.info("AJAX connect failed with response:", data) 13 | window.location.href = "./annotatrix.html"; 14 | } 15 | }); 16 | } 17 | 18 | var deletemode = false; 19 | $(".actmode").click(function() { 20 | // const url = `${base}/annotatrix?treebank_id=${treebank.id}`; %> 21 | var this_el = $(this); 22 | var this_id = this_el.data("id"); 23 | console.log(deletemode, this_id); 24 | if (deletemode) { 25 | var formData = new FormData(); 26 | formData.append("id", this_id); 27 | $.ajax({ 28 | url: "delete", 29 | type: "POST", 30 | data: formData, 31 | processData: false, // tell jQuery not to process the data 32 | contentType: false, // tell jQuery not to set contentType 33 | success: function(data) { 34 | if (data.hasOwnProperty("success") && data.success === true) { 35 | this_el.parent().parent().empty(); 36 | } else { 37 | alert(data.error); 38 | } 39 | } 40 | }); 41 | 42 | } else { 43 | window.location.href = "/annotatrix?treebank_id=" + this_id; 44 | } 45 | }); 46 | 47 | $("#actionswitch").change(function() { 48 | if (this.checked) { 49 | // var returnVal = confirm("Are you sure?"); 50 | // $(this).prop("checked", returnVal); 51 | $(".actmode").html("Delete"); 52 | console.log("checked"); 53 | deletemode = true; 54 | } else { 55 | $(".actmode").html("Edit"); 56 | console.log("unchecked"); 57 | deletemode = false; 58 | } 59 | // $('#textbox1').val(this.checked); 60 | }); 61 | 62 | $("#upload-to-server").click(function() { 63 | $(".navbar-collapse").collapse("hide"); 64 | var file2load = $("input[type=file]").val(); 65 | if (file2load) { 66 | // alert(file2load); 67 | var formData = new FormData(); 68 | formData.append("file", $("#fileupload")[0].files[0]); 69 | formData.append("src", "main"); 70 | $(".spinner-border").removeClass("d-none"); 71 | 72 | $.ajax({ 73 | url: "upload", 74 | type: "POST", 75 | data: formData, 76 | processData: false, // tell jQuery not to process the data 77 | contentType: false, // tell jQuery not to set contentType 78 | success: function(data) { 79 | // console.log(data); 80 | // alert(data); 81 | // location.reload(true); 82 | if (data.error) { 83 | alert(data.error); 84 | } 85 | $(".spinner-border").addClass("d-none"); 86 | location.reload(true); 87 | } 88 | }); 89 | } else { 90 | alert("No file selected!"); 91 | } 92 | return false; 93 | }); 94 | $("#webload").click(function() { 95 | $(".navbar-collapse").collapse("hide"); 96 | var url2load = $("#gitlink").val(); 97 | if (url2load) { 98 | var formData = new FormData(); 99 | formData.append("url", url2load); 100 | formData.append("src", "main"); 101 | $(".spinner-border").removeClass("d-none"); 102 | 103 | $.ajax({ 104 | url: "upload", 105 | type: "POST", 106 | data: formData, 107 | processData: false, // tell jQuery not to process the data 108 | contentType: false, // tell jQuery not to set contentType 109 | success: function(data) { 110 | if (data.error) { 111 | alert(data.error); 112 | } 113 | $(".spinner-border").addClass("d-none"); 114 | location.reload(true); 115 | } 116 | }); 117 | } else { 118 | alert("No file selected!"); 119 | } 120 | return false; 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /src/server/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/src/server/public/logo.png -------------------------------------------------------------------------------- /src/server/room.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | 3 | interface SocketData { 4 | id: string; 5 | address: string; 6 | username: string; 7 | } 8 | 9 | interface User { 10 | id: string; 11 | index: unknown|null; 12 | mouse: unknown|null; 13 | locked: unknown|null; 14 | address: unknown; 15 | username: unknown; 16 | room?: {users: {[id: string]: User}}; 17 | } 18 | 19 | export class Room { 20 | public users: {[id: string]: User}; 21 | constructor() { this.users = {}; } 22 | 23 | serialize() { 24 | return { 25 | users: this.users, 26 | }; 27 | } 28 | 29 | addUser(socketData: SocketData) { 30 | 31 | const user: User = { 32 | id: socketData.id, 33 | index: null, 34 | mouse: null, 35 | locked: null, 36 | address: socketData.address, 37 | username: socketData.username, 38 | }; 39 | 40 | this.users[socketData.id] = user; 41 | return user; 42 | } 43 | 44 | editUser(socketData: SocketData, fields: Partial): User { 45 | 46 | const user = this.users[socketData.id] || this.addUser(socketData); 47 | _.each(fields, (value, key) => { (user as any)[key] = value; }); 48 | 49 | return user; 50 | } 51 | 52 | removeUser(socketData: SocketData) { 53 | 54 | const user = this.users[socketData.id]; 55 | delete this.users[socketData.id]; 56 | 57 | return user; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/server/upload.ts: -------------------------------------------------------------------------------- 1 | import * as _ from "underscore"; 2 | import * as fs from "fs"; 3 | import * as request from "request"; 4 | import {v4 as uuidv4} from "uuid"; 5 | import {UploadedFile} from "express-fileupload"; 6 | 7 | import * as nx from "../notatrix"; 8 | import {UploadError} from "./errors"; 9 | import {CorpusDB} from "./models/corpus-json"; 10 | 11 | function upload(treebank: string, filename: string, contents: string, next: (err: Error|null, data?: unknown) => void) { 12 | 13 | console.log("uploading"); 14 | try { 15 | 16 | const corpus = nx.Corpus.fromString(contents); 17 | // console.log(corpus); 18 | corpus.filename = filename; 19 | return CorpusDB.create(treebank).save(filename, corpus.serialize(), next); 20 | 21 | } catch (e) { 22 | 23 | next(new UploadError(e.message)); 24 | } 25 | } 26 | 27 | export function fromFile(treebank: string, file: UploadedFile, next: (err: Error|null, data?: unknown) => void) { 28 | 29 | if (!file) 30 | return next(new UploadError(`No file provided.`)); 31 | 32 | const contents = file.data.toString(); 33 | return upload(treebank, file.name, contents, next); 34 | } 35 | 36 | export function fromGitHub(treebank: string, url: string, next: (err: Error|null, data?: unknown) => void) { 37 | 38 | if (!url) 39 | return next(new UploadError(`No URL provided.`)); 40 | 41 | // regex magic 42 | const match = url.match( 43 | /^(https?:\/\/)?(github\.com|raw\.githubusercontent\.com)\/([\w\d]*)\/([^/]*)\/(tree\/|blob\/)?([^/]*)\/(.*)$/); 44 | if (!match) 45 | return next(new UploadError(`Unsupported URL format: ${url}`)); 46 | 47 | const [string, protocol, domain, owner, repo, blob_or_tree, branch, filepath] = match; 48 | 49 | const filename = `${repo}__${branch}__${filepath.replace(/\//g, "__")}`; 50 | const rawURL = `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${filepath}`; 51 | 52 | request.get(rawURL, (err, _res, body) => { 53 | if (err) 54 | return next(err); 55 | 56 | return upload(treebank, filename, body, next); 57 | }); 58 | } 59 | -------------------------------------------------------------------------------- /src/server/views/help.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | UD Annotatrix / Help 7 | 8 | 9 | 10 | 11 | 12 | 13 |
    14 |
    15 |

    User guide

    16 |

    About

    17 | 18 |

    19 | UD Annotatrix is a client-side, browser only, tool for editing dependency trees in CoNLL-U and VISL formsts. The point of this is to make manual editing of dependency corpora quicker. The aim of this project is to create an easy-to-use, quick and interactive interface tool for Universal Dependencies annotation, which would work both online and offline and allow the user to edit the annotation in both graphical and text modes. 20 |

    21 | 22 |

    Editing functionality

    23 | 24 |

    Dependency relations (deprels)

    25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
    Create new deprelLeft click on head, then left click on dependent.
    Delete a deprelSelect all the deprels to be removed with right click, then press either del or backspace. If you want to unselect all the arcs you've just selected, press esc.
    Change a deprel labelRight click on the arc with the left button (a textbox completed with the previous deprel should appear), then edit the label and press enter
    39 | 40 |

    Editing tokens

    41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
    Edit a tokenRight click on the token node (a textbox will appear), navigate with arrows, edit the token, then press enter
    Split a tokenRight click on the token node (a textbox will appear), navigate with arrows, insert a space where you need to split the token. Press enter.
    Merge two tokens into a single tokenLeft click on the main token (the token you want to take the markup from), then press m, then select with an arrow, which neighbour you want to append to the main token.
    Merge two tokens into a supertokenLeft click on a token, then press c, then select with left/right arrow key or by clicking, which neighbour you want to merge with it.
    59 | 60 |

    Editing POS labels

    61 | 62 | 63 | 64 | 65 | 66 |
    Edit a POS labelRight click on the POS node (a textbox will appear), navigate with arrows, edit the POS label, then press enter
    67 | 68 |

    Visualisation settings

    69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
    Vertical alignmentUse vertical alignment, when the sentence is too long (i.e. the tree is too big) that the labels cannot be distinguished.
    RTL (right to left)Use RTL alignment for languages like Hebrew or Arabic.
    80 |
    81 |
    82 | 83 | 84 | -------------------------------------------------------------------------------- /src/server/views/modals/github-fork.ejs: -------------------------------------------------------------------------------- 1 | 35 | -------------------------------------------------------------------------------- /src/server/views/modals/help.ejs: -------------------------------------------------------------------------------- 1 | 16 | -------------------------------------------------------------------------------- /src/server/views/modals/upload-file.ejs: -------------------------------------------------------------------------------- 1 | 35 | -------------------------------------------------------------------------------- /src/server/views/modals/upload-url.ejs: -------------------------------------------------------------------------------- 1 | 25 | -------------------------------------------------------------------------------- /test/notatrix/data/index.js: -------------------------------------------------------------------------------- 1 | const conlluData = require("./conllu"); 2 | const cg3Data = require("./cg3"); 3 | 4 | module.exports = { 5 | conllu: conlluData, 6 | "CoNLL-U": conlluData, 7 | cg3: cg3Data, 8 | CG3: cg3Data, 9 | }; 10 | -------------------------------------------------------------------------------- /test/notatrix/detector.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"), expect = require("chai").expect, 4 | sinon = require("sinon"), utils = require("./utils"), nx = require("../../src/notatrix"); 5 | 6 | describe("detector", () => { 7 | describe("detect formats explicitly", () => { 8 | const options = { 9 | requireTenParams: true, 10 | }; 11 | 12 | utils.forEachText((text, format, name) => { 13 | it(`${format}:${name} is ${format}`, () => { 14 | expect(nx.detectAs[format](text, options)).to.equal(format); 15 | }); 16 | }); 17 | }); 18 | 19 | describe("avoid cross-detection", () => { 20 | const options = {}; 21 | 22 | utils.forEachText((text, format, name) => { 23 | utils.forEachFormat(castedFormat => { 24 | if (format !== castedFormat) 25 | it(`${format}:${name} is not ${castedFormat}`, () => { 26 | const cast = nx.detectAs[castedFormat]; 27 | expect(() => { cast(text, options); }).to.throw(nx.DetectorError); 28 | }); 29 | }); 30 | }); 31 | }); 32 | 33 | describe("detect formats implicitly", () => { 34 | const options = { 35 | suppressDetectorErrors: true, 36 | returnAllMatches: true, 37 | }; 38 | 39 | utils.forEachText((text, format, name) => { 40 | it(`${format}:${name} is ${format}`, () => { 41 | const possibilities = nx.detect(text, options); 42 | expect(possibilities.indexOf(format) > -1).to.equal(true); 43 | }); 44 | }); 45 | }); 46 | }); 47 | -------------------------------------------------------------------------------- /test/notatrix/enhance.js: -------------------------------------------------------------------------------- 1 | const _ = require("underscore"), expect = require("chai").expect, 2 | sinon = require("sinon"), utils = require("./utils"), nx = require("../../src/notatrix"); 3 | 4 | describe("enhance", () => { 5 | it(`should write the enhanced dependencies to the deps column`, () => { 6 | const sent = new nx.Sentence(`# sent_id = chapID01:paragID1:sentID1 7 | # text = Кечаень сыргозтизь налкставтыця карвот . 8 | # text[eng] = Kechai was awoken by annoying flies. 9 | 1 Кечаень Кечай N N Sem/Ant_Mal|Prop|SP|Gen|Indef 2 obj _ Кечаень 10 | 2 сыргозтизь сыргозтемс V V TV|Ind|Prt1|ScPl3|OcSg3 0 root _ сыргозтизь 11 | 3 налкставтыця налкставтомс PRC Prc V|TV|PrcPrsL|Sg|Nom|Indef 4 amod _ налкставтыця 12 | 4 карвот карво N N Sem/Ani|N|Pl|Nom|Indef 2 nsubj _ карвот 13 | 5 . . CLB CLB CLB 2 punct _ .`); 14 | 15 | expect(sent.options.enhanced).to.equal(false); 16 | 17 | sent.enhance(); 18 | 19 | expect(sent.options.enhanced).to.equal(true); 20 | expect(sent.to("CoNLL-U").output) 21 | .to.equal(`# sent_id = chapID01:paragID1:sentID1 22 | # text = Кечаень сыргозтизь налкставтыця карвот . 23 | # text[eng] = Kechai was awoken by annoying flies. 24 | 1 Кечаень Кечай N N Sem/Ant_Mal|Prop|SP|Gen|Indef 2 obj 2:obj Кечаень 25 | 2 сыргозтизь сыргозтемс V V TV|Ind|Prt1|ScPl3|OcSg3 0 root 0:root сыргозтизь 26 | 3 налкставтыця налкставтомс PRC Prc V|TV|PrcPrsL|Sg|Nom|Indef 4 amod 4:amod налкставтыця 27 | 4 карвот карво N N Sem/Ani|N|Pl|Nom|Indef 2 nsubj 2:nsubj карвот 28 | 5 . . CLB CLB CLB 2 punct 2:punct .`); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /test/notatrix/examples.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"), expect = require("chai").expect, 4 | utils = require("./utils"); 5 | 6 | describe("integration tests", () => { 7 | it("from brackets", () => { 8 | const nx = require("../../src/notatrix"); 9 | const brackets = 10 | "[root [nsubj I] have [obj [amod [advmod too] many] commitments] [advmod right now] [punct .]]"; 11 | const sent = new nx.Sentence(brackets); 12 | }); 13 | 14 | it("from CG3", () => { 15 | const nx = require("../../src/notatrix"); 16 | const cg3 = `# sent_id = mst-0001 17 | # text = Peşreve başlamalı. 18 | "" 19 | "peşrev" Noun @obl #1->2 20 | "" 21 | "başla" Verb SpaceAfter=No @root #2->0 22 | "<.>" 23 | "." Punc @punct #3->2`; 24 | const sent = new nx.Sentence(cg3); 25 | }); 26 | 27 | it("from CoNLL-U", () => { 28 | const nx = require("../../src/notatrix"); 29 | const conllu = `# sent_id = chapID01:paragID1:sentID1 30 | # text = Кечаень сыргозтизь налкставтыця карвот . 31 | # text[eng] = Kechai was awoken by annoying flies. 32 | 1 Кечаень Кечай N N Sem/Ant_Mal|Prop|SP|Gen|Indef 2 obj _ Кечаень 33 | 2 сыргозтизь сыргозтемс V V TV|Ind|Prt1|ScPl3|OcSg3 0 root _ сыргозтизь 34 | 3 налкставтыця налкставтомс PRC Prc V|TV|PrcPrsL|Sg|Nom|Indef 4 amod _ налкставтыця 35 | 4 карвот карво N N Sem/Ani|N|Pl|Nom|Indef 2 nsubj _ карвот 36 | 5 . . CLB CLB CLB 2 punct _ .`; 37 | const sent = new nx.Sentence(conllu); 38 | }); 39 | 40 | it("from params", () => { 41 | const nx = require("../../src/notatrix"); 42 | const params = [{form: "hello"}, {form: "world"}]; 43 | const sent = new nx.Sentence(params); 44 | }); 45 | 46 | it("from plain text", () => { 47 | const nx = require("../../src/notatrix"); 48 | const text = "this is my test string"; 49 | const sent = new nx.Sentence(text); 50 | }); 51 | 52 | it("from SD", () => { 53 | const nx = require("../../src/notatrix"); 54 | const sd = `He says that you like to swim 55 | ccomp(says, like) 56 | mark(like, that)`; 57 | const sent = new nx.Sentence(sd); 58 | }); 59 | 60 | it("inspecting", () => { 61 | const nx = require("../../src/notatrix"); 62 | const conllu = `# text = He boued e tebr Mona er gegin. 63 | # text[eng] = Mona eats her food here in the kitchen. 64 | # labels = press_1986 ch_syntax p_197 to_check 65 | 1 He he det _ pos|f|sp 2 det _ _ 66 | 2 boued boued n _ m|sg 4 obj _ _ 67 | 3 e e vpart _ obj 4 aux _ _ 68 | 4 tebr debriñ vblex _ pri|p3|sg 0 root _ _ 69 | 5 Mona Mona np _ ant|f|sg 4 nsubj _ _ 70 | 6-7 er _ _ _ _ _ _ _ _ 71 | 6 _ e pr _ _ 8 case _ _ 72 | 7 _ an det _ def|sp 8 det _ _ 73 | 8 gegin kegin n _ f|sg 4 obl _ _ 74 | 9 . . sent _ _ 4 punct _ _`; 75 | const sent = new nx.Sentence(conllu); 76 | 77 | expect(sent.comments.length).to.equal(3); 78 | expect(sent.tokens.length).to.equal(8); 79 | expect(sent.size).to.equal(10); 80 | }); 81 | 82 | it("converting", () => { 83 | const nx = require("../../src/notatrix"); 84 | const conllu = `# this is my first comment 85 | # here is another comment 86 | 1 hello hello _ _ _ 0 root _ 87 | 2 , , PUNCT _ _ 1 punct _ _ 88 | 3 world world _ _ _ 1 _ _`; 89 | const sent = new nx.Sentence(conllu); 90 | 91 | expect(sent.to("apertium stream")).to.equal(undefined); 92 | expect(sent.to("brackets")).to.deep.equal({ 93 | output: "[root hello [punct ,] [_ world]]", 94 | loss: ["comments", "lemma", "upostag"] 95 | }); 96 | expect(sent.to("cg3")).to.deep.equal({ 97 | output: 98 | "# this is my first comment\n# here is another comment\n\"\"\n\t\"hello\" @root #1->0\n\"<,>\"\n\t\",\" PUNCT @punct #2->1\n\"\"\n\t\"world\" #3->1", 99 | loss: [] 100 | }); 101 | expect(sent.to("conllu")).to.deep.equal({ 102 | output: 103 | "# this is my first comment\n# here is another comment\n1\thello\thello\t_\t_\t_\t0\troot\t_\t_\n2\t,\t,\tPUNCT\t_\t_\t1\tpunct\t_\t_\n3\tworld\tworld\t_\t_\t_\t1\t_\t_\t_", 104 | loss: [] 105 | }); 106 | /*expect(sent.to('notatrix serial')).to.deep.equal({ 107 | output: { ... }, 108 | loss: [] 109 | });*/ 110 | expect(sent.to("params")).to.deep.equal({ 111 | output: [ 112 | {form: "hello", lemma: "hello", head: "0"}, 113 | {form: ",", lemma: ",", upostag: "PUNCT", head: "1"}, 114 | {form: "world", lemma: "world", head: "1"} 115 | ], 116 | loss: ["comments"] 117 | }); 118 | expect(sent.to("plain text")).to.deep.equal({ 119 | output: "hello, world", 120 | loss: ["comments", "lemma", "heads", "upostag"] 121 | }); 122 | expect(sent.to("sd")).to.deep.equal({ 123 | output: 124 | "# this is my first comment\n# here is another comment\nhello, world\nroot(ROOT, hello)\npunct(hello, ,)\n_(hello, world)", 125 | loss: ["lemma", "upostag"] 126 | }); 127 | }); 128 | }); 129 | -------------------------------------------------------------------------------- /test/notatrix/index.js: -------------------------------------------------------------------------------- 1 | // external format tools first 2 | require("./splitter"); 3 | require("./detector"); 4 | require("./parser"); 5 | 6 | // then internal stuff 7 | require("./relation-sets"); 8 | require("./sentence"); 9 | require("./merge"); 10 | require("./combine"); 11 | require("./split"); 12 | 13 | // then externalizers 14 | require("./generator"); 15 | require("./loss"); 16 | 17 | // then editing stuff 18 | require("./enhance"); 19 | require("./setEmpty"); 20 | 21 | // integration 22 | require("./examples"); 23 | -------------------------------------------------------------------------------- /test/notatrix/loss.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"), expect = require("chai").expect, 4 | sinon = require("sinon"), utils = require("./utils"), nx = require("../../src/notatrix"); 5 | 6 | describe("check loss", () => { 7 | it("no dependencies", () => { 8 | const s = new nx.Sentence(`1 one _ _ _ _ _ _ _ _ 9 | 2 two _ _ _ _ _ _ _ _ 10 | 3 three _ _ _ _ _ _ _ _`); 11 | 12 | expect(s.options.enhanced).to.equal(false); 13 | expect(() => s.to("Brackets").loss).to.throw(nx.GeneratorError); 14 | expect(s.to("CG3").loss).to.deep.equal([]); 15 | expect(s.to("CoNLL-U").loss).to.deep.equal([]); 16 | expect(s.to("Params").loss).to.deep.equal([]); 17 | expect(s.to("plain text").loss).to.deep.equal([]); 18 | expect(s.to("SD").loss).to.deep.equal([]); 19 | }); 20 | 21 | it("only dependencies", () => { 22 | const s = new nx.Sentence(`1 one _ _ _ _ 0 root _ _ 23 | 2 two _ _ _ _ 3 _ 3 _ 24 | 3 three _ _ _ _ 1 _ 1 _`); 25 | 26 | expect(s.options.enhanced).to.equal(false); 27 | expect(s.to("Brackets").loss).to.deep.equal([]); 28 | expect(s.to("CG3").loss).to.deep.equal([]); 29 | expect(s.to("CoNLL-U").loss).to.deep.equal([]); 30 | expect(s.to("Params").loss).to.deep.equal([]); 31 | expect(s.to("plain text").loss).to.deep.equal(["heads"]); 32 | expect(s.to("SD").loss).to.deep.equal([]); 33 | }); 34 | 35 | it("enhanced", () => { 36 | const s = new nx.Sentence(`1 one _ _ _ _ 0 root _ _ 37 | 2 two _ _ _ _ 3 _ 1|3 _ 38 | 3 three _ _ _ _ 1 _ 1 _`); 39 | 40 | expect(s.options.enhanced).to.equal(true); 41 | expect(() => s.to("Brackets").loss).to.throw(nx.GeneratorError); 42 | expect(s.to("CG3").loss).to.deep.equal(["enhanced dependencies"]); 43 | expect(s.to("CoNLL-U").loss).to.deep.equal([]); 44 | expect(s.to("Params").loss).to.deep.equal(["enhanced dependencies"]); 45 | expect(s.to("plain text").loss).to.deep.equal(["heads"]); 46 | expect(s.to("SD").loss).to.deep.equal(["enhanced dependencies"]); 47 | }); 48 | 49 | it("unenhanced", () => { 50 | const s = new nx.Sentence(`1 one _ _ _ _ 0 root _ _ 51 | 2 two _ _ _ _ 3 _ 1|3 _ 52 | 3 three _ _ _ _ 1 _ 1 _`); 53 | 54 | expect(s.options.enhanced).to.equal(true); 55 | s.unenhance(); 56 | expect(s.options.enhanced).to.equal(false); 57 | expect(() => s.to("Brackets").loss).to.throw(nx.GeneratorError); 58 | expect(s.to("CG3").loss).to.deep.equal(["enhanced dependencies"]); 59 | expect(s.to("CoNLL-U").loss).to.deep.equal(["enhanced dependencies"]); 60 | expect(s.to("Params").loss).to.deep.equal(["enhanced dependencies"]); 61 | expect(s.to("plain text").loss).to.deep.equal(["heads"]); 62 | expect(s.to("SD").loss).to.deep.equal(["enhanced dependencies"]); 63 | }); 64 | }); 65 | -------------------------------------------------------------------------------- /test/notatrix/parser.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"), expect = require("chai").expect, 4 | sinon = require("sinon"), utils = require("./utils"), nx = require("../../src/notatrix"); 5 | 6 | describe("parser", () => { 7 | describe("parse formats explicitly to notatrix serial", () => { 8 | const options = {}; 9 | 10 | utils.forEachText((text, format, name) => { 11 | it(`${format}:${name}`, () => { 12 | const parsed = nx.parseAs[format](text, options); 13 | expect(() => nx.detectAs.notatrixSerial(parsed)).to.not.throw(); 14 | }); 15 | }); 16 | }); 17 | 18 | describe("parse formats implicitly to notatrix serial", () => { 19 | const options = {}; 20 | 21 | utils.forEachText((text, format, name) => { 22 | it(`${format}:${name}`, () => { 23 | const possibilities = nx.parse(text, options); 24 | _.each(possibilities, possibility => { 25 | expect(() => nx.detectAs.notatrixSerial(possibility)).to.not.throw(); 26 | }); 27 | }); 28 | }); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /test/notatrix/sentence.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"), expect = require("chai").expect, 4 | sinon = require("sinon"), utils = require("./utils"), nx = require("../../src/notatrix"); 5 | 6 | describe("Sentence", () => { 7 | describe("instantiate nx.Sentence with explicit format", () => { 8 | utils.forEachText((text, format, name) => { 9 | const options = { 10 | interpretAs: format, 11 | }; 12 | 13 | it(`${format}:${name}`, () => { 14 | expect(() => {new nx.Sentence(text, options)}).to.not.throw(); 15 | }); 16 | }); 17 | }); 18 | 19 | describe("instantiate nx.Sentence without explicit format", () => { 20 | utils.forEachText((text, format, name) => { 21 | it(`${format}:${name}`, 22 | () => { expect(() => {new nx.Sentence(text)}).to.not.throw(); }); 23 | }); 24 | }); 25 | 26 | describe("serialize nx.Sentence back into notatrix-serial format", () => { 27 | utils.forEachText((text, format, name) => { 28 | it(`${format}:${name}`, () => { 29 | const parsed = nx.parse(text, {returnAllPossibilities: false}); 30 | const serial = (new nx.Sentence(parsed)).serialize(); 31 | 32 | // get some sort of notatrix serial output 33 | expect(() => { nx.detectAs.notatrixSerial(serial); }).to.not.throw(); 34 | 35 | // in fact, get the same exact notatrix serial 36 | const clean = serial => { 37 | serial.tokens = serial.tokens.map(token => _.omit(token, "index")); 38 | }; 39 | expect(clean(serial)).to.equal(clean(parsed)); 40 | }); 41 | }); 42 | }); 43 | }); 44 | -------------------------------------------------------------------------------- /test/notatrix/setEmpty.js: -------------------------------------------------------------------------------- 1 | const _ = require("underscore"); 2 | const expect = require("chai").expect; 3 | const sinon = require("sinon"); 4 | const utils = require("./utils"); 5 | const nx = require("../../src/notatrix"); 6 | const data = require("./data"); 7 | 8 | describe("setEmpty", () => { 9 | it("toggling isEmpty on trivial data", () => { 10 | const s = new nx.Sentence(data["CoNLL-U"]["empty"]); 11 | expect(s.to("CoNLL-U").output).to.equal(`1 Sue Sue _ _ _ _ _ _ _ 12 | 2 likes like _ _ _ _ _ _ _ 13 | 3 coffee coffee _ _ _ _ _ _ _ 14 | 4 and and _ _ _ _ _ _ _ 15 | 5 Bill Bill _ _ _ _ _ _ _ 16 | 5.1 likes like _ _ _ _ _ _ _ 17 | 6 tea tea _ _ _ _ _ _ _`); 18 | 19 | s.tokens[1].setEmpty(true); 20 | expect(s.to("CoNLL-U").output).to.equal(`1 Sue Sue _ _ _ _ _ _ _ 21 | 1.1 likes like _ _ _ _ _ _ _ 22 | 2 coffee coffee _ _ _ _ _ _ _ 23 | 3 and and _ _ _ _ _ _ _ 24 | 4 Bill Bill _ _ _ _ _ _ _ 25 | 4.1 likes like _ _ _ _ _ _ _ 26 | 5 tea tea _ _ _ _ _ _ _`); 27 | 28 | s.tokens[2].setEmpty(true); 29 | expect(s.to("CoNLL-U").output).to.equal(`1 Sue Sue _ _ _ _ _ _ _ 30 | 1.1 likes like _ _ _ _ _ _ _ 31 | 1.2 coffee coffee _ _ _ _ _ _ _ 32 | 2 and and _ _ _ _ _ _ _ 33 | 3 Bill Bill _ _ _ _ _ _ _ 34 | 3.1 likes like _ _ _ _ _ _ _ 35 | 4 tea tea _ _ _ _ _ _ _`); 36 | 37 | s.tokens[1].setEmpty(false); 38 | s.tokens[2].setEmpty(false); 39 | expect(s.to("CoNLL-U").output).to.equal(`1 Sue Sue _ _ _ _ _ _ _ 40 | 2 likes like _ _ _ _ _ _ _ 41 | 3 coffee coffee _ _ _ _ _ _ _ 42 | 4 and and _ _ _ _ _ _ _ 43 | 5 Bill Bill _ _ _ _ _ _ _ 44 | 5.1 likes like _ _ _ _ _ _ _ 45 | 6 tea tea _ _ _ _ _ _ _`); 46 | }); 47 | 48 | it("toggling isEmpty on data with relations", () => { 49 | const s = new nx.Sentence(data["CoNLL-U"]["ud_example_tabs"]); 50 | expect(s.to("CoNLL-U").output) 51 | .to.equal( 52 | `1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 2:nsubj|4:nsubj _ 53 | 2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root 0:root _ 54 | 3 and and CONJ CC _ 4 cc 4:cc _ 55 | 4 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 2:conj _ 56 | 5 books book NOUN NNS Number=Plur 2 obj 2:obj|4:obj _ 57 | 6 . . PUNCT . _ 2 punct 2:punct _`); 58 | 59 | s.tokens[2].setEmpty(true); 60 | expect(s.to("CoNLL-U").output) 61 | .to.equal( 62 | `1 They they PRON PRP Case=Nom|Number=Plur 2 nsubj 2:nsubj|3:nsubj _ 63 | 2 buy buy VERB VBP Number=Plur|Person=3|Tense=Pres 0 root 0:root _ 64 | 2.1 and and CONJ CC _ _ _ 3:cc _ 65 | 3 sell sell VERB VBP Number=Plur|Person=3|Tense=Pres 2 conj 2:conj _ 66 | 4 books book NOUN NNS Number=Plur 2 obj 2:obj|3:obj _ 67 | 5 . . PUNCT . _ 2 punct 2:punct _`); 68 | }); 69 | }); 70 | -------------------------------------------------------------------------------- /test/notatrix/splitter.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonorthwash/ud-annotatrix/7489909318acc1f4b943181a6da59bb7df011ef6/test/notatrix/splitter.js -------------------------------------------------------------------------------- /test/notatrix/utils.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const _ = require("underscore"); 4 | 5 | const srcUtils = require("../../src/notatrix/utils"); 6 | const data = require("./data"); 7 | 8 | function spacesToTabs(str) { return str.replace(/[ \t]+/g, "\t"); } 9 | 10 | module.exports = _.extend({ 11 | 12 | forEachText: callback => { 13 | callback = callback || srcUtils.noop; 14 | 15 | _.each(data, (texts, format) => { 16 | if (srcUtils.formats.indexOf(format) > -1) 17 | _.each(texts, (text, name) => { callback(text, format, name); }); 18 | }); 19 | }, 20 | 21 | randomInt: (min, max) => { 22 | if (max === undefined) { 23 | max = min; 24 | min = 0; 25 | } 26 | return Math.floor(Math.random() * max) + min; 27 | }, 28 | 29 | forEachFormat: callback => { 30 | callback = callback || srcUtils.noop; 31 | _.each(srcUtils.formats, callback); 32 | }, 33 | 34 | spacesToTabs, 35 | 36 | cleanConllu: str => { 37 | return str.split("\n") 38 | .map(spacesToTabs) 39 | .map(line => { return line.trim(); }) 40 | .filter(srcUtils.thin) 41 | .join("\n"); 42 | }, 43 | 44 | clean: (str, maps) => { 45 | let lines = str.split("\n"); 46 | maps.forEach(map => { lines = lines.map(map); }); 47 | 48 | return lines.filter(srcUtils.thin).join("\n"); 49 | } 50 | 51 | }, 52 | srcUtils); 53 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "noEmit": true, 4 | "noFallthroughCasesInSwitch": true, 5 | "noImplicitAny": true, 6 | "noImplicitReturns": true, 7 | "outDir": "./build/js", 8 | "target": "es5" 9 | }, 10 | "include": [ 11 | "./src/" 12 | ], 13 | "exclude": [ 14 | "./src/server/public/" 15 | ] 16 | } 17 | --------------------------------------------------------------------------------