├── .dockerignore ├── dist ├── example │ ├── .gitignore │ ├── ocr-corrections │ │ └── digi │ │ │ └── 417576986 │ │ │ └── gt │ │ │ └── 0013 │ │ │ ├── line-0001.txt │ │ │ ├── line-0019.txt │ │ │ ├── line-0020.txt │ │ │ ├── line-0021.txt │ │ │ ├── line-0022.txt │ │ │ ├── line-0023.txt │ │ │ ├── line-0024.txt │ │ │ ├── line-0025.txt │ │ │ ├── line-0027.txt │ │ │ ├── comment-line-0001.txt │ │ │ ├── comment-line-0008.txt │ │ │ ├── comment-line-0010.txt │ │ │ ├── comment-line-0011.txt │ │ │ ├── comment-line-0013.txt │ │ │ ├── comment-line-0014.txt │ │ │ ├── comment-line-0015.txt │ │ │ ├── comment-line-0016.txt │ │ │ ├── comment-line-0017.txt │ │ │ ├── comment-line-0018.txt │ │ │ ├── comment-line-0019.txt │ │ │ ├── comment-line-0020.txt │ │ │ ├── comment-line-0021.txt │ │ │ ├── comment-line-0022.txt │ │ │ ├── comment-line-0023.txt │ │ │ ├── comment-line-0024.txt │ │ │ ├── comment-line-0025.txt │ │ │ ├── line-0018.txt │ │ │ ├── line-0026.txt │ │ │ ├── comment-line-0005.txt │ │ │ ├── comment-line-0004.txt │ │ │ ├── comment-line-0006.txt │ │ │ ├── comment-line-0009.txt │ │ │ ├── comment-line-0012.txt │ │ │ ├── comment-line-0002.txt │ │ │ ├── comment-line-0003.txt │ │ │ ├── comment-line-0026.txt │ │ │ ├── comment-line-0027.txt │ │ │ ├── comment-page.txt │ │ │ ├── line-0002.txt │ │ │ ├── line-0013.txt │ │ │ ├── line-0016.txt │ │ │ ├── line-0003.txt │ │ │ ├── line-0004.txt │ │ │ ├── line-0007.txt │ │ │ ├── line-0008.txt │ │ │ ├── line-0010.txt │ │ │ ├── line-0012.txt │ │ │ ├── line-0014.txt │ │ │ ├── line-0015.txt │ │ │ ├── line-0005.txt │ │ │ ├── line-0006.txt │ │ │ ├── line-0011.txt │ │ │ ├── line-0017.txt │ │ │ ├── comment-line-0007.txt │ │ │ ├── line-0009.txt │ │ │ ├── line-0001.png │ │ │ ├── line-0002.png │ │ │ ├── line-0003.png │ │ │ ├── line-0004.png │ │ │ ├── line-0005.png │ │ │ ├── line-0006.png │ │ │ ├── line-0007.png │ │ │ ├── line-0008.png │ │ │ ├── line-0009.png │ │ │ ├── line-0010.png │ │ │ ├── line-0011.png │ │ │ ├── line-0012.png │ │ │ ├── line-0013.png │ │ │ ├── line-0014.png │ │ │ ├── line-0015.png │ │ │ ├── line-0016.png │ │ │ ├── line-0017.png │ │ │ ├── line-0018.png │ │ │ ├── line-0019.png │ │ │ ├── line-0020.png │ │ │ ├── line-0021.png │ │ │ ├── line-0022.png │ │ │ ├── line-0023.png │ │ │ ├── line-0024.png │ │ │ ├── line-0025.png │ │ │ ├── line-0026.png │ │ │ └── line-0027.png │ └── fileadmin │ │ ├── 417576986 │ │ ├── max │ │ │ └── 417576986_0013.jpg │ │ ├── thumbs │ │ │ └── 417576986_0013.jpg │ │ └── hocr │ │ │ └── 417576986_0013.hocr │ │ └── digi │ │ └── 445442158 │ │ ├── max │ │ ├── 445442158_0010.jpg │ │ └── 445442158_0126.jpg │ │ └── thumbs │ │ ├── 445442158_0010.jpg │ │ └── 445442158_0126.jpg ├── favicon.ico ├── .htaccess ├── fonts │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.ttf │ ├── fontawesome-webfont.woff │ ├── fontawesome-webfont.woff2 │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ ├── glyphicons-halflings-regular.woff2 │ └── ebgaramond-d272d39ffc142ce90cbc15e70627bf6721558adb.ttf ├── ocropus-gtedit-wrapper.sh ├── ocr-gt-tools.default.yml ├── error-tags.json ├── ocr-gt-tools.css ├── ocr-gt-tools.cgi └── index.html ├── doc ├── screenshots │ ├── screenshot.png │ ├── sidebar-2016-05-04.png │ ├── cheatsheet-2016-05-04.png │ └── screenshot-2016-04-26.png ├── user-scripts │ ├── README.md │ └── scrape-wiki.user.js ├── ocr-gt-tools.apache.yml ├── ocr-gt-tools.docker.yml └── error-tags.json ├── .gitignore ├── ocr-gt-tools.js ├── js ├── views │ ├── history-view.js │ ├── dropzone-view.js │ ├── sidebar.js │ ├── cheatsheet-view.js │ ├── page-view.js │ ├── animation-view.js │ ├── selectbar.js │ ├── toolbar.js │ └── line-view.js ├── model │ ├── history.js │ ├── error-tags.js │ ├── settings.js │ ├── line.js │ ├── cheatsheet.js │ └── page.js ├── utils.js └── app.js ├── dev ├── apache.mk ├── docker.mk ├── debian.mk ├── plackup.mk └── docker-ocr-gt.sh ├── app.psgi ├── Dockerfile ├── test.sh ├── package.json ├── .jscsrc ├── README.md ├── ocr-gt-tools.styl ├── Makefile ├── INSTALL.md ├── API.md ├── ocr-gt-tools.pug └── ocr-gt-tools.cgi /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | -------------------------------------------------------------------------------- /dist/example/.gitignore: -------------------------------------------------------------------------------- 1 | .pynative 2 | ocr-corrections/digi/445442158 3 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0001.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0019.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0020.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0021.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0022.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0023.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0024.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0025.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0027.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0001.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0008.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0010.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0011.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0013.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0014.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0015.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0016.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0017.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0018.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0019.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0020.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0021.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0022.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0023.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0024.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0025.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0018.txt: -------------------------------------------------------------------------------- 1 | nes. 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0026.txt: -------------------------------------------------------------------------------- 1 | a iij 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0005.txt: -------------------------------------------------------------------------------- 1 | dqdwq 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0004.txt: -------------------------------------------------------------------------------- 1 | #text-italic 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0006.txt: -------------------------------------------------------------------------------- 1 | #text-italic 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0009.txt: -------------------------------------------------------------------------------- 1 | #text-italic 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0012.txt: -------------------------------------------------------------------------------- 1 | #text-italic 2 | -------------------------------------------------------------------------------- /dist/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/favicon.ico -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0002.txt: -------------------------------------------------------------------------------- 1 | #wrong-image-section 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0003.txt: -------------------------------------------------------------------------------- 1 | #wrong-image-section 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0026.txt: -------------------------------------------------------------------------------- 1 | oben abgeschnitten 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0027.txt: -------------------------------------------------------------------------------- 1 | #wrong-image-section 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-page.txt: -------------------------------------------------------------------------------- 1 | Unvollst\xE4ndig erfasst 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0002.txt: -------------------------------------------------------------------------------- 1 | distinguer les principes des 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0013.txt: -------------------------------------------------------------------------------- 1 | cours on marche à tâtons, on 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0016.txt: -------------------------------------------------------------------------------- 1 | venu, on ne forme que des 2 | -------------------------------------------------------------------------------- /dist/.htaccess: -------------------------------------------------------------------------------- 1 | 2 | Order allow,deny 3 | Deny from all 4 | 5 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0003.txt: -------------------------------------------------------------------------------- 1 | consequences & les regles des 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0004.txt: -------------------------------------------------------------------------------- 1 | exceptions; & c'est ce que fait 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0007.txt: -------------------------------------------------------------------------------- 1 | je desire qu'il y en ait en tou- 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0008.txt: -------------------------------------------------------------------------------- 1 | tes les matieres qu'il importe 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0010.txt: -------------------------------------------------------------------------------- 1 | porté à composer le Catechis- 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0012.txt: -------------------------------------------------------------------------------- 1 | methode des études. Sans ce se- 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0014.txt: -------------------------------------------------------------------------------- 1 | commence par de petits détail, 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0015.txt: -------------------------------------------------------------------------------- 1 | on suit l'autorité du premier 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0005.txt: -------------------------------------------------------------------------------- 1 | une institution. Il y a long temts 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0006.txt: -------------------------------------------------------------------------------- 1 | que j'en voi la ncecessité &amp; que 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0011.txt: -------------------------------------------------------------------------------- 1 | me historique &amp; le traité de la 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0017.txt: -------------------------------------------------------------------------------- 1 | doutes &amp; des optinions incertai- 2 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/comment-line-0007.txt: -------------------------------------------------------------------------------- 1 | #text-italic 2 | #wrong-image-section 3 | -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0009.txt: -------------------------------------------------------------------------------- 1 | de favoir. C'est aussi ce qui m'a<br class=""> 2 | -------------------------------------------------------------------------------- /doc/screenshots/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/doc/screenshots/screenshot.png -------------------------------------------------------------------------------- /dist/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /dist/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /dist/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /dist/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /doc/screenshots/sidebar-2016-05-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/doc/screenshots/sidebar-2016-05-04.png -------------------------------------------------------------------------------- /doc/screenshots/cheatsheet-2016-05-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/doc/screenshots/cheatsheet-2016-05-04.png -------------------------------------------------------------------------------- /doc/screenshots/screenshot-2016-04-26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/doc/screenshots/screenshot-2016-04-26.png -------------------------------------------------------------------------------- /dist/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /dist/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /dist/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/fonts/*.woff 2 | /dist/fonts/*.ttf 3 | /dist/log 4 | /dist/ocr-gt-tools.yml 5 | /dist/vendor 6 | /node_modules 7 | npm-debug.log 8 | -------------------------------------------------------------------------------- /dist/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /dist/example/fileadmin/417576986/max/417576986_0013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/417576986/max/417576986_0013.jpg -------------------------------------------------------------------------------- /dist/example/fileadmin/417576986/thumbs/417576986_0013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/417576986/thumbs/417576986_0013.jpg -------------------------------------------------------------------------------- /dist/example/fileadmin/digi/445442158/max/445442158_0010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/digi/445442158/max/445442158_0010.jpg -------------------------------------------------------------------------------- /dist/example/fileadmin/digi/445442158/max/445442158_0126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/digi/445442158/max/445442158_0126.jpg -------------------------------------------------------------------------------- /dist/example/fileadmin/digi/445442158/thumbs/445442158_0010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/digi/445442158/thumbs/445442158_0010.jpg -------------------------------------------------------------------------------- /dist/example/fileadmin/digi/445442158/thumbs/445442158_0126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/fileadmin/digi/445442158/thumbs/445442158_0126.jpg -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0001.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0002.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0003.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0004.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0005.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0006.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0007.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0008.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0009.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0010.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0011.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0012.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0013.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0014.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0015.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0016.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0017.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0018.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0019.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0020.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0021.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0022.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0023.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0024.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0025.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0026.png -------------------------------------------------------------------------------- /dist/example/ocr-corrections/digi/417576986/gt/0013/line-0027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/example/ocr-corrections/digi/417576986/gt/0013/line-0027.png -------------------------------------------------------------------------------- /dist/fonts/ebgaramond-d272d39ffc142ce90cbc15e70627bf6721558adb.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UB-Mannheim/ocr-gt-tools/HEAD/dist/fonts/ebgaramond-d272d39ffc142ce90cbc15e70627bf6721558adb.ttf -------------------------------------------------------------------------------- /doc/user-scripts/README.md: -------------------------------------------------------------------------------- 1 | # User scripts 2 | 3 | Requires Greasemonkey (Firefox) or Tampermonkey (Chrome). 4 | 5 | ## scrape-wiki.user.js 6 | 7 | Turns the wiki page where we collect the files into a JSON list. 8 | -------------------------------------------------------------------------------- /ocr-gt-tools.js: -------------------------------------------------------------------------------- 1 | // Name: ocr-gt-tools.js 2 | 3 | $(function onPageLoaded() { 4 | var app = window.app = new App(); 5 | app.on('app:initialized', function onInit() { 6 | console.info("Initialized ocr-gt app."); 7 | }); 8 | app.init(); 9 | }); 10 | -------------------------------------------------------------------------------- /doc/ocr-gt-tools.apache.yml: -------------------------------------------------------------------------------- 1 | defaults: 2 | path-prefix: '/var/www/html' 3 | correction-root: '/ocr-corrections' 4 | images-root: '/fileadmin' 5 | hostname: 'http://localhost' 6 | url_prefix: 'http://localhost' 7 | 8 | logging: 9 | stderr: false 10 | -------------------------------------------------------------------------------- /doc/ocr-gt-tools.docker.yml: -------------------------------------------------------------------------------- 1 | defaults: 2 | path-prefix: '/data' 3 | correction-root: '/ocr-corrections' 4 | images-root: '/fileadmin' 5 | hostname: 'http://localhost:8888' 6 | url_prefix: 'http://localhost:8888' 7 | 8 | logging: 9 | stderr: true 10 | -------------------------------------------------------------------------------- /js/views/history-view.js: -------------------------------------------------------------------------------- 1 | function HistoryView(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | HistoryView.prototype.render = function() { 6 | this.$el.find("tbody").empty(); 7 | for (let i = 0; i < this.model.items.length ; i++) { 8 | this.$el.find("tbody").append(this.tpl(this.model.items[i])); 9 | } 10 | }; 11 | 12 | -------------------------------------------------------------------------------- /js/views/dropzone-view.js: -------------------------------------------------------------------------------- 1 | function Dropzone(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | Dropzone.prototype.render = function() { 6 | var self = this; 7 | 8 | $("#load-image button").on('click', function() { 9 | window.location.hash = '#' + $("#load-image input").val(); 10 | }); 11 | 12 | window.app.on('app:loading', function hideDropzone() { self.$el.addClass('hidden'); }); 13 | 14 | }; 15 | -------------------------------------------------------------------------------- /js/model/history.js: -------------------------------------------------------------------------------- 1 | function History() { } 2 | History.prototype.url = 'ocr-gt-tools.cgi?action=history&mine=true'; 3 | History.prototype.load = function(cb) { 4 | var self = this; 5 | $.ajax({ 6 | url: this.url, 7 | dataType: "json", 8 | error: cb, 9 | success: function(data) { 10 | notie.alert(1, "History geladen", 1); 11 | self.items = data; 12 | cb(null, self); 13 | }, 14 | }); 15 | }; 16 | 17 | 18 | -------------------------------------------------------------------------------- /js/views/sidebar.js: -------------------------------------------------------------------------------- 1 | function Sidebar(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | 6 | Sidebar.prototype.render = function renderSidebar() { 7 | this.$el.empty().html(window.app.templates.rightSidebar(this.model)); 8 | 9 | var self = this; 10 | this.$el.on('input', function() { 11 | self.model['page-comment'] = self.$el.find('textarea').val(); 12 | window.app.emit('app:changed'); 13 | }); 14 | }; 15 | -------------------------------------------------------------------------------- /dev/apache.mk: -------------------------------------------------------------------------------- 1 | .PHONY: deploy 2 | 3 | APACHE_USER = www-data 4 | APACHE_GROUP = www-data 5 | APACHE_DIR = /var/www/html 6 | APACHE_BASEURL = ocr-gt 7 | SUDO = sudo 8 | 9 | # 10 | # Deploy on apache 11 | # 12 | 13 | deploy: 14 | $(SUDO) mkdir -p "$(APACHE_DIR)/$(APACHE_BASEURL)" 15 | $(SUDO) cp -r dist/* dist/.htaccess "$(APACHE_DIR)/$(APACHE_BASEURL)" 16 | $(SUDO) chown -R $(APACHE_USER):$(APACHE_GROUP) "$(APACHE_DIR)/$(APACHE_BASEURL)" 17 | $(SUDO) chmod -R u+w "$(APACHE_DIR)/$(APACHE_BASEURL)" 18 | -------------------------------------------------------------------------------- /js/model/error-tags.js: -------------------------------------------------------------------------------- 1 | function ErrorTags() { 2 | this.items = []; 3 | } 4 | ErrorTags.prototype.url = 'error-tags.json'; 5 | ErrorTags.prototype.load = function(cb) { 6 | var self = this; 7 | $.ajax({ 8 | url: this.url, 9 | dataType: "json", 10 | error: cb, 11 | success: function(data) { 12 | self.items = []; 13 | var keys = Object.keys(data); 14 | for (let i = 0; i < keys.length; i++) { 15 | self.items.push(data[keys[i]]); 16 | } 17 | cb(); 18 | }, 19 | }); 20 | }; 21 | -------------------------------------------------------------------------------- /js/model/settings.js: -------------------------------------------------------------------------------- 1 | var defaultSettings = { 2 | zoomInFactor: 1.4, 3 | zoomOutFactor: 0.8, 4 | debug: true, 5 | cgiUrl: 'ocr-gt-tools.cgi', 6 | defaultViews: ['.transcription','img'], 7 | animationTimeout: 5000, 8 | animationsPerRound: 50, 9 | animationInterval: 100, 10 | }; 11 | 12 | function Settings(opts) { 13 | for (let k in defaultSettings) { this[k] = defaultSettings[k]; } 14 | } 15 | 16 | Settings.prototype.load = function loadSettings() { 17 | console.log("NOT IMPLEMENTED"); 18 | }; 19 | 20 | Settings.prototype.save = function loadSettings() { 21 | console.log("NOT IMPLEMENTED"); 22 | }; 23 | 24 | -------------------------------------------------------------------------------- /dev/docker.mk: -------------------------------------------------------------------------------- 1 | .PHONY: docker docker-run docker-run-bash 2 | 3 | DOCKER_IMAGE = ocr-gt-tools 4 | DOCKER_PORT = 12345 5 | MOUNT_DIR = $(PWD)/dist/example 6 | CONFIG_FILE = $(PWD)/doc/ocr-gt-tools.docker.yml 7 | 8 | # 9 | # Docker related 10 | # 11 | docker: 12 | docker build -t kbai/$(DOCKER_IMAGE) . 13 | 14 | docker-run: docker 15 | @echo Running on http://localhost:$(DOCKER_PORT)/ocr-gt/ 16 | docker run -it --rm -p $(DOCKER_PORT):80 \ 17 | --volume="$(MOUNT_DIR):/data" \ 18 | --volume="$(CONFIG_FILE):/usr/local/apache2/htdocs/ocr-gt/ocr-gt-tools.yml" \ 19 | $(DOCKER_IMAGE) $(DOCKER_COMMAND) 20 | 21 | docker-run-bash: 22 | $(MAKE) docker-run DOCKER_COMMAND=bash 23 | -------------------------------------------------------------------------------- /js/model/line.js: -------------------------------------------------------------------------------- 1 | function Line(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.changed = false; 4 | } 5 | 6 | Line.prototype.getTags = function getTags() { 7 | var ret = {}; 8 | this.comment.replace(/(#[a-z0-9-]+)\s*([^\n#]+)?/g, function(_, tag, desc) { 9 | ret[tag] = desc; 10 | }); 11 | return ret; 12 | }; 13 | 14 | Line.prototype.addTag = function addTag(tag, desc) { 15 | desc = desc || ''; 16 | if (this.getTags().hasOwnProperty(tag)) { 17 | console.info("Already has this tag: " + tag); 18 | return; 19 | } 20 | this.comment = (this.comment.trim() + "\n" + tag.trim() + " " + desc.trim()).trim(); 21 | return true; 22 | }; 23 | -------------------------------------------------------------------------------- /js/model/cheatsheet.js: -------------------------------------------------------------------------------- 1 | function Cheatsheet() { 2 | this.items = []; 3 | } 4 | Cheatsheet.prototype.url = 'special-chars.json'; 5 | Cheatsheet.prototype.load = function(cb) { 6 | var self = this; 7 | $.ajax({ 8 | url: this.url, 9 | dataType: "json", 10 | error: cb, 11 | success: function(data) { 12 | self.items = []; 13 | var keys = Object.keys(data); 14 | for (let i = 0; i < keys.length; i++) { 15 | var cheatsheetEntry = data[keys[i]]; 16 | // console.log(cheatsheetEntry.id, cheatsheetEntry.recognition); 17 | self.items.push(cheatsheetEntry); 18 | } 19 | cb(); 20 | }, 21 | }); 22 | }; 23 | 24 | -------------------------------------------------------------------------------- /dist/ocropus-gtedit-wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | ocrBaseDir="$(dirname "$(readlink -f "$0")")" 6 | 7 | hocr="$1" 8 | imageDir="$2" 9 | correctionDir="$3" 10 | 11 | if [[ ! -e "$hocr" ]];then 12 | echo "hocr '$hocr' does not exist" 13 | exit 1 14 | fi 15 | if [[ ! -d "$imageDir" ]];then 16 | echo "imageDir '$imageDir' is not a directory" 17 | exit 1 18 | fi 19 | if [[ -z "$correctionDir" ]];then 20 | echo "correctionDir not given" 21 | exit 1 22 | fi 23 | 24 | # Make sure the 'correction-dir' exists 25 | mkdir -p "$correctionDir" 26 | cd "$correctionDir" 27 | "$ocrBaseDir/vendor/hocr-tools/hocr-extract-images" -b "$imageDir" -p 'line-%04d.png' "$hocr" 28 | for i in line-*.txt;do 29 | mv "$i" "ocr-$i" 30 | echo "" > "$i" 31 | echo "" > "comment-$i" 32 | done 33 | echo "" > "comment-page.txt" 34 | -------------------------------------------------------------------------------- /dev/debian.mk: -------------------------------------------------------------------------------- 1 | .PHONY: apt-get dev-apt-get 2 | 3 | # Debian packages required for running the backend 4 | DEBIAN_PACKAGES = \ 5 | git \ 6 | libcgi-pm-perl \ 7 | libjson-perl \ 8 | libhash-merge-perl \ 9 | libyaml-perl \ 10 | libipc-run-perl \ 11 | python-fabio \ 12 | python-lxml \ 13 | python-numpy \ 14 | python-scipy \ 15 | python-matplotlib 16 | 17 | # Debian packages required for running the dev-server and rebuild the frontend 18 | DEV_DEBIAN_PACKAGES = \ 19 | cleancss \ 20 | npm \ 21 | libplack-perl \ 22 | curl 23 | 24 | # Install debian packages, non-interactively 25 | APT_GET = apt-get -y 26 | 27 | # 28 | # Dependencies to execute ocropy / hocr-tools in a CGI environment 29 | # 30 | apt-get: 31 | $(APT_GET) install $(DEBIAN_PACKAGES) 32 | 33 | dev-apt-get: 34 | $(APT_GET) install $(DEV_DEBIAN_PACKAGES) 35 | 36 | -------------------------------------------------------------------------------- /app.psgi: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use File::Basename qw(dirname); 5 | use Cwd qw(abs_path); 6 | use Config::IniFiles qw( :all); 7 | 8 | use Plack::App::WrapCGI; 9 | use Plack::Builder; 10 | 11 | my $CGI_SCRIPT = "dist/ocr-gt-tools.cgi"; 12 | print $CGI_SCRIPT; 13 | print "\n"; 14 | my $app = Plack::App::WrapCGI->new( 15 | script => $CGI_SCRIPT, 16 | execute => 1 17 | )->to_app; 18 | builder { 19 | enable( 20 | "Plack::Middleware::Static", 21 | path => qr{^/(fileadmin|ocr-corrections)}, 22 | root => './dist/example/' 23 | ); 24 | enable( 25 | "Plack::Middleware::Static", 26 | path => qr{^/(?!ocr-gt-tools.cgi).*}, 27 | pass_through => 1, 28 | root => './dist' 29 | ); 30 | enable( 31 | "Plack::Middleware::Static", 32 | path => qr{^/(?!ocr-gt-tools.cgi).*}, 33 | root => './dist' 34 | ); 35 | mount "/" => $app; 36 | }; 37 | 38 | # vim: ft=perl : 39 | -------------------------------------------------------------------------------- /dev/plackup.mk: -------------------------------------------------------------------------------- 1 | .PHONY: dev-server dev-browser dist-watch 2 | 3 | # Port of the dev server 4 | PLACKUP_PORT = 9090 5 | 6 | # "Plackup is a command line utility to run PSGI applications from the command line" 7 | # https://en.wikipedia.org/wiki/Plack_(software) 8 | PLACKUP_OPTS = --port $(PLACKUP_PORT) -R 9 | PLACKUP = plackup $(PLACKUP_OPTS) 10 | 11 | # Chokidar is a file system change watcher (think: inotify) 12 | # https://github.com/kimmobrunfeldt/chokidar-cli 13 | CHOKIDAR_OPTS = --verbose --polling --initial --debounce 100 14 | CHOKIDAR = chokidar $(CHOKIDAR_OPTS) 15 | 16 | # The files to watch for changes for to trigger a rebuild 17 | WATCH_FILES = Makefile ocr-gt-tools.* ${PUG_FILES} *.json js/**/*.js js/*.js 18 | 19 | # 20 | # Run the development standalone server on port 9090 21 | # 22 | 23 | dev-server: 24 | echo $(PWD) 25 | $(PLACKUP) dev/app.psgi 26 | 27 | dev-browser: 28 | xdg-open http://localhost:9090/dist/index.html 29 | 30 | # 31 | # Automatically rebuild on file change 32 | # 33 | dist-watch: 34 | $(CHOKIDAR) $(WATCH_FILES) -c 'time $(MAKE) --no-print-directory dist' 35 | 36 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM httpd:2.4 2 | 3 | # Install dependencies 4 | ADD dev/debian.mk debian.mk 5 | RUN apt-get update && apt-get install make && make -f debian.mk apt-get 6 | # Enable CGI in Apache 7 | # Enable .htaccess support 8 | RUN sed -i \ 9 | -e 's/#LoadModule cgid_module/LoadModule cgid_module/' \ 10 | -e 's/^\s*Options.*/\0 ExecCGI/' \ 11 | -e 's/^\s*#AddHandler cgi-script/AddHandler cgi-script/' \ 12 | -e 's/AllowOverride None/AllowOverride All/' \ 13 | /usr/local/apache2/conf/httpd.conf 14 | # Set up a data volume 15 | RUN mkdir /data && chown daemon:www-data /data && ln -s /data/fileadmin /data/ocr-corrections /usr/local/apache2/htdocs/ 16 | VOLUME ["/data"] 17 | ADD dev/apache.mk apache.mk 18 | # Add dist folder 19 | ADD dist dist 20 | # Create configuration 21 | # RUN cat dist/ocr-gt-tools.dev.yml \ 22 | # | sed 's,path-prefix:.*,path-prefix: "/data",' \ 23 | # | sed 's,stderr:.*,stderr: true,' \ 24 | # > dist/ocr-gt-tools.yml 25 | RUN make -f apache.mk \ 26 | SUDO_APACHE="" \ 27 | APACHE_DIR="/usr/local/apache2/htdocs" \ 28 | APACHE_BASEURL="ocr-gt" \ 29 | APACHE_USER="daemon" \ 30 | APACHE_GROUP="www-data" \ 31 | deploy 32 | ADD doc/ocr-gt-tools.docker.yml htdocs/ocr-gt/ocr-gt-tools.yml 33 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | PORT=9091 3 | SAMPLE_URL="http://example.org/fileadmin/digi/445442158/thumbs/445442158_0126.jpg" 4 | SAMPLE_THUMB="example/fileadmin/digi/445442158/thumbs/445442158_0126.jpg" 5 | SAMPLE_CORR="example/ocr-corrections/digi/445442158/gt/0126/correction.html" 6 | SAMPLE_COMM="example/ocr-corrections/digi/445442158/gt/0126/anmerkungen.txt" 7 | 8 | # rm request log files, samples, start the server, wait a second, see if it's still running 9 | start_server() { 10 | rm -f dist/log/* $SAMPLE_COMM $SAMPLE_CORR 11 | plackup --port=$PORT app.psgi & SERVER_PID=$! 12 | sleep 1 13 | ps -p "$SERVER_PID" 2>/dev/null 14 | } 15 | 16 | stop_server() { 17 | kill "$SERVER_PID" && true 18 | cat dist/log/ocr-gt-tools.log 19 | } 20 | 21 | # Test history 22 | test_history() { 23 | curl -i "http://localhost:$PORT/ocr-gt-tools.cgi?action=history" 24 | [[ "$(wc -l dist/log/request.log|cut -d' ' -f1)" = "1" ]]; 25 | } 26 | 27 | # Test create 28 | test_create() { 29 | [[ ! -e $SAMPLE_COMM ]]; 30 | [[ ! -e $SAMPLE_CORR ]]; 31 | curl -i "http://localhost:$PORT/ocr-gt-tools.cgi?action=create&imageUrl=$SAMPLE_URL" 32 | [[ -e $SAMPLE_COMM ]]; 33 | [[ -e $SAMPLE_CORR ]]; 34 | } 35 | 36 | 37 | # -x Trace steps 38 | # -e exit on first non-null return value 39 | set -e 40 | 41 | # Stop the server for failing tests 42 | trap stop_server EXIT 43 | 44 | start_server 45 | 46 | test_create 47 | test_history 48 | 49 | stop_server 50 | -------------------------------------------------------------------------------- /js/views/cheatsheet-view.js: -------------------------------------------------------------------------------- 1 | function CheatsheetView(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | // Setup clipboard 5 | new Clipboard('.code'); 6 | } 7 | 8 | CheatsheetView.prototype.applyFilter = function applyFilter() { 9 | var self = this; 10 | $.each(self.model.items, function(id, desc) { 11 | if (self.filter && 12 | self.filter !== "" && 13 | desc.baseLetter.indexOf(self.filter) === -1 && 14 | desc.baseLetter.indexOf(self.filter.toLowerCase()) === -1) { 15 | $("#cheatsheet-" + desc.id).addClass('hidden'); 16 | } else { 17 | $("#cheatsheet-" + desc.id).removeClass('hidden'); 18 | } 19 | }); 20 | }; 21 | 22 | CheatsheetView.prototype.render = function render() { 23 | var self = this; 24 | self.$el.find(".cheatsheet").empty(); 25 | $.each(self.model.items, function(idx, model) { 26 | self.$el.find('.cheatsheet').append(self.tpl(model)); 27 | }); 28 | self.$el.find('button').on('click', function() { 29 | notie.alert(1, "In Zwischenablage kopiert: '" + $(this).attr('data-clipboard-text') + "'", 1); 30 | }); 31 | self.$el.find('input[type="text"]').on('keydown', function(e) { 32 | self.filter = (e.keyCode < 32 || e.ctrlKey || e.altKey) ? null : String.fromCharCode(e.keyCode); 33 | self.applyFilter(); 34 | $(this).val(''); 35 | }); 36 | return self; 37 | }; 38 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ocr-gt-tools", 3 | "version": "1.0.1", 4 | "description": "A web interface for creating ground truth for improving OCR", 5 | "keywords": [ 6 | "ocr", 7 | "gt" 8 | ], 9 | "main": "Gruntfile.js", 10 | "directories": { 11 | "example": "example" 12 | }, 13 | "dependencies": {}, 14 | "devDependencies": { 15 | "async": "^1.5.2", 16 | "bootstrap": "^3.3.6", 17 | "clipboard": "^1.5.10", 18 | "font-awesome": "^4.6.1", 19 | "handlebars": "^4.0.5", 20 | "jquery": ">=3.4.0", 21 | "notie": "^3.2.0", 22 | "reset-css": "^2.0.2011012603", 23 | "chokidar-cli": "^1.2.0", 24 | "clean-css": ">=4.1.11", 25 | "jscs": "^3.0.3", 26 | "pug-cli": "^1.0.0-alpha6", 27 | "stylus": "^0.54.2", 28 | "uglify-js": "^2.6.2", 29 | "webfont-dl": "^0.1.2" 30 | }, 31 | "scripts": { 32 | "test": "echo \"Error: no test specified\" && exit 1" 33 | }, 34 | "repository": { 35 | "type": "git", 36 | "url": "git+https://github.com/UB-Mannheim/ocr-gt-tools.git" 37 | }, 38 | "author": "bfallert, kba", 39 | "contributors": [ 40 | { 41 | "name": "Konstantin Baierer" 42 | }, 43 | { 44 | "name": "Per Broman" 45 | }, 46 | { 47 | "name": "Bernd Fallert" 48 | }, 49 | { 50 | "name": "Stefan Weil" 51 | }, 52 | { 53 | "name": "Philipp Zumstein" 54 | } 55 | ], 56 | "license": "GPL-3.0", 57 | "bugs": { 58 | "url": "https://github.com/UB-Mannheim/ocr-gt-tools/issues" 59 | }, 60 | "homepage": "https://github.com/UB-Mannheim/ocr-gt-tools#readme" 61 | } 62 | -------------------------------------------------------------------------------- /js/views/page-view.js: -------------------------------------------------------------------------------- 1 | function PageView(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | this.lineViews = []; 5 | } 6 | 7 | /** 8 | * Sort the rows by image width 9 | * 10 | * @param {number} order Sort descending (-1) or ascending (1, default) 11 | */ 12 | PageView.prototype.sortRowsByWidth = function sortRowsByWidth(order) { 13 | var order = order || 1; 14 | this.$el.html( 15 | this.$el.find(".row").sort(function(a, b) { 16 | var aWidth = Utils.getImageWidth(a); 17 | var bWidth = Utils.getImageWidth(b); 18 | return (aWidth - bWidth) * order; 19 | }).detach() 20 | ); 21 | }; 22 | 23 | /** 24 | * Sort the rows by line number 25 | * 26 | * @param {number} order Sort descending (-1) or ascending (1, default) 27 | */ 28 | PageView.prototype.sortRowsByLine = function sortRowsByLine(order) { 29 | var order = order || 1; 30 | this.$el.html( 31 | this.$el.find(".row").sort(function(a, b) { 32 | var aLine = $(a).attr('id').replace(/[^\d]/g, ''); 33 | var bLine = $(b).attr('id').replace(/[^\d]/g, ''); 34 | return (aLine - bLine) * order; 35 | }).detach() 36 | ); 37 | }; 38 | 39 | PageView.prototype.render = function() { 40 | this.$el.find('*').off().empty(); 41 | // render lines 42 | for (let i = 0; i < this.model.lines.length; i++) { 43 | var lineModel = this.model.lines[i]; 44 | var lineEl = $(window.app.templates.lineContainer(lineModel)).appendTo(this.$el); 45 | var lineView = new LineView({"$el": lineEl, "model": lineModel}); 46 | lineView.render(); 47 | this.lineViews.push(lineView); 48 | } 49 | }; 50 | -------------------------------------------------------------------------------- /dev/docker-ocr-gt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DOCKER_PORT=8888 4 | 5 | usage () { 6 | echo "Usage: $(basename "$0") [options...] [image-dir [correction-dir]] 7 | 8 | Options: 9 | -h --help Show this help 10 | -p --port Use this local port. Default: $DOCKER_PORT 11 | 12 | image-dir Directory with images (Default: ./dist/example/fileadmin) 13 | correction-dir Directory with corrections (Default: ./dist/example/ocr-corrections) 14 | " 15 | } 16 | 17 | while [[ "$1" = -* ]];do 18 | case "$1" in 19 | -h|--help) usage; exit ;; 20 | -p|--port) DOCKER_PORT="$2"; shift ;; 21 | esac 22 | shift 23 | done 24 | 25 | # path to the scanned images on the **host machine** 26 | IMAGE_PATH=$1 27 | IMAGE_PATH=${1:-$PWD/dist/example/fileadmin} 28 | # Path to the corrections on the **host macine** 29 | CORRECTIONS_PATH=$2 30 | CORRECTIONS_PATH=${CORRECTIONS_PATH:-$PWD/dist/example/ocr-corrections} 31 | # Name of the image file 32 | DOCKER_IMAGE="kbai/ocr-gt-tools" 33 | # Name of the container 34 | DOCKER_APP="ocr-gt-app" 35 | 36 | declare -a DOCKER_RUNARGS 37 | # Map ports 38 | DOCKER_RUNARGS+=("-p" "${DOCKER_PORT}:80") 39 | # Interactive and with terminal 40 | # DOCKER_RUNARGS+=("-it") 41 | # Delete container after run 42 | DOCKER_RUNARGS+=("--rm") 43 | # Ignore SIGWINCH 44 | DOCKER_RUNARGS+=("--sig-proxy=false") 45 | # Set application name 46 | DOCKER_RUNARGS+=("--name" "$DOCKER_APP") 47 | # Mount 48 | DOCKER_RUNARGS+=("--volume=${IMAGE_PATH}:/data/fileadmin") 49 | # Mount 50 | DOCKER_RUNARGS+=("--volume=${CORRECTIONS_PATH}:/data/ocr-corrections") 51 | # DOCKER_RUNARGS+=("--privileged=true") 52 | 53 | # Run docker 54 | echo docker run "${DOCKER_RUNARGS[@]}" "$DOCKER_IMAGE" 55 | docker run "${DOCKER_RUNARGS[@]}" "$DOCKER_IMAGE" 56 | -------------------------------------------------------------------------------- /js/views/animation-view.js: -------------------------------------------------------------------------------- 1 | function WaitingAnimation(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | WaitingAnimation.prototype.render = function() { 6 | window.app.on('app:loading', this.start.bind(this)); 7 | window.app.on('app:loaded', this.stop.bind(this)); 8 | window.app.on('app:ajaxError', this.stop.bind(this)); 9 | this.glyphs = []; 10 | for (let i = 0; i < this.model.items.length; i++) { 11 | for (let j = 0 ; j < this.model.items[i].sample.length; j++) { 12 | this.glyphs.push(this.model.items[i].sample[j]); 13 | } 14 | } 15 | }; 16 | 17 | WaitingAnimation.prototype.stop = function stopWaitingAnimation() { 18 | this.$el.addClass('hidden'); 19 | this.$el.empty(); 20 | clearInterval(this.animationId); 21 | clearTimeout(this.timeoutId); 22 | }; 23 | 24 | WaitingAnimation.prototype.start = function startWaitingAnimation() { 25 | var self = this; 26 | this.$el.removeClass('hidden'); 27 | this.animationId = setInterval(function() { 28 | var perRound = window.app.settings.animationsPerRound; 29 | while (perRound-- > 0) { 30 | $(self.glyphs[parseInt(Math.random() * self.glyphs.length)]) 31 | .css('top', parseInt(Math.random() * 100) + "vh") 32 | .css('left', parseInt(Math.random() * 100) + "vw") 33 | .appendTo(self.$el) ; 34 | } 35 | }, window.app.settings.animationInterval); 36 | this.timeoutId = setTimeout(function timeoutAnimation() { 37 | console.error("Animation ran too long, stopping it"); 38 | notie.alert(3, "Loading took more than " + 39 | (window.app.settings.animationTimeout / 1000) + 40 | " seconds. Please see the console for possible errors"); 41 | clearInterval(self.animationId); 42 | }, window.app.settings.animationTimeout); 43 | }; 44 | -------------------------------------------------------------------------------- /dist/ocr-gt-tools.default.yml: -------------------------------------------------------------------------------- 1 | defaults: 2 | path-prefix: '/example' 3 | correction-root: '/ocr-corrections' 4 | images-root: '/fileadmin' 5 | debuglog: '/tmp/ocr-gt.log' 6 | hostname: 'http://localhost:9090' 7 | url_prefix: 'http://localhost:9090' 8 | page-comment-filename: 'comment-page.txt' 9 | 10 | logging: 11 | logdir: '/log' 12 | stderr: true 13 | 14 | pattern: 15 | - | 16 | ^ 17 | (?https?://[^/]+) 18 | /fileadmin 19 | /(?[^/]+) 20 | /(?[^/]+) 21 | /[^\/]+ 22 | /[^_]+_(?\d+) 23 | .* 24 | $ 25 | - | 26 | /(?[^/]+) 27 | /(?[^/]+) 28 | /gt 29 | /(?[^/]+) 30 | /[^/]+ 31 | $ 32 | - | 33 | /(?[^/]+) 34 | /(?[^/]+) 35 | /(?[^/]+) 36 | $ 37 | 38 | 39 | # in this order 40 | template: 41 | url: 42 | thumb-url: '///thumbs/_.jpg' 43 | hires-url: '///max/_.jpg' 44 | hocr-url: '///hocr/_.hocr' 45 | line-image-url: '///gt//line-%04d.png' 46 | landing-page-url: 'https://digi.bib.uni-mannheim.de/en/suchergebnis/?tx_dlf[query]=' 47 | path: 48 | correction-dir: '///gt/' 49 | hocr-file: '///hocr/_.hocr' 50 | hires-dir: '///max' 51 | hires-file: '///max/_.jpg' 52 | command: 53 | find-corrections-for-work: 54 | - 'find' 55 | - '//' 56 | - '-type' 57 | - 'f' 58 | - '-name' 59 | - '' 60 | extract-images: 61 | - '/ocropus-gtedit-wrapper.sh' 62 | - '' 63 | - '' 64 | - '' 65 | - '' 66 | -------------------------------------------------------------------------------- /js/model/page.js: -------------------------------------------------------------------------------- 1 | function Page(urlOrOpts) { 2 | var self = this; 3 | self.lines = []; 4 | if (typeof urlOrOpts === 'string') { 5 | self.imageUrl = urlOrOpts; 6 | } else { 7 | self.imageUrl = urlOrOpts.imageUrl; 8 | for (let key in urlOrOpts) { self[key] = urlOrOpts[key]; } 9 | } 10 | self.changed = false; 11 | window.app.on('app:changed', function setChanged() { self.changed = true; }); 12 | window.app.on('app:saved', function setUnChanged() { self.changed = false; }); 13 | } 14 | 15 | Page.prototype.toJSON = function() { 16 | var ret = { 17 | 'line-comments': [], 18 | 'line-transcriptions': [], 19 | 'page-comment': this['page-comment'], 20 | 'ids': this.ids, 21 | 'url': this.url, 22 | }; 23 | for (let i = 0; i < this.lines.length ; i++) { 24 | ret['line-comments'][i] = this.lines[i].comment.trim(); 25 | ret['line-transcriptions'][i] = this.lines[i].transcription.trim(); 26 | } 27 | return ret; 28 | }; 29 | 30 | Page.prototype.save = function savePage(cb) { 31 | $.ajax({ 32 | type: 'POST', 33 | url: 'ocr-gt-tools.cgi?action=save', 34 | contentType: 'application/json; charset=UTF-8', 35 | data: JSON.stringify(this.toJSON()), 36 | success: function() { cb(); }, 37 | error: cb 38 | }); 39 | }; 40 | 41 | Page.prototype.load = function(cb) { 42 | var self = this; 43 | $.ajax({ 44 | type: 'GET', 45 | url: 'ocr-gt-tools.cgi?action=get&imageUrl=' + this.imageUrl, 46 | error: cb, 47 | success: function(res) { 48 | for (let key in res) { self[key] = res[key]; } 49 | // Sort 'pages' 50 | self.pages = self.pages.sort(function(a, b) { return parseInt(a.ids.page) - parseInt(b.ids.page); }); 51 | // Create line models 52 | for (let i = 0; i < self['line-transcriptions'].length; i++) { 53 | self.lines.push(new Line({ 54 | id: i, 55 | transcription: self['line-transcriptions'][i], 56 | comment: self['line-comments'][i], 57 | image: self['line-images'][i], 58 | })); 59 | } 60 | cb(); 61 | }, 62 | }); 63 | }; 64 | -------------------------------------------------------------------------------- /js/utils.js: -------------------------------------------------------------------------------- 1 | /********************/ 2 | /* Utiliy functions */ 3 | /********************/ 4 | 5 | var Utils = {}; 6 | 7 | /** 8 | * Scale the 'height' attribute of an element by a factor, 9 | * effectively zooming images. 10 | * 11 | * @param {DOMElement} el the element to scale 12 | * @param {float} factor the scale factor 13 | */ 14 | Utils.scaleHeight = function scaleHeight(el, factor) { 15 | var curHeight = el.getAttribute('height') || el.offsetHeight; 16 | if (!el.hasAttribute('data-original-height')) { 17 | el.setAttribute('data-original-height', curHeight); 18 | } 19 | var originalHeight = el.getAttribute('data-original-height'); 20 | var newHeight = factor == 1 ? originalHeight : curHeight * factor; 21 | el.setAttribute('height', newHeight); 22 | }; 23 | 24 | /** 25 | * Get the width of the first image in an element. 26 | */ 27 | Utils.getImageWidth = function getImageWidth(el) { 28 | if (el.tagName !== 'IMG') { 29 | el = $(el).find('img')[0]; 30 | if (!el) { 31 | return -1; 32 | } 33 | } 34 | return el.clientWidth; 35 | }; 36 | 37 | Utils.encodeForBrowser = function encodeForBrowser(str) { 38 | if (typeof str === 'undefined') { 39 | return ''; 40 | } 41 | return str 42 | .replace(/&/g, '&') 43 | .replace(/>/g, '>') 44 | .replace(/</g, '<') 45 | .replace(/^\n*/, '') 46 | .replace(/\n*$/, '') 47 | .replace(/\n/g, '
'); 48 | }; 49 | 50 | Utils.getUrlFromDragEvent = function getUrlFromDragEvent(e) { 51 | var elem = e.originalEvent.dataTransfer.getData('text/html'); 52 | var url = $(elem).find('img').addBack('img').attr('src'); 53 | if (!url) { 54 | url = $(elem).find('a').addBack('a').attr('href'); 55 | } 56 | if (!url) { 57 | url = e.originalEvent.dataTransfer.getData('text/plain'); 58 | } 59 | return url; 60 | }; 61 | 62 | /** 63 | * Compile the Handlebars templates 64 | */ 65 | Utils.compileTemplates = function compileTemplates() { 66 | var templates = {}; 67 | $("*[id^='tpl-']").each(function() { 68 | var $this = $(this); 69 | var tplId = $this.attr('id').replace(/^tpl-/, ''); 70 | templates[tplId] = Handlebars.compile($this.html()); 71 | }); 72 | return templates; 73 | }; 74 | 75 | /** 76 | * Shrink/expand a textarea to fit its contents 77 | */ 78 | Utils.fitHeight = function fitHeight(selector) { 79 | $(selector).each(function() { 80 | $(this) 81 | .attr('rows', 1) // Must be one for single-line textareas 82 | .css({'height': 'auto', 'overflow-y': 'hidden', 'resize': 'none'}) 83 | .height(this.scrollHeight); 84 | }); 85 | }; 86 | -------------------------------------------------------------------------------- /js/views/selectbar.js: -------------------------------------------------------------------------------- 1 | function Selectbar(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | 6 | Selectbar.prototype.enter = function enter() { 7 | this.selectLines('unselect'); 8 | window.app.selectMode = true; 9 | this.$el.removeClass('hidden'); 10 | window.app.emit('app:enter-select-mode'); 11 | }; 12 | 13 | Selectbar.prototype.exit = function exit() { 14 | this.selectLines('unselect'); 15 | window.app.selectMode = false; 16 | this.$el.addClass('hidden'); 17 | window.app.emit('app:exit-select-mode'); 18 | }; 19 | 20 | Selectbar.prototype.toggle = function toggle() { 21 | this[window.app.selectMode ? 'exit' : 'enter'](); 22 | }; 23 | 24 | Selectbar.prototype.getSelection = function getSelection() { 25 | var ret = []; 26 | for (let i = 0; i < app.pageView.lineViews.length; i++) { 27 | var lineView = app.pageView.lineViews[i]; 28 | if (lineView.selected) ret.push(lineView); 29 | } 30 | return ret; 31 | }; 32 | 33 | Selectbar.prototype.selectLines = function selectLines(action, ids) { 34 | var app = window.app; 35 | // If no id was passed, use all ids 36 | if (!ids) { 37 | ids = []; 38 | for (let i = 0; i < app.currentPage.lines.length; i++) { 39 | ids.push(i); 40 | } 41 | } 42 | for (let i = 0; i < ids.length; i++) { 43 | var lineView = app.pageView.lineViews[ids[i]]; 44 | lineView.selected = (action === 'select' ? true : action === 'unselect' ? false : !lineView.selected); 45 | lineView.renderCheckbox(); 46 | } 47 | }; 48 | 49 | Selectbar.prototype.render = function renderSelectBar() { 50 | var self = this; 51 | var app = window.app; 52 | 53 | this.$el.find('.select-all').on('click', function selectAll() { 54 | self.selectLines('select'); 55 | }); 56 | this.$el.find('.select-none').on('click', function selectNone() { 57 | self.selectLines('unselect'); 58 | }); 59 | this.$el.find('.select-toggle').on('click', function selectToggle() { 60 | self.selectLines('toggle'); 61 | }); 62 | this.$el.find('*[data-tag]').on('click', function addTagMultiple() { 63 | var tag = $(this).attr('data-tag'); 64 | var selection = self.getSelection(); 65 | for (let i = 0; i < selection.length; i++) { 66 | selection[i].addTag(tag); 67 | } 68 | window.app.emit('app:changed'); 69 | }); 70 | 71 | // app.on('app:select-line', this.selectLines.bind(self)); 72 | var toggleBound = this.toggle.bind(this); 73 | app.on('app:loading', function() { $(".toggle-select-mode").off('click', toggleBound); }); 74 | app.on('app:loaded', function() { $(".toggle-select-mode").on('click', toggleBound); }); 75 | }; 76 | -------------------------------------------------------------------------------- /js/views/toolbar.js: -------------------------------------------------------------------------------- 1 | function Toolbar(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.$el = $(this.el); 4 | } 5 | /** 6 | * Increase image zoom by Settings.zoomInFactor 7 | */ 8 | Toolbar.prototype.zoomIn = function(e) { 9 | e.stopPropagation(); 10 | $('#file-correction img').each(function() { 11 | Utils.scaleHeight(this, window.app.settings.zoomInFactor); 12 | }); 13 | }; 14 | 15 | /** 16 | * Decrease image zoom by Settings.zoomOutFactor 17 | */ 18 | Toolbar.prototype.zoomOut = function zoomOut(e) { 19 | e.stopPropagation(); 20 | $('#file-correction img').each(function() { 21 | Utils.scaleHeight(this, window.app.settings.zoomOutFactor); 22 | }); 23 | }; 24 | 25 | /** 26 | * Reset all images to their original size 27 | */ 28 | Toolbar.prototype.zoomReset = function zoomReset(e) { 29 | e.stopPropagation(); 30 | $('#file-correction img').each(function() { 31 | Utils.scaleHeight(this, 1); 32 | }); 33 | }; 34 | 35 | 36 | Toolbar.prototype.render = function() { 37 | var self = this; 38 | var app = window.app; 39 | 40 | // Save current page 41 | $("#save_button").on("click", app.savePage.bind(app)); 42 | 43 | // Open history modal 44 | $('button[data-target="#history-modal"]').on('click', app.showHistory.bind(app)); 45 | 46 | // Handle zooming 47 | $("#zoom-in").on("click", this.zoomIn); 48 | $("#zoom-out").on("click", this.zoomOut); 49 | $("#zoom-reset").on("click", this.zoomReset); 50 | 51 | // Handle view filtering by selectors 52 | this.$el.find(".set-view").on('click', function reduceView() { 53 | $(".view-hidden").removeClass("view-hidden"); 54 | $("ul.list-group > *").addClass('view-hidden'); 55 | $("ul.list-group > " + $(this).attr('data-target')).removeClass('view-hidden'); 56 | app.emit('app:filter-view'); 57 | }); 58 | 59 | // Handle sorting 60 | $("#sort-line").on('click', function() { app.pageView.sortRowsByLine(1); }); 61 | $("#sort-line-desc").on('click', function() { app.pageView.sortRowsByLine(-1); }); 62 | $("#sort-width").on('click', function() { app.pageView.sortRowsByWidth(1); }); 63 | $("#sort-width-desc").on('click', function() { app.pageView.sortRowsByWidth(-1); }); 64 | 65 | // 66 | // React to events 67 | // 68 | app.on('app:changed', function enableSaveButton() { 69 | $("#save_button").removeClass("disabled"); 70 | }); 71 | 72 | app.on('app:loaded', function disableSaveButton() { 73 | self.$el.find(".disabled").removeClass('disabled'); 74 | $("#save_button").addClass("disabled"); 75 | }); 76 | 77 | app.on('app:saving', function startSaveSpinner() { 78 | $("#wait_save").addClass("wait").removeClass("hidden"); 79 | $("#disk").addClass("hidden"); 80 | }); 81 | 82 | app.on('app:saved', function stopSaveSpinner() { 83 | $("#wait_save").removeClass("wait").addClass("hidden"); 84 | $("#disk").removeClass("hidden"); 85 | $("#save_button").addClass("disabled"); 86 | }); 87 | }; 88 | -------------------------------------------------------------------------------- /.jscsrc: -------------------------------------------------------------------------------- 1 | { 2 | "requireCurlyBraces": [ 3 | "for", 4 | "while", 5 | "do", 6 | "try", 7 | "catch" 8 | ], 9 | "requireOperatorBeforeLineBreak": true, 10 | "requireCamelCaseOrUpperCaseIdentifiers": { 11 | "allowedPrefixes": ["opt_", "GM_"], 12 | "allExcept": ["var_args"] 13 | }, 14 | "maximumLineLength": { 15 | "value": 200, 16 | "allExcept": ["comments", "regex"] 17 | }, 18 | "validateIndentation": 4, 19 | "validateQuoteMarks": false, 20 | 21 | "disallowMultipleLineStrings": true, 22 | "disallowMixedSpacesAndTabs": true, 23 | "disallowTrailingWhitespace": true, 24 | "disallowSpaceAfterPrefixUnaryOperators": true, 25 | "disallowMultipleVarDecl": true, 26 | "disallowKeywordsOnNewLine": ["else"], 27 | 28 | "requireSpaceAfterKeywords": [ 29 | "if", 30 | "else", 31 | "for", 32 | "while", 33 | "do", 34 | "switch", 35 | "return", 36 | "try", 37 | "catch" 38 | ], 39 | "requireSpaceBeforeBinaryOperators": [ 40 | "=", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", ">>>=", 41 | "&=", "|=", "^=", "+=", 42 | 43 | "+", "-", "*", "/", "%", "<<", ">>", ">>>", "&", 44 | "|", "^", "&&", "||", "===", "==", ">=", 45 | "<=", "<", ">", "!=", "!==" 46 | ], 47 | "requireSpaceAfterBinaryOperators": true, 48 | "requireSpacesInConditionalExpression": true, 49 | "requireSpaceBeforeBlockStatements": true, 50 | "requireSpacesInForStatement": true, 51 | "requireLineFeedAtFileEnd": true, 52 | "requireSpacesInFunctionExpression": { 53 | "beforeOpeningCurlyBrace": true 54 | }, 55 | "disallowSpacesInAnonymousFunctionExpression": { 56 | "beforeOpeningRoundBrace": true 57 | }, 58 | "disallowSpacesInsideObjectBrackets": "all", 59 | "disallowSpacesInsideArrayBrackets": "all", 60 | "disallowSpacesInsideParentheses": true, 61 | 62 | "disallowMultipleLineBreaks": false, 63 | "disallowNewlineBeforeBlockStatements": true, 64 | "disallowKeywords": ["with"], 65 | "disallowSpacesInFunctionExpression": { 66 | "beforeOpeningRoundBrace": true 67 | }, 68 | "disallowSpacesInFunctionDeclaration": { 69 | "beforeOpeningRoundBrace": true 70 | }, 71 | "disallowSpacesInCallExpression": true, 72 | "disallowSpaceAfterObjectKeys": true, 73 | "requireSpaceBeforeObjectValues": true, 74 | "requireCapitalizedConstructors": true, 75 | "requireDotNotation": true, 76 | "requireSemicolons": true, 77 | "validateParameterSeparator": ", ", 78 | "requireSpaceBetweenArguments": true, 79 | 80 | "jsDoc": { 81 | "checkAnnotations": "closurecompiler", 82 | "checkParamNames": true, 83 | "requireParamTypes": true, 84 | "checkRedundantParams": true, 85 | "checkReturnTypes": true, 86 | "checkRedundantReturns": true, 87 | "requireReturnTypes": true, 88 | "checkTypes": true, 89 | "checkRedundantAccess": true, 90 | "requireNewlineAfterDescription": true 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /js/views/line-view.js: -------------------------------------------------------------------------------- 1 | function LineView(opts) { 2 | for (let key in opts) { this[key] = opts[key]; } 3 | this.tpl = window.app.templates.line; 4 | window.app.once('app:loaded', this.render.bind(this)); 5 | window.app.on('app:filter-view', this.renderToggler.bind(this)); 6 | window.app.on('app:enter-select-mode', this.renderCheckbox.bind(this)); 7 | window.app.on('app:exit-select-mode', this.renderCheckbox.bind(this)); 8 | } 9 | 10 | /* 11 | * Render (or do not render) the checkbox for multi-select mode 12 | */ 13 | LineView.prototype.renderCheckbox = function renderComment() { 14 | var self = this; 15 | // Selectionmode 16 | this.$el.find('.select-col').toggleClass('hidden', !window.app.selectMode); 17 | this.$el.find('.button-col').toggleClass('hidden', window.app.selectMode); 18 | this.$el.toggleClass('selected', this.selected); 19 | this.$el.find(':checkbox').prop('checked', this.selected); 20 | }; 21 | 22 | LineView.prototype.renderTextarea = function renderTextarea() { 23 | // fit height 24 | Utils.fitHeight(this.$el.find('textarea')); 25 | }; 26 | 27 | /** 28 | * Update the color of the comment toggle button depending on whether line has 29 | * comments or not. 30 | */ 31 | LineView.prototype.renderToggler = function renderToggler() { 32 | var lineComment = this.$el.find('.line-comment'); 33 | var isVisible = lineComment.is(':visible'); 34 | var hasComment = this.model.comment.length > 0; 35 | var $toggler = this.$el.find(".toggle-line-comment"); 36 | $toggler.find(".show-line-comment").toggleClass('hidden', isVisible); 37 | $toggler.find(".hide-line-comment").toggleClass('hidden', !isVisible); 38 | $toggler.toggleClass('btn-default', !hasComment).toggleClass('btn-info', hasComment); 39 | }; 40 | 41 | LineView.prototype.addTag = function addTag(tag) { 42 | this.model.addTag(tag); 43 | this.render(); 44 | }; 45 | 46 | LineView.prototype.onInput = function onInput() { 47 | this.model.comment = this.$el.find('.line-comment textarea ').val().trim(); 48 | this.model.transcription = this.$el.find('.line-transcription input').val().trim(); 49 | this.renderToggler(); 50 | this.renderTextarea(); 51 | window.app.emit('app:changed'); 52 | }; 53 | 54 | LineView.prototype.render = function() { 55 | var self = this; 56 | console.log("Rendering", this.model.id); 57 | 58 | // Build from template 59 | this.$el.off().find("*").off(); 60 | this.$el.html($(self.tpl(this.model))); 61 | 62 | // data binding 63 | this.$el.on('input', self.onInput.bind(this)); 64 | 65 | // Mark line selected on click in select mode 66 | this.$el.on('click', function() { 67 | if (window.app.selectMode) { 68 | self.selected = !self.selected; 69 | self.renderCheckbox(); 70 | } 71 | }); 72 | 73 | // Add error tag on click 74 | this.$el.find("*[data-tag]").on('click', function() { 75 | self.addTag($(this).attr('data-tag')); 76 | window.app.emit('app:changed'); 77 | }); 78 | 79 | // On clicking the comment toggler 80 | this.$el.find(".toggle-line-comment").on('click', function() { 81 | var commentField = self.$el.find('.line-comment'); 82 | commentField.toggleClass('hidden', commentField.is(':visible')).removeClass('view-hidden'); 83 | self.renderToggler(); 84 | }); 85 | 86 | // Render the toggle button 87 | this.renderToggler(); 88 | 89 | // Render (or don't) the checkbox 90 | this.renderCheckbox(); 91 | 92 | // Fit height of text area 93 | this.renderTextarea(); 94 | 95 | return this; 96 | }; 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ocr-gt-tools 2 | ============ 3 | 4 | A web interface for creating ground truth for evaluating and training OCR. 5 | 6 | [![Docker Stars](https://img.shields.io/docker/stars/ubma/ocr-gt-tools.svg)](https://hub.docker.com/r/ubma/ocr-gt-tools/) 7 | [![Docker Pulls](https://img.shields.io/docker/pulls/ubma/ocr-gt-tools.svg)](https://hub.docker.com/r/ubma/ocr-gt-tools/) 8 | [![license](https://img.shields.io/github/license/UB-Mannheim/ocr-gt-tools.svg)](https://github.com/UB-Mannheim/ocr-gt-tools/blob/master/LICENSE) 9 | [![label](https://img.shields.io/github/issues/UB-Mannheim/ocr-gt-tools.svg)](https://github.com/UB-Mannheim/ocr-gt-tools/issues) 10 | [![Travis](https://img.shields.io/travis/kba/ocr-gt-tools.svg?maxAge=2592000)](https://travis-ci.org/UB-Mannheim/ocr-gt-tools) 11 | [![GitHub stars](https://img.shields.io/github/stars/UB-Mannheim/ocr-gt-tools.svg?style=social&label=Star)](https://github.com/UB-Mannheim/ocr-gt-tools/stargazers) 12 | 13 | ## Table of Contents 14 | 15 | * [Summary](#summary) 16 | * [Screenshot](#screenshot) 17 | * [Features](#features) 18 | * [Installation](#installation) 19 | * [About the code](#about-the-code) 20 | * [Usage](#usage) 21 | * [Contributing](#contributing) 22 | * [Pull Requests](#pull-requests) 23 | * [Bug reports](#bug-reports) 24 | * [Acknowledgments](#acknowledgments) 25 | 26 | ## Summary 27 | 28 | ocr-gt-tools allows editing 29 | [hOCR](https://github.com/kba/hocr-spec/blob/master/hocr-spec.md) files, 30 | such as those produced by the 31 | [tesseract](https://github.com/tesseract-ocr/tesseract) or 32 | [ocropy](https://github.com/tmbdev/ocropy) OCR frameworks. 33 | 34 | ### Screenshot 35 | 36 | 37 | 38 | ### Features 39 | 40 | * Editing transcriptions of lines 41 | * Commenting on line and page level 42 | * Use [standardized comment tags](https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Error-Tags) to mark common problems 43 | * [Cheatsheet](./doc/screenshots/cheatsheet-2016-05-04.png) 44 | * Zoom in / Zoom out 45 | * Filter visible elements 46 | * Select multiple lines and apply tags. 47 | 48 | ### Installation 49 | 50 | See [INSTALL.md](./INSTALL.md). 51 | 52 | ### About the code 53 | 54 | The server-side code is written in Perl. 55 | 56 | The frontend is written in HTML and Javascript. 57 | 58 | ## Usage 59 | 60 | - Open 'ocr-gt-tools/index.html' with a browser 61 | - open in a second Window 'Page Previews' from Kitodo 62 | - Search the book from which you created the hOCR file 63 | - Drag and drop a image from the Kitodo 'Page Preview' Window to the Window with 'ocr-gt-tools/index.html' 64 | - The perl script ocr-gt-tools.cgi will create in the background all files, which takes a few seconds 65 | - with ajax a json objects will be returned to index.html 66 | - index.html will load with ajax the created 'correction.html' and 'anmerkungen.txt' inline 67 | - 'Speichern' will get active if you have written a comment or a text line 68 | 69 | ## Contributing 70 | 71 | ### Expand the wiki 72 | 73 | We are using the wiki to collect [transcription hints for unusual 74 | glyphs](wiki/Special-Characters) and [frequent errors](wiki/Error-Tags). 75 | 76 | ### Pull Requests 77 | 78 | Bug fixes, new functions, suggestions for new features and other user feedback 79 | are appreciated. 80 | 81 | The source code is available from https://github.com/UB-Mannheim/ocr-gt-tools. 82 | Please prepare your code contributions also on Github. 83 | 84 | ### Bug reports 85 | 86 | Please feel free to [open 87 | issues](https://github.com/UB-Mannheim/ocr-gt-tools/issues) for any bug you 88 | encounter and features you'd like to have. 89 | 90 | 91 | ## Acknowledgments 92 | 93 | This is free software. You may use it under the terms of the 94 | GNU AFFERO General Public License (AGPL) version 3 or newer. 95 | See [LICENSE](LICENSE) for details. 96 | 97 | This project bundles other free software: 98 | 99 | * [EB Garamond Font](https://www.google.com/fonts/specimen/EB+Garamond) (SIL Open Font License) 100 | * [Font Awesome by Dave Gandy](http://fontawesome.io/) (SIL OFL 1.1, MIT) 101 | * [bootstrap](http://getbootstrap.com/) (MIT) 102 | * [clipboard.js](https://github.com/zenorocha/clipboard.js) (MIT) 103 | * [handlebars.js](https://github.com/wycats/handlebars.js) (MIT) 104 | * [hocr-extract-images](https://github.com/tmbdev/hocr-tools) (Apache) 105 | * [jQuery](http://jquery.com/) (MIT) 106 | * [ocropus-gtedit](https://github.com/tmbdev/ocropy) (Apache) 107 | * [reset-css](https://github.com/shannonmoeller/reset-css) (Public Domain) 108 | -------------------------------------------------------------------------------- /ocr-gt-tools.styl: -------------------------------------------------------------------------------- 1 | selectedColor = red 2 | paperColor = #b9af96 3 | 4 | @media (min-width: 768px) 5 | body 6 | padding-top 0 7 | .container-fluid 8 | margin-left 100px 9 | .navbar-collapse 10 | height auto 11 | border-top 0 12 | box-shadow none 13 | max-height none 14 | padding-left 0 15 | padding-right 0 16 | &.collapse 17 | display block !important 18 | width auto !important 19 | padding-bottom 0 20 | overflow visible !important 21 | &.in 22 | overflow-x visible 23 | .navbar-nav 24 | &.navbar-right 25 | &:last-child 26 | margin-right 0 27 | .navbar 28 | max-width 96px 29 | height 100vh 30 | margin-right 0 31 | margin-left 0 32 | float left 33 | position fixed 34 | z-index 10001 35 | &:after 36 | clear both 37 | .btn-group 38 | display block 39 | .dropdown-menu 40 | top 0 41 | left 100% 42 | padding 0 43 | width 220px 44 | .btn.disabled 45 | opacity .35 46 | .navbar-nav, 47 | .navbar-nav > li, 48 | .navbar-left, 49 | .navbar-right, 50 | .navbar-header 51 | float none !important 52 | .navbar-right 53 | .dropdown-menu 54 | left 0 55 | right auto 56 | .navbar-default 57 | .dropdown-menu 58 | i 59 | display inline-block 60 | float left 61 | top 2px 62 | font-size 2em 63 | .modal-admin 64 | width 80% 65 | left 100px 66 | position fixed 67 | max-height 100vh 68 | overflow scroll 69 | .line 70 | .list-group-item 71 | padding 0 0 0 0 72 | .btn 73 | i 74 | padding-top 0 75 | padding-bottom 0 76 | font-size: 20px 77 | 78 | #dropzone 79 | position fixed 80 | height 90vh 81 | margin 5vh 82 | padding-top 40vh 83 | font-size 300% 84 | text-align center 85 | &.droppable 86 | border-style dashed 87 | border-width 10px 88 | border-color blue 89 | 90 | #waiting-animation 91 | position: fixed 92 | height: 100vh 93 | width: 100vw 94 | z-index: 10000 95 | a 96 | position: absolute 97 | height: 32px 98 | width: 32px 99 | img 100 | height: 32px !important 101 | max-width: 32px !important 102 | display: block !important 103 | 104 | .hidden 105 | display none 106 | 107 | .view-hidden 108 | display none 109 | 110 | .selected 111 | background: selectedColor 112 | .list-group-item.image 113 | background: selectedColor 114 | .panel 115 | background: selectedColor 116 | 117 | textarea 118 | input[type='text'] 119 | display inline-block 120 | white-space pre-wrap 121 | border none 122 | padding 0 123 | color #000060 124 | font-family 'EB Garamond', serif 125 | font-size 20px 126 | min-height 24px 127 | width 100% 128 | // Important to make textareas auto-expand 129 | height auto 130 | overflow hidden 131 | resize none 132 | 133 | .line-comment, .line-comment *, 134 | #page-comment textarea 135 | background-color lightyellow 136 | font-family 'EB Garamond', serif 137 | font-size 20px 138 | min-height 24px 139 | 140 | #file-correction 141 | .panel 142 | margin 0 143 | .panel-heading 144 | padding 0 145 | h4 146 | padding-top 7.5px 147 | .col-sm-1 148 | width:10% 149 | padding-left 0 150 | padding-right 0 151 | .btn-group > .btn 152 | padding: 0 153 | width: 35px 154 | .btn 155 | text-align left 156 | i 157 | padding: 5px; 158 | .col-sm-11 159 | width: 88% 160 | padding-left: 0 161 | 162 | 163 | #right-sidebar 164 | position fixed 165 | right 0 166 | .list-group-item 167 | i 168 | padding-right: 5px; 169 | 170 | #select-bar 171 | position fixed 172 | background-color #eee 173 | border-radius 0 0 10px 10px 174 | border-left: 2px solid blue 175 | border-right 2px solid blue 176 | border-bottom 2px solid blue 177 | top 0 178 | left 200px 179 | z-index: 1001 180 | .close 181 | opacity .5 182 | display inline-block 183 | padding-right 5px 184 | font-size xx-large 185 | 186 | #cheatsheet-modal 187 | // disable selection 188 | -webkit-touch-callout: none; 189 | -webkit-user-select: none; 190 | -khtml-user-select: none; 191 | -moz-user-select: none; 192 | -ms-user-select: none; 193 | user-select: none; 194 | h4 195 | float: left 196 | .cheatsheet-entry 197 | float: left 198 | margin-left 25% 199 | font-size: 200% 200 | max-width: 64px 201 | img 202 | height: 64px 203 | th 204 | font-weight: bold 205 | td:nth-child(1) 206 | width: 40% 207 | td 208 | vertical-align: middle 209 | font-size: 120% 210 | button.code 211 | width 100% 212 | font-family monospace 213 | font-size 48px 214 | height 64px 215 | .clipboard 216 | float: left 217 | font-size: 50% 218 | 219 | .select-col 220 | padding-left: 20px 221 | // margin-top: 32px 222 | input[type="checkbox"] 223 | width 23px 224 | height 23px 225 | padding: 0 226 | margin: 4px 0 0 0 227 | // padding-top 32px 228 | 229 | 230 | #work-info 231 | overflow-y: scroll 232 | max-height: 200px; 233 | 234 | // vim: sw=4 ts=4 noet : 235 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Add node_modules/.bin to $PATH so the CLI tools 2 | # installed locally by npm can be used 3 | export PATH := $(PWD)/node_modules/.bin:$(PATH) 4 | 5 | include \ 6 | dev/apache.mk \ 7 | dev/debian.mk \ 8 | dev/docker.mk \ 9 | dev/plackup.mk 10 | # 11 | # Define all the CLI tools to use 12 | # 13 | 14 | # Standard UNIX tools, recurse, create parents, force delete 15 | MKDIR = mkdir -p 16 | RM = rm -rf 17 | CP = cp -r 18 | GIT_CLONE = git clone --depth 1 19 | 20 | # cURL to download files 21 | CURL = curl -s 22 | # clean-css is a CSS minifier and optimizer 23 | CLEANCSS = cleancss 24 | # UglifyJS minifies, merges and optimizes Javascript 25 | UGLIFYJS = uglifyjs 26 | # webfont-dl is a tool to download web fonts from the Google Fonts API 27 | WEBFONTDL = webfont-dl --eot=omit --ttf=data --woff1=data 28 | # Pug is a templating engine 29 | PUG = pug --pretty 30 | # Stylus is a CSS compiler 31 | STYLUS = stylus 32 | 33 | # 34 | # Define lists of assets 35 | # 36 | 37 | # URLs of Web Fonts to embed 38 | FONT_URLS = https://fonts.googleapis.com/css?family=EB+Garamond&subset=latin,latin-ext 39 | # Font files (eot, ttf, woff...) to bundle 40 | FONT_FILES = node_modules/font-awesome/fonts/fontawesome-webfont.* \ 41 | node_modules/bootstrap/fonts/glyphicons-halflings-regular.* 42 | # URLs of CSS to download 43 | CSS_URLS = https://getbootstrap.com/docs/3.4/examples/dashboard/dashboard.css 44 | # CSS files to bundle into one minified `dist/vendor.css` 45 | # NOTE: Our CSS should not be bundled here 46 | CSS_FILES = node_modules/reset-css/reset.css \ 47 | node_modules/bootstrap/dist/css/bootstrap.css \ 48 | node_modules/notie/dist/notie.css \ 49 | node_modules/font-awesome/css/font-awesome.css 50 | # JS scripts to bundle into one minified `dist/vendor.js` 51 | # NOTE: Javascript developed by us should not be bundled here 52 | VENDOR_JS_FILES = node_modules/jquery/dist/jquery.js \ 53 | node_modules/async/dist/async.min.js \ 54 | node_modules/bootstrap/dist/js/bootstrap.js \ 55 | node_modules/handlebars/dist/handlebars.min.js \ 56 | node_modules/clipboard/dist/clipboard.js \ 57 | node_modules/notie/dist/notie.js 58 | 59 | JS_FILES = js/*.js js/**/*.js ocr-gt-tools.js ocr-gt-tools.js 60 | # The HTML files, described in the Pug shorthand / templating language 61 | PUG_FILES = ocr-gt-tools.pug 62 | 63 | # 64 | # Define the list of targets that will "always fail", i.e. the CLI api 65 | # 66 | # clean-js clean-html clean-fonts clean-css \ 67 | 68 | .PHONY: debug clean vendor 69 | 70 | # 71 | # Debugging 72 | # 73 | print-%: ; @echo $*=$($*) 74 | 75 | __: clean dist 76 | 77 | _.%: ; $(MAKE) -C . clean-$* dist 78 | 79 | debug: 80 | @grep '^[A-Z0-9_]\+\s*=' Makefile \ 81 | |grep -o '^[A-Z0-9_]*' \ 82 | |xargs -I{} make -s . print-{} 83 | 84 | # 85 | # Dependencies to execute ocropy / hocr-tools in a CGI environment 86 | # 87 | 88 | vendor: dist/vendor/hocr-tools dist/vendor/ocropy 89 | 90 | dist/vendor/hocr-tools: 91 | $(MKDIR) dist/vendor 92 | $(GIT_CLONE) https://github.com/UB-Mannheim/hocr-tools $@ 93 | 94 | dist/vendor/ocropy: 95 | $(MKDIR) dist/vendor 96 | $(GIT_CLONE) https://github.com/tmbdev/ocropy $@ 97 | 98 | # 99 | # Set up dist folder 100 | # 101 | 102 | dist: \ 103 | dist/special-chars.json\ 104 | dist/error-tags.json\ 105 | dist/vendor\ 106 | dist/log\ 107 | dist/vendor.css\ 108 | dist/vendor.js\ 109 | dist/fonts\ 110 | dist/index.html\ 111 | dist/ocr-gt-tools.js\ 112 | dist/ocr-gt-tools.css\ 113 | dist/ocr-gt-tools.cgi 114 | 115 | dist/%.json: doc/%.json 116 | $(CP) $< $@ 117 | 118 | dist/log: 119 | $(MKDIR) $@ 120 | 121 | dist/ocr-gt-tools.cgi: ocr-gt-tools.cgi 122 | $(CP) $< $@ 123 | chmod a+x $@ 124 | 125 | #$(UGLIFYJS) --compress --output $@ $^ 126 | dist/ocr-gt-tools.js: $(JS_FILES) 127 | cat $^ > $@ 128 | 129 | dist/ocr-gt-tools.css: ocr-gt-tools.styl 130 | $(STYLUS) < $< > $@ 131 | 132 | dist/fonts: 133 | $(MKDIR) $@ 134 | $(CP) ${FONT_FILES} $@ 135 | 136 | dist/fonts.css: dist/fonts 137 | $(WEBFONTDL) -o $@ --font-out=dist/fonts $(FONT_URLS) && wait 138 | 139 | dist/vendor.css: ${CSS_FILES} dist/fonts.css 140 | cat dist/fonts.css ${CSS_FILES} \ 141 | | sed 's,\.\./fonts,fonts,g' \ 142 | > dist/temp.css 143 | $(CURL) ${CSS_URLS} >> dist/temp.css 144 | $(CLEANCSS) --skip-rebase --output $@ dist/temp.css 145 | $(RM) dist/temp.css 146 | 147 | dist/vendor.js: ${VENDOR_JS_FILES} 148 | $(UGLIFYJS) --output $@ \ 149 | --prefix 1 \ 150 | --source-map $@.map \ 151 | --source-map-url vendor.js.map \ 152 | $^ 153 | 154 | # sed "s,\(=.\)dist/,\1,g" $< | $(PUG) > $@ 155 | dist/index.html: ${PUG_FILES} 156 | $(MKDIR) dist 157 | $(PUG) < $< > $@ 158 | 159 | # 160 | # Clean up, delete files 161 | # 162 | 163 | clean-fonts: 164 | $(RM) dist/fonts dist/fonts.css 165 | 166 | clean-%: 167 | $(RM) dist/$* dist/*.$* dist/*.$*.map 168 | 169 | clean: clean-js clean-css clean-fonts clean-html 170 | 171 | realclean: 172 | $(RM) node_modules 173 | $(RM) dist 174 | 175 | test: 176 | bash ./test.sh 177 | -------------------------------------------------------------------------------- /dist/error-tags.json: -------------------------------------------------------------------------------- 1 | { 2 | "wrong-image-section": { 3 | "name": { 4 | "en": "Incorrectly captured image section which contains no straight line of text but empty spaces or page margins etc.", 5 | "de": "Falsch erfasste Bildausschnitte, die keine Textzeile enthalten, sondern leere Seitenbereiche oder Seitenränder usw." 6 | }, 7 | "total": 443, 8 | "frequencyAvg": 5.7, 9 | "id": "wrong-image-section" 10 | }, 11 | "text-blocked": { 12 | "name": { 13 | "en": "Blocked text with visible blanks between the letters", 14 | "de": "Gesperrter Text mit sichtbaren Leerzeichen zwischen den Buchstaben" 15 | }, 16 | "total": 51, 17 | "frequencyAvg": 0.7, 18 | "id": "text-blocked" 19 | }, 20 | "text-italic": { 21 | "name": { 22 | "en": "Line completely or in parts in italics", 23 | "de": "Zeile vollständig oder teilweise in kursiver Schrift" 24 | }, 25 | "total": 156, 26 | "frequencyAvg": 2, 27 | "id": "text-italic" 28 | }, 29 | "initial": { 30 | "name": { 31 | "en": "initial character", 32 | "de": "Initiale" 33 | }, 34 | "comment": { 35 | "en": "This most likely causes wrong-image-section" 36 | }, 37 | "total": 10, 38 | "frequencyAvg": 0.1, 39 | "id": "initial" 40 | }, 41 | "letter-faded": { 42 | "name": { 43 | "en": "Partially or completely faded letters", 44 | "de": "Teilweise oder vollständig ausgebleichte Buchstaben" 45 | }, 46 | "total": 6, 47 | "frequencyAvg": 0.1, 48 | "id": "letter-faded" 49 | }, 50 | "notes-within-line": { 51 | "name": { 52 | "en": "Notes on page margin captured within a text line", 53 | "de": "Anmerkungen am Seitenrand mit Textzeile erfasst" 54 | }, 55 | "total": 93, 56 | "frequencyAvg": 1.2, 57 | "id": "notes-within-line" 58 | }, 59 | "notes-separate": { 60 | "name": { 61 | "en": "Notes on page margin captured as separate lines", 62 | "de": "Anmerkungen am Seitenrand als separate Zeilen erfasst" 63 | }, 64 | "total": 95, 65 | "frequencyAvg": 1.2, 66 | "id": "notes-separate" 67 | }, 68 | "letter-handling-unclear": { 69 | "name": { 70 | "en": "Characters whose treatment is not yet clear", 71 | "de": "Buchstaben deren Behandlung noch nicht klar ist" 72 | }, 73 | "comment": { 74 | "de": "Zum Beispiel q mit Akut, que-Ligatur" 75 | }, 76 | "total": 17, 77 | "frequencyAvg": 0.2, 78 | "id": "letter-handling-unclear" 79 | }, 80 | "line-incomplete": { 81 | "name": { 82 | "en": "line not captured completely", 83 | "de": "Zeile nicht vollständig erfasst" 84 | }, 85 | "comment": { 86 | "de": "Zeile wurde zwar korrekt erfasst, aber Buchtsaben links oder rechts in der Zeile fehlen", 87 | "en": "Line was captured correctly, but letters in the left or right of the line are missing" 88 | }, 89 | "total": 33, 90 | "frequencyAvg": 0.4, 91 | "id": "line-incomplete" 92 | }, 93 | "line-incorrect": { 94 | "name": { 95 | "en": "line not captured correctly", 96 | "de": "Zeile nicht richtig erfasst" 97 | }, 98 | "comment": { 99 | "en": "more than just one line inside the image; line lies at an angle in image", 100 | "de": "Mehrere Zeilen im Bild erfasst; Zeile liegt schräg im Bild" 101 | }, 102 | "total": 57, 103 | "frequencyAvg": 0.7, 104 | "id": "line-incorrect" 105 | }, 106 | "line-captured-twice": { 107 | "name": { 108 | "en": "line partially or completely captured tice", 109 | "de": "Zeile teilweise oder vollständig doppelt erfasst" 110 | }, 111 | "total": 33, 112 | "frequencyAvg": 0.4, 113 | "id": "line-captured-twice" 114 | }, 115 | "text-greek": { 116 | "name": { 117 | "en": "Greek text", 118 | "de": "Griechischer Text" 119 | }, 120 | "total": 3, 121 | "frequencyAvg": 0, 122 | "id": "text-greek" 123 | }, 124 | "letter-unidentified": { 125 | "name": { 126 | "en": "Letter not jet identified", 127 | "de": "Noch nicht genau identifizierter Buchstabe" 128 | }, 129 | "comment": { 130 | "en": "May be similar with letter-handling-unclear" 131 | }, 132 | "total": 12, 133 | "frequencyAvg": 0.2, 134 | "id": "letter-unidentified" 135 | }, 136 | "letter-unreadable": { 137 | "name": { 138 | "en": "letter not faded but still unreadable", 139 | "de": "Buchstabe nicht ausgebleicht aber trotzdem unleserlich" 140 | }, 141 | "total": 2, 142 | "frequencyAvg": 0, 143 | "id": "letter-unreadable" 144 | }, 145 | "line-not-in-order": { 146 | "name": { 147 | "en": "line not captured in correct order", 148 | "de": "Zeile nicht in richtiger Reihenfolge erfasst" 149 | }, 150 | "total": 4, 151 | "frequencyAvg": 0.1, 152 | "id": "line-not-in-order" 153 | }, 154 | "dividing-line": { 155 | "name": { 156 | "en": "Dividing line captured as line", 157 | "de": "Trennlinie als Zeile erfasst" 158 | }, 159 | "total": 2, 160 | "id": "dividing-line" 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /doc/error-tags.json: -------------------------------------------------------------------------------- 1 | { 2 | "wrong-image-section": { 3 | "name": { 4 | "en": "Incorrectly captured image section which contains no straight line of text but empty spaces or page margins etc.", 5 | "de": "Falsch erfasste Bildausschnitte, die keine Textzeile enthalten, sondern leere Seitenbereiche oder Seitenränder usw." 6 | }, 7 | "total": 443, 8 | "frequencyAvg": 5.7, 9 | "id": "wrong-image-section" 10 | }, 11 | "text-blocked": { 12 | "name": { 13 | "en": "Blocked text with visible blanks between the letters", 14 | "de": "Gesperrter Text mit sichtbaren Leerzeichen zwischen den Buchstaben" 15 | }, 16 | "total": 51, 17 | "frequencyAvg": 0.7, 18 | "id": "text-blocked" 19 | }, 20 | "text-italic": { 21 | "name": { 22 | "en": "Line completely or in parts in italics", 23 | "de": "Zeile vollständig oder teilweise in kursiver Schrift" 24 | }, 25 | "total": 156, 26 | "frequencyAvg": 2, 27 | "id": "text-italic" 28 | }, 29 | "initial": { 30 | "name": { 31 | "en": "initial character", 32 | "de": "Initiale" 33 | }, 34 | "comment": { 35 | "en": "This most likely causes wrong-image-section" 36 | }, 37 | "total": 10, 38 | "frequencyAvg": 0.1, 39 | "id": "initial" 40 | }, 41 | "letter-faded": { 42 | "name": { 43 | "en": "Partially or completely faded letters", 44 | "de": "Teilweise oder vollständig ausgebleichte Buchstaben" 45 | }, 46 | "total": 6, 47 | "frequencyAvg": 0.1, 48 | "id": "letter-faded" 49 | }, 50 | "notes-within-line": { 51 | "name": { 52 | "en": "Notes on page margin captured within a text line", 53 | "de": "Anmerkungen am Seitenrand mit Textzeile erfasst" 54 | }, 55 | "total": 93, 56 | "frequencyAvg": 1.2, 57 | "id": "notes-within-line" 58 | }, 59 | "notes-separate": { 60 | "name": { 61 | "en": "Notes on page margin captured as separate lines", 62 | "de": "Anmerkungen am Seitenrand als separate Zeilen erfasst" 63 | }, 64 | "total": 95, 65 | "frequencyAvg": 1.2, 66 | "id": "notes-separate" 67 | }, 68 | "letter-handling-unclear": { 69 | "name": { 70 | "en": "Characters whose treatment is not yet clear", 71 | "de": "Buchstaben deren Behandlung noch nicht klar ist" 72 | }, 73 | "comment": { 74 | "de": "Zum Beispiel q mit Akut, que-Ligatur" 75 | }, 76 | "total": 17, 77 | "frequencyAvg": 0.2, 78 | "id": "letter-handling-unclear" 79 | }, 80 | "line-incomplete": { 81 | "name": { 82 | "en": "line not captured completely", 83 | "de": "Zeile nicht vollständig erfasst" 84 | }, 85 | "comment": { 86 | "de": "Zeile wurde zwar korrekt erfasst, aber Buchtsaben links oder rechts in der Zeile fehlen", 87 | "en": "Line was captured correctly, but letters in the left or right of the line are missing" 88 | }, 89 | "total": 33, 90 | "frequencyAvg": 0.4, 91 | "id": "line-incomplete" 92 | }, 93 | "line-incorrect": { 94 | "name": { 95 | "en": "line not captured correctly", 96 | "de": "Zeile nicht richtig erfasst" 97 | }, 98 | "comment": { 99 | "en": "more than just one line inside the image; line lies at an angle in image", 100 | "de": "Mehrere Zeilen im Bild erfasst; Zeile liegt schräg im Bild" 101 | }, 102 | "total": 57, 103 | "frequencyAvg": 0.7, 104 | "id": "line-incorrect" 105 | }, 106 | "line-captured-twice": { 107 | "name": { 108 | "en": "line partially or completely captured tice", 109 | "de": "Zeile teilweise oder vollständig doppelt erfasst" 110 | }, 111 | "total": 33, 112 | "frequencyAvg": 0.4, 113 | "id": "line-captured-twice" 114 | }, 115 | "text-greek": { 116 | "name": { 117 | "en": "Greek text", 118 | "de": "Griechischer Text" 119 | }, 120 | "total": 3, 121 | "frequencyAvg": 0, 122 | "id": "text-greek" 123 | }, 124 | "letter-unidentified": { 125 | "name": { 126 | "en": "Letter not jet identified", 127 | "de": "Noch nicht genau identifizierter Buchstabe" 128 | }, 129 | "comment": { 130 | "en": "May be similar with letter-handling-unclear" 131 | }, 132 | "total": 12, 133 | "frequencyAvg": 0.2, 134 | "id": "letter-unidentified" 135 | }, 136 | "letter-unreadable": { 137 | "name": { 138 | "en": "letter not faded but still unreadable", 139 | "de": "Buchstabe nicht ausgebleicht aber trotzdem unleserlich" 140 | }, 141 | "total": 2, 142 | "frequencyAvg": 0, 143 | "id": "letter-unreadable" 144 | }, 145 | "line-not-in-order": { 146 | "name": { 147 | "en": "line not captured in correct order", 148 | "de": "Zeile nicht in richtiger Reihenfolge erfasst" 149 | }, 150 | "total": 4, 151 | "frequencyAvg": 0.1, 152 | "id": "line-not-in-order" 153 | }, 154 | "dividing-line": { 155 | "name": { 156 | "en": "Dividing line captured as line", 157 | "de": "Trennlinie als Zeile erfasst" 158 | }, 159 | "total": 2, 160 | "id": "dividing-line" 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | 3 | * [Docker Quickstart](#docker-quickstart) 4 | * [Install dependencies](#install-dependencies) 5 | * [Create configuration](#create-configuration) 6 | * [Deploy on a server](#deploy-on-a-server) 7 | * [On Apache](#on-apache) 8 | * [Bundled standalone server](#bundled-standalone-server) 9 | * [Testing the server](#testing-the-server) 10 | * [Developing the frontend](#developing-the-frontend) 11 | * [Perl](#perl) 12 | * [Log-Files / Error-Log-Files](#log-files--error-log-files) 13 | 14 | ## Docker Quickstart 15 | 16 | To get the tool up and running in a docker container: 17 | 18 | ``` 19 | git clone https://github.com/UB-Mannheim/ocr-gt-tools 20 | cd ocr-gt-tools 21 | ./dev/run-docker.sh 22 | ``` 23 | 24 | The first time you run this, it will download the [docker 25 | image](https://hub.docker.com/r/ubma/ocr-gt-tools/) and run an Apache server in the 26 | container with all the configuration taken care of. 27 | 28 | Navigate to http://localhost:8888/ocr-gt to use it. 29 | 30 | ## Install dependencies 31 | 32 | Install Debian packages (for other distros, YMMV). 33 | 34 | ``` 35 | make apt-get 36 | ``` 37 | 38 | (See `dev/debian.mk`) 39 | 40 | Install current Git revisions of hocr-tools and ocropus: 41 | 42 | ``` 43 | make vendor 44 | ``` 45 | 46 | ## Create configuration 47 | 48 | Copy the default configuration and shorten/edit as needed: 49 | 50 | ``` 51 | cp dist/ocr-gt-tools.default.yml dist/ocr-gt-tools.yml 52 | ``` 53 | 54 | ## Deploy on a server 55 | 56 | ### On Apache 57 | 58 | * Enable CGI on Apache 59 | 60 | ```sh 61 | sudo a2enmod cgi 62 | ``` 63 | 64 | * Deploy to Apache document folder: 65 | 66 | ``` 67 | make deploy 68 | ``` 69 | 70 | (See `dev/apache.mk`) 71 | 72 | This will recreate out-of-date files in `./dist`, create a folder 73 | `$APACHE_BASEURL` in `$APACHE_DIR` and copy all the files from `./dist` to 74 | `$APACHE_DIR/$APACHE_BASEURL` using `sudo` with user `$APACHE_USER`. 75 | 76 | Deployment can be customized with four environment variables, the default is: 77 | 78 | ``` 79 | make APACHE_USER=www-data APACHE_GROUP=www-data APACHE_DIR=/var/www/html APACHE_BASEURL=ocr-gt deploy 80 | ``` 81 | 82 | * Make sure scripts ending in `.cgi` are executable in the 83 | `$APACHE_DIR/$APACHE_BASEURL` folder: 84 | 85 | ``` 86 | $ sudo $EDITOR /etc/apache2/sites-available/000-default.conf 87 | 88 | Options +ExecCGI 89 | AddHandler cgi-script .cgi 90 | 91 | ``` 92 | 93 | * Copy the configuration (unless [already configured](#create-configuration): 94 | 95 | ``` 96 | sudo -u www-data cp dist/ocr-gt-tools.default.yml $APACHE_DIR/$APACHE_BASEURL/ocr-gt-tools.yml 97 | # "sudo $EDITOR $APACHE_DIR/$APACHE_BASEURL/ocr-gt-tools.yml" as needed 98 | ``` 99 | 100 | * Restart apache 101 | 102 | ``` 103 | sudo systemctl restart apache2 104 | ``` 105 | 106 | The web application will be available under [http://localhost/ocr-gt](http://localhost/ocr-gt). 107 | 108 | ### Docker 109 | 110 | docker run -t -p kbai/ocr-gt-tools 111 | 112 | The server is available on port 9090. 113 | 114 | 115 | ### Bundled standalone server 116 | 117 | For development and quick experimentation, we ship a standalone server, 118 | wrapping the CGI in a Plack app: 119 | 120 | ``` 121 | make dev-server 122 | ``` 123 | 124 | ### Testing the server 125 | 126 | Navigate to [http://localhost:9090/index.html](http://localhost:9090/index.html). 127 | 128 | Drop a file, such as [this thumbnail](https://digi.bib.uni-mannheim.de/fileadmin/digi/445442158/thumbs/445442158_0126.jpg) onto the document. 129 | 130 | Do some transliterating and commenting. 131 | 132 | Click "Speichern". 133 | 134 | Checkout the contents of [./example/ocr-corrections/](./example/ocr-corrections/). 135 | 136 | 137 | ## Developing the frontend 138 | 139 | Install the development dependencies: The `npm` package (which pulls in nodejs) and some nodejs-based tools: 140 | 141 | ``` 142 | make dev-apt-get 143 | ``` 144 | 145 | Then npm to bootstrap the tools for building HTML from Pug, CSS from LESS etc. 146 | and to install the frontend assets: 147 | 148 | ``` 149 | npm install 150 | ``` 151 | 152 | After changing CSS/Javascript, make sure to regenerate the `dist` folder: 153 | 154 | ``` 155 | make dist 156 | ``` 157 | 158 | This will 159 | 160 | * Download web fonts to `./dist/fonts/` and generate a matching CSS file in `./dist/css/` 161 | * copy all CSS stylesheets to `./dist/css/` and minify them to `./dist/style.css` 162 | * copy all JS scripts to `./dist/js/` and minify them, in the right order, to `./dist/script.js` with source map 163 | 164 | ### Perl 165 | 166 | For local tests in Windows I use [Strawberry Perl](http://strawberryperl.com/). 167 | 168 | The scripts used the following perl modules. You can download them from cpan. 169 | 170 | - CGI 171 | - CGI::Carp 172 | - JSON 173 | - Config::IniFiles 174 | 175 | ### Log-Files / Error-Log-Files 176 | Infos from perlscript ocr-gt-tools.cgi are stored in log/ocr-gt-tools.log 177 | 178 | Debug log can be written on stderr or in log/ocr-gt-tools.log, default is stderr. If you wish to become debug log in log/ocr-gt-tools.log, please edit ocr-gt-tools.yml and set logging:stderr to false. 179 | -------------------------------------------------------------------------------- /dist/ocr-gt-tools.css: -------------------------------------------------------------------------------- 1 | @media (min-width: 768px) { 2 | body { 3 | padding-top: 0; 4 | } 5 | .container-fluid { 6 | margin-left: 100px; 7 | } 8 | .navbar-collapse { 9 | height: auto; 10 | border-top: 0; 11 | box-shadow: none; 12 | max-height: none; 13 | padding-left: 0; 14 | padding-right: 0; 15 | } 16 | .navbar-collapse.collapse { 17 | display: block !important; 18 | width: auto !important; 19 | padding-bottom: 0; 20 | overflow: visible !important; 21 | } 22 | .navbar-collapse.in { 23 | overflow-x: visible; 24 | } 25 | .navbar-collapse .navbar-nav.navbar-right:last-child { 26 | margin-right: 0; 27 | } 28 | .navbar { 29 | max-width: 96px; 30 | height: 100vh; 31 | margin-right: 0; 32 | margin-left: 0; 33 | float: left; 34 | position: fixed; 35 | z-index: 10001; 36 | } 37 | .navbar:after { 38 | clear: both; 39 | } 40 | .navbar .btn-group { 41 | display: block; 42 | } 43 | .navbar .dropdown-menu { 44 | top: 0; 45 | left: 100%; 46 | padding: 0; 47 | width: 220px; 48 | } 49 | .navbar .btn.disabled { 50 | opacity: 0.35; 51 | } 52 | .navbar .navbar-nav, 53 | .navbar .navbar-nav > li, 54 | .navbar .navbar-left, 55 | .navbar .navbar-right, 56 | .navbar .navbar-header { 57 | float: none !important; 58 | } 59 | .navbar .navbar-right .dropdown-menu { 60 | left: 0; 61 | right: auto; 62 | } 63 | .navbar-default .dropdown-menu i { 64 | display: inline-block; 65 | float: left; 66 | top: 2px; 67 | font-size: 2em; 68 | } 69 | .modal-admin { 70 | width: 80%; 71 | left: 100px; 72 | position: fixed; 73 | max-height: 100vh; 74 | overflow: scroll; 75 | } 76 | .line .list-group-item { 77 | padding: 0 0 0 0; 78 | } 79 | .line .btn i { 80 | padding-top: 0; 81 | padding-bottom: 0; 82 | font-size: 20px; 83 | } 84 | } 85 | #dropzone { 86 | position: fixed; 87 | height: 90vh; 88 | margin: 5vh; 89 | padding-top: 40vh; 90 | font-size: 300%; 91 | text-align: center; 92 | } 93 | #dropzone.droppable { 94 | border-style: dashed; 95 | border-width: 10px; 96 | border-color: #00f; 97 | } 98 | #waiting-animation { 99 | position: fixed; 100 | height: 100vh; 101 | width: 100vw; 102 | z-index: 10000; 103 | } 104 | #waiting-animation a { 105 | position: absolute; 106 | height: 32px; 107 | width: 32px; 108 | } 109 | #waiting-animation a img { 110 | height: 32px !important; 111 | max-width: 32px !important; 112 | display: block !important; 113 | } 114 | .hidden { 115 | display: none; 116 | } 117 | .view-hidden { 118 | display: none; 119 | } 120 | .selected { 121 | background: #f00; 122 | } 123 | .selected .list-group-item.image { 124 | background: #f00; 125 | } 126 | .selected .panel { 127 | background: #f00; 128 | } 129 | textarea, 130 | input[type='text'] { 131 | display: inline-block; 132 | white-space: pre-wrap; 133 | border: none; 134 | padding: 0; 135 | color: #000060; 136 | font-family: 'EB Garamond', serif; 137 | font-size: 20px; 138 | min-height: 24px; 139 | width: 100%; 140 | height: auto; 141 | overflow: hidden; 142 | resize: none; 143 | } 144 | .line-comment, 145 | .line-comment *, 146 | #page-comment textarea { 147 | background-color: #ffffe0; 148 | font-family: 'EB Garamond', serif; 149 | font-size: 20px; 150 | min-height: 24px; 151 | } 152 | #file-correction .panel { 153 | margin: 0; 154 | } 155 | #file-correction .panel-heading { 156 | padding: 0; 157 | } 158 | #file-correction .panel-heading h4 { 159 | padding-top: 7.5px; 160 | } 161 | #file-correction .col-sm-1 { 162 | width: 10%; 163 | padding-left: 0; 164 | padding-right: 0; 165 | } 166 | #file-correction .col-sm-1 .btn-group > .btn { 167 | padding: 0; 168 | width: 35px; 169 | } 170 | #file-correction .col-sm-1 .btn { 171 | text-align: left; 172 | } 173 | #file-correction .col-sm-1 .btn i { 174 | padding: 5px; 175 | } 176 | #file-correction .col-sm-11 { 177 | width: 88%; 178 | padding-left: 0; 179 | } 180 | #right-sidebar { 181 | position: fixed; 182 | right: 0; 183 | } 184 | #right-sidebar .list-group-item i { 185 | padding-right: 5px; 186 | } 187 | #select-bar { 188 | position: fixed; 189 | background-color: #eee; 190 | border-radius: 0 0 10px 10px; 191 | border-left: 2px solid #00f; 192 | border-right: 2px solid #00f; 193 | border-bottom: 2px solid #00f; 194 | top: 0; 195 | left: 200px; 196 | z-index: 1001; 197 | } 198 | #select-bar .close { 199 | opacity: 0.5; 200 | display: inline-block; 201 | padding-right: 5px; 202 | font-size: xx-large; 203 | } 204 | #cheatsheet-modal { 205 | -webkit-touch-callout: none; 206 | -webkit-user-select: none; 207 | -khtml-user-select: none; 208 | -moz-user-select: none; 209 | -ms-user-select: none; 210 | user-select: none; 211 | } 212 | #cheatsheet-modal h4 { 213 | float: left; 214 | } 215 | #cheatsheet-modal .cheatsheet-entry { 216 | float: left; 217 | margin-left: 25%; 218 | font-size: 200%; 219 | max-width: 64px; 220 | } 221 | #cheatsheet-modal img { 222 | height: 64px; 223 | } 224 | #cheatsheet-modal th { 225 | font-weight: bold; 226 | } 227 | #cheatsheet-modal td:nth-child(1) { 228 | width: 40%; 229 | } 230 | #cheatsheet-modal td { 231 | vertical-align: middle; 232 | font-size: 120%; 233 | } 234 | #cheatsheet-modal td button.code { 235 | width: 100%; 236 | font-family: monospace; 237 | font-size: 48px; 238 | height: 64px; 239 | } 240 | #cheatsheet-modal td .clipboard { 241 | float: left; 242 | font-size: 50%; 243 | } 244 | .select-col { 245 | padding-left: 20px; 246 | } 247 | .select-col input[type="checkbox"] { 248 | width: 23px; 249 | height: 23px; 250 | padding: 0; 251 | margin: 4px 0 0 0; 252 | } 253 | #work-info { 254 | overflow-y: scroll; 255 | max-height: 200px; 256 | } 257 | 258 | -------------------------------------------------------------------------------- /API.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Data model 4 | ---------- 5 | 6 | The system handles images and text: 7 | 8 | * images 9 | * page 10 | * line in page 11 | * text 12 | * transcription of a line 13 | * comment on a line 14 | * comment on a page 15 | 16 | Images are transferred as URL between server and client. 17 | 18 | Text is exchanged inline. 19 | 20 | Protocol 21 | -------- 22 | 23 | Clients ask for editable representations of URL (`imageUrl`) 24 | 25 | `GET http://ocr-gt/?action=create&imageUrl=http://ocr-gt/image/page1_thumb.png` 26 | 27 | Server tries to resolve that into an editable location (i.e. a page). If no 28 | such representation currently exists and the server has all the required assets 29 | (high-res image and hCOR file), it creates it. If found, it sends that location 30 | as JSON to the client: 31 | 32 | ```yaml 33 | title: '1' 34 | url: 35 | thumb-url: http://ocr-gt/image/page1_thumb.png 36 | hires-url: http://ocr-gt/image/page1_max.png 37 | hocr-url: http://ocr-gt/hocr/page1.hocr 38 | landing-page-url: http://some.where/kitodo/1 39 | line-images: ["http://ocr-gt/image/page1/line-001.png", …] 40 | transcriptions: ["…", …] 41 | page-comment: "…" 42 | line-comments: ["…", …] 43 | ``` 44 | 45 | All these fields are **required** (client must not check), except for 'title' 46 | (fall back to url.thumb-url) and page-comment (fall back to empty string). 47 | 48 | Client edits transcriptions/comments as needed and sends back representation as JSON: 49 | 50 | ``` 51 | POST http://ocr-gt/?action=save&imageUrl=http://ocr-gt/image/page1_thumb.png 52 | ``` 53 | 54 | ```yaml 55 | page-comment: "This page is awesome" 56 | transcriptions: ["Fist line transcribed", …] 57 | line-comments: ["I like this line. It is nice.", …] 58 | ``` 59 | 60 | Server merges this with its representation of the location and stores it. 61 | 62 | Formats 63 | ------- 64 | 65 | * Clients should be able to load a full representation (e.g. zipped folder of 66 | transcriptions, comments, images, hocr and full size) 67 | 68 | Storage 69 | ------- 70 | 71 | Server-side storage should be opaque to the client, the following is only 72 | relevant for this particular implementation. 73 | 74 | All generated data lives in one directory that can be uniquely mapped to a 75 | page, e.g. `/path/to/work1/page1/`. 76 | 77 | * Every line image is saved in a file `line-XXXX.png` 78 | * Every line transcription is saved in a file `line-XXXX.txt` 79 | * Every line OCR output is saved in a file `ocr-XXXX.txt` 80 | * Every line comment is saved in a file `comment-line-XXXX.txt` 81 | * Page transcription is saved in a file `comment-page.txt` 82 | 83 | `XXXX` is a four-digit zero-padded positive integer, e.g. `0023`. 84 | 85 | `XXXX` is continuously incrementing from 1 (i.e. `line-0001.txt`, 86 | `line-0002.txt`) and represents the position of the line in the document order 87 | of the original document 88 | 89 | * For every `line-XXXX.txt` there must be one `comment-line-XXXX.txt`. 90 | * For every `comment-line-XXXX.txt` there must be one `line-XXXX.txt`. 91 | * For every `line-XXXX.png` there must be one `line-XXXX.txt` 92 | 93 | JS API 94 | ------ 95 | 96 | * One window-global object 'wndow.app' (app.js) 97 | * constructor: 98 | * templates: The compiled handlebar templates as functions 99 | * models: Set up the models used in the app 100 | * currentPage: The page model currently edited 101 | * init(): 102 | * Load read-only data from server (special-chars/error-tags) 103 | * Set up the app-global views: 104 | * pageView: A view on the current page, doesn't change 105 | * loadPage(url): 106 | * Replace the currentPage model with one loaded from url 107 | * Set pageView.model to the new model and re-render 108 | * savePage(): 109 | * Save the current page 110 | * Models: Contain the data, that can be saved/loaded 111 | * constructor: url or object 112 | * Default values 113 | * State vars (e.g. was the model modified since last save 114 | * load: Load the model from storage (server, localStorage...) 115 | * save: Save the model to storage (server, localStorage...) 116 | * Views: Contain the visual representation and business logic 117 | * constructor(obj) 118 | * obj.el: jQuery selector of the element backed by this view 119 | * obj.$el: jQuery object wrapping obj.el 120 | * obj.tpl: template function to build html 121 | * obj.model: The model backing this view 122 | * render: 123 | * create HTML form templates 124 | * bind events to HTML elements 125 | * bind to app-global events 126 | * Events: 127 | * app:loading: When starting to load a page 128 | * app:loaded: Successfully loaded a page 129 | * app:saving: When starting to save a page 130 | * app:saved: Successfully saved a page 131 | * app:changed: When any input field is changed 132 | * app:enter-select-mode: When beginning multi-select 133 | * app:exit-select-mode: When leaving multi-select 134 | * ocr-gt-tools.js now merely constructs the app 135 | * All JS files are concatenated to dist/ocr-gt-tools.js 136 | * By file: 137 | * js/models 138 | * cheatsheet: Represents special-chars.json 139 | * error-tags: Represents error-tags.json 140 | * history: The request log for the current user 141 | * line: A line in a page, generated from the 'page' array properties 142 | without the 'line-' prefix 143 | * page: A page, containing lines and a comment 144 | * settings: The UI settings for the current user 145 | * js/views 146 | * animation-view: Animation of glyphs while page is loading 147 | * cheatsheet-view: Table of characters, images, copy-to-clipboard functionality.. 148 | * dropzone-view: The area of the page where URLs can be dropped 149 | * history-view: Table of past requests to keep track 150 | * line-view: View of a single line, handles button logic and model-sync 151 | * page-view: View of a page, creates line-views as necessary, model-sync 152 | * sidebar: The sidebar containing work/collection/page info 153 | * toolbar: THe toolbar with buttons for accessing all the actions 154 | * app: Global app class, entry point to the application 155 | * utils: 156 | * Various, stateless helper functions 157 | -------------------------------------------------------------------------------- /js/app.js: -------------------------------------------------------------------------------- 1 | function setupDragAndDrop() { 2 | // Prevent the default browser drop action 3 | $(document).bind('drop dragover', function(e) { 4 | e.preventDefault(); 5 | }); 6 | // Show the drop zone on as soon as something is dragged 7 | $(document) 8 | .bind('dragenter', function onDragEnter(e) { 9 | e.preventDefault(); 10 | $("#file-correction").addClass('hidden'); 11 | $("#dropzone").removeClass('hidden'); 12 | }) 13 | .bind('dragend', function onDragEnd(e) { 14 | e.preventDefault(); 15 | $("#file-correction").removeClass('hidden'); 16 | $("#dropzone").addClass('hidden'); 17 | }); 18 | $("#dropzone") 19 | .bind('dragover dragenter', function onDragOver(e) { 20 | e.preventDefault(); 21 | $("#dropzone").addClass('droppable').removeClass('hidden'); 22 | }) 23 | .bind('dragenter', function onDragEnterDropZone(e) { 24 | e.stopPropagation(); 25 | }) 26 | .bind('dragleave', function onDragLeaveDropZone(e) { 27 | e.preventDefault(); 28 | $("#dropzone").removeClass('droppable').addClass('hidden'); 29 | }) 30 | .bind('drop', function onDrop(e) { 31 | e.preventDefault(); 32 | 33 | if (window.app.currentPage && window.app.currentPage.changed) { 34 | notie.alert(2, "Ungesicherte Inhalte vorhanden, bitte zuerst speichern!", 2); 35 | } else { 36 | var url = Utils.getUrlFromDragEvent(e); 37 | if (url) { 38 | window.location.hash = '#' + url; 39 | } else { 40 | notie.alert(3, "Konnte keine URL erkennen."); 41 | } 42 | } 43 | }); 44 | } 45 | 46 | function App() { 47 | 48 | // A dummy element, just used for event emitting/listening 49 | this.$el = $("
"); 50 | 51 | // Set up models 52 | this.settings = new Settings(); 53 | this.history = new History(); 54 | this.errorTags = new ErrorTags(); 55 | this.cheatsheet = new Cheatsheet(); 56 | 57 | // Compile templates 58 | this.templates = Utils.compileTemplates(); 59 | } 60 | 61 | App.prototype.on = function() { 62 | if (this.settings.debug) console.info('bind', arguments[0], ' -> ', arguments[1].name || arguments[1]); 63 | this.$el.on.apply(this.$el, arguments); 64 | return this; 65 | }; 66 | App.prototype.once = function() { 67 | if (this.settings.debug) console.info('bind', arguments[0], ' -> ', arguments[1].name || arguments[1]); 68 | this.$el.one.apply(this.$el, arguments); 69 | return this; 70 | }; 71 | 72 | 73 | App.prototype.emit = function() { 74 | if (this.settings.debug) console.log('emit', arguments[0], arguments[1] || ''); 75 | var event = arguments[0]; 76 | if (event === 'app:saved') { 77 | notie.alert(1, "Gespeichert", 1); 78 | } else if (event === 'app:ajaxError') { 79 | var xhr = arguments[1]; 80 | notie.alert(3, "HTTP Fehler " + xhr.status + ":\n
" + xhr.responseText + "
"); 81 | } 82 | this.$el.trigger.apply(this.$el, arguments); 83 | return this; 84 | }; 85 | 86 | App.prototype.confirmExit = function confirmExit() { 87 | if (this.currentPage && this.currentPage.changed) { 88 | notie.alert(2, "Ungesicherte Inhalte vorhanden, bitte zuerst speichern!", 5); 89 | return "Ungesicherte Inhalte vorhanden, bitte zuerst speichern!"; 90 | } 91 | }; 92 | 93 | App.prototype.onHashChange = function onHashChange(e) { 94 | e.preventDefault(); 95 | var newHash = window.location.hash; 96 | console.log(e.oldURL); 97 | if (!e.oldURL) { 98 | console.info('HashChange (initial) -> ', newHash); 99 | } else { 100 | var oldHash = e.oldURL.substr(e.oldURL.indexOf('#')); 101 | console.info('HashChange', oldHash, ' -> ', newHash); 102 | if (oldHash === newHash) { 103 | return; 104 | } 105 | if (this.confirmExit()) { 106 | window.location.hash = '#' + this.currentPage.imageUrl; 107 | return; 108 | } 109 | } 110 | if (newHash.length > 2) 111 | this.loadPage(newHash.substr(1)); 112 | }; 113 | 114 | App.prototype.showHistory = function() { 115 | var self = this; 116 | this.history.load(function(err) { 117 | if (err) { 118 | return self.emit('app:ajaxError', err); 119 | } 120 | self.historyView.render(); 121 | }); 122 | }; 123 | 124 | App.prototype.render = function() { 125 | 126 | var self = this; 127 | 128 | // Select mode initially off 129 | this.selectMode = false; 130 | 131 | // Setup event handlers for drag and drop 132 | // TODO 133 | setupDragAndDrop(); 134 | 135 | // Render views 136 | this.waitingAnimation.render(); 137 | this.cheatsheetView.render(); 138 | this.toolbar.render(); 139 | this.selectbar.render(); 140 | this.dropzone.render(); 141 | 142 | this.on('app:loading', function hideSidebar() { self.sidebar.$el.addClass('hidden'); }); 143 | this.on('app:loading', function hidePageView() { self.pageView.$el.addClass('hidden'); }); 144 | this.on('app:loaded', function showSidebar() { self.sidebar.$el.removeClass('hidden'); }); 145 | this.on('app:loaded', function showPageView() { self.pageView.$el.removeClass('hidden'); }); 146 | }; 147 | 148 | App.prototype.init = function init() { 149 | var self = this; 150 | 151 | // window global events 152 | window.onbeforeunload = self.confirmExit.bind(self); 153 | window.onhashchange = self.onHashChange.bind(self); 154 | 155 | // Set up views 156 | this.pageView = new PageView({'el': "#file-correction",}); 157 | this.dropzone = new Dropzone({'el': '#dropzone'}); 158 | this.toolbar = new Toolbar({'el': '#toolbar'}); 159 | this.sidebar = new Sidebar({'el': '#right-sidebar'}); 160 | this.selectbar = new Selectbar({'el': '#select-bar'}); 161 | this.waitingAnimation = new WaitingAnimation({ 162 | 'el': "#waiting-animation", 163 | 'model': this.cheatsheet 164 | }); 165 | this.historyView = new HistoryView({ 166 | 'el': "#history-modal", 167 | 'model': this.history, 168 | 'tpl': this.templates.historyItem, 169 | }); 170 | this.cheatsheetView = new CheatsheetView({ 171 | 'el': "#cheatsheet-modal", 172 | 'model': this.cheatsheet, 173 | 'tpl': this.templates.cheatsheetEntry, 174 | }); 175 | 176 | // Load cheatsheet and errorTags 177 | async.each([this.cheatsheet, this.errorTags], function(model, done) { 178 | model.load(done); 179 | }, function(err) { 180 | if (err) return self.emit('app:ajaxError', err); 181 | self.settings.load(); 182 | self.render(); 183 | // Trigger hash change 184 | $(window).trigger('hashchange'); 185 | self.$el.trigger('app:initialized'); 186 | }); 187 | }; 188 | 189 | App.prototype.savePage = function savePage() { 190 | var self = this; 191 | let currentPage = window.app.currentPage; 192 | if (!currentPage) { 193 | notie.alert(1, "Nichts zu speichern", 1); 194 | } else { 195 | this.emit('app:saving'); 196 | window.app.currentPage.save(function(err) { 197 | if (err) { 198 | self.emit('app:ajaxError', err); 199 | } else { 200 | self.emit('app:saved'); 201 | } 202 | }); 203 | } 204 | }; 205 | 206 | App.prototype.loadPage = function loadPage(url) { 207 | var self = this; 208 | if (self.confirmExit()) return; 209 | this.emit('app:loading'); 210 | this.currentPage = new Page(url); 211 | this.currentPage.load(function(err) { 212 | if (err) { 213 | return self.emit('app:ajaxError', err); 214 | } 215 | self.pageView.model = self.currentPage; 216 | self.pageView.render(); 217 | self.sidebar.model = self.currentPage; 218 | self.sidebar.render(); 219 | self.emit('app:loaded'); 220 | }); 221 | }; 222 | -------------------------------------------------------------------------------- /doc/user-scripts/scrape-wiki.user.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name Extract Special Characters 3 | // @namespace http://github.com/kba/ 4 | // @include https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Special-Characters 5 | // @include https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Error-Tags 6 | // @description Extract special character data from ocr-gt-tools wiki 7 | // @version 1 8 | // @require https://code.jquery.com/jquery-2.2.3.min.js 9 | // @require https://cdnjs.cloudflare.com/ajax/libs/z-schema/3.17.0/ZSchema-browser.js 10 | // @grant GM_addStyle 11 | // @grant GM_setClipboard 12 | // ==/UserScript== 13 | /*globals GM_addStyle */ 14 | /*globals ZSchema */ 15 | 16 | var CSS = ` 17 | pre.schema-error 18 | { 19 | background: #a00; 20 | color: white; 21 | white-space: pre-wrap; 22 | } 23 | div#glyph-bar 24 | { 25 | font-size: x-large; 26 | position:fixed; 27 | bottom: 0; 28 | height: 48px; 29 | border: 2px solid black; 30 | background: white; 31 | width: 100%; 32 | } 33 | div#glyph-bar .left * { float: left; } 34 | div#glyph-bar .right * { float: right; } 35 | div#glyph-bar * 36 | { 37 | height: 100%; 38 | font-size: x-large; 39 | } 40 | div#glyph-bar input[type='text'] 41 | { 42 | font-family: "Garamond", "Bookman", serif; 43 | } 44 | div#schema-bar 45 | { 46 | position: fixed; 47 | z-index: 3000; 48 | top: 0; 49 | background: #900; 50 | color: white !important; 51 | width: 100%; 52 | font-size: x-large; 53 | height: 48px; 54 | border: 2px solid black; 55 | } 56 | div#schema-invalid 57 | { 58 | display: none; 59 | } 60 | div#schema-invalid a 61 | { 62 | display: inline-block; 63 | color: white !important; 64 | float: none; 65 | margin: 0 2px; 66 | } 67 | `; 68 | 69 | var SCHEMAS = { 70 | 'Special-Characters': { 71 | 'type': 'object', 72 | "additionalProperties": false, 73 | 'properties': { 74 | 'id': { 75 | 'type': 'string', 76 | 'pattern': '^[a-z0-9-]+$', 77 | }, 78 | 'sample': { 79 | 'type': 'array', 80 | 'items': { 81 | 'type': 'string', 82 | 'pattern': '^ 163 |
164 | 165 | 166 |
 
167 |
168 |
169 | `); 170 | $("#glyph-input").on('keyup', function(e) { 171 | var $input = $("#glyph-input"); 172 | var from = $input[0].selectionStart; 173 | var to = $input[0].selectionEnd; 174 | if (from == to) { 175 | from -= 1; 176 | } 177 | $('#glyph-propose').empty(); 178 | var $propose = $('#glyph-propose'); 179 | var val = $input.val(); 180 | var chosen = val.substring(from, to); 181 | console.log(chosen, from, to); 182 | $.each(scraped, function() { 183 | var glyphDesc = this; 184 | if (glyphDesc.baseLetter.indexOf(chosen) === -1) { 185 | return; 186 | } 187 | $.each(glyphDesc.sample, function(i, sample) { 188 | $propose.append($(sample) 189 | .on('click', function(e) { 190 | e.preventDefault(); 191 | $input.val(val.substr(0, from) + glyphDesc.recognition + val.substr(to)); 192 | })); 193 | }); 194 | }); 195 | }); 196 | }, 197 | 'Error-Tags': function(scraped) { 198 | console.log('Not Implemented for Error-Tags (yet?)'); 199 | } 200 | }; 201 | 202 | function scrapeJsonFromWikiPage(schema) { 203 | var parsed = {}; 204 | var validator = new ZSchema(); 205 | var h2s = $(".markdown-body h2").get(); 206 | for (let i = 0; i < h2s.length; i++) { 207 | var $h2 = $(h2s[i]); 208 | var thingDesc = {}; 209 | var thingId = $h2.text().trim(); 210 | parsed[thingId] = thingDesc; 211 | var lis = $h2.next('ul').find('li').get(); 212 | for (let j = 0; j < lis.length; j++) { 213 | var liHtml = $(lis[j]).html(); 214 | var colonIndex = liHtml.indexOf(':'); 215 | var varName = liHtml.substring(0, colonIndex) 216 | .toLowerCase() 217 | .replace(/[^a-z0-9]+/g, '_') 218 | .replace(/_([a-z])/g, function(orig, ch) { 219 | return ch.toUpperCase(); 220 | }) 221 | .replace(/^_|_$/, ''); 222 | // console.log(`Parsing '${varName}'`); 223 | var rawValue = liHtml.substring(colonIndex + 1).trim(); 224 | rawValue = rawValue.replace('&', '&'); 225 | if (schema.properties[varName] && schema.properties[varName].type === 'array') { 226 | thingDesc[varName] = rawValue.split(/\s*;\s*/); 227 | } else if (schema.properties[varName] && schema.properties[varName].type === 'number') { 228 | thingDesc[varName] = parseFloat(rawValue); 229 | } else if (/[A-Z][a-z]$/.test(varName)) { 230 | var lang = varName.substr(-2).toLowerCase(); 231 | varName = varName.substring(0, varName.length - 2); 232 | thingDesc[varName] = thingDesc[varName] || {}; 233 | thingDesc[varName][lang] = rawValue; 234 | } else { 235 | thingDesc[varName] = rawValue; 236 | } 237 | } 238 | thingDesc.id = thingId; 239 | console.log([thingDesc, schema]); 240 | if (!validator.validate(thingId, schema.properties.id)) { 241 | showError(thingId, validator.getLastErrors()); 242 | } 243 | if (!validator.validate(thingDesc, schema)) { 244 | showError(thingId, validator.getLastErrors()); 245 | } 246 | } 247 | return parsed; 248 | }; 249 | 250 | function escapeHTML(str) { 251 | var entityMap = { 252 | "&": "&", 253 | "<": "<", 254 | ">": ">", 255 | '"': '"', 256 | "'": ''', 257 | "/": '/' 258 | }; 259 | return String(str).replace(/[&<>"'\/]/g, function(s) { 260 | return entityMap[s]; 261 | }); 262 | } 263 | 264 | function showError(faultyId, err) { 265 | $(`h2:contains('${faultyId}')`).append( 266 | `
${escapeHTML(JSON.stringify(err, null, 2))}
`); 267 | $("#schema-invalid").show().append( 268 | `[${ $("#schema-invalid a").length + 1}]`); 269 | } 270 | 271 | $(function() { 272 | GM_addStyle(CSS); 273 | $("body").prepend( 274 | ` 275 |
276 |
!! INVALID
277 |
278 | 279 | 280 |
281 |
282 | `); 283 | var wikiPage = window.location.href.replace(/.*\//, '').replace(/#.*$/, ''); 284 | var schema = SCHEMAS[wikiPage]; 285 | var scraped = scrapeJsonFromWikiPage(schema); 286 | ON_LOAD[wikiPage](scraped); 287 | $("#copy-schema").on('click', function() { 288 | GM_setClipboard(JSON.stringify(SCHEMAS[schema], null, 2)); 289 | window.alert("Copied JSON schema to clipboard"); 290 | }); 291 | $("#copy-json").on('click', function() { 292 | GM_setClipboard(JSON.stringify(scraped, null, 2)); 293 | window.alert("Copied JSON schema to clipboard"); 294 | }); 295 | }); 296 | -------------------------------------------------------------------------------- /ocr-gt-tools.pug: -------------------------------------------------------------------------------- 1 | doctype html 2 | html(lang='de') 3 | head 4 | meta(charset='utf-8') 5 | title OCR-GT 6 | link(href='vendor.css', rel='stylesheet') 7 | link(rel='shortcut icon', type='image/x-icon', href='favicon.ico') 8 | link(href='ocr-gt-tools.css', rel='stylesheet') 9 | //- meta(http-equiv="Content-Type",content="text/html;charset=utf-8") 10 | body 11 | 12 | //* 13 | // Page info on the right 14 | //* 15 | script#tpl-rightSidebar(type='text/x-handlebars-template') 16 | .panel.panel-success 17 | .panel-heading 18 | h4.panel-title 19 | a(data-toggle='collapse', href='#page-comment') Seiten-Kommentar 20 | #page-comment.panel-collapse.collapse.in 21 | a.list-group-item(target='_blank', href='{{ this.url.hires-url }}') 22 | img(src='{{ this.url.thumb-url }}') 23 | textarea(placeholder="Seitenkommentar...") {{ page-comment }} 24 | .panel.panel-info 25 | .panel-heading 26 | h4.panel-title 27 | a(data-toggle='collapse', href='#work-info') Werk-Info 28 | #work-info.panel-collapse.collapse.in 29 | .list-group 30 | a.list-group-item(target='_blank', href='{{ this.url.landing-page-url }}') 31 | i.fa.fa-download 32 | | Werk in Digitaler Bibliothek 33 | | {{#each pages}} 34 | a.list-group-item(href='\#{{ this.url.thumb-url }}') 35 | i.fa.fa-history 36 | | Seite {{this.ids.page}} 37 | | {{/each}} 38 | .panel.panel-info 39 | .panel-heading 40 | h4.panel-title 41 | a(data-toggle='collapse', href='#page-info') Seiten-Info 42 | #page-info.panel-collapse.collapse.in 43 | .list-group 44 | a#download-comments.list-group-item(target='_blank', href='{{ this.url.comment-url }}') 45 | i.fa.fa-download 46 | | Anmerkungen 47 | a#download-correction.list-group-item(target='_blank', href='{{ this.url.correction-url }}') 48 | i.fa.fa-download 49 | | Korrektur 50 | 51 | //* 52 | // A cheatsheet entry 53 | //* 54 | script#tpl-cheatsheetEntry(type='text/x-handlebars-template') 55 | tr(id="cheatsheet-{{ id }}") 56 | td 57 | a(target="_blank", href="https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Special-Characters\#{{ id }}") 58 | | {{ name.de }} 59 | td {{{ sample }}} 60 | td 61 | button.code(data-clipboard-text="{{ recognition }}") 62 | span.fa.fa-clipboard.clipboard 63 | span.recognition {{ recognition }} 64 | 65 | //* 66 | // A line of page 67 | //* 68 | script#tpl-lineContainer(type='text/x-handlebars-template') 69 | .row.line(id="line-{{ id }}") 70 | 71 | script#tpl-line(type='text/x-handlebars-template') 72 | .col-sm-1.button-col(data-target="#line-comment-{{ id }}") 73 | .btn-group.btn-group-sm(role='group') 74 | .btn-group(role='group') 75 | a.btn.btn-default.toggle-line-comment(data-target="#line-comment-{{ id }}") 76 | i.show-line-comment.fa.fa-commenting-o 77 | i.hide-line-comment.fa.fa-map-o.hidden 78 | .btn-group(role='group') 79 | button.btn.btn-default.dropdown-toggle(data-toggle="dropdown", type='button', aria-haspopup="true", aria-expanded="false", tabindex="-1") 80 | i.fa.fa-exclamation 81 | span.caret 82 | ul.dropdown-menu 83 | li 84 | a.btn.btn-default.add-comment(data-target="#line-comment-{{ id }}", data-tag="#text-italic") 85 | i.fa.fa-italic 86 | | Kursiv 87 | li 88 | a.btn.btn-warning.add-comment(data-target="#line-comment-{{ id }}", data-tag="#wrong-image-section") 89 | i.fa.fa-exclamation-circle 90 | | Fehler in Vorlage 91 | 92 | .col-sm-1.select-col.hidden(data-target="#line-comment-{{ id }}") 93 | input(type="checkbox") 94 | 95 | .col-sm-11.lines-col 96 | .panel.panel-default 97 | ul.list-group 98 | li.list-group-item.line-image 99 | img(src="{{ image }}", height="30") 100 | li.list-group-item.line-transcription 101 | input(type='text', value='{{ transcription }}') 102 | li.list-group-item.line-comment(id="line-comment-{{ id }}") 103 | textarea(placeholder="Kommentar zu dieser Zeile") {{ comment }} 104 | 105 | //* 106 | // A row of history 107 | //* 108 | script#tpl-historyItem(type='text/x-handlebars-template') 109 | tr 110 | td {{ date }} 111 | td 112 | a(href="\#{{ url }}") {{ url }} 113 | td {{ action }} 114 | td {{ ip }} 115 | 116 | //* 117 | // Modal popup containing the history 118 | //* 119 | #history-modal.modal.fade(tabindex="-1") 120 | .modal-admin 121 | .modal-content 122 | .modal-header 123 | h2.modal-title Verlauf 124 | button.close(type='button', data-dismiss='modal') 125 | span × 126 | .modal-body 127 | table.table 128 | thead 129 | tr 130 | th Date 131 | th URL 132 | th Action 133 | th IP 134 | tbody 135 | 136 | //* 137 | // Help modal 138 | //* 139 | #cheatsheet-modal.modal.fade(tabindex="-1") 140 | .modal-admin 141 | .modal-content 142 | .modal-header 143 | h4.modal-title Eingabe Spickzettel 144 | .cheatsheet-entry 145 | input(type='text') 146 | button.close(type='button', data-dismiss='modal') 147 | span × 148 | .modal-body 149 | table.table.table-condensed 150 | tbody.cheatsheet 151 | 152 | //* 153 | // Will contain the raw HTML 154 | //* 155 | #raw-html.hidden 156 | 157 | //* 158 | // Will contain the toolbar for the multi select mode 159 | //* 160 | #select-bar.hidden 161 | .panel-heading.clearfix 162 | button.toggle-select-mode.close.pull-left(type='button', data-dismiss='modal') 163 | span × 164 | .btn-group.pull-right 165 | a.btn.btn-sm.btn-default.select-all 166 | i.fa.btn-sm.fa-arrows-alt 167 | | Alle auswählen 168 | a.btn.btn-sm.btn-default.select-none 169 | i.fa.btn-sm.fa-arrows-alt 170 | | Auswahl aufheben 171 | a.btn.btn-sm.btn-default.select-toggle 172 | i.fa.btn-sm.fa-arrows-alt 173 | | Auswahl umkehren 174 | a.btn.btn-sm.btn-default.add-multi-comment(data-tag="#text-italic") 175 | i.fa.btn-sm.fa-italic 176 | | Kursiv 177 | a.btn.btn-sm.btn-warning.add-multi-comment(data-tag="#wrong-image-section") 178 | i.fa.btn-sm.fa-exclamation-circle 179 | | Fehler in Vorlage 180 | 181 | //* 182 | // The navbar on the left side 183 | //* 184 | #toolbar.nav.navbar.navbar-default(role='navigation') 185 | 186 | // Brand and toggle get grouped for better mobile display 187 | .navbar-header 188 | button.navbar-toggle(type='button', data-toggle='collapse', data-target='.navbar-ex1-collapse') 189 | span.sr-only Toggle navigation 190 | 191 | // Collect the nav links, forms, and other content for toggling 192 | .collapse.navbar-collapse.navbar-ex1-collapse 193 | ul.nav.navbar-nav 194 | li 195 | button#save_button.btn.btn-success.disabled 196 | span#wait_save.hidden 197 | i.fa.fa-4x.fa-spinner.fa-pulse.fa-fw 198 | span#disk 199 | i.fa.fa-4x.fa-save.fa-fw 200 | 201 | //- Info 202 | li.dropdown.info 203 | a.btn.dropdown-toggle(data-toggle='dropdown') 204 | i.fa.fa-4x.fa-info-circle 205 | ul.dropdown-menu.navbar-default 206 | li 207 | .btn-group.btn-group-vertical 208 | a.btn.btn-default.pull-right(target="_blank", href="https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Error-Tags") 209 | i.fa.fa-exclamation 210 | | Fehler-Kategorien 211 | button.btn.btn-default(data-toggle="modal",data-target="#cheatsheet-modal",data-keyboard="true") 212 | i.fa.fa-question 213 | | Spickzettel 214 | button.btn.btn-default(data-toggle="modal",data-target="#history-modal",data-keyboard="true") 215 | i.fa.fa-history 216 | | Verlauf 217 | 218 | //- Zoom 219 | li.dropdown.disabled.zoom 220 | a.btn.dropdown-toggle(data-toggle='dropdown') 221 | i.fa.fa-search-plus.fa-4x 222 | ul.dropdown-menu.navbar-default 223 | li 224 | .btn-group.btn-group-vertical 225 | button#zoom-in.btn.btn-default(type='button') 226 | i.fa.fa-search-plus 227 | | Größer 228 | button#zoom-reset.btn.btn-default(type='button') 229 | i.fa.fa-search 230 | | Zurücksetzen 231 | button#zoom-out.btn.btn-default(type='button') 232 | i.fa.fa-search-minus 233 | | Kleiner 234 | 235 | //- Show/hide comments 236 | li.dropdown.disabled.views 237 | a.btn.dropdown-toggle(data-toggle='dropdown') 238 | i.fa.fa-4x.fa-eye 239 | ul.dropdown-menu.navbar-default 240 | li 241 | .btn-group.btn-group-vertical 242 | button.set-view.btn.btn-default(type='button', data-target='*') 243 | i.fa.fa-picture-o 244 | i.fa.fa-commenting-o 245 | i.fa.fa-align-left 246 | | Vollansicht 247 | button.set-view.btn.btn-default(type='button', data-target='.line-transcription,.line-image') 248 | i.fa.fa-picture-o 249 | i.fa.fa-align-left 250 | | Bild + Text 251 | button.set-view.btn.btn-default(type='button', data-target='.line-comment,.line-image') 252 | i.fa.fa-picture-o 253 | i.fa.fa-commenting-o 254 | | Bild + Kommentar 255 | button.set-view.btn.btn-default(type='button', data-target='.line-comment,.line-transcription') 256 | i.fa.fa-align-left 257 | i.fa.fa-commenting-o 258 | | Text + Kommentar 259 | button.set-view.btn.btn-default(type='button', data-target='.line-image') 260 | i.fa.fa-picture-o 261 | | Bild 262 | button.set-view.btn.btn-default(type='button', data-target='.line-transcription') 263 | i.fa.fa-align-left 264 | | Text 265 | button.set-view.btn.btn-default(type='button', data-target='.line-comment') 266 | i.fa.fa-commenting-o 267 | | Kommentar 268 | 269 | li.dropdown.disabled.filter 270 | a.toggle-select-mode.btn.dropdown-toggle(data-toggle='dropdown') 271 | i.fa.fa-4x.fa-plus-square-o 272 | 273 | //- Filter 274 | li.dropdown.disabled.filter 275 | a.btn.dropdown-toggle(data-toggle='dropdown') 276 | i.fa.fa-4x.fa-sort 277 | ul.dropdown-menu.navbar-default 278 | li 279 | .btn-group.btn-group-vertical 280 | button#sort-line.btn.btn-default 281 | i.fa.fa-sort 282 | | Normale Sortierung (auf) 283 | button#sort-line-desc.btn.btn-default 284 | i.fa.fa-sort 285 | | Umgekehrte Sortierung (ab) 286 | button#sort-width.btn.btn-default 287 | i.fa.fa-sort 288 | | Nach Breite sortieren (auf) 289 | button#sort-width-desc.btn.btn-default 290 | i.fa.fa-sort 291 | | Nach Breite sortieren (ab) 292 | 293 | //* 294 | //- Here be dragons 295 | //* 296 | .container-fluid 297 | .row 298 | .col-sm-9#root 299 | #dropzone.jumbotron 300 | span#wait-load.hidden 301 | i.fa.fa-spinner.fa-pulse.fa-fw 302 | | Grafik auf dem Dokument ablegen 303 | br 304 | | oder 305 | br 306 | | URL angeben: 307 | br 308 | div.input-group#load-image 309 | input.input-lg.form-control(type='text') 310 | span.input-group-btn 311 | button.btn.btn-default.btn-lg(type="button") Los! 312 | #file-correction 313 | #waiting-animation.hidden 314 | #right-sidebar.col-sm-3.panel-group 315 | 316 | script(src='vendor.js') 317 | script(src='ocr-gt-tools.js') 318 | //- vim: sw=2 ts=2 noet ft=pug : 319 | -------------------------------------------------------------------------------- /ocr-gt-tools.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use CGI; 5 | use CGI::Carp qw(carpout); 6 | use Cwd qw(abs_path); 7 | use Hash::Merge; 8 | use File::Basename qw(dirname); 9 | use File::Path qw(make_path); 10 | use IPC::Run qw(run); 11 | use JSON; 12 | use POSIX qw(strftime); 13 | use Time::HiRes qw(time); 14 | use YAML::XS qw(LoadFile Dump); 15 | use open ":encoding(utf8)"; 16 | 17 | my $DEBUGLOG; 18 | my $DATE_FORMAT = "%Y-%m-%d"; 19 | my $TIME_FORMAT = "%H:%M:%S"; 20 | 21 | # Directory containing the CGI script 22 | my $OCR_GT_BASEDIR = dirname(abs_path($0)); 23 | my $cgi = CGI->new; 24 | my $config = loadConfig(); 25 | 26 | =head2 setupLogging 27 | 28 | Setup logging 29 | 30 | =cut 31 | 32 | sub setupLogging 33 | { 34 | my $logdir = $config->{'logging'}->{'logdir'}; 35 | if (! -d "$logdir") { 36 | make_path "$logdir", {mode => oct(777)}; 37 | } 38 | if ($config->{'logging'}->{'stderr'}) { 39 | $DEBUGLOG = *STDERR; 40 | } else { 41 | open($DEBUGLOG, ">>", "$logdir/ocr-gt-tools.log") 42 | or die "Cannot write to log file '$logdir/ocr-gt-tools.log': $!\n"; 43 | } 44 | carpout(*$DEBUGLOG); 45 | } 46 | 47 | =head2 debug 48 | 49 | Log a message to the log file. 50 | 51 | =cut 52 | 53 | sub debug 54 | { 55 | my $msg = sprintf(shift(), @_); 56 | my $t = time; 57 | my $timestamp = strftime $TIME_FORMAT, localtime $t; 58 | $timestamp .= sprintf ".%03d", ($t-int($t))*1000; # without rounding 59 | printf $DEBUGLOG "%s: %s\n", $timestamp, $msg; 60 | } 61 | 62 | 63 | =head2 slurp 64 | 65 | Read file contents into a string. 66 | 67 | =cut 68 | 69 | sub slurp 70 | { 71 | my ($fname) = @_; 72 | my $out = ''; 73 | open my $fh, "<:encoding(UTF-8)", $fname or httpError(500, "Could not read file %s: $!", $fname); 74 | $out .= $_ while(<$fh>); 75 | close $fh; 76 | chomp $out; 77 | return $out; 78 | } 79 | 80 | =head2 logRequest 81 | 82 | Log the IP and scan URL to request.log 83 | 84 | =cut 85 | 86 | sub logRequest 87 | { 88 | my $url = $cgi->param('imageUrl'); 89 | if (!$url) { 90 | debug("No URL to log for this request"); 91 | return; 92 | } 93 | my $action = $cgi->url_param('action'); 94 | my $t = time; 95 | my $timestamp = strftime sprintf("%sT%sZ", $DATE_FORMAT, $TIME_FORMAT), localtime $t; 96 | my $json = JSON->new->utf8->pretty(0)->encode({ 97 | date => $timestamp, 98 | action => $action, 99 | url => $url, 100 | ip => $ENV{REMOTE_ADDR} 101 | }); 102 | my $logdir = $config->{'logging'}->{'logdir'}; 103 | open(my $REQUESTLOG, ">>", "$logdir/request.log") 104 | or httpError(500, "Cannot write to log file '$logdir/request.log': $!\n"); 105 | print $REQUESTLOG $json . "\n"; 106 | close $REQUESTLOG; 107 | } 108 | 109 | 110 | =head2 httpError 111 | 112 | Send an HTTP error message 113 | 114 | =cut 115 | 116 | sub httpError 117 | { 118 | my $status = shift; 119 | my $msg = sprintf(shift(), @_); 120 | print $cgi->header(-type => 'text/plain', -status => $status); 121 | debug("********* ERROR $status - $msg *********"); 122 | print $msg; 123 | exit 1; 124 | } 125 | 126 | 127 | =head2 httpJSON 128 | 129 | convert data to JSON and send it 130 | 131 | =cut 132 | 133 | sub httpJSON 134 | { 135 | my ($obj, $compact) = @_; 136 | if (ref $obj eq 'ARRAY') { 137 | $obj = [ 138 | map { delete $_->{'path'}; delete $_->{'command'}; $_; } 139 | @{ $obj } 140 | ]; 141 | } elsif (ref $obj) { 142 | delete $obj->{'path'}; delete $obj->{'command'} 143 | } 144 | my $json = JSON->new->utf8->pretty(1)->encode($obj); 145 | print $cgi->header( -type => 'application/json', -charset => 'utf-8'); 146 | print $json; 147 | } 148 | 149 | 150 | =head2 loadConfig 151 | 152 | Load the configuration from the YAML file 153 | 154 | =cut 155 | 156 | sub loadConfig 157 | { 158 | my $ymlFile = "$OCR_GT_BASEDIR/ocr-gt-tools.default.yml"; 159 | my $config = LoadFile($ymlFile); 160 | 161 | # Merge with non-default config 162 | if (-e "$OCR_GT_BASEDIR/ocr-gt-tools.yml") { 163 | my $merge = Hash::Merge->new('RIGHT_PRECEDENT'); 164 | $config = $merge->merge($config, LoadFile("$OCR_GT_BASEDIR/ocr-gt-tools.yml")); 165 | } 166 | 167 | # Compile all 'pattern' as regexes 168 | for (my $i = 0; $i < scalar @{$config->{'pattern'}}; $i++) { 169 | my $pat = $config->{'pattern'}->[$i]; 170 | $config->{'pattern'}->[$i] = qr/$pat/smx; 171 | } 172 | 173 | # Remove all newlines from 'template' 174 | for my $category (keys %{$config->{'template'}}) { 175 | while (my ($k, $v) = each %{$config->{'template'}->{$category}}) { 176 | $v =~ s/\n//g; 177 | $config->{'template'}->{$category}->{$k} = $v; 178 | } 179 | } 180 | 181 | # Replace in logging->logdir 182 | $config->{'logging'}->{'logdir'} =~ s//$OCR_GT_BASEDIR/g; 183 | 184 | return $config; 185 | } 186 | 187 | 188 | 189 | =head2 parse 190 | 191 | Map a URL/path to a location 192 | 193 | =cut 194 | 195 | sub parse 196 | { 197 | my ($str) = @_; 198 | 199 | # Try to match all the 'pattern' 200 | for (my $i = 0; $i < scalar @{$config->{'pattern'}}; $i++) { 201 | my $pat = $config->{'pattern'}->[$i]; 202 | if ($str =~ $pat) { 203 | debug("Pattern # $i matched '$str'"); 204 | return { 205 | ids => {%+}, 206 | renderTemplates(%+), 207 | } 208 | } 209 | # debug("No match: $str =~ $pat"); 210 | } 211 | httpError(400, "Could not match '$str' to any known pattern"); 212 | } 213 | 214 | 215 | =head2 renderTemplates 216 | 217 | Replace all variables appropriately 218 | 219 | =cut 220 | 221 | sub renderTemplates 222 | { 223 | my %tokens = (%{ $config->{'defaults'} }, @_); 224 | my %obj; 225 | 226 | # Copy templates from the configuration and fill them with the tokens parsed 227 | for my $category ('url', 'path', 'command') { 228 | $obj{$category} = {}; 229 | while (my ($tplName, $tpl) = each %{$config->{'template'}->{$category}}) { 230 | 231 | # Initially set to the template string 232 | $obj{$category}->{$tplName} = $tpl; 233 | 234 | # First replace the tokens from the pattern 235 | while (my ($key, $value) = each %tokens) { 236 | if (ref $obj{$category}->{$tplName} eq 'ARRAY') { 237 | $obj{$category}->{$tplName} = [ 238 | map { 239 | my $foo = $_; 240 | $foo =~ s/<$key>/$value/g; 241 | $foo =~ s//$OCR_GT_BASEDIR/g; 242 | $foo; 243 | } @{ $obj{$category}->{$tplName} } 244 | ]; 245 | } else { 246 | $obj{$category}->{$tplName} =~ s/<$key>/$value/g; 247 | $obj{$category}->{$tplName} =~ s//$OCR_GT_BASEDIR/g; 248 | } 249 | } 250 | 251 | # Add this to the list of expanded tokens 252 | unless (ref $obj{$category}->{$tplName}) { 253 | $tokens{$tplName} = $obj{$category}->{$tplName}; 254 | } 255 | } 256 | } 257 | # debug( "Rendered object: %s", Dump(\%obj)); 258 | return %obj; 259 | } 260 | 261 | 262 | =head2 executeCommand 263 | 264 | Execute one of the location's commands. 265 | 266 | =cut 267 | 268 | sub executeCommand 269 | { 270 | my ($cmd) = @_; 271 | # Seiten in Bildzeilen und Textzeilen aufteilen 272 | debug("About to execute '%s'", join(' ', @{$cmd})); 273 | run $cmd, '>', \my $stdout, '2>', \my $stderr; 274 | debug($stdout); 275 | debug($stderr); 276 | if($?) { 277 | return httpError(500, "'%s' returned non-zero exit code '$?':\n\t%s\n%s", join(' ', @{$cmd}), $stdout, $stderr); 278 | } else { 279 | debug("Successfully run '%s': %s" , join(' ', @{$cmd}), substr($stdout, 0, 100)); 280 | } 281 | return split /\n/, $stdout; 282 | } 283 | 284 | 285 | =head2 handleGet 286 | 287 | Get process to create the files necessary 288 | 289 | =cut 290 | 291 | sub handleGet 292 | { 293 | my $url = $cgi->param('imageUrl'); 294 | if (! $url) { 295 | return httpError(400, "Missing parameter 'imageUrl' \n\n"); 296 | } 297 | # Get file object 298 | my $location = parse($url); 299 | if (! -e $location->{'path'}->{'correction-dir'}) { 300 | executeCommand($location->{'command'}->{'extract-images'}); 301 | $location->{'page-comment'} = ''; 302 | for ('line-comments', 'line-transcriptions', 'line-images') { 303 | $location->{$_} = []; 304 | } 305 | } 306 | chdir $location->{'path'}->{'correction-dir'}; 307 | for my $fname (glob 'line-*.*') { 308 | my $idx = int(substr($fname, 5, 4)); 309 | debug($idx); 310 | if ($fname =~ /txt$/) { 311 | $location->{'line-transcriptions'}->[$idx-1] = slurp($fname); 312 | } else { 313 | $location->{'line-images'}->[$idx-1] = sprintf($location->{'url'}->{'line-image-url'}, $idx); 314 | } 315 | } 316 | for my $fname (glob 'line-*.txt') { 317 | $location->{'line-transcriptions'}->[-1+substr($fname, 5, 4)] = slurp($fname); 318 | } 319 | for my $fname (glob 'comment-line-*.txt') { 320 | $location->{'line-comments'}->[-1+substr($fname, 13, 4)] = slurp($fname); 321 | } 322 | $location->{'page-comment'} = slurp('comment-page.txt'); 323 | $location->{'pages'} = [ map {parse($_)} executeCommand($location->{'command'}->{'find-corrections-for-work'}) ]; 324 | # Send JSON response 325 | httpJSON($location); 326 | } 327 | 328 | 329 | =head2 handleSave 330 | 331 | Save transcriptions and comments passed via POST params. 332 | 333 | =cut 334 | 335 | sub handleSave 336 | { 337 | my $postdata = $cgi->param('POSTDATA'); 338 | my $body = JSON->new->utf8->decode($postdata) or httpError(400, "Could not parse POST body"); 339 | my $location = parse($body->{'url'}->{'thumb-url'}); 340 | # Save line comments and transcriptions 341 | for (my $i = 0; $i < scalar @{ $body->{'line-comments'}}; $i++) { 342 | my %saveMap = ( 343 | 'line-transcriptions' , 'line-%04d.txt', 344 | 'line-comments' , 'comment-line-%04d.txt', 345 | ); 346 | while (my ($key, $fname_pat) = each(%saveMap)) { 347 | my $fname = join('/', $location->{'path'}->{'correction-dir'}, sprintf($fname_pat, $i+1)); 348 | # debug ("WRITE: $fname"); 349 | open my $fh, ">", $fname or httpError(500, "Could not write to '%s': %s\n", $fname, $!); 350 | print $fh $body->{$key}->[$i] . "\n"; 351 | close $fh; 352 | } 353 | } 354 | # Save page comment 355 | my $pageCommentFile = join('/', $location->{'path'}->{'correction-dir'}, 'comment-page.txt'); 356 | open my $fh, ">", $pageCommentFile or httpError(500, "Could not write to '%s': %s\n", $pageCommentFile, $!); 357 | print $fh $body->{'page-comment'} . "\n"; 358 | close $fh; 359 | print $cgi->header(-type => 'text/plain', -status => 200); 360 | } 361 | 362 | =head2 handleList 363 | 364 | 365 | =cut 366 | 367 | sub handleList 368 | { 369 | my $queryName = $cgi->param('name'); 370 | return httpError(400, "Must set parameter 'name'\n") unless $queryName; 371 | my $queryStr = $cgi->param('q'); 372 | return httpError(400, "Must set parameter 'q'\n") unless $queryStr; 373 | my $queryLocation = parse($queryStr); 374 | if (!$queryLocation->{'command'}->{'find-' . $queryName}) { 375 | return httpError(400, "Invalid parameter 'name' %s. Must be one of [%s]", 376 | $queryName, join('|', 377 | grep { /^find-/ } keys %{$queryLocation->{'command'}} 378 | )); 379 | } 380 | my $cmd = $queryLocation->{'command'}->{'find-' . $queryName}; 381 | debug(Dump($cmd)); 382 | my @locations = map { parse($_) } executeCommand($cmd); 383 | return httpJSON(\@locations); 384 | } 385 | 386 | 387 | =head2 handleHistory 388 | 389 | Send the request log for the calling IP address. 390 | 391 | =cut 392 | 393 | sub handleHistory 394 | { 395 | my $query = $cgi->param('q'); 396 | my $mine = defined $cgi->param('mine'); 397 | open my $RL, "<", $config->{'logging'}->{'logdir'} . '/request.log'; 398 | my $n = 100; 399 | my @lines; 400 | my $ip = $ENV{REMOTE_ADDR}; 401 | while (<$RL>) { 402 | if ($mine) { 403 | next unless m/.*\Q$ip\E.*/; 404 | } 405 | if ($query) { 406 | next unless m/.*\Q$query\E.*/; 407 | } 408 | push @lines, $_; 409 | } 410 | @lines = ($n >= @lines ? @lines : @lines[-$n .. -1]); 411 | print $cgi->header( -type => 'application/json', -charset => 'utf-8'); 412 | printf("[%s]", join(',', reverse @lines)); 413 | } 414 | 415 | 416 | setupLogging(); 417 | debug('********* START REQUEST *********'); 418 | my $action = $cgi->url_param('action'); 419 | # debug "CGI Params: %s", Dump($cgi->{param}); 420 | httpError(400, "URL parameter 'action' missing.") unless $action; 421 | if ( $action eq 'get' ) { handleGet(); } 422 | elsif ( $action eq 'save' ) { handleSave(); } 423 | elsif ( $action eq 'list' ) { handleList(); } 424 | elsif ( $action eq 'history' ) { handleHistory(); } 425 | else { httpError(400, "URL parameter 'action' must be 'create', 'save', 'list' or 'history'. Not %s", $action); } 426 | # TODO handle only 'save' and 'get' 427 | logRequest(); 428 | debug('********* END REQUEST *********'); 429 | 430 | # vim: sw=4 ts=4 : 431 | -------------------------------------------------------------------------------- /dist/ocr-gt-tools.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use CGI; 5 | use CGI::Carp qw(carpout); 6 | use Cwd qw(abs_path); 7 | use Hash::Merge; 8 | use File::Basename qw(dirname); 9 | use File::Path qw(make_path); 10 | use IPC::Run qw(run); 11 | use JSON; 12 | use POSIX qw(strftime); 13 | use Time::HiRes qw(time); 14 | use YAML::XS qw(LoadFile Dump); 15 | use open ":encoding(utf8)"; 16 | 17 | my $DEBUGLOG; 18 | my $DATE_FORMAT = "%Y-%m-%d"; 19 | my $TIME_FORMAT = "%H:%M:%S"; 20 | 21 | # Directory containing the CGI script 22 | my $OCR_GT_BASEDIR = dirname(abs_path($0)); 23 | my $cgi = CGI->new; 24 | my $config = loadConfig(); 25 | 26 | =head2 setupLogging 27 | 28 | Setup logging 29 | 30 | =cut 31 | 32 | sub setupLogging 33 | { 34 | my $logdir = $config->{'logging'}->{'logdir'}; 35 | if (! -d "$logdir") { 36 | make_path "$logdir", {mode => oct(777)}; 37 | } 38 | if ($config->{'logging'}->{'stderr'}) { 39 | $DEBUGLOG = *STDERR; 40 | } else { 41 | open($DEBUGLOG, ">>", "$logdir/ocr-gt-tools.log") 42 | or die "Cannot write to log file '$logdir/ocr-gt-tools.log': $!\n"; 43 | } 44 | carpout(*$DEBUGLOG); 45 | } 46 | 47 | =head2 debug 48 | 49 | Log a message to the log file. 50 | 51 | =cut 52 | 53 | sub debug 54 | { 55 | my $msg = sprintf(shift(), @_); 56 | my $t = time; 57 | my $timestamp = strftime $TIME_FORMAT, localtime $t; 58 | $timestamp .= sprintf ".%03d", ($t-int($t))*1000; # without rounding 59 | printf $DEBUGLOG "%s: %s\n", $timestamp, $msg; 60 | } 61 | 62 | 63 | =head2 slurp 64 | 65 | Read file contents into a string. 66 | 67 | =cut 68 | 69 | sub slurp 70 | { 71 | my ($fname) = @_; 72 | my $out = ''; 73 | open my $fh, "<:encoding(UTF-8)", $fname or httpError(500, "Could not read file %s: $!", $fname); 74 | $out .= $_ while(<$fh>); 75 | close $fh; 76 | chomp $out; 77 | return $out; 78 | } 79 | 80 | =head2 logRequest 81 | 82 | Log the IP and scan URL to request.log 83 | 84 | =cut 85 | 86 | sub logRequest 87 | { 88 | my $url = $cgi->param('imageUrl'); 89 | if (!$url) { 90 | debug("No URL to log for this request"); 91 | return; 92 | } 93 | my $action = $cgi->url_param('action'); 94 | my $t = time; 95 | my $timestamp = strftime sprintf("%sT%sZ", $DATE_FORMAT, $TIME_FORMAT), localtime $t; 96 | my $json = JSON->new->utf8->pretty(0)->encode({ 97 | date => $timestamp, 98 | action => $action, 99 | url => $url, 100 | ip => $ENV{REMOTE_ADDR} 101 | }); 102 | my $logdir = $config->{'logging'}->{'logdir'}; 103 | open(my $REQUESTLOG, ">>", "$logdir/request.log") 104 | or httpError(500, "Cannot write to log file '$logdir/request.log': $!\n"); 105 | print $REQUESTLOG $json . "\n"; 106 | close $REQUESTLOG; 107 | } 108 | 109 | 110 | =head2 httpError 111 | 112 | Send an HTTP error message 113 | 114 | =cut 115 | 116 | sub httpError 117 | { 118 | my $status = shift; 119 | my $msg = sprintf(shift(), @_); 120 | print $cgi->header(-type => 'text/plain', -status => $status); 121 | debug("********* ERROR $status - $msg *********"); 122 | print $msg; 123 | exit 1; 124 | } 125 | 126 | 127 | =head2 httpJSON 128 | 129 | convert data to JSON and send it 130 | 131 | =cut 132 | 133 | sub httpJSON 134 | { 135 | my ($obj, $compact) = @_; 136 | if (ref $obj eq 'ARRAY') { 137 | $obj = [ 138 | map { delete $_->{'path'}; delete $_->{'command'}; $_; } 139 | @{ $obj } 140 | ]; 141 | } elsif (ref $obj) { 142 | delete $obj->{'path'}; delete $obj->{'command'} 143 | } 144 | my $json = JSON->new->utf8->pretty(1)->encode($obj); 145 | print $cgi->header( -type => 'application/json', -charset => 'utf-8'); 146 | print $json; 147 | } 148 | 149 | 150 | =head2 loadConfig 151 | 152 | Load the configuration from the YAML file 153 | 154 | =cut 155 | 156 | sub loadConfig 157 | { 158 | my $ymlFile = "$OCR_GT_BASEDIR/ocr-gt-tools.default.yml"; 159 | my $config = LoadFile($ymlFile); 160 | 161 | # Merge with non-default config 162 | if (-e "$OCR_GT_BASEDIR/ocr-gt-tools.yml") { 163 | my $merge = Hash::Merge->new('RIGHT_PRECEDENT'); 164 | $config = $merge->merge($config, LoadFile("$OCR_GT_BASEDIR/ocr-gt-tools.yml")); 165 | } 166 | 167 | # Compile all 'pattern' as regexes 168 | for (my $i = 0; $i < scalar @{$config->{'pattern'}}; $i++) { 169 | my $pat = $config->{'pattern'}->[$i]; 170 | $config->{'pattern'}->[$i] = qr/$pat/smx; 171 | } 172 | 173 | # Remove all newlines from 'template' 174 | for my $category (keys %{$config->{'template'}}) { 175 | while (my ($k, $v) = each %{$config->{'template'}->{$category}}) { 176 | $v =~ s/\n//g; 177 | $config->{'template'}->{$category}->{$k} = $v; 178 | } 179 | } 180 | 181 | # Replace in logging->logdir 182 | $config->{'logging'}->{'logdir'} =~ s//$OCR_GT_BASEDIR/g; 183 | 184 | return $config; 185 | } 186 | 187 | 188 | 189 | =head2 parse 190 | 191 | Map a URL/path to a location 192 | 193 | =cut 194 | 195 | sub parse 196 | { 197 | my ($str) = @_; 198 | 199 | # Try to match all the 'pattern' 200 | for (my $i = 0; $i < scalar @{$config->{'pattern'}}; $i++) { 201 | my $pat = $config->{'pattern'}->[$i]; 202 | if ($str =~ $pat) { 203 | debug("Pattern # $i matched '$str'"); 204 | return { 205 | ids => {%+}, 206 | renderTemplates(%+), 207 | } 208 | } 209 | # debug("No match: $str =~ $pat"); 210 | } 211 | httpError(400, "Could not match '$str' to any known pattern"); 212 | } 213 | 214 | 215 | =head2 renderTemplates 216 | 217 | Replace all variables appropriately 218 | 219 | =cut 220 | 221 | sub renderTemplates 222 | { 223 | my %tokens = (%{ $config->{'defaults'} }, @_); 224 | my %obj; 225 | 226 | # Copy templates from the configuration and fill them with the tokens parsed 227 | for my $category ('url', 'path', 'command') { 228 | $obj{$category} = {}; 229 | while (my ($tplName, $tpl) = each %{$config->{'template'}->{$category}}) { 230 | 231 | # Initially set to the template string 232 | $obj{$category}->{$tplName} = $tpl; 233 | 234 | # First replace the tokens from the pattern 235 | while (my ($key, $value) = each %tokens) { 236 | if (ref $obj{$category}->{$tplName} eq 'ARRAY') { 237 | $obj{$category}->{$tplName} = [ 238 | map { 239 | my $foo = $_; 240 | $foo =~ s/<$key>/$value/g; 241 | $foo =~ s//$OCR_GT_BASEDIR/g; 242 | $foo; 243 | } @{ $obj{$category}->{$tplName} } 244 | ]; 245 | } else { 246 | $obj{$category}->{$tplName} =~ s/<$key>/$value/g; 247 | $obj{$category}->{$tplName} =~ s//$OCR_GT_BASEDIR/g; 248 | } 249 | } 250 | 251 | # Add this to the list of expanded tokens 252 | unless (ref $obj{$category}->{$tplName}) { 253 | $tokens{$tplName} = $obj{$category}->{$tplName}; 254 | } 255 | } 256 | } 257 | # debug( "Rendered object: %s", Dump(\%obj)); 258 | return %obj; 259 | } 260 | 261 | 262 | =head2 executeCommand 263 | 264 | Execute one of the location's commands. 265 | 266 | =cut 267 | 268 | sub executeCommand 269 | { 270 | my ($cmd) = @_; 271 | # Seiten in Bildzeilen und Textzeilen aufteilen 272 | debug("About to execute '%s'", join(' ', @{$cmd})); 273 | run $cmd, '>', \my $stdout, '2>', \my $stderr; 274 | debug($stdout); 275 | debug($stderr); 276 | if($?) { 277 | return httpError(500, "'%s' returned non-zero exit code '$?':\n\t%s\n%s", join(' ', @{$cmd}), $stdout, $stderr); 278 | } else { 279 | debug("Successfully run '%s': %s" , join(' ', @{$cmd}), substr($stdout, 0, 100)); 280 | } 281 | return split /\n/, $stdout; 282 | } 283 | 284 | 285 | =head2 handleGet 286 | 287 | Get process to create the files necessary 288 | 289 | =cut 290 | 291 | sub handleGet 292 | { 293 | my $url = $cgi->param('imageUrl'); 294 | if (! $url) { 295 | return httpError(400, "Missing parameter 'imageUrl' \n\n"); 296 | } 297 | # Get file object 298 | my $location = parse($url); 299 | if (! -e $location->{'path'}->{'correction-dir'}) { 300 | executeCommand($location->{'command'}->{'extract-images'}); 301 | $location->{'page-comment'} = ''; 302 | for ('line-comments', 'line-transcriptions', 'line-images') { 303 | $location->{$_} = []; 304 | } 305 | } 306 | chdir $location->{'path'}->{'correction-dir'}; 307 | for my $fname (glob 'line-*.*') { 308 | my $idx = int(substr($fname, 5, 4)); 309 | debug($idx); 310 | if ($fname =~ /txt$/) { 311 | $location->{'line-transcriptions'}->[$idx-1] = slurp($fname); 312 | } else { 313 | $location->{'line-images'}->[$idx-1] = sprintf($location->{'url'}->{'line-image-url'}, $idx); 314 | } 315 | } 316 | for my $fname (glob 'line-*.txt') { 317 | $location->{'line-transcriptions'}->[-1+substr($fname, 5, 4)] = slurp($fname); 318 | } 319 | for my $fname (glob 'comment-line-*.txt') { 320 | $location->{'line-comments'}->[-1+substr($fname, 13, 4)] = slurp($fname); 321 | } 322 | $location->{'page-comment'} = slurp('comment-page.txt'); 323 | $location->{'pages'} = [ map {parse($_)} executeCommand($location->{'command'}->{'find-corrections-for-work'}) ]; 324 | # Send JSON response 325 | httpJSON($location); 326 | } 327 | 328 | 329 | =head2 handleSave 330 | 331 | Save transcriptions and comments passed via POST params. 332 | 333 | =cut 334 | 335 | sub handleSave 336 | { 337 | my $postdata = $cgi->param('POSTDATA'); 338 | my $body = JSON->new->utf8->decode($postdata) or httpError(400, "Could not parse POST body"); 339 | my $location = parse($body->{'url'}->{'thumb-url'}); 340 | # Save line comments and transcriptions 341 | for (my $i = 0; $i < scalar @{ $body->{'line-comments'}}; $i++) { 342 | my %saveMap = ( 343 | 'line-transcriptions' , 'line-%04d.txt', 344 | 'line-comments' , 'comment-line-%04d.txt', 345 | ); 346 | while (my ($key, $fname_pat) = each(%saveMap)) { 347 | my $fname = join('/', $location->{'path'}->{'correction-dir'}, sprintf($fname_pat, $i+1)); 348 | # debug ("WRITE: $fname"); 349 | open my $fh, ">", $fname or httpError(500, "Could not write to '%s': %s\n", $fname, $!); 350 | print $fh $body->{$key}->[$i] . "\n"; 351 | close $fh; 352 | } 353 | } 354 | # Save page comment 355 | my $pageCommentFile = join('/', $location->{'path'}->{'correction-dir'}, 'comment-page.txt'); 356 | open my $fh, ">", $pageCommentFile or httpError(500, "Could not write to '%s': %s\n", $pageCommentFile, $!); 357 | print $fh $body->{'page-comment'} . "\n"; 358 | close $fh; 359 | print $cgi->header(-type => 'text/plain', -status => 200); 360 | } 361 | 362 | =head2 handleList 363 | 364 | 365 | =cut 366 | 367 | sub handleList 368 | { 369 | my $queryName = $cgi->param('name'); 370 | return httpError(400, "Must set parameter 'name'\n") unless $queryName; 371 | my $queryStr = $cgi->param('q'); 372 | return httpError(400, "Must set parameter 'q'\n") unless $queryStr; 373 | my $queryLocation = parse($queryStr); 374 | if (!$queryLocation->{'command'}->{'find-' . $queryName}) { 375 | return httpError(400, "Invalid parameter 'name' %s. Must be one of [%s]", 376 | $queryName, join('|', 377 | grep { /^find-/ } keys %{$queryLocation->{'command'}} 378 | )); 379 | } 380 | my $cmd = $queryLocation->{'command'}->{'find-' . $queryName}; 381 | debug(Dump($cmd)); 382 | my @locations = map { parse($_) } executeCommand($cmd); 383 | return httpJSON(\@locations); 384 | } 385 | 386 | 387 | =head2 handleHistory 388 | 389 | Send the request log for the calling IP address. 390 | 391 | =cut 392 | 393 | sub handleHistory 394 | { 395 | my $query = $cgi->param('q'); 396 | my $mine = defined $cgi->param('mine'); 397 | open my $RL, "<", $config->{'logging'}->{'logdir'} . '/request.log'; 398 | my $n = 100; 399 | my @lines; 400 | my $ip = $ENV{REMOTE_ADDR}; 401 | while (<$RL>) { 402 | if ($mine) { 403 | next unless m/.*\Q$ip\E.*/; 404 | } 405 | if ($query) { 406 | next unless m/.*\Q$query\E.*/; 407 | } 408 | push @lines, $_; 409 | } 410 | @lines = ($n >= @lines ? @lines : @lines[-$n .. -1]); 411 | print $cgi->header( -type => 'application/json', -charset => 'utf-8'); 412 | printf("[%s]", join(',', reverse @lines)); 413 | } 414 | 415 | 416 | setupLogging(); 417 | debug('********* START REQUEST *********'); 418 | my $action = $cgi->url_param('action'); 419 | # debug "CGI Params: %s", Dump($cgi->{param}); 420 | httpError(400, "URL parameter 'action' missing.") unless $action; 421 | if ( $action eq 'get' ) { handleGet(); } 422 | elsif ( $action eq 'save' ) { handleSave(); } 423 | elsif ( $action eq 'list' ) { handleList(); } 424 | elsif ( $action eq 'history' ) { handleHistory(); } 425 | else { httpError(400, "URL parameter 'action' must be 'create', 'save', 'list' or 'history'. Not %s", $action); } 426 | # TODO handle only 'save' and 'get' 427 | logRequest(); 428 | debug('********* END REQUEST *********'); 429 | 430 | # vim: sw=4 ts=4 : 431 | -------------------------------------------------------------------------------- /dist/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | OCR-GT 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 40 | 41 | 42 | 43 | 52 | 53 | 54 | 55 | 58 | 88 | 89 | 90 | 91 | 99 | 100 | 101 | 102 | 126 | 127 | 128 | 129 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 160 | 161 | 162 | 163 | 226 | 227 | 228 |
229 |
230 |
231 |
Grafik auf dem Dokument ablegen
oder
URL angeben:
232 |
233 | 234 | 235 |
236 |
237 |
238 | 239 |
240 | 241 |
242 |
243 | 244 | 245 | 246 | -------------------------------------------------------------------------------- /dist/example/fileadmin/417576986/hocr/417576986_0013.hocr: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |

15 | P R E F A C E. 16 | 17 | dillinguer les principes des 18 | 19 | confequences & les regles des 20 | 21 | exceprjons s & c’et’t ce qu: fait 22 | 23 | une infljrution Il y a long rems 24 | 25 | que j’en voi la necelllre & que 26 | 27 | je desire qu’jl y en ait en tou- 28 | 29 | tes les marieres qu’jl importe 30 | 31 | de favoir. Ceft aum ce qui m’a 32 | 33 | porte ä compol'er le Carechle 34 | 35 | me historjque & le trajtä de la 36 | 37 | merhode des etudes. Sans ce ('e- 38 | 39 | cours on marche ä tärons , on 40 | 41 | commence par de petits details, 42 | 43 | on fuit Paurorjte du premier 44 | 45 | venu , on ne forme que des 46 | 47 | doutes sc des opinions incertai- 48 | 49 | nes. 50 | 51 |

52 | 53 |

54 | Tel eft l*etat des pures prati- 55 | 56 | ciens, qui n'apprennent la juer- 57 | 58 | pruclence canonjque que com- 59 | 60 | meles artjfans apprcnnent les 61 | 62 | metjers leg plus vle Sen voyant 63 | 64 | travailler leurs mairres , & re- 65 | 66 | tenant ce quïls leur difent ä 67 | 68 |

69 | 70 |

71 | lll] 72 | 73 |

74 |
75 |
76 |

77 | 78 | 79 |

80 |
81 |
82 | 83 | 84 | --------------------------------------------------------------------------------