├── .gitignore ├── Makefile ├── README.md ├── doc ├── hocrjs-1.png ├── hocrjs-2.png ├── hocrjs-userjs-gm4.png ├── hocrjs-userjs.png └── pre-commit ├── example ├── 309515696-2.html ├── 309515696-2.tif.png ├── 426117689_0459.html ├── 426117689_0459.jpg └── 426117689_0459_noscript.html ├── hocrjs ├── LICENSE ├── Makefile ├── bin │ └── hocrjs-inject.js ├── dist │ ├── fullscreen.map │ ├── hocr.fullscreen.js │ ├── hocr.fullscreen.js.LICENSE.txt │ └── hocr.user.js ├── package-lock.json ├── package.json ├── src │ ├── _variables.scss │ ├── fullscreen.js │ ├── hocr.user.js │ ├── normalize.scss │ └── utils.js ├── test.html ├── test │ └── test-string.html └── webpack.config.js ├── lerna.json ├── package-lock.json ├── package.json └── vue-hocr ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── dist ├── main.map └── vue-hocr.js ├── package-lock.json ├── package.json ├── src ├── _variables.scss ├── components │ ├── HocrToolbar │ │ ├── index.vue │ │ ├── script.js │ │ ├── style.scss │ │ └── template.html │ ├── HocrViewer │ │ ├── feature │ │ │ ├── BackgroundImage.js │ │ │ ├── ContentEditable.js │ │ │ ├── Font.js │ │ │ ├── HighlightBlank.js │ │ │ ├── Layout.js │ │ │ ├── ScaleFont.js │ │ │ ├── Tooltip.js │ │ │ └── index.js │ │ ├── index.vue │ │ ├── script.js │ │ ├── style.scss │ │ └── template.html │ └── index.js ├── store │ └── state.js └── utils.js ├── test.html └── webpack.config.js /.gitignore: -------------------------------------------------------------------------------- 1 | .sass-cache 2 | test/test-string.html 3 | dist/hocr.fullscreen.js.map 4 | dist/hocr.viewer.js.map 5 | doc/large 6 | */node_modules 7 | example/tesseract4.html 8 | node_modules 9 | *.log 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PATH := $(PWD)/node_modules/.bin:$(PATH) 2 | export NODE_ENV 3 | 4 | # Version of the latest git tag 5 | VERSION = $(shell node -e 
"console.log(require('./lerna.json').version)") 6 | 7 | # BEGIN-EVAL makefile-parser --make-help Makefile 8 | 9 | help: 10 | @echo "" 11 | @echo " Targets" 12 | @echo "" 13 | @echo " bootstrap lerna bootstrap" 14 | @echo " dist webpack all" 15 | @echo " clean Remove built targets" 16 | @echo " test Run unit tests" 17 | @echo " link link" 18 | @echo " publish publish packages" 19 | @echo "" 20 | @echo " Variables" 21 | @echo "" 22 | @echo " VERSION Version of the latest git tag" 23 | 24 | # END-EVAL 25 | 26 | # lerna bootstrap 27 | bootstrap: 28 | lerna bootstrap 29 | 30 | # webpack all 31 | dist: 32 | lerna exec -- make -B dist 33 | 34 | # Remove built targets 35 | clean: 36 | lerna exec -- make clean 37 | 38 | # Run unit tests 39 | .PHONY: test 40 | test: 41 | lerna exec make test 42 | 43 | # link 44 | link: 45 | lerna exec -- npm link 46 | 47 | # publish packages 48 | publish: 49 | lerna version --no-push --no-git-tag-version 50 | VERSION=`node -e "console.log(require('./lerna.json').version)"`; \ 51 | $(MAKE) -B dist VERSION=$$VERSION NODE_ENV=production; \ 52 | git add lerna.json */dist */package*.json; \ 53 | git commit -m ":package: v$$VERSION"; \ 54 | git tag v$$VERSION; \ 55 | lerna exec npm publish 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hocrjs 2 | 3 | > Working with [hOCR](https://kba.github.io/hocr-spec/1.2/) in Javascript 4 | 5 | 6 | * [Showcase](#showcase) 7 | * [Demo](#demo) 8 | * [Video](#video) 9 | * [Screenshots](#screenshots) 10 | * [Usage](#usage) 11 | * [Simple Usage](#simple-usage) 12 | * [User script](#user-script) 13 | * [Command line interface](#command-line-interface) 14 | * [Development](#development) 15 | * [Layout](#layout) 16 | * [Features and SASS](#features-and-sass) 17 | * [Adding a feature](#adding-a-feature) 18 | 19 | 20 | 21 | ## Showcase 22 | 23 | ### Demo 24 | 25 | See this demo 
document: [Demo](https://kba.github.io/hocrjs/example/426117689_0459.html) 26 | 27 | ### Video 28 | 29 | [![video of hocrjs](https://img.youtube.com/vi/II1lkjFc6zU/0.jpg)](https://youtu.be/II1lkjFc6zU) 30 | 31 | ### Screenshots 32 | 33 | ![background image, transparent text](./doc/hocrjs-1.png) 34 | 35 | ![text only, scaled font](./doc/hocrjs-2.png) 36 | 37 | 38 | ## Usage 39 | 40 | ### Simple Usage 41 | 42 | To add the interface to a plain hOCR file, add this line just before the closing `</body>` tag: 43 | 44 | ```html 45 | <script src="https://unpkg.com/hocrjs/dist/hocr.fullscreen.js"></script> 46 | ``` 47 | 48 | In addition, your webserver must set the `Content-Type` to a value that allows 49 | loading scripts, such as `text/html`. If your hOCR file uses a 50 | `.html` or `.htm` extension, the media type should be set correctly. 51 | 52 | For files with a `.hocr` extension (e.g. generated by tesseract), you will need 53 | to add a mapping from extension to media type: 54 | 55 | * Apache: Add the following to your server configuration or `.htaccess` file: 56 | 57 | ```apache 58 | AddType text/html hocr 59 | ``` 60 | 61 | * nginx: Add to `mime.types`: 62 | 63 | ```mime 64 | text/html hocr; 65 | ``` 66 | 67 | 68 | ### User script 69 | 70 | Also bundled is a browser extension that lets you add the hocrjs interface to 71 | any hOCR document you browse on the web. 72 | 73 | - It is a [user script](https://github.com/OpenUserJs/OpenUserJS.org/wiki/Userscript-beginners-HOWTO), 74 | so you need to have a user script runner like 75 | [Greasemonkey](https://addons.mozilla.org/en-US/firefox/addon/greasemonkey/) or 76 | [Tampermonkey](https://www.tampermonkey.net/) installed. 77 | - Browse to [unpkg.com/hocrjs/dist/hocr.user.js](https://unpkg.com/hocrjs/dist/hocr.user.js) 78 | - Install the user script when asked 79 | - Browse to a plain hOCR document, e.g. 
[this one](https://kba.github.io/hocrjs/example/426117689_0459_noscript.html) 80 | - Choose `Inject hOCR viewer` from the menu of the user script manager 81 | 82 | Tampermonkey: 83 | 84 | ![Tampermonkey](./doc/hocrjs-userjs.png) 85 | 86 | Greasemonkey: 87 | 88 | ![Greasemonkey](./doc/hocrjs-userjs-gm4.png) 89 | 90 | ### Command line interface 91 | 92 | hocrjs comes with a command line tool `hocrjs-inject` that inserts the 93 | necessary ` 105 | 106 | 107 | -------------------------------------------------------------------------------- /example/426117689_0459.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kba/hocrjs/fc49294422dd397ceff2fc359a6587f59c717596/example/426117689_0459.jpg -------------------------------------------------------------------------------- /example/426117689_0459_noscript.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 |

15 | de veteri uile/Le jiutu. « 429 16 | 17 | clelIamJMeZiwi multitudinfm intelligäs ; ti vcro 18 | 19 | coctum vna focietate Copulatum, tunc nihil fallitis 20 | 21 | dicipotefi. Ecjanirum forctJI differcnrjas rc- 22 | 23 | liquas coaceruarc vellem, quæ ex haåenus diåis 24 | 25 | tamen facile fluunt. 26 | 27 | 5.V. His prælibatis,videamus, quid T E RTV L- sumu'; 28 | 29 | , LIANO noftro fit excl-Jfo ltå vero illc: Vbi III-« 30 | 31 | Irer, ECCIB/id ef, licet laici , unu; quisque enim þu: «'l—"jia. 32 | 33 | fide viuit, nec, (jl perfouarum May-je eorum Deo. ' 34 | 35 | In antecedentibus dixerat,» in ecclesia quidem ell-c 36 | 37 | ordinis ccclcfiaftici confeüiim, limul tamen fub- 38 | 39 | iungit, huncad eius essentiam non requiri, cum « , 40 | 41 | cx paucis laicis etiam ecccha colligi Pollit, nec- 42 | 43 | adeo Deus perfonarum rationem habe-an Idem- 44 | 45 | de »Nimm c. mihf. ecclcliäm aitech trium car- 46 | 47 | .Puy, & in tr. [iiij-liguinper/E'cut.[.14.þzribi€fin « 48 | 49 | tribu; ecxlengupponcns cælum ,vvbi tuto congre- 50 | 51 | gaxifideles non pollint. Interim id fatis ex *ad- 52 | 53 | diuitis locis cOnflat, eccleG-am TERTVLLIANO 54 | 55 | "hic effe coctum congregatum, etiam ex pauciori- 56 | 57 | bus, cultus diuinigraïia, quem omnino ccclcy'iarm 58 | 59 | Pcrfeäam conllituere ait, licet ordinis ec'clcfiaflig 60 | 61 | cideHciat confcll'ust quialias ex decreto & »in-Fo- 62 | 63 | rizute fic/We adelTe lbleat, non tamen dc iure cli- 64 | 65 | uino neccllärius Gt, adeoque in cæfu ufier/imm 66 | 67 | abe/llt. polny cum decrjmen inter ordinem &ple- 68 | 69 | bem conllitucrit eccleliæ auäoritas. Hæc eft defi- 70 | 71 | nitio gener/alis T E R T v LL 1 A N EA ccclefiæ, a qua 72 | 73 | tamen pollerioribus temporibus difcelliim efi,vti 74 | 75 | infra ottendam. CL EM As ALEXANDRINVS 76 | 77 | lib. IV./iiamuf. facit cum Septimio noftro vbiita : 78 | 79 | non [muui/fd clfc'larum' congregurianem appello ec- 80 | 81 |

82 |
83 |
84 |

85 | clcfum. Congregatio clcåorum ecclcüamconfti- 86 | 87 | ' tuit - 88 | 89 |

90 |
91 |
92 |

93 | 94 | 95 |

96 |
97 |
98 |

99 | 100 | 101 |

102 |
103 |
104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /hocrjs/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2017 Konstantin Baierer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /hocrjs/Makefile: -------------------------------------------------------------------------------- 1 | PATH := $(PWD)/node_modules/.bin:$(PATH) 2 | 3 | # Version of the latest git tag ('$(VERSION)') 4 | VERSION = $(shell git describe --abbrev=0 --tags|sed -e 's/v//') 5 | 6 | # URL of the asset server, serving the built files and userscript ('$(ASSET_SERVER)') 7 | ASSET_SERVER = https://unpkg.com/hocrjs@$(VERSION)/dist 8 | 9 | # URL of the userscript update server ('$(UPDATE_SERVER)', will automatically redirect to latest version) 10 | UPDATE_SERVER = https://unpkg.com/hocrjs 11 | 12 | # Command to run a static server ('$(STATIC_SERVER)') 13 | STATIC_SERVER = @python2 -m SimpleHTTPServer $(PORT) 14 | 15 | # Server port. ('$(PORT)') 16 | PORT = 8888 17 | 18 | # BEGIN-EVAL makefile-parser --make-help Makefile 19 | 20 | help: 21 | @echo "" 22 | @echo " Targets" 23 | @echo "" 24 | @echo " dist webpack" 25 | @echo " clean Remove built targets" 26 | @echo " test Run unit tests" 27 | @echo " serve Run a development server" 28 | @echo " watch Continuously rebuild dist" 29 | @echo "" 30 | @echo " Variables" 31 | @echo "" 32 | @echo " VERSION Version of the latest git tag ('$(VERSION)')" 33 | @echo " ASSET_SERVER URL of the asset server, serving the built files and userscript ('$(ASSET_SERVER)')" 34 | @echo " UPDATE_SERVER URL of the userscript update server ('$(UPDATE_SERVER)', will automatically redirect to latest version)" 35 | @echo " STATIC_SERVER Command to run a static server ('$(STATIC_SERVER)')" 36 | @echo " PORT Server port. 
('$(PORT)')" 37 | 38 | # END-EVAL 39 | 40 | # webpack 41 | .PHONY: dist 42 | dist: dist/hocr.user.js 43 | webpack 44 | 45 | dist/hocr.user.js: src/hocr.user.js 46 | mkdir -p $(dir $@) 47 | sed -e "s,__VERSION__,$(VERSION)," \ 48 | -e "s,__ASSET_SERVER__,$(ASSET_SERVER)," \ 49 | -e "s,__UPDATE_SERVER__,$(UPDATE_SERVER)," \ 50 | $< > $@ 51 | 52 | # Remove built targets 53 | clean: 54 | $(RM) -r ./dist 55 | 56 | # Run unit tests 57 | .PHONY: test 58 | test: 59 | babel-tap test/*.test.js 60 | 61 | # Run a development server 62 | serve: 63 | $(MAKE) clean dist ASSET_SERVER=$(ASSET_SERVER) 64 | $(STATIC_SERVER) 65 | 66 | # Continuously rebuild dist 67 | watch: 68 | while true;do \ 69 | nodemon --exec "make clean dist ASSET_SERVER=$(ASSET_SERVER)" \ 70 | -w src \ 71 | -e 'js scss html' \ 72 | ; sleep 5 || break; \ 73 | done 74 | -------------------------------------------------------------------------------- /hocrjs/bin/hocrjs-inject.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* 3 | * Copyright (c) 2016-2017 Konstantin Baierer 4 | * 5 | * This software may be modified and distributed under the terms 6 | * of the MIT license. See the LICENSE file for details. 
7 | */ 8 | 9 | const fs = require('fs') 10 | 11 | const argv = require('yargs').argv 12 | argv.scriptUrl = (argv.scriptUrl || 'https://unpkg.com/hocrjs/dist/hocr.fullscreen.js') 13 | const scriptSnippet = `` 14 | argv._.forEach((infile) => { 15 | const outfile = `${infile}.hocrjs.html` 16 | fs.readFile(infile, {encoding: 'utf-8'}, (err, data) => { 17 | if (err) throw err 18 | data = data.replace('', `${scriptSnippet}`) 19 | fs.writeFile(outfile, data, {encoding: 'utf-8'}, (err) => { 20 | if (err) throw err 21 | console.log(`Written to ${outfile}`) 22 | }) 23 | }) 24 | }) 25 | -------------------------------------------------------------------------------- /hocrjs/dist/hocr.fullscreen.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * Vue.js v2.6.12 3 | * (c) 2014-2020 Evan You 4 | * Released under the MIT License. 5 | */ 6 | -------------------------------------------------------------------------------- /hocrjs/dist/hocr.user.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name hocr-viewer 3 | // @namespace http://github.com/kba/hocrjs 4 | // @updateURL https://unpkg.com/hocrjs/dist/hocr.user.js 5 | // @version 0.5.3 6 | // @description Add hocr-viewer controls to a apage 7 | // @author kba 8 | // @include http://*/* 9 | // @include https://*/* 10 | // @include file:///* 11 | // @grant GM_registerMenuCommand 12 | // @require https://greasemonkey.github.io/gm4-polyfill/gm4-polyfill.js 13 | // @run-at document-end 14 | // ==/UserScript== 15 | 16 | /* 17 | * Copyright (c) 2016-2017 Konstantin Baierer 18 | * 19 | * This software may be modified and distributed under the terms 20 | * of the MIT license. See the LICENSE file for details. 
21 | */ 22 | 23 | (function() { 24 | 'use strict' 25 | function injectHocrViewer() { 26 | const script = document.createElement('script') 27 | script.src = "https://unpkg.com/hocrjs@0.5.3/dist/hocr.fullscreen.js?cachebuster=" + Math.random() * 10000000000000000 28 | script.type = 'text/javascript' 29 | document.querySelector('body').appendChild(script) 30 | } 31 | 32 | // Add menu option 33 | GM_registerMenuCommand('Inject hOCR viewer', injectHocrViewer) 34 | 35 | // Automatically inject hOCR viewer for local files 36 | if (document.location.protocol === 'file:' && document.querySelector('.ocr_page')) { 37 | injectHocrViewer() 38 | } 39 | 40 | })() 41 | -------------------------------------------------------------------------------- /hocrjs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hocrjs", 3 | "version": "0.5.4", 4 | "description": "Vue component for viewing hOCR files, also usable standalone", 5 | "main": "dist/hocr.fullscreen.js", 6 | "bin": { 7 | "hocrjs-inject": "bin/hocrjs-inject.js" 8 | }, 9 | "scripts": { 10 | "test": "make webpack test" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/kba/hocrjs.git" 15 | }, 16 | "author": "kba", 17 | "license": "MIT", 18 | "bugs": { 19 | "url": "https://github.com/kba/hocrjs/issues" 20 | }, 21 | "homepage": "https://github.com/kba/hocrjs#readme", 22 | "babel": { 23 | "presets": [ 24 | "@babel/preset-env" 25 | ] 26 | }, 27 | "devDependencies": { 28 | "@babel/core": "7.12.17", 29 | "@babel/preset-env": "7.12.17", 30 | "@kba/makefile-parser": "0.0.3", 31 | "babel-loader": "8.2.2", 32 | "babel-tap": "5.0.0", 33 | "css-loader": "5.0.2", 34 | "express": "4.18.2", 35 | "html-loader": "2.1.0", 36 | "node-sass": "5.0.0", 37 | "nodemon": "1.12.1", 38 | "normalize.css": "8.0.1", 39 | "sass-loader": "11.0.1", 40 | "style-loader": "2.0.0", 41 | "tap": "14.11.0", 42 | "tiny-emitter": "2.1.0", 43 | "uglifyjs-webpack-plugin": 
"2.2.0", 44 | "vue": "2.6.12", 45 | "webpack": "5.23.0", 46 | "webpack-cli": "4.5.0", 47 | "webpack-node-externals": "1.6.0" 48 | }, 49 | "dependencies": { 50 | "hocr-dom": "0.1.4", 51 | "vue-hocr": "^0.5.4", 52 | "yargs": "10.0.3" 53 | }, 54 | "gitHead": "17f48d51d58f784cbf5e7e1fdc0c8324f73b20cd" 55 | } 56 | -------------------------------------------------------------------------------- /hocrjs/src/_variables.scss: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 6 | */ 7 | 8 | $selector-ocr-page: '.ocr_page'; 9 | $selector-ocr-classes: '*[class^="ocr"]'; 10 | $selector-ocr-line: '#{$selector-ocr-classes}[class*="line"]'; 11 | $selector-ocr-par: '.ocr_par'; 12 | $selector-ocr-carea: '.ocr_carea'; 13 | $selector-ocr-blank: '#{$selector-ocr-classes}.hocrjs-blank'; 14 | $selector-ocr-inline-not-blank: '.ocr_line #{$selector-ocr-classes}:not(.hocrjs-blank)'; 15 | $selector-ocr-inline-blank: '.ocr_line #{$selector-ocr-classes}.hocrjs-blank'; 16 | $selector-ocr-not-page: '#{$selector-ocr-classes}:not(.ocr_page)'; 17 | 18 | $border-width: 3px; 19 | $color-transparent: rgba(0,0,0,0); 20 | $color-unchecked: #ffcccc; 21 | $color-checked: #ccffcc; 22 | $color-toolbar-bg: rgba(180,180,190,0.85); 23 | $color-toolbar-toggler: white; 24 | $color-toolbar-border: #333; 25 | 26 | $color-highlight-page: #8B4513; 27 | $color-highlight-not-page: red; 28 | $color-highlight-inline-not-blank: green; 29 | $color-highlight-inline-blank: lighten(green, 30%); 30 | $color-highlight-carea: blue; 31 | $color-highlight-par: purple; 32 | $color-highlight-line: gold; 33 | 34 | @mixin transform($args) { 35 | -webkit-transform: $args; 36 | -ms-transform: $args; 37 | transform: $args; 38 | } 39 | 40 | @mixin tooltip { 41 | display: block; 42 | background: white; 43 | color: black 
!important; 44 | border: 1px solid black; 45 | font-family: monospace; 46 | position: absolute; 47 | font-size: 12px; 48 | font-weight: bold; 49 | line-height: 100%; 50 | height: 15px; 51 | top: -15px; 52 | } 53 | 54 | @mixin highlight-area($selector, $color) { 55 | #{$selector} { 56 | border: $border-width solid $color; 57 | &:hover { 58 | background: rgba(lighten($color, 30%), 0.2); 59 | } 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /hocrjs/src/fullscreen.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 6 | */ 7 | 8 | import Vue from 'vue' 9 | import '@/normalize.scss' 10 | import 'normalize.css' 11 | import butwhy from 'vue-hocr' 12 | const {HocrViewer, HocrViewerStyle, HocrToolbarStyle} = butwhy // for some reason 'import {HocrViewer} from 'vue-hocr' wont work 13 | import Utils from 'vue-hocr/src/utils' 14 | 15 | Vue.component('hocr-viewer', HocrViewer) 16 | 17 | const hocr = document.querySelector('html').innerHTML 18 | Utils.addCssFragment('hocr-viewer-styles', HocrViewerStyle.toString()) 19 | Utils.addCssFragment('hocr-toolbar', HocrToolbarStyle.toString()) 20 | document.body.innerHTML = '
' 21 | window.hocrapp = new Vue({ 22 | el: "#app", 23 | components: {HocrViewer}, 24 | template: ``, 25 | data: {hocr} 26 | }) 27 | -------------------------------------------------------------------------------- /hocrjs/src/hocr.user.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name hocr-viewer 3 | // @namespace http://github.com/kba/hocrjs 4 | // @updateURL __UPDATE_SERVER__/dist/hocr.user.js 5 | // @version __VERSION__ 6 | // @description Add hocr-viewer controls to a apage 7 | // @author kba 8 | // @include http://*/* 9 | // @include https://*/* 10 | // @include file:///* 11 | // @grant GM_registerMenuCommand 12 | // @require https://greasemonkey.github.io/gm4-polyfill/gm4-polyfill.js 13 | // @run-at document-end 14 | // ==/UserScript== 15 | 16 | /* 17 | * Copyright (c) 2016-2017 Konstantin Baierer 18 | * 19 | * This software may be modified and distributed under the terms 20 | * of the MIT license. See the LICENSE file for details. 
21 | */ 22 | 23 | (function() { 24 | 'use strict' 25 | function injectHocrViewer() { 26 | const script = document.createElement('script') 27 | script.src = "__ASSET_SERVER__/hocr.fullscreen.js?cachebuster=" + Math.random() * 10000000000000000 28 | script.type = 'text/javascript' 29 | document.querySelector('body').appendChild(script) 30 | } 31 | 32 | // Add menu option 33 | GM_registerMenuCommand('Inject hOCR viewer', injectHocrViewer) 34 | 35 | // Automatically inject hOCR viewer for local files 36 | if (document.location.protocol === 'file:' && document.querySelector('.ocr_page')) { 37 | injectHocrViewer() 38 | } 39 | 40 | })() 41 | -------------------------------------------------------------------------------- /hocrjs/src/normalize.scss: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 6 | */ 7 | 8 | html { 9 | font-family: sans-serif; 10 | line-height: 1.15; 11 | -ms-text-size-adjust: 100%; 12 | -webkit-text-size-adjust: 100%; 13 | } 14 | -------------------------------------------------------------------------------- /hocrjs/src/utils.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 
6 | */ 7 | 8 | export default class Utils { 9 | 10 | static removeCssFragment(styleId, css) { 11 | const style = document.querySelector(`#${styleId}`) 12 | if (style) style.remove() 13 | } 14 | 15 | static addCssFragment(styleId, css) { 16 | let style = document.querySelector(`#${styleId}`) 17 | if (!style) { 18 | style = document.createElement('style') 19 | style.id = styleId 20 | document.head.appendChild(style) 21 | } 22 | style.appendChild(document.createTextNode(css)) 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /hocrjs/test/test-string.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test hocr template 4 | 5 | 6 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /hocrjs/webpack.config.js: -------------------------------------------------------------------------------- 1 | //const UglifyJS = require('uglifyjs-webpack-plugin') 2 | const path = require('path') 3 | const {SourceMapDevToolPlugin} = require('webpack') 4 | 5 | module.exports = { 6 | entry: { 7 | 'fullscreen': "./src/fullscreen.js", 8 | }, 9 | resolve: { 10 | alias: { 11 | "@": path.join(__dirname, "src"), 12 | vue: 'vue/dist/vue.esm.js', 13 | }, 14 | }, 15 | output: { 16 | path: __dirname + "/dist", 17 | filename: `hocr.[name].js`, 18 | }, 19 | module: { 20 | rules: [ 21 | { 22 | test: /.*\.js$/, 23 | exclude: /node_modules/, 24 | loader: 'babel-loader', 25 | options: {cacheDirectory: true}, 26 | }, 27 | { 28 | test: /\.s?[ac]ss$/, 29 | use: [ 30 | 'style-loader', 31 | {loader: 'css-loader', options: {sourceMap: true}}, 32 | {loader: 'sass-loader', options: {sourceMap: true}} 33 | ], 34 | }, 35 | {test: /\.vue/, loader: "vue-loader"}, 36 | ] 37 | }, 38 | devtool: false, 39 | plugins: [ 40 | new SourceMapDevToolPlugin({ 41 | append: '\n//# sourceMappingURL=https://unpkg.com/hocrjs/dist/[url]', 42 | filename: '[name].map', 43 | }) 44 | ] 45 | 
//plugins: [ 46 | // new UglifyJS({}), 47 | //] 48 | } 49 | -------------------------------------------------------------------------------- /lerna.json: -------------------------------------------------------------------------------- 1 | { 2 | "packages": [ 3 | "hocrjs", 4 | "vue-hocr" 5 | ], 6 | "version": "0.5.4" 7 | } 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hocrjs", 3 | "dependencies": { 4 | "lerna": "4.0.0" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /vue-hocr/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | .sass-cache 3 | test/test-string.html 4 | dist/hocr.fullscreen.js.map 5 | dist/hocr.viewer.js.map 6 | doc/large 7 | -------------------------------------------------------------------------------- /vue-hocr/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2017 Konstantin Baierer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /vue-hocr/Makefile: -------------------------------------------------------------------------------- 1 | PATH := $(PWD)/node_modules/.bin:$(PATH) 2 | 3 | # BEGIN-EVAL makefile-parser --make-help Makefile 4 | 5 | help: 6 | @echo "" 7 | @echo " Targets" 8 | @echo "" 9 | @echo " dist webpack for vue component" 10 | @echo " watch webpack for vue component continuously" 11 | @echo " clean rm dist" 12 | @echo "" 13 | @echo " Variables" 14 | @echo "" 15 | 16 | # END-EVAL 17 | 18 | # webpack for vue component 19 | dist: 20 | webpack 21 | 22 | # webpack for vue component continuously 23 | watch: 24 | webpack -w 25 | 26 | # rm dist 27 | clean: 28 | rm -fr dist 29 | -------------------------------------------------------------------------------- /vue-hocr/README.md: -------------------------------------------------------------------------------- 1 | # hocrjs 2 | 3 | > Working with [hOCR](https://kba.github.io/hocr-spec/1.2/) in Javascript 4 | 5 | 6 | * [Showcase](#showcase) 7 | * [Demo](#demo) 8 | * [Video](#video) 9 | * [Screenshots](#screenshots) 10 | * [Usage](#usage) 11 | * [Simple Usage](#simple-usage) 12 | * [User script](#user-script) 13 | * [Command line interface](#command-line-interface) 14 | * [Development](#development) 15 | * [Layout](#layout) 16 | * [Features and SASS](#features-and-sass) 17 | * [Adding a feature](#adding-a-feature) 18 | 19 | 20 | 21 | ## Showcase 22 | 23 | ### Demo 24 | 25 | See this demo document: [Demo](https://kba.github.io/hocrjs/example/426117689_0459.html) 26 | 27 | ### Video 28 | 29 | [![video of hocrjs](https://img.youtube.com/vi/II1lkjFc6zU/0.jpg)](https://youtu.be/II1lkjFc6zU) 
30 | 31 | ### Screenshots 32 | 33 | ![background image, transparent text](./doc/hocrjs-1.png) 34 | 35 | ![text only, scaled font](./doc/hocrjs-2.png) 36 | 37 | 38 | ## Usage 39 | 40 | ### Simple Usage 41 | 42 | To add the interface to a plain hOCR file, add this line just before the closing `` tag: 43 | 44 | ```html 45 | 4 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrToolbar/script.js: -------------------------------------------------------------------------------- 1 | export default { 2 | name: 'HocrToolbar', 3 | computed: { 4 | classList() {return { 5 | 'hocrjs-toolbar': true, 6 | expanded: this.expanded, 7 | }}, 8 | }, 9 | props: { 10 | expandedInitial: {type: Boolean, default: false}, 11 | }, 12 | data() {return { 13 | expanded: this.expandedInitial, 14 | }}, 15 | methods: { 16 | toggle() { 17 | this.expanded = ! this.expanded 18 | } 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrToolbar/style.scss: -------------------------------------------------------------------------------- 1 | @import '../../variables'; 2 | 3 | .hocrjs-toolbar { 4 | position: fixed; 5 | z-index: 1; 6 | top: 0; 7 | height: 100%; 8 | border: none; 9 | .toggler { 10 | float: left; 11 | position: fixed; 12 | left: 0; 13 | font-family: monospace; 14 | color: $color-toolbar-toggler; 15 | background: $color-toolbar-border; 16 | height: 100vh; 17 | width: 1em; 18 | .toggler-inner { 19 | font-size: 1.5em; 20 | top: 40vh; 21 | position: fixed; 22 | } 23 | .toggler-hide { display:none; } 24 | .toggler-show { display:block; } 25 | } 26 | .wrapper { 27 | position: fixed; 28 | margin-left: 1em; 29 | background-color: $color-toolbar-bg; 30 | overflow: hidden; 31 | left: -32em; 32 | transition: all 0.5s ease; 33 | height: 100vh; 34 | } 35 | &.expanded { 36 | border-right: $border-width solid $color-toolbar-border; 37 | .wrapper { 38 | padding-left: 
.5em; 39 | padding-right: .5em; 40 | width: 15em; 41 | left: 0; 42 | } 43 | .toggler-show { display:none; } 44 | .toggler-hide { display:block; } 45 | } 46 | ul.features { 47 | list-style-type: none; 48 | padding: 0; 49 | // width: 19em; 50 | li { 51 | background-color: $color-unchecked; 52 | margin-bottom: 2px; 53 | padding: 5px 0; 54 | &:before { content: '✗ '; } 55 | &.checked { 56 | background-color: $color-checked; 57 | &:before { content: '✓ '; } 58 | } 59 | input[type='checkbox'] { display: none; } 60 | label { width: 100%; } 61 | } 62 | } 63 | input.zoom { 64 | // width: 19em; 65 | } 66 | summary { 67 | font-size: 120%; 68 | span.font { 69 | font-size: 100%; 70 | } 71 | } 72 | select.font { 73 | width: 80%; 74 | font-size: 110%; 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrToolbar/template.html: -------------------------------------------------------------------------------- 1 |
2 |
5 |
6 | >
>
>
>
>
>
>
>
>
7 |
8 |
9 | <
<
<
<
<
<
<
<
<
10 |
11 |
12 |
13 | 14 | 15 | 16 | 17 | 18 |
19 | Features 20 |
    21 |
  • 26 | 27 |
  • 28 |
29 |
30 | 31 |
32 | Highlighting 33 |
    34 |
  • 39 | 40 | 41 |
  • 42 |
43 |
44 | 45 |
46 | Zoom: {{ $parent.currentZoomRounded }}% 47 | 48 | 49 | 53 | 54 |

55 | 56 | 57 | 58 |

59 |
60 | 61 | 62 |
64 | Font: 65 | 75 |
76 | 77 |
78 |
79 | 80 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/BackgroundImage.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | 3 | export default class BackgroundImage { 4 | 5 | constructor({imagePrefix=''}) { 6 | this.imagePrefix = imagePrefix 7 | } 8 | 9 | apply(dom) { 10 | let page = HocrDOM.queryHocr(dom, 'page') 11 | HocrDOM.queryHocrAll(dom, { 12 | title: 'image' 13 | }).forEach((el) => { 14 | let imageFile = HocrDOM.getHocrProperties(el).image 15 | page.style.backgroundImage = `url(${this.imagePrefix}${imageFile})` 16 | }) 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/ContentEditable.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | 3 | export default class ContentEditable { 4 | 5 | $emit(...args) { 6 | console.log({args}) 7 | } 8 | 9 | apply(dom) { 10 | HocrDOM.queryHocrAll(dom, { 11 | class: ['line', 'x_word'], 12 | clauses: '', 13 | }).forEach((el) => { 14 | el.setAttribute('contentEditable', 'true') 15 | el.addEventListener('input', () => this.$emit('contentEdited', el)) 16 | }) 17 | } 18 | 19 | } 20 | 21 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/Font.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | import Utils from '@/utils' 3 | 4 | export default class Font { 5 | 6 | constructor({fontFamily='x', fontsAvailable}) { 7 | this.fontFamily = fontFamily 8 | 9 | const styleId = 'hocr-view-font-styles' 10 | Utils.removeCssFragment(styleId) 11 | Object.keys(fontsAvailable).forEach((font) => { 12 | let cssUrl = fontsAvailable[font].cssUrl 13 | if (cssUrl) 
Utils.addCssFragment(styleId, `@import "${cssUrl}";\n`) 14 | }) 15 | } 16 | 17 | apply(dom) { 18 | HocrDOM.queryHocrAll(dom).forEach(el => { 19 | el.style.fontFamily = this.fontFamily 20 | }) 21 | } 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/HighlightBlank.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | 3 | export default class HighlightBlank { 4 | 5 | apply(dom) { 6 | HocrDOM.queryHocrAll(dom).forEach(el => { 7 | if (el.innerHTML.trim() === '') 8 | el.classList.add('hocrjs-blank') 9 | }) 10 | } 11 | 12 | } 13 | 14 | 15 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/Layout.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | 3 | export default class Layout { 4 | 5 | apply(dom) { 6 | HocrDOM.queryHocrAll(dom, { 7 | title: 'bbox' 8 | }).forEach((el) => { 9 | let {bbox} = HocrDOM.getHocrProperties(el) 10 | // XXX this can happen because e.g. 
"x_bboxes".contains('bbox') 11 | // TODO implement glyph bounding boxes 12 | if (!bbox) 13 | return 14 | el.style.position = 'fixed' 15 | el.style.left = bbox[0] + "px" 16 | el.style.top = bbox[1] + "px" 17 | el.style.width = bbox[2] - bbox[0] + 1 + "px" 18 | el.style.height = bbox[3] - bbox[1] + 1 + "px" 19 | }) 20 | } 21 | 22 | } 23 | 24 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/ScaleFont.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | 3 | export default class ScaleFont { 4 | 5 | constructor({fontFamily='x', fonts={}}) { 6 | this.fontFamily = fontFamily 7 | this.minFontSize = 2 8 | this.wrapClass = 'hocr-viewer-wrap' 9 | this.wrap = {} 10 | } 11 | 12 | apply(dom) { 13 | console.time('toggleScaleFont') 14 | 15 | // wrapper element containing wrappers for font-size expansion 16 | this.wrap = document.createElement('span') 17 | this.wrap.classList.add(this.wrapClass) 18 | document.body.appendChild(this.wrap) 19 | HocrDOM.queryHocrAll(dom, {terminal: true}).forEach((el) => this.scaleFont(el)) 20 | this.wrap.remove() 21 | 22 | console.timeEnd('toggleScaleFont') 23 | } 24 | 25 | scaleFont(el) { 26 | const text = el.textContent.trim() 27 | if (text.length === 0) return 28 | this.wrap.style.fontFamily = el.style.fontFamily 29 | this.wrap.innerHTML = text 30 | const height = parseInt(el.style.height.replace('px', '')) 31 | const width = parseInt(el.style.width.replace('px', '')) 32 | let fontSize = height 33 | if (fontSize > this.minFontSize) { 34 | this.wrap.style.fontSize = fontSize + 'px' 35 | const fontSizeH = fontSize * height / this.wrap.offsetHeight 36 | const fontSizeW = fontSize * width / this.wrap.offsetWidth 37 | fontSize = (fontSizeH < fontSizeW) ? 
fontSizeH : fontSizeW 38 | } 39 | if (fontSize < this.minFontSize) { 40 | fontSize = this.minFontSize 41 | } 42 | el.style.fontSize = fontSize + 'px' 43 | } 44 | 45 | } 46 | 47 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/Tooltip.js: -------------------------------------------------------------------------------- 1 | import {HocrDOM} from 'hocr-dom' 2 | import Utils from '@/utils' 3 | 4 | export default class ScaleFont { 5 | 6 | constructor() { 7 | this.styleId = 'hocr-viewer-tooltip-style' 8 | } 9 | 10 | apply(dom) { 11 | let ocrClasses = {} 12 | for (let el of HocrDOM.queryHocrAll(dom)) { 13 | ocrClasses[el.getAttribute('class')] = true 14 | } 15 | console.log("Detected OCR classes", Object.keys(ocrClasses)) 16 | 17 | Utils.removeCssFragment(this.styleId) 18 | Utils.addCssFragment(this.styleId, Object.keys(ocrClasses).map( 19 | cls => `.${cls}:hover::before { content: "${cls}"; }\n` 20 | ).join("\n") 21 | ) 22 | } 23 | 24 | } 25 | 26 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/feature/index.js: -------------------------------------------------------------------------------- 1 | import BackgroundImage from './BackgroundImage' 2 | import ContentEditable from './ContentEditable' 3 | import Font from './Font' 4 | import HighlightBlank from './HighlightBlank' 5 | import Layout from './Layout' 6 | import ScaleFont from './ScaleFont' 7 | import Tooltip from './Tooltip' 8 | 9 | export default { 10 | BackgroundImage, 11 | ContentEditable, 12 | Font, 13 | HighlightBlank, 14 | Layout, 15 | ScaleFont, 16 | Tooltip, 17 | } 18 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/script.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 6 | */ 7 | 8 | import {HocrDOM} from 'hocr-dom' 9 | import HocrToolbar from '../HocrToolbar/index.vue' 10 | import defaultConfig from '@/store/state' 11 | 12 | import featuresAvailable from './feature' 13 | 14 | export default { 15 | name: 'HocrViewer', 16 | components: {HocrToolbar}, 17 | props: { 18 | hocr: {type: String, required: true}, 19 | initialZoom: {type: Number, default: 1}, 20 | featureBackgroundImage: {type: Boolean, default: true}, 21 | featureContentEditable: {type: Boolean, default: true}, 22 | featureFont: {type: Boolean, default: true}, 23 | featureLayout: {type: Boolean, default: true}, 24 | featureScaleFont: {type: Boolean, default: false}, 25 | featureTransparentText: {type: Boolean, default: false}, 26 | featureTooltip: {type: Boolean, default: false}, 27 | featureHighlight: {type: Boolean, default: true}, 28 | featureHighlightPage: {type: Boolean, default: false}, 29 | featureHighlightNotPage: {type: Boolean, default: false}, 30 | featureHighlightBlank: {type: Boolean, default: true}, 31 | featureHighlightInlineNotBlank: {type: Boolean, default: true}, 32 | featureHighlightInlineBlank: {type: Boolean, default: false}, 33 | featureHighlightLine: {type: Boolean, default: true}, 34 | featureHighlightPar: {type: Boolean, default: true}, 35 | featureHighlightCarea: {type: Boolean, default: true}, 36 | featureDisableEmStrong: {type: Boolean, default: true}, 37 | enableToolbar: {type: Boolean, default: true}, 38 | expandToolbar: {type: Boolean, default: true}, 39 | imagePrefix: {type: String, default: ''}, 40 | font: {type: String, default: 'sans-serif'}, 41 
| fontsAvailable: {type: Object, default() {return { 42 | 'sans-serif': {}, 43 | serif: {}, 44 | monospace: {}, 45 | UnifrakturCook: {cssUrl: 'https://fonts.googleapis.com/css?family=UnifrakturCook:700'}, 46 | UnifrakturMaguntia: {cssUrl: 'https://fonts.googleapis.com/css?family=UnifrakturMaguntia'}, 47 | 'Old Standard TT': {cssUrl: 'https://fonts.googleapis.com/css?family=Old+Standard+TT'}, 48 | Cardo: {cssUrl: 'https://fonts.googleapis.com/css?family=Cardo'}, 49 | 'Noto Serif': {cssUrl: 'https://fonts.googleapis.com/css?family=Noto+Serif:400,400i,700&subset=latin-ext'}, 50 | 'Libre Baskerville': {cssUrl: 'https://fonts.googleapis.com/css?family=Libre+Baskerville:400,400i,700&subset=latin-ext'}, 51 | }}}, 52 | }, 53 | data() { 54 | return { 55 | enableLayout: false, 56 | currentPageIdx: 0, 57 | config: defaultConfig, 58 | featuresEnabled: Object.keys(this.$props) 59 | .filter(k => k.startsWith('feature') && this[k]) 60 | .map(k => k.replace('feature', '')), 61 | fontFamily: this.font, 62 | currentZoom: this.initialZoom, 63 | }}, 64 | computed: { 65 | 66 | classList() { 67 | const ret = { 68 | 'hocr-viewer': true, 69 | 'hocr-viewer-toolbar-enabled': this.enableToolbar 70 | } 71 | this.featuresEnabled.map(featureName => ret[`hocr-viewer-feature-${featureName}`] = true) 72 | return ret 73 | }, 74 | 75 | lastPageIdx() { 76 | const pages = HocrDOM.queryHocrAll(this.shadowDom, 'page') 77 | return pages.length - 1 78 | }, 79 | 80 | currentPage() { 81 | console.log("enter currentPage") 82 | const pages = HocrDOM.queryHocrAll(this.shadowDom, 'page') 83 | if (!pages.length) { 84 | console.warn("No .ocr_page element found. 
Is this hOCR?") 85 | return {} 86 | } 87 | return pages[this.currentPageIdx] 88 | }, 89 | 90 | containerStyle() { 91 | const page = this.currentPage 92 | const {bbox} = HocrDOM.getHocrProperties(page) 93 | const pageHeight = bbox[3] - bbox[1] + 1 94 | return { 95 | transform: `scale(${this.currentZoom})`, 96 | 'transform-origin': 'top left', 97 | height: `${pageHeight}px`, 98 | } 99 | }, 100 | 101 | featuresAvailable() { 102 | const ret = {} 103 | Object.keys(this.$props) 104 | .filter(k => k.startsWith('feature')) 105 | .map(k => k.replace('feature', '')) 106 | .map(k => {if (!(k in ret)) ret[k] = true}) 107 | Object.assign(ret, featuresAvailable) 108 | return ret 109 | }, 110 | 111 | features() { 112 | const ret = {} 113 | Object.keys(featuresAvailable).map(featureName => { 114 | if (this.featuresEnabled.includes(featureName)) { 115 | const featureClass = featuresAvailable[featureName] 116 | ret[featureName] = (typeof featureClass === 'function') 117 | ? new featureClass(this) 118 | : true 119 | } 120 | }) 121 | return ret 122 | }, 123 | 124 | shadowDom() { 125 | console.log("enter shadowDom") 126 | const dom = document.createElement('div') 127 | dom.innerHTML = this.hocr 128 | return dom 129 | }, 130 | 131 | hocrDom() { 132 | const dom = document.createElement('div') 133 | dom.innerHTML = this.currentPage.outerHTML 134 | Object.keys(this.features).map(featureName => { 135 | const featureClass = this.features[featureName] 136 | if (featureClass.apply) { 137 | // console.log(`Applying ${featureName}`) 138 | this.features[featureName].apply(dom) 139 | } 140 | }) 141 | return dom 142 | }, 143 | 144 | currentZoomRounded() { 145 | return Math.floor(this.currentZoom * 10000) / 100.0 146 | }, 147 | 148 | }, 149 | 150 | mounted() { 151 | this.zoom('height') 152 | }, 153 | 154 | methods: { 155 | 156 | prevPage() { 157 | this.currentPageIdx = Math.max(this.currentPageIdx - 1, 0) 158 | }, 159 | 160 | nextPage() { 161 | this.currentPageIdx = Math.min(this.currentPageIdx + 1, 
this.lastPageIdx) 162 | }, 163 | 164 | isFeatureEnabled(featureName) {return this.featuresEnabled.includes(featureName)}, 165 | 166 | toggleFeature(featureName) { 167 | if (this.isFeatureEnabled(featureName)) 168 | this.featuresEnabled.splice(this.featuresEnabled.indexOf(featureName), 1) 169 | else 170 | this.featuresEnabled.push(featureName) 171 | }, 172 | 173 | zoom(scaleFactor) { 174 | let container = this.$el.querySelector('.hocr-viewer-container') 175 | let {bbox} = HocrDOM.getHocrProperties(HocrDOM.queryHocr(container)) 176 | if (typeof scaleFactor === 'string') { 177 | if (scaleFactor === 'height') { 178 | scaleFactor = window.innerHeight / bbox[3] 179 | } else if (scaleFactor === 'width') { 180 | scaleFactor = window.innerWidth / bbox[2] 181 | } else if (scaleFactor === 'reset') { 182 | scaleFactor = 1 183 | } else if (scaleFactor.match(/^[+-]/)) { 184 | scaleFactor = this.currentZoom + parseFloat(scaleFactor) 185 | } else { 186 | console.error(`Bad scaleFactor: '${scaleFactor}'`) 187 | } 188 | } 189 | this.currentZoom = scaleFactor 190 | this.$emit('scale-to', this.config.scaleFactor) 191 | }, 192 | 193 | }, 194 | } 195 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/style.scss: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 
6 | */ 7 | 8 | @import '../../variables.scss'; 9 | 10 | .hocr-viewer { 11 | 12 | &.hocr-viewer-toolbar-enabled { 13 | > .hocr-viewer-container { 14 | transform: rotate(0); 15 | margin-left: 1em; 16 | } 17 | } 18 | 19 | .hocr-viewer-container { 20 | min-height: 100vh; 21 | position: relative !important; 22 | // NOTE: This is important otherwise fixed will be relative to page not 23 | // containing element 24 | @include transform(rotate(0deg)); 25 | /* .transform(scale(0.7)); */ 26 | /* position: relative !important; */ 27 | & > div { 28 | overflow: auto; 29 | } 30 | p { 31 | margin: 0; 32 | } 33 | } 34 | 35 | 36 | #{$selector-ocr-classes} { 37 | &:hover::before { 38 | display: none; 39 | } 40 | } 41 | 42 | &.hocr-viewer-feature-Layout { 43 | #{$selector-ocr-classes} { 44 | position: fixed; 45 | white-space: nowrap; 46 | // XXX 47 | // display: flex; 48 | justify-content: left; /* align horizontal */ 49 | align-items: center; /* align vertical */ 50 | } 51 | &.hocr-viewer-feature-Tooltip { 52 | #{$selector-ocr-classes} { 53 | &:hover::before { 54 | @include tooltip 55 | } 56 | } 57 | } 58 | } 59 | 60 | 61 | &.hocr-viewer-feature-Highlight { 62 | margin: -1px; 63 | &.hocr-viewer-feature-HighlightNotPage {@include highlight-area($selector-ocr-not-page, $color-highlight-not-page)} 64 | &.hocr-viewer-feature-HighlightPage {@include highlight-area($selector-ocr-page, $color-highlight-page)} 65 | &.hocr-viewer-feature-HighlightInlineNotBlank {@include highlight-area($selector-ocr-inline-not-blank, $color-highlight-inline-not-blank);} 66 | &.hocr-viewer-feature-HighlightInlineBlank {@include highlight-area($selector-ocr-inline-blank, $color-highlight-inline-blank);} 67 | &.hocr-viewer-feature-HighlightLine {@include highlight-area($selector-ocr-line, $color-highlight-line);} 68 | &.hocr-viewer-feature-HighlightPar {@include highlight-area($selector-ocr-par, $color-highlight-par);} 69 | &.hocr-viewer-feature-HighlightCarea {@include highlight-area($selector-ocr-carea, 
$color-highlight-carea);} 70 | } 71 | 72 | &.hocr-viewer-feature-BackgroundImage { 73 | background-repeat: no-repeat; 74 | 75 | #{$selector-ocr-page} { 76 | background-size: contain; 77 | 78 | } 79 | } 80 | 81 | &.hocr-viewer-feature-DisableEmStrong { 82 | em { font-style: normal; } 83 | strong { font-weight: normal; } 84 | } 85 | 86 | &.hocr-viewer-feature-TransparentText { 87 | .ocr_page { 88 | color: $color-transparent; 89 | } 90 | } 91 | 92 | 93 | } 94 | -------------------------------------------------------------------------------- /vue-hocr/src/components/HocrViewer/template.html: -------------------------------------------------------------------------------- 1 |
2 | 5 |
9 |
10 |
11 | 12 | -------------------------------------------------------------------------------- /vue-hocr/src/components/index.js: -------------------------------------------------------------------------------- 1 | import HocrViewer from './HocrViewer/index.vue' 2 | import HocrToolbar from './HocrToolbar/index.vue' 3 | import HocrViewerStyle from './HocrViewer/style.scss' 4 | import HocrToolbarStyle from './HocrToolbar/style.scss' 5 | 6 | export default { 7 | HocrViewer, 8 | HocrViewerStyle, 9 | HocrToolbar, 10 | HocrToolbarStyle, 11 | } 12 | -------------------------------------------------------------------------------- /vue-hocr/src/store/state.js: -------------------------------------------------------------------------------- 1 | export default { 2 | features: { 3 | disableEmStrong: {enabled: false}, 4 | contentEditable: {enabled: false}, 5 | tooltips: { 6 | enabled: true, 7 | styleId: 'hocr-viewer-tooltip-style', 8 | }, 9 | transparentText: {enabled: false}, 10 | }, 11 | } 12 | -------------------------------------------------------------------------------- /vue-hocr/src/utils.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2017 Konstantin Baierer 3 | * 4 | * This software may be modified and distributed under the terms 5 | * of the MIT license. See the LICENSE file for details. 
6 | */ 7 | 8 | export default class Utils { 9 | 10 | static removeCssFragment(styleId, css) { 11 | const style = document.querySelector(`#${styleId}`) 12 | if (style) style.remove() 13 | } 14 | 15 | static addCssFragment(styleId, css) { 16 | let style = document.querySelector(`#${styleId}`) 17 | if (!style) { 18 | style = document.createElement('style') 19 | style.id = styleId 20 | document.head.appendChild(style) 21 | } 22 | style.appendChild(document.createTextNode(css)) 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /vue-hocr/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path') 2 | const srcPath = path.join(__dirname, "src") 3 | const VueLoaderPlugin = require('vue-loader/lib/plugin') 4 | const {SourceMapDevToolPlugin} = require('webpack') 5 | 6 | module.exports = { 7 | entry: `${srcPath}/components/index.js`, 8 | resolve: { 9 | alias: { 10 | "@": srcPath, 11 | vue: 'vue/dist/vue.esm.js', 12 | }, 13 | }, 14 | output: { 15 | path: __dirname + "/dist", 16 | filename: `vue-hocr.js`, 17 | libraryTarget: 'umd', 18 | library: 'VueHocr', 19 | }, 20 | devtool: false, 21 | plugins: [ 22 | new VueLoaderPlugin(), 23 | new SourceMapDevToolPlugin({ 24 | append: '\n//# sourceMappingURL=https://unpkg.com/vue-hocr/dist/[url]', 25 | filename: '[name].map', 26 | }) 27 | ], 28 | module: { 29 | rules: [ 30 | {test: /.*\.js$/, loader: 'babel-loader', options: {cacheDirectory: true}}, 31 | {test: /\.scss$/, use: ['css-loader', 'sass-loader']}, 32 | {test: /\.vue/, loader: "vue-loader"}, 33 | ] 34 | } 35 | } 36 | --------------------------------------------------------------------------------