├── LICENSE ├── README.md ├── editor.css ├── fonts.css ├── hocr-proofreader.css ├── hocr-proofreader.js ├── index.html ├── main.css └── main.js /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Mark Plomer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hOCR-Proofreader 2 | ================ 3 | 4 | Web based JavaScript GUI library for proofreading/editing hOCR. 5 | 6 | Features: 7 | 8 | - Two view concept: Original layout vs. hOCR text – linked together (i.e. hovering words etc. 
on both sides) 9 | - Original layout can be switched between the original image and the text rendered from hOCR at the same positions – 10 | really powerful to find OCR errors 11 | - Pure JavaScript without dependencies just using current browser features 12 | - Embeddable in other projects 13 | 14 | Online-Demo: http://www.not-implemented.de/hocr-proofreader/ 15 | 16 | 17 | TODO 18 | ---- 19 | 20 | - Full editor features (currently it's just a "contentEditable = true") ... there is a lot of work to do 21 | - Handling bounding-boxes on word/line/paragraph merge/split correctly 22 | - ... 23 | -------------------------------------------------------------------------------- /editor.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Times New Roman, serif; 3 | font-size: 13pt; 4 | } 5 | 6 | div.ocr_page { 7 | border-bottom: 3px dashed #3399ff; 8 | padding: 10px 0; 9 | } 10 | 11 | div.ocr_carea { 12 | padding-left: 10px; 13 | border-left: 3px solid transparent; 14 | } 15 | div.ocr_carea.hover { 16 | border-left: 3px solid #3399ff; 17 | } 18 | 19 | p.ocr_par { 20 | border: 1px solid transparent; 21 | } 22 | p.ocr_par.hover { 23 | border: 1px dashed #3399ff; 24 | } 25 | 26 | span.ocr_line.hover { 27 | background: #cce5ff; 28 | } 29 | 30 | span.ocrx_word.hover { 31 | background: #99cfff; 32 | } 33 | -------------------------------------------------------------------------------- /fonts.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: "Liberation Serif"; 3 | src: url("../Tools/Fonts/liberation-fonts/LiberationSerif-Regular.ttf"); 4 | } 5 | @font-face { 6 | font-family: "Liberation Serif"; 7 | src: url("../Tools/Fonts/liberation-fonts/LiberationSerif-Bold.ttf"); 8 | font-weight: bold; 9 | } 10 | @font-face { 11 | font-family: "Liberation Serif"; 12 | src: url("../Tools/Fonts/liberation-fonts/LiberationSerif-Italic.ttf"); 13 | font-style: 
italic; 14 | } 15 | @font-face { 16 | font-family: "Liberation Serif"; 17 | src: url("../Tools/Fonts/liberation-fonts/LiberationSerif-BoldItalic.ttf"); 18 | font-weight: bold; 19 | font-style: italic; 20 | } 21 | 22 | @font-face { 23 | font-family: "Courier New"; 24 | src: url("../Tools/Fonts/microsoft-core-fonts/CourierNew-Regular.ttf"); 25 | } 26 | @font-face { 27 | font-family: "Courier New"; 28 | src: url("../Tools/Fonts/microsoft-core-fonts/CourierNew-Bold.ttf"); 29 | font-weight: bold; 30 | } 31 | @font-face { 32 | font-family: "Courier New"; 33 | src: url("../Tools/Fonts/microsoft-core-fonts/CourierNew-Italic.ttf"); 34 | font-style: italic; 35 | } 36 | @font-face { 37 | font-family: "Courier New"; 38 | src: url("../Tools/Fonts/microsoft-core-fonts/CourierNew-BoldItalic.ttf"); 39 | font-weight: bold; 40 | font-style: italic; 41 | } 42 | -------------------------------------------------------------------------------- /hocr-proofreader.css: -------------------------------------------------------------------------------- 1 | .layout, .editor { 2 | display: block; 3 | margin: auto; 4 | background: #fff; 5 | box-sizing: border-box; 6 | border: 1px solid #c1c1c1; 7 | box-shadow: 0 0 6px 0 rgba(0, 0, 0, 0.2); 8 | } 9 | 10 | .layout .rects rect { 11 | fill: none; 12 | pointer-events: fill; 13 | } 14 | 15 | .layout .rects .ocr_carea.hover rect.ocr_carea { 16 | /* TODO: show ocr_carea just with a line on the left (with some padding?) 
'use strict';

/**
 * Small collection of DOM/XHR helpers used by the proofreader and by the demo page.
 */
var Util = {
    /**
     * Invokes `callback` once the DOM is usable: immediately if the document has
     * already left the "loading" state, otherwise on `DOMContentLoaded`.
     *
     * @param {function()} callback
     */
    onReady: function (callback) {
        if (document.readyState !== 'loading') {
            callback();
        } else {
            document.addEventListener('DOMContentLoaded', callback);
        }
    },

    /**
     * Loads `url` with an asynchronous GET request.
     *
     * @param {string} url
     * @param {function(?Error, string=)} callback  Called with `(null, responseText)` for
     *     a status in [200, 400), otherwise with an `Error` as the only argument.
     */
    get: function (url, callback) {
        var request = new XMLHttpRequest();
        request.open('GET', url);
        request.onload = function () {
            if (request.status >= 200 && request.status < 400) {
                callback(null, request.responseText);
            } else {
                callback(new Error('Error loading url "' + url + '": HTTP error: ' + request.status + ' ' + request.statusText));
            }
        };
        request.onerror = function () {
            callback(new Error('Error loading url "' + url + '": HTTP connection error'));
        };
        request.send();
    },

    /**
     * Reports an error to the user.
     *
     * @param {Error} err
     */
    handleError: function (err) {
        alert(err.message); // TODO
    },

    /**
     * Creates an HTML element and sets the given attributes on it.
     *
     * @param {string} name  Tag name.
     * @param {Object<string, *>} [attributes]  Own enumerable properties become attributes.
     * @returns {Element}
     */
    createElem: function (name, attributes) {
        var node = document.createElement(name);
        // A dedicated loop variable: the original reused `name`, clobbering the parameter.
        for (var attrName in attributes) {
            if (Object.prototype.hasOwnProperty.call(attributes, attrName)) {
                node.setAttribute(attrName, attributes[attrName]);
            }
        }
        return node;
    },

    /**
     * Creates an element in the SVG namespace and sets the given attributes on it.
     *
     * @param {string} name  Tag name.
     * @param {Object<string, *>} [attributes]  Own enumerable properties become attributes.
     * @returns {Element}
     */
    createSvgElem: function (name, attributes) {
        var node = document.createElementNS('http://www.w3.org/2000/svg', name);
        for (var attrName in attributes) {
            if (Object.prototype.hasOwnProperty.call(attributes, attrName)) {
                node.setAttribute(attrName, attributes[attrName]);
            }
        }
        return node;
    },

    /**
     * Removes all child nodes of `node`.
     *
     * @param {Node} node
     */
    removeChildren: function (node) {
        while (node.hasChildNodes()) {
            node.removeChild(node.lastChild);
        }
    }
};


/**
 * Two-pane hOCR proofreader: an SVG "layout" view (page image or re-rendered words plus
 * hoverable rectangles) and an editable iframe holding the hOCR document. Nodes of both
 * views are cross-linked through a `linkedNode` property so hovering works both ways.
 *
 * @constructor
 * @param {{layoutContainer: string, editorContainer: string}} config  Ids of the two
 *     existing DOM elements that receive the layout SVG and the editor iframe.
 * @throws {Error} If one of the container elements does not exist.
 */
function HocrProofreader(config) {
    this.config = config;

    this.layoutSvg = Util.createSvgElem('svg', {'class': 'layout'});

    this.layoutBackground = Util.createSvgElem('rect', {'class': 'background', 'x': 0, 'y': 0, 'width': '100%', 'height': '100%', 'style': 'fill: none'});
    this.layoutSvg.appendChild(this.layoutBackground);

    this.layoutImage = Util.createSvgElem('image', {'x': 0, 'y': 0, 'width': '100%', 'height': '100%'});
    this.layoutSvg.appendChild(this.layoutImage);

    this.layoutWords = Util.createSvgElem('g', {'class': 'words'});
    this.layoutSvg.appendChild(this.layoutWords);

    this.layoutRects = Util.createSvgElem('g', {'class': 'rects'});
    this.layoutSvg.appendChild(this.layoutRects);

    this.layoutContainer = document.getElementById(config.layoutContainer);
    if (!this.layoutContainer) {
        throw new Error('Layout container "' + config.layoutContainer + '" not found');
    }
    this.layoutContainer.appendChild(this.layoutSvg);
    this.layoutContainer.style.overflow = 'scroll';

    this.editorIframe = Util.createElem('iframe', {'class': 'editor', 'frameborder': 0});

    var editorContainer = document.getElementById(config.editorContainer);
    if (!editorContainer) {
        throw new Error('Editor container "' + config.editorContainer + '" not found');
    }
    editorContainer.appendChild(this.editorIframe);

    var self = this;
    self.hoveredNode = null;
    self.mousePosition = null;

    this.layoutSvg.addEventListener('mousemove', function (event) {
        self.mousePosition = {container: 'layout', x: event.clientX, y: event.clientY};
        self.onHover(event.target);
    });
    this.layoutSvg.addEventListener('mouseleave', function () {
        self.mousePosition = null;
        self.onHover(null);
    });
    // Scrolling moves content under a resting mouse pointer without a mousemove event,
    // so re-evaluate the hovered element from the last known pointer position:
    this.layoutContainer.addEventListener('scroll', function () {
        if (!self.mousePosition || self.mousePosition.container !== 'layout') return;
        self.onHover(document.elementFromPoint(self.mousePosition.x, self.mousePosition.y));
    });

    // init some defaults:
    this.currentPage = null;
    this.toggleLayoutImage();
    this.setZoom('page-width');
}

/**
 * Loads an hOCR document into the editor iframe, makes it editable and renders its
 * first page into the layout view.
 *
 * @param {string} hocr  hOCR markup; written into the iframe document as-is.
 * @param {string} [baseUrl]  Prefix for the page image paths found in the hOCR.
 */
HocrProofreader.prototype.setHocr = function (hocr, baseUrl) {
    this.hocrBaseUrl = baseUrl;
    var hocrDoc = this.editorIframe.contentDocument;

    // TODO: use baseUrl for images/components in hOCR - use ?

    hocrDoc.open();
    hocrDoc.write(hocr);
    hocrDoc.close();

    var self = this;
    var hocrRoot = hocrDoc.documentElement;
    hocrRoot.addEventListener('mousemove', function (event) {
        self.mousePosition = {container: 'editor', x: event.clientX, y: event.clientY};
        self.onHover(event.target, true);
    });
    hocrRoot.addEventListener('mouseleave', function () {
        self.mousePosition = null;
        self.onHover(null, true);
    });
    hocrDoc.addEventListener('scroll', function () {
        if (!self.mousePosition || self.mousePosition.container !== 'editor') return;
        self.onHover(hocrDoc.elementFromPoint(self.mousePosition.x, self.mousePosition.y), true);
    });

    this.editorStylesheet = Util.createElem('link', {'type': 'text/css', 'rel': 'stylesheet', 'href': 'editor.css'});
    hocrDoc.head.appendChild(this.editorStylesheet);

    hocrDoc.body.contentEditable = true;

    this.setPage('first');
};

/**
 * Serializes the current (possibly edited) hOCR document.
 *
 * The editor-only additions (stylesheet link, contentEditable, "hover" classes) are
 * stripped for serialization and restored afterwards, even if serialization throws.
 *
 * @returns {string} The hOCR markup.
 */
HocrProofreader.prototype.getHocr = function () {
    var hocrDoc = this.editorIframe.contentDocument;

    hocrDoc.head.removeChild(this.editorStylesheet);
    hocrDoc.body.contentEditable = 'inherit'; // this removes the attribute from DOM
    this.onHover(null); // ensure there are no "hover" classes left

    try {
        return new XMLSerializer().serializeToString(hocrDoc);
    } finally {
        hocrDoc.head.appendChild(this.editorStylesheet);
        hocrDoc.body.contentEditable = true;
    }
};

/**
 * Applies a zoom mode to the layout SVG.
 *
 * @param {string} [zoom]  'page-full', 'page-width' or 'original'. When omitted, the
 *     current mode is re-applied (used after a page change).
 */
HocrProofreader.prototype.setZoom = function (zoom) {
    if (zoom) this.currentZoom = zoom;

    var style = this.layoutSvg.style;

    if (this.currentZoom === 'page-full') {
        style.width = null;
        style.height = null;
        style.maxWidth = '100%';
        style.maxHeight = '100%';
    } else if (this.currentZoom === 'page-width') {
        style.width = null;
        style.height = null;
        style.maxWidth = '100%';
        style.maxHeight = null;
    } else if (this.currentZoom === 'original') {
        // A page without a (complete) bbox has no known pixel size - fall back to auto:
        var bbox = this.currentPage ? this.getNodeOptions(this.currentPage).bbox : null;
        if (bbox && bbox.length >= 4) {
            style.width = '' + (bbox[2] - bbox[0]) + 'px';
            style.height = '' + (bbox[3] - bbox[1]) + 'px';
        } else {
            style.width = null;
            style.height = null;
        }

        style.maxWidth = null;
        style.maxHeight = null;
    }
};

/**
 * Switches the layout view between the page image and the words rendered from hOCR.
 * The first call (no display style set yet) shows the image.
 */
HocrProofreader.prototype.toggleLayoutImage = function () {
    if (!this.layoutWords.style.display || this.layoutWords.style.display === 'block') {
        this.layoutWords.style.display = 'none';
        this.layoutImage.style.display = 'block';
    } else {
        this.layoutWords.style.display = 'block';
        this.layoutImage.style.display = 'none';
    }
};

/**
 * Selects and renders a page of the loaded hOCR document.
 *
 * @param {string} page  'first', 'last', 'next' or 'previous'. If no matching
 *     `.ocr_page` element is found, the layout view is cleared.
 */
HocrProofreader.prototype.setPage = function (page) {
    var pageNode, backwards = false, skipCurrent = false;
    var hocrDoc = this.editorIframe.contentDocument;

    if (page === 'first') {
        pageNode = hocrDoc.body.firstElementChild;
    } else if (page === 'last') {
        pageNode = hocrDoc.body.lastElementChild;
        backwards = true;
    } else if (page === 'next') {
        pageNode = this.currentPage || hocrDoc.body.firstElementChild;
        skipCurrent = true;
    } else if (page === 'previous') {
        pageNode = this.currentPage || hocrDoc.body.lastElementChild;
        backwards = true;
        skipCurrent = true;
    }

    // Walk the siblings in the chosen direction until an ocr_page element is found;
    // skipCurrent forces one step away from the starting node first.
    while (pageNode && (skipCurrent || !pageNode.classList.contains('ocr_page'))) {
        pageNode = backwards ? pageNode.previousElementSibling : pageNode.nextElementSibling;
        skipCurrent = false;
    }

    this.renderPage(pageNode || null);
};

/**
 * Renders the given page element into the layout view, replacing the previous page.
 *
 * @param {?Element} pageNode  The `.ocr_page` element to show, or null to clear.
 */
HocrProofreader.prototype.renderPage = function (pageNode) {
    this.layoutContainer.scrollTop = 0;
    this.layoutContainer.scrollLeft = 0;

    // When moving to a page that precedes the current one, start at the bottom of it:
    var scrollToBottom = false, tmpNode = this.currentPage;
    while (tmpNode) {
        tmpNode = tmpNode.previousElementSibling;
        if (tmpNode === pageNode) {
            scrollToBottom = true;
            break;
        }
    }

    function removeLinkedNodes(node) {
        if (node.linkedNode) node.linkedNode = null;

        var childNode = node.firstElementChild;
        while (childNode) {
            removeLinkedNodes(childNode);
            childNode = childNode.nextElementSibling;
        }
    }
    if (this.currentPage) removeLinkedNodes(this.currentPage);

    Util.removeChildren(this.layoutWords);
    Util.removeChildren(this.layoutRects);

    this.currentPage = pageNode;

    this.setZoom();
    this.layoutImage.removeAttribute('transform');

    if (!this.currentPage) {
        // TODO: hide completely? reset image/font/viewBox/...?
        return;
    }

    var pageOptions = this.getNodeOptions(this.currentPage);
    var bbox = pageOptions.bbox;

    if (bbox) {
        this.layoutSvg.setAttribute('viewBox', bbox.join(' '));
    } else {
        // do not keep the previous page's coordinate system for a page without bbox:
        this.layoutSvg.removeAttribute('viewBox');
    }
    this.layoutWords.style.fontFamily = 'Liberation Serif, serif'; // TODO: use font from hOCR (per page)

    if (pageOptions.image) {
        this.layoutImage.setAttributeNS('http://www.w3.org/1999/xlink', 'href', (this.hocrBaseUrl || '') + pageOptions.image);
    } else {
        this.layoutImage.removeAttributeNS('http://www.w3.org/1999/xlink', 'href');
    }

    if (pageOptions.textangle && bbox) {
        // textangle is counter-clockwise, so we have to rotate the image clockwise - and transform-rotate() is clockwise:
        this.layoutImage.setAttribute('transform', 'rotate(' + pageOptions.textangle + ' ' +
            ((bbox[2] - bbox[0]) / 2) + ' ' +
            ((bbox[3] - bbox[1]) / 2) + ')');
    }

    this.renderNodesRecursive(this.currentPage, pageOptions);

    if (scrollToBottom) {
        this.layoutContainer.scrollTop = this.layoutContainer.scrollHeight - this.layoutContainer.clientHeight;
    }
};

/**
 * Recursively creates the SVG counterparts (hover rectangles, word texts) for an hOCR
 * node and its descendants and cross-links them via `linkedNode`.
 *
 * @param {Element} node  hOCR element to render.
 * @param {Object} options  Options of the nearest rendered ancestor (for inheritance).
 * @param {Element} [parentRectsNode]  SVG group receiving the rectangles; defaults to
 *     the top-level rects group.
 */
HocrProofreader.prototype.renderNodesRecursive = function (node, options, parentRectsNode) {
    if (!parentRectsNode) parentRectsNode = this.layoutRects;

    var className = null;
    if (node.classList.contains('ocr_carea')) {
        className = 'ocr_carea';
    } else if (node.classList.contains('ocr_par')) {
        className = 'ocr_par';
    } else if (node.classList.contains('ocr_line')) {
        className = 'ocr_line';
    } else if (node.classList.contains('ocrx_word')) {
        className = 'ocrx_word';
    }

    if (className) {
        if (className !== 'ocrx_word') {
            // container levels get their own group so CSS can address nested rects:
            var groupNode = Util.createSvgElem('g', {'class': className});
            parentRectsNode.appendChild(groupNode);
            parentRectsNode = groupNode;
        }

        options = this.inheritOptions(this.getNodeOptions(node), options);

        if (options.bbox) {
            if (className === 'ocrx_word' && options.baselineBbox) {
                var word = node.textContent;

                // TODO: calculate font-size and y based on bbox, not baseline (font-metrics needed):
                var textAttributes = {
                    'x': options.bbox[0],
                    'y': parseFloat(options.baselineBbox[3]) + parseFloat(options.baseline[1]),
                    'textLength': options.bbox[2] - options.bbox[0],
                    'lengthAdjust': 'spacingAndGlyphs'
                };
                // font size needs both the point size and the scan resolution; without
                // them the word is still rendered, just with the inherited font size:
                if (options.x_fsize && options.scan_res) {
                    textAttributes['font-size'] = options.x_fsize * options.scan_res[1] / 72; // 1 pt = 1/72 inch
                }

                var textNode = Util.createSvgElem('text', textAttributes);
                textNode.textContent = word;
                this.layoutWords.appendChild(textNode);
            }

            var rectNode = Util.createSvgElem('rect', {
                'x': options.bbox[0],
                'y': options.bbox[1],
                'width': options.bbox[2] - options.bbox[0],
                'height': options.bbox[3] - options.bbox[1],
                'class': className
            });
            parentRectsNode.appendChild(rectNode);

            // cross-link both nodes:
            rectNode.linkedNode = node;
            node.linkedNode = rectNode;
        }
    }

    var childNode = node.firstElementChild;
    while (childNode) {
        this.renderNodesRecursive(childNode, options, parentRectsNode);
        childNode = childNode.nextElementSibling;
    }
};

/**
 * Parses the hOCR properties stored in a node's `title` attribute
 * (`name value; name "quoted value"; ...`).
 *
 * @param {{title: (string|undefined)}} node
 * @returns {Object<string, (string|string[])>} Property map; `bbox`, `baseline` and
 *     `scan_res` are split on whitespace into string arrays, all other values are strings.
 */
HocrProofreader.prototype.getNodeOptions = function (node) {
    var asArray = ['bbox', 'baseline', 'scan_res'];
    var optionsStr = node.title ? node.title : '';
    var match, regex = /(?:^|;)\s*(\w+)\s+(?:([^;"']+?)|"((?:\\"|[^"])+?)"|'((?:\\'|[^'])+?)')\s*(?=;|$)/g;

    var options = {};
    while ((match = regex.exec(optionsStr))) {
        var name = match[1];
        // groups: 2 = unquoted, 3 = double-quoted, 4 = single-quoted value
        var value = match[4] || match[3] || match[2];

        if (asArray.indexOf(name) !== -1) {
            value = value.split(/\s+/);
        }

        options[name] = value;
    }

    return options;
};

/**
 * Completes a node's options with the inheritable options of its parent.
 *
 * @param {Object} options  The node's own options; modified in place and returned.
 * @param {Object} [parentOptions]  Options of the parent node.
 * @returns {Object} `options`
 */
HocrProofreader.prototype.inheritOptions = function (options, parentOptions) {
    var inheritableOptions = ['baseline', 'baselineBbox', 'x_fsize', 'scan_res'];

    // baseline is relative to the bbox of the node where the baseline is defined, so we have to remember this bbox:
    if ('baseline' in options && 'bbox' in options) {
        options.baselineBbox = options.bbox;
    }

    if (parentOptions) {
        for (var name in parentOptions) {
            if (inheritableOptions.indexOf(name) === -1) continue;
            if (name in options) continue;
            options[name] = parentOptions[name];
        }
    }

    return options;
};

/**
 * Updates the hover highlighting in both views for the element under the pointer.
 *
 * @param {?Element} target  Element under the pointer, or null when there is none.
 * @param {boolean} [isEditorContainer]  True when the event originates from the editor
 *     iframe; in that case hovering a different page also switches the layout view.
 */
HocrProofreader.prototype.onHover = function (target, isEditorContainer) {
    if (target === this.hoveredNode) return;

    if (this.hoveredNode) {
        this.hoverTreeNodes(this.hoveredNode, false);
        this.hoverTreeNodes(this.hoveredNode.linkedNode, false);
        this.hoveredNode = null;
    }

    if (isEditorContainer) {
        // check for page change:
        var pageNode = target;
        while (pageNode && (!pageNode.classList.contains('ocr_page'))) {
            pageNode = pageNode.parentElement;
        }
        if (pageNode && pageNode !== this.currentPage) {
            this.renderPage(pageNode);
        }
    }

    var linkedNode = target && target.linkedNode;
    if (linkedNode) {
        this.hoverTreeNodes(target, true);
        this.hoverTreeNodes(linkedNode, true);
        this.hoveredNode = target;

        var linkedContainer = isEditorContainer ? this.layoutContainer : this.editorIframe.contentDocument.documentElement;
        this.scrollIntoViewIfNeeded(linkedNode, linkedContainer);
    }
};

/**
 * Adds or removes the "hover" class on a node and its ancestors, stopping (exclusive)
 * at the page element respectively the top-level rects group.
 *
 * @param {?Element} node  Start node; null/undefined is a no-op.
 * @param {boolean} isActive  True to add the class, false to remove it.
 */
HocrProofreader.prototype.hoverTreeNodes = function (node, isActive) {
    while (node) {
        if (node.classList.contains('ocr_page') || node.classList.contains('rects')) break;
        if (isActive) {
            node.classList.add('hover');
        } else {
            node.classList.remove('hover');
        }
        node = node.parentElement;
    }
};

/**
 * Smoothly scrolls `node` into the visible area of `scrollParentNode` if it is (partly)
 * outside of it. Each axis is only acted on when the node fits into the scroll area on
 * that axis; an axis that needs no correction is requested as 'nearest'. At most one
 * scroll is triggered, covering both axes.
 *
 * @param {Element} node  Node that should become visible.
 * @param {Element} scrollParentNode  The scrolling element (a container element or the
 *     documentElement of the iframe).
 */
HocrProofreader.prototype.scrollIntoViewIfNeeded = function (node, scrollParentNode) {
    var rect = node.getBoundingClientRect();
    // do not subtract the bounding-rect of the scrollParent if it is the documentElement (e.g. the iframe),
    // otherwise scroll-position is added twice - set to 0:
    var parentRect = scrollParentNode.parentElement ? scrollParentNode.getBoundingClientRect() : {left: 0, top: 0};
    // node position in the scroll parent's content coordinates:
    var nodeRect = {
        left: rect.left - parentRect.left + scrollParentNode.scrollLeft,
        top: rect.top - parentRect.top + scrollParentNode.scrollTop,
        right: rect.right - parentRect.left + scrollParentNode.scrollLeft,
        bottom: rect.bottom - parentRect.top + scrollParentNode.scrollTop
    };

    var block = null, inline = null;

    if (nodeRect.bottom - nodeRect.top <= scrollParentNode.clientHeight) { // ignore nodes higher than scroll area
        if (nodeRect.bottom > scrollParentNode.scrollTop + scrollParentNode.clientHeight) {
            block = 'end';
        } else if (nodeRect.top < scrollParentNode.scrollTop) {
            block = 'start';
        }
    }
    if (nodeRect.right - nodeRect.left <= scrollParentNode.clientWidth) { // ignore nodes wider than scroll area
        if (nodeRect.right > scrollParentNode.scrollLeft + scrollParentNode.clientWidth) {
            inline = 'end';
        } else if (nodeRect.left < scrollParentNode.scrollLeft) {
            inline = 'start';
        }
    }

    if (!block && !inline) return;

    // horizontal alignment is controlled by "inline", not "block"; a single call avoids
    // two smooth scrolls interrupting each other:
    node.scrollIntoView({behavior: 'smooth', block: block || 'nearest', inline: inline || 'nearest'});
};
14 |
15 | 16 | 17 | 18 |
19 | 20 | Zoom: 21 | 22 | 23 | 24 |
25 | 26 | 27 |
28 | 29 |
30 |
31 |
'use strict';

// Demo bootstrap: creates the proofreader, wires the toolbar buttons and loads the
// demo hOCR document.
Util.onReady(function () {
    var hocrProofreader = new HocrProofreader({
        layoutContainer: 'layout-container',
        editorContainer: 'editor-container'
    });

    document.getElementById('toggle-layout-image').addEventListener('click', function () {
        hocrProofreader.toggleLayoutImage();
    });

    document.getElementById('zoom-page-full').addEventListener('click', function () {
        hocrProofreader.setZoom('page-full');
    });

    document.getElementById('zoom-page-width').addEventListener('click', function () {
        hocrProofreader.setZoom('page-width');
    });

    document.getElementById('zoom-original').addEventListener('click', function () {
        hocrProofreader.setZoom('original');
    });

    // Posts the edited hOCR, form-encoded, to save.php.
    document.getElementById('button-save').addEventListener('click', function () {
        var hocr = hocrProofreader.getHocr();

        var request = new XMLHttpRequest();
        request.open('POST', 'save.php');
        request.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded; charset=utf-8');
        // Report failures instead of dropping them silently, so the user does not
        // assume the edits were stored (same status range as Util.get):
        request.onload = function () {
            if (request.status < 200 || request.status >= 400) {
                Util.handleError(new Error('Error saving hOCR: HTTP error: ' + request.status + ' ' + request.statusText));
            }
        };
        request.onerror = function () {
            Util.handleError(new Error('Error saving hOCR: HTTP connection error'));
        };
        request.send('hocr=' + encodeURIComponent(hocr));
    });

    var hocrBaseUrl = 'demo/';
    var hocrUrl = hocrBaseUrl + 'demo.hocr';

    Util.get(hocrUrl, function (err, hocr) {
        if (err) return Util.handleError(err);

        hocrProofreader.setHocr(hocr, hocrBaseUrl);
    });
});