├── LICENSE ├── README.md ├── css └── main.css ├── favicon.png ├── index.html ├── js └── main.js ├── manifest.json ├── poema.jpg ├── screenshot.png └── sw.js /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Victor Ribeiro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCR 2 | 3 | A simple app (PWA) to extract text from images using [Tesseract](https://tesseract.projectnaptha.com/). 4 | 5 | ![screenshot](screenshot.png) 6 | 7 | [Live version](https://victorribeiro.com/ocr) 8 | 9 | ## About 10 | 11 | No image upload. Everything runs locally on your device. Choose an image, edit the text if you must, then just copy and paste. 
All credit for this app goes to the good people working on [Tesseract](https://tesseract.projectnaptha.com/). 12 | -------------------------------------------------------------------------------- /css/main.css: -------------------------------------------------------------------------------- 1 | 2 | html, body { 3 | margin: 0; 4 | padding: 0; 5 | height: 100%; 6 | } 7 | 8 | html, body, input, button { 9 | font-family: "Arial"; 10 | font-size: 1em; 11 | } 12 | 13 | img { 14 | max-width: 100%; 15 | } 16 | 17 | progress:after { 18 | content: 'Working...'; 19 | position: relative; 20 | font-style: italic; 21 | top: 20px; 22 | left: 0; 23 | } 24 | 25 | 26 | #main { 27 | height: 100%; 28 | width: 100%; 29 | display: flex; 30 | flex-direction: column; 31 | align-items: center; 32 | justify-content: middle; 33 | } 34 | 35 | #toolbar { 36 | width: 100%; 37 | line-height: 2em; 38 | background-color: rgb(200,200,200); 39 | display: flex; 40 | align-items: center; 41 | justify-content: center; 42 | flex-wrap: wrap; 43 | } 44 | 45 | #result { 46 | width: 100%; 47 | flex: 1 0 auto; 48 | display: flex; 49 | align-items: stretch; 50 | justify-content: center; 51 | } 52 | 53 | #preview { 54 | width: 50%; 55 | } 56 | 57 | #recognizedText { 58 | width: 50%; 59 | } 60 | 61 | @media only screen and (max-width: 600px) { 62 | #result { 63 | display: flex; 64 | align-items: center; 65 | justify-content: center; 66 | flex-direction: column; 67 | } 68 | 69 | #preview { 70 | height: 50%; 71 | width: 95%; 72 | } 73 | 74 | #recognizedText { 75 | height: 50%; 76 | width: 95%; 77 | } 78 | } 79 | 80 | -------------------------------------------------------------------------------- /favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/victorqribeiro/ocr/cc8bc1b730084e5b51ac67a5c5921b0afbf134e1/favicon.png -------------------------------------------------------------------------------- /index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | OCR 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 |
30 | 31 |
32 | 33 | 34 | 35 |
36 | 37 |
38 | 39 | 107 | 108 |
109 | 110 |
111 | 112 |
113 | 114 |
115 | 116 |
117 | 118 |
119 | 120 |
121 | 122 |
123 | 124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /js/main.js: -------------------------------------------------------------------------------- 1 | /* Object URL of the currently selected image; stays null until a file has been chosen. */ let imgSrc = null 2 | /* Shorthand for document.querySelector. */ 3 | const $ = _ => document.querySelector(_) 4 | /* Shorthand for document.createElement. */ 5 | const $c = _ => document.createElement(_) 6 | /* Change handler for the file input: when at least one file is selected, stores an object URL for the first file in imgSrc and calls draw(). NOTE(review): a previously created object URL is never revoked here - confirm whether that is intentional. */ 7 | const open = e => { 8 | if(e.target.files.length){ 9 | const url = window.URL || window.webkitURL 10 | imgSrc = url.createObjectURL(e.target.files[0]) 11 | draw() 12 | } 13 | } 14 | /* Shows the image referenced by imgSrc as the background of #preview (no-repeat, contain) and then starts OCR(). The img parameter is not read by the body. */ 15 | const draw = img => { 16 | $('#preview').style.backgroundImage = `url(${imgSrc})` 17 | $('#preview').style.backgroundRepeat = 'no-repeat' 18 | $('#preview').style.backgroundSize = 'contain' 19 | OCR() 20 | } 21 | /* Replaces the content of #recognizedText with a progress element, calls Tesseract.recognize with imgSrc and the current value of #lang (presumably the language code - confirm against the Tesseract API), mirrors m.progress from the logger callback into the progress element, and finally writes the recognized text, or the error on failure, into #recognizedText. */ 22 | const OCR = () => { 23 | const progress = $c('progress') 24 | progress.value = 0 25 | progress.style.display = 'block' 26 | progress.style.margin = '25% auto' 27 | $('#recognizedText').innerHTML = "" 28 | $('#recognizedText').appendChild( progress ) 29 | 30 | Tesseract.recognize( 31 | imgSrc, 32 | $('#lang').value, 33 | { 34 | logger: m => { progress.value = m.progress } 35 | }) 36 | .then(({ data: { text } }) => { 37 | $('#recognizedText').style.padding = '1em' 38 | $('#recognizedText').innerText = text 39 | }) 40 | .catch( e => { $('#recognizedText').innerText = e } ) 41 | } 42 | /* Run open() when #import changes; re-run OCR() when #lang changes and an image has already been loaded. */ 43 | $('#import').addEventListener('change', open ) 44 | $('#lang').addEventListener('change', _ => { if(imgSrc) OCR() }) 45 | /* On DOMContentLoaded, read the title, text and url query parameters of the current location and alert each one that is present. */ 46 | window.addEventListener('DOMContentLoaded', () => { 47 | const parsedUrl = new URL(window.location); 48 | const title = parsedUrl.searchParams.get('title'), 49 | text = parsedUrl.searchParams.get('text'), 50 | url = parsedUrl.searchParams.get('url') 51 | 52 | if(title) alert('Title shared: ' + title); 53 | if(text) alert('Text shared: ' + text); 54 | if(url) alert('URL shared: ' + url); 55 | 56 | }); 57 | /* Register the service worker at /ocr/sw.js when the browser exposes navigator.serviceWorker; the registration result and any rejection are passed through without further handling. */ 58 | if('serviceWorker' in navigator) { 59 | navigator.serviceWorker 60 | .register('/ocr/sw.js', {scope: './'}) 61 | 
.then(response => response) 62 | .catch(reason => reason); 63 | } 64 | /* Holds the beforeinstallprompt event so that prompt() can be called later from the button click. */ 65 | let deferredPrompt; 66 | /* Install button; it is created here but not attached to the document in the code shown. */ const addBtn = document.createElement('button'); 67 | /* Suppresses the default install prompt, keeps the event, shows addBtn and wires a click handler that hides the button, calls prompt() and clears the saved event once userChoice settles. NOTE(review): a new click listener is added every time this event fires. */ 68 | window.addEventListener('beforeinstallprompt', (e) => { 69 | e.preventDefault(); 70 | deferredPrompt = e; 71 | addBtn.style.display = 'block'; 72 | addBtn.addEventListener('click', (e) => { 73 | addBtn.style.display = 'none'; 74 | deferredPrompt.prompt(); 75 | deferredPrompt.userChoice.then((choiceResult) => { 76 | deferredPrompt = null; 77 | }); 78 | }); 79 | }); 80 | 81 | -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "OCR", 3 | "name": "OCR", 4 | "icons": [ 5 | { 6 | "src": "favicon.png", 7 | "type": "image/png", 8 | "sizes": "192x192" 9 | }, 10 | { 11 | "src": "favicon.png", 12 | "type": "image/png", 13 | "sizes": "512x512" 14 | } 15 | ], 16 | "share_target": 17 | { 18 | "action": "./share", 19 | "params": 20 | { 21 | "title": "title", 22 | "text": "text", 23 | "url": "url" 24 | } 25 | }, 26 | "start_url": "index.html", 27 | "background_color": "#ffffff", 28 | "display": "standalone", 29 | "orientation": "portrait", 30 | "theme_color": "#ffffff", 31 | "description": "OCR with Tesseract" 32 | } 33 | -------------------------------------------------------------------------------- /poema.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/victorqribeiro/ocr/cc8bc1b730084e5b51ac67a5c5921b0afbf134e1/poema.jpg -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/victorqribeiro/ocr/cc8bc1b730084e5b51ac67a5c5921b0afbf134e1/screenshot.png -------------------------------------------------------------------------------- /sw.js: 
/**
 * Service worker for the OCR PWA.
 *
 * - install:  pre-caches the app shell listed in filesToCache.
 * - fetch:    cache-first for GET requests over http(s); anything fetched from
 *             the network is added to the cache for later offline use.
 * - activate: deletes caches left behind by previous versions.
 */

// App-shell files pre-cached at install time so the UI can load offline.
const filesToCache = [
  './',
  './index.html',
  './js/main.js',
  './css/main.css',
  './favicon.png',
  './manifest.json'
];

// Changing this name makes the activate handler drop every older cache.
const staticCacheName = 'pages-cache-v1.1';

self.addEventListener('install', event => {
  // Keep the worker in the installing phase until the whole shell is cached;
  // if any file fails to download, installation fails and is retried later.
  event.waitUntil(
    caches.open(staticCacheName)
      .then(cache => cache.addAll(filesToCache))
  );
});

self.addEventListener('fetch', event => {
  const request = event.request;

  // The Cache API can only store GET requests made over http(s). Leaving
  // everything else to the browser avoids caching the response of a POST
  // under its URL (and serving it to a later GET), and avoids cache.put
  // throwing for schemes such as chrome-extension://.
  if (request.method !== 'GET' || !request.url.startsWith('http')) {
    return;
  }

  event.respondWith(
    caches.match(request)
      .then(cached => {
        if (cached) {
          return cached;
        }

        return fetch(request).then(response => {
          // Store successful responses, plus opaque ones (cross-origin
          // no-cors requests, e.g. scripts served from a CDN) so they stay
          // available offline. Error pages and partial (206) responses are
          // not stored: the former would be served forever, the latter are
          // rejected by cache.put.
          const cacheable = response.status === 200 || response.type === 'opaque';

          if (cacheable) {
            // Clone before the body is consumed by the page.
            const copy = response.clone();

            // Write to the cache in the background; a failed write (for
            // example quota exceeded) must not break the response itself.
            event.waitUntil(
              caches.open(staticCacheName)
                .then(cache => cache.put(request, copy))
                .catch(error => {
                  console.error('sw: could not cache ' + request.url, error);
                })
            );
          }

          return response;
        });
      })
      .catch(error => {
        // Neither the cache nor the network could answer (typically offline
        // with an uncached resource). Report it and hand the page an explicit
        // network error instead of resolving with undefined.
        console.error('sw: request failed for ' + request.url, error);
        return Response.error();
      })
  );
});

self.addEventListener('activate', event => {
  // Caches that belong to the current version and must be kept.
  const cacheWhitelist = [staticCacheName];

  event.waitUntil(
    caches.keys().then(cacheNames => Promise.all(
      cacheNames
        .filter(cacheName => cacheWhitelist.indexOf(cacheName) === -1)
        .map(cacheName => caches.delete(cacheName))
    ))
  );
});