├── .gitignore ├── README.md ├── browser ├── index.html └── index.tha.html ├── images ├── testocr.png └── tha.png ├── lang-data ├── eng.traineddata.gz └── tha.traineddata.gz ├── node ├── index.js └── index.tha.js ├── package-lock.json └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *.traineddata 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Tesseract.js Offline 2 | ==================== 3 | 4 | This repository is a showcase of how to use tesseract.js without downloading any files from a remote server. 5 | 6 | # Installation 7 | 8 | ```shell 9 | $ npm install 10 | ``` 11 | 12 | # Usage 13 | 14 | ## Browser 15 | 16 | For the browser version, execute the command below to start the server: 17 | 18 | ```shell 19 | $ npm run start 20 | ``` 21 | 22 | Visit [http://localhost:3000/browser/](http://localhost:3000/browser/) and press F12 to check the console logs. 23 | 24 | ## Node.js 25 | 26 | For the Node.js version, simply run this command: 27 | 28 | ```shell 29 | $ node ./node/index.js 30 | ``` 31 | 32 | You will see the result in the terminal. 33 | 34 | # FAQ 35 | 36 | ## Where can I download \*.traineddata.gz for other languages? 
37 | 38 | You can download them from [naptha/tessdata](https://github.com/naptha/tessdata/tree/gh-pages/4.0.0). 39 | -------------------------------------------------------------------------------- /browser/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Tesseract.js Offline 6 | 7 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /browser/index.tha.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Tesseract.js Offline 6 | 7 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /images/testocr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeromewu/tesseract.js-offline/820c667b7c394fe8baa86a7832ffe24c201e2a03/images/testocr.png -------------------------------------------------------------------------------- /images/tha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeromewu/tesseract.js-offline/820c667b7c394fe8baa86a7832ffe24c201e2a03/images/tha.png -------------------------------------------------------------------------------- /lang-data/eng.traineddata.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeromewu/tesseract.js-offline/820c667b7c394fe8baa86a7832ffe24c201e2a03/lang-data/eng.traineddata.gz -------------------------------------------------------------------------------- /lang-data/tha.traineddata.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeromewu/tesseract.js-offline/820c667b7c394fe8baa86a7832ffe24c201e2a03/lang-data/tha.traineddata.gz -------------------------------------------------------------------------------- /node/index.js: 
-------------------------------------------------------------------------------- 1 | const { createWorker } = require('tesseract.js'); 2 | const path = require('path'); 3 | 4 | const worker = createWorker({ 5 | langPath: path.join(__dirname, '..', 'lang-data'), 6 | logger: m => console.log(m), 7 | }); 8 | 9 | (async () => { 10 | await worker.load(); 11 | await worker.loadLanguage('eng'); 12 | await worker.initialize('eng'); 13 | const { data: { text } } = await worker.recognize(path.join(__dirname, '..', 'images', 'testocr.png')); 14 | console.log(text); 15 | await worker.terminate(); 16 | })(); 17 | -------------------------------------------------------------------------------- /node/index.tha.js: -------------------------------------------------------------------------------- 1 | const { createWorker } = require('tesseract.js'); 2 | const path = require('path'); 3 | 4 | const worker = createWorker({ 5 | langPath: path.join(__dirname, '..', 'lang-data'), 6 | logger: m => console.log(m), 7 | }); 8 | 9 | (async () => { 10 | await worker.load(); 11 | await worker.loadLanguage('tha'); 12 | await worker.initialize('tha'); 13 | const { data: { text } } = await worker.recognize(path.join(__dirname, '..', 'images', 'tha.png')); 14 | console.log(text); 15 | await worker.terminate(); 16 | })(); 17 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tesseract.js-offline", 3 | "version": "0.1.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "async": { 8 | "version": "1.5.2", 9 | "resolved": "https://registry.npmjs.org/async/-/async-1.5.2.tgz", 10 | "integrity": "sha1-7GphrlZIDAw8skHJVhjiCJL5Zyo=", 11 | "dev": true 12 | }, 13 | "axios": { 14 | "version": "0.18.1", 15 | "resolved": "https://registry.npmjs.org/axios/-/axios-0.18.1.tgz", 16 | "integrity": 
"sha512-0BfJq4NSfQXd+SkFdrvFbG7addhYSBA2mQwISr46pD6E5iqkWg02RAs8vyTT/j0RTnoYmeXauBuSv1qKwR179g==", 17 | "requires": { 18 | "follow-redirects": "1.5.10", 19 | "is-buffer": "^2.0.2" 20 | }, 21 | "dependencies": { 22 | "debug": { 23 | "version": "3.1.0", 24 | "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", 25 | "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", 26 | "requires": { 27 | "ms": "2.0.0" 28 | } 29 | }, 30 | "follow-redirects": { 31 | "version": "1.5.10", 32 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.5.10.tgz", 33 | "integrity": "sha512-0V5l4Cizzvqt5D44aTXbFZz+FtyXV1vrDN6qrelxtfYQKW0KO0W2T/hkE8xvGa/540LkZlkaUjO4ailYTFtHVQ==", 34 | "requires": { 35 | "debug": "=3.1.0" 36 | } 37 | }, 38 | "ms": { 39 | "version": "2.0.0", 40 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", 41 | "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" 42 | } 43 | } 44 | }, 45 | "bmp-js": { 46 | "version": "0.1.0", 47 | "resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", 48 | "integrity": "sha1-4Fpj95amwf8l9Hcex62twUjAcjM=" 49 | }, 50 | "colors": { 51 | "version": "1.0.3", 52 | "resolved": "https://registry.npmjs.org/colors/-/colors-1.0.3.tgz", 53 | "integrity": "sha1-BDP0TYCWgP3rYO0mDxsMJi6CpAs=", 54 | "dev": true 55 | }, 56 | "corser": { 57 | "version": "2.0.1", 58 | "resolved": "https://registry.npmjs.org/corser/-/corser-2.0.1.tgz", 59 | "integrity": "sha1-jtolLsqrWEDc2XXOuQ2TcMgZ/4c=", 60 | "dev": true 61 | }, 62 | "debug": { 63 | "version": "3.2.6", 64 | "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", 65 | "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", 66 | "dev": true, 67 | "requires": { 68 | "ms": "^2.1.1" 69 | } 70 | }, 71 | "ecstatic": { 72 | "version": "3.3.2", 73 | "resolved": "https://registry.npmjs.org/ecstatic/-/ecstatic-3.3.2.tgz", 74 | "integrity": 
"sha512-fLf9l1hnwrHI2xn9mEDT7KIi22UDqA2jaCwyCbSUJh9a1V+LEUSL/JO/6TIz/QyuBURWUHrFL5Kg2TtO1bkkog==", 75 | "dev": true, 76 | "requires": { 77 | "he": "^1.1.1", 78 | "mime": "^1.6.0", 79 | "minimist": "^1.1.0", 80 | "url-join": "^2.0.5" 81 | } 82 | }, 83 | "eventemitter3": { 84 | "version": "3.1.2", 85 | "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-3.1.2.tgz", 86 | "integrity": "sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==", 87 | "dev": true 88 | }, 89 | "file-type": { 90 | "version": "12.3.0", 91 | "resolved": "https://registry.npmjs.org/file-type/-/file-type-12.3.0.tgz", 92 | "integrity": "sha512-4E4Esq9KLwjYCY32E7qSmd0h7LefcniZHX+XcdJ4Wfx1uGJX7QCigiqw/U0yT7WOslm28yhxl87DJ0wHYv0RAA==" 93 | }, 94 | "follow-redirects": { 95 | "version": "1.7.0", 96 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.7.0.tgz", 97 | "integrity": "sha512-m/pZQy4Gj287eNy94nivy5wchN3Kp+Q5WgUPNy5lJSZ3sgkVKSYV/ZChMAQVIgx1SqfZ2zBZtPA2YlXIWxxJOQ==", 98 | "dev": true, 99 | "requires": { 100 | "debug": "^3.2.6" 101 | } 102 | }, 103 | "he": { 104 | "version": "1.2.0", 105 | "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", 106 | "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", 107 | "dev": true 108 | }, 109 | "http-proxy": { 110 | "version": "1.17.0", 111 | "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.17.0.tgz", 112 | "integrity": "sha512-Taqn+3nNvYRfJ3bGvKfBSRwy1v6eePlm3oc/aWVxZp57DQr5Eq3xhKJi7Z4hZpS8PC3H4qI+Yly5EmFacGuA/g==", 113 | "dev": true, 114 | "requires": { 115 | "eventemitter3": "^3.0.0", 116 | "follow-redirects": "^1.0.0", 117 | "requires-port": "^1.0.0" 118 | } 119 | }, 120 | "http-server": { 121 | "version": "0.11.1", 122 | "resolved": "https://registry.npmjs.org/http-server/-/http-server-0.11.1.tgz", 123 | "integrity": 
"sha512-6JeGDGoujJLmhjiRGlt8yK8Z9Kl0vnl/dQoQZlc4oeqaUoAKQg94NILLfrY3oWzSyFaQCVNTcKE5PZ3cH8VP9w==", 124 | "dev": true, 125 | "requires": { 126 | "colors": "1.0.3", 127 | "corser": "~2.0.0", 128 | "ecstatic": "^3.0.0", 129 | "http-proxy": "^1.8.1", 130 | "opener": "~1.4.0", 131 | "optimist": "0.6.x", 132 | "portfinder": "^1.0.13", 133 | "union": "~0.4.3" 134 | } 135 | }, 136 | "idb-keyval": { 137 | "version": "3.2.0", 138 | "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-3.2.0.tgz", 139 | "integrity": "sha512-slx8Q6oywCCSfKgPgL0sEsXtPVnSbTLWpyiDcu6msHOyKOLari1TD1qocXVCft80umnkk3/Qqh3lwoFt8T/BPQ==" 140 | }, 141 | "is-buffer": { 142 | "version": "2.0.4", 143 | "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.4.tgz", 144 | "integrity": "sha512-Kq1rokWXOPXWuaMAqZiJW4XxsmD9zGx9q4aePabbn3qCRGedtH7Cm+zV8WETitMfu1wdh+Rvd6w5egwSngUX2A==" 145 | }, 146 | "is-url": { 147 | "version": "1.2.2", 148 | "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.2.tgz", 149 | "integrity": "sha1-SYkFpZO/R8wtnn9zg3K792lsfyY=" 150 | }, 151 | "mime": { 152 | "version": "1.6.0", 153 | "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", 154 | "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", 155 | "dev": true 156 | }, 157 | "minimist": { 158 | "version": "1.2.0", 159 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", 160 | "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", 161 | "dev": true 162 | }, 163 | "mkdirp": { 164 | "version": "0.5.1", 165 | "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", 166 | "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", 167 | "dev": true, 168 | "requires": { 169 | "minimist": "0.0.8" 170 | }, 171 | "dependencies": { 172 | "minimist": { 173 | "version": "0.0.8", 174 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", 175 | "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", 176 | "dev": true 
177 | } 178 | } 179 | }, 180 | "ms": { 181 | "version": "2.1.1", 182 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", 183 | "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", 184 | "dev": true 185 | }, 186 | "opencollective-postinstall": { 187 | "version": "2.0.2", 188 | "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.2.tgz", 189 | "integrity": "sha512-pVOEP16TrAO2/fjej1IdOyupJY8KDUM1CvsaScRbw6oddvpQoOfGk4ywha0HKKVAD6RkW4x6Q+tNBwhf3Bgpuw==" 190 | }, 191 | "opener": { 192 | "version": "1.4.3", 193 | "resolved": "https://registry.npmjs.org/opener/-/opener-1.4.3.tgz", 194 | "integrity": "sha1-XG2ixdflgx6P+jlklQ+NZnSskLg=", 195 | "dev": true 196 | }, 197 | "optimist": { 198 | "version": "0.6.1", 199 | "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", 200 | "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", 201 | "dev": true, 202 | "requires": { 203 | "minimist": "~0.0.1", 204 | "wordwrap": "~0.0.2" 205 | }, 206 | "dependencies": { 207 | "minimist": { 208 | "version": "0.0.10", 209 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", 210 | "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=", 211 | "dev": true 212 | } 213 | } 214 | }, 215 | "portfinder": { 216 | "version": "1.0.20", 217 | "resolved": "https://registry.npmjs.org/portfinder/-/portfinder-1.0.20.tgz", 218 | "integrity": "sha512-Yxe4mTyDzTd59PZJY4ojZR8F+E5e97iq2ZOHPz3HDgSvYC5siNad2tLooQ5y5QHyQhc3xVqvyk/eNA3wuoa7Sw==", 219 | "dev": true, 220 | "requires": { 221 | "async": "^1.5.2", 222 | "debug": "^2.2.0", 223 | "mkdirp": "0.5.x" 224 | }, 225 | "dependencies": { 226 | "debug": { 227 | "version": "2.6.9", 228 | "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", 229 | "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", 230 | "dev": true, 231 | "requires": { 232 | "ms": 
"2.0.0" 233 | } 234 | }, 235 | "ms": { 236 | "version": "2.0.0", 237 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", 238 | "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", 239 | "dev": true 240 | } 241 | } 242 | }, 243 | "qs": { 244 | "version": "2.3.3", 245 | "resolved": "https://registry.npmjs.org/qs/-/qs-2.3.3.tgz", 246 | "integrity": "sha1-6eha2+ddoLvkyOBHaghikPhjtAQ=", 247 | "dev": true 248 | }, 249 | "regenerator-runtime": { 250 | "version": "0.13.3", 251 | "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.3.tgz", 252 | "integrity": "sha512-naKIZz2GQ8JWh///G7L3X6LaQUAMp2lvb1rvwwsURe/VXwD6VMfr+/1NuNw3ag8v2kY1aQ/go5SNn79O9JU7yw==" 253 | }, 254 | "requires-port": { 255 | "version": "1.0.0", 256 | "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", 257 | "integrity": "sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=", 258 | "dev": true 259 | }, 260 | "resolve-url": { 261 | "version": "0.2.1", 262 | "resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz", 263 | "integrity": "sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=" 264 | }, 265 | "tesseract.js": { 266 | "version": "2.0.0-beta.1", 267 | "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-2.0.0-beta.1.tgz", 268 | "integrity": "sha512-PPELe7ArJycS1ZZomecL4+MG5SCin0uHxzRhLecxGxp00Ec6rEYx9p6LwzJjyORgUlDkocP6jgb/Rczqv3DTkQ==", 269 | "requires": { 270 | "axios": "^0.18.0", 271 | "bmp-js": "^0.1.0", 272 | "file-type": "^12.3.0", 273 | "idb-keyval": "^3.2.0", 274 | "is-url": "1.2.2", 275 | "opencollective-postinstall": "^2.0.2", 276 | "regenerator-runtime": "^0.13.3", 277 | "resolve-url": "^0.2.1", 278 | "tesseract.js-core": "^2.0.0-beta.13", 279 | "zlibjs": "^0.3.1" 280 | }, 281 | "dependencies": { 282 | "tesseract.js-core": { 283 | "version": "2.0.0-beta.13", 284 | "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.13.tgz", 285 | "integrity": 
"sha512-GboWV/aV5h+Whito6L6Q3WCFZ2+lgxZGgjY84wSpWbTLEkkZgHsU+dz1or+3rWSABH/nuzHDco1bZRk5+f94mw==" 286 | } 287 | } 288 | }, 289 | "tesseract.js-core": { 290 | "version": "2.0.0-beta.13", 291 | "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.13.tgz", 292 | "integrity": "sha512-GboWV/aV5h+Whito6L6Q3WCFZ2+lgxZGgjY84wSpWbTLEkkZgHsU+dz1or+3rWSABH/nuzHDco1bZRk5+f94mw==" 293 | }, 294 | "union": { 295 | "version": "0.4.6", 296 | "resolved": "https://registry.npmjs.org/union/-/union-0.4.6.tgz", 297 | "integrity": "sha1-GY+9rrolTniLDvy2MLwR8kopWeA=", 298 | "dev": true, 299 | "requires": { 300 | "qs": "~2.3.3" 301 | } 302 | }, 303 | "url-join": { 304 | "version": "2.0.5", 305 | "resolved": "https://registry.npmjs.org/url-join/-/url-join-2.0.5.tgz", 306 | "integrity": "sha1-WvIvGMBSoACkjXuCxenC4v7tpyg=", 307 | "dev": true 308 | }, 309 | "wordwrap": { 310 | "version": "0.0.3", 311 | "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", 312 | "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=", 313 | "dev": true 314 | }, 315 | "zlibjs": { 316 | "version": "0.3.1", 317 | "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", 318 | "integrity": "sha1-UBl+2yihxCymWcyLTmqd3W1ERVQ=" 319 | } 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tesseract.js-offline", 3 | "version": "0.1.0", 4 | "description": "An example to show how to use tesseract.js in offline mode", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "http-server -p 3000 ." 
8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/jeromewu/tesseract.js-offline.git" 12 | }, 13 | "author": "Jerome Wu (https://github.com/jeromewu)", 14 | "license": "Apache-2.0", 15 | "bugs": { 16 | "url": "https://github.com/jeromewu/tesseract.js-offline/issues" 17 | }, 18 | "homepage": "https://github.com/jeromewu/tesseract.js-offline#readme", 19 | "dependencies": { 20 | "tesseract.js": "^2.0.0-beta.1", 21 | "tesseract.js-core": "^2.0.0-beta.13" 22 | }, 23 | "devDependencies": { 24 | "http-server": "^0.11.1" 25 | } 26 | } 27 | --------------------------------------------------------------------------------