├── .gitignore ├── LICENSE.md ├── README.md ├── captcha.js ├── config.json ├── examples ├── 0123456789.png ├── abcdefghijklmnopqrstuvwxyz.png └── abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.png ├── main.js ├── mnist.js ├── network.js ├── package.json ├── test.js └── tools.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | mnist 3 | ocr.js 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | #The MIT License (MIT) 2 | 3 | *Copyright (c) 2016 Mateo Gianolio* 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCR 2 | 3 | Trains a multi-layer perceptron (MLP) neural network to perform optical character recognition (OCR). 4 | 5 | The training set is automatically generated using a heavily modified version of the captcha-generator [node-captcha](http://npmjs.com/package/node-captcha). Support for the MNIST handwritten digit database has been added recently (see performance section). 6 | 7 | The network takes a one-dimensional binary array (default ```20 * 20 = 400```-bit) as input and outputs a 10-bit array of probabilities, which can be converted into a character code. Initial performance measurements show promising success rates. 8 | 9 | After training, the network is saved as a standalone module to ```./ocr.js```, which can then be used in your project like this (from `test.js`): 10 | 11 | ```javascript 12 | var predict = require('./ocr.js'); 13 | 14 | // a binary array that we want to predict 15 | var one = [ 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17 | 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 18 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 19 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 20 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 21 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 22 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 23 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 24 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 25 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 26 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 28 | 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 29 | 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 30 | 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 31 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 | ]; 33 | 34 | // the prediction is an 
array of probabilities 35 | var prediction = predict(one); 36 | 37 | // the index with the maximum probability is the best guess 38 | console.log('prediction:', prediction.indexOf(Math.max.apply(null, prediction))); 39 | // will hopefully output 1 if trained with 0-9 :) 40 | ``` 41 | 42 | ## Usage 43 | 44 | Clone this repository. The script is using [canvas](https://www.npmjs.com/package/canvas), so you'll need to install the **Cairo** rendering engine. On OS X, assuming you have [Homebrew](http://brew.sh) installed, this can be done with the following (copied from canvas README): 45 | 46 | ```bash 47 | $ brew install pkg-config cairo jpeg giflib 48 | ``` 49 | 50 | Then install npm dependencies and test it: 51 | 52 | ```bash 53 | $ npm install 54 | $ node main.js 55 | $ node test.js 56 | ``` 57 | 58 | ## Performance 59 | 60 | All runs below were performed with a MacBook Pro Retina 13" Early 2015 with 8GB RAM. 61 | 62 | ### [MNIST [0-9]](http://yann.lecun.com/exdb/mnist/) 63 | 64 | To test with the MNIST dataset: click on the title above, download the 4 data files and put them in a folder called ```mnist``` in the root directory of this repository. 
65 | 66 | ```javascript 67 | // config.json 68 | { 69 | "mnist": true, 70 | "network": { 71 | "hidden": 160, 72 | "learning_rate": 0.03 73 | } 74 | } 75 | ``` 76 | 77 | Then run 78 | 79 | ```bash 80 | $ node mnist.js 81 | ``` 82 | 83 | * **Neurons** 84 | * ```400``` input 85 | * ```160``` hidden 86 | * ```10``` output 87 | * **Learning rate:** ```0.03``` 88 | * **Training set:** ```60000``` digits 89 | * **Testing set:** ```10000``` digits 90 | * **Training time:** ```21 min 53 s 753 ms``` 91 | * **Success rate:** ```95.16%``` 92 | 93 | ### [A-Za-z0-9] 94 | 95 | ```javascript 96 | // config.json 97 | { 98 | "mnist": false, 99 | "text": "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", 100 | "fonts": [ 101 | "sans-serif", 102 | "serif" 103 | ], 104 | "training_set": 2000, 105 | "testing_set": 1000, 106 | "image_size": 16, 107 | "threshold": 400, 108 | "network": { 109 | "hidden": 60, 110 | "learning_rate": 0.1, 111 | "output": 62 112 | } 113 | } 114 | ``` 115 | 116 | * **Neurons** 117 | * ```256``` input 118 | * ```60``` hidden 119 | * ```62``` output 120 | * **Learning rate:** ```0.03``` 121 | * **Training set** 122 | * **Size:** ```124000``` characters 123 | * **Sample:** ![abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789](https://raw.github.com/mateogianolio/mlp-character-recognition/master/examples/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.png) 124 | * **Testing set:** ```62000``` characters 125 | * **Training time:** ```8 min 18 s 560 ms``` 126 | * **Success rate:** ```93.58225806451614%``` 127 | 128 | ### [a-z] 129 | 130 | ```javascript 131 | // config.json 132 | { 133 | "mnist": false, 134 | "text": "abcdefghijklmnopqrstuvwxyz", 135 | "fonts": [ 136 | "sans-serif", 137 | "serif" 138 | ], 139 | "training_set": 2000, 140 | "testing_set": 1000, 141 | "image_size": 16, 142 | "threshold": 400, 143 | "network": { 144 | "hidden": 40, 145 | "learning_rate": 0.1, 146 | "output": 26 147 | } 148 | } 149 | ``` 150 | 151 | 
* **Neurons** 152 | * ```256``` input 153 | * ```40``` hidden 154 | * ```26``` output 155 | * **Learning rate:** ```0.1``` 156 | * **Training set** 157 | * **Size:** ```52000``` characters 158 | * **Sample:** ![abcdefghijklmnopqrstuvwxyz](https://raw.github.com/mateogianolio/mlp-character-recognition/master/examples/abcdefghijklmnopqrstuvwxyz.png) 159 | * **Testing set:** ```26000``` characters 160 | * **Training time:** ```1 min 55 s 414 ms``` 161 | * **Success rate:** ```93.83846153846153%``` 162 | 163 | ### [0-9] 164 | 165 | ```javascript 166 | // config.json 167 | { 168 | "mnist": false, 169 | "text": "0123456789", 170 | "fonts": [ 171 | "sans-serif", 172 | "serif" 173 | ], 174 | "training_set": 2000, 175 | "testing_set": 1000, 176 | "image_size": 16, 177 | "threshold": 400, 178 | "network": { 179 | "hidden": 40, 180 | "learning_rate": 0.1 181 | } 182 | } 183 | ``` 184 | 185 | * **Neurons** 186 | * ```256``` input 187 | * ```40``` hidden 188 | * ```10``` output 189 | * **Learning rate:** ```0.1``` 190 | * **Training set** 191 | * **Size:** ```20000``` digits 192 | * **Sample:** ![0123456789](https://raw.github.com/mateogianolio/mlp-character-recognition/master/examples/0123456789.png) 193 | * **Testing set:** ```10000``` digits 194 | * **Training time:** ```0 min 44 s 363 ms``` 195 | * **Success rate:** ```99.59%``` 196 | 197 | ## Configuration 198 | 199 | Tweak the network for your needs by editing the ```config.json``` file located in the main folder. Pasted below is the default config file. 
200 | 201 | ```javascript 202 | // config.json 203 | { 204 | "mnist": false, 205 | "text": "0123456789", 206 | "fonts": [ 207 | "sans-serif", 208 | "serif" 209 | ], 210 | "training_set": 2000, 211 | "testing_set": 1000, 212 | "image_size": 16, 213 | "threshold": 400, 214 | "network": { 215 | "hidden": 40, 216 | "learning_rate": 0.1 217 | } 218 | } 219 | 220 | ``` 221 | 222 | * **```mnist```** 223 | * If set to true, the MNIST handwritten digit dataset will be used for training and testing the network. This setting will overwrite configured set sizes and will ignore the ```image_size```, ```threshold```, ```fonts``` and ```text``` settings. 224 | * **```text```** 225 | * A string containing the glyphs with which to train/test the network. 226 | * **```fonts```** 227 | * An array of fonts to be used when generating images. 228 | * **```training_set```** 229 | * Number of images to be generated and used as the network training set. 230 | * **```testing_set```** 231 | * Same as above, but these images are used for testing the network. 232 | * **```image_size```** 233 | * The size of the square chunk (in pixels) containing a glyph. The resulting network input size is ```image_size```^2. 234 | * **```threshold```** 235 | * When analyzing the pixels of a glyph, the algorithm reduces each pixel ```(r, g, b)``` to ```(r + g + b)``` and everything below ```threshold``` is marked as 1 in the resulting binary array used as network input. 236 | * **```network```** 237 | * **```hidden```** 238 | * The size (number of neurons) of the hidden layer of the network. 239 | * **```learning_rate```** 240 | * The learning rate of the network. 
(function () {
  'use strict';

  var canvas = require('canvas');

  /**
   * Renders a row of glyphs onto a canvas and hands the encoded image to
   * `callback`.
   *
   * Each glyph is drawn into its own square cell of `height` x `height`
   * pixels, using a font picked at random from `fonts`.
   *
   * The options object is only read, never written. Defaults used to be
   * stored back into it, which meant that a caller reusing one options
   * object without `text` got the same "random" text on every call.
   *
   * @param {Object} config options (all optional)
   * @param {number} [config.size=4] number of glyphs when `text` is generated
   * @param {number} [config.height=24] image height / glyph cell size in pixels
   * @param {number} [config.width] image width, defaults to `height * size`
   * @param {string} [config.color='rgb(0,0,0)'] glyph fill style
   * @param {string} [config.background='rgb(255,255,255)'] background fill style
   * @param {string} [config.text] glyphs to draw, defaults to random digits
   * @param {boolean} [config.distortion=true] randomly scale and skew each glyph
   * @param {string[]} [config.fonts=['sans-serif', 'serif']] candidate font families
   * @param {function(string, Buffer)} callback receives the drawn text and the
   *   buffer produced by the canvas
   * @throws rethrows any error reported by the canvas while encoding
   */
  function generate(config, callback) {
    var options = config || {},
        length = options.size || 4,
        height = options.height || 24,
        width = options.width || height * length,
        color = options.color || 'rgb(0,0,0)',
        background = options.background || 'rgb(255,255,255)',
        text = options.text || String(Math.random()).slice(2, 2 + length),
        distortion = options.distortion === undefined ? true : options.distortion,
        // an empty list would otherwise select the font "undefined"
        fonts = options.fonts && options.fonts.length ? options.fonts : ['sans-serif', 'serif'],
        fontSize = Math.round(height * 0.7),
        c = new canvas(width, height),
        context = c.getContext('2d'),
        dx, dy, i;

    context.fillStyle = background;
    context.fillRect(0, 0, width, height);
    context.fillStyle = color;

    for(i = 0; i < text.length; i++) {
      context.font = fontSize + 'px ' + fonts[Math.floor(Math.random() * fonts.length)];

      // origin of the glyph inside its cell
      dx = height * i + (height - fontSize) / 2;
      dy = height - fontSize / 2;

      if(distortion) {
        context.setTransform(
          Math.random() * 0.25 + 1, // horizontal scaling
          Math.random() * 0.25,     // vertical skewing
          Math.random() * 0.25,     // horizontal skewing
          Math.random() * 0.25 + 1, // vertical scaling
          dx,                       // horizontal translation
          dy                        // vertical translation
        );
      } else {
        // identity matrix, translation only
        context.setTransform(1, 0, 0, 1, dx, dy);
      }

      context.fillText(text.charAt(i), 0, 0);
    }

    c.toBuffer(function(error, buffer) {
      if(error)
        throw error;

      callback(text, buffer);
    });
  }

  module.exports.generate = generate;
})();
(function () {
  'use strict';

  // Training entry point: renders captcha images of `config.text`, slices them
  // into one binary glyph per character, trains a perceptron on the first
  // `training_set` images, saves it to ./ocr.js and tests it on the rest.

  console.log('reading config file ...');

  var synaptic = require('synaptic'),
      network = require('./network.js'),
      captcha = require('./captcha.js'),
      tools = require('./tools.js'),
      PNG = require('pngjs').PNG,
      fs = require('fs'),
      config = JSON.parse(fs.readFileSync('./config.json', 'utf8'));

  if(config === null)
    return;

  config.training_set = config.training_set || 2000;
  config.testing_set = config.testing_set || 500;
  config.image_size = config.image_size || 20;
  config.threshold = config.threshold || 400;
  config.text = config.text || '0123456789';
  config.fonts = config.fonts || ['serif', 'sans-serif'];
  config.distortion = config.distortion === undefined ? true : config.distortion;
  // tolerate a config file without a "network" section
  config.network = config.network || {};
  config.network.hidden = config.network.hidden || 40;
  // One output neuron per glyph. A fixed default of 10 is only right for the
  // digit alphabet and produced over-long target vectors for anything else.
  config.network.output = config.network.output || config.text.length;
  config.network.learning_rate = config.network.learning_rate || 0.1;

  console.log('... done');
  console.log();

  var perceptron = new synaptic.Architect.Perceptron(
    (config.image_size * config.image_size), // input
    config.network.hidden, // hidden
    config.network.output // output
  );

  var index,
      samples = config.training_set + config.testing_set,
      training = [],
      testing = [],
      settings = {
        size: config.text.length,
        height: config.image_size,
        text: config.text,
        fonts: config.fonts,
        distortion: config.distortion
      },
      parsed = 0,            // images fully converted into samples so far
      exampleSaved = false;  // the example image is written exactly once

  // returns an array of `length` zeros
  function zeros(length) {
    var result = [],
        i;

    for(i = 0; i < length; i++)
      result.push(0);

    return result;
  }

  // Converts the glyph cell number `slot` of a decoded image into a binary
  // array: 1 where r + g + b does not exceed the threshold, 0 elsewhere.
  function extract(image, slot) {
    var bits = [],
        left = slot * config.image_size,
        right = left + config.image_size,
        offset, x, y;

    for(y = 0; y < image.height; y++) {
      for(x = left; x < right; x++) {
        // four bytes per pixel, only the first three are summed
        offset = (image.width * y + x) << 2;

        bits.push(
          image.data[offset] + image.data[offset + 1] + image.data[offset + 2] > config.threshold ? 0 : 1
        );
      }
    }

    return bits;
  }

  // captcha callback
  function generate(text, data) {
    // The sample counter only advances inside the asynchronous parse callback,
    // so a dedicated flag is needed to avoid rewriting the example per image.
    // `data` is a Buffer, for which writeFileSync ignores any encoding.
    if(!exampleSaved) {
      exampleSaved = true;
      fs.writeFileSync('./examples/' + text + '.png', data);
    }

    var png = new PNG({ filterType: 4 });

    png.parse(data, function(error, image) {
      if(error)
        throw error;

      var bucket = parsed < config.training_set ? training : testing,
          output,
          i;

      for(i = 0; i < config.text.length; i++) {
        // one-hot target: glyph i of the text maps to output neuron i
        output = zeros(config.network.output);
        output[i] = 1;

        bucket.push({
          input: tools.center(extract(image, i)),
          output: output
        });
      }

      if(parsed++ === samples - 1) {
        console.log('... done');
        console.log();

        network.train(perceptron, training, config.network.learning_rate);
        fs.writeFileSync('./ocr.js', 'module.exports = ' + perceptron.standalone().toString());
        network.test(perceptron, testing);
      }
    });
  }

  console.log('generating images ...');

  for(index = 0; index < samples; index++)
    captcha.generate(settings, generate);
})();
(function () {
  'use strict';

  // MNIST entry point: reads the four IDX files from ./mnist, crops every
  // 28x28 digit to its central 20x20 pixels, binarises and centres it, then
  // trains a perceptron, saves it to ./ocr.js and tests it.

  var fs = require('fs'),
      synaptic = require('synaptic'),
      tools = require('./tools.js'),
      network = require('./network.js'),
      config = JSON.parse(fs.readFileSync('./config.json', 'utf8'));

  var IMAGE_HEADER = 16, // idx3 header: magic, count, rows, columns (4 bytes each)
      LABEL_HEADER = 8,  // idx1 header: magic, count (4 bytes each)
      SOURCE_SIZE = 28;  // width and height of a stored MNIST image

  console.log('parsing MNIST data ...');

  config.training_set = 60000;
  config.testing_set = 10000;
  config.threshold = 50;
  config.image_size = 20;
  // same fallbacks as main.js, so a config without a "network" section works
  config.network = config.network || {};
  config.network.hidden = config.network.hidden || 40;
  config.network.learning_rate = config.network.learning_rate || 0.1;

  var perceptron = new synaptic.Architect.Perceptron(
    (config.image_size * config.image_size), // input
    config.network.hidden, // hidden
    10 // output
  );

  // Reads `count` labelled digits from an image/label file pair and returns
  // them as { input, output } samples.
  function load(imagesPath, labelsPath, count) {
    var data = fs.readFileSync(imagesPath),
        labels = fs.readFileSync(labelsPath),
        margin = (SOURCE_SIZE - config.image_size) / 2,
        toBinary = tools.binary(config.threshold),
        samples = [],
        pixels, output, base, image, x, y;

    for(image = 0; image < count; image++) {
      // Pixel data starts right after the 16-byte header. The previous offset
      // of 15 read every crop one column too far to the left.
      base = IMAGE_HEADER + image * SOURCE_SIZE * SOURCE_SIZE;
      pixels = [];

      for(y = margin; y < config.image_size + margin; y++)
        for(x = margin; x < config.image_size + margin; x++)
          pixels.push(data[base + y * SOURCE_SIZE + x]);

      // one-hot target for the digit label
      output = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
      output[labels[LABEL_HEADER + image]] = 1;

      samples.push({
        input: tools.center(pixels.map(toBinary)),
        output: output
      });
    }

    return samples;
  }

  var training = load(
        './mnist/train-images-idx3-ubyte',
        './mnist/train-labels-idx1-ubyte',
        config.training_set
      ),
      testing = load(
        './mnist/t10k-images-idx3-ubyte',
        './mnist/t10k-labels-idx1-ubyte',
        config.testing_set
      );

  console.log('... done', '\n');

  network.train(perceptron, training, config.network.learning_rate);
  fs.writeFileSync('./ocr.js', 'module.exports = ' + perceptron.standalone().toString());
  network.test(perceptron, testing);
}());
'use strict';

/**
 * Number of samples between two progress reports (about ten reports per run).
 * Never returns 0, which would turn the progress check into `count % 0`
 * (NaN) and silence all reports for sets with fewer than five samples.
 *
 * @param {number} total number of samples in the run
 * @returns {number} reporting interval, at least 1
 */
function progressStep(total) {
  return Math.max(1, Math.round(total / 10));
}

/**
 * Trains `network` with one activate/propagate pass per sample and logs the
 * layer sizes, progress and elapsed time.
 *
 * Samples are visited from the last element to the first. The set itself is
 * left untouched, so the caller can reuse it afterwards.
 *
 * @param {Object} network object exposing `layers`, `activate(input)` and
 *   `propagate(rate, target)`
 * @param {Array<{input: number[], output: number[]}>} set training samples
 * @param {number} rate learning rate passed to `propagate`
 */
function train(network, set, rate) {
  var length = set.length,
      step = progressStep(length),
      sample,
      count;

  console.log('layers:');
  console.log('  input:', network.layers.input.size, 'neurons.');
  console.log('  hidden:', network.layers.hidden[0].size, 'neurons.');
  console.log('  output:', network.layers.output.size, 'neurons.');
  console.log('learning rate:', rate, '\n');
  console.log('training with', length, 'inputs ...');

  var start = process.hrtime();

  for(count = 0; count < length; count++) {
    sample = set[length - 1 - count];

    if(count % step === 0)
      console.log('progress:', Math.round(100 * (count / length)), '%');

    network.activate(sample.input);
    network.propagate(rate, sample.output);
  }

  // hrtime yields [seconds, nanoseconds]
  var elapsed = process.hrtime(start);
  var time = {
    minutes: Math.floor(elapsed[0] / 60),
    seconds: elapsed[0] % 60,
    milliseconds: Math.floor(elapsed[1] / 1000000)
  };

  console.log('... done', '(' + time.minutes, 'min', time.seconds, 's', time.milliseconds, 'ms)');
  console.log();
}

/**
 * Measures how often the strongest output of `network` matches the expected
 * class of a sample, logs the result and returns it.
 *
 * The expected class is the index of the first 1 in `sample.output`. The set
 * is left untouched.
 *
 * @param {Object} network object exposing `activate(input)` that returns an
 *   array of numbers
 * @param {Array<{input: number[], output: number[]}>} set testing samples
 * @returns {number} success rate in percent, 0 for an empty set
 */
function test(network, set) {
  var length = set.length,
      step = progressStep(length),
      success = 0,
      sample,
      prediction,
      count;

  console.log('testing on', length, 'inputs ...');

  for(count = 0; count < length; count++) {
    sample = set[length - 1 - count];

    if(count % step === 0)
      console.log('progress:', Math.round(100 * (count / length)), '%');

    prediction = network.activate(sample.input);

    // compare the index of the strongest output with the expected class
    if(prediction.indexOf(Math.max.apply(null, prediction)) === sample.output.indexOf(1))
      success++;
  }

  // avoid reporting NaN for an empty set
  var rate = length ? 100 * (success / length) : 0;

  console.log('... done', '\n');
  console.log('success rate:', rate, '%');

  return rate;
}

// CommonJS interface, skipped when the code is evaluated without `module`
if(typeof module !== 'undefined' && module.exports) {
  module.exports.train = train;
  module.exports.test = test;
}
(function () {
  'use strict';

  // Smoke test for the generated ./ocr.js module: prints three hand-drawn
  // 16x16 glyphs and the class the network predicts for each of them.

  var ocr = require('./ocr.js');

  var one = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  ];

  var two = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  ];

  var seven = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  ];

  // writes one pixel, starting a new output line before every 16th pixel
  function print (x, i) {
    if (i && i % 16 === 0)
      console.log();
    process.stdout.write((x ? '■' : '□') + ' ');
  }

  // Draws a glyph, then prints the index of the strongest network output.
  // Replaces three hand-copied repetitions of the same five statements.
  function report (glyph) {
    glyph.forEach(print);

    var prediction = ocr(glyph);

    console.log();
    console.log('prediction:', prediction.indexOf(Math.max.apply(null, prediction)));
    console.log();
  }

  [one, two, seven].forEach(function (glyph) {
    report(glyph);
  });
}());
'use strict';

/**
 * Builds a thresholding function for single pixel values.
 *
 * @param {number} threshold exclusive lower bound for a "set" pixel
 * @returns {function(number): number} maps a value to 1 when it is greater
 *   than `threshold`, otherwise to 0
 */
function binary(threshold) {
  return function (pixel) {
    return pixel > threshold ? 1 : 0;
  };
}

/**
 * Bounding box centering: returns a copy of a square, row-major pixel array
 * in which the bounding box of all truthy pixels is moved to the middle.
 *
 * The input is not modified. Falsy pixels become 0 in the result; truthy
 * pixels keep their original value. An array without truthy pixels yields an
 * all-zero array of the same length.
 *
 * @param {Array} chunk row-major pixels whose length is a perfect square
 * @returns {Array} centred copy with the same length as `chunk`
 * @throws {RangeError} when the length of `chunk` is not a perfect square;
 *   such input used to produce fractional indices and a corrupt result
 */
function center(chunk) {
  var size = Math.sqrt(chunk.length),
      min = { x: size, y: size },
      max = { x: 0, y: 0 },
      centered = [],
      shift,
      x, y, i;

  if(size !== Math.floor(size))
    throw new RangeError('center() expects a square chunk, got length ' + chunk.length);

  // bounding box of all set pixels
  for(y = 0; y < size; y++) {
    for(x = 0; x < size; x++) {
      if(!chunk[size * y + x])
        continue;

      if(min.x > x) min.x = x;
      if(min.y > y) min.y = y;
      if(max.x < x) max.x = x;
      if(max.y < y) max.y = y;
    }
  }

  // distance between the middle of the grid and the middle of the box;
  // evaluates to 0 when no pixel is set
  shift = {
    x: Math.floor((size / 2) - (min.x + (max.x - min.x) / 2)),
    y: Math.floor((size / 2) - (min.y + (max.y - min.y) / 2))
  };

  for(i = 0; i < chunk.length; i++)
    centered.push(0);

  // move character to center
  for(y = 0; y < size; y++) {
    for(x = 0; x < size; x++) {
      i = size * y + x;

      if(chunk[i])
        centered[size * (y + shift.y) + (x + shift.x)] = chunk[i];
    }
  }

  return centered;
}

// CommonJS interface, skipped when the code is evaluated without `module`
if(typeof module !== 'undefined' && module.exports) {
  module.exports.binary = binary;
  module.exports.center = center;
}