├── common.py ├── vis.py ├── README.md ├── extractbgs.py ├── model.py ├── detect.py ├── train.py └── gen.py /common.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Definitions that don't fit elsewhere. 4 | 5 | """ 6 | 7 | __all__ = ( 8 | 'DIGITS', 9 | 'LETTERS', 10 | 'CHARS', 11 | 'sigmoid', 12 | 'softmax', 13 | ) 14 | 15 | import numpy 16 | 17 | 18 | DIGITS = "0123456789" 19 | LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 20 | CHARS = LETTERS + DIGITS 21 | 22 | def softmax(a): 23 | exps = numpy.exp(a.astype(numpy.float64)) 24 | return exps / numpy.sum(exps, axis=-1)[:, numpy.newaxis] 25 | 26 | def sigmoid(a): 27 | return 1. / (1. + numpy.exp(-a)) 28 | 29 | -------------------------------------------------------------------------------- /vis.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy 6 | 7 | a = numpy.load(sys.argv[1]) 8 | 9 | conv1 = a['arr_0'] 10 | 11 | fig, ax = plt.subplots(8, 8, 12 | figsize=(8, 8), 13 | dpi=100, 14 | squeeze=False) 15 | 16 | """ 17 | for i in range(conv1.shape[3]): 18 | ax[i // 8, i % 8].imshow(conv1[:, :, 0, i], cmap='Greys') 19 | 20 | """ 21 | conv2 = a['arr_2'] 22 | for i in range(min(8, conv2.shape[3])): 23 | for j in range(min(8, conv2.shape[2])): 24 | ax[j, i].imshow(conv2[:, :, j, i], cmap='Greys') 25 | 26 | fig.savefig(sys.argv[2], dpi=30.) 27 | 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Number-plate-detection-using-CNN 2 | This program uses convolutional neural networks to recognize the text in the number plate.This network is based on [this paper](https://vision.in.tum.de/_media/spezial/bib/stark-gcpr15.pdf) by Stark et al, which describes how google broke their own CAPTCHA system. 
Do check it out, as it gives more specifics about the architecture used than Google's paper. 3 | 4 | To use this project: 5 | 6 | 1. `./extractbgs.py SUN397.tar.gz`: Extract ~3GB of background images from the [SUN database](http://groups.csail.mit.edu/vision/SUN/) 7 | into `bgs/`. (`bgs/` must not already exist.) The tar file (36GB) can be [downloaded here](http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz). 8 | This step may take a while as it will extract 108,634 images. 9 | 10 | 2. `./gen.py 1000`: Generate 1000 test set images in `test/`. (`test/` must not 11 | already exist.) This step requires `UKNumberPlate.ttf` to be in the 12 | `fonts/` directory, which can be 13 | [downloaded here](http://www.dafont.com/uk-number-plate.font). 14 | 15 | 3. `./train.py`: Train the model. A GPU is recommended for this step. It will 16 | take around 100,000 batches to converge. When you're satisfied that the 17 | network has learned enough press `Ctrl+C` and the process will write the 18 | weights to `weights.npz` and return. 19 | 20 | 4. `./detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image. 21 | -------------------------------------------------------------------------------- /extractbgs.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | """ 4 | Extract background images from a tar archive. 5 | 6 | """ 7 | 8 | 9 | __all__ = ( 10 | 'extract_backgrounds', 11 | ) 12 | 13 | 14 | import os 15 | import sys 16 | import tarfile 17 | 18 | import cv2 19 | import numpy 20 | 21 | 22 | def im_from_file(f): 23 | a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8) 24 | return cv2.imdecode(a, cv2.CV_LOAD_IMAGE_GRAYSCALE) 25 | 26 | 27 | def extract_backgrounds(archive_name): 28 | """ 29 | Extract backgrounds from provided tar archive. 30 | 31 | JPEGs from the archive are converted into grayscale, and cropped/resized to 32 | 256x256, and saved in ./bgs/. 
33 | 34 | :param archive_name: 35 | Name of the .tar file containing JPEGs of background images. 36 | 37 | """ 38 | os.mkdir("bgs") 39 | 40 | t = tarfile.open(name=archive_name) 41 | 42 | def members(): 43 | m = t.next() 44 | while m: 45 | yield m 46 | m = t.next() 47 | index = 0 48 | for m in members(): 49 | if not m.name.endswith(".jpg"): 50 | continue 51 | f = t.extractfile(m) 52 | try: 53 | im = im_from_file(f) 54 | finally: 55 | f.close() 56 | if im is None: 57 | continue 58 | 59 | if im.shape[0] > im.shape[1]: 60 | im = im[:im.shape[1], :] 61 | else: 62 | im = im[:, :im.shape[0]] 63 | if im.shape[0] > 256: 64 | im = cv2.resize(im, (256, 256)) 65 | fname = "bgs/{:08}.jpg".format(index) 66 | print fname 67 | rc = cv2.imwrite(fname, im) 68 | if not rc: 69 | raise Exception("Failed to write file {}".format(fname)) 70 | index += 1 71 | 72 | 73 | if __name__ == "__main__": 74 | 75 | extract_backgrounds(sys.argv[1]) 76 | 77 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Definition of the neural networks. 
3 | 4 | """ 5 | 6 | 7 | __all__ = ( 8 | 'get_training_model', 9 | 'get_detect_model', 10 | 'WINDOW_SHAPE', 11 | ) 12 | 13 | 14 | import tensorflow as tf 15 | 16 | import common 17 | 18 | 19 | WINDOW_SHAPE = (64, 128) 20 | 21 | 22 | # Utility functions 23 | def weight_variable(shape): 24 | initial = tf.truncated_normal(shape, stddev=0.1) 25 | return tf.Variable(initial) 26 | 27 | 28 | def bias_variable(shape): 29 | initial = tf.constant(0.1, shape=shape) 30 | return tf.Variable(initial) 31 | 32 | 33 | def conv2d(x, W, stride=(1, 1), padding='SAME'): 34 | return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1], 35 | padding=padding) 36 | 37 | 38 | def max_pool(x, ksize=(2, 2), stride=(2, 2)): 39 | return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1], 40 | strides=[1, stride[0], stride[1], 1], padding='SAME') 41 | 42 | 43 | def avg_pool(x, ksize=(2, 2), stride=(2, 2)): 44 | return tf.nn.avg_pool(x, ksize=[1, ksize[0], ksize[1], 1], 45 | strides=[1, stride[0], stride[1], 1], padding='SAME') 46 | 47 | 48 | def convolutional_layers(): 49 | """ 50 | Get the convolutional layers of the model. 
51 | 52 | """ 53 | x = tf.placeholder(tf.float32, [None, None, None]) 54 | 55 | # First layer 56 | W_conv1 = weight_variable([5, 5, 1, 48]) 57 | b_conv1 = bias_variable([48]) 58 | x_expanded = tf.expand_dims(x, 3) 59 | h_conv1 = tf.nn.relu(conv2d(x_expanded, W_conv1) + b_conv1) 60 | h_pool1 = max_pool(h_conv1, ksize=(2, 2), stride=(2, 2)) 61 | 62 | # Second layer 63 | W_conv2 = weight_variable([5, 5, 48, 64]) 64 | b_conv2 = bias_variable([64]) 65 | 66 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 67 | h_pool2 = max_pool(h_conv2, ksize=(2, 1), stride=(2, 1)) 68 | 69 | # Third layer 70 | W_conv3 = weight_variable([5, 5, 64, 128]) 71 | b_conv3 = bias_variable([128]) 72 | 73 | h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3) 74 | h_pool3 = max_pool(h_conv3, ksize=(2, 2), stride=(2, 2)) 75 | 76 | return x, h_pool3, [W_conv1, b_conv1, 77 | W_conv2, b_conv2, 78 | W_conv3, b_conv3] 79 | 80 | 81 | def get_training_model(): 82 | """ 83 | The training model acts on a batch of 128x64 windows, and outputs a (1 + 84 | 7 * len(common.CHARS) vector, `v`. `v[0]` is the probability that a plate is 85 | fully within the image and is at the correct scale. 86 | 87 | `v[1 + i * len(common.CHARS) + c]` is the probability that the `i`'th 88 | character is `c`. 
89 | 90 | """ 91 | x, conv_layer, conv_vars = convolutional_layers() 92 | 93 | # Densely connected layer 94 | W_fc1 = weight_variable([32 * 8 * 128, 2048]) 95 | b_fc1 = bias_variable([2048]) 96 | 97 | conv_layer_flat = tf.reshape(conv_layer, [-1, 32 * 8 * 128]) 98 | h_fc1 = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1) 99 | 100 | # Output layer 101 | W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)]) 102 | b_fc2 = bias_variable([1 + 7 * len(common.CHARS)]) 103 | 104 | y = tf.matmul(h_fc1, W_fc2) + b_fc2 105 | 106 | return (x, y, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2]) 107 | 108 | 109 | def get_detect_model(): 110 | """ 111 | The same as the training model, except it acts on an arbitrarily sized 112 | input, and slides the 128x64 window across the image in 8x8 strides. 113 | 114 | The output is of the form `v`, where `v[i, j]` is equivalent to the output 115 | of the training model, for the window at coordinates `(8 * i, 4 * j)`. 116 | 117 | """ 118 | x, conv_layer, conv_vars = convolutional_layers() 119 | 120 | # Fourth layer 121 | W_fc1 = weight_variable([8 * 32 * 128, 2048]) 122 | W_conv1 = tf.reshape(W_fc1, [8, 32, 128, 2048]) 123 | b_fc1 = bias_variable([2048]) 124 | h_conv1 = tf.nn.relu(conv2d(conv_layer, W_conv1, 125 | stride=(1, 1), padding="VALID") + b_fc1) 126 | # Fifth layer 127 | W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)]) 128 | W_conv2 = tf.reshape(W_fc2, [1, 1, 2048, 1 + 7 * len(common.CHARS)]) 129 | b_fc2 = bias_variable([1 + 7 * len(common.CHARS)]) 130 | h_conv2 = conv2d(h_conv1, W_conv2) + b_fc2 131 | 132 | return (x, h_conv2, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2]) 133 | 134 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | __all__ = ( 5 | 'detect', 6 | 'post_process', 7 | ) 8 | 9 | 10 | import collections 11 | import itertools 12 | import math 13 | import sys 14 | 15 | 
import cv2 16 | import numpy 17 | import tensorflow as tf 18 | 19 | import common 20 | import model 21 | 22 | 23 | def make_scaled_ims(im, min_shape): 24 | ratio = 1. / 2 ** 0.5 25 | shape = (im.shape[0] / ratio, im.shape[1] / ratio) 26 | 27 | while True: 28 | shape = (int(shape[0] * ratio), int(shape[1] * ratio)) 29 | if shape[0] < min_shape[0] or shape[1] < min_shape[1]: 30 | break 31 | yield cv2.resize(im, (shape[1], shape[0])) 32 | 33 | 34 | def detect(im, param_vals): 35 | """ 36 | Detect number plates in an image. 37 | 38 | :param im: 39 | Image to detect number plates in. 40 | 41 | :param param_vals: 42 | Model parameters to use. These are the parameters output by the `train` 43 | module. 44 | 45 | :returns: 46 | Iterable of `bbox_tl, bbox_br, letter_probs`, defining the bounding box 47 | top-left and bottom-right corners respectively, and a 7,36 matrix 48 | giving the probability distributions of each letter. 49 | 50 | """ 51 | 52 | # Convert the image to various scales. 53 | scaled_ims = list(make_scaled_ims(im, model.WINDOW_SHAPE)) 54 | 55 | # Load the model which detects number plates over a sliding window. 56 | x, y, params = model.get_detect_model() 57 | 58 | # Execute the model at each scale. 59 | with tf.Session(config=tf.ConfigProto()) as sess: 60 | y_vals = [] 61 | for scaled_im in scaled_ims: 62 | feed_dict = {x: numpy.stack([scaled_im])} 63 | feed_dict.update(dict(zip(params, param_vals))) 64 | y_vals.append(sess.run(y, feed_dict=feed_dict)) 65 | 66 | # Interpret the results in terms of bounding boxes in the input image. 67 | # Do this by identifying windows (at all scales) where the model predicts a 68 | # number plate has a greater than 50% probability of appearing. 69 | # 70 | # To obtain pixel coordinates, the window coordinates are scaled according 71 | # to the stride size, and pixel coordinates. 
72 | for i, (scaled_im, y_val) in enumerate(zip(scaled_ims, y_vals)): 73 | for window_coords in numpy.argwhere(y_val[0, :, :, 0] > 74 | -math.log(1./0.99 - 1)): 75 | letter_probs = (y_val[0, 76 | window_coords[0], 77 | window_coords[1], 1:].reshape( 78 | 7, len(common.CHARS))) 79 | letter_probs = common.softmax(letter_probs) 80 | 81 | img_scale = float(im.shape[0]) / scaled_im.shape[0] 82 | 83 | bbox_tl = window_coords * (8, 4) * img_scale 84 | bbox_size = numpy.array(model.WINDOW_SHAPE) * img_scale 85 | 86 | present_prob = common.sigmoid( 87 | y_val[0, window_coords[0], window_coords[1], 0]) 88 | 89 | yield bbox_tl, bbox_tl + bbox_size, present_prob, letter_probs 90 | 91 | 92 | def _overlaps(match1, match2): 93 | bbox_tl1, bbox_br1, _, _ = match1 94 | bbox_tl2, bbox_br2, _, _ = match2 95 | return (bbox_br1[0] > bbox_tl2[0] and 96 | bbox_br2[0] > bbox_tl1[0] and 97 | bbox_br1[1] > bbox_tl2[1] and 98 | bbox_br2[1] > bbox_tl1[1]) 99 | 100 | 101 | def _group_overlapping_rectangles(matches): 102 | matches = list(matches) 103 | num_groups = 0 104 | match_to_group = {} 105 | for idx1 in range(len(matches)): 106 | for idx2 in range(idx1): 107 | if _overlaps(matches[idx1], matches[idx2]): 108 | match_to_group[idx1] = match_to_group[idx2] 109 | break 110 | else: 111 | match_to_group[idx1] = num_groups 112 | num_groups += 1 113 | 114 | groups = collections.defaultdict(list) 115 | for idx, group in match_to_group.items(): 116 | groups[group].append(matches[idx]) 117 | 118 | return groups 119 | 120 | 121 | def post_process(matches): 122 | """ 123 | Take an iterable of matches as returned by `detect` and merge duplicates. 124 | 125 | Merging consists of two steps: 126 | - Finding sets of overlapping rectangles. 127 | - Finding the intersection of those sets, along with the code 128 | corresponding with the rectangle with the highest presence parameter. 
129 | 130 | """ 131 | groups = _group_overlapping_rectangles(matches) 132 | 133 | for group_matches in groups.values(): 134 | mins = numpy.stack(numpy.array(m[0]) for m in group_matches) 135 | maxs = numpy.stack(numpy.array(m[1]) for m in group_matches) 136 | present_probs = numpy.array([m[2] for m in group_matches]) 137 | letter_probs = numpy.stack(m[3] for m in group_matches) 138 | 139 | yield (numpy.max(mins, axis=0).flatten(), 140 | numpy.min(maxs, axis=0).flatten(), 141 | numpy.max(present_probs), 142 | letter_probs[numpy.argmax(present_probs)]) 143 | 144 | 145 | def letter_probs_to_code(letter_probs): 146 | return "".join(common.CHARS[i] for i in numpy.argmax(letter_probs, axis=1)) 147 | 148 | 149 | if __name__ == "__main__": 150 | im = cv2.imread(sys.argv[1]) 151 | im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) / 255. 152 | 153 | f = numpy.load(sys.argv[2]) 154 | param_vals = [f[n] for n in sorted(f.files, key=lambda s: int(s[4:]))] 155 | 156 | for pt1, pt2, present_prob, letter_probs in post_process( 157 | detect(im_gray, param_vals)): 158 | pt1 = tuple(reversed(map(int, pt1))) 159 | pt2 = tuple(reversed(map(int, pt2))) 160 | 161 | code = letter_probs_to_code(letter_probs) 162 | 163 | color = (0.0, 255.0, 0.0) 164 | cv2.rectangle(im, pt1, pt2, color) 165 | 166 | cv2.putText(im, 167 | code, 168 | pt1, 169 | cv2.FONT_HERSHEY_PLAIN, 170 | 1.5, 171 | (0, 0, 0), 172 | thickness=5) 173 | 174 | cv2.putText(im, 175 | code, 176 | pt1, 177 | cv2.FONT_HERSHEY_PLAIN, 178 | 1.5, 179 | (255, 255, 255), 180 | thickness=2) 181 | 182 | cv2.imwrite(sys.argv[3], im) 183 | 184 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | """ 4 | Routines for training the network. 
5 | 6 | """ 7 | 8 | 9 | __all__ = ( 10 | 'train', 11 | ) 12 | 13 | 14 | import functools 15 | import glob 16 | import itertools 17 | import multiprocessing 18 | import random 19 | import sys 20 | import time 21 | 22 | import cv2 23 | import numpy 24 | import tensorflow as tf 25 | 26 | import common 27 | import gen 28 | import model 29 | 30 | 31 | def code_to_vec(p, code): 32 | def char_to_vec(c): 33 | y = numpy.zeros((len(common.CHARS),)) 34 | y[common.CHARS.index(c)] = 1.0 35 | return y 36 | 37 | c = numpy.vstack([char_to_vec(c) for c in code]) 38 | 39 | return numpy.concatenate([[1. if p else 0], c.flatten()]) 40 | 41 | 42 | def read_data(img_glob): 43 | for fname in sorted(glob.glob(img_glob)): 44 | im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255. 45 | code = fname.split("/")[1][9:16] 46 | p = fname.split("/")[1][17] == '1' 47 | yield im, code_to_vec(p, code) 48 | 49 | 50 | def unzip(b): 51 | xs, ys = zip(*b) 52 | xs = numpy.array(xs) 53 | ys = numpy.array(ys) 54 | return xs, ys 55 | 56 | 57 | def batch(it, batch_size): 58 | out = [] 59 | for x in it: 60 | out.append(x) 61 | if len(out) == batch_size: 62 | yield out 63 | out = [] 64 | if out: 65 | yield out 66 | 67 | 68 | def mpgen(f): 69 | def main(q, args, kwargs): 70 | try: 71 | for item in f(*args, **kwargs): 72 | q.put(item) 73 | finally: 74 | q.close() 75 | 76 | @functools.wraps(f) 77 | def wrapped(*args, **kwargs): 78 | q = multiprocessing.Queue(3) 79 | proc = multiprocessing.Process(target=main, 80 | args=(q, args, kwargs)) 81 | proc.start() 82 | try: 83 | while True: 84 | item = q.get() 85 | yield item 86 | finally: 87 | proc.terminate() 88 | proc.join() 89 | 90 | return wrapped 91 | 92 | 93 | @mpgen 94 | def read_batches(batch_size): 95 | g = gen.generate_ims() 96 | def gen_vecs(): 97 | for im, c, p in itertools.islice(g, batch_size): 98 | yield im, code_to_vec(p, c) 99 | 100 | while True: 101 | yield unzip(gen_vecs()) 102 | 103 | 104 | def get_loss(y, y_): 105 | # Calculate the loss from 
digits being incorrect. Don't count loss from 106 | # digits that are in non-present plates. 107 | digits_loss = tf.nn.softmax_cross_entropy_with_logits( 108 | tf.reshape(y[:, 1:], 109 | [-1, len(common.CHARS)]), 110 | tf.reshape(y_[:, 1:], 111 | [-1, len(common.CHARS)])) 112 | digits_loss = tf.reshape(digits_loss, [-1, 7]) 113 | digits_loss = tf.reduce_sum(digits_loss, 1) 114 | digits_loss *= (y_[:, 0] != 0) 115 | digits_loss = tf.reduce_sum(digits_loss) 116 | 117 | # Calculate the loss from presence indicator being wrong. 118 | presence_loss = tf.nn.sigmoid_cross_entropy_with_logits( 119 | y[:, :1], y_[:, :1]) 120 | presence_loss = 7 * tf.reduce_sum(presence_loss) 121 | 122 | return digits_loss, presence_loss, digits_loss + presence_loss 123 | 124 | 125 | def train(learn_rate, report_steps, batch_size, initial_weights=None): 126 | """ 127 | Train the network. 128 | 129 | The function operates interactively: Progress is reported on stdout, and 130 | training ceases upon `KeyboardInterrupt` at which point the learned weights 131 | are saved to `weights.npz`, and also returned. 132 | 133 | :param learn_rate: 134 | Learning rate to use. 135 | 136 | :param report_steps: 137 | Every `report_steps` batches a progress report is printed. 138 | 139 | :param batch_size: 140 | The size of the batches used for training. 141 | 142 | :param initial_weights: 143 | (Optional.) Weights to initialize the network with. 144 | 145 | :return: 146 | The learned network weights. 
147 | 148 | """ 149 | x, y, params = model.get_training_model() 150 | 151 | y_ = tf.placeholder(tf.float32, [None, 7 * len(common.CHARS) + 1]) 152 | 153 | digits_loss, presence_loss, loss = get_loss(y, y_) 154 | train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss) 155 | 156 | best = tf.argmax(tf.reshape(y[:, 1:], [-1, 7, len(common.CHARS)]), 2) 157 | correct = tf.argmax(tf.reshape(y_[:, 1:], [-1, 7, len(common.CHARS)]), 2) 158 | 159 | if initial_weights is not None: 160 | assert len(params) == len(initial_weights) 161 | assign_ops = [w.assign(v) for w, v in zip(params, initial_weights)] 162 | 163 | init = tf.initialize_all_variables() 164 | 165 | def vec_to_plate(v): 166 | return "".join(common.CHARS[i] for i in v) 167 | 168 | def do_report(): 169 | r = sess.run([best, 170 | correct, 171 | tf.greater(y[:, 0], 0), 172 | y_[:, 0], 173 | digits_loss, 174 | presence_loss, 175 | loss], 176 | feed_dict={x: test_xs, y_: test_ys}) 177 | num_correct = numpy.sum( 178 | numpy.logical_or( 179 | numpy.all(r[0] == r[1], axis=1), 180 | numpy.logical_and(r[2] < 0.5, 181 | r[3] < 0.5))) 182 | r_short = (r[0][:190], r[1][:190], r[2][:190], r[3][:190]) 183 | for b, c, pb, pc in zip(*r_short): 184 | print "{} {} <-> {} {}".format(vec_to_plate(c), pc, 185 | vec_to_plate(b), float(pb)) 186 | num_p_correct = numpy.sum(r[2] == r[3]) 187 | 188 | print ("B{:3d} {:2.02f}% {:02.02f}% loss: {} " 189 | "(digits: {}, presence: {}) |{}|").format( 190 | batch_idx, 191 | 100. * num_correct / (len(r[0])), 192 | 100. 
* num_p_correct / len(r[2]), 193 | r[6], 194 | r[4], 195 | r[5], 196 | "".join("X "[numpy.array_equal(b, c) or (not pb and not pc)] 197 | for b, c, pb, pc in zip(*r_short))) 198 | 199 | def do_batch(): 200 | sess.run(train_step, 201 | feed_dict={x: batch_xs, y_: batch_ys}) 202 | if batch_idx % report_steps == 0: 203 | do_report() 204 | 205 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 206 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 207 | sess.run(init) 208 | if initial_weights is not None: 209 | sess.run(assign_ops) 210 | 211 | test_xs, test_ys = unzip(list(read_data("test/*.png"))[:50]) 212 | 213 | try: 214 | last_batch_idx = 0 215 | last_batch_time = time.time() 216 | batch_iter = enumerate(read_batches(batch_size)) 217 | for batch_idx, (batch_xs, batch_ys) in batch_iter: 218 | do_batch() 219 | if batch_idx % report_steps == 0: 220 | batch_time = time.time() 221 | if last_batch_idx != batch_idx: 222 | print "time for 60 batches {}".format( 223 | 60 * (last_batch_time - batch_time) / 224 | (last_batch_idx - batch_idx)) 225 | last_batch_idx = batch_idx 226 | last_batch_time = batch_time 227 | 228 | except KeyboardInterrupt: 229 | last_weights = [p.eval() for p in params] 230 | numpy.savez("weights.npz", *last_weights) 231 | return last_weights 232 | 233 | 234 | if __name__ == "__main__": 235 | if len(sys.argv) > 1: 236 | f = numpy.load(sys.argv[1]) 237 | initial_weights = [f[n] for n in sorted(f.files, 238 | key=lambda s: int(s[4:]))] 239 | else: 240 | initial_weights = None 241 | 242 | train(learn_rate=0.001, 243 | report_steps=20, 244 | batch_size=50, 245 | initial_weights=initial_weights) 246 | 247 | -------------------------------------------------------------------------------- /gen.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | """ 4 | Generate training and test images. 
5 | 6 | """ 7 | 8 | 9 | __all__ = ( 10 | 'generate_ims', 11 | ) 12 | 13 | 14 | import itertools 15 | import math 16 | import os 17 | import random 18 | import sys 19 | 20 | import cv2 21 | import numpy 22 | 23 | from PIL import Image 24 | from PIL import ImageDraw 25 | from PIL import ImageFont 26 | 27 | import common 28 | 29 | FONT_DIR = "./fonts" 30 | FONT_HEIGHT = 32 # Pixel size to which the chars are resized 31 | 32 | OUTPUT_SHAPE = (64, 128) 33 | 34 | CHARS = common.CHARS + " " 35 | 36 | 37 | def make_char_ims(font_path, output_height): 38 | font_size = output_height * 4 39 | 40 | font = ImageFont.truetype(font_path, font_size) 41 | 42 | height = max(font.getsize(c)[1] for c in CHARS) 43 | 44 | for c in CHARS: 45 | width = font.getsize(c)[0] 46 | im = Image.new("RGBA", (width, height), (0, 0, 0)) 47 | 48 | draw = ImageDraw.Draw(im) 49 | draw.text((0, 0), c, (255, 255, 255), font=font) 50 | scale = float(output_height) / height 51 | im = im.resize((int(width * scale), output_height), Image.ANTIALIAS) 52 | yield c, numpy.array(im)[:, :, 0].astype(numpy.float32) / 255. 
def euler_to_mat(yaw, pitch, roll):
    """
    Build a 3x3 rotation matrix from yaw, pitch and roll angles.

    The rotations are composed so that yaw (about the Y-axis) is applied
    first, then pitch (about the X-axis), then roll (about the Z-axis).

    :param yaw:
        Rotation about the Y-axis, in radians.

    :param pitch:
        Rotation about the X-axis, in radians.

    :param roll:
        Rotation about the Z-axis, in radians.

    :return:
        The combined rotation as a 3x3 `numpy.matrix`.

    """
    # Rotate clockwise about the Y-axis
    c, s = math.cos(yaw), math.sin(yaw)
    M = numpy.matrix([[c, 0., s],
                      [0., 1., 0.],
                      [-s, 0., c]])

    # Rotate clockwise about the X-axis
    c, s = math.cos(pitch), math.sin(pitch)
    M = numpy.matrix([[1., 0., 0.],
                      [0., c, -s],
                      [0., s, c]]) * M

    # Rotate clockwise about the Z-axis
    c, s = math.cos(roll), math.sin(roll)
    M = numpy.matrix([[c, -s, 0.],
                      [s, c, 0.],
                      [0., 0., 1.]]) * M

    return M


def pick_colors():
    """
    Pick random grey levels for the plate text and the plate background.

    Pairs are drawn until the two levels differ by at least 0.3, so that the
    text stays distinguishable from the plate.

    :return:
        `(text_color, plate_color)`, both in `[0, 1)`, with `plate_color`
        being the larger (brighter) of the two.

    """
    while True:
        text_color = random.random()
        plate_color = random.random()
        if text_color > plate_color:
            text_color, plate_color = plate_color, text_color
        if plate_color - text_color >= 0.3:
            return text_color, plate_color


def make_affine_transform(from_shape, to_shape,
                          min_scale, max_scale,
                          scale_variation=1.0,
                          rotation_variation=1.0,
                          translation_variation=1.0):
    """
    Create a random affine transform placing a plate image into an output.

    :param from_shape:
        `(rows, cols)` of the image being transformed.

    :param to_shape:
        `(rows, cols)` of the image being transformed into.

    :param min_scale:
        Smallest scale (relative to the largest scale that still fits the
        rotated input inside `to_shape`) that counts as in bounds.

    :param max_scale:
        Largest such scale that counts as in bounds.

    :param scale_variation:
        Multiplier on the width of the interval the scale is drawn from. With
        values above 1.0 the scale may fall outside `[min_scale, max_scale]`.

    :param rotation_variation:
        Multiplier on the randomly drawn roll, pitch and yaw angles.

    :param translation_variation:
        Multiplier on the randomly drawn translation. With values above 1.0
        the translation may exceed the in-bounds range.

    :return:
        `(M, out_of_bounds)` where `M` is a 2x3 affine matrix (as accepted by
        `cv2.warpAffine`) and `out_of_bounds` is `True` if the drawn scale or
        translation fell outside the permitted range.

    """
    out_of_bounds = False

    # Sizes are kept as (x, y) column vectors, hence the swapped indices.
    from_size = numpy.array([[from_shape[1], from_shape[0]]]).T
    to_size = numpy.array([[to_shape[1], to_shape[0]]]).T

    mid_scale = (min_scale + max_scale) * 0.5
    half_range = (max_scale - min_scale) * 0.5 * scale_variation
    scale = random.uniform(mid_scale - half_range, mid_scale + half_range)
    if scale > max_scale or scale < min_scale:
        out_of_bounds = True
    roll = random.uniform(-0.3, 0.3) * rotation_variation
    pitch = random.uniform(-0.2, 0.2) * rotation_variation
    yaw = random.uniform(-1.2, 1.2) * rotation_variation

    # The in-plane (2x2) part of the rotation is all that is needed, both for
    # the bounding box below and for the final transform, so compute it once.
    rotation = euler_to_mat(yaw, pitch, roll)[:2, :2]

    # Compute a bounding box on the skewed input image (`from_shape`).
    h, w = from_shape
    corners = numpy.matrix([[-w, +w, -w, +w],
                            [-h, -h, +h, +h]]) * 0.5
    skewed_corners = rotation * corners
    skewed_size = numpy.array(numpy.max(skewed_corners, axis=1) -
                              numpy.min(skewed_corners, axis=1))

    # Set the scale as large as possible such that the skewed and scaled shape
    # is less than or equal to the desired ratio in either dimension.
    scale *= numpy.min(to_size / skewed_size)

    # Set the translation such that the skewed and scaled image falls within
    # the output shape's bounds. Raising to the fifth power concentrates the
    # samples around zero (the centre of the output).
    trans = (numpy.random.random((2, 1)) - 0.5) * translation_variation
    trans = ((2.0 * trans) ** 5.0) / 2.0
    if numpy.any(trans < -0.5) or numpy.any(trans > 0.5):
        out_of_bounds = True
    trans = (to_size - skewed_size * scale) * trans

    center_to = to_size / 2.
    center_from = from_size / 2.

    # Rotate/scale about the input centre, then move it to the output centre
    # plus the random offset.
    M = rotation * scale
    M = numpy.hstack([M, trans + center_to - M * center_from])

    return M, out_of_bounds


def generate_code():
    """
    Generate a random plate code of the form `LLDD LLL`.

    :return:
        An 8 character string: two letters, two digits, a space and three
        letters, drawn from `common.LETTERS` and `common.DIGITS`.

    """
    return "{}{}{}{} {}{}{}".format(
        random.choice(common.LETTERS),
        random.choice(common.LETTERS),
        random.choice(common.DIGITS),
        random.choice(common.DIGITS),
        random.choice(common.LETTERS),
        random.choice(common.LETTERS),
        random.choice(common.LETTERS))


def rounded_rect(shape, radius):
    """
    Make a mask of a rectangle with rounded corners.

    :param shape:
        `(rows, cols)` of the mask.

    :param radius:
        Corner radius, in pixels.

    :return:
        Array of the given shape which is 1.0 inside the rounded rectangle
        and 0.0 in the cut-off corners.

    """
    out = numpy.ones(shape)

    # Blank out a square in each corner...
    out[:radius, :radius] = 0.0
    out[-radius:, :radius] = 0.0
    out[:radius, -radius:] = 0.0
    out[-radius:, -radius:] = 0.0

    # ...then fill a disc back in. Note `cv2.circle` takes (x, y) centres.
    cv2.circle(out, (radius, radius), radius, 1.0, -1)
    cv2.circle(out, (radius, shape[0] - radius), radius, 1.0, -1)
    cv2.circle(out, (shape[1] - radius, radius), radius, 1.0, -1)
    cv2.circle(out, (shape[1] - radius, shape[0] - radius), radius, 1.0, -1)

    return out


def generate_plate(font_height, char_ims):
    """
    Render a random number plate.

    :param font_height:
        Height of the character images, in pixels.

    :param char_ims:
        Mapping from character to its image (a 2-D array with values in the
        range 0 to 1).

    :return:
        `(plate, mask, code)`: the rendered plate image, the rounded
        rectangle mask of the same shape, and the plate's code with the
        space removed.

    """
    h_padding = random.uniform(0.2, 0.4) * font_height
    v_padding = random.uniform(0.1, 0.3) * font_height
    spacing = font_height * random.uniform(-0.05, 0.05)
    radius = 1 + int(font_height * 0.1 * random.random())

    code = generate_code()
    text_width = sum(char_ims[c].shape[1] for c in code)
    text_width += (len(code) - 1) * spacing

    out_shape = (int(font_height + v_padding * 2),
                 int(text_width + h_padding * 2))

    text_color, plate_color = pick_colors()

    text_mask = numpy.zeros(out_shape)

    # Paste the characters left to right, starting inside the padding.
    x = h_padding
    y = v_padding
    for c in code:
        char_im = char_ims[c]
        ix, iy = int(x), int(y)
        text_mask[iy:iy + char_im.shape[0], ix:ix + char_im.shape[1]] = char_im
        x += char_im.shape[1] + spacing

    # Blend between the plate and text grey levels using the text mask.
    plate = plate_color * (1. - text_mask) + text_color * text_mask

    return plate, rounded_rect(out_shape, radius), code.replace(" ", "")


def generate_bg(num_bg_images):
    """
    Pick a random crop of a random background image.

    Images are read from `bgs/{index:08d}.jpg`. Images smaller than
    `OUTPUT_SHAPE` are skipped and another one is drawn.

    :param num_bg_images:
        Number of background images available; indices are drawn from
        `0` to `num_bg_images - 1`.

    :return:
        Greyscale array of shape `OUTPUT_SHAPE` with values in 0 to 1.

    :raises IOError:
        If the selected background image cannot be read.

    """
    while True:
        fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1))
        bg = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
        if bg is None:
            # `cv2.imread` signals failure by returning None rather than
            # raising, so fail here with a message naming the file.
            raise IOError("Failed to read background image {}".format(fname))
        bg = bg / 255.
        if (bg.shape[1] >= OUTPUT_SHAPE[1] and
                bg.shape[0] >= OUTPUT_SHAPE[0]):
            break

    x = random.randint(0, bg.shape[1] - OUTPUT_SHAPE[1])
    y = random.randint(0, bg.shape[0] - OUTPUT_SHAPE[0])
    bg = bg[y:y + OUTPUT_SHAPE[0], x:x + OUTPUT_SHAPE[1]]

    return bg


def generate_im(char_ims, num_bg_images):
    """
    Generate a single image of a plate composited onto a background.

    :param char_ims:
        Mapping from character to its image, as used by `generate_plate`.

    :param num_bg_images:
        Number of background images available to `generate_bg`.

    :return:
        `(image, code, present)` where `image` has shape `OUTPUT_SHAPE` and
        values in 0 to 1, `code` is the plate's code, and `present` is `True`
        when the random transform stayed within its permitted bounds.

    """
    bg = generate_bg(num_bg_images)

    plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims)

    M, out_of_bounds = make_affine_transform(
        from_shape=plate.shape,
        to_shape=bg.shape,
        min_scale=0.6,
        max_scale=0.875,
        rotation_variation=1.0,
        scale_variation=1.5,
        translation_variation=1.2)
    plate = cv2.warpAffine(plate, M, (bg.shape[1], bg.shape[0]))
    plate_mask = cv2.warpAffine(plate_mask, M, (bg.shape[1], bg.shape[0]))

    out = plate * plate_mask + bg * (1.0 - plate_mask)

    out = cv2.resize(out, (OUTPUT_SHAPE[1], OUTPUT_SHAPE[0]))

    # Add Gaussian noise, then bring the result back into the valid range.
    out += numpy.random.normal(scale=0.05, size=out.shape)
    out = numpy.clip(out, 0., 1.)

    return out, code, not out_of_bounds


def load_fonts(folder_path):
    """
    Render the character images for every `.ttf` font in a folder.

    :param folder_path:
        Directory to search for `.ttf` files.

    :return:
        `(fonts, font_char_ims)` where `fonts` is the list of font file names
        and `font_char_ims` maps each of them to a dict from character to
        character image, rendered at `FONT_HEIGHT`.

    """
    fonts = [f for f in os.listdir(folder_path) if f.endswith('.ttf')]
    font_char_ims = {}
    for font in fonts:
        font_path = os.path.join(folder_path, font)
        font_char_ims[font] = dict(make_char_ims(font_path, FONT_HEIGHT))
    return fonts, font_char_ims


def generate_ims():
    """
    Generate number plate images.

    Fonts are loaded from `FONT_DIR` and the number of backgrounds is taken
    from the contents of `bgs/`. The generator never terminates by itself.

    :return:
        Iterable of `(image, code, present)` tuples, as returned by
        `generate_im`.

    """
    fonts, font_char_ims = load_fonts(FONT_DIR)
    num_bg_images = len(os.listdir("bgs"))
    while True:
        yield generate_im(font_char_ims[random.choice(fonts)], num_bg_images)


if __name__ == "__main__":
    # Usage: gen.py COUNT -- writes COUNT images into a new `test/` directory.
    # The file name encodes the index, the plate code and the presence flag.
    os.mkdir("test")
    im_gen = itertools.islice(generate_ims(), int(sys.argv[1]))
    for img_idx, (im, c, p) in enumerate(im_gen):
        fname = "test/{:08d}_{}_{}.png".format(img_idx, c,
                                               "1" if p else "0")
        # Single-argument call form prints identically on Python 2 and 3.
        print(fname)
        cv2.imwrite(fname, im * 255.)