├── common.py
├── vis.py
├── README.md
├── extractbgs.py
├── model.py
├── detect.py
├── train.py
└── gen.py


/common.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Definitions that don't fit elsewhere.
 4 | 
 5 | """
 6 | 
 7 | __all__ = (
 8 |     'DIGITS',
 9 |     'LETTERS',
10 |     'CHARS',
11 |     'sigmoid',
12 |     'softmax',
13 | )
14 | 
15 | import numpy
16 | 
17 | 
# Characters that may appear in a number plate code.
DIGITS = "0123456789"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Combined alphabet (letters first, then digits); a character's position in
# this string is used as its class index.
CHARS = LETTERS + DIGITS
21 | 
def softmax(a):
    """
    Compute the softmax of `a` along its last axis.

    The maximum of each slice is subtracted before exponentiating. This does
    not change the result mathematically, but prevents `exp` from overflowing
    (and the output becoming NaN) for large logits.

    :param a:
        Array-like of logits, of any rank >= 1.

    :returns:
        A float64 array with the same shape as `a`, whose entries along the
        last axis are non-negative and sum to 1.

    """
    logits = numpy.asarray(a).astype(numpy.float64)
    shifted = logits - numpy.max(logits, axis=-1, keepdims=True)
    exps = numpy.exp(shifted)
    # keepdims makes the normaliser broadcast correctly for any input rank.
    return exps / numpy.sum(exps, axis=-1, keepdims=True)
25 | 
def sigmoid(a):
    """
    Apply the logistic function `1 / (1 + exp(-a))` element-wise.

    :param a:
        Scalar or numpy array.

    :returns:
        Value(s) in the range [0, 1], with the same shape as `a`.

    """
    decay = numpy.exp(-a)
    return 1. / (1. + decay)
28 | 
29 | 


--------------------------------------------------------------------------------
/vis.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import sys
 3 | 
 4 | import matplotlib.pyplot as plt
 5 | import numpy
 6 | 
# Load the array archive named by the first command-line argument.
a = numpy.load(sys.argv[1])

# First stored array; only referenced by the disabled loop further down.
# Presumably the first convolutional layer's weights -- verify against the
# order in which the weights file was written.
conv1 = a['arr_0']

# An 8x8 grid of axes; squeeze=False guarantees `ax` is always 2-D.
fig, ax = plt.subplots(8, 8,
                       figsize=(8, 8),
                       dpi=100,
                       squeeze=False)

# The bare string literal below is disabled code (it has no runtime effect)
# that would plot each filter of `conv1` instead.
"""
for i in range(conv1.shape[3]):
    ax[i // 8, i % 8].imshow(conv1[:, :, 0, i], cmap='Greys')
    
"""
# Plot at most 8x8 two-dimensional slices of the third stored array: column
# `i` indexes its last axis and row `j` indexes its third axis.
conv2 = a['arr_2']
for i in range(min(8, conv2.shape[3])):
    for j in range(min(8, conv2.shape[2])):
        ax[j, i].imshow(conv2[:, :, j, i], cmap='Greys')

# Write the figure to the path given by the second command-line argument.
fig.savefig(sys.argv[2], dpi=30.)
27 | 
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Number-plate-detection-using-CNN
 2 | This program uses convolutional neural networks to recognize the text in a number plate. The network is based on [this paper](https://vision.in.tum.de/_media/spezial/bib/stark-gcpr15.pdf) by Stark et al., which describes how Google broke their own CAPTCHA system. Do check it out, as it gives more specifics about the architecture used than Google's paper.
 3 | 
 4 | To use this project:
 5 | 
 6 | 1. `./extractbgs.py SUN397.tar.gz`: Extract ~3GB of background images from the [SUN database](http://groups.csail.mit.edu/vision/SUN/)
 7 |    into `bgs/`. (`bgs/` must not already exist.) The tar file (36GB) can be [downloaded here](http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz).
 8 |    This step may take a while as it will extract 108,634 images.
 9 | 
10 | 2. `./gen.py 1000`: Generate 1000 test set images in `test/`. (`test/` must not
11 |     already exist.) This step requires `UKNumberPlate.ttf` to be in the
12 |     `fonts/` directory, which can be
13 |     [downloaded here](http://www.dafont.com/uk-number-plate.font).
14 | 
15 | 3. `./train.py`: Train the model. A GPU is recommended for this step. It will
16 |    take around 100,000 batches to converge. When you're satisfied that the
17 |    network has learned enough press `Ctrl+C` and the process will write the
18 |    weights to `weights.npz` and return.
19 | 
20 | 4. `./detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image.
21 | 


--------------------------------------------------------------------------------
/extractbgs.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | """
 4 | Extract background images from a tar archive.
 5 | 
 6 | """
 7 | 
 8 | 
 9 | __all__ = (
10 |     'extract_backgrounds',
11 | )
12 | 
13 | 
14 | import os
15 | import sys
16 | import tarfile
17 | 
18 | import cv2
19 | import numpy
20 | 
21 | 
def im_from_file(f):
    """
    Decode an image from an open binary file object as grayscale.

    :param f:
        File-like object whose `read()` returns the encoded image bytes.

    :returns:
        The decoded image, or `None` if the data is empty or could not be
        decoded.

    """
    a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8)
    if a.size == 0:
        # Nothing to decode; report it the same way as an undecodable image.
        return None
    # IMREAD_GRAYSCALE is the current name of the grayscale load flag; the
    # legacy CV_LOAD_IMAGE_GRAYSCALE alias is absent from OpenCV 3 and later.
    return cv2.imdecode(a, cv2.IMREAD_GRAYSCALE)
25 | 
26 | 
def extract_backgrounds(archive_name):
    """
    Extract backgrounds from provided tar archive.

    JPEGs from the archive are converted into grayscale, cropped to a square
    (keeping the top-left region), shrunk to 256x256 if larger, and saved in
    ./bgs/ as sequentially numbered files.

    :param archive_name:
        Name of the .tar file containing JPEGs of background images.

    :raises OSError:
        If the `bgs` directory already exists.

    :raises Exception:
        If an output image could not be written.

    """
    os.mkdir("bgs")

    index = 0
    # The context manager makes sure the archive is closed even on error.
    with tarfile.open(name=archive_name) as t:
        for m in t:
            if not m.name.endswith(".jpg"):
                continue
            f = t.extractfile(m)
            if f is None:
                # Not a regular file (e.g. a directory named "*.jpg").
                continue
            try:
                im = im_from_file(f)
            finally:
                f.close()
            if im is None:
                # Undecodable image; skip it.
                continue

            # Crop to a square whose side is the shorter image dimension.
            side = min(im.shape[0], im.shape[1])
            im = im[:side, :side]
            if side > 256:
                im = cv2.resize(im, (256, 256))
            fname = "bgs/{:08}.jpg".format(index)
            print(fname)
            rc = cv2.imwrite(fname, im)
            if not rc:
                raise Exception("Failed to write file {}".format(fname))
            index += 1
71 | 
72 | 
if __name__ == "__main__":

    # The first command-line argument is the path of the tar archive.
    extract_backgrounds(sys.argv[1])
76 | 
77 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Definition of the neural networks. 
  3 | 
  4 | """
  5 | 
  6 | 
  7 | __all__ = (
  8 |     'get_training_model',
  9 |     'get_detect_model',
 10 |     'WINDOW_SHAPE',
 11 | )
 12 | 
 13 | 
 14 | import tensorflow as tf
 15 | 
 16 | import common
 17 | 
 18 | 
# Size of the window the network operates on, as (rows, columns).
WINDOW_SHAPE = (64, 128)
 20 | 
 21 | 
 22 | # Utility functions
 23 | def weight_variable(shape):
 24 |   initial = tf.truncated_normal(shape, stddev=0.1)
 25 |   return tf.Variable(initial)
 26 | 
 27 | 
 28 | def bias_variable(shape):
 29 |   initial = tf.constant(0.1, shape=shape)
 30 |   return tf.Variable(initial)
 31 | 
 32 | 
 33 | def conv2d(x, W, stride=(1, 1), padding='SAME'):
 34 |   return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1],
 35 |                       padding=padding)
 36 | 
 37 | 
 38 | def max_pool(x, ksize=(2, 2), stride=(2, 2)):
 39 |   return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1],
 40 |                         strides=[1, stride[0], stride[1], 1], padding='SAME')
 41 | 
 42 | 
 43 | def avg_pool(x, ksize=(2, 2), stride=(2, 2)):
 44 |   return tf.nn.avg_pool(x, ksize=[1, ksize[0], ksize[1], 1],
 45 |                         strides=[1, stride[0], stride[1], 1], padding='SAME')
 46 | 
 47 | 
 48 | def convolutional_layers():
 49 |     """
 50 |     Get the convolutional layers of the model.
 51 | 
 52 |     """
 53 |     x = tf.placeholder(tf.float32, [None, None, None])
 54 | 
 55 |     # First layer
 56 |     W_conv1 = weight_variable([5, 5, 1, 48])
 57 |     b_conv1 = bias_variable([48])
 58 |     x_expanded = tf.expand_dims(x, 3)
 59 |     h_conv1 = tf.nn.relu(conv2d(x_expanded, W_conv1) + b_conv1)
 60 |     h_pool1 = max_pool(h_conv1, ksize=(2, 2), stride=(2, 2))
 61 | 
 62 |     # Second layer
 63 |     W_conv2 = weight_variable([5, 5, 48, 64])
 64 |     b_conv2 = bias_variable([64])
 65 | 
 66 |     h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
 67 |     h_pool2 = max_pool(h_conv2, ksize=(2, 1), stride=(2, 1))
 68 | 
 69 |     # Third layer
 70 |     W_conv3 = weight_variable([5, 5, 64, 128])
 71 |     b_conv3 = bias_variable([128])
 72 | 
 73 |     h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
 74 |     h_pool3 = max_pool(h_conv3, ksize=(2, 2), stride=(2, 2))
 75 | 
 76 |     return x, h_pool3, [W_conv1, b_conv1,
 77 |                         W_conv2, b_conv2,
 78 |                         W_conv3, b_conv3]
 79 | 
 80 | 
 81 | def get_training_model():
 82 |     """
 83 |     The training model acts on a batch of 128x64 windows, and outputs a (1 +
 84 |     7 * len(common.CHARS) vector, `v`. `v[0]` is the probability that a plate is
 85 |     fully within the image and is at the correct scale.
 86 |     
 87 |     `v[1 + i * len(common.CHARS) + c]` is the probability that the `i`'th
 88 |     character is `c`.
 89 | 
 90 |     """
 91 |     x, conv_layer, conv_vars = convolutional_layers()
 92 |     
 93 |     # Densely connected layer
 94 |     W_fc1 = weight_variable([32 * 8 * 128, 2048])
 95 |     b_fc1 = bias_variable([2048])
 96 | 
 97 |     conv_layer_flat = tf.reshape(conv_layer, [-1, 32 * 8 * 128])
 98 |     h_fc1 = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1)
 99 | 
100 |     # Output layer
101 |     W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)])
102 |     b_fc2 = bias_variable([1 + 7 * len(common.CHARS)])
103 | 
104 |     y = tf.matmul(h_fc1, W_fc2) + b_fc2
105 | 
106 |     return (x, y, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2])
107 | 
108 | 
def get_detect_model():
    """
    Build the model used for detection.

    This has the same variables as the training model, but the two dense
    layers are expressed as convolutions so that the network can be applied
    to an arbitrarily sized input. The effect is that of sliding the 128x64
    window across the image; the pooling strides of the convolutional layers
    multiply out to 8 rows by 4 columns per output position.

    The output is of the form `v`, where `v[i, j]` is equivalent to the output
    of the training model for the window at coordinates `(8 * i, 4 * j)`.

    :returns:
        A tuple `(x, y, variables)`: the input placeholder, the output logits
        and the list of all model variables.

    """
    x, conv_layer, conv_vars = convolutional_layers()

    num_outputs = 1 + 7 * len(common.CHARS)

    # First dense layer, applied as an 8x32 'VALID' convolution over the
    # convolutional feature map.
    hidden_weights = weight_variable([8 * 32 * 128, 2048])
    hidden_kernel = tf.reshape(hidden_weights, [8, 32, 128, 2048])
    hidden_bias = bias_variable([2048])
    hidden = tf.nn.relu(conv2d(conv_layer, hidden_kernel,
                               stride=(1, 1), padding="VALID") + hidden_bias)

    # Output layer, applied as a 1x1 convolution.
    out_weights = weight_variable([2048, num_outputs])
    out_kernel = tf.reshape(out_weights, [1, 1, 2048, num_outputs])
    out_bias = bias_variable([num_outputs])
    y = conv2d(hidden, out_kernel) + out_bias

    variables = conv_vars + [hidden_weights, hidden_bias,
                             out_weights, out_bias]
    return (x, y, variables)
133 | 
134 | 


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | __all__ = (
  5 |     'detect',
  6 |     'post_process',
  7 | )
  8 | 
  9 | 
 10 | import collections
 11 | import itertools
 12 | import math
 13 | import sys
 14 | 
 15 | import cv2
 16 | import numpy
 17 | import tensorflow as tf
 18 | 
 19 | import common
 20 | import model
 21 | 
 22 | 
 23 | def make_scaled_ims(im, min_shape):
 24 |     ratio = 1. / 2 ** 0.5
 25 |     shape = (im.shape[0] / ratio, im.shape[1] / ratio)
 26 | 
 27 |     while True:
 28 |         shape = (int(shape[0] * ratio), int(shape[1] * ratio))
 29 |         if shape[0] < min_shape[0] or shape[1] < min_shape[1]:
 30 |             break
 31 |         yield cv2.resize(im, (shape[1], shape[0]))
 32 | 
 33 | 
 34 | def detect(im, param_vals):
 35 |     """
 36 |     Detect number plates in an image.
 37 | 
 38 |     :param im:
 39 |         Image to detect number plates in.
 40 | 
 41 |     :param param_vals:
 42 |         Model parameters to use. These are the parameters output by the `train`
 43 |         module.
 44 | 
 45 |     :returns:
 46 |         Iterable of `bbox_tl, bbox_br, letter_probs`, defining the bounding box
 47 |         top-left and bottom-right corners respectively, and a 7,36 matrix
 48 |         giving the probability distributions of each letter.
 49 | 
 50 |     """
 51 | 
 52 |     # Convert the image to various scales.
 53 |     scaled_ims = list(make_scaled_ims(im, model.WINDOW_SHAPE))
 54 | 
 55 |     # Load the model which detects number plates over a sliding window.
 56 |     x, y, params = model.get_detect_model()
 57 | 
 58 |     # Execute the model at each scale.
 59 |     with tf.Session(config=tf.ConfigProto()) as sess:
 60 |         y_vals = []
 61 |         for scaled_im in scaled_ims:
 62 |             feed_dict = {x: numpy.stack([scaled_im])}
 63 |             feed_dict.update(dict(zip(params, param_vals)))
 64 |             y_vals.append(sess.run(y, feed_dict=feed_dict))
 65 | 
 66 |     # Interpret the results in terms of bounding boxes in the input image.
 67 |     # Do this by identifying windows (at all scales) where the model predicts a
 68 |     # number plate has a greater than 50% probability of appearing.
 69 |     #
 70 |     # To obtain pixel coordinates, the window coordinates are scaled according
 71 |     # to the stride size, and pixel coordinates.
 72 |     for i, (scaled_im, y_val) in enumerate(zip(scaled_ims, y_vals)):
 73 |         for window_coords in numpy.argwhere(y_val[0, :, :, 0] >
 74 |                                                        -math.log(1./0.99 - 1)):
 75 |             letter_probs = (y_val[0,
 76 |                                   window_coords[0],
 77 |                                   window_coords[1], 1:].reshape(
 78 |                                     7, len(common.CHARS)))
 79 |             letter_probs = common.softmax(letter_probs)
 80 | 
 81 |             img_scale = float(im.shape[0]) / scaled_im.shape[0]
 82 | 
 83 |             bbox_tl = window_coords * (8, 4) * img_scale
 84 |             bbox_size = numpy.array(model.WINDOW_SHAPE) * img_scale
 85 | 
 86 |             present_prob = common.sigmoid(
 87 |                                y_val[0, window_coords[0], window_coords[1], 0])
 88 | 
 89 |             yield bbox_tl, bbox_tl + bbox_size, present_prob, letter_probs
 90 | 
 91 | 
 92 | def _overlaps(match1, match2):
 93 |     bbox_tl1, bbox_br1, _, _ = match1
 94 |     bbox_tl2, bbox_br2, _, _ = match2
 95 |     return (bbox_br1[0] > bbox_tl2[0] and
 96 |             bbox_br2[0] > bbox_tl1[0] and
 97 |             bbox_br1[1] > bbox_tl2[1] and
 98 |             bbox_br2[1] > bbox_tl1[1])
 99 | 
100 | 
def _group_overlapping_rectangles(matches):
    """
    Partition matches into groups of overlapping rectangles.

    Matches are processed in order. Each match joins the group of the first
    earlier match it overlaps; if it overlaps none, it starts a new group.

    :returns:
        A dict mapping a group number to the list of matches in that group.

    """
    rects = list(matches)
    group_of = {}
    next_group = 0

    for current, rect in enumerate(rects):
        first_hit = next((earlier for earlier in range(current)
                          if _overlaps(rect, rects[earlier])), None)
        if first_hit is None:
            group_of[current] = next_group
            next_group += 1
        else:
            group_of[current] = group_of[first_hit]

    grouped = collections.defaultdict(list)
    for position, group in group_of.items():
        grouped[group].append(rects[position])

    return grouped
119 | 
120 | 
def post_process(matches):
    """
    Take an iterable of matches as returned by `detect` and merge duplicates.

    Merging consists of two steps:
      - Finding sets of overlapping rectangles.
      - Finding the intersection of the rectangles in each set, along with the
        code corresponding with the rectangle with the highest presence
        parameter.

    :returns:
        Iterable of `bbox_tl, bbox_br, present_prob, letter_probs`, one per
        group of overlapping matches.

    """
    groups = _group_overlapping_rectangles(matches)

    for group_matches in groups.values():
        # numpy.stack needs a real sequence; passing a generator is rejected
        # by current numpy releases.
        mins = numpy.stack([numpy.array(m[0]) for m in group_matches])
        maxs = numpy.stack([numpy.array(m[1]) for m in group_matches])
        present_probs = numpy.array([m[2] for m in group_matches])
        letter_probs = numpy.stack([m[3] for m in group_matches])

        # The intersection of the boxes: the largest top-left corner and the
        # smallest bottom-right corner.
        yield (numpy.max(mins, axis=0).flatten(),
               numpy.min(maxs, axis=0).flatten(),
               numpy.max(present_probs),
               letter_probs[numpy.argmax(present_probs)])
143 | 
144 | 
def letter_probs_to_code(letter_probs):
    """
    Convert a matrix of per-position character probabilities into a string by
    taking the most probable character of each row.

    """
    best_indices = numpy.argmax(letter_probs, axis=1)
    return "".join(common.CHARS[i] for i in best_indices)
147 | 
148 | 
if __name__ == "__main__":
    # Usage: detect.py <input image> <weights .npz> <output image>
    im = cv2.imread(sys.argv[1])
    if im is None:
        sys.exit("Could not read image {}".format(sys.argv[1]))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) / 255.

    # Arrays are stored as arr_0, arr_1, ...; sort them numerically so they
    # line up with the model's parameter order.
    f = numpy.load(sys.argv[2])
    param_vals = [f[n] for n in sorted(f.files, key=lambda s: int(s[4:]))]

    for pt1, pt2, present_prob, letter_probs in post_process(
                                                  detect(im_gray, param_vals)):
        # Matches are in (row, column) order; OpenCV drawing wants (x, y).
        # Build the tuples explicitly: `reversed(map(...))` fails on Python 3
        # because `map` returns an iterator there.
        pt1 = tuple(int(v) for v in pt1[::-1])
        pt2 = tuple(int(v) for v in pt2[::-1])

        code = letter_probs_to_code(letter_probs)

        color = (0.0, 255.0, 0.0)
        cv2.rectangle(im, pt1, pt2, color)

        # Draw the code twice (thick black, then thin white) so the text has
        # an outline and stays readable on any background.
        cv2.putText(im,
                    code,
                    pt1,
                    cv2.FONT_HERSHEY_PLAIN,
                    1.5,
                    (0, 0, 0),
                    thickness=5)

        cv2.putText(im,
                    code,
                    pt1,
                    cv2.FONT_HERSHEY_PLAIN,
                    1.5,
                    (255, 255, 255),
                    thickness=2)

    cv2.imwrite(sys.argv[3], im)
183 | 
184 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | """
  4 | Routines for training the network.
  5 | 
  6 | """
  7 | 
  8 | 
  9 | __all__ = (
 10 |     'train',
 11 | )
 12 | 
 13 | 
 14 | import functools
 15 | import glob
 16 | import itertools
 17 | import multiprocessing
 18 | import random
 19 | import sys
 20 | import time
 21 | 
 22 | import cv2
 23 | import numpy
 24 | import tensorflow as tf
 25 | 
 26 | import common
 27 | import gen
 28 | import model
 29 | 
 30 | 
 31 | def code_to_vec(p, code):
 32 |     def char_to_vec(c):
 33 |         y = numpy.zeros((len(common.CHARS),))
 34 |         y[common.CHARS.index(c)] = 1.0
 35 |         return y
 36 | 
 37 |     c = numpy.vstack([char_to_vec(c) for c in code])
 38 | 
 39 |     return numpy.concatenate([[1. if p else 0], c.flatten()])
 40 | 
 41 | 
 42 | def read_data(img_glob):
 43 |     for fname in sorted(glob.glob(img_glob)):
 44 |         im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255.
 45 |         code = fname.split("/")[1][9:16]
 46 |         p = fname.split("/")[1][17] == '1'
 47 |         yield im, code_to_vec(p, code)
 48 | 
 49 | 
 50 | def unzip(b):
 51 |     xs, ys = zip(*b)
 52 |     xs = numpy.array(xs)
 53 |     ys = numpy.array(ys)
 54 |     return xs, ys
 55 | 
 56 | 
 57 | def batch(it, batch_size):
 58 |     out = []
 59 |     for x in it:
 60 |         out.append(x)
 61 |         if len(out) == batch_size:
 62 |             yield out
 63 |             out = []
 64 |     if out:
 65 |         yield out
 66 | 
 67 | 
def mpgen(f):
    """
    Decorator that runs the generator function `f` in a separate process.

    The decorated function is itself a generator: calling it starts a child
    process that iterates `f(*args, **kwargs)` and pushes each item onto a
    queue, while the caller's side pulls the items off the queue and yields
    them.

    """
    def main(q, args, kwargs):
        # Runs in the child process: forward every generated item to the
        # parent through the queue.
        try:
            for item in f(*args, **kwargs):
                q.put(item)
        finally:
            q.close()

    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        # The queue holds at most 3 items, so the child blocks in `put` once
        # it is that far ahead of the consumer.
        q = multiprocessing.Queue(3) 
        proc = multiprocessing.Process(target=main,
                                       args=(q, args, kwargs))
        proc.start()
        try:
            # NOTE(review): the child sends no end-of-stream marker, so if `f`
            # ever finishes this loop appears to block in `q.get()` forever --
            # confirm that only infinite generators are wrapped.
            while True:
                item = q.get()
                yield item
        finally:
            # Reached when the consumer closes or discards this generator
            # (or an exception propagates): stop the child and reap it.
            proc.terminate()
            proc.join()

    return wrapped
 91 |         
 92 | 
 93 | @mpgen
 94 | def read_batches(batch_size):
 95 |     g = gen.generate_ims()
 96 |     def gen_vecs():
 97 |         for im, c, p in itertools.islice(g, batch_size):
 98 |             yield im, code_to_vec(p, c)
 99 | 
100 |     while True:
101 |         yield unzip(gen_vecs())
102 | 
103 | 
def get_loss(y, y_):
    """
    Build the loss terms for the network output `y` against the targets `y_`.

    Column 0 of both tensors is the plate-presence indicator; the remaining
    columns are 7 groups of `len(common.CHARS)` per-character values.

    :param y:
        Network output (logits).

    :param y_:
        Target vectors, as produced by `code_to_vec`.

    :returns:
        A tuple `(digits_loss, presence_loss, total_loss)` of scalar tensors.

    """
    # Calculate the loss from digits being incorrect.  Don't count loss from
    # digits that are in non-present plates.
    # NOTE(review): the two cross-entropy calls below pass their arguments
    # positionally as (logits, labels); newer TensorFlow 1.x releases are
    # believed to require keyword arguments here -- confirm against the
    # TensorFlow version in use.
    digits_loss = tf.nn.softmax_cross_entropy_with_logits(
                                          tf.reshape(y[:, 1:],
                                                     [-1, len(common.CHARS)]),
                                          tf.reshape(y_[:, 1:],
                                                     [-1, len(common.CHARS)]))
    digits_loss = tf.reshape(digits_loss, [-1, 7])
    digits_loss = tf.reduce_sum(digits_loss, 1)
    # Build the mask with explicit ops: the Python `!=` operator is not an
    # element-wise comparison producing a float mask on these tensors, so
    # using it here does not zero the loss of non-present plates.
    plate_present = tf.cast(tf.not_equal(y_[:, 0], 0), digits_loss.dtype)
    digits_loss *= plate_present
    digits_loss = tf.reduce_sum(digits_loss)

    # Calculate the loss from presence indicator being wrong.  It is weighted
    # by 7, the number of characters contributing to the digits loss.
    presence_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                                                          y[:, :1], y_[:, :1])
    presence_loss = 7 * tf.reduce_sum(presence_loss)

    return digits_loss, presence_loss, digits_loss + presence_loss
123 | 
124 | 
def train(learn_rate, report_steps, batch_size, initial_weights=None):
    """
    Train the network.

    The function operates interactively: Progress is reported on stdout, and
    training ceases upon `KeyboardInterrupt` at which point the learned weights
    are saved to `weights.npz`, and also returned.

    :param learn_rate:
        Learning rate to use.

    :param report_steps:
        Every `report_steps` batches a progress report is printed.

    :param batch_size:
        The size of the batches used for training.

    :param initial_weights:
        (Optional.) Weights to initialize the network with.

    :return:
        The learned network weights.

    """
    x, y, params = model.get_training_model()

    y_ = tf.placeholder(tf.float32, [None, 7 * len(common.CHARS) + 1])

    digits_loss, presence_loss, loss = get_loss(y, y_)
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

    best = tf.argmax(tf.reshape(y[:, 1:], [-1, 7, len(common.CHARS)]), 2)
    correct = tf.argmax(tf.reshape(y_[:, 1:], [-1, 7, len(common.CHARS)]), 2)

    # Reporting ops are built once here; creating them inside `do_report`
    # would add new nodes to the graph on every report.
    presence_predicted = tf.greater(y[:, 0], 0)
    presence_actual = y_[:, 0]

    if initial_weights is not None:
        assert len(params) == len(initial_weights)
        assign_ops = [w.assign(v) for w, v in zip(params, initial_weights)]

    init = tf.initialize_all_variables()

    def vec_to_plate(v):
        return "".join(common.CHARS[i] for i in v)

    def do_report():
        # Evaluate the current network on the fixed test set.
        r = sess.run([best,
                      correct,
                      presence_predicted,
                      presence_actual,
                      digits_loss,
                      presence_loss,
                      loss],
                     feed_dict={x: test_xs, y_: test_ys})
        # An example counts as correct if every character matches, or if the
        # plate is both predicted and labelled as not present.
        num_correct = numpy.sum(
                        numpy.logical_or(
                            numpy.all(r[0] == r[1], axis=1),
                            numpy.logical_and(r[2] < 0.5,
                                              r[3] < 0.5)))
        r_short = (r[0][:190], r[1][:190], r[2][:190], r[3][:190])
        for b, c, pb, pc in zip(*r_short):
            print("{} {} <-> {} {}".format(vec_to_plate(c), pc,
                                           vec_to_plate(b), float(pb)))
        num_p_correct = numpy.sum(r[2] == r[3])

        # The trailing |...| strip shows one character per test example: a
        # space for a correct prediction and an "X" for an incorrect one.
        print(("B{:3d} {:2.02f}% {:02.02f}% loss: {} "
               "(digits: {}, presence: {}) |{}|").format(
            batch_idx,
            100. * num_correct / (len(r[0])),
            100. * num_p_correct / len(r[2]),
            r[6],
            r[4],
            r[5],
            "".join("X "[numpy.array_equal(b, c) or (not pb and not pc)]
                                           for b, c, pb, pc in zip(*r_short))))

    def do_batch():
        sess.run(train_step,
                 feed_dict={x: batch_xs, y_: batch_ys})
        if batch_idx % report_steps == 0:
            do_report()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)
        if initial_weights is not None:
            sess.run(assign_ops)

        test_xs, test_ys = unzip(list(read_data("test/*.png"))[:50])

        try:
            last_batch_idx = 0
            last_batch_time = time.time()
            batch_iter = enumerate(read_batches(batch_size))
            for batch_idx, (batch_xs, batch_ys) in batch_iter:
                do_batch()
                if batch_idx % report_steps == 0:
                    batch_time = time.time()
                    if last_batch_idx != batch_idx:
                        # Both differences are negative, so the ratio is the
                        # (positive) average time per batch.
                        print("time for 60 batches {}".format(
                            60 * (last_batch_time - batch_time) /
                                            (last_batch_idx - batch_idx)))
                        last_batch_idx = batch_idx
                        last_batch_time = batch_time

        except KeyboardInterrupt:
            # Ctrl+C is the normal way to stop: save and return the weights.
            last_weights = [p.eval() for p in params]
            numpy.savez("weights.npz", *last_weights)
            return last_weights
232 | 
233 | 
if __name__ == "__main__":
    # An optional first argument names a weights file to resume from.
    initial_weights = None
    if len(sys.argv) > 1:
        f = numpy.load(sys.argv[1])
        # Arrays are stored as arr_0, arr_1, ...; order them numerically.
        ordered_names = sorted(f.files, key=lambda s: int(s[4:]))
        initial_weights = [f[n] for n in ordered_names]

    train(learn_rate=0.001,
          report_steps=20,
          batch_size=50,
          initial_weights=initial_weights)
246 | 
247 | 


--------------------------------------------------------------------------------
/gen.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | """
  4 | Generate training and test images.
  5 | 
  6 | """
  7 | 
  8 | 
  9 | __all__ = (
 10 |     'generate_ims',
 11 | )
 12 | 
 13 | 
 14 | import itertools
 15 | import math
 16 | import os
 17 | import random
 18 | import sys
 19 | 
 20 | import cv2
 21 | import numpy
 22 | 
 23 | from PIL import Image
 24 | from PIL import ImageDraw
 25 | from PIL import ImageFont
 26 | 
 27 | import common
 28 | 
# Directory holding the font files.
FONT_DIR = "./fonts"
FONT_HEIGHT = 32  # Pixel size to which the chars are resized

# Shape of the generated background crops, as (rows, columns).
OUTPUT_SHAPE = (64, 128)

# Characters for which images are rendered: the plate alphabet plus the space
# that appears in generated codes.
CHARS = common.CHARS + " "
 35 | 
 36 | 
 37 | def make_char_ims(font_path, output_height):
 38 |     font_size = output_height * 4
 39 | 
 40 |     font = ImageFont.truetype(font_path, font_size)
 41 | 
 42 |     height = max(font.getsize(c)[1] for c in CHARS)
 43 | 
 44 |     for c in CHARS:
 45 |         width = font.getsize(c)[0]
 46 |         im = Image.new("RGBA", (width, height), (0, 0, 0))
 47 | 
 48 |         draw = ImageDraw.Draw(im)
 49 |         draw.text((0, 0), c, (255, 255, 255), font=font)
 50 |         scale = float(output_height) / height
 51 |         im = im.resize((int(width * scale), output_height), Image.ANTIALIAS)
 52 |         yield c, numpy.array(im)[:, :, 0].astype(numpy.float32) / 255.
 53 | 
 54 | 
 55 | def euler_to_mat(yaw, pitch, roll):
 56 |     # Rotate clockwise about the Y-axis
 57 |     c, s = math.cos(yaw), math.sin(yaw)
 58 |     M = numpy.matrix([[  c, 0.,  s],
 59 |                       [ 0., 1., 0.],
 60 |                       [ -s, 0.,  c]])
 61 | 
 62 |     # Rotate clockwise about the X-axis
 63 |     c, s = math.cos(pitch), math.sin(pitch)
 64 |     M = numpy.matrix([[ 1., 0., 0.],
 65 |                       [ 0.,  c, -s],
 66 |                       [ 0.,  s,  c]]) * M
 67 | 
 68 |     # Rotate clockwise about the Z-axis
 69 |     c, s = math.cos(roll), math.sin(roll)
 70 |     M = numpy.matrix([[  c, -s, 0.],
 71 |                       [  s,  c, 0.],
 72 |                       [ 0., 0., 1.]]) * M
 73 | 
 74 |     return M
 75 | 
 76 | 
 77 | def pick_colors():
 78 |     first = True
 79 |     while first or plate_color - text_color < 0.3:
 80 |         text_color = random.random()
 81 |         plate_color = random.random()
 82 |         if text_color > plate_color:
 83 |             text_color, plate_color = plate_color, text_color
 84 |         first = False
 85 |     return text_color, plate_color
 86 | 
 87 | 
def make_affine_transform(from_shape, to_shape, 
                          min_scale, max_scale,
                          scale_variation=1.0,
                          rotation_variation=1.0,
                          translation_variation=1.0):
    """
    Build a random affine transform mapping an image of `from_shape` into an
    image of `to_shape`.

    :param from_shape:
        `(rows, columns)` of the source image.

    :param to_shape:
        `(rows, columns)` of the destination image.

    :param min_scale:
        Lower bound of the accepted scale range.

    :param max_scale:
        Upper bound of the accepted scale range.

    :param scale_variation:
        Multiplier on the width of the interval the scale is drawn from. With
        a value above 1.0 the drawn scale can fall outside
        `[min_scale, max_scale]`.

    :param rotation_variation:
        Multiplier applied to the randomly drawn roll, pitch and yaw angles.

    :param translation_variation:
        Multiplier applied to the randomly drawn translation. With a value
        above 1.0 the translation can exceed the in-bounds range.

    :returns:
        A tuple `(M, out_of_bounds)`: the 2x3 affine matrix, and a flag that
        is set when the drawn scale or translation was outside its accepted
        range.

    """
    out_of_bounds = False

    # Sizes as (width, height) column vectors.
    from_size = numpy.array([[from_shape[1], from_shape[0]]]).T
    to_size = numpy.array([[to_shape[1], to_shape[0]]]).T

    # Draw the scale from an interval centred on the middle of
    # [min_scale, max_scale], widened or narrowed by `scale_variation`.
    scale = random.uniform((min_scale + max_scale) * 0.5 -
                           (max_scale - min_scale) * 0.5 * scale_variation,
                           (min_scale + max_scale) * 0.5 +
                           (max_scale - min_scale) * 0.5 * scale_variation)
    if scale > max_scale or scale < min_scale:
        out_of_bounds = True
    # Angles are in radians; yaw has the widest range.
    roll = random.uniform(-0.3, 0.3) * rotation_variation
    pitch = random.uniform(-0.2, 0.2) * rotation_variation
    yaw = random.uniform(-1.2, 1.2) * rotation_variation

    # Compute a bounding box on the skewed input image (`from_shape`).
    # Only the top-left 2x2 part of the rotation matrix is used.
    M = euler_to_mat(yaw, pitch, roll)[:2, :2]
    h, w = from_shape
    # The four corners of the source image, centred on the origin.
    corners = numpy.matrix([[-w, +w, -w, +w],
                            [-h, -h, +h, +h]]) * 0.5
    skewed_size = numpy.array(numpy.max(M * corners, axis=1) -
                              numpy.min(M * corners, axis=1))

    # Set the scale as large as possible such that the skewed and scaled shape
    # is less than or equal to the desired ratio in either dimension.
    scale *= numpy.min(to_size / skewed_size)

    # Set the translation such that the skewed and scaled image falls within
    # the output shape's bounds.
    trans = (numpy.random.random((2,1)) - 0.5) * translation_variation
    # Raising to the 5th power concentrates the translations near zero while
    # keeping the sign and the +/-0.5 extremes.
    trans = ((2.0 * trans) ** 5.0) / 2.0
    if numpy.any(trans < -0.5) or numpy.any(trans > 0.5):
        out_of_bounds = True
    # Convert from a fraction of the free space to pixels.
    trans = (to_size - skewed_size * scale) * trans

    center_to = to_size / 2.
    center_from = from_size / 2.

    # Assemble the 2x3 matrix: the scaled rotation, plus an offset that maps
    # the source centre onto the (translated) destination centre.
    M = euler_to_mat(yaw, pitch, roll)[:2, :2]
    M *= scale
    M = numpy.hstack([M, trans + center_to - M * center_from])

    return M, out_of_bounds
136 | 
137 | 
def generate_code():
    """
    Generate a random plate code of the form `LLDD LLL`, where `L` is a
    random letter and `D` is a random digit.

    """
    pools = (common.LETTERS, common.LETTERS,
             common.DIGITS, common.DIGITS,
             common.LETTERS, common.LETTERS, common.LETTERS)
    picks = [random.choice(pool) for pool in pools]
    return "{}{}{}{} {}{}{}".format(*picks)
147 | 
148 | 
def rounded_rect(shape, radius):
    """
    Create a mask of a rectangle with rounded corners.

    :param shape:
        `(rows, columns)` of the mask.

    :param radius:
        Corner radius in pixels.

    :returns:
        An array of `shape` which is 1.0 inside the rounded rectangle and 0.0
        in the cut-off corners.

    """
    mask = numpy.ones(shape)

    # Blank out a radius-sized square in each of the four corners...
    near, far = slice(None, radius), slice(-radius, None)
    for rows in (near, far):
        for cols in (near, far):
            mask[rows, cols] = 0.0

    # ...then fill a disc back into each one. Centres are given as (x, y).
    height, width = shape[0], shape[1]
    centres = ((radius, radius),
               (radius, height - radius),
               (width - radius, radius),
               (width - radius, height - radius))
    for centre in centres:
        cv2.circle(mask, centre, radius, 1.0, -1)

    return mask
162 | 
163 | 
def generate_plate(font_height, char_ims):
    """
    Render a random number plate as a flat (unwarped) image.

    :param font_height:
        Height used to scale the random padding, character spacing and
        corner radius. The plate height is this value plus vertical padding.

    :param char_ims:
        Mapping from character to its image array. It is indexed with every
        character of the generated code, including the space.

    :return:
        Tuple ``(plate, mask, code)``: the plate image, the rounded-rectangle
        mask of the same shape, and the code with the space removed.

    """
    # The order of the random draws is significant for seeded runs.
    pad_x = random.uniform(0.2, 0.4) * font_height
    pad_y = random.uniform(0.1, 0.3) * font_height
    gap = font_height * random.uniform(-0.05, 0.05)
    corner_radius = 1 + int(font_height * 0.1 * random.random())

    code = generate_code()
    glyphs = [char_ims[c] for c in code]
    text_width = (sum(glyph.shape[1] for glyph in glyphs) +
                  (len(code) - 1) * gap)

    out_shape = (int(font_height + pad_y * 2),
                 int(text_width + pad_x * 2))

    text_color, plate_color = pick_colors()

    # Paste each glyph into the mask, advancing a fractional cursor so the
    # rounding error does not accumulate across characters.
    text_mask = numpy.zeros(out_shape)
    top = int(pad_y)
    cursor = pad_x
    for glyph in glyphs:
        left = int(cursor)
        glyph_h, glyph_w = glyph.shape[0], glyph.shape[1]
        text_mask[top:top + glyph_h, left:left + glyph_w] = glyph
        cursor += glyph_w + gap

    # Blend the two flat colours using the text mask as the mixing weight.
    canvas = numpy.ones(out_shape)
    background_part = canvas * plate_color * (1. - text_mask)
    text_part = canvas * text_color * text_mask
    plate = background_part + text_part

    return plate, rounded_rect(out_shape, corner_radius), code.replace(" ", "")
193 | 
194 | 
def generate_bg(num_bg_images):
    """
    Pick a random background crop of shape `OUTPUT_SHAPE`.

    Files are looked up as ``bgs/<index:08d>.jpg`` with a random index in
    ``[0, num_bg_images - 1]``, loaded as greyscale and divided by 255.
    Images smaller than `OUTPUT_SHAPE` in either dimension are rejected and
    another one is drawn.

    :param num_bg_images:
        Number of background images available in the ``bgs`` directory.

    :return:
        A randomly positioned crop of the chosen image with
        ``OUTPUT_SHAPE[0]`` rows and ``OUTPUT_SHAPE[1]`` columns.

    :raises IOError:
        If the chosen background file cannot be read.

    """
    out_h, out_w = OUTPUT_SHAPE[0], OUTPUT_SHAPE[1]

    while True:
        fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1))
        # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in OpenCV 3;
        # IMREAD_GRAYSCALE is the supported name for the same flag.
        bg = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
        if bg is None:
            # imread signals failure by returning None rather than raising;
            # fail with the file name instead of a TypeError on the division.
            raise IOError(
                "Could not read background image {}".format(fname))
        bg = bg / 255.
        if bg.shape[1] >= out_w and bg.shape[0] >= out_h:
            break

    x = random.randint(0, bg.shape[1] - out_w)
    y = random.randint(0, bg.shape[0] - out_h)
    bg = bg[y:y + out_h, x:x + out_w]

    return bg
209 | 
210 | 
def generate_im(char_ims, num_bg_images):
    """
    Compose one image: a random plate warped onto a random background.

    :param char_ims:
        Mapping from character to character image, passed on to
        `generate_plate`.

    :param num_bg_images:
        Number of available background images, passed on to `generate_bg`.

    :return:
        Tuple ``(image, code, in_bounds)``. `image` is clipped to
        ``[0, 1]``, `code` is the plate text, and `in_bounds` is the negation
        of the out-of-bounds flag returned by `make_affine_transform`.

    """
    bg = generate_bg(num_bg_images)
    plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims)

    transform_args = dict(from_shape=plate.shape,
                          to_shape=bg.shape,
                          min_scale=0.6,
                          max_scale=0.875,
                          rotation_variation=1.0,
                          scale_variation=1.5,
                          translation_variation=1.2)
    M, out_of_bounds = make_affine_transform(**transform_args)

    # warpAffine takes the destination size as (width, height).
    dest_size = (bg.shape[1], bg.shape[0])
    warped_plate = cv2.warpAffine(plate, M, dest_size)
    warped_mask = cv2.warpAffine(plate_mask, M, dest_size)

    # Alpha-blend the plate over the background using the warped mask.
    composite = warped_plate * warped_mask + bg * (1.0 - warped_mask)
    composite = cv2.resize(composite, (OUTPUT_SHAPE[1], OUTPUT_SHAPE[0]))

    # Add Gaussian noise, then clamp back into the valid intensity range.
    noise = numpy.random.normal(scale=0.05, size=composite.shape)
    noisy = numpy.clip(composite + noise, 0., 1.)

    return noisy, code, not out_of_bounds
235 | 
236 | 
def load_fonts(folder_path):
    """
    Load the character images for every ``.ttf`` font in a folder.

    :param folder_path:
        Directory to scan for files whose name ends in ``.ttf``.

    :return:
        Tuple ``(fonts, font_char_ims)``: the list of font file names found,
        and a dict mapping each of those names to a dict built from
        `make_char_ims` for that font at `FONT_HEIGHT`.

    """
    fonts = [name for name in os.listdir(folder_path)
             if name.endswith('.ttf')]
    font_char_ims = {
        font: dict(make_char_ims(os.path.join(folder_path, font),
                                 FONT_HEIGHT))
        for font in fonts
    }
    return fonts, font_char_ims
245 | 
246 | 
def generate_ims():
    """
    Generate number plate images.

    Fonts are loaded once from `FONT_DIR` and the number of background
    images is taken from the number of entries in the ``bgs`` directory.
    Each item is rendered with a font chosen at random.

    :return:
        Endless iterable of ``(image, code, in_bounds)`` tuples, as returned
        by `generate_im`.

    """
    fonts, font_char_ims = load_fonts(FONT_DIR)
    num_bg_images = len(os.listdir("bgs"))
    while True:
        yield generate_im(font_char_ims[random.choice(fonts)], num_bg_images)
260 | 
261 | 
if __name__ == "__main__":
    # Script entry point: write sys.argv[1] generated images into ./test/,
    # named <index>_<code>_<1 if the plate is in bounds else 0>.png.
    #
    # os.mkdir raises if "test" already exists; that is kept so a new run
    # never silently mixes its output with files from a previous one.
    os.mkdir("test")
    im_gen = itertools.islice(generate_ims(), int(sys.argv[1]))
    for img_idx, (im, c, p) in enumerate(im_gen):
        fname = "test/{:08d}_{}_{}.png".format(img_idx, c,
                                               "1" if p else "0")
        # Parenthesised single-argument form prints identically on Python 2
        # and is valid syntax on Python 3.
        print(fname)
        # Images are floats in [0, 1]; scale up to the 0-255 range for
        # writing.
        cv2.imwrite(fname, im * 255.)
270 | 
271 | 


--------------------------------------------------------------------------------