├── LICENSE
├── common.py
├── vis.py
├── README.md
├── extractbgs.py
├── model.py
├── detect.py
├── train.py
└── gen.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 matthewearl
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Matthew Earl
 2 | # 
 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | # of this software and associated documentation files (the "Software"), to deal
 5 | # in the Software without restriction, including without limitation the rights
 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | # copies of the Software, and to permit persons to whom the Software is
 8 | # furnished to do so, subject to the following conditions:
 9 | # 
10 | #     The above copyright notice and this permission notice shall be included
11 | #     in all copies or substantial portions of the Software.
12 | # 
13 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
16 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
20 | 
21 | """
22 | Definitions that don't fit elsewhere.
23 | 
24 | """
25 | 
26 | __all__ = (
27 |     'DIGITS',
28 |     'LETTERS',
29 |     'CHARS',
30 |     'sigmoid',
31 |     'softmax',
32 | )
33 | 
34 | import numpy
35 | 
36 | 
# Alphabet of characters that may appear in a plate code.
DIGITS = "0123456789"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Letters come first, so `CHARS.index(c)` is 0-25 for letters and 26-35 for
# digits.
CHARS = LETTERS + DIGITS
40 | 
def softmax(a):
    """
    Compute the softmax of `a` along its last axis.

    :param a:
        Array (or array-like) of logits. Any number of dimensions >= 1 is
        accepted; the softmax is taken over the last one.

    :returns:
        A `float64` array with the same shape as `a`, in which every slice
        along the last axis is non-negative and sums to 1.

    """
    a = numpy.asarray(a, dtype=numpy.float64)
    if a.size == 0:
        return a

    # Softmax is invariant to adding a constant to every logit, so subtract
    # the per-row maximum first. This keeps `exp` from overflowing to `inf`
    # (and the result from becoming NaN) for large logits.
    shifted = a - numpy.max(a, axis=-1, keepdims=True)
    exps = numpy.exp(shifted)
    return exps / numpy.sum(exps, axis=-1, keepdims=True)
44 | 
def sigmoid(a):
    """
    Apply the logistic function `1 / (1 + exp(-a))` element-wise to `a`.

    """
    decay = numpy.exp(-a)
    return 1. / (1. + decay)
47 | 
48 | 


--------------------------------------------------------------------------------
/vis.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Matthew Earl
 2 | # 
 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | # of this software and associated documentation files (the "Software"), to deal
 5 | # in the Software without restriction, including without limitation the rights
 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | # copies of the Software, and to permit persons to whom the Software is
 8 | # furnished to do so, subject to the following conditions:
 9 | # 
10 | #     The above copyright notice and this permission notice shall be included
11 | #     in all copies or substantial portions of the Software.
12 | # 
13 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
16 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
20 | 
21 | import sys
22 | 
23 | import matplotlib.pyplot as plt
24 | import numpy
25 | 
# Usage: vis.py WEIGHTS OUT_IMAGE
#
# Load the array archive named by the first command-line argument.
a = numpy.load(sys.argv[1])

# NOTE(review): presumably 'arr_0' holds the first convolutional layer's
# kernels (the first entry of the parameter list in model.py) -- confirm
# against the code that writes the weights file.
conv1 = a['arr_0']

# An 8x8 grid of axes; `squeeze=False` guarantees `ax` is always 2-D.
fig, ax = plt.subplots(8, 8,
                       figsize=(8, 8),
                       dpi=100,
                       squeeze=False)

# The string literal below is disabled code that plotted the `conv1` kernels
# instead; it is never executed.
"""
for i in range(conv1.shape[3]):
    ax[i // 8, i % 8].imshow(conv1[:, :, 0, i], cmap='Greys')
    
"""
# NOTE(review): presumably 'arr_2' holds the second convolutional layer's
# kernels, laid out as [height, width, in_channel, out_channel] -- confirm.
conv2 = a['arr_2']
# Plot at most the first 8 slices along each of the last two axes: grid column
# `i` indexes the last axis and grid row `j` the second-to-last axis.
for i in range(min(8, conv2.shape[3])):
    for j in range(min(8, conv2.shape[2])):
        ax[j, i].imshow(conv2[:, :, j, i], cmap='Greys')

# The explicit dpi here overrides the figure's dpi=100, so the 8x8 inch figure
# is written at 30 dots per inch.
fig.savefig(sys.argv[2], dpi=30.)
46 | 
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Deep ANPR
 2 | 
 3 | Using neural networks to build an automatic number plate recognition system.
 4 | See [this blog post](http://matthewearl.github.io/2016/05/06/cnn-anpr/) for an
 5 | explanation.
 6 | 
 7 | **Note: This is an experimental project and is incomplete in a number of ways,
 8 | if you're looking for a practical number plate recognition system this project
 9 | is not for you.** If however you've read the above blog post and wish to tinker
10 | with the code, read on.  If you're really keen you can tackle some of the
11 | enhancements on the Issues page to help make this project more practical.
12 | Please comment on the relevant issue if you plan on making an enhancement and
13 | we can talk through the potential solution.
14 | 
15 | Usage is as follows:
16 | 
17 | 1. `./extractbgs.py SUN397.tar.gz`: Extract ~3GB of background images from the [SUN database](http://groups.csail.mit.edu/vision/SUN/)
18 |    into `bgs/`. (`bgs/` must not already exist.) The tar file (36GB) can be [downloaded here](http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz).
19 |    This step may take a while as it will extract 108,634 images.
20 | 
21 | 2. `./gen.py 1000`: Generate 1000 test set images in `test/`. (`test/` must not
22 |     already exist.) This step requires `UKNumberPlate.ttf` to be in the
23 |     `fonts/` directory, which can be
24 |     [downloaded here](http://www.dafont.com/uk-number-plate.font).
25 | 
26 | 3. `./train.py`: Train the model. A GPU is recommended for this step. It will
27 |    take around 100,000 batches to converge. When you're satisfied that the
28 |    network has learned enough press `Ctrl+C` and the process will write the
29 |    weights to `weights.npz` and return.
30 | 
31 | 4. `./detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image.
32 | 
33 | The project has the following dependencies:
34 | 
35 | * [TensorFlow](https://tensorflow.org)
36 | * OpenCV
37 | * NumPy
38 | 
 39 | Additional typefaces can be put in `fonts/` so that the network is trained to
 40 | match them.  With a large enough variety the network will learn to generalize and
41 | will match as yet unseen typefaces. See
42 | [#1](https://github.com/matthewearl/deep-anpr/issues/1) for more information.
43 | 
44 | 


--------------------------------------------------------------------------------
/extractbgs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # Copyright (c) 2016 Matthew Earl
 4 | # 
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to deal
 7 | # in the Software without restriction, including without limitation the rights
 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | # 
12 | #     The above copyright notice and this permission notice shall be included
13 | #     in all copies or substantial portions of the Software.
14 | # 
15 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
18 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 
23 | 
24 | 
25 | """
26 | Extract background images from a tar archive.
27 | 
28 | """
29 | 
30 | 
31 | __all__ = (
32 |     'extract_backgrounds',
33 | )
34 | 
35 | 
36 | import os
37 | import sys
38 | import tarfile
39 | 
40 | import cv2
41 | import numpy
42 | 
43 | 
def im_from_file(f):
    """
    Decode an image from an open binary file object as grayscale.

    :param f:
        File-like object whose `read()` returns the encoded image bytes.

    :returns:
        The value returned by `cv2.imdecode` (`None` when the data cannot be
        decoded), or `None` if the file is empty.

    """
    a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8)
    if a.size == 0:
        # Nothing to decode; report it the same way as an undecodable image.
        return None

    # `cv2.CV_LOAD_IMAGE_GRAYSCALE` only exists in OpenCV 2.x. Prefer the
    # `IMREAD_GRAYSCALE` name and fall back to its numeric value (0) on
    # builds that do not expose it.
    grayscale_flag = getattr(cv2, "IMREAD_GRAYSCALE", 0)
    return cv2.imdecode(a, grayscale_flag)
47 | 
48 | 
def extract_backgrounds(archive_name):
    """
    Extract backgrounds from provided tar archive.

    Every member whose name ends in ``.jpg`` is decoded as grayscale, cropped
    to a square (keeping the top-left corner), and scaled down to 256x256 if
    the square is larger than that. Smaller images keep their cropped size.
    The results are written to ``./bgs/`` as sequentially numbered JPEGs, and
    each output file name is printed.

    :param archive_name:
        Name of the .tar file containing JPEGs of background images.

    :raises OSError:
        If ``./bgs/`` already exists (raised by `os.mkdir`).

    :raises Exception:
        If an output image cannot be written.

    """
    os.mkdir("bgs")

    t = tarfile.open(name=archive_name)
    try:
        index = 0
        # Iterating the TarFile yields its members one at a time as the
        # archive is read.
        for m in t:
            if not m.name.endswith(".jpg"):
                continue
            f = t.extractfile(m)
            if f is None:
                # Not a regular file (e.g. a directory named "*.jpg").
                continue
            try:
                im = im_from_file(f)
            finally:
                f.close()
            if im is None:
                # Undecodable image; skip it without consuming an index.
                continue

            # Crop to a square whose side is the shorter image dimension.
            side = min(im.shape[0], im.shape[1])
            im = im[:side, :side]
            if side > 256:
                im = cv2.resize(im, (256, 256))
            fname = "bgs/{:08}.jpg".format(index)
            print(fname)
            rc = cv2.imwrite(fname, im)
            if not rc:
                raise Exception("Failed to write file {}".format(fname))
            index += 1
    finally:
        # Release the archive's file handle even if extraction fails part way.
        t.close()
93 | 
94 | 
if __name__ == "__main__":

    # Usage: ./extractbgs.py ARCHIVE -- the archive path is the only argument.
    extract_backgrounds(sys.argv[1])
98 | 
99 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016 Matthew Earl
  2 | # 
  3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | # of this software and associated documentation files (the "Software"), to deal
  5 | # in the Software without restriction, including without limitation the rights
  6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | # copies of the Software, and to permit persons to whom the Software is
  8 | # furnished to do so, subject to the following conditions:
  9 | # 
 10 | #     The above copyright notice and this permission notice shall be included
 11 | #     in all copies or substantial portions of the Software.
 12 | # 
 13 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 14 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 15 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 16 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 17 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 18 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 19 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
 20 | 
 21 | 
 22 | """
 23 | Definition of the neural networks. 
 24 | 
 25 | """
 26 | 
 27 | 
 28 | __all__ = (
 29 |     'get_training_model',
 30 |     'get_detect_model',
 31 |     'WINDOW_SHAPE',
 32 | )
 33 | 
 34 | 
 35 | import tensorflow as tf
 36 | 
 37 | import common
 38 | 
 39 | 
# (height, width) in pixels of the window the networks operate on.
WINDOW_SHAPE = (64, 128)
 41 | 
 42 | 
 43 | # Utility functions
 44 | def weight_variable(shape):
 45 |   initial = tf.truncated_normal(shape, stddev=0.1)
 46 |   return tf.Variable(initial)
 47 | 
 48 | 
 49 | def bias_variable(shape):
 50 |   initial = tf.constant(0.1, shape=shape)
 51 |   return tf.Variable(initial)
 52 | 
 53 | 
 54 | def conv2d(x, W, stride=(1, 1), padding='SAME'):
 55 |   return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1],
 56 |                       padding=padding)
 57 | 
 58 | 
 59 | def max_pool(x, ksize=(2, 2), stride=(2, 2)):
 60 |   return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1],
 61 |                         strides=[1, stride[0], stride[1], 1], padding='SAME')
 62 | 
 63 | 
 64 | def avg_pool(x, ksize=(2, 2), stride=(2, 2)):
 65 |   return tf.nn.avg_pool(x, ksize=[1, ksize[0], ksize[1], 1],
 66 |                         strides=[1, stride[0], stride[1], 1], padding='SAME')
 67 | 
 68 | 
 69 | def convolutional_layers():
 70 |     """
 71 |     Get the convolutional layers of the model.
 72 | 
 73 |     """
 74 |     x = tf.placeholder(tf.float32, [None, None, None])
 75 | 
 76 |     # First layer
 77 |     W_conv1 = weight_variable([5, 5, 1, 48])
 78 |     b_conv1 = bias_variable([48])
 79 |     x_expanded = tf.expand_dims(x, 3)
 80 |     h_conv1 = tf.nn.relu(conv2d(x_expanded, W_conv1) + b_conv1)
 81 |     h_pool1 = max_pool(h_conv1, ksize=(2, 2), stride=(2, 2))
 82 | 
 83 |     # Second layer
 84 |     W_conv2 = weight_variable([5, 5, 48, 64])
 85 |     b_conv2 = bias_variable([64])
 86 | 
 87 |     h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
 88 |     h_pool2 = max_pool(h_conv2, ksize=(2, 1), stride=(2, 1))
 89 | 
 90 |     # Third layer
 91 |     W_conv3 = weight_variable([5, 5, 64, 128])
 92 |     b_conv3 = bias_variable([128])
 93 | 
 94 |     h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
 95 |     h_pool3 = max_pool(h_conv3, ksize=(2, 2), stride=(2, 2))
 96 | 
 97 |     return x, h_pool3, [W_conv1, b_conv1,
 98 |                         W_conv2, b_conv2,
 99 |                         W_conv3, b_conv3]
100 | 
101 | 
def get_training_model():
    """
    Build the model used for training.

    The model acts on a batch of 128x64 (width x height) windows and outputs,
    for each window, a vector `v` of `1 + 7 * len(common.CHARS)` raw scores
    (no sigmoid or softmax is applied here).

    `v[0]` scores whether a plate is fully within the window and at the
    correct scale. `v[1 + i * len(common.CHARS) + c]` scores the `i`'th
    character being `common.CHARS[c]`.

    :returns:
        A tuple `(x, y, params)` of the input placeholder, the output tensor
        and the list of all model variables.

    """
    x, conv_layer, conv_vars = convolutional_layers()

    # Number of values the convolutional stack produces for a single window.
    flat_size = 32 * 8 * 128
    num_outputs = 1 + 7 * len(common.CHARS)

    # Densely connected layer
    fc1_weights = weight_variable([flat_size, 2048])
    fc1_bias = bias_variable([2048])

    flattened = tf.reshape(conv_layer, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_bias)

    # Output layer
    fc2_weights = weight_variable([2048, num_outputs])
    fc2_bias = bias_variable([num_outputs])

    outputs = tf.matmul(hidden, fc2_weights) + fc2_bias

    all_vars = conv_vars + [fc1_weights, fc1_bias, fc2_weights, fc2_bias]
    return (x, outputs, all_vars)
128 | 
129 | 
def get_detect_model():
    """
    Build the model used for detection.

    This has the same parameters as the training model, but the two fully
    connected layers are expressed as convolutions so the network can be
    applied to an input of arbitrary size. The 128x64 window is effectively
    slid across the image in steps of 8 pixels vertically and 4 pixels
    horizontally.

    The output is of the form `v`, where `v[i, j]` is equivalent to the output
    of the training model for the window at coordinates `(8 * i, 4 * j)`.

    :returns:
        A tuple `(x, y, params)` of the input placeholder, the output tensor
        and the list of model variables, in the same order as the training
        model's.

    """
    x, conv_layer, conv_vars = convolutional_layers()

    num_outputs = 1 + 7 * len(common.CHARS)

    # Fourth layer: the dense weights viewed as an 8x32 kernel over the 128
    # feature channels. 'VALID' padding means only whole windows are scored.
    fc1_weights = weight_variable([8 * 32 * 128, 2048])
    fc1_kernel = tf.reshape(fc1_weights, [8,  32, 128, 2048])
    fc1_bias = bias_variable([2048])
    hidden = tf.nn.relu(
        conv2d(conv_layer, fc1_kernel, stride=(1, 1), padding="VALID") +
        fc1_bias)

    # Fifth layer: the output weights viewed as a 1x1 convolution.
    fc2_weights = weight_variable([2048, num_outputs])
    fc2_kernel = tf.reshape(fc2_weights, [1, 1, 2048, num_outputs])
    fc2_bias = bias_variable([num_outputs])
    outputs = conv2d(hidden, fc2_kernel) + fc2_bias

    all_vars = conv_vars + [fc1_weights, fc1_bias, fc2_weights, fc2_bias]
    return (x, outputs, all_vars)
154 | 
155 | 


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # Copyright (c) 2016 Matthew Earl
  4 | # 
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | # 
 12 | #     The above copyright notice and this permission notice shall be included
 13 | #     in all copies or substantial portions of the Software.
 14 | # 
 15 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 16 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 18 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 19 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 20 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 21 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | 
 24 | """
 25 | Routines to detect number plates.
 26 | 
 27 | Use `detect` to detect all bounding boxes, and use `post_process` on the output
 28 | of `detect` to filter using non-maximum suppression.
 29 | 
 30 | """
 31 | 
 32 | 
 33 | __all__ = (
 34 |     'detect',
 35 |     'post_process',
 36 | )
 37 | 
 38 | 
 39 | import collections
 40 | import itertools
 41 | import math
 42 | import sys
 43 | 
 44 | import cv2
 45 | import numpy
 46 | import tensorflow as tf
 47 | 
 48 | import common
 49 | import model
 50 | 
 51 | 
 52 | def make_scaled_ims(im, min_shape):
 53 |     ratio = 1. / 2 ** 0.5
 54 |     shape = (im.shape[0] / ratio, im.shape[1] / ratio)
 55 | 
 56 |     while True:
 57 |         shape = (int(shape[0] * ratio), int(shape[1] * ratio))
 58 |         if shape[0] < min_shape[0] or shape[1] < min_shape[1]:
 59 |             break
 60 |         yield cv2.resize(im, (shape[1], shape[0]))
 61 | 
 62 | 
 63 | def detect(im, param_vals):
 64 |     """
 65 |     Detect number plates in an image.
 66 | 
 67 |     :param im:
 68 |         Image to detect number plates in.
 69 | 
 70 |     :param param_vals:
 71 |         Model parameters to use. These are the parameters output by the `train`
 72 |         module.
 73 | 
 74 |     :returns:
 75 |         Iterable of `bbox_tl, bbox_br, letter_probs`, defining the bounding box
 76 |         top-left and bottom-right corners respectively, and a 7,36 matrix
 77 |         giving the probability distributions of each letter.
 78 | 
 79 |     """
 80 | 
 81 |     # Convert the image to various scales.
 82 |     scaled_ims = list(make_scaled_ims(im, model.WINDOW_SHAPE))
 83 | 
 84 |     # Load the model which detects number plates over a sliding window.
 85 |     x, y, params = model.get_detect_model()
 86 | 
 87 |     # Execute the model at each scale.
 88 |     with tf.Session(config=tf.ConfigProto()) as sess:
 89 |         y_vals = []
 90 |         for scaled_im in scaled_ims:
 91 |             feed_dict = {x: numpy.stack([scaled_im])}
 92 |             feed_dict.update(dict(zip(params, param_vals)))
 93 |             y_vals.append(sess.run(y, feed_dict=feed_dict))
 94 | 
 95 |     # Interpret the results in terms of bounding boxes in the input image.
 96 |     # Do this by identifying windows (at all scales) where the model predicts a
 97 |     # number plate has a greater than 50% probability of appearing.
 98 |     #
 99 |     # To obtain pixel coordinates, the window coordinates are scaled according
100 |     # to the stride size, and pixel coordinates.
101 |     for i, (scaled_im, y_val) in enumerate(zip(scaled_ims, y_vals)):
102 |         for window_coords in numpy.argwhere(y_val[0, :, :, 0] >
103 |                                                        -math.log(1./0.99 - 1)):
104 |             letter_probs = (y_val[0,
105 |                                   window_coords[0],
106 |                                   window_coords[1], 1:].reshape(
107 |                                     7, len(common.CHARS)))
108 |             letter_probs = common.softmax(letter_probs)
109 | 
110 |             img_scale = float(im.shape[0]) / scaled_im.shape[0]
111 | 
112 |             bbox_tl = window_coords * (8, 4) * img_scale
113 |             bbox_size = numpy.array(model.WINDOW_SHAPE) * img_scale
114 | 
115 |             present_prob = common.sigmoid(
116 |                                y_val[0, window_coords[0], window_coords[1], 0])
117 | 
118 |             yield bbox_tl, bbox_tl + bbox_size, present_prob, letter_probs
119 | 
120 | 
def _overlaps(match1, match2):
    """
    Return a true value if the bounding boxes of the two matches intersect.

    Each match is a 4-tuple whose first two entries are the top-left and
    bottom-right corners. Boxes that merely touch along an edge do not count
    as overlapping.

    """
    tl_a, br_a, _, _ = match1
    tl_b, br_b, _, _ = match2

    spans_cross_axis0 = br_a[0] > tl_b[0] and br_b[0] > tl_a[0]
    return spans_cross_axis0 and (br_a[1] > tl_b[1] and
                                  br_b[1] > tl_a[1])
128 | 
129 | 
def _group_overlapping_rectangles(matches):
    """
    Partition `matches` into groups of overlapping rectangles.

    Each match joins the group of the earliest preceding match it overlaps,
    or starts a new group if it overlaps none of them.

    :returns:
        A `defaultdict` mapping group number to the list of matches in that
        group.

    """
    matches = list(matches)
    group_of = []
    groups = collections.defaultdict(list)

    for idx, match in enumerate(matches):
        for earlier_idx in range(idx):
            if _overlaps(match, matches[earlier_idx]):
                group_id = group_of[earlier_idx]
                break
        else:
            # No earlier match overlaps this one: open a new group. Group
            # numbers are handed out consecutively from zero.
            group_id = len(groups)
        group_of.append(group_id)
        groups[group_id].append(match)

    return groups
148 | 
149 | 
def post_process(matches):
    """
    Take an iterable of matches as returned by `detect` and merge duplicates.

    Merging consists of two steps:
      - Finding sets of overlapping rectangles.
      - Finding the intersection of those sets, along with the code
        corresponding with the rectangle with the highest presence parameter.

    :returns:
        Iterable of `bbox_tl, bbox_br, present_prob, letter_probs`, one per
        group of overlapping matches.

    """
    groups = _group_overlapping_rectangles(matches)

    for group_matches in groups.values():
        # numpy.stack requires a real sequence of arrays; handing it a
        # generator is rejected by current NumPy releases.
        mins = numpy.stack([numpy.array(m[0]) for m in group_matches])
        maxs = numpy.stack([numpy.array(m[1]) for m in group_matches])
        present_probs = numpy.array([m[2] for m in group_matches])
        letter_probs = numpy.stack([m[3] for m in group_matches])

        # The intersection of the boxes is bounded by the largest top-left
        # corner and the smallest bottom-right corner.
        yield (numpy.max(mins, axis=0).flatten(),
               numpy.min(maxs, axis=0).flatten(),
               numpy.max(present_probs),
               letter_probs[numpy.argmax(present_probs)])
172 | 
173 | 
def letter_probs_to_code(letter_probs):
    """
    Convert a matrix of per-position character probabilities into a string,
    taking the most probable character of `common.CHARS` in each row.

    """
    best_indices = numpy.argmax(letter_probs, axis=1)
    return "".join([common.CHARS[idx] for idx in best_indices])
176 | 
177 | 
if __name__ == "__main__":
    # Usage: ./detect.py IN_IMAGE WEIGHTS OUT_IMAGE
    im = cv2.imread(sys.argv[1])
    if im is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("Failed to read image {}".format(sys.argv[1]))
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) / 255.

    # The archive's arrays are named "arr_<N>"; sort numerically on N so the
    # values line up with the model's parameter list.
    f = numpy.load(sys.argv[2])
    param_vals = [f[n] for n in sorted(f.files, key=lambda s: int(s[4:]))]

    for pt1, pt2, present_prob, letter_probs in post_process(
                                                  detect(im_gray, param_vals)):
        # Swap the coordinate order for OpenCV's drawing functions, which
        # take points as (x, y). Built with a generator rather than
        # `reversed(map(...))`, which fails on Python 3 where `map` returns
        # an iterator.
        pt1 = tuple(int(v) for v in pt1[::-1])
        pt2 = tuple(int(v) for v in pt2[::-1])

        code = letter_probs_to_code(letter_probs)

        color = (0.0, 255.0, 0.0)
        cv2.rectangle(im, pt1, pt2, color)

        # Draw the code twice: a thick black pass as an outline, then a
        # thinner white pass on top so the text is legible on any background.
        cv2.putText(im,
                    code,
                    pt1,
                    cv2.FONT_HERSHEY_PLAIN,
                    1.5,
                    (0, 0, 0),
                    thickness=5)

        cv2.putText(im,
                    code,
                    pt1,
                    cv2.FONT_HERSHEY_PLAIN,
                    1.5,
                    (255, 255, 255),
                    thickness=2)

    cv2.imwrite(sys.argv[3], im)
212 | 
213 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # Copyright (c) 2016 Matthew Earl
  4 | # 
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | # 
 12 | #     The above copyright notice and this permission notice shall be included
 13 | #     in all copies or substantial portions of the Software.
 14 | # 
 15 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 16 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 18 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 19 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 20 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 21 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | 
 24 | """
 25 | Routines for training the network.
 26 | 
 27 | """
 28 | 
 29 | 
 30 | __all__ = (
 31 |     'train',
 32 | )
 33 | 
 34 | 
 35 | import functools
 36 | import glob
 37 | import itertools
 38 | import multiprocessing
 39 | import random
 40 | import sys
 41 | import time
 42 | 
 43 | import cv2
 44 | import numpy
 45 | import tensorflow as tf
 46 | 
 47 | import common
 48 | import gen
 49 | import model
 50 | 
 51 | 
 52 | def code_to_vec(p, code):
 53 |     def char_to_vec(c):
 54 |         y = numpy.zeros((len(common.CHARS),))
 55 |         y[common.CHARS.index(c)] = 1.0
 56 |         return y
 57 | 
 58 |     c = numpy.vstack([char_to_vec(c) for c in code])
 59 | 
 60 |     return numpy.concatenate([[1. if p else 0], c.flatten()])
 61 | 
 62 | 
 63 | def read_data(img_glob):
 64 |     for fname in sorted(glob.glob(img_glob)):
 65 |         im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255.
 66 |         code = fname.split("/")[1][9:16]
 67 |         p = fname.split("/")[1][17] == '1'
 68 |         yield im, code_to_vec(p, code)
 69 | 
 70 | 
 71 | def unzip(b):
 72 |     xs, ys = zip(*b)
 73 |     xs = numpy.array(xs)
 74 |     ys = numpy.array(ys)
 75 |     return xs, ys
 76 | 
 77 | 
 78 | def batch(it, batch_size):
 79 |     out = []
 80 |     for x in it:
 81 |         out.append(x)
 82 |         if len(out) == batch_size:
 83 |             yield out
 84 |             out = []
 85 |     if out:
 86 |         yield out
 87 | 
 88 | 
 89 | def mpgen(f):
 90 |     def main(q, args, kwargs):
 91 |         try:
 92 |             for item in f(*args, **kwargs):
 93 |                 q.put(item)
 94 |         finally:
 95 |             q.close()
 96 | 
 97 |     @functools.wraps(f)
 98 |     def wrapped(*args, **kwargs):
 99 |         q = multiprocessing.Queue(3) 
100 |         proc = multiprocessing.Process(target=main,
101 |                                        args=(q, args, kwargs))
102 |         proc.start()
103 |         try:
104 |             while True:
105 |                 item = q.get()
106 |                 yield item
107 |         finally:
108 |             proc.terminate()
109 |             proc.join()
110 | 
111 |     return wrapped
112 |         
113 | 
@mpgen
def read_batches(batch_size):
    """
    Endlessly yield `(images, label_vectors)` array pairs, each built from the
    next `batch_size` items of `gen.generate_ims()`.

    """
    image_source = gen.generate_ims()

    while True:
        samples = itertools.islice(image_source, batch_size)
        yield unzip((im, code_to_vec(p, c)) for im, c, p in samples)
123 | 
124 | 
def get_loss(y, y_):
    """
    Build the loss tensors for the training model.

    :param y:
        Model output: one row per example holding the presence score followed
        by 7 blocks of `len(common.CHARS)` character scores.

    :param y_:
        Target values in the same layout as `y`.

    :returns:
        A tuple `(digits_loss, presence_loss, total_loss)` of scalar tensors,
        where `total_loss` is the sum of the other two.

    """
    # Calculate the loss from digits being incorrect.  Don't count loss from
    # digits that are in non-present plates.
    # NOTE(review): TensorFlow 1.0 and later require these cross-entropy
    # functions to be called with keyword arguments -- confirm the targeted
    # version before changing the call style.
    digits_loss = tf.nn.softmax_cross_entropy_with_logits(
                                          tf.reshape(y[:, 1:],
                                                     [-1, len(common.CHARS)]),
                                          tf.reshape(y_[:, 1:],
                                                     [-1, len(common.CHARS)]))
    digits_loss = tf.reshape(digits_loss, [-1, 7])
    digits_loss = tf.reduce_sum(digits_loss, 1)

    # Build the mask with an explicit element-wise op. Writing
    # `y_[:, 0] != 0` is a plain Python comparison on TensorFlow versions
    # whose tensors do not overload `!=`; it evaluates to the constant `True`
    # and masks nothing.
    present_mask = tf.cast(tf.not_equal(y_[:, 0], 0.), digits_loss.dtype)
    digits_loss *= present_mask
    digits_loss = tf.reduce_sum(digits_loss)

    # Calculate the loss from presence indicator being wrong.
    presence_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                                                          y[:, :1], y_[:, :1])
    # Presumably scaled by 7 so the single presence output carries as much
    # weight as the seven character outputs.
    presence_loss = 7 * tf.reduce_sum(presence_loss)

    return digits_loss, presence_loss, digits_loss + presence_loss
144 | 
145 | 
def train(learn_rate, report_steps, batch_size, initial_weights=None):
    """
    Train the network.

    The function operates interactively: Progress is reported on stdout, and
    training ceases upon `KeyboardInterrupt` (or when the batch source runs
    dry), at which point the learned weights are saved to `weights.npz`, and
    also returned.

    :param learn_rate:
        Learning rate to use.

    :param report_steps:
        Every `report_steps` batches a progress report is printed.

    :param batch_size:
        The size of the batches used for training.

    :param initial_weights:
        (Optional.) Weights to initialize the network with. Must contain
        exactly one entry per model parameter.

    :return:
        The learned network weights.

    """
    x, y, params = model.get_training_model()

    y_ = tf.placeholder(tf.float32, [None, 7 * len(common.CHARS) + 1])

    digits_loss, presence_loss, loss = get_loss(y, y_)
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

    best = tf.argmax(tf.reshape(y[:, 1:], [-1, 7, len(common.CHARS)]), 2)
    correct = tf.argmax(tf.reshape(y_[:, 1:], [-1, 7, len(common.CHARS)]), 2)

    # Build the presence tensors once, up front. Constructing them inside
    # do_report() would add fresh nodes to the graph on every report.
    presence_predicted = tf.greater(y[:, 0], 0)
    presence_expected = y_[:, 0]

    if initial_weights is not None:
        assert len(params) == len(initial_weights)
        assign_ops = [w.assign(v) for w, v in zip(params, initial_weights)]

    init = tf.initialize_all_variables()

    def vec_to_plate(v):
        # Map a sequence of character indices back to the plate string.
        return "".join(common.CHARS[i] for i in v)

    def do_report():
        # Evaluate the current network on the held-out test items.
        r = sess.run([best,
                      correct,
                      presence_predicted,
                      presence_expected,
                      digits_loss,
                      presence_loss,
                      loss],
                     feed_dict={x: test_xs, y_: test_ys})
        # An item counts as correct when every character matches, or when
        # both the predicted and the expected presence flags are off.
        num_correct = numpy.sum(
                        numpy.logical_or(
                            numpy.all(r[0] == r[1], axis=1),
                            numpy.logical_and(r[2] < 0.5,
                                              r[3] < 0.5)))
        r_short = (r[0][:190], r[1][:190], r[2][:190], r[3][:190])
        for b, c, pb, pc in zip(*r_short):
            print("{} {} <-> {} {}".format(vec_to_plate(c), pc,
                                           vec_to_plate(b), float(pb)))
        num_p_correct = numpy.sum(r[2] == r[3])

        summary = ("B{:3d} {:2.02f}% {:02.02f}% loss: {} "
                   "(digits: {}, presence: {}) |{}|").format(
            batch_idx,
            100. * num_correct / (len(r[0])),
            100. * num_p_correct / len(r[2]),
            r[6],
            r[4],
            r[5],
            "".join("X "[numpy.array_equal(b, c) or (not pb and not pc)]
                                           for b, c, pb, pc in zip(*r_short)))
        print(summary)

    def do_batch():
        sess.run(train_step,
                 feed_dict={x: batch_xs, y_: batch_ys})
        if batch_idx % report_steps == 0:
            do_report()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)
        if initial_weights is not None:
            sess.run(assign_ops)

        test_xs, test_ys = unzip(list(read_data("test/*.png"))[:50])

        try:
            last_batch_idx = 0
            last_batch_time = time.time()
            batch_iter = enumerate(read_batches(batch_size))
            for batch_idx, (batch_xs, batch_ys) in batch_iter:
                do_batch()
                if batch_idx % report_steps == 0:
                    batch_time = time.time()
                    if last_batch_idx != batch_idx:
                        print("time for 60 batches {}".format(
                            60 * (last_batch_time - batch_time) /
                                            (last_batch_idx - batch_idx)))
                        last_batch_idx = batch_idx
                        last_batch_time = batch_time

        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop training; fall through to the
            # save below.
            pass

        # Save on every exit path from the loop, not only on Ctrl-C, so a
        # finite batch source does not silently discard the trained weights.
        last_weights = [p.eval() for p in params]
        numpy.savez("weights.npz", *last_weights)
        return last_weights
253 | 
254 | 
if __name__ == "__main__":
    # An optional first argument names an `.npz` archive of previously saved
    # weights to resume from.
    initial_weights = None
    if len(sys.argv) > 1:
        archive = numpy.load(sys.argv[1])
        # Order entries by the integer that follows the first four
        # characters of each archive key.
        ordered_names = sorted(archive.files, key=lambda s: int(s[4:]))
        initial_weights = [archive[name] for name in ordered_names]

    train(learn_rate=0.001,
          report_steps=20,
          batch_size=50,
          initial_weights=initial_weights)
267 | 
268 | 


--------------------------------------------------------------------------------
/gen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # Copyright (c) 2016 Matthew Earl
  4 | # 
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | # 
 12 | #     The above copyright notice and this permission notice shall be included
 13 | #     in all copies or substantial portions of the Software.
 14 | # 
 15 | #     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 16 | #     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17 | #     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 18 | #     NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 19 | #     DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 20 | #     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 21 | #     USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | 
 24 | 
 25 | """
 26 | Generate training and test images.
 27 | 
 28 | """
 29 | 
 30 | 
# Names exported by `from gen import *`.
__all__ = (
    'generate_ims',
)
 34 | 
 35 | 
 36 | import itertools
 37 | import math
 38 | import os
 39 | import random
 40 | import sys
 41 | 
 42 | import cv2
 43 | import numpy
 44 | 
 45 | from PIL import Image
 46 | from PIL import ImageDraw
 47 | from PIL import ImageFont
 48 | 
 49 | import common
 50 | 
# Directory that generate_ims() passes to load_fonts() to find `.ttf` files.
FONT_DIR = "./fonts"
FONT_HEIGHT = 32  # Pixel size to which the chars are resized

# (rows, columns) of each generated image and of each background crop.
OUTPUT_SHAPE = (64, 128)

# Characters to render glyph images for: the plate alphabet plus the space
# that generate_code() places inside each code.
CHARS = common.CHARS + " "
 57 | 
 58 | 
 59 | def make_char_ims(font_path, output_height):
 60 |     font_size = output_height * 4
 61 | 
 62 |     font = ImageFont.truetype(font_path, font_size)
 63 | 
 64 |     height = max(font.getsize(c)[1] for c in CHARS)
 65 | 
 66 |     for c in CHARS:
 67 |         width = font.getsize(c)[0]
 68 |         im = Image.new("RGBA", (width, height), (0, 0, 0))
 69 | 
 70 |         draw = ImageDraw.Draw(im)
 71 |         draw.text((0, 0), c, (255, 255, 255), font=font)
 72 |         scale = float(output_height) / height
 73 |         im = im.resize((int(width * scale), output_height), Image.ANTIALIAS)
 74 |         yield c, numpy.array(im)[:, :, 0].astype(numpy.float32) / 255.
 75 | 
 76 | 
 77 | def euler_to_mat(yaw, pitch, roll):
 78 |     # Rotate clockwise about the Y-axis
 79 |     c, s = math.cos(yaw), math.sin(yaw)
 80 |     M = numpy.matrix([[  c, 0.,  s],
 81 |                       [ 0., 1., 0.],
 82 |                       [ -s, 0.,  c]])
 83 | 
 84 |     # Rotate clockwise about the X-axis
 85 |     c, s = math.cos(pitch), math.sin(pitch)
 86 |     M = numpy.matrix([[ 1., 0., 0.],
 87 |                       [ 0.,  c, -s],
 88 |                       [ 0.,  s,  c]]) * M
 89 | 
 90 |     # Rotate clockwise about the Z-axis
 91 |     c, s = math.cos(roll), math.sin(roll)
 92 |     M = numpy.matrix([[  c, -s, 0.],
 93 |                       [  s,  c, 0.],
 94 |                       [ 0., 0., 1.]]) * M
 95 | 
 96 |     return M
 97 | 
 98 | 
 99 | def pick_colors():
100 |     first = True
101 |     while first or plate_color - text_color < 0.3:
102 |         text_color = random.random()
103 |         plate_color = random.random()
104 |         if text_color > plate_color:
105 |             text_color, plate_color = plate_color, text_color
106 |         first = False
107 |     return text_color, plate_color
108 | 
109 | 
def make_affine_transform(from_shape, to_shape,
                          min_scale, max_scale,
                          scale_variation=1.0,
                          rotation_variation=1.0,
                          translation_variation=1.0):
    """
    Make a random affine transform that places one image inside another.

    :param from_shape:
        `(rows, columns)` of the image being transformed.

    :param to_shape:
        `(rows, columns)` of the destination image.

    :param min_scale, max_scale:
        Range of scales regarded as in bounds.

    :param scale_variation, rotation_variation, translation_variation:
        Multipliers widening (or narrowing) the random ranges. Values
        above 1.0 allow the scale or translation to leave its bounds.

    :return:
        `(M, out_of_bounds)`: `M` is a 2x3 matrix and `out_of_bounds` a bool
        that is true when the drawn scale fell outside
        `[min_scale, max_scale]` or the translation left its nominal range.

    """
    # Column vectors holding (width, height).
    from_size = numpy.array([[from_shape[1], from_shape[0]]]).T
    to_size = numpy.array([[to_shape[1], to_shape[0]]]).T

    # NOTE: the order of the random draws below (scale, roll, pitch, yaw,
    # translation) determines the output for a given RNG state.
    mid_scale = (min_scale + max_scale) * 0.5
    half_spread = (max_scale - min_scale) * 0.5 * scale_variation
    scale = random.uniform(mid_scale - half_spread, mid_scale + half_spread)
    scale_out_of_bounds = scale > max_scale or scale < min_scale

    roll = random.uniform(-0.3, 0.3) * rotation_variation
    pitch = random.uniform(-0.2, 0.2) * rotation_variation
    yaw = random.uniform(-1.2, 1.2) * rotation_variation

    # Compute a bounding box on the skewed input image (`from_shape`).
    rotation = euler_to_mat(yaw, pitch, roll)[:2, :2]
    h, w = from_shape
    corners = numpy.matrix([[-w, +w, -w, +w],
                            [-h, -h, +h, +h]]) * 0.5
    skewed_corners = rotation * corners
    skewed_size = numpy.array(numpy.max(skewed_corners, axis=1) -
                              numpy.min(skewed_corners, axis=1))

    # Set the scale as large as possible such that the skewed and scaled shape
    # is less than or equal to the desired ratio in either dimension.
    scale *= numpy.min(to_size / skewed_size)

    # Set the translation such that the skewed and scaled image falls within
    # the output shape's bounds. The fifth power concentrates the offsets
    # around zero.
    trans = (numpy.random.random((2, 1)) - 0.5) * translation_variation
    trans = ((2.0 * trans) ** 5.0) / 2.0
    trans_out_of_bounds = bool(numpy.any(trans < -0.5) or
                               numpy.any(trans > 0.5))
    trans = (to_size - skewed_size * scale) * trans

    center_to = to_size / 2.
    center_from = from_size / 2.

    # Scale the rotation, then append the offset column that carries the
    # source centre to the (translated) destination centre.
    linear = rotation * scale
    offset = trans + center_to - linear * center_from
    M = numpy.hstack([linear, offset])

    return M, scale_out_of_bounds or trans_out_of_bounds
158 | 
159 | 
def generate_code():
    """
    Generate a random plate code.

    :return:
        A string laid out as two letters, two digits, a space, and three
        letters, drawn from `common.LETTERS` and `common.DIGITS`.

    """
    pools = (common.LETTERS, common.LETTERS,
             common.DIGITS, common.DIGITS,
             common.LETTERS, common.LETTERS, common.LETTERS)
    picks = [random.choice(pool) for pool in pools]
    return "{}{}{}{} {}{}{}".format(*picks)
169 | 
170 | 
def rounded_rect(shape, radius):
    """
    Make a mask of a rectangle with rounded corners.

    :param shape:
        `(rows, columns)` of the mask.

    :param radius:
        Corner radius in pixels.

    :return:
        Array of the given shape: 1.0 inside the rounded rectangle, 0.0 in
        the cut-away corners.

    """
    rows, cols = shape[0], shape[1]
    mask = numpy.ones(shape)

    # Blank a radius-sized square in every corner...
    near_edge = slice(None, radius)
    far_edge = slice(-radius, None)
    for row_sel in (near_edge, far_edge):
        for col_sel in (near_edge, far_edge):
            mask[row_sel, col_sel] = 0.0

    # ...then fill a disc back into each one (centres are given as (x, y)).
    for center_x in (radius, cols - radius):
        for center_y in (radius, rows - radius):
            cv2.circle(mask, (center_x, center_y), radius, 1.0, -1)

    return mask
184 | 
185 | 
def generate_plate(font_height, char_ims):
    """
    Render a random number plate.

    :param font_height:
        Height in pixels of the glyphs in `char_ims`.

    :param char_ims:
        Mapping from character to its glyph image.

    :return:
        `(plate, mask, code)`: the plate image, a rounded-rectangle mask of
        the same shape, and the plate's code with the space removed.

    """
    # NOTE: the order of the random draws is kept fixed so results are
    # reproducible for a given RNG state.
    h_padding = random.uniform(0.2, 0.4) * font_height
    v_padding = random.uniform(0.1, 0.3) * font_height
    spacing = font_height * random.uniform(-0.05, 0.05)
    radius = 1 + int(font_height * 0.1 * random.random())

    code = generate_code()
    glyphs = [char_ims[ch] for ch in code]
    text_width = sum(glyph.shape[1] for glyph in glyphs)
    text_width += (len(code) - 1) * spacing

    out_shape = (int(font_height + v_padding * 2),
                 int(text_width + h_padding * 2))

    text_color, plate_color = pick_colors()

    # Paste the glyphs left to right into an otherwise empty mask.
    text_mask = numpy.zeros(out_shape)
    top = int(v_padding)
    cursor = h_padding
    for glyph in glyphs:
        left = int(cursor)
        glyph_rows, glyph_cols = glyph.shape[0], glyph.shape[1]
        text_mask[top:top + glyph_rows, left:left + glyph_cols] = glyph
        cursor += glyph_cols + spacing

    # Blend the two grey levels according to the text mask.
    plate = plate_color * (1. - text_mask) + text_color * text_mask

    return plate, rounded_rect(out_shape, radius), code.replace(" ", "")
215 | 
216 | 
def generate_bg(num_bg_images):
    """
    Pick a random crop of size `OUTPUT_SHAPE` from a random background.

    Background files are read from `bgs/NNNNNNNN.jpg`, with the index drawn
    uniformly from `0 .. num_bg_images - 1`. Files that cannot be read, or
    that are smaller than `OUTPUT_SHAPE`, are skipped and another index is
    drawn. Note that the search does not terminate if no usable background
    exists.

    :param num_bg_images:
        Number of candidate background images.

    :return:
        Greyscale crop of shape `OUTPUT_SHAPE`, scaled to the range [0, 1].

    """
    # `CV_LOAD_IMAGE_GRAYSCALE` only exists in old OpenCV releases; prefer
    # the current name and fall back to the legacy one.
    grayscale_flag = getattr(cv2, "IMREAD_GRAYSCALE", None)
    if grayscale_flag is None:
        grayscale_flag = cv2.CV_LOAD_IMAGE_GRAYSCALE

    while True:
        fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1))
        bg = cv2.imread(fname, grayscale_flag)
        if bg is None:
            # imread() signals a missing or undecodable file by returning
            # None rather than raising; try another image.
            continue
        if (bg.shape[1] >= OUTPUT_SHAPE[1] and
            bg.shape[0] >= OUTPUT_SHAPE[0]):
            break

    bg = bg / 255.

    x = random.randint(0, bg.shape[1] - OUTPUT_SHAPE[1])
    y = random.randint(0, bg.shape[0] - OUTPUT_SHAPE[0])
    bg = bg[y:y + OUTPUT_SHAPE[0], x:x + OUTPUT_SHAPE[1]]

    return bg
231 | 
232 | 
def generate_im(char_ims, num_bg_images):
    """
    Generate one training image: a random plate pasted on a background.

    :param char_ims:
        Mapping from character to glyph image, for a single font.

    :param num_bg_images:
        Number of background images available to `generate_bg`.

    :return:
        `(image, code, in_bounds)`: the image with values clipped to [0, 1],
        the plate's code, and whether the plate transform stayed in bounds.

    """
    bg = generate_bg(num_bg_images)
    plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims)

    M, out_of_bounds = make_affine_transform(
                            from_shape=plate.shape,
                            to_shape=bg.shape,
                            min_scale=0.6,
                            max_scale=0.875,
                            rotation_variation=1.0,
                            scale_variation=1.5,
                            translation_variation=1.2)

    # Warp the plate and its mask into the background's frame; warpAffine
    # takes the destination size as (width, height).
    dest_size = (bg.shape[1], bg.shape[0])
    warped_plate = cv2.warpAffine(plate, M, dest_size)
    warped_mask = cv2.warpAffine(plate_mask, M, dest_size)

    composite = warped_plate * warped_mask + bg * (1.0 - warped_mask)
    composite = cv2.resize(composite, (OUTPUT_SHAPE[1], OUTPUT_SHAPE[0]))

    # Add Gaussian noise, then clamp back into the valid range.
    noisy = composite + numpy.random.normal(scale=0.05, size=composite.shape)
    clipped = numpy.clip(noisy, 0., 1.)

    return clipped, code, not out_of_bounds
257 | 
258 | 
def load_fonts(folder_path):
    """
    Render the glyph images for every `.ttf` font in a folder.

    :param folder_path:
        Directory to scan for font files.

    :return:
        `(fonts, font_char_ims)`: the list of font file names, and a dict
        mapping each file name to a dict of character -> glyph image.

    """
    fonts = []
    font_char_ims = {}
    for entry in os.listdir(folder_path):
        if not entry.endswith('.ttf'):
            continue
        fonts.append(entry)
        glyph_pairs = make_char_ims(os.path.join(folder_path, entry),
                                    FONT_HEIGHT)
        font_char_ims[entry] = dict(glyph_pairs)
    return fonts, font_char_ims
267 | 
268 | 
def generate_ims():
    """
    Generate number plate images.

    Fonts are loaded from `FONT_DIR` and the number of backgrounds is taken
    from the number of entries in the `bgs` directory; both happen once,
    when the first item is requested. Each image uses a randomly chosen
    font.

    :return:
        Endless iterable of `(image, code, in_bounds)` tuples as returned
        by `generate_im`.

    """
    fonts, font_char_ims = load_fonts(FONT_DIR)
    num_bg_images = len(os.listdir("bgs"))
    while True:
        yield generate_im(font_char_ims[random.choice(fonts)], num_bg_images)
282 | 
283 | 
if __name__ == "__main__":
    # Usage: gen.py <number of images>. Images are written to ./test, which
    # must not exist yet (os.mkdir raises if it does).
    os.mkdir("test")
    num_images = int(sys.argv[1])
    numbered_ims = enumerate(itertools.islice(generate_ims(), num_images))
    for img_idx, (im, c, p) in numbered_ims:
        # The file name encodes the index, the plate code and the
        # in-bounds flag.
        in_bounds_flag = "1" if p else "0"
        fname = "test/{:08d}_{}_{}.png".format(img_idx, c, in_bounds_flag)
        print(fname)
        cv2.imwrite(fname, im * 255.)
292 | 
293 | 


--------------------------------------------------------------------------------