├── LICENSE ├── common.py ├── vis.py ├── README.md ├── extractbgs.py ├── model.py ├── detect.py ├── train.py └── gen.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 matthewearl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016 Matthew Earl 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included 11 | # in all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 16 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | """ 22 | Definitions that don't fit elsewhere. 23 | 24 | """ 25 | 26 | __all__ = ( 27 | 'DIGITS', 28 | 'LETTERS', 29 | 'CHARS', 30 | 'sigmoid', 31 | 'softmax', 32 | ) 33 | 34 | import numpy 35 | 36 | 37 | DIGITS = "0123456789" 38 | LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 39 | CHARS = LETTERS + DIGITS 40 | 41 | def softmax(a): 42 | exps = numpy.exp(a.astype(numpy.float64)) 43 | return exps / numpy.sum(exps, axis=-1)[:, numpy.newaxis] 44 | 45 | def sigmoid(a): 46 | return 1. / (1. + numpy.exp(-a)) 47 | 48 | -------------------------------------------------------------------------------- /vis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016 Matthew Earl 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included 11 | # in all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 16 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | import sys 22 | 23 | import matplotlib.pyplot as plt 24 | import numpy 25 | 26 | a = numpy.load(sys.argv[1]) 27 | 28 | conv1 = a['arr_0'] 29 | 30 | fig, ax = plt.subplots(8, 8, 31 | figsize=(8, 8), 32 | dpi=100, 33 | squeeze=False) 34 | 35 | """ 36 | for i in range(conv1.shape[3]): 37 | ax[i // 8, i % 8].imshow(conv1[:, :, 0, i], cmap='Greys') 38 | 39 | """ 40 | conv2 = a['arr_2'] 41 | for i in range(min(8, conv2.shape[3])): 42 | for j in range(min(8, conv2.shape[2])): 43 | ax[j, i].imshow(conv2[:, :, j, i], cmap='Greys') 44 | 45 | fig.savefig(sys.argv[2], dpi=30.) 46 | 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep ANPR 2 | 3 | Using neural networks to build an automatic number plate recognition system. 4 | See [this blog post](http://matthewearl.github.io/2016/05/06/cnn-anpr/) for an 5 | explanation. 6 | 7 | **Note: This is an experimental project and is incomplete in a number of ways, 8 | if you're looking for a practical number plate recognition system this project 9 | is not for you.** If however you've read the above blog post and wish to tinker 10 | with the code, read on. If you're really keen you can tackle some of the 11 | enhancements on the Issues page to help make this project more practical. 12 | Please comment on the relevant issue if you plan on making an enhancement and 13 | we can talk through the potential solution. 14 | 15 | Usage is as follows: 16 | 17 | 1. `./extractbgs.py SUN397.tar.gz`: Extract ~3GB of background images from the [SUN database](http://groups.csail.mit.edu/vision/SUN/) 18 | into `bgs/`. (`bgs/` must not already exist.) The tar file (36GB) can be [downloaded here](http://vision.princeton.edu/projects/2010/SUN/SUN397.tar.gz). 19 | This step may take a while as it will extract 108,634 images. 20 | 21 | 2. `./gen.py 1000`: Generate 1000 test set images in `test/`. (`test/` must not 22 | already exist.) This step requires `UKNumberPlate.ttf` to be in the 23 | `fonts/` directory, which can be 24 | [downloaded here](http://www.dafont.com/uk-number-plate.font). 25 | 26 | 3. `./train.py`: Train the model. A GPU is recommended for this step. It will 27 | take around 100,000 batches to converge. When you're satisfied that the 28 | network has learned enough press `Ctrl+C` and the process will write the 29 | weights to `weights.npz` and return. 30 | 31 | 4. `./detect.py in.jpg weights.npz out.jpg`: Detect number plates in an image. 32 | 33 | The project has the following dependencies: 34 | 35 | * [TensorFlow](https://tensorflow.org) 36 | * OpenCV 37 | * NumPy 38 | 39 | Different typefaces can be put in `fonts/` in order to match different type 40 | faces. With a large enough variety the network will learn to generalize and 41 | will match as yet unseen typefaces. See 42 | [#1](https://github.com/matthewearl/deep-anpr/issues/1) for more information. 43 | 44 | -------------------------------------------------------------------------------- /extractbgs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2016 Matthew Earl 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 18 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | 25 | """ 26 | Extract background images from a tar archive. 27 | 28 | """ 29 | 30 | 31 | __all__ = ( 32 | 'extract_backgrounds', 33 | ) 34 | 35 | 36 | import os 37 | import sys 38 | import tarfile 39 | 40 | import cv2 41 | import numpy 42 | 43 | 44 | def im_from_file(f): 45 | a = numpy.asarray(bytearray(f.read()), dtype=numpy.uint8) 46 | return cv2.imdecode(a, cv2.CV_LOAD_IMAGE_GRAYSCALE) 47 | 48 | 49 | def extract_backgrounds(archive_name): 50 | """ 51 | Extract backgrounds from provided tar archive. 52 | 53 | JPEGs from the archive are converted into grayscale, and cropped/resized to 54 | 256x256, and saved in ./bgs/. 55 | 56 | :param archive_name: 57 | Name of the .tar file containing JPEGs of background images. 58 | 59 | """ 60 | os.mkdir("bgs") 61 | 62 | t = tarfile.open(name=archive_name) 63 | 64 | def members(): 65 | m = t.next() 66 | while m: 67 | yield m 68 | m = t.next() 69 | index = 0 70 | for m in members(): 71 | if not m.name.endswith(".jpg"): 72 | continue 73 | f = t.extractfile(m) 74 | try: 75 | im = im_from_file(f) 76 | finally: 77 | f.close() 78 | if im is None: 79 | continue 80 | 81 | if im.shape[0] > im.shape[1]: 82 | im = im[:im.shape[1], :] 83 | else: 84 | im = im[:, :im.shape[0]] 85 | if im.shape[0] > 256: 86 | im = cv2.resize(im, (256, 256)) 87 | fname = "bgs/{:08}.jpg".format(index) 88 | print fname 89 | rc = cv2.imwrite(fname, im) 90 | if not rc: 91 | raise Exception("Failed to write file {}".format(fname)) 92 | index += 1 93 | 94 | 95 | if __name__ == "__main__": 96 | 97 | extract_backgrounds(sys.argv[1]) 98 | 99 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016 Matthew Earl 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included 11 | # in all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 16 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | """ 23 | Definition of the neural networks. 24 | 25 | """ 26 | 27 | 28 | __all__ = ( 29 | 'get_training_model', 30 | 'get_detect_model', 31 | 'WINDOW_SHAPE', 32 | ) 33 | 34 | 35 | import tensorflow as tf 36 | 37 | import common 38 | 39 | 40 | WINDOW_SHAPE = (64, 128) 41 | 42 | 43 | # Utility functions 44 | def weight_variable(shape): 45 | initial = tf.truncated_normal(shape, stddev=0.1) 46 | return tf.Variable(initial) 47 | 48 | 49 | def bias_variable(shape): 50 | initial = tf.constant(0.1, shape=shape) 51 | return tf.Variable(initial) 52 | 53 | 54 | def conv2d(x, W, stride=(1, 1), padding='SAME'): 55 | return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1], 56 | padding=padding) 57 | 58 | 59 | def max_pool(x, ksize=(2, 2), stride=(2, 2)): 60 | return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1], 61 | strides=[1, stride[0], stride[1], 1], padding='SAME') 62 | 63 | 64 | def avg_pool(x, ksize=(2, 2), stride=(2, 2)): 65 | return tf.nn.avg_pool(x, ksize=[1, ksize[0], ksize[1], 1], 66 | strides=[1, stride[0], stride[1], 1], padding='SAME') 67 | 68 | 69 | def convolutional_layers(): 70 | """ 71 | Get the convolutional layers of the model. 72 | 73 | """ 74 | x = tf.placeholder(tf.float32, [None, None, None]) 75 | 76 | # First layer 77 | W_conv1 = weight_variable([5, 5, 1, 48]) 78 | b_conv1 = bias_variable([48]) 79 | x_expanded = tf.expand_dims(x, 3) 80 | h_conv1 = tf.nn.relu(conv2d(x_expanded, W_conv1) + b_conv1) 81 | h_pool1 = max_pool(h_conv1, ksize=(2, 2), stride=(2, 2)) 82 | 83 | # Second layer 84 | W_conv2 = weight_variable([5, 5, 48, 64]) 85 | b_conv2 = bias_variable([64]) 86 | 87 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 88 | h_pool2 = max_pool(h_conv2, ksize=(2, 1), stride=(2, 1)) 89 | 90 | # Third layer 91 | W_conv3 = weight_variable([5, 5, 64, 128]) 92 | b_conv3 = bias_variable([128]) 93 | 94 | h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3) 95 | h_pool3 = max_pool(h_conv3, ksize=(2, 2), stride=(2, 2)) 96 | 97 | return x, h_pool3, [W_conv1, b_conv1, 98 | W_conv2, b_conv2, 99 | W_conv3, b_conv3] 100 | 101 | 102 | def get_training_model(): 103 | """ 104 | The training model acts on a batch of 128x64 windows, and outputs a (1 + 105 | 7 * len(common.CHARS) vector, `v`. `v[0]` is the probability that a plate is 106 | fully within the image and is at the correct scale. 107 | 108 | `v[1 + i * len(common.CHARS) + c]` is the probability that the `i`'th 109 | character is `c`. 110 | 111 | """ 112 | x, conv_layer, conv_vars = convolutional_layers() 113 | 114 | # Densely connected layer 115 | W_fc1 = weight_variable([32 * 8 * 128, 2048]) 116 | b_fc1 = bias_variable([2048]) 117 | 118 | conv_layer_flat = tf.reshape(conv_layer, [-1, 32 * 8 * 128]) 119 | h_fc1 = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1) 120 | 121 | # Output layer 122 | W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)]) 123 | b_fc2 = bias_variable([1 + 7 * len(common.CHARS)]) 124 | 125 | y = tf.matmul(h_fc1, W_fc2) + b_fc2 126 | 127 | return (x, y, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2]) 128 | 129 | 130 | def get_detect_model(): 131 | """ 132 | The same as the training model, except it acts on an arbitrarily sized 133 | input, and slides the 128x64 window across the image in 8x8 strides. 134 | 135 | The output is of the form `v`, where `v[i, j]` is equivalent to the output 136 | of the training model, for the window at coordinates `(8 * i, 4 * j)`. 137 | 138 | """ 139 | x, conv_layer, conv_vars = convolutional_layers() 140 | 141 | # Fourth layer 142 | W_fc1 = weight_variable([8 * 32 * 128, 2048]) 143 | W_conv1 = tf.reshape(W_fc1, [8, 32, 128, 2048]) 144 | b_fc1 = bias_variable([2048]) 145 | h_conv1 = tf.nn.relu(conv2d(conv_layer, W_conv1, 146 | stride=(1, 1), padding="VALID") + b_fc1) 147 | # Fifth layer 148 | W_fc2 = weight_variable([2048, 1 + 7 * len(common.CHARS)]) 149 | W_conv2 = tf.reshape(W_fc2, [1, 1, 2048, 1 + 7 * len(common.CHARS)]) 150 | b_fc2 = bias_variable([1 + 7 * len(common.CHARS)]) 151 | h_conv2 = conv2d(h_conv1, W_conv2) + b_fc2 152 | 153 | return (x, h_conv2, conv_vars + [W_fc1, b_fc1, W_fc2, b_fc2]) 154 | 155 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2016 Matthew Earl 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 18 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | """ 25 | Routines to detect number plates. 26 | 27 | Use `detect` to detect all bounding boxes, and use `post_process` on the output 28 | of `detect` to filter using non-maximum suppression. 29 | 30 | """ 31 | 32 | 33 | __all__ = ( 34 | 'detect', 35 | 'post_process', 36 | ) 37 | 38 | 39 | import collections 40 | import itertools 41 | import math 42 | import sys 43 | 44 | import cv2 45 | import numpy 46 | import tensorflow as tf 47 | 48 | import common 49 | import model 50 | 51 | 52 | def make_scaled_ims(im, min_shape): 53 | ratio = 1. / 2 ** 0.5 54 | shape = (im.shape[0] / ratio, im.shape[1] / ratio) 55 | 56 | while True: 57 | shape = (int(shape[0] * ratio), int(shape[1] * ratio)) 58 | if shape[0] < min_shape[0] or shape[1] < min_shape[1]: 59 | break 60 | yield cv2.resize(im, (shape[1], shape[0])) 61 | 62 | 63 | def detect(im, param_vals): 64 | """ 65 | Detect number plates in an image. 66 | 67 | :param im: 68 | Image to detect number plates in. 69 | 70 | :param param_vals: 71 | Model parameters to use. These are the parameters output by the `train` 72 | module. 73 | 74 | :returns: 75 | Iterable of `bbox_tl, bbox_br, letter_probs`, defining the bounding box 76 | top-left and bottom-right corners respectively, and a 7,36 matrix 77 | giving the probability distributions of each letter. 78 | 79 | """ 80 | 81 | # Convert the image to various scales. 82 | scaled_ims = list(make_scaled_ims(im, model.WINDOW_SHAPE)) 83 | 84 | # Load the model which detects number plates over a sliding window. 85 | x, y, params = model.get_detect_model() 86 | 87 | # Execute the model at each scale. 88 | with tf.Session(config=tf.ConfigProto()) as sess: 89 | y_vals = [] 90 | for scaled_im in scaled_ims: 91 | feed_dict = {x: numpy.stack([scaled_im])} 92 | feed_dict.update(dict(zip(params, param_vals))) 93 | y_vals.append(sess.run(y, feed_dict=feed_dict)) 94 | 95 | # Interpret the results in terms of bounding boxes in the input image. 96 | # Do this by identifying windows (at all scales) where the model predicts a 97 | # number plate has a greater than 50% probability of appearing. 98 | # 99 | # To obtain pixel coordinates, the window coordinates are scaled according 100 | # to the stride size, and pixel coordinates. 101 | for i, (scaled_im, y_val) in enumerate(zip(scaled_ims, y_vals)): 102 | for window_coords in numpy.argwhere(y_val[0, :, :, 0] > 103 | -math.log(1./0.99 - 1)): 104 | letter_probs = (y_val[0, 105 | window_coords[0], 106 | window_coords[1], 1:].reshape( 107 | 7, len(common.CHARS))) 108 | letter_probs = common.softmax(letter_probs) 109 | 110 | img_scale = float(im.shape[0]) / scaled_im.shape[0] 111 | 112 | bbox_tl = window_coords * (8, 4) * img_scale 113 | bbox_size = numpy.array(model.WINDOW_SHAPE) * img_scale 114 | 115 | present_prob = common.sigmoid( 116 | y_val[0, window_coords[0], window_coords[1], 0]) 117 | 118 | yield bbox_tl, bbox_tl + bbox_size, present_prob, letter_probs 119 | 120 | 121 | def _overlaps(match1, match2): 122 | bbox_tl1, bbox_br1, _, _ = match1 123 | bbox_tl2, bbox_br2, _, _ = match2 124 | return (bbox_br1[0] > bbox_tl2[0] and 125 | bbox_br2[0] > bbox_tl1[0] and 126 | bbox_br1[1] > bbox_tl2[1] and 127 | bbox_br2[1] > bbox_tl1[1]) 128 | 129 | 130 | def _group_overlapping_rectangles(matches): 131 | matches = list(matches) 132 | num_groups = 0 133 | match_to_group = {} 134 | for idx1 in range(len(matches)): 135 | for idx2 in range(idx1): 136 | if _overlaps(matches[idx1], matches[idx2]): 137 | match_to_group[idx1] = match_to_group[idx2] 138 | break 139 | else: 140 | match_to_group[idx1] = num_groups 141 | num_groups += 1 142 | 143 | groups = collections.defaultdict(list) 144 | for idx, group in match_to_group.items(): 145 | groups[group].append(matches[idx]) 146 | 147 | return groups 148 | 149 | 150 | def post_process(matches): 151 | """ 152 | Take an iterable of matches as returned by `detect` and merge duplicates. 153 | 154 | Merging consists of two steps: 155 | - Finding sets of overlapping rectangles. 156 | - Finding the intersection of those sets, along with the code 157 | corresponding with the rectangle with the highest presence parameter. 158 | 159 | """ 160 | groups = _group_overlapping_rectangles(matches) 161 | 162 | for group_matches in groups.values(): 163 | mins = numpy.stack(numpy.array(m[0]) for m in group_matches) 164 | maxs = numpy.stack(numpy.array(m[1]) for m in group_matches) 165 | present_probs = numpy.array([m[2] for m in group_matches]) 166 | letter_probs = numpy.stack(m[3] for m in group_matches) 167 | 168 | yield (numpy.max(mins, axis=0).flatten(), 169 | numpy.min(maxs, axis=0).flatten(), 170 | numpy.max(present_probs), 171 | letter_probs[numpy.argmax(present_probs)]) 172 | 173 | 174 | def letter_probs_to_code(letter_probs): 175 | return "".join(common.CHARS[i] for i in numpy.argmax(letter_probs, axis=1)) 176 | 177 | 178 | if __name__ == "__main__": 179 | im = cv2.imread(sys.argv[1]) 180 | im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) / 255. 181 | 182 | f = numpy.load(sys.argv[2]) 183 | param_vals = [f[n] for n in sorted(f.files, key=lambda s: int(s[4:]))] 184 | 185 | for pt1, pt2, present_prob, letter_probs in post_process( 186 | detect(im_gray, param_vals)): 187 | pt1 = tuple(reversed(map(int, pt1))) 188 | pt2 = tuple(reversed(map(int, pt2))) 189 | 190 | code = letter_probs_to_code(letter_probs) 191 | 192 | color = (0.0, 255.0, 0.0) 193 | cv2.rectangle(im, pt1, pt2, color) 194 | 195 | cv2.putText(im, 196 | code, 197 | pt1, 198 | cv2.FONT_HERSHEY_PLAIN, 199 | 1.5, 200 | (0, 0, 0), 201 | thickness=5) 202 | 203 | cv2.putText(im, 204 | code, 205 | pt1, 206 | cv2.FONT_HERSHEY_PLAIN, 207 | 1.5, 208 | (255, 255, 255), 209 | thickness=2) 210 | 211 | cv2.imwrite(sys.argv[3], im) 212 | 213 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2016 Matthew Earl 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 18 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | """ 25 | Routines for training the network. 26 | 27 | """ 28 | 29 | 30 | __all__ = ( 31 | 'train', 32 | ) 33 | 34 | 35 | import functools 36 | import glob 37 | import itertools 38 | import multiprocessing 39 | import random 40 | import sys 41 | import time 42 | 43 | import cv2 44 | import numpy 45 | import tensorflow as tf 46 | 47 | import common 48 | import gen 49 | import model 50 | 51 | 52 | def code_to_vec(p, code): 53 | def char_to_vec(c): 54 | y = numpy.zeros((len(common.CHARS),)) 55 | y[common.CHARS.index(c)] = 1.0 56 | return y 57 | 58 | c = numpy.vstack([char_to_vec(c) for c in code]) 59 | 60 | return numpy.concatenate([[1. if p else 0], c.flatten()]) 61 | 62 | 63 | def read_data(img_glob): 64 | for fname in sorted(glob.glob(img_glob)): 65 | im = cv2.imread(fname)[:, :, 0].astype(numpy.float32) / 255. 66 | code = fname.split("/")[1][9:16] 67 | p = fname.split("/")[1][17] == '1' 68 | yield im, code_to_vec(p, code) 69 | 70 | 71 | def unzip(b): 72 | xs, ys = zip(*b) 73 | xs = numpy.array(xs) 74 | ys = numpy.array(ys) 75 | return xs, ys 76 | 77 | 78 | def batch(it, batch_size): 79 | out = [] 80 | for x in it: 81 | out.append(x) 82 | if len(out) == batch_size: 83 | yield out 84 | out = [] 85 | if out: 86 | yield out 87 | 88 | 89 | def mpgen(f): 90 | def main(q, args, kwargs): 91 | try: 92 | for item in f(*args, **kwargs): 93 | q.put(item) 94 | finally: 95 | q.close() 96 | 97 | @functools.wraps(f) 98 | def wrapped(*args, **kwargs): 99 | q = multiprocessing.Queue(3) 100 | proc = multiprocessing.Process(target=main, 101 | args=(q, args, kwargs)) 102 | proc.start() 103 | try: 104 | while True: 105 | item = q.get() 106 | yield item 107 | finally: 108 | proc.terminate() 109 | proc.join() 110 | 111 | return wrapped 112 | 113 | 114 | @mpgen 115 | def read_batches(batch_size): 116 | g = gen.generate_ims() 117 | def gen_vecs(): 118 | for im, c, p in itertools.islice(g, batch_size): 119 | yield im, code_to_vec(p, c) 120 | 121 | while True: 122 | yield unzip(gen_vecs()) 123 | 124 | 125 | def get_loss(y, y_): 126 | # Calculate the loss from digits being incorrect. Don't count loss from 127 | # digits that are in non-present plates. 128 | digits_loss = tf.nn.softmax_cross_entropy_with_logits( 129 | tf.reshape(y[:, 1:], 130 | [-1, len(common.CHARS)]), 131 | tf.reshape(y_[:, 1:], 132 | [-1, len(common.CHARS)])) 133 | digits_loss = tf.reshape(digits_loss, [-1, 7]) 134 | digits_loss = tf.reduce_sum(digits_loss, 1) 135 | digits_loss *= (y_[:, 0] != 0) 136 | digits_loss = tf.reduce_sum(digits_loss) 137 | 138 | # Calculate the loss from presence indicator being wrong. 139 | presence_loss = tf.nn.sigmoid_cross_entropy_with_logits( 140 | y[:, :1], y_[:, :1]) 141 | presence_loss = 7 * tf.reduce_sum(presence_loss) 142 | 143 | return digits_loss, presence_loss, digits_loss + presence_loss 144 | 145 | 146 | def train(learn_rate, report_steps, batch_size, initial_weights=None): 147 | """ 148 | Train the network. 149 | 150 | The function operates interactively: Progress is reported on stdout, and 151 | training ceases upon `KeyboardInterrupt` at which point the learned weights 152 | are saved to `weights.npz`, and also returned. 153 | 154 | :param learn_rate: 155 | Learning rate to use. 156 | 157 | :param report_steps: 158 | Every `report_steps` batches a progress report is printed. 159 | 160 | :param batch_size: 161 | The size of the batches used for training. 162 | 163 | :param initial_weights: 164 | (Optional.) Weights to initialize the network with. 165 | 166 | :return: 167 | The learned network weights. 168 | 169 | """ 170 | x, y, params = model.get_training_model() 171 | 172 | y_ = tf.placeholder(tf.float32, [None, 7 * len(common.CHARS) + 1]) 173 | 174 | digits_loss, presence_loss, loss = get_loss(y, y_) 175 | train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss) 176 | 177 | best = tf.argmax(tf.reshape(y[:, 1:], [-1, 7, len(common.CHARS)]), 2) 178 | correct = tf.argmax(tf.reshape(y_[:, 1:], [-1, 7, len(common.CHARS)]), 2) 179 | 180 | if initial_weights is not None: 181 | assert len(params) == len(initial_weights) 182 | assign_ops = [w.assign(v) for w, v in zip(params, initial_weights)] 183 | 184 | init = tf.initialize_all_variables() 185 | 186 | def vec_to_plate(v): 187 | return "".join(common.CHARS[i] for i in v) 188 | 189 | def do_report(): 190 | r = sess.run([best, 191 | correct, 192 | tf.greater(y[:, 0], 0), 193 | y_[:, 0], 194 | digits_loss, 195 | presence_loss, 196 | loss], 197 | feed_dict={x: test_xs, y_: test_ys}) 198 | num_correct = numpy.sum( 199 | numpy.logical_or( 200 | numpy.all(r[0] == r[1], axis=1), 201 | numpy.logical_and(r[2] < 0.5, 202 | r[3] < 0.5))) 203 | r_short = (r[0][:190], r[1][:190], r[2][:190], r[3][:190]) 204 | for b, c, pb, pc in zip(*r_short): 205 | print "{} {} <-> {} {}".format(vec_to_plate(c), pc, 206 | vec_to_plate(b), float(pb)) 207 | num_p_correct = numpy.sum(r[2] == r[3]) 208 | 209 | print ("B{:3d} {:2.02f}% {:02.02f}% loss: {} " 210 | "(digits: {}, presence: {}) |{}|").format( 211 | batch_idx, 212 | 100. * num_correct / (len(r[0])), 213 | 100. * num_p_correct / len(r[2]), 214 | r[6], 215 | r[4], 216 | r[5], 217 | "".join("X "[numpy.array_equal(b, c) or (not pb and not pc)] 218 | for b, c, pb, pc in zip(*r_short))) 219 | 220 | def do_batch(): 221 | sess.run(train_step, 222 | feed_dict={x: batch_xs, y_: batch_ys}) 223 | if batch_idx % report_steps == 0: 224 | do_report() 225 | 226 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95) 227 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 228 | sess.run(init) 229 | if initial_weights is not None: 230 | sess.run(assign_ops) 231 | 232 | test_xs, test_ys = unzip(list(read_data("test/*.png"))[:50]) 233 | 234 | try: 235 | last_batch_idx = 0 236 | last_batch_time = time.time() 237 | batch_iter = enumerate(read_batches(batch_size)) 238 | for batch_idx, (batch_xs, batch_ys) in batch_iter: 239 | do_batch() 240 | if batch_idx % report_steps == 0: 241 | batch_time = time.time() 242 | if last_batch_idx != batch_idx: 243 | print "time for 60 batches {}".format( 244 | 60 * (last_batch_time - batch_time) / 245 | (last_batch_idx - batch_idx)) 246 | last_batch_idx = batch_idx 247 | last_batch_time = batch_time 248 | 249 | except KeyboardInterrupt: 250 | last_weights = [p.eval() for p in params] 251 | numpy.savez("weights.npz", *last_weights) 252 | return last_weights 253 | 254 | 255 | if __name__ == "__main__": 256 | if len(sys.argv) > 1: 257 | f = numpy.load(sys.argv[1]) 258 | initial_weights = [f[n] for n in sorted(f.files, 259 | key=lambda s: int(s[4:]))] 260 | else: 261 | initial_weights = None 262 | 263 | train(learn_rate=0.001, 264 | report_steps=20, 265 | batch_size=50, 266 | initial_weights=initial_weights) 267 | 268 | -------------------------------------------------------------------------------- /gen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2016 Matthew Earl 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 18 | # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 19 | # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 | # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 | # USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | 25 | """ 26 | Generate training and test images. 27 | 28 | """ 29 | 30 | 31 | __all__ = ( 32 | 'generate_ims', 33 | ) 34 | 35 | 36 | import itertools 37 | import math 38 | import os 39 | import random 40 | import sys 41 | 42 | import cv2 43 | import numpy 44 | 45 | from PIL import Image 46 | from PIL import ImageDraw 47 | from PIL import ImageFont 48 | 49 | import common 50 | 51 | FONT_DIR = "./fonts" 52 | FONT_HEIGHT = 32 # Pixel size to which the chars are resized 53 | 54 | OUTPUT_SHAPE = (64, 128) 55 | 56 | CHARS = common.CHARS + " " 57 | 58 | 59 | def make_char_ims(font_path, output_height): 60 | font_size = output_height * 4 61 | 62 | font = ImageFont.truetype(font_path, font_size) 63 | 64 | height = max(font.getsize(c)[1] for c in CHARS) 65 | 66 | for c in CHARS: 67 | width = font.getsize(c)[0] 68 | im = Image.new("RGBA", (width, height), (0, 0, 0)) 69 | 70 | draw = ImageDraw.Draw(im) 71 | draw.text((0, 0), c, (255, 255, 255), font=font) 72 | scale = float(output_height) / height 73 | im = im.resize((int(width * scale), output_height), Image.ANTIALIAS) 74 | yield c, numpy.array(im)[:, :, 0].astype(numpy.float32) / 255. 75 | 76 | 77 | def euler_to_mat(yaw, pitch, roll): 78 | # Rotate clockwise about the Y-axis 79 | c, s = math.cos(yaw), math.sin(yaw) 80 | M = numpy.matrix([[ c, 0., s], 81 | [ 0., 1., 0.], 82 | [ -s, 0., c]]) 83 | 84 | # Rotate clockwise about the X-axis 85 | c, s = math.cos(pitch), math.sin(pitch) 86 | M = numpy.matrix([[ 1., 0., 0.], 87 | [ 0., c, -s], 88 | [ 0., s, c]]) * M 89 | 90 | # Rotate clockwise about the Z-axis 91 | c, s = math.cos(roll), math.sin(roll) 92 | M = numpy.matrix([[ c, -s, 0.], 93 | [ s, c, 0.], 94 | [ 0., 0., 1.]]) * M 95 | 96 | return M 97 | 98 | 99 | def pick_colors(): 100 | first = True 101 | while first or plate_color - text_color < 0.3: 102 | text_color = random.random() 103 | plate_color = random.random() 104 | if text_color > plate_color: 105 | text_color, plate_color = plate_color, text_color 106 | first = False 107 | return text_color, plate_color 108 | 109 | 110 | def make_affine_transform(from_shape, to_shape, 111 | min_scale, max_scale, 112 | scale_variation=1.0, 113 | rotation_variation=1.0, 114 | translation_variation=1.0): 115 | out_of_bounds = False 116 | 117 | from_size = numpy.array([[from_shape[1], from_shape[0]]]).T 118 | to_size = numpy.array([[to_shape[1], to_shape[0]]]).T 119 | 120 | scale = random.uniform((min_scale + max_scale) * 0.5 - 121 | (max_scale - min_scale) * 0.5 * scale_variation, 122 | (min_scale + max_scale) * 0.5 + 123 | (max_scale - min_scale) * 0.5 * scale_variation) 124 | if scale > max_scale or scale < min_scale: 125 | out_of_bounds = True 126 | roll = random.uniform(-0.3, 0.3) * rotation_variation 127 | pitch = random.uniform(-0.2, 0.2) * rotation_variation 128 | yaw = random.uniform(-1.2, 1.2) * rotation_variation 129 | 130 | # Compute a bounding box on the skewed input image (`from_shape`). 131 | M = euler_to_mat(yaw, pitch, roll)[:2, :2] 132 | h, w = from_shape 133 | corners = numpy.matrix([[-w, +w, -w, +w], 134 | [-h, -h, +h, +h]]) * 0.5 135 | skewed_size = numpy.array(numpy.max(M * corners, axis=1) - 136 | numpy.min(M * corners, axis=1)) 137 | 138 | # Set the scale as large as possible such that the skewed and scaled shape 139 | # is less than or equal to the desired ratio in either dimension. 140 | scale *= numpy.min(to_size / skewed_size) 141 | 142 | # Set the translation such that the skewed and scaled image falls within 143 | # the output shape's bounds. 144 | trans = (numpy.random.random((2,1)) - 0.5) * translation_variation 145 | trans = ((2.0 * trans) ** 5.0) / 2.0 146 | if numpy.any(trans < -0.5) or numpy.any(trans > 0.5): 147 | out_of_bounds = True 148 | trans = (to_size - skewed_size * scale) * trans 149 | 150 | center_to = to_size / 2. 151 | center_from = from_size / 2. 152 | 153 | M = euler_to_mat(yaw, pitch, roll)[:2, :2] 154 | M *= scale 155 | M = numpy.hstack([M, trans + center_to - M * center_from]) 156 | 157 | return M, out_of_bounds 158 | 159 | 160 | def generate_code(): 161 | return "{}{}{}{} {}{}{}".format( 162 | random.choice(common.LETTERS), 163 | random.choice(common.LETTERS), 164 | random.choice(common.DIGITS), 165 | random.choice(common.DIGITS), 166 | random.choice(common.LETTERS), 167 | random.choice(common.LETTERS), 168 | random.choice(common.LETTERS)) 169 | 170 | 171 | def rounded_rect(shape, radius): 172 | out = numpy.ones(shape) 173 | out[:radius, :radius] = 0.0 174 | out[-radius:, :radius] = 0.0 175 | out[:radius, -radius:] = 0.0 176 | out[-radius:, -radius:] = 0.0 177 | 178 | cv2.circle(out, (radius, radius), radius, 1.0, -1) 179 | cv2.circle(out, (radius, shape[0] - radius), radius, 1.0, -1) 180 | cv2.circle(out, (shape[1] - radius, radius), radius, 1.0, -1) 181 | cv2.circle(out, (shape[1] - radius, shape[0] - radius), radius, 1.0, -1) 182 | 183 | return out 184 | 185 | 186 | def generate_plate(font_height, char_ims): 187 | h_padding = random.uniform(0.2, 0.4) * font_height 188 | v_padding = random.uniform(0.1, 0.3) * font_height 189 | spacing = font_height * random.uniform(-0.05, 0.05) 190 | radius = 1 + int(font_height * 0.1 * random.random()) 191 | 192 | code = generate_code() 193 | text_width = sum(char_ims[c].shape[1] for c in code) 194 | text_width += (len(code) - 1) * spacing 195 | 196 | out_shape = (int(font_height + v_padding * 2), 197 | int(text_width + h_padding * 2)) 198 | 199 | text_color, plate_color = pick_colors() 200 | 201 | text_mask = numpy.zeros(out_shape) 202 | 203 | x = h_padding 204 | y = v_padding 205 | for c in code: 206 | char_im = char_ims[c] 207 | ix, iy = int(x), int(y) 208 | text_mask[iy:iy + char_im.shape[0], ix:ix + char_im.shape[1]] = char_im 209 | x += char_im.shape[1] + spacing 210 | 211 | plate = (numpy.ones(out_shape) * plate_color * (1. - text_mask) + 212 | numpy.ones(out_shape) * text_color * text_mask) 213 | 214 | return plate, rounded_rect(out_shape, radius), code.replace(" ", "") 215 | 216 | 217 | def generate_bg(num_bg_images): 218 | found = False 219 | while not found: 220 | fname = "bgs/{:08d}.jpg".format(random.randint(0, num_bg_images - 1)) 221 | bg = cv2.imread(fname, cv2.CV_LOAD_IMAGE_GRAYSCALE) / 255. 222 | if (bg.shape[1] >= OUTPUT_SHAPE[1] and 223 | bg.shape[0] >= OUTPUT_SHAPE[0]): 224 | found = True 225 | 226 | x = random.randint(0, bg.shape[1] - OUTPUT_SHAPE[1]) 227 | y = random.randint(0, bg.shape[0] - OUTPUT_SHAPE[0]) 228 | bg = bg[y:y + OUTPUT_SHAPE[0], x:x + OUTPUT_SHAPE[1]] 229 | 230 | return bg 231 | 232 | 233 | def generate_im(char_ims, num_bg_images): 234 | bg = generate_bg(num_bg_images) 235 | 236 | plate, plate_mask, code = generate_plate(FONT_HEIGHT, char_ims) 237 | 238 | M, out_of_bounds = make_affine_transform( 239 | from_shape=plate.shape, 240 | to_shape=bg.shape, 241 | min_scale=0.6, 242 | max_scale=0.875, 243 | rotation_variation=1.0, 244 | scale_variation=1.5, 245 | translation_variation=1.2) 246 | plate = cv2.warpAffine(plate, M, (bg.shape[1], bg.shape[0])) 247 | plate_mask = cv2.warpAffine(plate_mask, M, (bg.shape[1], bg.shape[0])) 248 | 249 | out = plate * plate_mask + bg * (1.0 - plate_mask) 250 | 251 | out = cv2.resize(out, (OUTPUT_SHAPE[1], OUTPUT_SHAPE[0])) 252 | 253 | out += numpy.random.normal(scale=0.05, size=out.shape) 254 | out = numpy.clip(out, 0., 1.) 255 | 256 | return out, code, not out_of_bounds 257 | 258 | 259 | def load_fonts(folder_path): 260 | font_char_ims = {} 261 | fonts = [f for f in os.listdir(folder_path) if f.endswith('.ttf')] 262 | for font in fonts: 263 | font_char_ims[font] = dict(make_char_ims(os.path.join(folder_path, 264 | font), 265 | FONT_HEIGHT)) 266 | return fonts, font_char_ims 267 | 268 | 269 | def generate_ims(): 270 | """ 271 | Generate number plate images. 272 | 273 | :return: 274 | Iterable of number plate images. 275 | 276 | """ 277 | variation = 1.0 278 | fonts, font_char_ims = load_fonts(FONT_DIR) 279 | num_bg_images = len(os.listdir("bgs")) 280 | while True: 281 | yield generate_im(font_char_ims[random.choice(fonts)], num_bg_images) 282 | 283 | 284 | if __name__ == "__main__": 285 | os.mkdir("test") 286 | im_gen = itertools.islice(generate_ims(), int(sys.argv[1])) 287 | for img_idx, (im, c, p) in enumerate(im_gen): 288 | fname = "test/{:08d}_{}_{}.png".format(img_idx, c, 289 | "1" if p else "0") 290 | print fname 291 | cv2.imwrite(fname, im * 255.) 292 | 293 | --------------------------------------------------------------------------------