├── demo.jpg
├── test.jpg
├── run.sh
├── classify.py
├── README.md
└── train.py

--------------------------------------------------------------------------------
/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rajdas2001/Crack-Detection/HEAD/demo.jpg

--------------------------------------------------------------------------------
/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rajdas2001/Crack-Detection/HEAD/test.jpg

--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
python train.py \
  --bottleneck_dir=logs/bottlenecks \
  --how_many_training_steps=2000 \
  --model_dir=inception \
  --summaries_dir=logs/training_summaries/basic \
  --output_graph=logs/trained_graph.pb \
  --output_labels=logs/trained_labels.txt \
  --image_dir=./dataset

--------------------------------------------------------------------------------
/classify.py:
--------------------------------------------------------------------------------
import sys
import os

# Disable TensorFlow compilation warnings. This must be set before
# tensorflow is imported to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf

image_path = sys.argv[1]

# Read the image data.
image_data = tf.gfile.FastGFile(image_path, 'rb').read()

# Load the label file, stripping trailing whitespace.
label_lines = [line.rstrip() for line
               in tf.gfile.GFile("logs/trained_labels.txt")]

# Unpersist the graph from file.
with tf.gfile.FastGFile("logs/trained_graph.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    # Feed the image data as input to the graph and get the softmax prediction.
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})

    # Sort to show the labels in order of confidence.
    top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]

    for node_id in top_k:
        human_string = label_lines[node_id]
        score = predictions[0][node_id]
        print('%s (score = %.5f)' % (human_string, score))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Crack Detection
Built with Python, this repository contains the code for crack detection in concrete surfaces.
It is a generic image classification program that uses Google's machine learning library, [TensorFlow](https://www.tensorflow.org/), and a pre-trained deep convolutional neural network called [Inception](https://research.googleblog.com/2016/03/train-your-own-image-classifier-with.html).

The Inception model was pre-trained for the [ImageNet](http://image-net.org/) Large Scale Visual Recognition Challenge using data from 2012, and it can differentiate between 1,000 classes, such as Dalmatian or dishwasher.
The program applies transfer learning to this existing model and re-trains it to classify a new set of images.

In this project, the model has been retrained to differentiate between cracked and non-cracked surfaces.

## Dependencies
Make sure you have [Python](https://www.python.org/) installed,
then install [TensorFlow](https://www.tensorflow.org/install/) on your system, and clone this repo.

```
pip install tensorflow==1.4
```
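The scripts use TensorFlow 1.x APIs (`tf.gfile`, `tf.GraphDef`, `tf.Session`), so a 1.x release is required. To verify the installed version:

```
python -c "import tensorflow as tf; print(tf.__version__)"
```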
## Usage

### Prepare the dataset
A folder named `dataset` has already been created, containing the two sub-folders `Crack` and `No Crack`. Download the dataset from this [link](https://drive.google.com/drive/folders/102R9iOaT8zePRMMS3vhbSKkMH8tbOmrc?usp=sharing).
The sub-folder names become the class labels that the images are classified into.

### Initiate transfer learning
Go to the project directory and run:

```
$ bash run.sh
```

This script downloads the `Inception` model and initiates the re-training process on the specified image data sets.

Once the process is complete, it will report a training accuracy somewhere between `85%` and `100%`.

The `training summaries`, `trained graph` and `trained labels` will be saved in a folder named `logs`.

### Classify objects

```
python classify.py test.jpg
```

where `test.jpg` is the input image to be classified.

The classifier outputs a prediction score for each class. A score between `0.8` and `1.0` indicates a confident prediction.
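To classify several images in one go, a minimal shell loop works (a sketch, assuming your test images sit in a hypothetical `samples/` folder; note that the graph is reloaded for every image, so this is convenient rather than fast):

```
for img in samples/*.jpg; do python classify.py "$img"; done
```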
# Sample Output:
![](demo.jpg)

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
"""
The program applies Transfer Learning to an existing model and re-trains it to
classify a new set of images.

This example shows how to take an Inception v3 architecture model trained on
ImageNet images, and train a new top layer that can recognize other classes of
images.

You can replace the image_dir argument with any folder containing subfolders of
images. The label for each image is taken from the name of the subfolder it's
in.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import hashlib
import os.path
import random
import re
import struct
import sys
import tarfile

import numpy as np
from six.moves import urllib
import tensorflow as tf

from tensorflow.python.framework import graph_util
from tensorflow.python.framework import tensor_shape
from tensorflow.python.platform import gfile
from tensorflow.python.util import compat

FLAGS = None

# These are all parameters that are tied to the particular model architecture
# we're using for Inception v3. These include things like tensor names and
# their sizes. If you want to adapt this script to work with another model,
# you will need to update these to reflect the values in the network you're
# using.
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'

BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'
BOTTLENECK_TENSOR_SIZE = 2048
MODEL_INPUT_WIDTH = 299
MODEL_INPUT_HEIGHT = 299
MODEL_INPUT_DEPTH = 3
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'
RESIZED_INPUT_TENSOR_NAME = 'ResizeBilinear:0'
MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1  # ~134M


def create_image_lists(image_dir, testing_percentage, validation_percentage):
  """
  Brief:
    Builds a list of training images from the file system.
    Analyzes the sub folders in the image directory, splits them into stable
    training, testing, and validation sets, and returns a data structure
    describing the lists of images for each label and their paths.
  Args:
    image_dir: String path to a folder containing subfolders of images.
    testing_percentage: Integer percentage of the images to reserve for tests.
    validation_percentage: Integer percentage of images reserved for
      validation.
  Returns:
    A dictionary containing an entry for each label subfolder, with images
    split into training, testing, and validation sets within each label.
  """
  if not gfile.Exists(image_dir):
    print("Image directory '" + image_dir + "' not found.")
    return None
  result = {}
  sub_dirs = [x[0] for x in gfile.Walk(image_dir)]
  # The root directory comes first, so skip it.
  is_root_dir = True
  for sub_dir in sub_dirs:
    if is_root_dir:
      is_root_dir = False
      continue
    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
    file_list = []
    dir_name = os.path.basename(sub_dir)
    if dir_name == image_dir:
      continue
    print("Looking for images in '" + dir_name + "'")
    for extension in extensions:
      file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
      file_list.extend(gfile.Glob(file_glob))
    if not file_list:
      print('No files found')
      continue
    if len(file_list) < 20:
      print('WARNING: Folder has fewer than 20 images, which may cause '
            'issues.')
    elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
      print('WARNING: Folder {} has more than {} images. Some images will '
            'never be selected.'.format(dir_name, MAX_NUM_IMAGES_PER_CLASS))
    label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
    training_images = []
    testing_images = []
    validation_images = []
    for file_name in file_list:
      base_name = os.path.basename(file_name)
      # We want to ignore anything after '_nohash_' in the file name when
      # deciding which set to put an image in, because the data set creator
      # has a way of grouping photos that are close variations of each other.
      # For example, this is used in the plant disease data set to group
      # multiple pictures of the same leaf.
      hash_name = re.sub(r'_nohash_.*$', '', file_name)
      # This looks a bit magical, but we need to decide whether this file
      # should go into the training, testing, or validation sets, and we want
      # to keep existing files in the same set even if more files are
      # subsequently added.
      # To do that, we need a stable way of deciding based on just the file
      # name itself, so we hash it and use the hash to generate a probability
      # value that we use to assign it.
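      # A hedged illustration (hypothetical file names, not part of the
      # pipeline): 'leaf_7_nohash_1.jpg' and 'leaf_7_nohash_2.jpg' both hash
      # on 'leaf_7', so close variations always land in the same split.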
      hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest()
      percentage_hash = ((int(hash_name_hashed, 16) %
                          (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                         (100.0 / MAX_NUM_IMAGES_PER_CLASS))
      if percentage_hash < validation_percentage:
        validation_images.append(base_name)
      elif percentage_hash < (testing_percentage + validation_percentage):
        testing_images.append(base_name)
      else:
        training_images.append(base_name)
    result[label_name] = {
        'dir': dir_name,
        'training': training_images,
        'testing': testing_images,
        'validation': validation_images,
    }
  return result


def get_image_path(image_lists, label_name, index, image_dir, category):
  """
  Brief:
    Returns a path to an image for a label at the given index.
  Args:
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Int offset of the image we want. This will be taken modulo the
      available number of images for the label, so it can be arbitrarily
      large.
    image_dir: Root folder string of the subfolders containing the training
      images.
    category: Name string of the set to pull images from - training, testing,
      or validation.
  Returns:
    File system path string to an image that meets the requested parameters.
  """
  if label_name not in image_lists:
    tf.logging.fatal('Label does not exist %s.', label_name)
  label_lists = image_lists[label_name]
  if category not in label_lists:
    tf.logging.fatal('Category does not exist %s.', category)
  category_list = label_lists[category]
  if not category_list:
    tf.logging.fatal('Label %s has no images in the category %s.',
                     label_name, category)
  mod_index = index % len(category_list)
  base_name = category_list[mod_index]
  sub_dir = label_lists['dir']
  full_path = os.path.join(image_dir, sub_dir, base_name)
  return full_path


def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir,
                        category):
  """
  Brief:
    Returns a path to a bottleneck file for a label at the given index.
  Args:
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Integer offset of the image we want. This will be taken modulo the
      available number of images for the label, so it can be arbitrarily
      large.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    category: Name string of the set to pull images from - training, testing,
      or validation.
  Returns:
    File system path string to a bottleneck file for the requested parameters.
  """
  return get_image_path(image_lists, label_name, index, bottleneck_dir,
                        category) + '.txt'


def create_inception_graph():
  """
  Brief:
    Creates a graph from a saved GraphDef file and returns a Graph object.
  Returns:
    Graph holding the trained Inception network, and the various tensors
    we'll be manipulating.
  """
  with tf.Graph().as_default() as graph:
    model_filename = os.path.join(FLAGS.model_dir,
                                  'classify_image_graph_def.pb')
    with gfile.FastGFile(model_filename, 'rb') as f:
      graph_def = tf.GraphDef()
      graph_def.ParseFromString(f.read())
      bottleneck_tensor, jpeg_data_tensor, resized_input_tensor = (
          tf.import_graph_def(graph_def, name='', return_elements=[
              BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME,
              RESIZED_INPUT_TENSOR_NAME]))
  return graph, bottleneck_tensor, jpeg_data_tensor, resized_input_tensor


def run_bottleneck_on_image(sess, image_data, image_data_tensor,
                            bottleneck_tensor):
  """
  Brief:
    Runs inference on an image to extract the 'bottleneck' summary layer.
  Args:
    sess: Current active TensorFlow Session.
    image_data: String of raw JPEG data.
    image_data_tensor: Input data layer in the graph.
    bottleneck_tensor: Layer before the final softmax.
  Returns:
    Numpy array of bottleneck values.
  """
  bottleneck_values = sess.run(
      bottleneck_tensor,
      {image_data_tensor: image_data})
  bottleneck_values = np.squeeze(bottleneck_values)
  return bottleneck_values
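
# Note: run_bottleneck_on_image returns a 1-D vector of BOTTLENECK_TENSOR_SIZE
# (2048) floats; np.squeeze above drops the batch dimension from the raw
# graph output.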


def maybe_download_and_extract():
  """
  Brief:
    Downloads and extracts the model tar file.
    If the pretrained model we're using doesn't already exist, this function
    downloads it from the TensorFlow.org website and unpacks it into a
    directory.
  """
  dest_directory = FLAGS.model_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' %
                       (filename,
                        float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)


def ensure_dir_exists(dir_name):
  """
  Brief:
    Makes sure the folder exists on disk.
  Args:
    dir_name: Path string to the folder we want to create.
  """
  if not os.path.exists(dir_name):
    os.makedirs(dir_name)


def write_list_of_floats_to_file(list_of_floats, file_path):
  """
  Brief:
    Writes a given list of floats to a binary file.
  Args:
    list_of_floats: List of floats we want to write to a file.
    file_path: Path to a file where the list of floats will be stored.
  """
  s = struct.pack('d' * BOTTLENECK_TENSOR_SIZE, *list_of_floats)
  with open(file_path, 'wb') as f:
    f.write(s)


def read_list_of_floats_from_file(file_path):
  """
  Brief:
    Reads a list of floats from a given file.
  Args:
    file_path: Path to a file where the list of floats was stored.
  Returns:
    Array of bottleneck values (list of floats).
  """
  with open(file_path, 'rb') as f:
    s = struct.unpack('d' * BOTTLENECK_TENSOR_SIZE, f.read())
    return list(s)


bottleneck_path_2_bottleneck_values = {}


def create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           bottleneck_tensor):
  """Creates a single bottleneck file."""
  print('Creating bottleneck at ' + bottleneck_path)
  image_path = get_image_path(image_lists, label_name, index,
                              image_dir, category)
  if not gfile.Exists(image_path):
    tf.logging.fatal('File does not exist %s', image_path)
  image_data = gfile.FastGFile(image_path, 'rb').read()
  try:
    bottleneck_values = run_bottleneck_on_image(
        sess, image_data, jpeg_data_tensor, bottleneck_tensor)
  except Exception:
    raise RuntimeError('Error during processing file %s' % image_path)

  bottleneck_string = ','.join(str(x) for x in bottleneck_values)
  with open(bottleneck_path, 'w') as bottleneck_file:
    bottleneck_file.write(bottleneck_string)
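
# Each cached bottleneck is a plain-text file (one per image, with a '.txt'
# suffix added by get_bottleneck_path) holding 2048 comma-separated floats.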


def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
                             category, bottleneck_dir, jpeg_data_tensor,
                             bottleneck_tensor):
  """
  Brief:
    Retrieves or calculates bottleneck values for an image.

    If a cached version of the bottleneck data exists on disk, return that,
    otherwise calculate the data and save it to disk for future use.
  Args:
    sess: The current active TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Integer offset of the image we want. This will be taken modulo the
      available number of images for the label, so it can be arbitrarily
      large.
    image_dir: Root folder string of the subfolders containing the training
      images.
    category: Name string of which set to pull images from - training,
      testing, or validation.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    jpeg_data_tensor: The tensor to feed loaded jpeg data into.
    bottleneck_tensor: The output tensor for the bottleneck values.
  Returns:
    Numpy array of values produced by the bottleneck layer for the image.
  """
  label_lists = image_lists[label_name]
  sub_dir = label_lists['dir']
  sub_dir_path = os.path.join(bottleneck_dir, sub_dir)
  ensure_dir_exists(sub_dir_path)
  bottleneck_path = get_bottleneck_path(image_lists, label_name, index,
                                        bottleneck_dir, category)
  if not os.path.exists(bottleneck_path):
    create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           bottleneck_tensor)
  with open(bottleneck_path, 'r') as bottleneck_file:
    bottleneck_string = bottleneck_file.read()
  did_hit_error = False
  try:
    bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
  except ValueError:
    print('Invalid float found, recreating bottleneck')
    did_hit_error = True
  if did_hit_error:
    create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           bottleneck_tensor)
    with open(bottleneck_path, 'r') as bottleneck_file:
      bottleneck_string = bottleneck_file.read()
    # Allow exceptions to propagate here, since they shouldn't happen after a
    # fresh creation.
    bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
  return bottleneck_values


def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir,
                      jpeg_data_tensor, bottleneck_tensor):
  """
  Brief:
    Ensures all the training, testing, and validation bottlenecks are cached.

    Because we're likely to read the same image multiple times (if there are
    no distortions applied during training) it can speed things up a lot if we
    calculate the bottleneck layer values once for each image during
    preprocessing, and then just read those cached values repeatedly during
    training. Here we go through all the images we've found, calculate those
    values, and save them off.
  Args:
    sess: The current active TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    image_dir: Root folder string of the subfolders containing the training
      images.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    jpeg_data_tensor: Input tensor for jpeg data from file.
    bottleneck_tensor: The penultimate output layer of the graph.
  Returns:
    Nothing.
  """
  how_many_bottlenecks = 0
  ensure_dir_exists(bottleneck_dir)
  for label_name, label_lists in image_lists.items():
    for category in ['training', 'testing', 'validation']:
      category_list = label_lists[category]
      for index, unused_base_name in enumerate(category_list):
        get_or_create_bottleneck(sess, image_lists, label_name, index,
                                 image_dir, category, bottleneck_dir,
                                 jpeg_data_tensor, bottleneck_tensor)

        how_many_bottlenecks += 1
        if how_many_bottlenecks % 100 == 0:
          print(str(how_many_bottlenecks) + ' bottleneck files created.')


def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
                                  bottleneck_dir, image_dir, jpeg_data_tensor,
                                  bottleneck_tensor):
  """
  Brief:
    Retrieves bottleneck values for cached images.

    If no distortions are being applied, this function can retrieve the cached
    bottleneck values directly from disk for images. It picks a random set of
    images from the specified category.
  Args:
    sess: Current TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    how_many: If positive, a random sample of this size will be chosen.
      If negative, all bottlenecks will be retrieved.
    category: Name string of which set to pull from - training, testing, or
      validation.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    image_dir: Root folder string of the subfolders containing the training
      images.
    jpeg_data_tensor: The layer to feed jpeg image data into.
    bottleneck_tensor: The bottleneck output layer of the CNN graph.
  Returns:
    List of bottleneck arrays, their corresponding ground truths, and the
    relevant filenames.
  """
  class_count = len(image_lists.keys())
  bottlenecks = []
  ground_truths = []
  filenames = []
  if how_many >= 0:
    # Retrieve a random sample of bottlenecks.
    for unused_i in range(how_many):
      label_index = random.randrange(class_count)
      label_name = list(image_lists.keys())[label_index]
      image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
      image_name = get_image_path(image_lists, label_name, image_index,
                                  image_dir, category)
      bottleneck = get_or_create_bottleneck(sess, image_lists, label_name,
                                            image_index, image_dir, category,
                                            bottleneck_dir, jpeg_data_tensor,
                                            bottleneck_tensor)
      ground_truth = np.zeros(class_count, dtype=np.float32)
      ground_truth[label_index] = 1.0
      bottlenecks.append(bottleneck)
      ground_truths.append(ground_truth)
      filenames.append(image_name)
  else:
    # Retrieve all bottlenecks.
    for label_index, label_name in enumerate(image_lists.keys()):
      for image_index, image_name in enumerate(
          image_lists[label_name][category]):
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        bottleneck = get_or_create_bottleneck(sess, image_lists, label_name,
                                              image_index, image_dir,
                                              category, bottleneck_dir,
                                              jpeg_data_tensor,
                                              bottleneck_tensor)
        ground_truth = np.zeros(class_count, dtype=np.float32)
        ground_truth[label_index] = 1.0
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
        filenames.append(image_name)
  return bottlenecks, ground_truths, filenames
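
# The ground truth vectors built above are one-hot. For example, with this
# project's two labels, a 'crack' image becomes [1.0, 0.0] and a 'no crack'
# image [0.0, 1.0] (illustrative ordering; it follows the label dictionary).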


def get_random_distorted_bottlenecks(
    sess, image_lists, how_many, category, image_dir, input_jpeg_tensor,
    distorted_image, resized_input_tensor, bottleneck_tensor):
  """
  Brief:
    Retrieves bottleneck values for training images, after distortions.

    If we're training with distortions like crops, scales, or flips, we have
    to recalculate the full model for every image, and so we can't use cached
    bottleneck values. Instead we find random images for the requested
    category, run them through the distortion graph, and then the full graph
    to get the bottleneck results for each.
  Args:
    sess: Current TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    how_many: The integer number of bottleneck values to return.
    category: Name string of which set of images to fetch - training, testing,
      or validation.
    image_dir: Root folder string of the subfolders containing the training
      images.
    input_jpeg_tensor: The input layer we feed the image data to.
    distorted_image: The output node of the distortion graph.
    resized_input_tensor: The input node of the recognition graph.
    bottleneck_tensor: The bottleneck output layer of the CNN graph.
  Returns:
    List of bottleneck arrays and their corresponding ground truths.
  """
  class_count = len(image_lists.keys())
  bottlenecks = []
  ground_truths = []
  for unused_i in range(how_many):
    label_index = random.randrange(class_count)
    label_name = list(image_lists.keys())[label_index]
    image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
    image_path = get_image_path(image_lists, label_name, image_index,
                                image_dir, category)
    if not gfile.Exists(image_path):
      tf.logging.fatal('File does not exist %s', image_path)
    jpeg_data = gfile.FastGFile(image_path, 'rb').read()
    # Note that we materialize the distorted_image_data as a numpy array
    # before running inference on the image. This involves 2 memory copies
    # and might be optimized in other implementations.
    distorted_image_data = sess.run(distorted_image,
                                    {input_jpeg_tensor: jpeg_data})
    bottleneck = run_bottleneck_on_image(sess, distorted_image_data,
                                         resized_input_tensor,
                                         bottleneck_tensor)
    ground_truth = np.zeros(class_count, dtype=np.float32)
    ground_truth[label_index] = 1.0
    bottlenecks.append(bottleneck)
    ground_truths.append(ground_truth)
  return bottlenecks, ground_truths


def should_distort_images(flip_left_right, random_crop, random_scale,
                          random_brightness):
  """
  Brief:
    Returns whether any distortions are enabled, from the input flags.
  Args:
    flip_left_right: Boolean whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
      crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer range to randomly multiply the pixel values by.
  Returns:
    Boolean value indicating whether any distortions should be applied.
  """
  return (flip_left_right or (random_crop != 0) or (random_scale != 0) or
          (random_brightness != 0))


def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness):
  """
  Brief:
    Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect
    the kind of variations we expect in the real world, and so can help train
    the model to cope with natural data more effectively. Here we take the
    supplied parameters and construct a network of operations to apply them
    to an image.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the
    full image. The cropping parameter controls the size of that box relative
    to the input image. If it's zero, then the box is the same size as the
    input and no cropping is performed. If the value is 50%, then the crop
    box will be half the width and height of the input. In a diagram it looks
    like this:

     <       width      >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example
    if the scale percentage is zero, then the bounding box is the same size
    as the input and no scaling is applied. If it's 50%, then the bounding
    box will be in a random range between half the width and height and full
    size.
  Args:
    flip_left_right: Boolean whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
      crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer range to randomly multiply the pixel values by.
  Returns:
    The jpeg input layer and the distorted result tensor.
  """
  jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
  decoded_image = tf.image.decode_jpeg(jpeg_data, channels=MODEL_INPUT_DEPTH)
  decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
  decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
  margin_scale = 1.0 + (random_crop / 100.0)
  resize_scale = 1.0 + (random_scale / 100.0)
  margin_scale_value = tf.constant(margin_scale)
  resize_scale_value = tf.random_uniform(tensor_shape.scalar(),
                                         minval=1.0,
                                         maxval=resize_scale)
  scale_value = tf.multiply(margin_scale_value, resize_scale_value)
  precrop_width = tf.multiply(scale_value, MODEL_INPUT_WIDTH)
  precrop_height = tf.multiply(scale_value, MODEL_INPUT_HEIGHT)
  precrop_shape = tf.stack([precrop_height, precrop_width])
  precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
  precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                              precrop_shape_as_int)
  precropped_image_3d = tf.squeeze(precropped_image, squeeze_dims=[0])
  cropped_image = tf.random_crop(precropped_image_3d,
                                 [MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH,
                                  MODEL_INPUT_DEPTH])
  if flip_left_right:
    flipped_image = tf.image.random_flip_left_right(cropped_image)
  else:
    flipped_image = cropped_image
  brightness_min = 1.0 - (random_brightness / 100.0)
  brightness_max = 1.0 + (random_brightness / 100.0)
  brightness_value = tf.random_uniform(tensor_shape.scalar(),
                                       minval=brightness_min,
                                       maxval=brightness_max)
  brightened_image = tf.multiply(flipped_image, brightness_value)
  distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
  return jpeg_data, distort_result
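
# Illustrative usage sketch (assumed parameter values; main() below wires
# these tensors into the training loop rather than calling this directly):
#   jpeg_in, distorted = add_input_distortions(True, 10, 10, 10)
#   distorted_data = sess.run(distorted, {jpeg_in: raw_jpeg_bytes})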


def variable_summaries(var):
  """Attaches a lot of summaries to a Tensor (for TensorBoard
  visualization)."""
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.summary.scalar('mean', mean)
    with tf.name_scope('stddev'):
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar('stddev', stddev)
    tf.summary.scalar('max', tf.reduce_max(var))
    tf.summary.scalar('min', tf.reduce_min(var))
    tf.summary.histogram('histogram', var)


def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
  """
  Brief:
    Adds a new softmax and fully-connected layer for training.

    We need to retrain the top layer to identify our new classes, so this
    function adds the right operations to the graph, along with some
    variables to hold the weights, and then sets up all the gradients for the
    backward pass.

    The setup for the softmax and fully-connected layers is based on:
    https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html
  Args:
    class_count: Integer of how many categories of things we're trying to
      recognize.
    final_tensor_name: Name string for the new final node that produces
      results.
    bottleneck_tensor: The output of the main CNN graph.
  Returns:
    The tensors for the training and cross entropy results, and tensors for
    the bottleneck input and ground truth input.
  """
  with tf.name_scope('input'):
    bottleneck_input = tf.placeholder_with_default(
        bottleneck_tensor, shape=[None, BOTTLENECK_TENSOR_SIZE],
        name='BottleneckInputPlaceholder')

    ground_truth_input = tf.placeholder(tf.float32,
                                        [None, class_count],
                                        name='GroundTruthInput')

  # Organizing the following ops as `final_training_ops` so they're easier
  # to see in TensorBoard.
  layer_name = 'final_training_ops'
  with tf.name_scope(layer_name):
    with tf.name_scope('weights'):
      initial_value = tf.truncated_normal(
          [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001)

      layer_weights = tf.Variable(initial_value, name='final_weights')

      variable_summaries(layer_weights)
    with tf.name_scope('biases'):
      layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
      variable_summaries(layer_biases)
    with tf.name_scope('Wx_plus_b'):
      logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
      tf.summary.histogram('pre_activations', logits)

  final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
  tf.summary.histogram('activations', final_tensor)

  with tf.name_scope('cross_entropy'):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=ground_truth_input, logits=logits)
    with tf.name_scope('total'):
      cross_entropy_mean = tf.reduce_mean(cross_entropy)
  tf.summary.scalar('cross_entropy', cross_entropy_mean)

  with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    train_step = optimizer.minimize(cross_entropy_mean)

  return (train_step, cross_entropy_mean, bottleneck_input,
          ground_truth_input, final_tensor)
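
# The retrained head is a single fully-connected layer on top of the frozen
# Inception features:
#   logits = bottleneck . W + b      (W has shape [2048, class_count])
#   final_result = softmax(logits)
# Only W (final_weights) and b (final_biases) are updated by the gradient
# descent step above; the rest of the Inception graph stays fixed.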


def add_evaluation_step(result_tensor, ground_truth_tensor):
  """
  Brief:
    Inserts the operations we need to evaluate the accuracy of our results.
  Args:
    result_tensor: The new final node that produces results.
    ground_truth_tensor: The node we feed ground truth data into.
  Returns:
    Tuple of (evaluation step, prediction).
  """
  with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
      prediction = tf.argmax(result_tensor, 1)
      correct_prediction = tf.equal(
          prediction, tf.argmax(ground_truth_tensor, 1))
    with tf.name_scope('accuracy'):
      evaluation_step = tf.reduce_mean(tf.cast(correct_prediction,
                                               tf.float32))
  tf.summary.scalar('accuracy', evaluation_step)
  return evaluation_step, prediction


def main(_):
  # Set up the directory we'll write summaries to for TensorBoard.
  if tf.gfile.Exists(FLAGS.summaries_dir):
    tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
  tf.gfile.MakeDirs(FLAGS.summaries_dir)

  # Set up the pre-trained graph.
  maybe_download_and_extract()
  graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = (
      create_inception_graph())

  # Look at the folder structure, and create lists of all the images.
  image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                   FLAGS.validation_percentage)
  class_count = len(image_lists.keys())
  if class_count == 0:
    print('No valid folders of images found at ' + FLAGS.image_dir)
    return -1
  if class_count == 1:
    print('Only one valid folder of images found at ' + FLAGS.image_dir +
          ' - multiple classes are needed for classification.')
    return -1

  # See if the command-line flags mean we're applying any distortions.
  do_distort_images = should_distort_images(
      FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
      FLAGS.random_brightness)

  with tf.Session(graph=graph) as sess:

    if do_distort_images:
      # We will be applying distortions, so set up the operations we'll need.
      (distorted_jpeg_data_tensor,
       distorted_image_tensor) = add_input_distortions(
           FLAGS.flip_left_right, FLAGS.random_crop,
           FLAGS.random_scale, FLAGS.random_brightness)
    else:
      # We'll make sure we've calculated the 'bottleneck' image summaries and
      # cached them on disk.
      cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                        FLAGS.bottleneck_dir, jpeg_data_tensor,
                        bottleneck_tensor)

    # Add the new layer that we'll be training.
    (train_step, cross_entropy, bottleneck_input, ground_truth_input,
     final_tensor) = add_final_training_ops(len(image_lists.keys()),
                                            FLAGS.final_tensor_name,
                                            bottleneck_tensor)

    # Create the operations we need to evaluate the accuracy of our new layer.
    evaluation_step, prediction = add_evaluation_step(
        final_tensor, ground_truth_input)

    # Merge all the summaries and write them out to the summaries_dir.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)

    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    # Set up all our weights to their initial default values.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Run the training for as many cycles as requested on the command line.
    for i in range(FLAGS.how_many_training_steps):
      # Get a batch of input bottleneck values, either calculated fresh every
      # time with distortions applied, or from the cache stored on disk.
      if do_distort_images:
        (train_bottlenecks,
         train_ground_truth) = get_random_distorted_bottlenecks(
             sess, image_lists, FLAGS.train_batch_size, 'training',
             FLAGS.image_dir, distorted_jpeg_data_tensor,
             distorted_image_tensor, resized_image_tensor, bottleneck_tensor)
      else:
        (train_bottlenecks,
         train_ground_truth, _) = get_random_cached_bottlenecks(
             sess, image_lists, FLAGS.train_batch_size, 'training',
             FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
             bottleneck_tensor)
      # Feed the bottlenecks and ground truth into the graph, and run a
      # training step. Capture training summaries for TensorBoard with the
      # `merged` op.
      train_summary, _ = sess.run(
          [merged, train_step],
          feed_dict={bottleneck_input: train_bottlenecks,
                     ground_truth_input: train_ground_truth})
      train_writer.add_summary(train_summary, i)

      # Every so often, print out how well the graph is training.
      is_last_step = (i + 1 == FLAGS.how_many_training_steps)
      if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
        train_accuracy, cross_entropy_value = sess.run(
            [evaluation_step, cross_entropy],
            feed_dict={bottleneck_input: train_bottlenecks,
                       ground_truth_input: train_ground_truth})
        validation_bottlenecks, validation_ground_truth, _ = (
            get_random_cached_bottlenecks(
                sess, image_lists, FLAGS.validation_batch_size, 'validation',
                FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                bottleneck_tensor))
        # Run a validation step and capture training summaries for
        # TensorBoard with the `merged` op.
        validation_summary, validation_accuracy = sess.run(
            [merged, evaluation_step],
            feed_dict={bottleneck_input: validation_bottlenecks,
                       ground_truth_input: validation_ground_truth})
        validation_writer.add_summary(validation_summary, i)
        print('Step: %d, Train accuracy: %.4f%%, Cross entropy: %f, '
              'Validation accuracy: %.1f%% (N=%d)' %
              (i, train_accuracy * 100, cross_entropy_value,
               validation_accuracy * 100, len(validation_bottlenecks)))

    # We've completed all our training, so run a final test evaluation on
    # some new images we haven't used before.
    test_bottlenecks, test_ground_truth, test_filenames = (
        get_random_cached_bottlenecks(sess, image_lists,
                                      FLAGS.test_batch_size, 'testing',
                                      FLAGS.bottleneck_dir, FLAGS.image_dir,
                                      jpeg_data_tensor, bottleneck_tensor))
    test_accuracy, predictions = sess.run(
        [evaluation_step, prediction],
        feed_dict={bottleneck_input: test_bottlenecks,
                   ground_truth_input: test_ground_truth})
    print('Final test accuracy = %.1f%% (N=%d)' % (
        test_accuracy * 100, len(test_bottlenecks)))

    if FLAGS.print_misclassified_test_images:
      print('=== MISCLASSIFIED TEST IMAGES ===')
      for i, test_filename in enumerate(test_filenames):
        if predictions[i] != test_ground_truth[i].argmax():
          print('%70s %s' % (test_filename,
                             list(image_lists.keys())[predictions[i]]))

    # Write out the trained graph and labels with the weights stored as
    # constants.
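    # (graph_util.convert_variables_to_constants folds the trained weights
    # into the GraphDef itself, so classify.py can load a single .pb file
    # with no separate checkpoint files.)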
    output_graph_def = graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), [FLAGS.final_tensor_name])
    with gfile.FastGFile(FLAGS.output_graph, 'wb') as f:
      f.write(output_graph_def.SerializeToString())
    with gfile.FastGFile(FLAGS.output_labels, 'w') as f:
      f.write('\n'.join(image_lists.keys()) + '\n')


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--image_dir',
      type=str,
      default='',
      help='Path to folders of labeled images.'
  )
  parser.add_argument(
      '--output_graph',
      type=str,
      default='/tmp/output_graph.pb',
      help='Where to save the trained graph.'
  )
  parser.add_argument(
      '--output_labels',
      type=str,
      default='/tmp/output_labels.txt',
      help='Where to save the trained graph\'s labels.'
  )
  parser.add_argument(
      '--summaries_dir',
      type=str,
      default='/tmp/retrain_logs',
      help='Where to save summary logs for TensorBoard.'
  )
  parser.add_argument(
      '--how_many_training_steps',
      type=int,
      default=5000,
      help='How many training steps to run before ending.'
  )
  parser.add_argument(
      '--learning_rate',
      type=float,
      default=0.01,
      help='How large a learning rate to use when training.'
  )
  parser.add_argument(
      '--testing_percentage',
      type=int,
      default=10,
      help='What percentage of images to use as a test set.'
  )
  parser.add_argument(
      '--validation_percentage',
      type=int,
      default=10,
      help='What percentage of images to use as a validation set.'
  )
  parser.add_argument(
      '--eval_step_interval',
      type=int,
      default=100,
      help='How often to evaluate the training results.'
  )
  parser.add_argument(
      '--train_batch_size',
      type=int,
      default=100,
      help='How many images to train on at a time.'
  )
  parser.add_argument(
      '--test_batch_size',
      type=int,
      default=-1,
      help="""\
      How many images to test on. This test set is only used once, to evaluate
      the final accuracy of the model after training completes.
      A value of -1 causes the entire test set to be used, which leads to more
      stable results across runs.\
      """
  )
  parser.add_argument(
      '--validation_batch_size',
      type=int,
      default=100,
      help="""\
      How many images to use in an evaluation batch. This validation set is
      used much more often than the test set, and is an early indicator of how
      accurate the model is during training.
      A value of -1 causes the entire validation set to be used, which leads
      to more stable results across training iterations, but may be slower on
      large training sets.\
      """
  )
  parser.add_argument(
      '--print_misclassified_test_images',
      default=False,
      help="""\
      Whether to print out a list of all misclassified test images.\
      """,
      action='store_true'
  )
  parser.add_argument(
      '--model_dir',
      type=str,
      default='/tmp/imagenet',
      help="""\
      Path to classify_image_graph_def.pb,
      imagenet_synset_to_human_label_map.txt, and
      imagenet_2012_challenge_label_map_proto.pbtxt.\
      """
  )
  parser.add_argument(
      '--bottleneck_dir',
      type=str,
      default='/tmp/bottleneck',
      help='Path to cache bottleneck layer values as files.'
  )
  parser.add_argument(
      '--final_tensor_name',
      type=str,
      default='final_result',
      help="""\
      The name of the output classification layer in the retrained graph.\
      """
  )
  parser.add_argument(
      '--flip_left_right',
      default=False,
      help="""\
      Whether to randomly flip half of the training images horizontally.\
      """,
      action='store_true'
  )
  parser.add_argument(
      '--random_crop',
      type=int,
      default=0,
      help="""\
      A percentage determining how much of a margin to randomly crop off the
      training images.\
      """
  )
  parser.add_argument(
      '--random_scale',
      type=int,
      default=0,
      help="""\
      A percentage determining how much to randomly scale up the size of the
      training images by.\
      """
  )
  parser.add_argument(
      '--random_brightness',
      type=int,
      default=0,
      help="""\
      A percentage determining how much to randomly multiply the training
      image input pixels up or down by.\
      """
  )
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

--------------------------------------------------------------------------------