├── .gitignore ├── CRFCNNImageSegmentation.py ├── README.md ├── cat.jpg ├── cat_annotation.png └── obj_detection ├── README.md ├── annotations ├── test.txt ├── trainval.txt └── xmls │ ├── shirt-1.xml │ ├── shirt-3.xml │ ├── shirt-4.xml │ ├── shirt-5.xml │ ├── skirt-2.xml │ ├── skirt-4.xml │ ├── skirt-5.xml │ ├── suit-1.xml │ ├── suit-3.xml │ ├── suit-4.xml │ └── suit-5.xml ├── create_fashion_tf_record.py ├── evaluation-results ├── 1.png ├── 2.png └── 3.png ├── fashion_label_map.pbtxt ├── faster_rcnn_resnet101_fash.config └── images ├── shirt-1.jpg ├── shirt-3.jpg ├── shirt-4.jpg ├── shirt-5.jpg ├── skirt-2.jpg ├── skirt-4.jpg ├── skirt-5.jpg ├── suit-1.jpg ├── suit-3.jpg ├── suit-4.jpg └── suit-5.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /CRFCNNImageSegmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import os 6 | from matplotlib import pyplot as plt 7 | slim = tf.contrib.slim 8 | from nets import vgg 9 | # Load the mean pixel values and the function 10 | # that performs the subtraction from each pixel 11 | from preprocessing.vgg_preprocessing import (_mean_image_subtraction, 12 | _R_MEAN, _G_MEAN, _B_MEAN) 13 | import pydensecrf.densecrf as dcrf 14 | from pydensecrf.utils import compute_unary, create_pairwise_bilateral, \ 15 | create_pairwise_gaussian, softmax_to_unary 16 | 17 | def get_kernel_size(factor): 18 | """ 19 | Find the kernel size given the desired factor of upsampling. 20 | """ 21 | return 2 * factor - factor % 2 22 | 23 | 24 | def upsample_filt(size): 25 | """ 26 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size. 
27 | """ 28 | factor = (size + 1) // 2 29 | if size % 2 == 1: 30 | center = factor - 1 31 | else: 32 | center = factor - 0.5 33 | og = np.ogrid[:size, :size] 34 | return (1 - abs(og[0] - center) / factor) * \ 35 | (1 - abs(og[1] - center) / factor) 36 | 37 | 38 | def bilinear_upsample_weights(factor, number_of_classes): 39 | """ 40 | Create weights matrix for transposed convolution with bilinear filter 41 | initialization. 42 | """ 43 | 44 | filter_size = get_kernel_size(factor) 45 | 46 | weights = np.zeros((filter_size, 47 | filter_size, 48 | number_of_classes, 49 | number_of_classes), dtype=np.float32) 50 | 51 | upsample_kernel = upsample_filt(filter_size) 52 | 53 | for i in xrange(number_of_classes): 54 | weights[:, :, i, i] = upsample_kernel 55 | 56 | return weights 57 | 58 | 59 | 60 | 61 | os.environ["CUDA_VISIBLE_DEVICES"] = '1' 62 | # sys.path.append("/home/dpakhom1/workspace/my_models/slim/") 63 | checkpoints_dir = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/vgg16' 64 | 65 | image_filename = 'cat.jpg' 66 | annotation_filename = 'cat_annotation.png' 67 | # 68 | # image_filename = 'dog.png' 69 | # annotation_filename = 'dog_black.png' 70 | 71 | image_filename_placeholder = tf.placeholder(tf.string) 72 | annotation_filename_placeholder = tf.placeholder(tf.string) 73 | is_training_placeholder = tf.placeholder(tf.bool) 74 | 75 | feed_dict_to_use = {image_filename_placeholder: image_filename, 76 | annotation_filename_placeholder: annotation_filename, 77 | is_training_placeholder: True} 78 | 79 | image_tensor = tf.read_file(image_filename_placeholder) 80 | annotation_tensor = tf.read_file(annotation_filename_placeholder) 81 | 82 | image_tensor = tf.image.decode_jpeg(image_tensor, channels=3) 83 | annotation_tensor = tf.image.decode_png(annotation_tensor, channels=1) 84 | 85 | # Get ones for each class instead of a number -- we need that 86 | # for cross-entropy loss later on. Sometimes the groundtruth 87 | # masks have values other than 1 and 0. 
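#
# NOTE (illustrative sketch, not part of the original script): the mask
# preparation below builds a two-channel {class, background} one-hot mask.
# In plain NumPy, assuming `ann` is an HxWx1 uint8 annotation, the same
# computation would look like:
#
#   import numpy as np
#   ann = np.array([[[1], [0]], [[0], [1]]], dtype=np.uint8)   # toy 2x2 mask
#   fg = (ann == 1).astype(np.float32)                         # tf.equal + tf.to_float
#   bg = (ann != 1).astype(np.float32)                         # tf.not_equal + tf.to_float
#   onehot = np.concatenate([fg, bg], axis=2)                  # tf.concat(axis=2, ...)
#   flat = onehot.reshape(-1, 2)                               # tf.reshape(..., (-1, 2))
#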
88 | class_labels_tensor = tf.equal(annotation_tensor, 1) 89 | background_labels_tensor = tf.not_equal(annotation_tensor, 1) 90 | 91 | # Convert the boolean values into floats -- so that 92 | # computations in cross-entropy loss is correct 93 | bit_mask_class = tf.to_float(class_labels_tensor) 94 | bit_mask_background = tf.to_float(background_labels_tensor) 95 | 96 | combined_mask = tf.concat(axis=2, values=[bit_mask_class, 97 | bit_mask_background]) 98 | 99 | # Lets reshape our input so that it becomes suitable for 100 | # tf.softmax_cross_entropy_with_logits with [batch_size, num_classes] 101 | flat_labels = tf.reshape(tensor=combined_mask, shape=(-1, 2)) 102 | 103 | 104 | fig_size = [15, 4] 105 | plt.rcParams["figure.figsize"] = fig_size 106 | 107 | 108 | upsample_factor = 32 109 | number_of_classes = 2 110 | log_folder = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/logs' 111 | 112 | vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt') 113 | 114 | # Convert image to float32 before subtracting the 115 | # mean pixel value 116 | image_float = tf.to_float(image_tensor, name='ToFloat') 117 | 118 | # Subtract the mean pixel value from each pixel 119 | mean_centered_image = _mean_image_subtraction(image_float, 120 | [_R_MEAN, _G_MEAN, _B_MEAN]) 121 | 122 | processed_images = tf.expand_dims(mean_centered_image, 0) 123 | 124 | upsample_filter_np = bilinear_upsample_weights(upsample_factor, 125 | number_of_classes) 126 | 127 | upsample_filter_tensor = tf.constant(upsample_filter_np) 128 | 129 | # Define the model that we want to use -- specify to use only two classes at the last layer 130 | with slim.arg_scope(vgg.vgg_arg_scope()): 131 | logits, end_points = vgg.vgg_16(processed_images, 132 | num_classes=2, 133 | is_training=is_training_placeholder, 134 | spatial_squeeze=False, 135 | fc_conv_padding='SAME') 136 | 137 | downsampled_logits_shape = tf.shape(logits) 138 | 139 | # Calculate the ouput size of the upsampled tensor 140 | upsampled_logits_shape = tf.stack([ 141 | downsampled_logits_shape[0], 142 | downsampled_logits_shape[1] * upsample_factor, 143 | downsampled_logits_shape[2] * upsample_factor, 144 | downsampled_logits_shape[3] 145 | ]) 146 | 147 | # Perform the upsampling 148 | upsampled_logits = tf.nn.conv2d_transpose(logits, upsample_filter_tensor, 149 | output_shape=upsampled_logits_shape, 150 | strides=[1, upsample_factor, upsample_factor, 1]) 151 | 152 | # Flatten the predictions, so that we can compute cross-entropy for 153 | # each pixel and get a sum of cross-entropies. 154 | flat_logits = tf.reshape(tensor=upsampled_logits, shape=(-1, number_of_classes)) 155 | 156 | cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits, 157 | labels=flat_labels) 158 | 159 | cross_entropy_sum = tf.reduce_sum(cross_entropies) 160 | 161 | # Tensor to get the final prediction for each pixel -- pay 162 | # attention that we don't need softmax in this case because 163 | # we only need the final decision. If we also need the respective 164 | # probabilities we will have to apply softmax. 165 | pred = tf.argmax(upsampled_logits, dimension=3) 166 | 167 | probabilities = tf.nn.softmax(upsampled_logits) 168 | 169 | # Here we define an optimizer and put all the variables 170 | # that will be created under a namespace of 'adam_vars'. 171 | # This is done so that we can easily access them later. 172 | # Those variables are used by adam optimizer and are not 173 | # related to variables of the vgg model. 
174 | 175 | # We also retrieve gradient Tensors for each of our variables 176 | # This way we can later visualize them in tensorboard. 177 | # optimizer.compute_gradients and optimizer.apply_gradients 178 | # is equivalent to running: 179 | # train_step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy_sum) 180 | with tf.variable_scope("adam_vars"): 181 | optimizer = tf.train.AdamOptimizer(learning_rate=0.0001) 182 | gradients = optimizer.compute_gradients(loss=cross_entropy_sum) 183 | 184 | for grad_var_pair in gradients: 185 | current_variable = grad_var_pair[1] 186 | current_gradient = grad_var_pair[0] 187 | 188 | # Relace some characters from the original variable name 189 | # tensorboard doesn't accept ':' symbol 190 | gradient_name_to_save = current_variable.name.replace(":", "_") 191 | 192 | # Let's get histogram of gradients for each layer and 193 | # visualize them later in tensorboard 194 | tf.summary.histogram(gradient_name_to_save, current_gradient) 195 | 196 | train_step = optimizer.apply_gradients(grads_and_vars=gradients) 197 | 198 | # Now we define a function that will load the weights from VGG checkpoint 199 | # into our variables when we call it. We exclude the weights from the last layer 200 | # which is responsible for class predictions. We do this because 201 | # we will have different number of classes to predict and we can't 202 | # use the old ones as an initialization. 203 | vgg_except_fc8_weights = slim.get_variables_to_restore(exclude=['vgg_16/fc8', 'adam_vars']) 204 | 205 | # Here we get variables that belong to the last layer of network. 206 | # As we saw, the number of classes that VGG was originally trained on 207 | # is different from ours -- in our case it is only 2 classes. 208 | vgg_fc8_weights = slim.get_variables_to_restore(include=['vgg_16/fc8']) 209 | 210 | adam_optimizer_variables = slim.get_variables_to_restore(include=['adam_vars']) 211 | 212 | # Add summary op for the loss -- to be able to see it in 213 | # tensorboard. 214 | tf.summary.scalar('cross_entropy_loss', cross_entropy_sum) 215 | 216 | # Put all summary ops into one op. Produces string when 217 | # you run it. 218 | merged_summary_op = tf.summary.merge_all() 219 | 220 | # Create the summary writer -- to write all the logs 221 | # into a specified file. This file can be later read 222 | # by tensorboard. 223 | summary_string_writer = tf.summary.FileWriter(log_folder) 224 | 225 | # Create the log folder if doesn't exist yet 226 | if not os.path.exists(log_folder): 227 | os.makedirs(log_folder) 228 | 229 | # Create an OP that performs the initialization of 230 | # values of variables to the values from VGG. 231 | read_vgg_weights_except_fc8_func = slim.assign_from_checkpoint_fn( 232 | vgg_checkpoint_path, 233 | vgg_except_fc8_weights) 234 | 235 | # Initializer for new fc8 weights -- for two classes. 236 | vgg_fc8_weights_initializer = tf.variables_initializer(vgg_fc8_weights) 237 | 238 | # Initializer for adam variables 239 | optimization_variables_initializer = tf.variables_initializer(adam_optimizer_variables) 240 | 241 | with tf.Session() as sess: 242 | # Run the initializers. 
243 | read_vgg_weights_except_fc8_func(sess) 244 | sess.run(vgg_fc8_weights_initializer) 245 | sess.run(optimization_variables_initializer) 246 | 247 | train_image, train_annotation = sess.run([image_tensor, annotation_tensor], 248 | feed_dict=feed_dict_to_use) 249 | 250 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 251 | ax1.imshow(train_image) 252 | ax1.set_title('Input image') 253 | probability_graph = ax2.imshow(np.dstack((train_annotation,) * 3) * 100) 254 | ax2.set_title('Input Ground-Truth Annotation') 255 | plt.show() 256 | 257 | # Let's perform 10 iterations 258 | for i in range(10): 259 | print("Starting iteration - " + str(i)) 260 | loss, summary_string = sess.run([cross_entropy_sum, merged_summary_op], 261 | feed_dict=feed_dict_to_use) 262 | 263 | sess.run(train_step, feed_dict=feed_dict_to_use) 264 | 265 | pred_np, probabilities_np = sess.run([pred, probabilities], 266 | feed_dict=feed_dict_to_use) 267 | 268 | summary_string_writer.add_summary(summary_string, i) 269 | 270 | cmap = plt.get_cmap('bwr') 271 | 272 | # f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 273 | # ax1.imshow(np.uint8(pred_np.squeeze() != 1), vmax=1.5, vmin=-0.4, cmap=cmap) 274 | # ax1.set_title('Argmax. Iteration # ' + str(i)) 275 | # probability_graph = ax2.imshow(probabilities_np.squeeze()[:, :, 0]) 276 | # ax2.set_title('Probability of the Class. Iteration # ' + str(i)) 277 | # 278 | # plt.colorbar(probability_graph) 279 | # plt.show() 280 | 281 | print("Current Loss: " + str(loss)) 282 | 283 | feed_dict_to_use[is_training_placeholder] = False 284 | 285 | final_predictions, final_probabilities, final_loss = sess.run([pred, 286 | probabilities, 287 | cross_entropy_sum], 288 | feed_dict=feed_dict_to_use) 289 | 290 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 291 | 292 | ax1.imshow(np.uint8(final_predictions.squeeze() != 1), 293 | vmax=1.5, 294 | vmin=-0.4, 295 | cmap=cmap) 296 | 297 | ax1.set_title('Final Argmax') 298 | 299 | probability_graph = ax2.imshow(final_probabilities.squeeze()[:, :, 0]) 300 | ax2.set_title('Final Probability of the Class') 301 | plt.colorbar(probability_graph) 302 | 303 | plt.show() 304 | 305 | print("Final Loss: " + str(final_loss)) 306 | 307 | summary_string_writer.close() 308 | 309 |
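# NOTE (illustrative sketch, not part of the original script): the fine-tuned
# weights only live for the duration of this session; the script never
# checkpoints them. To reuse the segmentation model later, you could create a
# saver and save before the session ends (the checkpoint path below is
# hypothetical):
#
#   saver = tf.train.Saver()
#   saver.save(sess, os.path.join(log_folder, 'fcn_vgg16_finetuned.ckpt'))
#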
310 | image = train_image 311 | 312 | softmax = final_probabilities.squeeze() 313 | 314 | 315 | softmax = softmax.transpose((2, 0, 1)) 316 | 317 | # The input should be the negative of the logarithm of probability values 318 | # Look up the definition of softmax_to_unary for more information 319 | unary = softmax_to_unary(softmax) 320 | 321 | # The inputs should be C-contiguous -- we are using a Cython wrapper 322 | unary = np.ascontiguousarray(unary) 323 | 324 | d = dcrf.DenseCRF(image.shape[0] * image.shape[1], 2) 325 | 326 | d.setUnaryEnergy(unary) 327 | 328 | # This potential penalizes small pieces of segmentation that are 329 | # spatially isolated -- enforces more spatially consistent segmentations 330 | feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2]) 331 | 332 | d.addPairwiseEnergy(feats, compat=3, 333 | kernel=dcrf.DIAG_KERNEL, 334 | normalization=dcrf.NORMALIZE_SYMMETRIC) 335 | 336 | # This creates the color-dependent features -- 337 | # because the segmentations that we get from the CNN are too coarse 338 | # and we can use local color features to refine them 339 | feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20), 340 | img=image, chdim=2) 341 | 342 | d.addPairwiseEnergy(feats, compat=10, 343 | kernel=dcrf.DIAG_KERNEL, 344 | normalization=dcrf.NORMALIZE_SYMMETRIC) 345 | Q = d.inference(5) 346 | 347 | res = np.argmax(Q, axis=0).reshape((image.shape[0], image.shape[1])) 348 | 349 | cmap = plt.get_cmap('bwr') 350 | 351 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 352 | ax1.imshow(res, vmax=1.5, vmin=-0.4, cmap=cmap) 353 | ax1.set_title('Segmentation with CRF post-processing') 354 | probability_graph = ax2.imshow(np.dstack((train_annotation,)*3)*100) 355 | ax2.set_title('Ground-Truth Annotation') 356 | plt.show() 357 | 358 | 359 | 360 | 361 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CRF-image-segmentation 2 | 3 | 1. Install TensorFlow, TF-Slim and the other dependencies required by the code (ideally in a virtual environment). 4 | 2. Download the VGG-16 model: http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz 5 | 3. Untar the model and then set the checkpoints_dir variable to the folder containing the extracted VGG model. 6 | 4. Update the log folder variable log_folder. 7 | 5. Lines 272 - 279 have been temporarily commented out to prevent plots from being shown after each iteration. Plots hold up further execution until the plot window is closed. 8 | 6. The input image size should be 480x352. All images must be resized to this dimension before being fed to the algorithm. 9 | 7. Replace the variable named 'processed_probabilities' with 'softmax' (already done in the code). 10 | -------------------------------------------------------------------------------- /cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat.jpg -------------------------------------------------------------------------------- /cat_annotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat_annotation.png -------------------------------------------------------------------------------- /obj_detection/README.md: -------------------------------------------------------------------------------- 1 | # OBJ-Detection 2 | 3 | We use the Object Detection API provided by TensorFlow to build the solution. 4 | 5 | 1. Install TensorFlow and then clone the TensorFlow models repository - https://github.com/tensorflow/models. 6 | 2. Follow the installation steps documented in models/object_detection. 7 | 3. Copy the annotations and images folders to models/object_detection. images contains the actual images, while annotations contains XMLs specifying the bounding boxes for each image. 8 | 4. Copy create_fashion_tf_record.py to models/object_detection. 9 | 5. Copy fashion_label_map.pbtxt to models/object_detection/data. 10 | 6. Execute the following command from the object_detection folder: "python create_fashion_tf_record.py --label_map_path=data/fashion_label_map.pbtxt --data_dir=`pwd` --output_dir=`pwd`". 11 | This will create two files, fash_train.record and fash_val.record. These files will be fed to the TensorFlow network (a small verification snippet is shown after this list). 12 | 7. Copy fash_train.record and fash_val.record to the data folder. 13 | 8. Create a folder called fash-model in models. 14 | 9. Copy faster_rcnn_resnet101_fash.config to fash-model. 15 | 10. Create folders named train and eval inside the fash-model folder. 16 | 11. Download the COCO-pretrained Faster R-CNN with Resnet-101 model. Unzip the contents of the archive and copy the model.ckpt* files into the fash-model folder. (http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz) 17 | 12. Start training by executing the following command from the object_detection folder - "python train.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --train_dir=models/fash-model/train". 18 | Training goes on indefinitely until it is killed by the user. 19 | 13. Execute "tensorboard --logdir=models/fash-model" to visualize the training and eval phases. 20 | 14. For evaluation, execute the following command from the object_detection folder - "python eval.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --checkpoint_dir=models/fash-model/train --eval_dir=models/fash-model/eval". 21 | This command will periodically fetch the latest checkpoint from models/fash-model/train and perform evaluations. Open the Images tab in the TensorBoard UI to see the evaluation results. 22 | 23 | Since the dataset is very small, some noise is visible in the evaluation results. Even so, the correct categories were detected in each image with the highest confidence. 24 |
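Before training, it can help to sanity-check the generated records. The following is an illustrative snippet (not part of the repository); it assumes the two record files were copied to the data folder as described in step 7 and uses the TF 1.x record iterator:

```python
import tensorflow as tf

# Count the serialized examples in each generated TFRecord file.
for path in ['data/fash_train.record', 'data/fash_val.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(path))
    print('%s: %d examples' % (path, count))
```

With the 11 trainval entries and the 70/30 split in create_fashion_tf_record.py, you would expect 7 training and 4 validation examples.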
-------------------------------------------------------------------------------- /obj_detection/annotations/test.txt: -------------------------------------------------------------------------------- 1 | suit-1 3 2 | suit-2 3 3 | suit-3 3 4 | suit-4 3 5 | suit-5 3 6 | skirt-1 2 7 | skirt-2 2 8 | skirt-3 2 9 | skirt-4 2 10 | skirt-5 2 11 | shirt-1 1 12 | shirt-2 1 13 | shirt-3 1 14 | shirt-4 1 15 | shirt-5 1 16 | 17 | -------------------------------------------------------------------------------- /obj_detection/annotations/trainval.txt: -------------------------------------------------------------------------------- 1 | suit-1 2 | suit-3 3 | suit-4 4 | suit-5 5 | skirt-2 6 | skirt-4 7 | skirt-5 8 | shirt-1 9 | shirt-3 10 | shirt-4 11 | shirt-5 12 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-1.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-1.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 320 11 | 400 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 46 22 | 105 23 | 293 24 | 391 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-3.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-3.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 220 11 | 258 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 40 22 | 50 23 | 187 24 | 240 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 320 11 | 400 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 19 22 | 111 23 | 255 24 | 379 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-5.xml: -------------------------------------------------------------------------------- 1
| 2 | OXIIIT 3 | shirt-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 234 11 | 312 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 29 22 | 69 23 | 210 24 | 270 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-2.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-2.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 385 11 | 500 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 50 22 | 76 23 | 258 24 | 412 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 750 11 | 1154 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 180 22 | 402 23 | 580 24 | 996 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-5.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 328 11 | 350 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 97 22 | 121 23 | 218 24 | 222 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-1.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-1.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 290 11 | 370 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 78 22 | 40 23 | 210 24 | 200 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-3.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-3.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 322 11 | 545 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 7 22 | 109 23 | 321 24 | 450 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 361 11 | 452 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 34 22 | 74 23 | 318 24 | 391 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-5.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 361 11 | 452 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 59 22 | 109 23 | 340 24 | 424 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/create_fashion_tf_record.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Convert the Oxford pet dataset to TFRecord for object_detection. 17 | 18 | See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar 19 | Cats and Dogs 20 | IEEE Conference on Computer Vision and Pattern Recognition, 2012 21 | http://www.robots.ox.ac.uk/~vgg/data/pets/ 22 | 23 | Example usage: 24 | ./create_pet_tf_record --data_dir=/home/user/pet \ 25 | --output_dir=/home/user/pet/output 26 | """ 27 | 28 | import hashlib 29 | import io 30 | import logging 31 | import os 32 | import random 33 | import re 34 | 35 | from lxml import etree 36 | import PIL.Image 37 | import tensorflow as tf 38 | 39 | from object_detection.utils import dataset_util 40 | from object_detection.utils import label_map_util 41 | 42 | flags = tf.app.flags 43 | flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.') 44 | flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.') 45 | flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt', 46 | 'Path to label map proto') 47 | FLAGS = flags.FLAGS 48 | 49 | 50 | def get_class_name_from_filename(file_name): 51 | """Gets the class name from a file. 52 | 53 | Args: 54 | file_name: The file name to get the class name from. 55 | ie. "american_pit_bull_terrier_105.jpg" 56 | 57 | Returns: 58 | example: The converted tf.Example. 59 | """ 60 | match = re.match(r'([A-Za-z_]+)(-[0-9]+\.jpg)', file_name, re.I) 61 | return match.groups()[0] 62 | 63 | 64 | def dict_to_tf_example(data, 65 | label_map_dict, 66 | image_subdirectory, 67 | ignore_difficult_instances=False): 68 | """Convert XML derived dict to tf.Example proto. 69 | 70 | Notice that this function normalizes the bounding box coordinates provided 71 | by the raw data. 72 | 73 | Args: 74 | data: dict holding PASCAL XML fields for a single image (obtained by 75 | running dataset_util.recursive_parse_xml_to_dict) 76 | label_map_dict: A map from string label names to integers ids. 77 | image_subdirectory: String specifying subdirectory within the 78 | Pascal dataset directory holding the actual image data. 79 | ignore_difficult_instances: Whether to skip difficult instances in the 80 | dataset (default: False). 81 | 82 | Returns: 83 | example: The converted tf.Example. 
84 | 85 | Raises: 86 | ValueError: if the image pointed to by data['filename'] is not a valid JPEG 87 | """ 88 | img_path = os.path.join(image_subdirectory, data['filename']) 89 | with tf.gfile.GFile(img_path, 'rb') as fid: 90 | encoded_jpg = fid.read() 91 | encoded_jpg_io = io.BytesIO(encoded_jpg) 92 | image = PIL.Image.open(encoded_jpg_io) 93 | if image.format != 'JPEG': 94 | raise ValueError('Image format not JPEG') 95 | key = hashlib.sha256(encoded_jpg).hexdigest() 96 | 97 | width = int(data['size']['width']) 98 | height = int(data['size']['height']) 99 | 100 | xmin = [] 101 | ymin = [] 102 | xmax = [] 103 | ymax = [] 104 | classes = [] 105 | classes_text = [] 106 | truncated = [] 107 | poses = [] 108 | difficult_obj = [] 109 | for obj in data['object']: 110 | difficult = bool(int(obj['difficult'])) 111 | if ignore_difficult_instances and difficult: 112 | continue 113 | 114 | difficult_obj.append(int(difficult)) 115 | 116 | xmin.append(float(obj['bndbox']['xmin']) / width) 117 | ymin.append(float(obj['bndbox']['ymin']) / height) 118 | xmax.append(float(obj['bndbox']['xmax']) / width) 119 | ymax.append(float(obj['bndbox']['ymax']) / height) 120 | class_name = get_class_name_from_filename(data['filename']) 121 | classes_text.append(class_name.encode('utf8')) 122 | classes.append(label_map_dict[class_name]) 123 | truncated.append(int(obj['truncated'])) 124 | poses.append(obj['pose'].encode('utf8')) 125 | 126 | example = tf.train.Example(features=tf.train.Features(feature={ 127 | 'image/height': dataset_util.int64_feature(height), 128 | 'image/width': dataset_util.int64_feature(width), 129 | 'image/filename': dataset_util.bytes_feature( 130 | data['filename'].encode('utf8')), 131 | 'image/source_id': dataset_util.bytes_feature( 132 | data['filename'].encode('utf8')), 133 | 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 134 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 135 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 136 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 137 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 138 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 139 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 140 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 141 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 142 | 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 143 | 'image/object/truncated': dataset_util.int64_list_feature(truncated), 144 | 'image/object/view': dataset_util.bytes_list_feature(poses), 145 | })) 146 | return example 147 | 148 | 149 | def create_tf_record(output_filename, 150 | label_map_dict, 151 | annotations_dir, 152 | image_dir, 153 | examples): 154 | """Creates a TFRecord file from examples. 155 | 156 | Args: 157 | output_filename: Path to where output file is saved. 158 | label_map_dict: The label map dictionary. 159 | annotations_dir: Directory where annotation files are stored. 160 | image_dir: Directory where image files are stored. 161 | examples: Examples to parse and save to tf record. 
162 | """ 163 | writer = tf.python_io.TFRecordWriter(output_filename) 164 | for idx, example in enumerate(examples): 165 | if idx % 100 == 0: 166 | logging.info('On image %d of %d', idx, len(examples)) 167 | path = os.path.join(annotations_dir, 'xmls', example + '.xml') 168 | 169 | if not os.path.exists(path): 170 | logging.warning('Could not find %s, ignoring example.', path) 171 | continue 172 | with tf.gfile.GFile(path, 'r') as fid: 173 | xml_str = fid.read() 174 | xml = etree.fromstring(xml_str) 175 | data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] 176 | 177 | tf_example = dict_to_tf_example(data, label_map_dict, image_dir) 178 | writer.write(tf_example.SerializeToString()) 179 | 180 | writer.close() 181 | 182 | 183 | # TODO: Add test for pet/PASCAL main files. 184 | def main(_): 185 | data_dir = FLAGS.data_dir 186 | label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) 187 | 188 | logging.info('Reading from Fashion dataset.') 189 | image_dir = os.path.join(data_dir, 'images') 190 | annotations_dir = os.path.join(data_dir, 'annotations') 191 | examples_path = os.path.join(annotations_dir, 'trainval.txt') 192 | examples_list = dataset_util.read_examples_list(examples_path) 193 | 194 | # Test images are not included in the downloaded data set, so we shall perform 195 | # our own split. 196 | random.seed(42) 197 | random.shuffle(examples_list) 198 | num_examples = len(examples_list) 199 | num_train = int(0.7 * num_examples) 200 | train_examples = examples_list[:num_train] 201 | val_examples = examples_list[num_train:] 202 | logging.info('%d training and %d validation examples.', 203 | len(train_examples), len(val_examples)) 204 | 205 | train_output_path = os.path.join(FLAGS.output_dir, 'fash_train.record') 206 | val_output_path = os.path.join(FLAGS.output_dir, 'fash_val.record') 207 | create_tf_record(train_output_path, label_map_dict, annotations_dir, 208 | image_dir, train_examples) 209 | create_tf_record(val_output_path, label_map_dict, annotations_dir, 210 | image_dir, val_examples) 211 | 212 | if __name__ == '__main__': 213 | tf.app.run() 214 | -------------------------------------------------------------------------------- /obj_detection/evaluation-results/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/1.png -------------------------------------------------------------------------------- /obj_detection/evaluation-results/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/2.png -------------------------------------------------------------------------------- /obj_detection/evaluation-results/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/3.png -------------------------------------------------------------------------------- /obj_detection/fashion_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 0 3 | name: 'none_of_the_above' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'shirt' 9 | } 10 | 11 | item { 12 | id: 2 13 | name: 'skirt' 14 | } 15 | 
16 | item { 17 | id: 3 18 | name: 'suit' 19 | } 20 | -------------------------------------------------------------------------------- /obj_detection/faster_rcnn_resnet101_fash.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 0 93 | learning_rate: .0003 94 | } 95 | schedule { 96 | step: 900000 97 | learning_rate: .00003 98 | } 99 | schedule { 100 | step: 1200000 101 | learning_rate: .000003 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "models/fash-model/model.ckpt" 111 | from_detection_checkpoint: true 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | } 116 | } 117 | 118 | train_input_reader: { 119 | tf_record_input_reader { 120 | input_path: "data/fash_train.record" 121 | } 122 | label_map_path: "data/fashion_label_map.pbtxt" 123 | } 124 | 125 | eval_config: { 126 | num_examples: 2000 127 | } 128 | 129 | eval_input_reader: { 130 | tf_record_input_reader { 131 | input_path: "data/fash_val.record" 132 | } 133 | label_map_path: "data/fashion_label_map.pbtxt" 134 | shuffle: false 135 | num_readers: 1 136 | } 137 | 
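One thing to note when adapting this config: it was copied from the Oxford-IIIT pet configuration and still declares num_classes: 37, while the fashion label map above defines only three real classes (shirt, skirt, suit). Training will still run with the larger value, but you would normally set num_classes to match the label map. An illustrative check, using the same label_map_util helper that create_fashion_tf_record.py already imports:

```python
from object_detection.utils import label_map_util

# Inspect the label map that the config's num_classes should agree with.
label_map_dict = label_map_util.get_label_map_dict('data/fashion_label_map.pbtxt')
print(label_map_dict)  # shirt, skirt and suit carry ids 1-3; id 0 is the background entry
```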
-------------------------------------------------------------------------------- /obj_detection/images/shirt-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-1.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-3.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-5.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-2.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-5.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-1.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-3.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-5.jpg --------------------------------------------------------------------------------
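As a closing note on the segmentation half of this repository: the CRF post-processing in CRFCNNImageSegmentation.py can be packaged as a standalone helper. The sketch below is illustrative rather than part of the repository; it reuses the pydensecrf calls and parameters from the script and assumes image is an HxWx3 uint8 RGB array and softmax an HxWxC array of class probabilities produced by the FCN:

```python
import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import (create_pairwise_bilateral,
                              create_pairwise_gaussian, softmax_to_unary)


def crf_refine(image, softmax, n_classes=2, n_iters=5):
    """Refine per-pixel class probabilities with a fully connected CRF."""
    # pydensecrf expects unary energies shaped (n_classes, H*W) and C-contiguous.
    unary = softmax_to_unary(softmax.transpose((2, 0, 1)))
    unary = np.ascontiguousarray(unary)

    d = dcrf.DenseCRF(image.shape[0] * image.shape[1], n_classes)
    d.setUnaryEnergy(unary)

    # Location-only potential: discourages small, spatially isolated regions.
    feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2])
    d.addPairwiseEnergy(feats, compat=3, kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    # Colour-dependent potential: sharpens the coarse CNN boundaries using local colour.
    feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20),
                                      img=image, chdim=2)
    d.addPairwiseEnergy(feats, compat=10, kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    Q = d.inference(n_iters)
    return np.argmax(Q, axis=0).reshape(image.shape[:2])
```

Called as crf_refine(train_image, final_probabilities.squeeze()), this reproduces the res map that the script plots against the ground-truth annotation.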