├── .gitignore ├── CRFCNNImageSegmentation.py ├── README.md ├── cat.jpg ├── cat_annotation.png └── obj_detection ├── README.md ├── annotations ├── test.txt ├── trainval.txt └── xmls │ ├── shirt-1.xml │ ├── shirt-3.xml │ ├── shirt-4.xml │ ├── shirt-5.xml │ ├── skirt-2.xml │ ├── skirt-4.xml │ ├── skirt-5.xml │ ├── suit-1.xml │ ├── suit-3.xml │ ├── suit-4.xml │ └── suit-5.xml ├── create_fashion_tf_record.py ├── evaluation-results ├── 1.png ├── 2.png └── 3.png ├── fashion_label_map.pbtxt ├── faster_rcnn_resnet101_fash.config └── images ├── shirt-1.jpg ├── shirt-3.jpg ├── shirt-4.jpg ├── shirt-5.jpg ├── skirt-2.jpg ├── skirt-4.jpg ├── skirt-5.jpg ├── suit-1.jpg ├── suit-3.jpg ├── suit-4.jpg └── suit-5.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /CRFCNNImageSegmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | import os 6 | from matplotlib import pyplot as plt 7 | slim = tf.contrib.slim 8 | from nets import vgg 9 | # Load the mean pixel values and the function 10 | # that performs the subtraction from each pixel 11 | from preprocessing.vgg_preprocessing import (_mean_image_subtraction, 12 | _R_MEAN, _G_MEAN, _B_MEAN) 13 | import pydensecrf.densecrf as dcrf 14 | from pydensecrf.utils import compute_unary, create_pairwise_bilateral, \ 15 | create_pairwise_gaussian, softmax_to_unary 16 | 17 | def get_kernel_size(factor): 18 | """ 19 | Find the kernel size given the desired factor of upsampling. 20 | """ 21 | return 2 * factor - factor % 2 22 | 23 | 24 | def upsample_filt(size): 25 | """ 26 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size. 
27 | """ 28 | factor = (size + 1) // 2 29 | if size % 2 == 1: 30 | center = factor - 1 31 | else: 32 | center = factor - 0.5 33 | og = np.ogrid[:size, :size] 34 | return (1 - abs(og[0] - center) / factor) * \ 35 | (1 - abs(og[1] - center) / factor) 36 | 37 | 38 | def bilinear_upsample_weights(factor, number_of_classes): 39 | """ 40 | Create weights matrix for transposed convolution with bilinear filter 41 | initialization. 42 | """ 43 | 44 | filter_size = get_kernel_size(factor) 45 | 46 | weights = np.zeros((filter_size, 47 | filter_size, 48 | number_of_classes, 49 | number_of_classes), dtype=np.float32) 50 | 51 | upsample_kernel = upsample_filt(filter_size) 52 | 53 | for i in xrange(number_of_classes): 54 | weights[:, :, i, i] = upsample_kernel 55 | 56 | return weights 57 | 58 | 59 | 60 | 61 | os.environ["CUDA_VISIBLE_DEVICES"] = '1' 62 | # sys.path.append("/home/dpakhom1/workspace/my_models/slim/") 63 | checkpoints_dir = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/vgg16' 64 | 65 | image_filename = 'cat.jpg' 66 | annotation_filename = 'cat_annotation.png' 67 | # 68 | # image_filename = 'dog.png' 69 | # annotation_filename = 'dog_black.png' 70 | 71 | image_filename_placeholder = tf.placeholder(tf.string) 72 | annotation_filename_placeholder = tf.placeholder(tf.string) 73 | is_training_placeholder = tf.placeholder(tf.bool) 74 | 75 | feed_dict_to_use = {image_filename_placeholder: image_filename, 76 | annotation_filename_placeholder: annotation_filename, 77 | is_training_placeholder: True} 78 | 79 | image_tensor = tf.read_file(image_filename_placeholder) 80 | annotation_tensor = tf.read_file(annotation_filename_placeholder) 81 | 82 | image_tensor = tf.image.decode_jpeg(image_tensor, channels=3) 83 | annotation_tensor = tf.image.decode_png(annotation_tensor, channels=1) 84 | 85 | # Get ones for each class instead of a number -- we need that 86 | # for cross-entropy loss later on. Sometimes the groundtruth 87 | # masks have values other than 1 and 0. 
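#
# NOTE (illustrative sketch, not part of the original script): the mask
# preparation below builds a two-channel {class, background} one-hot mask.
# In plain NumPy, assuming `ann` is an HxWx1 uint8 annotation, the same
# computation would look like:
#
#   import numpy as np
#   ann = np.array([[[1], [0]], [[0], [1]]], dtype=np.uint8)   # toy 2x2 mask
#   fg = (ann == 1).astype(np.float32)                         # tf.equal + tf.to_float
#   bg = (ann != 1).astype(np.float32)                         # tf.not_equal + tf.to_float
#   onehot = np.concatenate([fg, bg], axis=2)                  # tf.concat(axis=2, ...)
#   flat = onehot.reshape(-1, 2)                               # tf.reshape(..., (-1, 2))
#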
88 | class_labels_tensor = tf.equal(annotation_tensor, 1) 89 | background_labels_tensor = tf.not_equal(annotation_tensor, 1) 90 | 91 | # Convert the boolean values into floats -- so that 92 | # computations in cross-entropy loss is correct 93 | bit_mask_class = tf.to_float(class_labels_tensor) 94 | bit_mask_background = tf.to_float(background_labels_tensor) 95 | 96 | combined_mask = tf.concat(axis=2, values=[bit_mask_class, 97 | bit_mask_background]) 98 | 99 | # Lets reshape our input so that it becomes suitable for 100 | # tf.softmax_cross_entropy_with_logits with [batch_size, num_classes] 101 | flat_labels = tf.reshape(tensor=combined_mask, shape=(-1, 2)) 102 | 103 | 104 | fig_size = [15, 4] 105 | plt.rcParams["figure.figsize"] = fig_size 106 | 107 | 108 | upsample_factor = 32 109 | number_of_classes = 2 110 | log_folder = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/logs' 111 | 112 | vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt') 113 | 114 | # Convert image to float32 before subtracting the 115 | # mean pixel value 116 | image_float = tf.to_float(image_tensor, name='ToFloat') 117 | 118 | # Subtract the mean pixel value from each pixel 119 | mean_centered_image = _mean_image_subtraction(image_float, 120 | [_R_MEAN, _G_MEAN, _B_MEAN]) 121 | 122 | processed_images = tf.expand_dims(mean_centered_image, 0) 123 | 124 | upsample_filter_np = bilinear_upsample_weights(upsample_factor, 125 | number_of_classes) 126 | 127 | upsample_filter_tensor = tf.constant(upsample_filter_np) 128 | 129 | # Define the model that we want to use -- specify to use only two classes at the last layer 130 | with slim.arg_scope(vgg.vgg_arg_scope()): 131 | logits, end_points = vgg.vgg_16(processed_images, 132 | num_classes=2, 133 | is_training=is_training_placeholder, 134 | spatial_squeeze=False, 135 | fc_conv_padding='SAME') 136 | 137 | downsampled_logits_shape = tf.shape(logits) 138 | 139 | # Calculate the ouput size of the upsampled tensor 140 | upsampled_logits_shape = tf.stack([ 141 | downsampled_logits_shape[0], 142 | downsampled_logits_shape[1] * upsample_factor, 143 | downsampled_logits_shape[2] * upsample_factor, 144 | downsampled_logits_shape[3] 145 | ]) 146 | 147 | # Perform the upsampling 148 | upsampled_logits = tf.nn.conv2d_transpose(logits, upsample_filter_tensor, 149 | output_shape=upsampled_logits_shape, 150 | strides=[1, upsample_factor, upsample_factor, 1]) 151 | 152 | # Flatten the predictions, so that we can compute cross-entropy for 153 | # each pixel and get a sum of cross-entropies. 154 | flat_logits = tf.reshape(tensor=upsampled_logits, shape=(-1, number_of_classes)) 155 | 156 | cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits, 157 | labels=flat_labels) 158 | 159 | cross_entropy_sum = tf.reduce_sum(cross_entropies) 160 | 161 | # Tensor to get the final prediction for each pixel -- pay 162 | # attention that we don't need softmax in this case because 163 | # we only need the final decision. If we also need the respective 164 | # probabilities we will have to apply softmax. 165 | pred = tf.argmax(upsampled_logits, dimension=3) 166 | 167 | probabilities = tf.nn.softmax(upsampled_logits) 168 | 169 | # Here we define an optimizer and put all the variables 170 | # that will be created under a namespace of 'adam_vars'. 171 | # This is done so that we can easily access them later. 172 | # Those variables are used by adam optimizer and are not 173 | # related to variables of the vgg model. 
174 | 175 | # We also retrieve gradient Tensors for each of our variables 176 | # This way we can later visualize them in tensorboard. 177 | # optimizer.compute_gradients and optimizer.apply_gradients 178 | # is equivalent to running: 179 | # train_step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy_sum) 180 | with tf.variable_scope("adam_vars"): 181 | optimizer = tf.train.AdamOptimizer(learning_rate=0.0001) 182 | gradients = optimizer.compute_gradients(loss=cross_entropy_sum) 183 | 184 | for grad_var_pair in gradients: 185 | current_variable = grad_var_pair[1] 186 | current_gradient = grad_var_pair[0] 187 | 188 | # Relace some characters from the original variable name 189 | # tensorboard doesn't accept ':' symbol 190 | gradient_name_to_save = current_variable.name.replace(":", "_") 191 | 192 | # Let's get histogram of gradients for each layer and 193 | # visualize them later in tensorboard 194 | tf.summary.histogram(gradient_name_to_save, current_gradient) 195 | 196 | train_step = optimizer.apply_gradients(grads_and_vars=gradients) 197 | 198 | # Now we define a function that will load the weights from VGG checkpoint 199 | # into our variables when we call it. We exclude the weights from the last layer 200 | # which is responsible for class predictions. We do this because 201 | # we will have different number of classes to predict and we can't 202 | # use the old ones as an initialization. 203 | vgg_except_fc8_weights = slim.get_variables_to_restore(exclude=['vgg_16/fc8', 'adam_vars']) 204 | 205 | # Here we get variables that belong to the last layer of network. 206 | # As we saw, the number of classes that VGG was originally trained on 207 | # is different from ours -- in our case it is only 2 classes. 208 | vgg_fc8_weights = slim.get_variables_to_restore(include=['vgg_16/fc8']) 209 | 210 | adam_optimizer_variables = slim.get_variables_to_restore(include=['adam_vars']) 211 | 212 | # Add summary op for the loss -- to be able to see it in 213 | # tensorboard. 214 | tf.summary.scalar('cross_entropy_loss', cross_entropy_sum) 215 | 216 | # Put all summary ops into one op. Produces string when 217 | # you run it. 218 | merged_summary_op = tf.summary.merge_all() 219 | 220 | # Create the summary writer -- to write all the logs 221 | # into a specified file. This file can be later read 222 | # by tensorboard. 223 | summary_string_writer = tf.summary.FileWriter(log_folder) 224 | 225 | # Create the log folder if doesn't exist yet 226 | if not os.path.exists(log_folder): 227 | os.makedirs(log_folder) 228 | 229 | # Create an OP that performs the initialization of 230 | # values of variables to the values from VGG. 231 | read_vgg_weights_except_fc8_func = slim.assign_from_checkpoint_fn( 232 | vgg_checkpoint_path, 233 | vgg_except_fc8_weights) 234 | 235 | # Initializer for new fc8 weights -- for two classes. 236 | vgg_fc8_weights_initializer = tf.variables_initializer(vgg_fc8_weights) 237 | 238 | # Initializer for adam variables 239 | optimization_variables_initializer = tf.variables_initializer(adam_optimizer_variables) 240 | 241 | with tf.Session() as sess: 242 | # Run the initializers. 
243 | read_vgg_weights_except_fc8_func(sess) 244 | sess.run(vgg_fc8_weights_initializer) 245 | sess.run(optimization_variables_initializer) 246 | 247 | train_image, train_annotation = sess.run([image_tensor, annotation_tensor], 248 | feed_dict=feed_dict_to_use) 249 | 250 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 251 | ax1.imshow(train_image) 252 | ax1.set_title('Input image') 253 | probability_graph = ax2.imshow(np.dstack((train_annotation,) * 3) * 100) 254 | ax2.set_title('Input Ground-Truth Annotation') 255 | plt.show() 256 | 257 | # Let's perform 10 iterations 258 | for i in range(10): 259 | print("Starting iteration - " + str(i)) 260 | loss, summary_string = sess.run([cross_entropy_sum, merged_summary_op], 261 | feed_dict=feed_dict_to_use) 262 | 263 | sess.run(train_step, feed_dict=feed_dict_to_use) 264 | 265 | pred_np, probabilities_np = sess.run([pred, probabilities], 266 | feed_dict=feed_dict_to_use) 267 | 268 | summary_string_writer.add_summary(summary_string, i) 269 | 270 | cmap = plt.get_cmap('bwr') 271 | 272 | # f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 273 | # ax1.imshow(np.uint8(pred_np.squeeze() != 1), vmax=1.5, vmin=-0.4, cmap=cmap) 274 | # ax1.set_title('Argmax. Iteration # ' + str(i)) 275 | # probability_graph = ax2.imshow(probabilities_np.squeeze()[:, :, 0]) 276 | # ax2.set_title('Probability of the Class. Iteration # ' + str(i)) 277 | # 278 | # plt.colorbar(probability_graph) 279 | # plt.show() 280 | 281 | print("Current Loss: " + str(loss)) 282 | 283 | feed_dict_to_use[is_training_placeholder] = False 284 | 285 | final_predictions, final_probabilities, final_loss = sess.run([pred, 286 | probabilities, 287 | cross_entropy_sum], 288 | feed_dict=feed_dict_to_use) 289 | 290 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 291 | 292 | ax1.imshow(np.uint8(final_predictions.squeeze() != 1), 293 | vmax=1.5, 294 | vmin=-0.4, 295 | cmap=cmap) 296 | 297 | ax1.set_title('Final Argmax') 298 | 299 | probability_graph = ax2.imshow(final_probabilities.squeeze()[:, :, 0]) 300 | ax2.set_title('Final Probability of the Class') 301 | plt.colorbar(probability_graph) 302 | 303 | plt.show() 304 | 305 | print("Final Loss: " + str(final_loss)) 306 | 307 | summary_string_writer.close() 308 | 309 |
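# NOTE (illustrative sketch, not part of the original script): the fine-tuned
# weights only live for the duration of this session; the script never
# checkpoints them. To reuse the segmentation model later, you could create a
# saver and save before the session ends (the checkpoint path below is
# hypothetical):
#
#   saver = tf.train.Saver()
#   saver.save(sess, os.path.join(log_folder, 'fcn_vgg16_finetuned.ckpt'))
#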
310 | image = train_image 311 | 312 | softmax = final_probabilities.squeeze() 313 | 314 | 315 | softmax = softmax.transpose((2, 0, 1)) 316 | 317 | # The input should be the negative of the logarithm of probability values 318 | # Look up the definition of softmax_to_unary for more information 319 | unary = softmax_to_unary(softmax) 320 | 321 | # The inputs should be C-contiguous -- we are using a Cython wrapper 322 | unary = np.ascontiguousarray(unary) 323 | 324 | d = dcrf.DenseCRF(image.shape[0] * image.shape[1], 2) 325 | 326 | d.setUnaryEnergy(unary) 327 | 328 | # This potential penalizes small pieces of segmentation that are 329 | # spatially isolated -- enforces more spatially consistent segmentations 330 | feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2]) 331 | 332 | d.addPairwiseEnergy(feats, compat=3, 333 | kernel=dcrf.DIAG_KERNEL, 334 | normalization=dcrf.NORMALIZE_SYMMETRIC) 335 | 336 | # This creates the color-dependent features -- 337 | # because the segmentations that we get from the CNN are too coarse 338 | # and we can use local color features to refine them 339 | feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20), 340 | img=image, chdim=2) 341 | 342 | d.addPairwiseEnergy(feats, compat=10, 343 | kernel=dcrf.DIAG_KERNEL, 344 | normalization=dcrf.NORMALIZE_SYMMETRIC) 345 | Q = d.inference(5) 346 | 347 | res = np.argmax(Q, axis=0).reshape((image.shape[0], image.shape[1])) 348 | 349 | cmap = plt.get_cmap('bwr') 350 | 351 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) 352 | ax1.imshow(res, vmax=1.5, vmin=-0.4, cmap=cmap) 353 | ax1.set_title('Segmentation with CRF post-processing') 354 | probability_graph = ax2.imshow(np.dstack((train_annotation,)*3)*100) 355 | ax2.set_title('Ground-Truth Annotation') 356 | plt.show() 357 | 358 | 359 | 360 | 361 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CRF-image-segmentation 2 | 3 | 1. Install TensorFlow, TF-Slim and the other dependencies required by the code (ideally in a virtual environment). 4 | 2. Download the VGG-16 model: http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz 5 | 3. Untar the model and then set the checkpoints_dir variable to the folder containing the extracted VGG model. 6 | 4. Update the log folder variable log_folder. 7 | 5. Lines 272 - 279 have been temporarily commented out to prevent plots from being shown after each iteration. Plots hold up further execution until the plot window is closed. 8 | 6. The input image size should be 480x352. All images must be resized to this dimension before being fed to the algorithm. 9 | 7. Replace the variable named 'processed_probabilities' with 'softmax' (already done in the code). 10 | -------------------------------------------------------------------------------- /cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat.jpg -------------------------------------------------------------------------------- /cat_annotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat_annotation.png -------------------------------------------------------------------------------- /obj_detection/README.md: -------------------------------------------------------------------------------- 1 | # OBJ-Detection 2 | 3 | We use the Object Detection API provided by TensorFlow to build the solution. 4 | 5 | 1. Install TensorFlow and then clone the TensorFlow models repository - https://github.com/tensorflow/models. 6 | 2. Follow the installation steps documented in models/object_detection. 7 | 3. Copy the annotations and images folders to models/object_detection. images contains the actual images, while annotations contains XMLs specifying the bounding boxes for each image. 8 | 4. Copy create_fashion_tf_record.py to models/object_detection. 9 | 5. Copy fashion_label_map.pbtxt to models/object_detection/data. 10 | 6. Execute the following command from the object_detection folder: "python create_fashion_tf_record.py --label_map_path=data/fashion_label_map.pbtxt --data_dir=`pwd` --output_dir=`pwd`". 11 | This will create two files, fash_train.record and fash_val.record. These files will be fed to the TensorFlow network (a small verification snippet is shown after this list). 12 | 7. Copy fash_train.record and fash_val.record to the data folder. 13 | 8. Create a folder called fash-model in models. 14 | 9. Copy faster_rcnn_resnet101_fash.config to fash-model. 15 | 10. Create folders named train and eval inside the fash-model folder. 16 | 11. Download the COCO-pretrained Faster R-CNN with Resnet-101 model. Unzip the contents of the archive and copy the model.ckpt* files into the fash-model folder. (http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz) 17 | 12. Start training by executing the following command from the object_detection folder - "python train.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --train_dir=models/fash-model/train". 18 | Training goes on indefinitely until it is killed by the user. 19 | 13. Execute "tensorboard --logdir=models/fash-model" to visualize the training and eval phases. 20 | 14. For evaluation, execute the following command from the object_detection folder - "python eval.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --checkpoint_dir=models/fash-model/train --eval_dir=models/fash-model/eval". 21 | This command will periodically fetch the latest checkpoint from models/fash-model/train and perform evaluations. Open the Images tab in the TensorBoard UI to see the evaluation results. 22 | 23 | Since the dataset is very small, some noise is visible in the evaluation results. Even so, the correct categories were detected in each image with the highest confidence. 24 |
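Before training, it can help to sanity-check the generated records. The following is an illustrative snippet (not part of the repository); it assumes the two record files were copied to the data folder as described in step 7 and uses the TF 1.x record iterator:

```python
import tensorflow as tf

# Count the serialized examples in each generated TFRecord file.
for path in ['data/fash_train.record', 'data/fash_val.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(path))
    print('%s: %d examples' % (path, count))
```

With the 11 trainval entries and the 70/30 split in create_fashion_tf_record.py, you would expect 7 training and 4 validation examples.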
-------------------------------------------------------------------------------- /obj_detection/annotations/test.txt: -------------------------------------------------------------------------------- 1 | suit-1 3 2 | suit-2 3 3 | suit-3 3 4 | suit-4 3 5 | suit-5 3 6 | skirt-1 2 7 | skirt-2 2 8 | skirt-3 2 9 | skirt-4 2 10 | skirt-5 2 11 | shirt-1 1 12 | shirt-2 1 13 | shirt-3 1 14 | shirt-4 1 15 | shirt-5 1 16 | 17 | -------------------------------------------------------------------------------- /obj_detection/annotations/trainval.txt: -------------------------------------------------------------------------------- 1 | suit-1 2 | suit-3 3 | suit-4 4 | suit-5 5 | skirt-2 6 | skirt-4 7 | skirt-5 8 | shirt-1 9 | shirt-3 10 | shirt-4 11 | shirt-5 12 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-1.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-1.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 320 11 | 400 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 46 22 | 105 23 | 293 24 | 391 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-3.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-3.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 220 11 | 258 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 40 22 | 50 23 | 187 24 | 240 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | shirt-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 320 11 | 400 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 19 22 | 111 23 | 255 24 | 379 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/shirt-5.xml: -------------------------------------------------------------------------------- 1
| 2 | OXIIIT 3 | shirt-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 234 11 | 312 12 | 3 13 | 14 | 0 15 | 16 | shirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 29 22 | 69 23 | 210 24 | 270 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-2.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-2.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 385 11 | 500 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 50 22 | 76 23 | 258 24 | 412 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 750 11 | 1154 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 180 22 | 402 23 | 580 24 | 996 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/skirt-5.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | skirt-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 328 11 | 350 12 | 3 13 | 14 | 0 15 | 16 | skirt 17 | Frontal 18 | 0 19 | 0 20 | 21 | 97 22 | 121 23 | 218 24 | 222 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-1.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-1.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 290 11 | 370 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 78 22 | 40 23 | 210 24 | 200 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-3.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-3.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 322 11 | 545 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 7 22 | 109 23 | 321 24 | 450 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-4.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-4.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 361 11 | 452 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 34 22 | 74 23 | 318 24 | 391 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/annotations/xmls/suit-5.xml: -------------------------------------------------------------------------------- 1 | 2 | OXIIIT 3 | suit-5.jpg 4 | 5 | OXFORD-IIIT Pet Dataset 6 | OXIIIT 7 | flickr 8 | 9 | 10 | 361 11 | 452 12 | 3 13 | 14 | 0 15 | 16 | suit 17 | Frontal 18 | 0 19 | 0 20 | 21 | 59 22 | 109 23 | 340 24 | 424 25 | 26 | 0 27 | 28 | 29 | -------------------------------------------------------------------------------- /obj_detection/create_fashion_tf_record.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Convert the Oxford pet dataset to TFRecord for object_detection. 17 | 18 | See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar 19 | Cats and Dogs 20 | IEEE Conference on Computer Vision and Pattern Recognition, 2012 21 | http://www.robots.ox.ac.uk/~vgg/data/pets/ 22 | 23 | Example usage: 24 | ./create_pet_tf_record --data_dir=/home/user/pet \ 25 | --output_dir=/home/user/pet/output 26 | """ 27 | 28 | import hashlib 29 | import io 30 | import logging 31 | import os 32 | import random 33 | import re 34 | 35 | from lxml import etree 36 | import PIL.Image 37 | import tensorflow as tf 38 | 39 | from object_detection.utils import dataset_util 40 | from object_detection.utils import label_map_util 41 | 42 | flags = tf.app.flags 43 | flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.') 44 | flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.') 45 | flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt', 46 | 'Path to label map proto') 47 | FLAGS = flags.FLAGS 48 | 49 | 50 | def get_class_name_from_filename(file_name): 51 | """Gets the class name from a file. 52 | 53 | Args: 54 | file_name: The file name to get the class name from. 55 | ie. "american_pit_bull_terrier_105.jpg" 56 | 57 | Returns: 58 | example: The converted tf.Example. 59 | """ 60 | match = re.match(r'([A-Za-z_]+)(-[0-9]+\.jpg)', file_name, re.I) 61 | return match.groups()[0] 62 | 63 | 64 | def dict_to_tf_example(data, 65 | label_map_dict, 66 | image_subdirectory, 67 | ignore_difficult_instances=False): 68 | """Convert XML derived dict to tf.Example proto. 69 | 70 | Notice that this function normalizes the bounding box coordinates provided 71 | by the raw data. 72 | 73 | Args: 74 | data: dict holding PASCAL XML fields for a single image (obtained by 75 | running dataset_util.recursive_parse_xml_to_dict) 76 | label_map_dict: A map from string label names to integers ids. 77 | image_subdirectory: String specifying subdirectory within the 78 | Pascal dataset directory holding the actual image data. 79 | ignore_difficult_instances: Whether to skip difficult instances in the 80 | dataset (default: False). 81 | 82 | Returns: 83 | example: The converted tf.Example. 
84 | 85 | Raises: 86 | ValueError: if the image pointed to by data['filename'] is not a valid JPEG 87 | """ 88 | img_path = os.path.join(image_subdirectory, data['filename']) 89 | with tf.gfile.GFile(img_path, 'rb') as fid: 90 | encoded_jpg = fid.read() 91 | encoded_jpg_io = io.BytesIO(encoded_jpg) 92 | image = PIL.Image.open(encoded_jpg_io) 93 | if image.format != 'JPEG': 94 | raise ValueError('Image format not JPEG') 95 | key = hashlib.sha256(encoded_jpg).hexdigest() 96 | 97 | width = int(data['size']['width']) 98 | height = int(data['size']['height']) 99 | 100 | xmin = [] 101 | ymin = [] 102 | xmax = [] 103 | ymax = [] 104 | classes = [] 105 | classes_text = [] 106 | truncated = [] 107 | poses = [] 108 | difficult_obj = [] 109 | for obj in data['object']: 110 | difficult = bool(int(obj['difficult'])) 111 | if ignore_difficult_instances and difficult: 112 | continue 113 | 114 | difficult_obj.append(int(difficult)) 115 | 116 | xmin.append(float(obj['bndbox']['xmin']) / width) 117 | ymin.append(float(obj['bndbox']['ymin']) / height) 118 | xmax.append(float(obj['bndbox']['xmax']) / width) 119 | ymax.append(float(obj['bndbox']['ymax']) / height) 120 | class_name = get_class_name_from_filename(data['filename']) 121 | classes_text.append(class_name.encode('utf8')) 122 | classes.append(label_map_dict[class_name]) 123 | truncated.append(int(obj['truncated'])) 124 | poses.append(obj['pose'].encode('utf8')) 125 | 126 | example = tf.train.Example(features=tf.train.Features(feature={ 127 | 'image/height': dataset_util.int64_feature(height), 128 | 'image/width': dataset_util.int64_feature(width), 129 | 'image/filename': dataset_util.bytes_feature( 130 | data['filename'].encode('utf8')), 131 | 'image/source_id': dataset_util.bytes_feature( 132 | data['filename'].encode('utf8')), 133 | 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 134 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 135 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 136 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 137 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 138 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 139 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 140 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 141 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 142 | 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 143 | 'image/object/truncated': dataset_util.int64_list_feature(truncated), 144 | 'image/object/view': dataset_util.bytes_list_feature(poses), 145 | })) 146 | return example 147 | 148 | 149 | def create_tf_record(output_filename, 150 | label_map_dict, 151 | annotations_dir, 152 | image_dir, 153 | examples): 154 | """Creates a TFRecord file from examples. 155 | 156 | Args: 157 | output_filename: Path to where output file is saved. 158 | label_map_dict: The label map dictionary. 159 | annotations_dir: Directory where annotation files are stored. 160 | image_dir: Directory where image files are stored. 161 | examples: Examples to parse and save to tf record. 
162 | """ 163 | writer = tf.python_io.TFRecordWriter(output_filename) 164 | for idx, example in enumerate(examples): 165 | if idx % 100 == 0: 166 | logging.info('On image %d of %d', idx, len(examples)) 167 | path = os.path.join(annotations_dir, 'xmls', example + '.xml') 168 | 169 | if not os.path.exists(path): 170 | logging.warning('Could not find %s, ignoring example.', path) 171 | continue 172 | with tf.gfile.GFile(path, 'r') as fid: 173 | xml_str = fid.read() 174 | xml = etree.fromstring(xml_str) 175 | data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] 176 | 177 | tf_example = dict_to_tf_example(data, label_map_dict, image_dir) 178 | writer.write(tf_example.SerializeToString()) 179 | 180 | writer.close() 181 | 182 | 183 | # TODO: Add test for pet/PASCAL main files. 184 | def main(_): 185 | data_dir = FLAGS.data_dir 186 | label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) 187 | 188 | logging.info('Reading from Fashion dataset.') 189 | image_dir = os.path.join(data_dir, 'images') 190 | annotations_dir = os.path.join(data_dir, 'annotations') 191 | examples_path = os.path.join(annotations_dir, 'trainval.txt') 192 | examples_list = dataset_util.read_examples_list(examples_path) 193 | 194 | # Test images are not included in the downloaded data set, so we shall perform 195 | # our own split. 196 | random.seed(42) 197 | random.shuffle(examples_list) 198 | num_examples = len(examples_list) 199 | num_train = int(0.7 * num_examples) 200 | train_examples = examples_list[:num_train] 201 | val_examples = examples_list[num_train:] 202 | logging.info('%d training and %d validation examples.', 203 | len(train_examples), len(val_examples)) 204 | 205 | train_output_path = os.path.join(FLAGS.output_dir, 'fash_train.record') 206 | val_output_path = os.path.join(FLAGS.output_dir, 'fash_val.record') 207 | create_tf_record(train_output_path, label_map_dict, annotations_dir, 208 | image_dir, train_examples) 209 | create_tf_record(val_output_path, label_map_dict, annotations_dir, 210 | image_dir, val_examples) 211 | 212 | if __name__ == '__main__': 213 | tf.app.run() 214 | -------------------------------------------------------------------------------- /obj_detection/evaluation-results/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/1.png -------------------------------------------------------------------------------- /obj_detection/evaluation-results/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/2.png -------------------------------------------------------------------------------- /obj_detection/evaluation-results/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/3.png -------------------------------------------------------------------------------- /obj_detection/fashion_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 0 3 | name: 'none_of_the_above' 4 | } 5 | 6 | item { 7 | id: 1 8 | name: 'shirt' 9 | } 10 | 11 | item { 12 | id: 2 13 | name: 'skirt' 14 | } 15 | 
16 | item { 17 | id: 3 18 | name: 'suit' 19 | } 20 | -------------------------------------------------------------------------------- /obj_detection/faster_rcnn_resnet101_fash.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1) configured for the Oxford-IIIT Pet Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 37 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0003 91 | schedule { 92 | step: 0 93 | learning_rate: .0003 94 | } 95 | schedule { 96 | step: 900000 97 | learning_rate: .00003 98 | } 99 | schedule { 100 | step: 1200000 101 | learning_rate: .000003 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "models/fash-model/model.ckpt" 111 | from_detection_checkpoint: true 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | } 116 | } 117 | 118 | train_input_reader: { 119 | tf_record_input_reader { 120 | input_path: "data/fash_train.record" 121 | } 122 | label_map_path: "data/fashion_label_map.pbtxt" 123 | } 124 | 125 | eval_config: { 126 | num_examples: 2000 127 | } 128 | 129 | eval_input_reader: { 130 | tf_record_input_reader { 131 | input_path: "data/fash_val.record" 132 | } 133 | label_map_path: "data/fashion_label_map.pbtxt" 134 | shuffle: false 135 | num_readers: 1 136 | } 137 | 
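One thing to note when adapting this config: it was copied from the Oxford-IIIT pet configuration and still declares num_classes: 37, while the fashion label map above defines only three real classes (shirt, skirt, suit). Training will still run with the larger value, but you would normally set num_classes to match the label map. An illustrative check, using the same label_map_util helper that create_fashion_tf_record.py already imports:

```python
from object_detection.utils import label_map_util

# Inspect the label map that the config's num_classes should agree with.
label_map_dict = label_map_util.get_label_map_dict('data/fashion_label_map.pbtxt')
print(label_map_dict)  # shirt, skirt and suit carry ids 1-3; id 0 is the background entry
```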
-------------------------------------------------------------------------------- /obj_detection/images/shirt-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-1.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-3.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/shirt-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-5.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-2.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/skirt-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-5.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-1.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-3.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-4.jpg -------------------------------------------------------------------------------- /obj_detection/images/suit-5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-5.jpg --------------------------------------------------------------------------------
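As a closing note on the segmentation half of this repository: the CRF post-processing in CRFCNNImageSegmentation.py can be packaged as a standalone helper. The sketch below is illustrative rather than part of the repository; it reuses the pydensecrf calls and parameters from the script and assumes image is an HxWx3 uint8 RGB array and softmax an HxWxC array of class probabilities produced by the FCN:

```python
import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import (create_pairwise_bilateral,
                              create_pairwise_gaussian, softmax_to_unary)


def crf_refine(image, softmax, n_classes=2, n_iters=5):
    """Refine per-pixel class probabilities with a fully connected CRF."""
    # pydensecrf expects unary energies shaped (n_classes, H*W) and C-contiguous.
    unary = softmax_to_unary(softmax.transpose((2, 0, 1)))
    unary = np.ascontiguousarray(unary)

    d = dcrf.DenseCRF(image.shape[0] * image.shape[1], n_classes)
    d.setUnaryEnergy(unary)

    # Location-only potential: discourages small, spatially isolated regions.
    feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2])
    d.addPairwiseEnergy(feats, compat=3, kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    # Colour-dependent potential: sharpens the coarse CNN boundaries using local colour.
    feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20),
                                      img=image, chdim=2)
    d.addPairwiseEnergy(feats, compat=10, kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    Q = d.inference(n_iters)
    return np.argmax(Q, axis=0).reshape(image.shape[:2])
```

Called as crf_refine(train_image, final_probabilities.squeeze()), this reproduces the res map that the script plots against the ground-truth annotation.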