├── .gitignore
├── CRFCNNImageSegmentation.py
├── README.md
├── cat.jpg
├── cat_annotation.png
└── obj_detection
    ├── README.md
    ├── annotations
    │   ├── test.txt
    │   ├── trainval.txt
    │   └── xmls
    │       ├── shirt-1.xml
    │       ├── shirt-3.xml
    │       ├── shirt-4.xml
    │       ├── shirt-5.xml
    │       ├── skirt-2.xml
    │       ├── skirt-4.xml
    │       ├── skirt-5.xml
    │       ├── suit-1.xml
    │       ├── suit-3.xml
    │       ├── suit-4.xml
    │       └── suit-5.xml
    ├── create_fashion_tf_record.py
    ├── evaluation-results
    │   ├── 1.png
    │   ├── 2.png
    │   └── 3.png
    ├── fashion_label_map.pbtxt
    ├── faster_rcnn_resnet101_fash.config
    └── images
        ├── shirt-1.jpg
        ├── shirt-3.jpg
        ├── shirt-4.jpg
        ├── shirt-5.jpg
        ├── skirt-2.jpg
        ├── skirt-4.jpg
        ├── skirt-5.jpg
        ├── suit-1.jpg
        ├── suit-3.jpg
        ├── suit-4.jpg
        └── suit-5.jpg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/CRFCNNImageSegmentation.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 | import os
6 | from matplotlib import pyplot as plt
7 | slim = tf.contrib.slim
8 | from nets import vgg
9 | # Load the mean pixel values and the function
10 | # that performs the subtraction from each pixel
11 | from preprocessing.vgg_preprocessing import (_mean_image_subtraction,
12 | _R_MEAN, _G_MEAN, _B_MEAN)
13 | import pydensecrf.densecrf as dcrf
14 | from pydensecrf.utils import compute_unary, create_pairwise_bilateral, \
15 | create_pairwise_gaussian, softmax_to_unary
16 |
17 | def get_kernel_size(factor):
18 | """
19 | Find the kernel size given the desired factor of upsampling.
20 | """
21 | return 2 * factor - factor % 2
22 |
23 |
24 | def upsample_filt(size):
25 | """
26 | Make a 2D bilinear kernel suitable for upsampling of the given (h, w) size.
27 | """
28 | factor = (size + 1) // 2
29 | if size % 2 == 1:
30 | center = factor - 1
31 | else:
32 | center = factor - 0.5
33 | og = np.ogrid[:size, :size]
34 | return (1 - abs(og[0] - center) / factor) * \
35 | (1 - abs(og[1] - center) / factor)
36 |
37 |
38 | def bilinear_upsample_weights(factor, number_of_classes):
39 | """
40 | Create weights matrix for transposed convolution with bilinear filter
41 | initialization.
42 | """
43 |
44 | filter_size = get_kernel_size(factor)
45 |
46 | weights = np.zeros((filter_size,
47 | filter_size,
48 | number_of_classes,
49 | number_of_classes), dtype=np.float32)
50 |
51 | upsample_kernel = upsample_filt(filter_size)
52 |
53 |     for i in range(number_of_classes):
54 | weights[:, :, i, i] = upsample_kernel
55 |
56 | return weights
57 |
58 |
59 |
60 |
61 | os.environ["CUDA_VISIBLE_DEVICES"] = '1'
62 | # sys.path.append("/home/dpakhom1/workspace/my_models/slim/")
63 | checkpoints_dir = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/vgg16'
64 |
65 | image_filename = 'cat.jpg'
66 | annotation_filename = 'cat_annotation.png'
67 | #
68 | # image_filename = 'dog.png'
69 | # annotation_filename = 'dog_black.png'
70 |
71 | image_filename_placeholder = tf.placeholder(tf.string)
72 | annotation_filename_placeholder = tf.placeholder(tf.string)
73 | is_training_placeholder = tf.placeholder(tf.bool)
74 |
75 | feed_dict_to_use = {image_filename_placeholder: image_filename,
76 | annotation_filename_placeholder: annotation_filename,
77 | is_training_placeholder: True}
78 |
79 | image_tensor = tf.read_file(image_filename_placeholder)
80 | annotation_tensor = tf.read_file(annotation_filename_placeholder)
81 |
82 | image_tensor = tf.image.decode_jpeg(image_tensor, channels=3)
83 | annotation_tensor = tf.image.decode_png(annotation_tensor, channels=1)
84 |
85 | # Get ones for each class instead of a number -- we need that
86 | # for cross-entropy loss later on. Sometimes the groundtruth
87 | # masks have values other than 1 and 0.
88 | class_labels_tensor = tf.equal(annotation_tensor, 1)
89 | background_labels_tensor = tf.not_equal(annotation_tensor, 1)
90 |
91 | # Convert the boolean values into floats -- so that
92 | # the computations in the cross-entropy loss are correct
93 | bit_mask_class = tf.to_float(class_labels_tensor)
94 | bit_mask_background = tf.to_float(background_labels_tensor)
95 |
96 | combined_mask = tf.concat(axis=2, values=[bit_mask_class,
97 | bit_mask_background])
98 |
99 | # Let's reshape our input so that it becomes suitable for
100 | # tf.nn.softmax_cross_entropy_with_logits with [batch_size, num_classes]
101 | flat_labels = tf.reshape(tensor=combined_mask, shape=(-1, 2))
102 |
103 |
104 | fig_size = [15, 4]
105 | plt.rcParams["figure.figsize"] = fig_size
106 |
107 |
108 | upsample_factor = 32
109 | number_of_classes = 2
110 | log_folder = '/home/nidhin/Confidential/blueprints/experimental/python/unifiedapp-poc/logs'
111 |
112 | vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt')
113 |
114 | # Convert image to float32 before subtracting the
115 | # mean pixel value
116 | image_float = tf.to_float(image_tensor, name='ToFloat')
117 |
118 | # Subtract the mean pixel value from each pixel
119 | mean_centered_image = _mean_image_subtraction(image_float,
120 | [_R_MEAN, _G_MEAN, _B_MEAN])
121 |
122 | processed_images = tf.expand_dims(mean_centered_image, 0)
123 |
124 | upsample_filter_np = bilinear_upsample_weights(upsample_factor,
125 | number_of_classes)
126 |
127 | upsample_filter_tensor = tf.constant(upsample_filter_np)
128 |
129 | # Define the model that we want to use -- specify to use only two classes at the last layer
130 | with slim.arg_scope(vgg.vgg_arg_scope()):
131 | logits, end_points = vgg.vgg_16(processed_images,
132 | num_classes=2,
133 | is_training=is_training_placeholder,
134 | spatial_squeeze=False,
135 | fc_conv_padding='SAME')
136 |
137 | downsampled_logits_shape = tf.shape(logits)
138 |
139 | # Calculate the output size of the upsampled tensor
140 | upsampled_logits_shape = tf.stack([
141 | downsampled_logits_shape[0],
142 | downsampled_logits_shape[1] * upsample_factor,
143 | downsampled_logits_shape[2] * upsample_factor,
144 | downsampled_logits_shape[3]
145 | ])
146 |
147 | # Perform the upsampling
148 | upsampled_logits = tf.nn.conv2d_transpose(logits, upsample_filter_tensor,
149 | output_shape=upsampled_logits_shape,
150 | strides=[1, upsample_factor, upsample_factor, 1])
151 |
152 | # Flatten the predictions, so that we can compute cross-entropy for
153 | # each pixel and get a sum of cross-entropies.
154 | flat_logits = tf.reshape(tensor=upsampled_logits, shape=(-1, number_of_classes))
155 |
156 | cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=flat_logits,
157 | labels=flat_labels)
158 |
159 | cross_entropy_sum = tf.reduce_sum(cross_entropies)
160 |
161 | # Tensor to get the final prediction for each pixel -- pay
162 | # attention that we don't need softmax in this case because
163 | # we only need the final decision. If we also need the respective
164 | # probabilities we will have to apply softmax.
165 | pred = tf.argmax(upsampled_logits, dimension=3)
166 |
167 | probabilities = tf.nn.softmax(upsampled_logits)
168 |
169 | # Here we define an optimizer and put all the variables
170 | # that will be created under a namespace of 'adam_vars'.
171 | # This is done so that we can easily access them later.
172 | # Those variables are used by adam optimizer and are not
173 | # related to variables of the vgg model.
174 |
175 | # We also retrieve gradient Tensors for each of our variables
176 | # This way we can later visualize them in tensorboard.
177 | # optimizer.compute_gradients and optimizer.apply_gradients
178 | # is equivalent to running:
179 | # train_step = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy_sum)
180 | with tf.variable_scope("adam_vars"):
181 | optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
182 | gradients = optimizer.compute_gradients(loss=cross_entropy_sum)
183 |
184 | for grad_var_pair in gradients:
185 | current_variable = grad_var_pair[1]
186 | current_gradient = grad_var_pair[0]
187 |
188 |         # Replace some characters in the original variable name --
189 |         # TensorBoard doesn't accept the ':' symbol
190 | gradient_name_to_save = current_variable.name.replace(":", "_")
191 |
192 | # Let's get histogram of gradients for each layer and
193 | # visualize them later in tensorboard
194 | tf.summary.histogram(gradient_name_to_save, current_gradient)
195 |
196 | train_step = optimizer.apply_gradients(grads_and_vars=gradients)
197 |
198 | # Now we define a function that will load the weights from VGG checkpoint
199 | # into our variables when we call it. We exclude the weights from the last layer
200 | # which is responsible for class predictions. We do this because
201 | # we will have different number of classes to predict and we can't
202 | # use the old ones as an initialization.
203 | vgg_except_fc8_weights = slim.get_variables_to_restore(exclude=['vgg_16/fc8', 'adam_vars'])
204 |
205 | # Here we get variables that belong to the last layer of network.
206 | # As we saw, the number of classes that VGG was originally trained on
207 | # is different from ours -- in our case it is only 2 classes.
208 | vgg_fc8_weights = slim.get_variables_to_restore(include=['vgg_16/fc8'])
209 |
210 | adam_optimizer_variables = slim.get_variables_to_restore(include=['adam_vars'])
211 |
212 | # Add summary op for the loss -- to be able to see it in
213 | # tensorboard.
214 | tf.summary.scalar('cross_entropy_loss', cross_entropy_sum)
215 |
216 | # Put all summary ops into one op. Produces string when
217 | # you run it.
218 | merged_summary_op = tf.summary.merge_all()
219 |
220 | # Create the summary writer -- to write all the logs
221 | # into a specified file. This file can be later read
222 | # by tensorboard.
223 | summary_string_writer = tf.summary.FileWriter(log_folder)
224 |
225 | # Create the log folder if it doesn't exist yet
226 | if not os.path.exists(log_folder):
227 | os.makedirs(log_folder)
228 |
229 | # Create an OP that performs the initialization of
230 | # values of variables to the values from VGG.
231 | read_vgg_weights_except_fc8_func = slim.assign_from_checkpoint_fn(
232 | vgg_checkpoint_path,
233 | vgg_except_fc8_weights)
234 |
235 | # Initializer for new fc8 weights -- for two classes.
236 | vgg_fc8_weights_initializer = tf.variables_initializer(vgg_fc8_weights)
237 |
238 | # Initializer for adam variables
239 | optimization_variables_initializer = tf.variables_initializer(adam_optimizer_variables)
240 |
241 | with tf.Session() as sess:
242 | # Run the initializers.
243 | read_vgg_weights_except_fc8_func(sess)
244 | sess.run(vgg_fc8_weights_initializer)
245 | sess.run(optimization_variables_initializer)
246 |
247 | train_image, train_annotation = sess.run([image_tensor, annotation_tensor],
248 | feed_dict=feed_dict_to_use)
249 |
250 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
251 | ax1.imshow(train_image)
252 | ax1.set_title('Input image')
253 | probability_graph = ax2.imshow(np.dstack((train_annotation,) * 3) * 100)
254 | ax2.set_title('Input Ground-Truth Annotation')
255 | plt.show()
256 |
257 |     # Let's perform 10 iterations
258 |     for i in range(10):
259 |         print("Starting iteration - " + str(i))
260 | loss, summary_string = sess.run([cross_entropy_sum, merged_summary_op],
261 | feed_dict=feed_dict_to_use)
262 |
263 | sess.run(train_step, feed_dict=feed_dict_to_use)
264 |
265 | pred_np, probabilities_np = sess.run([pred, probabilities],
266 | feed_dict=feed_dict_to_use)
267 |
268 | summary_string_writer.add_summary(summary_string, i)
269 |
270 | cmap = plt.get_cmap('bwr')
271 |
272 | # f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
273 | # ax1.imshow(np.uint8(pred_np.squeeze() != 1), vmax=1.5, vmin=-0.4, cmap=cmap)
274 | # ax1.set_title('Argmax. Iteration # ' + str(i))
275 | # probability_graph = ax2.imshow(probabilities_np.squeeze()[:, :, 0])
276 | # ax2.set_title('Probability of the Class. Iteration # ' + str(i))
277 | #
278 | # plt.colorbar(probability_graph)
279 | # plt.show()
280 |
281 | print("Current Loss: " + str(loss))
282 |
283 | feed_dict_to_use[is_training_placeholder] = False
284 |
285 | final_predictions, final_probabilities, final_loss = sess.run([pred,
286 | probabilities,
287 | cross_entropy_sum],
288 | feed_dict=feed_dict_to_use)
289 |
290 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
291 |
292 | ax1.imshow(np.uint8(final_predictions.squeeze() != 1),
293 | vmax=1.5,
294 | vmin=-0.4,
295 | cmap=cmap)
296 |
297 | ax1.set_title('Final Argmax')
298 |
299 | probability_graph = ax2.imshow(final_probabilities.squeeze()[:, :, 0])
300 | ax2.set_title('Final Probability of the Class')
301 | plt.colorbar(probability_graph)
302 |
303 | plt.show()
304 |
305 | print("Final Loss: " + str(final_loss))
306 |
307 | summary_string_writer.close()
308 |
309 |
310 | image = train_image
311 |
312 | softmax = final_probabilities.squeeze()
313 |
314 |
315 | softmax = softmax.transpose((2, 0, 1))
316 |
317 | # The input should be the negative of the logarithm of probability values
318 | # Look up the definition of softmax_to_unary for more information
319 | unary = softmax_to_unary(softmax)
320 |
321 | # The inputs should be C-contiguous -- we are using a Cython wrapper
322 | unary = np.ascontiguousarray(unary)
323 |
324 | d = dcrf.DenseCRF(image.shape[0] * image.shape[1], 2)
325 |
326 | d.setUnaryEnergy(unary)
327 |
328 | # This potential penalizes small pieces of segmentation that are
329 | # spatially isolated -- enforces more spatially consistent segmentations
330 | feats = create_pairwise_gaussian(sdims=(10, 10), shape=image.shape[:2])
331 |
332 | d.addPairwiseEnergy(feats, compat=3,
333 | kernel=dcrf.DIAG_KERNEL,
334 | normalization=dcrf.NORMALIZE_SYMMETRIC)
335 |
336 | # This creates the color-dependent features --
337 | # because the segmentation that we get from CNN are too coarse
338 | # and we can use local color features to refine them
339 | feats = create_pairwise_bilateral(sdims=(50, 50), schan=(20, 20, 20),
340 | img=image, chdim=2)
341 |
342 | d.addPairwiseEnergy(feats, compat=10,
343 | kernel=dcrf.DIAG_KERNEL,
344 | normalization=dcrf.NORMALIZE_SYMMETRIC)
345 | Q = d.inference(5)
346 |
347 | res = np.argmax(Q, axis=0).reshape((image.shape[0], image.shape[1]))
348 |
349 | cmap = plt.get_cmap('bwr')
350 |
351 | f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
352 | ax1.imshow(res, vmax=1.5, vmin=-0.4, cmap=cmap)
353 | ax1.set_title('Segmentation with CRF post-processing')
354 | probability_graph = ax2.imshow(np.dstack((train_annotation,)*3)*100)
355 | ax2.set_title('Ground-Truth Annotation')
356 | plt.show()
357 |
358 |
359 |
360 |
361 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CRF-image-segmentation
2 |
3 | 1. Install TensorFlow, TF-Slim and the other dependencies required by the code (ideally in a virtual environment).
4 | 2. Download the VGG model: http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
5 | 3. Untar the model and update the checkpoints_dir variable to point to the folder containing the extracted VGG model.
6 | 4. Update the log folder variable log_folder.
7 | 5. Lines 272 - 279 have been temporarily commented out to prevent plots from being shown after each iteration; a plot holds up further execution until its window is closed.
8 | 6. The input image size should be 480x352. All images must be resized to this dimension before being fed to the algorithm (a resizing sketch follows this list).
9 | 7. The variable named 'processed_probabilities' has been replaced with 'softmax' (already done in the code).
10 |
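A minimal resizing sketch for step 6 -- this snippet is not part of the repository; it assumes Pillow is installed, and treating 480x352 as width x height is an assumption:

```python
from PIL import Image

# Resize the photo with bilinear interpolation.
Image.open('cat.jpg').resize((480, 352), Image.BILINEAR).save('cat_resized.jpg')

# Resize the annotation mask with nearest-neighbour interpolation so the
# 0/1 label values are not blended.
Image.open('cat_annotation.png').resize((480, 352), Image.NEAREST).save('cat_annotation_resized.png')

# Point image_filename and annotation_filename in CRFCNNImageSegmentation.py
# at the resized files.
```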
--------------------------------------------------------------------------------
/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat.jpg
--------------------------------------------------------------------------------
/cat_annotation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/cat_annotation.png
--------------------------------------------------------------------------------
/obj_detection/README.md:
--------------------------------------------------------------------------------
1 | # OBJ-Detection
2 |
3 | We use the object detection API provided by TensorFlow to build this solution.
4 | 
5 | 1. Install TensorFlow and then clone the TensorFlow models repository - https://github.com/tensorflow/models.
6 | 2. Follow the installation steps documented in models/object_detection.
7 | 3. Copy the annotations and images folders to models/object_detection. The images folder contains the actual images, while the annotations folder contains the XMLs specifying the bounding boxes for each image.
8 | 4. Copy create_fashion_tf_record.py to models/object_detection.
9 | 5. Copy fashion_label_map.pbtxt to models/object_detection/data.
10 | 6. Execute the following command from the object_detection folder - "python create_fashion_tf_record.py --label_map_path=data/fashion_label_map.pbtxt --data_dir=`pwd` --output_dir=`pwd`".
11 | This will create two files, fash_train.record and fash_val.record, which will be fed to the TensorFlow network (a sanity-check sketch follows this list).
12 | 7. Copy fash_train.record and fash_val.record to the data folder.
13 | 8. Create a folder called fash-model in models.
14 | 9. Copy faster_rcnn_resnet101_fash.config to fash-model.
15 | 10. Create folders named train and eval inside the fash-model folder.
16 | 11. Download the COCO-pretrained Faster R-CNN with Resnet-101 model, untar it, and copy the model.ckpt* files into the fash-model folder. (http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz)
17 | 12. Start training by executing the following command from the object_detection folder - "python train.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --train_dir=models/fash-model/train".
18 | Training goes on indefinitely until it is killed by the user.
19 | 13. Execute "tensorboard --logdir=models/fash-model" to visualize the training and eval phases.
20 | 14. For evaluation, execute the following command from the object_detection folder - "python eval.py --logtostderr --pipeline_config_path=models/fash-model/faster_rcnn_resnet101_fash.config --checkpoint_dir=models/fash-model/train --eval_dir=models/fash-model/eval".
21 | This command will periodically fetch the latest checkpoint from models/fash-model/train and perform evaluations. Open the Images tab in the TensorBoard UI to see the evaluation results.
22 | 
23 | Since the dataset is very small, some noise is visible in the evaluation results. Even then, the correct categories were detected in each image with the highest confidence.
24 |
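A quick sanity check for step 6 -- this snippet is not part of the repository; it simply counts the examples written to the generated TFRecords using the same TF 1.x API the rest of the project relies on:

```python
import tensorflow as tf

# Count how many serialized tf.Example records ended up in each output file.
for record_path in ['fash_train.record', 'fash_val.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(record_path))
    print('{}: {} examples'.format(record_path, count))
```

With the 11 examples listed in annotations/trainval.txt and the 70/30 split in create_fashion_tf_record.py, this should report roughly 7 training and 4 validation examples.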
--------------------------------------------------------------------------------
/obj_detection/annotations/test.txt:
--------------------------------------------------------------------------------
1 | suit-1 3
2 | suit-2 3
3 | suit-3 3
4 | suit-4 3
5 | suit-5 3
6 | skirt-1 2
7 | skirt-2 2
8 | skirt-3 2
9 | skirt-4 2
10 | skirt-5 2
11 | shirt-1 1
12 | shirt-2 1
13 | shirt-3 1
14 | shirt-4 1
15 | shirt-5 1
16 |
17 |
--------------------------------------------------------------------------------
/obj_detection/annotations/trainval.txt:
--------------------------------------------------------------------------------
1 | suit-1
2 | suit-3
3 | suit-4
4 | suit-5
5 | skirt-2
6 | skirt-4
7 | skirt-5
8 | shirt-1
9 | shirt-3
10 | shirt-4
11 | shirt-5
12 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/shirt-1.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | shirt-1.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 320
11 | 400
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/shirt-3.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | shirt-3.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 220
11 | 258
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/shirt-4.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | shirt-4.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 320
11 | 400
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/shirt-5.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | shirt-5.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 234
11 | 312
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/skirt-2.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | skirt-2.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 385
11 | 500
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/skirt-4.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | skirt-4.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 750
11 | 1154
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/skirt-5.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | skirt-5.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 328
11 | 350
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/suit-1.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | suit-1.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 290
11 | 370
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/suit-3.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | suit-3.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 322
11 | 545
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/suit-4.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | suit-4.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 361
11 | 452
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/annotations/xmls/suit-5.xml:
--------------------------------------------------------------------------------
1 |
2 | OXIIIT
3 | suit-5.jpg
4 |
5 | OXFORD-IIIT Pet Dataset
6 | OXIIIT
7 | flickr
8 |
9 |
10 | 361
11 | 452
12 | 3
13 |
14 | 0
15 |
28 |
29 |
--------------------------------------------------------------------------------
/obj_detection/create_fashion_tf_record.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | r"""Convert the fashion dataset (Oxford-IIIT Pet style annotations) to TFRecord for object_detection.
17 |
18 | See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
19 | Cats and Dogs
20 | IEEE Conference on Computer Vision and Pattern Recognition, 2012
21 | http://www.robots.ox.ac.uk/~vgg/data/pets/
22 |
23 | Example usage:
24 |     ./create_fashion_tf_record --data_dir=/home/user/fashion \
25 |         --output_dir=/home/user/fashion/output
26 | """
27 |
28 | import hashlib
29 | import io
30 | import logging
31 | import os
32 | import random
33 | import re
34 |
35 | from lxml import etree
36 | import PIL.Image
37 | import tensorflow as tf
38 |
39 | from object_detection.utils import dataset_util
40 | from object_detection.utils import label_map_util
41 |
42 | flags = tf.app.flags
43 | flags.DEFINE_string('data_dir', '', 'Root directory to the raw fashion dataset.')
44 | flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
45 | flags.DEFINE_string('label_map_path', 'data/fashion_label_map.pbtxt',
46 |                     'Path to label map proto')
47 | FLAGS = flags.FLAGS
48 |
49 |
50 | def get_class_name_from_filename(file_name):
51 | """Gets the class name from a file.
52 |
53 | Args:
54 | file_name: The file name to get the class name from.
55 | ie. "american_pit_bull_terrier_105.jpg"
56 |
57 | Returns:
58 | example: The converted tf.Example.
59 | """
60 | match = re.match(r'([A-Za-z_]+)(-[0-9]+\.jpg)', file_name, re.I)
61 | return match.groups()[0]
62 |
63 |
64 | def dict_to_tf_example(data,
65 | label_map_dict,
66 | image_subdirectory,
67 | ignore_difficult_instances=False):
68 | """Convert XML derived dict to tf.Example proto.
69 |
70 | Notice that this function normalizes the bounding box coordinates provided
71 | by the raw data.
72 |
73 | Args:
74 | data: dict holding PASCAL XML fields for a single image (obtained by
75 | running dataset_util.recursive_parse_xml_to_dict)
76 | label_map_dict: A map from string label names to integers ids.
77 | image_subdirectory: String specifying subdirectory within the
78 | Pascal dataset directory holding the actual image data.
79 | ignore_difficult_instances: Whether to skip difficult instances in the
80 | dataset (default: False).
81 |
82 | Returns:
83 | example: The converted tf.Example.
84 |
85 | Raises:
86 | ValueError: if the image pointed to by data['filename'] is not a valid JPEG
87 | """
88 | img_path = os.path.join(image_subdirectory, data['filename'])
89 | with tf.gfile.GFile(img_path, 'rb') as fid:
90 | encoded_jpg = fid.read()
91 | encoded_jpg_io = io.BytesIO(encoded_jpg)
92 | image = PIL.Image.open(encoded_jpg_io)
93 | if image.format != 'JPEG':
94 | raise ValueError('Image format not JPEG')
95 | key = hashlib.sha256(encoded_jpg).hexdigest()
96 |
97 | width = int(data['size']['width'])
98 | height = int(data['size']['height'])
99 |
100 | xmin = []
101 | ymin = []
102 | xmax = []
103 | ymax = []
104 | classes = []
105 | classes_text = []
106 | truncated = []
107 | poses = []
108 | difficult_obj = []
109 | for obj in data['object']:
110 | difficult = bool(int(obj['difficult']))
111 | if ignore_difficult_instances and difficult:
112 | continue
113 |
114 | difficult_obj.append(int(difficult))
115 |
116 | xmin.append(float(obj['bndbox']['xmin']) / width)
117 | ymin.append(float(obj['bndbox']['ymin']) / height)
118 | xmax.append(float(obj['bndbox']['xmax']) / width)
119 | ymax.append(float(obj['bndbox']['ymax']) / height)
120 | class_name = get_class_name_from_filename(data['filename'])
121 | classes_text.append(class_name.encode('utf8'))
122 | classes.append(label_map_dict[class_name])
123 | truncated.append(int(obj['truncated']))
124 | poses.append(obj['pose'].encode('utf8'))
125 |
126 | example = tf.train.Example(features=tf.train.Features(feature={
127 | 'image/height': dataset_util.int64_feature(height),
128 | 'image/width': dataset_util.int64_feature(width),
129 | 'image/filename': dataset_util.bytes_feature(
130 | data['filename'].encode('utf8')),
131 | 'image/source_id': dataset_util.bytes_feature(
132 | data['filename'].encode('utf8')),
133 | 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
134 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
135 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
136 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
137 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
138 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
139 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
140 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
141 | 'image/object/class/label': dataset_util.int64_list_feature(classes),
142 | 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
143 | 'image/object/truncated': dataset_util.int64_list_feature(truncated),
144 | 'image/object/view': dataset_util.bytes_list_feature(poses),
145 | }))
146 | return example
147 |
148 |
149 | def create_tf_record(output_filename,
150 | label_map_dict,
151 | annotations_dir,
152 | image_dir,
153 | examples):
154 | """Creates a TFRecord file from examples.
155 |
156 | Args:
157 | output_filename: Path to where output file is saved.
158 | label_map_dict: The label map dictionary.
159 | annotations_dir: Directory where annotation files are stored.
160 | image_dir: Directory where image files are stored.
161 | examples: Examples to parse and save to tf record.
162 | """
163 | writer = tf.python_io.TFRecordWriter(output_filename)
164 | for idx, example in enumerate(examples):
165 | if idx % 100 == 0:
166 | logging.info('On image %d of %d', idx, len(examples))
167 | path = os.path.join(annotations_dir, 'xmls', example + '.xml')
168 |
169 | if not os.path.exists(path):
170 | logging.warning('Could not find %s, ignoring example.', path)
171 | continue
172 | with tf.gfile.GFile(path, 'r') as fid:
173 | xml_str = fid.read()
174 | xml = etree.fromstring(xml_str)
175 | data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
176 |
177 | tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
178 | writer.write(tf_example.SerializeToString())
179 |
180 | writer.close()
181 |
182 |
183 | # TODO: Add test for pet/PASCAL main files.
184 | def main(_):
185 | data_dir = FLAGS.data_dir
186 | label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
187 |
188 | logging.info('Reading from Fashion dataset.')
189 | image_dir = os.path.join(data_dir, 'images')
190 | annotations_dir = os.path.join(data_dir, 'annotations')
191 | examples_path = os.path.join(annotations_dir, 'trainval.txt')
192 | examples_list = dataset_util.read_examples_list(examples_path)
193 |
194 | # Test images are not included in the downloaded data set, so we shall perform
195 | # our own split.
196 | random.seed(42)
197 | random.shuffle(examples_list)
198 | num_examples = len(examples_list)
199 | num_train = int(0.7 * num_examples)
200 | train_examples = examples_list[:num_train]
201 | val_examples = examples_list[num_train:]
202 | logging.info('%d training and %d validation examples.',
203 | len(train_examples), len(val_examples))
204 |
205 | train_output_path = os.path.join(FLAGS.output_dir, 'fash_train.record')
206 | val_output_path = os.path.join(FLAGS.output_dir, 'fash_val.record')
207 | create_tf_record(train_output_path, label_map_dict, annotations_dir,
208 | image_dir, train_examples)
209 | create_tf_record(val_output_path, label_map_dict, annotations_dir,
210 | image_dir, val_examples)
211 |
212 | if __name__ == '__main__':
213 | tf.app.run()
214 |
--------------------------------------------------------------------------------
/obj_detection/evaluation-results/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/1.png
--------------------------------------------------------------------------------
/obj_detection/evaluation-results/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/2.png
--------------------------------------------------------------------------------
/obj_detection/evaluation-results/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/evaluation-results/3.png
--------------------------------------------------------------------------------
/obj_detection/fashion_label_map.pbtxt:
--------------------------------------------------------------------------------
1 | item {
2 | id: 0
3 | name: 'none_of_the_above'
4 | }
5 |
6 | item {
7 | id: 1
8 | name: 'shirt'
9 | }
10 |
11 | item {
12 | id: 2
13 | name: 'skirt'
14 | }
15 |
16 | item {
17 | id: 3
18 | name: 'suit'
19 | }
20 |
--------------------------------------------------------------------------------
/obj_detection/faster_rcnn_resnet101_fash.config:
--------------------------------------------------------------------------------
1 | # Faster R-CNN with Resnet-101 (v1), adapted from the Oxford-IIIT Pet Dataset configuration for the fashion dataset.
2 | # Users should configure the fine_tune_checkpoint field in the train config as
3 | # well as the label_map_path and input_path fields in the train_input_reader and
4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
5 | # should be configured.
6 |
7 | model {
8 | faster_rcnn {
9 | num_classes: 37
10 | image_resizer {
11 | keep_aspect_ratio_resizer {
12 | min_dimension: 600
13 | max_dimension: 1024
14 | }
15 | }
16 | feature_extractor {
17 | type: 'faster_rcnn_resnet101'
18 | first_stage_features_stride: 16
19 | }
20 | first_stage_anchor_generator {
21 | grid_anchor_generator {
22 | scales: [0.25, 0.5, 1.0, 2.0]
23 | aspect_ratios: [0.5, 1.0, 2.0]
24 | height_stride: 16
25 | width_stride: 16
26 | }
27 | }
28 | first_stage_box_predictor_conv_hyperparams {
29 | op: CONV
30 | regularizer {
31 | l2_regularizer {
32 | weight: 0.0
33 | }
34 | }
35 | initializer {
36 | truncated_normal_initializer {
37 | stddev: 0.01
38 | }
39 | }
40 | }
41 | first_stage_nms_score_threshold: 0.0
42 | first_stage_nms_iou_threshold: 0.7
43 | first_stage_max_proposals: 300
44 | first_stage_localization_loss_weight: 2.0
45 | first_stage_objectness_loss_weight: 1.0
46 | initial_crop_size: 14
47 | maxpool_kernel_size: 2
48 | maxpool_stride: 2
49 | second_stage_box_predictor {
50 | mask_rcnn_box_predictor {
51 | use_dropout: false
52 | dropout_keep_probability: 1.0
53 | fc_hyperparams {
54 | op: FC
55 | regularizer {
56 | l2_regularizer {
57 | weight: 0.0
58 | }
59 | }
60 | initializer {
61 | variance_scaling_initializer {
62 | factor: 1.0
63 | uniform: true
64 | mode: FAN_AVG
65 | }
66 | }
67 | }
68 | }
69 | }
70 | second_stage_post_processing {
71 | batch_non_max_suppression {
72 | score_threshold: 0.0
73 | iou_threshold: 0.6
74 | max_detections_per_class: 100
75 | max_total_detections: 300
76 | }
77 | score_converter: SOFTMAX
78 | }
79 | second_stage_localization_loss_weight: 2.0
80 | second_stage_classification_loss_weight: 1.0
81 | }
82 | }
83 |
84 | train_config: {
85 | batch_size: 1
86 | optimizer {
87 | momentum_optimizer: {
88 | learning_rate: {
89 | manual_step_learning_rate {
90 | initial_learning_rate: 0.0003
91 | schedule {
92 | step: 0
93 | learning_rate: .0003
94 | }
95 | schedule {
96 | step: 900000
97 | learning_rate: .00003
98 | }
99 | schedule {
100 | step: 1200000
101 | learning_rate: .000003
102 | }
103 | }
104 | }
105 | momentum_optimizer_value: 0.9
106 | }
107 | use_moving_average: false
108 | }
109 | gradient_clipping_by_norm: 10.0
110 | fine_tune_checkpoint: "models/fash-model/model.ckpt"
111 | from_detection_checkpoint: true
112 | data_augmentation_options {
113 | random_horizontal_flip {
114 | }
115 | }
116 | }
117 |
118 | train_input_reader: {
119 | tf_record_input_reader {
120 | input_path: "data/fash_train.record"
121 | }
122 | label_map_path: "data/fashion_label_map.pbtxt"
123 | }
124 |
125 | eval_config: {
126 | num_examples: 2000
127 | }
128 |
129 | eval_input_reader: {
130 | tf_record_input_reader {
131 | input_path: "data/fash_val.record"
132 | }
133 | label_map_path: "data/fashion_label_map.pbtxt"
134 | shuffle: false
135 | num_readers: 1
136 | }
137 |
--------------------------------------------------------------------------------
/obj_detection/images/shirt-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-1.jpg
--------------------------------------------------------------------------------
/obj_detection/images/shirt-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-3.jpg
--------------------------------------------------------------------------------
/obj_detection/images/shirt-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-4.jpg
--------------------------------------------------------------------------------
/obj_detection/images/shirt-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/shirt-5.jpg
--------------------------------------------------------------------------------
/obj_detection/images/skirt-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-2.jpg
--------------------------------------------------------------------------------
/obj_detection/images/skirt-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-4.jpg
--------------------------------------------------------------------------------
/obj_detection/images/skirt-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/skirt-5.jpg
--------------------------------------------------------------------------------
/obj_detection/images/suit-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-1.jpg
--------------------------------------------------------------------------------
/obj_detection/images/suit-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-3.jpg
--------------------------------------------------------------------------------
/obj_detection/images/suit-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-4.jpg
--------------------------------------------------------------------------------
/obj_detection/images/suit-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/therealnidhin/CRF-image-segmentation/dad1ced824f84ee7ff25e248d9ae41afce44cb6f/obj_detection/images/suit-5.jpg
--------------------------------------------------------------------------------