├── README.md
├── agent.py
├── config.py
├── data_util.py
├── main.py
├── net.py
└── vis_util.py
/README.md:
--------------------------------------------------------------------------------
1 | ⚠ **Disclaimer:** I am currently employed at CafeBazaar and I actively participate in the
2 | interview/recruitment processes; nothing good's going to happen if we find out
3 | you have plagiarized this work for similar tasks. So beware!
4 |
5 | # UnsupervisedObjectLocalization
6 | Your classification neural network for object detection and localization
7 |
8 |
9 | ## Method
10 |
11 | Guided back-propagation has proven to be a fast and interpretable tool for visualizing the parts of an image to which a specific neuron responds the most — not exactly, but it gives useful insight. It works by gating the gradients through the ReLU units while back-propagating from a single neuron all the way back to the input image. The idea is to make the most of these gradients for the task of object localization when they correspond to the object. For every image, the neurons with the biggest positive impact on the class score are the candidates for guided back-propagation. This selection of top neurons is done via the DAM heuristic. Then, for every gradient, a saliency mask is generated by keeping the pixels whose values lie above a certain percentile of the gradient. Each mask is applied separately to the input image, and the masked image is passed through the network again to recalculate the class scores. The neurons whose masked images yield the lowest classification loss are assumed to be the ones that correspond most closely to the object in the image. Their masks are finally merged (union), and the bounding box is simply the smallest box that encloses the resulting area.
12 |
13 | ## This repo also contains:
14 | * T-SNE Representation of the Dataset
15 | * A Graceful Guided-Backpropagation Switch
16 | * I use VGG16 for the classification network
17 |
18 | ## Dataset
19 | I implemented this for the image dataset of [Divar](https://divar.ir/), a C2C e-commerce platform in Iran. You can find some of these images in the t-SNE representation below.
20 |
21 |
22 |
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from vis_util import get_full_plot
3 | from scipy.ndimage import binary_erosion, binary_dilation
4 | from data_util import T_SNE, sample_data
5 | from net import Net
6 |
7 |
def do_batch_plot(guided_backprops, indices, images, bounding_boxes, files_dict):
    """Draw one result figure per image of a batch.

    `files_dict` maps a folder name to the file names sampled from it; it is
    flattened, in iteration order, into (folder, file name) pairs that are
    matched positionally with the per-image inputs. For every image only the
    gradients selected by the matching entry of `indices` are plotted.
    """
    destinations = [(folder, file_name)
                    for folder, file_names in files_dict.items()
                    for file_name in file_names]
    per_image = zip(guided_backprops, indices, images, bounding_boxes, destinations)
    for gradients, selected, picture, box, (folder, file_name) in per_image:
        chosen_gradients = [gradients[position] for position in selected]
        get_full_plot(chosen_gradients, picture, box, folder, file_name)
16 |
17 |
class Agent(object):
    """
    recipes for doing the dirty work

    Ties the network wrapper, the data sampler and the plotting helpers
    together: bounding-box extraction from guided back-propagation masks and
    t-SNE grid generation for one dataset folder.
    """

    def __init__(self, config):
        """Keep `config` and build the network wrapper from it."""
        self.config = config
        self.net = Net(config)

    def get_bounding_box(self, image=None, files_dict=None):
        """Compute one bounding box per image of a batch.

        :param image: batch of raw images; when both `image` and `files_dict`
            are None a random batch of `config.batch_size` is sampled.
        :param files_dict: folder name -> file names of the batch; only used
            to name the saved plots.
        :return: list of ``[[x_min, y_min], [x_max, y_max]]``, one per image.
        """
        if image is None and files_dict is None:
            image, files_dict = sample_data(self.config.batch_size)
        # preprocess a copy so the raw images stay available for plotting
        preprocessed_image = self._preprocess(image.copy())
        kmax_neuron_indices, top_classes_indices = self.net.get_top_kmax_neurons(preprocessed_image)
        guided_backprops = self._get_guided_backprops(preprocessed_image, kmax_neuron_indices)
        masks = self._get_images_masks(guided_backprops)
        top_k_neurons_relative_indices = self._get_top_k_neurons(preprocessed_image, masks, top_classes_indices)
        bounding_boxes = self._get_all_bounding_boxes(masks, top_k_neurons_relative_indices)
        # plots are saved under folder/file names, so without files_dict
        # there is nothing to name them after: skip instead of crashing
        if self.config.do_plotting and files_dict is not None:
            do_batch_plot(guided_backprops, top_k_neurons_relative_indices,
                          image, bounding_boxes, files_dict)
        return bounding_boxes

    def make_tsne_pic_for_directory(self, folder='personal'):
        """Save a t-SNE image grid (``<folder>.jpg``) for images sampled from `folder`."""
        images_number = self.config.grid_size ** 2
        batch_count = images_number // self.config.batch_size + 1
        image_batches = []
        feature_batches = []
        for _ in range(batch_count):
            image_batch, _ = sample_data(self.config.batch_size, [folder])
            image_batches.append(image_batch)
            # feed the network preprocessed images, as get_bounding_box does;
            # the copy keeps the raw pixels intact for the output grid
            feature_batches.append(self.net.get_fc_features(self._preprocess(image_batch.copy())))
        # concatenate once instead of re-copying the growing array per batch
        images = np.concatenate(image_batches)
        fc_features = np.concatenate(feature_batches)

        tsne = T_SNE()
        tsne_embedding = tsne.generate_tsne(fc_features)
        tsne.save_grid(images, tsne_embedding, folder + '.jpg')

    def _preprocess(self, image):
        """Apply the VGG16 input preprocessing to a batch of images."""
        return self.net.vgg16.preprocess_input(image)

    def _get_guided_backprops(self, images, neuron_indices):
        """Return, per image, one guided back-propagation result per neuron index."""
        # lazy programming, this part should as well be vectorized
        guided_backprops = []
        for image, neuron_index in zip(images, neuron_indices):
            gbp = [self.net.get_guided_backprop(np.expand_dims(image, axis=0), ni)
                   for ni in neuron_index]
            guided_backprops.append(gbp)
        return guided_backprops

    def _get_images_masks(self, guided_backprops):
        """Turn every guided back-propagation result into a 2-D saliency mask.

        The gradient is reduced over its last axis with ``max``, thresholded
        at ``config.cut_off_percentile`` and cleaned up with an erosion
        followed by a dilation. Masks are cast to the gradients' dtype.
        """
        masks = []
        for gbps in guided_backprops:
            projected_gbps = [np.max(gb, axis=-1).squeeze() for gb in gbps]
            raw_masks = [pgbp > np.percentile(pgbp, self.config.cut_off_percentile)
                         for pgbp in projected_gbps]
            # erosion and dilation
            masks_per_image = [binary_dilation(binary_erosion(raw_mask)).astype(projected_gbps[0].dtype)
                               for raw_mask in raw_masks]
            masks.append(masks_per_image)
        return masks

    @staticmethod
    def _apply_masks(image, masks):
        """Return a stack with one masked copy of `image` per 2-D mask.

        `image` is channel-last (H, W, C) and every mask is (H, W); the mask
        is broadcast over the channel axis so that each pixel is scaled by
        its own mask value.
        """
        return np.stack([image * np.asarray(mask)[..., np.newaxis] for mask in masks])

    def _get_top_k_neurons(self, images, masks, top_class):
        """Pick, per image, the ``config.k`` masks with the lowest classification loss.

        :return: per image, a list of indices into that image's mask list.
        """
        top_k_neurons_relative_indices = []
        for i, image in enumerate(images):
            # NOTE: reshaping (H, W, C) to (C, H, W) does not transpose the
            # data, it re-reads memory in a different layout, so the mask
            # would hit the wrong pixels; broadcasting is used instead.
            masked_images = self._apply_masks(image, masks[i])
            losses = self.net.get_batch_loss(masked_images, top_class[i])
            top_k_neurons_relative_indices.append(list(np.argsort(losses)[:self.config.k]))
        return top_k_neurons_relative_indices

    def _get_all_bounding_boxes(self, all_masks, all_mask_indices):
        """Compute the bounding box of every image from its selected masks."""
        return [self._get_bounding_box(mask, mask_indices)
                for mask, mask_indices in zip(all_masks, all_mask_indices)]

    def _get_bounding_box(self, masks, mask_indices):
        """Return the smallest box enclosing the union of the selected masks.

        :param masks: the 2-D masks of one image.
        :param mask_indices: indices of the masks to merge.
        :return: ``[[x_min, y_min], [x_max, y_max]]``. When the union is empty
            the result is ``[[width, height], [0, 0]]``.
        """
        selected = np.asarray(masks)[mask_indices]
        union = selected.astype(bool).any(axis=0)
        height, width = self.config.vgg16.input_size[:2]
        # only the network input window is considered
        union = union[:height, :width]
        rows, cols = np.nonzero(union)
        if rows.size == 0:
            return [[width, height], [0, 0]]
        return [[int(cols.min()), int(rows.min())],
                [int(cols.max()), int(rows.max())]]
112 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
class VGG16:
    """Constants describing the VGG16 network used by the project."""

    # shape of one network input image
    input_size = (224, 224, 3)
    # name of the layer looked up in the Keras model to obtain the feature tensor
    feature_layer = 'block5_conv2'
7 |
8 |
class Config(object):
    """Project-wide settings, exposed as class attributes.

    All paths are resolved against the current working directory at import
    time.
    """

    # network-specific constants
    vgg16 = VGG16

    # ---- data ----
    # when True images are loaded at the network input size, else at default_size
    resize = True
    default_size = (600, 600, 3)
    # the dataset lives in a `dataset` directory next to the working directory
    dataset_path = os.path.abspath(
        os.path.join(os.path.realpath('.'), os.pardir, 'dataset'))
    dataset_allowed_folders = ['personal']
    batch_size = 32  # sorry, not enough local vram

    # ---- hyper-parameters ----
    kmax = 10  # number of candidate neurons kept per image
    k = 5  # number of masks finally merged into the bounding box
    top_n_classes_weights = [0.05, 0.15, 0.8]
    cut_off_percentile = 20  # percentile threshold used to build saliency masks

    # ---- TSNE ----
    tsne_perplexity = 30
    tsne_iter = 5000
    tsne_output_dir = os.path.abspath(
        os.path.join(os.path.realpath('.'), 'tsne'))
    grid_size = 30  # the t-SNE picture is a grid_size x grid_size grid of tiles

    # ---- plot ----
    do_plotting = True
    gradients_plot_path = os.path.abspath(
        os.path.join(os.path.realpath('.'), 'results'))
    max_per_row_image_plot = 5
35 |
--------------------------------------------------------------------------------
/data_util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | from lapjv import lapjv
4 | import numpy as np
5 | from tensorflow.python.keras.preprocessing import image
6 | from sklearn.manifold import TSNE
7 | from scipy.spatial.distance import cdist
8 | from config import Config
9 |
10 |
def sample_data(batch_size, folders=None):
    """Load a random batch of images, evenly split between dataset folders.

    :param batch_size: requested number of images; the batch actually holds
        ``batch_size // n_folders`` images per folder.
    :param folders: folder names (inside ``Config.dataset_path``) to sample
        from; defaults to ``Config.dataset_allowed_folders``.
    :return: ``(image_batch, files_dict)`` where `image_batch` is an array of
        shape ``(n_images, *input_size)`` and `files_dict` maps every folder
        to the file names sampled from it, in batch order.
    :raises ValueError: if the folder list is empty, or (from
        ``random.sample``) if a folder holds fewer files than requested.
    """
    allowed_folders = Config.dataset_allowed_folders if folders is None else folders
    if not allowed_folders:
        raise ValueError('at least one dataset folder is required')
    # size the batch from the folders that are actually sampled; using the
    # configured folder list here leaves uninitialised rows (or overruns the
    # array) whenever `folders` has a different length
    size_per_cat = batch_size // len(allowed_folders)
    size = size_per_cat * len(allowed_folders)
    input_size = Config.vgg16.input_size if Config.resize else Config.default_size
    image_batch = np.empty([size] + list(input_size))
    files_dict = {}
    index = 0
    for cat in allowed_folders:
        cat_path = os.path.join(Config.dataset_path, cat)
        cat_image_file_names = random.sample(os.listdir(cat_path), size_per_cat)
        files_dict[cat] = cat_image_file_names
        for i, img in enumerate(cat_image_file_names):
            img_path = os.path.join(cat_path, img)
            loaded_image = image.load_img(img_path, target_size=input_size)
            image_batch[index + i] = image.img_to_array(loaded_image)
        index += size_per_cat
    return image_batch, files_dict
29 |
30 |
class T_SNE(object):
    """Build a 2-D t-SNE embedding and render it as a square grid of image tiles."""

    def __init__(self):
        """Read the t-SNE settings from ``Config`` and create the output directory."""
        self.perplexity = Config.tsne_perplexity
        self.iters = Config.tsne_iter
        self.output_dir = Config.tsne_output_dir
        self.grid_size = Config.grid_size
        self.tile_res = Config.vgg16.input_size[0]
        # creates missing parents too and does not fail if the directory
        # appears between a check and the creation
        os.makedirs(self.output_dir, exist_ok=True)

    def generate_tsne(self, embeddings):
        """Embed at most ``grid_size ** 2`` feature rows in 2-D.

        :return: the embedding, shifted and scaled per axis to span [0, 1].
        """
        tsne = TSNE(perplexity=self.perplexity, n_components=2, init='random', n_iter=self.iters)
        tsne_embedding = tsne.fit_transform(embeddings.squeeze()[:self.grid_size ** 2, :])
        tsne_embedding -= tsne_embedding.min(axis=0)
        tsne_embedding /= tsne_embedding.max(axis=0)
        return tsne_embedding

    def save_grid(self, images, tsne_embedding, out_name):
        """Assign every embedded image to a grid cell and save the tiled picture.

        Cells are assigned by solving a linear assignment problem (`lapjv`) on
        the squared distances between the regular grid points and the
        embedding. The picture is written to ``output_dir/out_name``.
        """
        grid = np.dstack(np.meshgrid(np.linspace(0, 1, self.grid_size),
                                     np.linspace(0, 1, self.grid_size))).reshape(-1, 2)
        cost_matrix = cdist(grid, tsne_embedding, "sqeuclidean").astype(np.float32)
        cost_matrix = cost_matrix * (100000 / cost_matrix.max())
        _, col_asses, _ = lapjv(cost_matrix)
        grid_jv = grid[col_asses]
        out = np.ones((self.grid_size * self.tile_res, self.grid_size * self.tile_res, 3))

        for pos, img in zip(grid_jv, images[0:self.grid_size ** 2]):
            # grid coordinates in [0, 1] -> top-left pixel of the tile
            h_range = int(np.floor(pos[0] * (self.grid_size - 1) * self.tile_res))
            w_range = int(np.floor(pos[1] * (self.grid_size - 1) * self.tile_res))
            out[h_range:h_range + self.tile_res, w_range:w_range + self.tile_res] = image.img_to_array(img)

        im = image.array_to_img(out)
        im.save(os.path.join(self.output_dir, out_name), quality=100)
63 |
64 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from config import Config
2 | from agent import Agent
3 | from data_util import sample_data
4 |
5 |
def main():
    """Build the agent and run the bounding-box pipeline on three sampled batches."""
    runner = Agent(Config)
    # runner.make_tsne_pic_for_directory()
    rounds = 3
    for _ in range(rounds):
        runner.get_bounding_box()


if __name__ == '__main__':
    main()
15 |
--------------------------------------------------------------------------------
/net.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from tensorflow.python.framework import ops
4 | from tensorflow.python.ops import gen_nn_ops
5 |
6 |
class Net(object):
    """TensorFlow graph around Keras VGG16 with a switchable guided back-propagation.

    The ReLU gradient is overridden while the model is built; a scalar
    placeholder selects at run time between the normal gradient (1) and the
    guided one (0).
    """

    def __init__(self, config):
        """Open a session, register the guided ReLU gradient and build the graph."""
        self.config = config
        self.sess = tf.Session()
        # make the session the default one; it is never exited
        self.sess.__enter__()
        self.graph = tf.get_default_graph()

        # for the good of efficient guided-backprop
        self.not_guided_flag = tf.placeholder(dtype=tf.float32, shape=[])  # 1 for normal, 0 for guided
        self._add_guided_backprop()

        self._build_net()

    def _add_guided_backprop(self):
        """Register the ``GuidedRelu`` gradient function."""
        @ops.RegisterGradient("GuidedRelu")
        def _GuidedReluGrad(op, grad):
            # positive upstream gradients always use the regular ReLU
            # gradient; the others are kept only when the flag is 1
            return tf.where(0. < grad, gen_nn_ops.relu_grad(grad, op.outputs[0]),
                            gen_nn_ops.relu_grad(grad, op.outputs[0]) * self.not_guided_flag)

    def _build_net(self):
        """Create the model under the gradient override, then all derived tensors."""
        with self.graph.gradient_override_map({'Relu': 'GuidedRelu'}):
            self._vgg16()
            self._fetch_shapes()
            self._build_impact_gradient()
            self._build_top_kmax_neuron_selection()
            self._build_guided_backprop()
            self._build_softmax_loss()

    def _vgg16(self):
        """Instantiate VGG16 and keep handles on the feature and ``fc2`` tensors."""
        self.vgg16 = tf.keras.applications.vgg16
        self.model = self.vgg16.VGG16()
        # the feature tensor is the *input* of the configured layer
        self.feature_tensor = self.model.get_layer(self.config.vgg16.feature_layer).input
        self.fc_features = self.model.get_layer('fc2').output

    def _fetch_shapes(self):
        """Record the feature and class-score shapes with the batch dimension set to 1."""
        self.features_shape = self.feature_tensor.get_shape().as_list()
        self.classes_shape = self.model.output.get_shape().as_list()
        self.features_shape[0] = self.classes_shape[0] = 1

    def _build_impact_gradient(self):
        """Gradient of the top-scoring class output with respect to the feature tensor."""
        self.category_indices = tf.argmax(self.model.output, axis=-1, name='max_scoring_categories')
        fake_upstream_grad = tf.one_hot(self.category_indices, self.classes_shape[-1], axis=-1)
        self.impact_grad = tf.gradients(self.model.output, self.feature_tensor,
                                        grad_ys=[fake_upstream_grad], name='impact_gradients')[0]

    def _build_top_kmax_neuron_selection(self):
        """Flat indices of the ``config.kmax`` feature neurons with the largest effect."""
        # use DAM heuristic for selection
        neurons_effect = self.impact_grad * self.feature_tensor
        neurons_effect_flat_batch = tf.reshape(neurons_effect, (-1, np.prod(self.features_shape)))
        self.batch_top_kmax_neuron_indices = tf.nn.top_k(neurons_effect_flat_batch, k=self.config.kmax)[1]

    def _build_guided_backprop(self):
        """Gradient of one feature neuron (chosen by a placeholder) with respect to the input."""
        self.neuron_index = tf.placeholder(tf.int32, shape=[])
        fake_upstream_grad = tf.one_hot(self.neuron_index, np.prod(self.features_shape), axis=-1)
        fake_upstream_grad = tf.reshape(fake_upstream_grad, shape=self.features_shape)
        self.guided_backprop = tf.gradients(self.feature_tensor, self.model.input,
                                            grad_ys=[fake_upstream_grad], name='guided_backprop')

    def _build_softmax_loss(self):
        """Cross-entropy of a ``kmax``-sized batch against a single class index."""
        self.top_class_index_ph = tf.placeholder(tf.int32, shape=[])
        self.top_class_batch_one_hot = tf.one_hot(
            tf.ones([self.config.kmax, ], dtype=tf.int32) * self.top_class_index_ph,
            self.classes_shape[-1], axis=-1)
        self.softmax_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.top_class_batch_one_hot,
                                                                      logits=self.model.output)

    def get_top_kmax_neurons(self, images):
        """Return ``(top kmax neuron indices, top class index)`` for every image."""
        top_kmax_neurons_indices, max_scoring_indices = self.sess.run(
            [self.batch_top_kmax_neuron_indices, self.category_indices],
            feed_dict={self.model.input: images,
                       self.not_guided_flag: 1.0})
        return top_kmax_neurons_indices, max_scoring_indices

    def get_guided_backprop(self, image, neuron_index):
        """Return the guided back-propagation of `neuron_index` for a one-image batch."""
        numerical_guided_backprop = self.sess.run(self.guided_backprop,
                                                  feed_dict={
                                                      self.model.input: image,
                                                      self.neuron_index: neuron_index,
                                                      self.not_guided_flag: 0.0,
                                                  })
        return numerical_guided_backprop[0]

    def get_batch_loss(self, images, top_class):
        """Return the per-image cross-entropy loss against `top_class`.

        :param images: batch of exactly ``config.kmax`` images (the label
            tensor of the loss is built with that fixed size).
        :raises ValueError: if the batch does not hold ``config.kmax`` images.
        """
        # compare by value: `is` on ints only works for small cached values,
        # and an assert would disappear under `python -O`
        if images.shape[0] != self.config.kmax:
            raise ValueError('expected a batch of %d images, got %d'
                             % (self.config.kmax, images.shape[0]))
        batch_loss = self.sess.run(self.softmax_loss,
                                   feed_dict={
                                       self.top_class_index_ph: top_class,
                                       self.model.input: images,
                                   })
        return batch_loss

    def get_fc_features(self, images):
        """Return the ``fc2`` activations for a batch of images."""
        fc_features = self.sess.run(self.fc_features,
                                    feed_dict={self.model.input: images})
        return fc_features
102 |
103 |
--------------------------------------------------------------------------------
/vis_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import matplotlib.patches as patches
4 | import os
5 | from config import Config
6 |
7 |
def get_positive_negative_saliency(gradient):
    """Split a gradient into normalised positive and negative saliency maps.

    :param gradient: array-like gradient.
    :return: ``(pos_saliency, neg_saliency)``; each is the magnitude of the
        positive (resp. negative) part divided by its own maximum. A part
        without any non-zero value is returned as all zeros instead of NaN.
    """
    gradient = np.asarray(gradient)

    def _normalise(part):
        peak = part.max()
        # an all-zero part has nothing to scale; dividing by 1 keeps the
        # zeros and avoids a 0 / 0
        return part / (peak if peak > 0 else 1)

    pos_saliency = _normalise(np.maximum(0, gradient))
    neg_saliency = _normalise(np.maximum(0, -gradient))
    return pos_saliency, neg_saliency
12 |
13 |
def get_full_plot(gradients, image, bounding_box, folder, name, show=False, save=True):
    """Plot the negative saliency of every gradient next to the boxed image.

    :param gradients: gradients to visualise, one panel each.
    :param image: the raw image, drawn in the last panel.
    :param bounding_box: ``[[x_min, y_min], [x_max, y_max]]``.
    :param folder: sub-directory of ``Config.gradients_plot_path`` to save in.
    :param name: file name of the saved figure.
    :param show: display the figure interactively.
    :param save: write the figure to disk.
    """
    # squeeze=False keeps a 2-D axes array even when there is a single panel
    fig, axes = plt.subplots(1, len(gradients) + 1, squeeze=False)
    axes = axes[0]
    try:
        # plot gradients
        for axis, grad in zip(axes, gradients):
            _, neg_saliency = get_positive_negative_saliency(grad)
            neg_saliency = neg_saliency.squeeze() * 255
            axis.imshow(neg_saliency.astype('uint8'))
            axis.axis('off')
        # plot the image with bounding box
        image_axis = axes[-1]
        image_axis.imshow(image.squeeze().astype('uint8'))
        width = bounding_box[1][0] - bounding_box[0][0]
        height = bounding_box[1][1] - bounding_box[0][1]
        box = patches.Rectangle(bounding_box[0], width, height, linewidth=1, edgecolor='r', facecolor='none')
        image_axis.add_patch(box)
        image_axis.axis('off')
        if save:
            target_dir = os.path.join(Config.gradients_plot_path, folder)
            os.makedirs(target_dir, exist_ok=True)
            fig.savefig(os.path.join(target_dir, name), dpi=200, bbox_inches='tight')
        if show:
            plt.show()
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # one figure per image would pile up in memory
        plt.close(fig)
37 |
--------------------------------------------------------------------------------