├── .gitignore
├── README.md
├── results
    ├── demo.jpg
    ├── demo.main.jpg
    ├── demo.main2.jpg
    ├── demo2.jpg
    ├── demo3.jpg
    ├── demo4.jpg
    ├── demo5.jpg
    ├── demo6.jpg
    ├── demo7.jpg
    ├── demo8.jpg
    ├── log.caltech256.txt
    ├── log.imagenet.txt
    └── log.txt
└── src
    ├── .ipynb_checkpoints
        └── Untitled-checkpoint.ipynb
    ├── Untitled.ipynb
    ├── detector.py
    ├── detector.pyc
    ├── test.caltech.py
    ├── train.caltech.py
    ├── train.imagenet.py
    ├── util.py
    └── util.pyc


/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | models/
3 | 
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Weakly_detector
 2 | TensorFlow implementation of "Learning Deep Features for Discriminative Localization"
 3 | 
 4 | B. Zhou, A. Khosla, A. Lapedriza, A. Oliva, and A. Torralba
 5 | Learning Deep Features for Discriminative Localization.
 6 | Computer Vision and Pattern Recognition (CVPR), 2016.
 7 | [[PDF](http://arxiv.org/pdf/1512.04150.pdf)][[Project Page](http://cnnlocalization.csail.mit.edu/)]
 8 | 
 9 | ### Results of Caltech256 Dataset
10 | ![alt tag](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/results/demo.main.jpg)
11 | 
12 | ### Results of Action40 Dataset
13 | ![alt tag](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/results/demo.main2.jpg)
14 | Object localization using only image-level annotation, without bounding box annotation.
15 | 
16 | * If you want to train the model on a custom dataset, you need the pretrained VGG network weights [[VGG](https://drive.google.com/file/d/0B5o40yxdA9PqOVI5dF9tN3NUc2c/view?usp=sharing)], which are loaded in [[code](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/src/train.caltech.py#L10)].
17 | 
18 | 


--------------------------------------------------------------------------------
/results/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.jpg


--------------------------------------------------------------------------------
/results/demo.main.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.main.jpg


--------------------------------------------------------------------------------
/results/demo.main2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.main2.jpg


--------------------------------------------------------------------------------
/results/demo2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo2.jpg


--------------------------------------------------------------------------------
/results/demo3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo3.jpg


--------------------------------------------------------------------------------
/results/demo4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo4.jpg


--------------------------------------------------------------------------------
/results/demo5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo5.jpg


--------------------------------------------------------------------------------
/results/demo6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo6.jpg


--------------------------------------------------------------------------------
/results/demo7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo7.jpg


--------------------------------------------------------------------------------
/results/demo8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo8.jpg


--------------------------------------------------------------------------------
/results/log.caltech256.txt:
--------------------------------------------------------------------------------
1 | epoch:0	acc:0.702
2 | epoch:1	acc:0.751
3 | epoch:2	acc:0.759
4 | epoch:3	acc:0.778
5 | epoch:4	acc:0.767
6 | 


--------------------------------------------------------------------------------
/results/log.imagenet.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/log.imagenet.txt


--------------------------------------------------------------------------------
/results/log.txt:
--------------------------------------------------------------------------------
  1 | epoch:0	acc:0.01
  2 | epoch:1	acc:0.01
  3 | epoch:2	acc:0.005
  4 | epoch:3	acc:0.005
  5 | epoch:4	acc:0.005
  6 | epoch:5	acc:0.03
  7 | epoch:6	acc:0.03
  8 | epoch:7	acc:0.03
  9 | epoch:8	acc:0.005
 10 | epoch:9	acc:0.01
 11 | epoch:10	acc:0.01
 12 | epoch:11	acc:0.01
 13 | epoch:12	acc:0.01
 14 | epoch:13	acc:0.01
 15 | epoch:14	acc:0.01
 16 | epoch:15	acc:0.01
 17 | epoch:16	acc:0.01
 18 | epoch:17	acc:0.01
 19 | epoch:18	acc:0.01
 20 | epoch:19	acc:0.01
 21 | epoch:20	acc:0.01
 22 | epoch:21	acc:0.01
 23 | epoch:22	acc:0.01
 24 | epoch:23	acc:0.01
 25 | epoch:24	acc:0.01
 26 | epoch:25	acc:0.01
 27 | epoch:26	acc:0.01
 28 | epoch:27	acc:0.01
 29 | epoch:28	acc:0.01
 30 | epoch:29	acc:0.01
 31 | epoch:30	acc:0.01
 32 | epoch:31	acc:0.01
 33 | epoch:32	acc:0.01
 34 | epoch:33	acc:0.005
 35 | epoch:34	acc:0.01
 36 | epoch:35	acc:0.005
 37 | epoch:36	acc:0.01
 38 | epoch:37	acc:0.02
 39 | epoch:38	acc:0.015
 40 | epoch:39	acc:0.02
 41 | epoch:40	acc:0.015
 42 | epoch:41	acc:0.02
 43 | epoch:42	acc:0.025
 44 | epoch:43	acc:0.02
 45 | epoch:44	acc:0.02
 46 | epoch:45	acc:0.02
 47 | epoch:46	acc:0.01
 48 | epoch:47	acc:0.01
 49 | epoch:48	acc:0.01
 50 | epoch:49	acc:0.01
 51 | epoch:50	acc:0.01
 52 | epoch:51	acc:0.01
 53 | epoch:52	acc:0.01
 54 | epoch:53	acc:0.01
 55 | epoch:54	acc:0.01
 56 | epoch:55	acc:0.01
 57 | epoch:56	acc:0.01
 58 | epoch:57	acc:0.01
 59 | epoch:58	acc:0.01
 60 | epoch:59	acc:0.01
 61 | epoch:60	acc:0.01
 62 | epoch:61	acc:0.01
 63 | epoch:62	acc:0.01
 64 | epoch:63	acc:0.01
 65 | epoch:64	acc:0.01
 66 | epoch:65	acc:0.01
 67 | epoch:66	acc:0.01
 68 | epoch:67	acc:0.01
 69 | epoch:68	acc:0.01
 70 | epoch:69	acc:0.01
 71 | epoch:70	acc:0.01
 72 | epoch:71	acc:0.01
 73 | epoch:72	acc:0.01
 74 | epoch:73	acc:0.01
 75 | epoch:74	acc:0.01
 76 | epoch:75	acc:0.01
 77 | epoch:76	acc:0.01
 78 | epoch:77	acc:0.01
 79 | epoch:78	acc:0.01
 80 | epoch:79	acc:0.01
 81 | epoch:80	acc:0.01
 82 | epoch:81	acc:0.01
 83 | epoch:82	acc:0.01
 84 | epoch:83	acc:0.01
 85 | epoch:84	acc:0.01
 86 | epoch:85	acc:0.025
 87 | epoch:86	acc:0.03
 88 | epoch:87	acc:0.02
 89 | epoch:88	acc:0.02
 90 | epoch:89	acc:0.02
 91 | epoch:90	acc:0.01
 92 | epoch:91	acc:0.01
 93 | epoch:92	acc:0.01
 94 | epoch:93	acc:0.01
 95 | epoch:94	acc:0.01
 96 | epoch:95	acc:0.01
 97 | epoch:96	acc:0.01
 98 | epoch:97	acc:0.01
 99 | epoch:98	acc:0.01
100 | epoch:99	acc:0.01
101 | epoch:100	acc:0.01
102 | epoch:101	acc:0.01
103 | epoch:102	acc:0.01
104 | epoch:103	acc:0.01
105 | epoch:104	acc:0.01
106 | epoch:105	acc:0.01
107 | epoch:106	acc:0.01
108 | epoch:107	acc:0.01
109 | epoch:108	acc:0.01
110 | epoch:109	acc:0.01
111 | epoch:110	acc:0.01
112 | epoch:111	acc:0.01
113 | epoch:112	acc:0.01
114 | epoch:113	acc:0.01
115 | epoch:114	acc:0.01
116 | epoch:115	acc:0.01
117 | epoch:116	acc:0.01
118 | epoch:117	acc:0.01
119 | epoch:118	acc:0.01
120 | epoch:119	acc:0.01
121 | epoch:120	acc:0.01
122 | epoch:121	acc:0.01
123 | epoch:122	acc:0.01
124 | epoch:123	acc:0.01
125 | epoch:124	acc:0.01
126 | epoch:125	acc:0.01
127 | epoch:126	acc:0.01
128 | epoch:127	acc:0.01
129 | epoch:128	acc:0.01
130 | epoch:129	acc:0.01
131 | epoch:130	acc:0.01
132 | epoch:131	acc:0.01
133 | epoch:132	acc:0.01
134 | epoch:133	acc:0.01
135 | epoch:134	acc:0.01
136 | epoch:135	acc:0.01
137 | epoch:136	acc:0.01
138 | epoch:137	acc:0.01
139 | epoch:138	acc:0.01
140 | epoch:139	acc:0.01
141 | epoch:140	acc:0.01
142 | epoch:141	acc:0.01
143 | epoch:142	acc:0.01
144 | epoch:143	acc:0.01
145 | epoch:144	acc:0.01
146 | epoch:145	acc:0.01
147 | epoch:146	acc:0.01
148 | epoch:147	acc:0.01
149 | epoch:148	acc:0.01
150 | epoch:149	acc:0.01
151 | epoch:150	acc:0.01
152 | epoch:151	acc:0.01
153 | epoch:152	acc:0.01
154 | epoch:153	acc:0.01
155 | epoch:154	acc:0.01
156 | epoch:155	acc:0.01
157 | epoch:156	acc:0.01
158 | epoch:157	acc:0.01
159 | epoch:158	acc:0.01
160 | epoch:159	acc:0.01
161 | epoch:160	acc:0.01
162 | epoch:161	acc:0.01
163 | epoch:162	acc:0.01
164 | epoch:163	acc:0.01
165 | epoch:164	acc:0.01
166 | epoch:165	acc:0.01
167 | 


--------------------------------------------------------------------------------
/src/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 0
6 | }
7 | 


--------------------------------------------------------------------------------
/src/detector.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import cPickle
  4 | import ipdb
  5 | class Detector():
  6 |     def __init__(self, weight_file_path, n_labels):
  7 |         self.image_mean = [103.939, 116.779, 123.68]
  8 |         self.n_labels = n_labels
  9 | 
 10 |         with open(weight_file_path) as f:
 11 |             self.pretrained_weights = cPickle.load(f)
 12 | 
 13 |     def get_weight( self, layer_name):
 14 |         layer = self.pretrained_weights[layer_name]
 15 |         return layer[0]
 16 | 
 17 |     def get_bias( self, layer_name ):
 18 |         layer = self.pretrained_weights[layer_name]
 19 |         return layer[1]
 20 | 
 21 |     def get_conv_weight( self, name ):
 22 |         f = self.get_weight( name )
 23 |         return f.transpose(( 2,3,1,0 ))
 24 | 
 25 |     def conv_layer( self, bottom, name ):
 26 |         with tf.variable_scope(name) as scope:
 27 | 
 28 |             w = self.get_conv_weight(name)
 29 |             b = self.get_bias(name)
 30 | 
 31 |             conv_weights = tf.get_variable(
 32 |                     "W",
 33 |                     shape=w.shape,
 34 |                     initializer=tf.constant_initializer(w)
 35 |                     )
 36 |             conv_biases = tf.get_variable(
 37 |                     "b",
 38 |                     shape=b.shape,
 39 |                     initializer=tf.constant_initializer(b)
 40 |                     )
 41 | 
 42 |             conv = tf.nn.conv2d( bottom, conv_weights, [1,1,1,1], padding='SAME')
 43 |             bias = tf.nn.bias_add( conv, conv_biases )
 44 |             relu = tf.nn.relu( bias, name=name )
 45 | 
 46 |         return relu
 47 | 
 48 |     def new_conv_layer( self, bottom, filter_shape, name ):
 49 |         with tf.variable_scope( name ) as scope:
 50 |             w = tf.get_variable(
 51 |                     "W",
 52 |                     shape=filter_shape,
 53 |                     initializer=tf.random_normal_initializer(0., 0.01))
 54 |             b = tf.get_variable(
 55 |                     "b",
 56 |                     shape=filter_shape[-1],
 57 |                     initializer=tf.constant_initializer(0.))
 58 | 
 59 |             conv = tf.nn.conv2d( bottom, w, [1,1,1,1], padding='SAME')
 60 |             bias = tf.nn.bias_add(conv, b)
 61 | 
 62 |         return bias #relu
 63 | 
 64 |     def fc_layer(self, bottom, name, create=False):
 65 |         shape = bottom.get_shape().as_list()
 66 |         dim = np.prod( shape[1:] )
 67 |         x = tf.reshape(bottom, [-1, dim])
 68 | 
 69 |         cw = self.get_weight(name)
 70 |         b = self.get_bias(name)
 71 | 
 72 |         if name == "fc6":
 73 |             cw = cw.reshape((4096, 512, 7,7))
 74 |             cw = cw.transpose((2,3,1,0))
 75 |             cw = cw.reshape((25088,4096))
 76 |         else:
 77 |             cw = cw.transpose((1,0))
 78 | 
 79 |         with tf.variable_scope(name) as scope:
 80 |             cw = tf.get_variable(
 81 |                     "W",
 82 |                     shape=cw.shape,
 83 |                     initializer=tf.constant_initializer(cw))
 84 |             b = tf.get_variable(
 85 |                     "b",
 86 |                     shape=b.shape,
 87 |                     initializer=tf.constant_initializer(b))
 88 | 
 89 |             fc = tf.nn.bias_add( tf.matmul( x, cw ), b, name=scope)
 90 | 
 91 |         return fc
 92 | 
 93 |     def new_fc_layer( self, bottom, input_size, output_size, name ):
 94 |         shape = bottom.get_shape().to_list()
 95 |         dim = np.prod( shape[1:] )
 96 |         x = tf.reshape( bottom, [-1, dim])
 97 | 
 98 |         with tf.variable_scope(name) as scope:
 99 |             w = tf.get_variable(
100 |                     "W",
101 |                     shape=[input_size, output_size],
102 |                     initializer=tf.random_normal_initializer(0., 0.01))
103 |             b = tf.get_variable(
104 |                     "b",
105 |                     shape=[output_size],
106 |                     initializer=tf.constant_initializer(0.))
107 |             fc = tf.nn.bias_add( tf.matmul(x, w), b, name=scope)
108 | 
109 |         return fc
110 | 
111 |     def inference( self, rgb, train=False ):
112 |         rgb *= 255.
113 |         r, g, b = tf.split(3, 3, rgb)
114 |         bgr = tf.concat(3,
115 |             [
116 |                 b-self.image_mean[0],
117 |                 g-self.image_mean[1],
118 |                 r-self.image_mean[2]
119 |             ])
120 | 
121 |         relu1_1 = self.conv_layer( bgr, "conv1_1" )
122 |         relu1_2 = self.conv_layer( relu1_1, "conv1_2" )
123 | 
124 |         pool1 = tf.nn.max_pool(relu1_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
125 |                                          padding='SAME', name='pool1')
126 | 
127 |         relu2_1 = self.conv_layer(pool1, "conv2_1")
128 |         relu2_2 = self.conv_layer(relu2_1, "conv2_2")
129 |         pool2 = tf.nn.max_pool(relu2_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
130 |                                padding='SAME', name='pool2')
131 | 
132 |         relu3_1 = self.conv_layer( pool2, "conv3_1")
133 |         relu3_2 = self.conv_layer( relu3_1, "conv3_2")
134 |         relu3_3 = self.conv_layer( relu3_2, "conv3_3")
135 |         pool3 = tf.nn.max_pool(relu3_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
136 |                                padding='SAME', name='pool3')
137 | 
138 |         relu4_1 = self.conv_layer( pool3, "conv4_1")
139 |         relu4_2 = self.conv_layer( relu4_1, "conv4_2")
140 |         relu4_3 = self.conv_layer( relu4_2, "conv4_3")
141 |         pool4 = tf.nn.max_pool(relu4_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
142 |                                padding='SAME', name='pool4')
143 | 
144 |         relu5_1 = self.conv_layer( pool4, "conv5_1")
145 |         relu5_2 = self.conv_layer( relu5_1, "conv5_2")
146 |         relu5_3 = self.conv_layer( relu5_2, "conv5_3")
147 | 
148 |         conv6 = self.new_conv_layer( relu5_3, [3,3,512,1024], "conv6")
149 |         gap = tf.reduce_mean( conv6, [1,2] )
150 | 
151 |         with tf.variable_scope("GAP"):
152 |             gap_w = tf.get_variable(
153 |                     "W",
154 |                     shape=[1024, self.n_labels],
155 |                     initializer=tf.random_normal_initializer(0., 0.01))
156 | 
157 |         output = tf.matmul( gap, gap_w)
158 | 
159 |         return pool1, pool2, pool3, pool4, relu5_3, conv6, gap, output
160 | 
161 |     def get_classmap(self, label, conv6):
162 |         conv6_resized = tf.image.resize_bilinear( conv6, [224, 224] )
163 |         with tf.variable_scope("GAP", reuse=True):
164 |             label_w = tf.gather(tf.transpose(tf.get_variable("W")), label)
165 |             label_w = tf.reshape( label_w, [-1, 1024, 1] ) # [batch_size, 1024, 1]
166 | 
167 |         conv6_resized = tf.reshape(conv6_resized, [-1, 224*224, 1024]) # [batch_size, 224*224, 1024]
168 | 
169 |         classmap = tf.batch_matmul( conv6_resized, label_w )
170 |         classmap = tf.reshape( classmap, [-1, 224,224] )
171 |         return classmap
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 


--------------------------------------------------------------------------------
/src/detector.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/src/detector.pyc


--------------------------------------------------------------------------------
/src/test.caltech.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import pandas as pd
 3 | import numpy as np
 4 | 
 5 | from detector import Detector
 6 | from util import load_image
 7 | 
 8 | import skimage.io
 9 | import matplotlib.pyplot as plt
10 | 
11 | import os
12 | import ipdb
13 | 
# Visualise class activation maps of a trained Caltech-256 model on a few
# test images. Written for Python 2 and the pre-1.0 TensorFlow API.

testset_path = '../data/caltech/test.pickle'
label_dict_path = '../data/caltech/label_dict.pickle'

weight_path = '../data/caffe_layers_value.pickle'
model_path = '../models/caltech256/model-4'

batch_size = 1

# Only the last 20 rows of the test set are visualised, last row first.
testset = pd.read_pickle( testset_path )[::-1][:20]
label_dict = pd.read_pickle( label_dict_path )
n_labels = len( label_dict )

images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images")
labels_tf = tf.placeholder( tf.int64, [None], name='labels')

detector = Detector( weight_path, n_labels )
c1,c2,c3,c4,conv5, conv6, gap, output = detector.inference( images_tf )
classmap = detector.get_classmap( labels_tf, conv6 )

sess = tf.InteractiveSession()
saver = tf.train.Saver()

saver.restore( sess, model_path )

for start in range(0, len(testset), batch_size):
    current_data = testset[start:start + batch_size]
    current_image_paths = current_data['image_path'].values

    # load_image is treated as returning None for an unreadable image
    # (that is what the filter below relies on).
    loaded_images = [load_image(path) for path in current_image_paths]
    good_index = np.array([img is not None for img in loaded_images], dtype=bool)
    if not good_index.any():
        # Nothing usable in this batch; np.stack would fail on an empty list.
        continue

    current_data = current_data[good_index]
    current_image_paths = current_image_paths[good_index]
    current_images = np.stack([img for img in loaded_images if img is not None])
    current_labels = current_data['label'].values
    current_label_names = current_data['label_name'].values

    conv6_val = sess.run(conv6, feed_dict={images_tf: current_images})

    # Class map of the ground-truth label. To inspect the predicted class
    # instead, also fetch `output` and feed its argmax(axis=1) as labels_tf.
    classmap_answer = sess.run(
            classmap,
            feed_dict={
                labels_tf: current_labels,
                conv6: conv6_val
                })

    # Rescale every map to [0, 1]; a constant map becomes all zeros instead
    # of dividing by zero.
    classmap_vis = []
    for cam in classmap_answer:
        span = cam.max() - cam.min()
        if span > 0:
            classmap_vis.append((cam - cam.min()) / span)
        else:
            classmap_vis.append(np.zeros_like(cam))

    for vis, ori, ori_path, l_name in zip(classmap_vis, current_images, current_image_paths, current_label_names):
        print(l_name)
        plt.imshow( ori )
        plt.imshow( vis, cmap=plt.cm.jet, alpha=0.5, interpolation='nearest' )
        plt.show()

        # Uncomment to save the heat map and the input image instead:
#        vis_path = '../results/'+ ori_path.split('/')[-1]
#        vis_path_ori = '../results/'+ori_path.split('/')[-1].split('.')[0]+'.ori.jpg'
#        skimage.io.imsave( vis_path, vis )
#        skimage.io.imsave( vis_path_ori, ori )
89 | 
90 | 


--------------------------------------------------------------------------------
/src/train.caltech.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import pandas as pd
  4 | 
  5 | from detector import Detector
  6 | from util import load_image
  7 | import os
  8 | import ipdb
  9 | 
 10 | weight_path = '../data/caffe_layers_value.pickle'
 11 | model_path = '../models/caltech256/'
 12 | pretrained_model_path = None #'../models/caltech256/model-0'
 13 | n_epochs = 10000
 14 | init_learning_rate = 0.01
 15 | weight_decay_rate = 0.0005
 16 | momentum = 0.9
 17 | batch_size = 60
 18 | 
 19 | dataset_path = '/media/storage3/Study/data/256_ObjectCategories'
 20 | 
 21 | caltech_path = '../data/caltech'
 22 | trainset_path = '../data/caltech/train.pickle'
 23 | testset_path = '../data/caltech/test.pickle'
 24 | label_dict_path = '../data/caltech/label_dict.pickle'
 25 | 
 26 | if not os.path.exists( trainset_path ):
 27 |     if not os.path.exists( caltech_path ):
 28 |         os.makedirs( caltech_path )
 29 |     image_dir_list = os.listdir( dataset_path )
 30 | 
 31 |     label_pairs = map(lambda x: x.split('.'), image_dir_list)
 32 |     labels, label_names = zip(*label_pairs)
 33 |     labels = map(lambda x: int(x), labels)
 34 | 
 35 |     label_dict = pd.Series( labels, index=label_names )
 36 |     label_dict -= 1
 37 |     n_labels = len( label_dict )
 38 | 
 39 |     image_paths_per_label = map(lambda one_dir: map(lambda one_file: os.path.join( dataset_path, one_dir, one_file ), os.listdir( os.path.join( dataset_path, one_dir))), image_dir_list)
 40 |     image_paths_train = np.hstack(map(lambda one_class: one_class[:-10], image_paths_per_label))
 41 |     image_paths_test = np.hstack(map(lambda one_class: one_class[-10:], image_paths_per_label))
 42 | 
 43 |     trainset = pd.DataFrame({'image_path': image_paths_train})
 44 |     testset  = pd.DataFrame({'image_path': image_paths_test })
 45 | 
 46 |     trainset = trainset[ trainset['image_path'].map( lambda x: x.endswith('.jpg'))]
 47 |     trainset['label'] = trainset['image_path'].map(lambda x: int(x.split('/')[-2].split('.')[0]) - 1)
 48 |     trainset['label_name'] = trainset['image_path'].map(lambda x: x.split('/')[-2].split('.')[1])
 49 | 
 50 |     testset = testset[ testset['image_path'].map( lambda x: x.endswith('.jpg'))]
 51 |     testset['label'] = testset['image_path'].map(lambda x: int(x.split('/')[-2].split('.')[0]) - 1)
 52 |     testset['label_name'] = testset['image_path'].map(lambda x: x.split('/')[-2].split('.')[1])
 53 | 
 54 |     label_dict.to_pickle(label_dict_path)
 55 |     trainset.to_pickle(trainset_path)
 56 |     testset.to_pickle(testset_path)
 57 | else:
 58 |     trainset = pd.read_pickle( trainset_path )
 59 |     testset  = pd.read_pickle( testset_path )
 60 |     label_dict = pd.read_pickle( label_dict_path )
 61 |     n_labels = len(label_dict)
 62 | 
 63 | learning_rate = tf.placeholder( tf.float32, [])
 64 | images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images")
 65 | labels_tf = tf.placeholder( tf.int64, [None], name='labels')
 66 | 
 67 | detector = Detector(weight_path, n_labels)
 68 | 
 69 | p1,p2,p3,p4,conv5, conv6, gap, output = detector.inference(images_tf)
 70 | loss_tf = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( output, labels_tf ))
 71 | 
 72 | weights_only = filter( lambda x: x.name.endswith('W:0'), tf.trainable_variables() )
 73 | weight_decay = tf.reduce_sum(tf.pack([tf.nn.l2_loss(x) for x in weights_only])) * weight_decay_rate
 74 | loss_tf += weight_decay
 75 | 
 76 | sess = tf.InteractiveSession()
 77 | saver = tf.train.Saver( max_to_keep=50 )
 78 | 
 79 | optimizer = tf.train.MomentumOptimizer( learning_rate, momentum )
 80 | grads_and_vars = optimizer.compute_gradients( loss_tf )
 81 | grads_and_vars = map(lambda gv: (gv[0], gv[1]) if ('conv6' in gv[1].name or 'GAP' in gv[1].name) else (gv[0]*0.1, gv[1]), grads_and_vars)
 82 | #grads_and_vars = [(tf.clip_by_value(gv[0], -5., 5.), gv[1]) for gv in grads_and_vars]
 83 | train_op = optimizer.apply_gradients( grads_and_vars )
 84 | tf.initialize_all_variables().run()
 85 | 
 86 | if pretrained_model_path:
 87 |     print "Pretrained"
 88 |     saver.restore(sess, pretrained_model_path)
 89 | 
 90 | testset.index  = range( len(testset) )
 91 | #testset = testset.ix[np.random.permutation( len(testset) )]#[:1000]
 92 | #trainset2 = testset[1000:]
 93 | #testset = testset[:1000]
 94 | 
 95 | #trainset = pd.concat( [trainset, trainset2] )
 96 | # We lack the number of training set. Let's use some of the test images
 97 | 
 98 | f_log = open('../results/log.caltech256.txt', 'w')
 99 | 
100 | iterations = 0
101 | loss_list = []
102 | for epoch in range(n_epochs):
103 | 
104 |     trainset.index = range( len(trainset) )
105 |     trainset = trainset.ix[ np.random.permutation( len(trainset) )]
106 | 
107 |     for start, end in zip(
108 |         range( 0, len(trainset)+batch_size, batch_size),
109 |         range(batch_size, len(trainset)+batch_size, batch_size)):
110 | 
111 |         current_data = trainset[start:end]
112 |         current_image_paths = current_data['image_path'].values
113 |         current_images = np.array(map(lambda x: load_image(x), current_image_paths))
114 | 
115 |         good_index = np.array(map(lambda x: x is not None, current_images))
116 | 
117 |         current_data = current_data[good_index]
118 |         current_images = np.stack(current_images[good_index])
119 |         current_labels = current_data['label'].values
120 | 
121 |         _, loss_val, output_val = sess.run(
122 |                 [train_op, loss_tf, output],
123 |                 feed_dict={
124 |                     learning_rate: init_learning_rate,
125 |                     images_tf: current_images,
126 |                     labels_tf: current_labels
127 |                     })
128 | 
129 |         loss_list.append( loss_val )
130 | 
131 |         iterations += 1
132 |         if iterations % 5 == 0:
133 |             print "======================================"
134 |             print "Epoch", epoch, "Iteration", iterations
135 |             print "Processed", start, '/', len(trainset)
136 | 
137 |             label_predictions = output_val.argmax(axis=1)
138 |             acc = (label_predictions == current_labels).sum()
139 | 
140 |             print "Accuracy:", acc, '/', len(current_labels)
141 |             print "Training Loss:", np.mean(loss_list)
142 |             print "\n"
143 |             loss_list = []
144 | 
145 |     n_correct = 0
146 |     n_data = 0
147 |     for start, end in zip(
148 |             range(0, len(testset)+batch_size, batch_size),
149 |             range(batch_size, len(testset)+batch_size, batch_size)
150 |             ):
151 |         current_data = testset[start:end]
152 |         current_image_paths = current_data['image_path'].values
153 |         current_images = np.array(map(lambda x: load_image(x), current_image_paths))
154 | 
155 |         good_index = np.array(map(lambda x: x is not None, current_images))
156 | 
157 |         current_data = current_data[good_index]
158 |         current_images = np.stack(current_images[good_index])
159 |         current_labels = current_data['label'].values
160 | 
161 |         output_vals = sess.run(
162 |                 output,
163 |                 feed_dict={images_tf:current_images})
164 | 
165 |         label_predictions = output_vals.argmax(axis=1)
166 |         acc = (label_predictions == current_labels).sum()
167 | 
168 |         n_correct += acc
169 |         n_data += len(current_data)
170 | 
171 |     acc_all = n_correct / float(n_data)
172 |     f_log.write('epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n')
173 |     print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
174 |     print 'epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n'
175 |     print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
176 | 
177 |     saver.save( sess, os.path.join( model_path, 'model'), global_step=epoch)
178 | 
179 |     init_learning_rate *= 0.99
180 | 
181 | 
182 | 
183 | 
184 | 


--------------------------------------------------------------------------------
/src/train.imagenet.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import numpy as np
  3 | import pandas as pd
  4 | 
  5 | from detector import Detector
  6 | from util import load_image
  7 | import os
  8 | import ipdb
  9 | 
 10 | weight_path = '../data/caffe_layers_value.pickle'
 11 | model_path = '../models/imagenet/'
 12 | pretrained_model_path = None #'../models/imagenet/model-0'
 13 | n_epochs = 10000
 14 | init_learning_rate = 0.01
 15 | weight_decay_rate = 0.0001
 16 | momentum = 0.9
 17 | batch_size = 60
 18 | 
 19 | dataset_path = '/media/storage3/Study/data/imagenet'
 20 | image_dir_list = label_names = filter(lambda x: x.startswith('n'), os.listdir( dataset_path ))
 21 | 
 22 | label_dict = pd.Series( range(len(label_names)), index=label_names )
 23 | n_labels = len( label_dict )
 24 | 
 25 | image_paths_per_label = map(lambda one_dir: map(lambda one_file: os.path.join( dataset_path, one_dir, one_file ), os.listdir( os.path.join( dataset_path, one_dir))), image_dir_list)
 26 | image_paths_train = np.hstack(map(lambda one_class: one_class[:-50], image_paths_per_label))
 27 | image_paths_test = np.hstack(map(lambda one_class: one_class[-50:], image_paths_per_label))
 28 | 
 29 | trainset = pd.DataFrame({'image_path': image_paths_train})
 30 | testset  = pd.DataFrame({'image_path': image_paths_test })
 31 | 
 32 | trainset['label_name'] = trainset['image_path'].map(lambda x: x.split('/')[-2])
 33 | testset['label_name'] = testset['image_path'].map(lambda x: x.split('/')[-2])
 34 | 
 35 | trainset['label'] = trainset['label_name'].map( label_dict )
 36 | testset['label'] = testset['label_name'].map( label_dict )
 37 | 
 38 | train_phase = tf.placeholder( tf.bool )
 39 | learning_rate = tf.placeholder( tf.float32, [])
 40 | images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images")
 41 | labels_tf = tf.placeholder( tf.int64, [None], name='labels')
 42 | 
 43 | detector = Detector(weight_path, n_labels)
 44 | 
 45 | p1,p2,p3,p4,conv5, conv6, gap, output = detector.inference(images_tf)
 46 | loss_tf = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( output, labels_tf ))
 47 | 
 48 | weights_only = filter( lambda x: x.name.endswith('W:0'), tf.trainable_variables() )
 49 | weight_decay = tf.reduce_sum(tf.pack([tf.nn.l2_loss(x) for x in weights_only])) * weight_decay_rate
 50 | loss_tf += weight_decay
 51 | 
 52 | sess = tf.InteractiveSession()
 53 | saver = tf.train.Saver( max_to_keep=50 )
 54 | 
 55 | #optimizer = tf.train.RMSPropOptimizer( learning_rate )
 56 | optimizer = tf.train.MomentumOptimizer( learning_rate, momentum )
 57 | grads_and_vars = optimizer.compute_gradients( loss_tf )
 58 | grads_and_vars = [(tf.clip_by_value(gv[0], -5., 5.), gv[1]) for gv in grads_and_vars]
 59 | grads_and_vars = map(lambda gv: (gv[0], gv[1]) if ('conv6' in gv[1].name or 'GAP' in gv[1].name) else (gv[0]*0.1, gv[1]), grads_and_vars)
 60 | train_op = optimizer.apply_gradients( grads_and_vars )
 61 | tf.initialize_all_variables().run()
 62 | 
 63 | if pretrained_model_path:
 64 |     print "Pretrained"
 65 |     saver.restore(sess, pretrained_model_path)
 66 | 
 67 | testset.index  = range( len(testset) )
 68 | # We lack the number of training set. Let's use some of the test images
 69 | 
 70 | f_log = open('../results/log.imagenet.txt', 'w')
 71 | 
 72 | iterations = 0
 73 | loss_list = []
 74 | for epoch in range(n_epochs):
 75 | 
 76 |     trainset.index = range( len(trainset) )
 77 |     trainset = trainset.ix[ np.random.permutation( len(trainset) )]
 78 | 
 79 |     for start, end in zip(
 80 |         range( 0, len(trainset)+batch_size, batch_size),
 81 |         range(batch_size, len(trainset)+batch_size, batch_size)):
 82 | 
 83 |         current_data = trainset[start:end]
 84 |         current_image_paths = current_data['image_path'].values
 85 |         current_images = np.array(map(lambda x: load_image(x), current_image_paths))
 86 | 
 87 |         good_index = np.array(map(lambda x: x is not None, current_images))
 88 | 
 89 |         current_data = current_data[good_index]
 90 |         current_images = np.stack(current_images[good_index])
 91 |         current_labels = current_data['label'].values
 92 | 
 93 |         try:
 94 |             _, loss_val, output_val = sess.run(
 95 |                     [train_op, loss_tf, output],
 96 |                     feed_dict={
 97 |                         learning_rate: init_learning_rate,
 98 |                         images_tf: current_images,
 99 |                         labels_tf: current_labels
100 |                         })
101 | #conv5, conv6, gap, output = detector.inference(images_tf)
102 |             p1_val, p2_val, p3_val, p4_val, conv5_val, conv6_val, gap_val = sess.run(
103 |                     [p1,p2,p3,p4,conv5, conv6, gap],
104 |                     feed_dict={
105 |                         images_tf: current_images
106 |                         })
107 | 
108 |         except:
109 |             continue
110 | 
111 |         loss_list.append( loss_val )
112 | 
113 |         iterations += 1
114 |         if iterations % 5 == 0:
115 |             print "======================================"
116 |             print "Epoch", epoch, "Iteration", iterations
117 |             print "Processed", start, '/', len(trainset)
118 |             print "Max output:", output_val.max()
119 | 
120 |             label_predictions = output_val.argmax(axis=1)
121 |             acc = (label_predictions == current_labels).sum()
122 | 
123 |             print "Accuracy:", acc, '/', len(current_labels)
124 |             print "Max prob class:", output_val.argmax(axis=1)
125 |             print "Training Loss:", np.mean(loss_list)
126 |             print "\n"
127 |             loss_list = []
128 | 
129 |     n_correct = 0
130 |     n_data = 0
131 | 
132 |     for start, end in zip(
133 |             range(0, len(testset)+batch_size, batch_size),
134 |             range(batch_size, len(testset)+batch_size, batch_size)
135 |             ):
136 |         current_data = testset[start:end]
137 |         current_image_paths = current_data['image_path'].values
138 |         current_images = np.array(map(lambda x: load_image(x), current_image_paths))
139 | 
140 |         good_index = np.array(map(lambda x: x is not None, current_images))
141 | 
142 |         current_data = current_data[good_index]
143 |         current_images = np.stack(current_images[good_index])
144 |         current_labels = current_data['label'].values
145 | 
146 |         output_vals = sess.run(
147 |                 output,
148 |                 feed_dict={images_tf:current_images})
149 | 
150 |         label_predictions = output_vals.argmax(axis=1)
151 |         acc = (label_predictions == current_labels).sum()
152 | 
153 |         n_correct += acc
154 |         n_data += len(current_data)
155 | 
156 |     acc_all = n_correct / float(n_data)
157 |     f_log.write('epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n')
158 |     print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
159 |     print 'epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n'
160 |     print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
161 | 
162 |     if epoch % 10 == 0:
163 |         saver.save( sess, os.path.join( model_path, 'model'), global_step=epoch/10)
164 | 
165 |     init_learning_rate *= 0.9
166 | 
167 | 
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/src/util.py:
--------------------------------------------------------------------------------
 1 | import skimage.io
 2 | import skimage.transform
 3 | import ipdb
 4 | 
 5 | import numpy as np
 6 | 
 7 | def load_image( path ):
 8 |     try:
 9 |         img = skimage.io.imread( path ).astype( float )
10 |     except:
11 |         return None
12 | 
13 |     if img is None: return None
14 |     if len(img.shape) < 2: return None
15 |     if len(img.shape) == 4: return None
16 |     if len(img.shape) == 2: img=np.tile(img[:,:,None], 3)
17 |     if img.shape[2] == 4: img=img[:,:,:3]
18 |     if img.shape[2] > 4: return None
19 | 
20 |     img /= 255.
21 | 
22 |     short_edge = min( img.shape[:2] )
23 |     yy = int((img.shape[0] - short_edge) / 2)
24 |     xx = int((img.shape[1] - short_edge) / 2)
25 |     crop_img = img[yy:yy+short_edge, xx:xx+short_edge]
26 |     resized_img = skimage.transform.resize( crop_img, [224,224] )
27 |     return resized_img
28 | 
29 | 


--------------------------------------------------------------------------------
/src/util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/src/util.pyc


--------------------------------------------------------------------------------