├── .gitignore ├── README.md ├── results ├── demo.jpg ├── demo.main.jpg ├── demo.main2.jpg ├── demo2.jpg ├── demo3.jpg ├── demo4.jpg ├── demo5.jpg ├── demo6.jpg ├── demo7.jpg ├── demo8.jpg ├── log.caltech256.txt ├── log.imagenet.txt └── log.txt └── src ├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── Untitled.ipynb ├── detector.py ├── detector.pyc ├── test.caltech.py ├── train.caltech.py ├── train.imagenet.py ├── util.py └── util.pyc /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | models/ 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Weakly_detector 2 | Tensorflow implementation of "Learning Deep Features for Discriminative Localization" 3 | 4 | B. Zhou, A. Khosla, A. Lapedriza, A. Oliva, and A. Torralba 5 | Learning Deep Features for Discriminative Localization. 6 | Computer Vision and Pattern Recognition (CVPR), 2016. 7 | [[PDF](http://arxiv.org/pdf/1512.04150.pdf)][[Project Page](http://cnnlocalization.csail.mit.edu/)] 8 | 9 | ### Results of Caltech256 Dataset 10 | ![alt tag](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/results/demo.main.jpg) 11 | 12 | ### Results of Action40 Dataset 13 | ![alt tag](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/results/demo.main2.jpg) 14 | Object localization using only image-level annotation, without bounding box annotation. 15 | 16 | * If you want to train the model using custom dataset, you need the pretrained VGG Network weights [[VGG](https://drive.google.com/file/d/0B5o40yxdA9PqOVI5dF9tN3NUc2c/view?usp=sharing)], which is used in [[code](https://github.com/jazzsaxmafia/Weakly_detector/blob/master/src/train.caltech.py#L10)]. 17 | 18 | -------------------------------------------------------------------------------- /results/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.jpg -------------------------------------------------------------------------------- /results/demo.main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.main.jpg -------------------------------------------------------------------------------- /results/demo.main2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo.main2.jpg -------------------------------------------------------------------------------- /results/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo2.jpg -------------------------------------------------------------------------------- /results/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo3.jpg -------------------------------------------------------------------------------- /results/demo4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo4.jpg -------------------------------------------------------------------------------- /results/demo5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo5.jpg -------------------------------------------------------------------------------- /results/demo6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo6.jpg -------------------------------------------------------------------------------- /results/demo7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo7.jpg -------------------------------------------------------------------------------- /results/demo8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/demo8.jpg -------------------------------------------------------------------------------- /results/log.caltech256.txt: -------------------------------------------------------------------------------- 1 | epoch:0 acc:0.702 2 | epoch:1 acc:0.751 3 | epoch:2 acc:0.759 4 | epoch:3 acc:0.778 5 | epoch:4 acc:0.767 6 | -------------------------------------------------------------------------------- /results/log.imagenet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/results/log.imagenet.txt -------------------------------------------------------------------------------- /results/log.txt: -------------------------------------------------------------------------------- 1 | epoch:0 acc:0.01 2 | epoch:1 acc:0.01 3 | epoch:2 acc:0.005 4 | epoch:3 acc:0.005 5 | epoch:4 acc:0.005 6 | epoch:5 acc:0.03 7 | epoch:6 acc:0.03 8 | epoch:7 acc:0.03 9 | epoch:8 acc:0.005 10 | epoch:9 acc:0.01 11 | epoch:10 acc:0.01 12 | epoch:11 acc:0.01 13 | epoch:12 acc:0.01 14 | epoch:13 acc:0.01 15 | epoch:14 acc:0.01 16 | epoch:15 acc:0.01 17 | epoch:16 acc:0.01 18 | epoch:17 acc:0.01 19 | epoch:18 acc:0.01 20 | epoch:19 acc:0.01 21 | epoch:20 acc:0.01 22 | epoch:21 acc:0.01 23 | epoch:22 acc:0.01 24 | epoch:23 acc:0.01 25 | epoch:24 acc:0.01 26 | epoch:25 acc:0.01 27 | epoch:26 acc:0.01 28 | epoch:27 acc:0.01 29 | epoch:28 acc:0.01 30 | epoch:29 acc:0.01 31 | epoch:30 acc:0.01 32 | epoch:31 acc:0.01 33 | epoch:32 acc:0.01 34 | epoch:33 acc:0.005 35 | epoch:34 acc:0.01 36 | epoch:35 acc:0.005 37 | epoch:36 acc:0.01 38 | epoch:37 acc:0.02 39 | epoch:38 acc:0.015 40 | epoch:39 acc:0.02 41 | epoch:40 acc:0.015 42 | epoch:41 acc:0.02 43 | epoch:42 acc:0.025 44 | epoch:43 acc:0.02 45 | epoch:44 acc:0.02 46 | epoch:45 acc:0.02 47 | epoch:46 acc:0.01 48 | epoch:47 acc:0.01 49 | epoch:48 acc:0.01 50 | epoch:49 acc:0.01 51 | epoch:50 acc:0.01 52 | epoch:51 acc:0.01 53 | epoch:52 acc:0.01 54 | epoch:53 acc:0.01 55 | epoch:54 acc:0.01 56 | epoch:55 acc:0.01 57 | epoch:56 acc:0.01 58 | epoch:57 acc:0.01 59 | epoch:58 acc:0.01 60 | epoch:59 acc:0.01 61 | epoch:60 acc:0.01 62 | epoch:61 acc:0.01 63 | epoch:62 acc:0.01 64 | epoch:63 acc:0.01 65 | epoch:64 acc:0.01 66 | epoch:65 acc:0.01 67 | epoch:66 acc:0.01 68 | epoch:67 acc:0.01 69 | epoch:68 acc:0.01 70 | epoch:69 acc:0.01 71 | epoch:70 acc:0.01 72 | epoch:71 acc:0.01 73 | epoch:72 acc:0.01 74 | epoch:73 acc:0.01 75 | epoch:74 acc:0.01 76 | epoch:75 acc:0.01 77 | epoch:76 acc:0.01 78 | epoch:77 acc:0.01 79 | epoch:78 acc:0.01 80 | epoch:79 acc:0.01 81 | epoch:80 acc:0.01 82 | epoch:81 acc:0.01 83 | epoch:82 acc:0.01 84 | epoch:83 acc:0.01 85 | epoch:84 acc:0.01 86 | epoch:85 acc:0.025 87 | epoch:86 acc:0.03 88 | epoch:87 acc:0.02 89 | epoch:88 acc:0.02 90 | epoch:89 acc:0.02 91 | epoch:90 acc:0.01 92 | epoch:91 acc:0.01 93 | epoch:92 acc:0.01 94 | epoch:93 acc:0.01 95 | epoch:94 acc:0.01 96 | epoch:95 acc:0.01 97 | epoch:96 acc:0.01 98 | epoch:97 acc:0.01 99 | epoch:98 acc:0.01 100 | epoch:99 acc:0.01 101 | epoch:100 acc:0.01 102 | epoch:101 acc:0.01 103 | epoch:102 acc:0.01 104 | epoch:103 acc:0.01 105 | epoch:104 acc:0.01 106 | epoch:105 acc:0.01 107 | epoch:106 acc:0.01 108 | epoch:107 acc:0.01 109 | epoch:108 acc:0.01 110 | epoch:109 acc:0.01 111 | epoch:110 acc:0.01 112 | epoch:111 acc:0.01 113 | epoch:112 acc:0.01 114 | epoch:113 acc:0.01 115 | epoch:114 acc:0.01 116 | epoch:115 acc:0.01 117 | epoch:116 acc:0.01 118 | epoch:117 acc:0.01 119 | epoch:118 acc:0.01 120 | epoch:119 acc:0.01 121 | epoch:120 acc:0.01 122 | epoch:121 acc:0.01 123 | epoch:122 acc:0.01 124 | epoch:123 acc:0.01 125 | epoch:124 acc:0.01 126 | epoch:125 acc:0.01 127 | epoch:126 acc:0.01 128 | epoch:127 acc:0.01 129 | epoch:128 acc:0.01 130 | epoch:129 acc:0.01 131 | epoch:130 acc:0.01 132 | epoch:131 acc:0.01 133 | epoch:132 acc:0.01 134 | epoch:133 acc:0.01 135 | epoch:134 acc:0.01 136 | epoch:135 acc:0.01 137 | epoch:136 acc:0.01 138 | epoch:137 acc:0.01 139 | epoch:138 acc:0.01 140 | epoch:139 acc:0.01 141 | epoch:140 acc:0.01 142 | epoch:141 acc:0.01 143 | epoch:142 acc:0.01 144 | epoch:143 acc:0.01 145 | epoch:144 acc:0.01 146 | epoch:145 acc:0.01 147 | epoch:146 acc:0.01 148 | epoch:147 acc:0.01 149 | epoch:148 acc:0.01 150 | epoch:149 acc:0.01 151 | epoch:150 acc:0.01 152 | epoch:151 acc:0.01 153 | epoch:152 acc:0.01 154 | epoch:153 acc:0.01 155 | epoch:154 acc:0.01 156 | epoch:155 acc:0.01 157 | epoch:156 acc:0.01 158 | epoch:157 acc:0.01 159 | epoch:158 acc:0.01 160 | epoch:159 acc:0.01 161 | epoch:160 acc:0.01 162 | epoch:161 acc:0.01 163 | epoch:162 acc:0.01 164 | epoch:163 acc:0.01 165 | epoch:164 acc:0.01 166 | epoch:165 acc:0.01 167 | -------------------------------------------------------------------------------- /src/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /src/detector.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cPickle 4 | import ipdb 5 | class Detector(): 6 | def __init__(self, weight_file_path, n_labels): 7 | self.image_mean = [103.939, 116.779, 123.68] 8 | self.n_labels = n_labels 9 | 10 | with open(weight_file_path) as f: 11 | self.pretrained_weights = cPickle.load(f) 12 | 13 | def get_weight( self, layer_name): 14 | layer = self.pretrained_weights[layer_name] 15 | return layer[0] 16 | 17 | def get_bias( self, layer_name ): 18 | layer = self.pretrained_weights[layer_name] 19 | return layer[1] 20 | 21 | def get_conv_weight( self, name ): 22 | f = self.get_weight( name ) 23 | return f.transpose(( 2,3,1,0 )) 24 | 25 | def conv_layer( self, bottom, name ): 26 | with tf.variable_scope(name) as scope: 27 | 28 | w = self.get_conv_weight(name) 29 | b = self.get_bias(name) 30 | 31 | conv_weights = tf.get_variable( 32 | "W", 33 | shape=w.shape, 34 | initializer=tf.constant_initializer(w) 35 | ) 36 | conv_biases = tf.get_variable( 37 | "b", 38 | shape=b.shape, 39 | initializer=tf.constant_initializer(b) 40 | ) 41 | 42 | conv = tf.nn.conv2d( bottom, conv_weights, [1,1,1,1], padding='SAME') 43 | bias = tf.nn.bias_add( conv, conv_biases ) 44 | relu = tf.nn.relu( bias, name=name ) 45 | 46 | return relu 47 | 48 | def new_conv_layer( self, bottom, filter_shape, name ): 49 | with tf.variable_scope( name ) as scope: 50 | w = tf.get_variable( 51 | "W", 52 | shape=filter_shape, 53 | initializer=tf.random_normal_initializer(0., 0.01)) 54 | b = tf.get_variable( 55 | "b", 56 | shape=filter_shape[-1], 57 | initializer=tf.constant_initializer(0.)) 58 | 59 | conv = tf.nn.conv2d( bottom, w, [1,1,1,1], padding='SAME') 60 | bias = tf.nn.bias_add(conv, b) 61 | 62 | return bias #relu 63 | 64 | def fc_layer(self, bottom, name, create=False): 65 | shape = bottom.get_shape().as_list() 66 | dim = np.prod( shape[1:] ) 67 | x = tf.reshape(bottom, [-1, dim]) 68 | 69 | cw = self.get_weight(name) 70 | b = self.get_bias(name) 71 | 72 | if name == "fc6": 73 | cw = cw.reshape((4096, 512, 7,7)) 74 | cw = cw.transpose((2,3,1,0)) 75 | cw = cw.reshape((25088,4096)) 76 | else: 77 | cw = cw.transpose((1,0)) 78 | 79 | with tf.variable_scope(name) as scope: 80 | cw = tf.get_variable( 81 | "W", 82 | shape=cw.shape, 83 | initializer=tf.constant_initializer(cw)) 84 | b = tf.get_variable( 85 | "b", 86 | shape=b.shape, 87 | initializer=tf.constant_initializer(b)) 88 | 89 | fc = tf.nn.bias_add( tf.matmul( x, cw ), b, name=scope) 90 | 91 | return fc 92 | 93 | def new_fc_layer( self, bottom, input_size, output_size, name ): 94 | shape = bottom.get_shape().to_list() 95 | dim = np.prod( shape[1:] ) 96 | x = tf.reshape( bottom, [-1, dim]) 97 | 98 | with tf.variable_scope(name) as scope: 99 | w = tf.get_variable( 100 | "W", 101 | shape=[input_size, output_size], 102 | initializer=tf.random_normal_initializer(0., 0.01)) 103 | b = tf.get_variable( 104 | "b", 105 | shape=[output_size], 106 | initializer=tf.constant_initializer(0.)) 107 | fc = tf.nn.bias_add( tf.matmul(x, w), b, name=scope) 108 | 109 | return fc 110 | 111 | def inference( self, rgb, train=False ): 112 | rgb *= 255. 113 | r, g, b = tf.split(3, 3, rgb) 114 | bgr = tf.concat(3, 115 | [ 116 | b-self.image_mean[0], 117 | g-self.image_mean[1], 118 | r-self.image_mean[2] 119 | ]) 120 | 121 | relu1_1 = self.conv_layer( bgr, "conv1_1" ) 122 | relu1_2 = self.conv_layer( relu1_1, "conv1_2" ) 123 | 124 | pool1 = tf.nn.max_pool(relu1_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 125 | padding='SAME', name='pool1') 126 | 127 | relu2_1 = self.conv_layer(pool1, "conv2_1") 128 | relu2_2 = self.conv_layer(relu2_1, "conv2_2") 129 | pool2 = tf.nn.max_pool(relu2_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 130 | padding='SAME', name='pool2') 131 | 132 | relu3_1 = self.conv_layer( pool2, "conv3_1") 133 | relu3_2 = self.conv_layer( relu3_1, "conv3_2") 134 | relu3_3 = self.conv_layer( relu3_2, "conv3_3") 135 | pool3 = tf.nn.max_pool(relu3_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 136 | padding='SAME', name='pool3') 137 | 138 | relu4_1 = self.conv_layer( pool3, "conv4_1") 139 | relu4_2 = self.conv_layer( relu4_1, "conv4_2") 140 | relu4_3 = self.conv_layer( relu4_2, "conv4_3") 141 | pool4 = tf.nn.max_pool(relu4_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 142 | padding='SAME', name='pool4') 143 | 144 | relu5_1 = self.conv_layer( pool4, "conv5_1") 145 | relu5_2 = self.conv_layer( relu5_1, "conv5_2") 146 | relu5_3 = self.conv_layer( relu5_2, "conv5_3") 147 | 148 | conv6 = self.new_conv_layer( relu5_3, [3,3,512,1024], "conv6") 149 | gap = tf.reduce_mean( conv6, [1,2] ) 150 | 151 | with tf.variable_scope("GAP"): 152 | gap_w = tf.get_variable( 153 | "W", 154 | shape=[1024, self.n_labels], 155 | initializer=tf.random_normal_initializer(0., 0.01)) 156 | 157 | output = tf.matmul( gap, gap_w) 158 | 159 | return pool1, pool2, pool3, pool4, relu5_3, conv6, gap, output 160 | 161 | def get_classmap(self, label, conv6): 162 | conv6_resized = tf.image.resize_bilinear( conv6, [224, 224] ) 163 | with tf.variable_scope("GAP", reuse=True): 164 | label_w = tf.gather(tf.transpose(tf.get_variable("W")), label) 165 | label_w = tf.reshape( label_w, [-1, 1024, 1] ) # [batch_size, 1024, 1] 166 | 167 | conv6_resized = tf.reshape(conv6_resized, [-1, 224*224, 1024]) # [batch_size, 224*224, 1024] 168 | 169 | classmap = tf.batch_matmul( conv6_resized, label_w ) 170 | classmap = tf.reshape( classmap, [-1, 224,224] ) 171 | return classmap 172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /src/detector.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/src/detector.pyc -------------------------------------------------------------------------------- /src/test.caltech.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from detector import Detector 6 | from util import load_image 7 | 8 | import skimage.io 9 | import matplotlib.pyplot as plt 10 | 11 | import os 12 | import ipdb 13 | 14 | testset_path = '../data/caltech/test.pickle' 15 | label_dict_path = '../data/caltech/label_dict.pickle' 16 | 17 | weight_path = '../data/caffe_layers_value.pickle' 18 | model_path = '../models/caltech256/model-4' 19 | 20 | batch_size = 1 21 | 22 | testset = pd.read_pickle( testset_path )[::-1][:20] 23 | label_dict = pd.read_pickle( label_dict_path ) 24 | n_labels = len( label_dict ) 25 | 26 | images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images") 27 | labels_tf = tf.placeholder( tf.int64, [None], name='labels') 28 | 29 | detector = Detector( weight_path, n_labels ) 30 | c1,c2,c3,c4,conv5, conv6, gap, output = detector.inference( images_tf ) 31 | classmap = detector.get_classmap( labels_tf, conv6 ) 32 | 33 | sess = tf.InteractiveSession() 34 | saver = tf.train.Saver() 35 | 36 | saver.restore( sess, model_path ) 37 | 38 | for start, end in zip( 39 | range( 0, len(testset)+batch_size, batch_size), 40 | range(batch_size, len(testset)+batch_size, batch_size)): 41 | 42 | current_data = testset[start:end] 43 | current_image_paths = current_data['image_path'].values 44 | current_images = np.array(map(lambda x: load_image(x), current_image_paths)) 45 | 46 | good_index = np.array(map(lambda x: x is not None, current_images)) 47 | 48 | current_data = current_data[good_index] 49 | current_image_paths = current_image_paths[good_index] 50 | current_images = np.stack(current_images[good_index]) 51 | current_labels = current_data['label'].values 52 | current_label_names = current_data['label_name'].values 53 | 54 | conv6_val, output_val = sess.run( 55 | [conv6, output], 56 | feed_dict={ 57 | images_tf: current_images 58 | }) 59 | 60 | label_predictions = output_val.argmax( axis=1 ) 61 | acc = (label_predictions == current_labels).sum() 62 | 63 | classmap_vals = sess.run( 64 | classmap, 65 | feed_dict={ 66 | labels_tf: label_predictions, 67 | conv6: conv6_val 68 | }) 69 | 70 | classmap_answer = sess.run( 71 | classmap, 72 | feed_dict={ 73 | labels_tf: current_labels, 74 | conv6: conv6_val 75 | }) 76 | 77 | classmap_vis = map(lambda x: ((x-x.min())/(x.max()-x.min())), classmap_answer) 78 | 79 | for vis, ori,ori_path, l_name in zip(classmap_vis, current_images, current_image_paths, current_label_names): 80 | print l_name 81 | plt.imshow( ori ) 82 | plt.imshow( vis, cmap=plt.cm.jet, alpha=0.5, interpolation='nearest' ) 83 | plt.show() 84 | 85 | # vis_path = '../results/'+ ori_path.split('/')[-1] 86 | # vis_path_ori = '../results/'+ori_path.split('/')[-1].split('.')[0]+'.ori.jpg' 87 | # skimage.io.imsave( vis_path, vis ) 88 | # skimage.io.imsave( vis_path_ori, ori ) 89 | 90 | -------------------------------------------------------------------------------- /src/train.caltech.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from detector import Detector 6 | from util import load_image 7 | import os 8 | import ipdb 9 | 10 | weight_path = '../data/caffe_layers_value.pickle' 11 | model_path = '../models/caltech256/' 12 | pretrained_model_path = None #'../models/caltech256/model-0' 13 | n_epochs = 10000 14 | init_learning_rate = 0.01 15 | weight_decay_rate = 0.0005 16 | momentum = 0.9 17 | batch_size = 60 18 | 19 | dataset_path = '/media/storage3/Study/data/256_ObjectCategories' 20 | 21 | caltech_path = '../data/caltech' 22 | trainset_path = '../data/caltech/train.pickle' 23 | testset_path = '../data/caltech/test.pickle' 24 | label_dict_path = '../data/caltech/label_dict.pickle' 25 | 26 | if not os.path.exists( trainset_path ): 27 | if not os.path.exists( caltech_path ): 28 | os.makedirs( caltech_path ) 29 | image_dir_list = os.listdir( dataset_path ) 30 | 31 | label_pairs = map(lambda x: x.split('.'), image_dir_list) 32 | labels, label_names = zip(*label_pairs) 33 | labels = map(lambda x: int(x), labels) 34 | 35 | label_dict = pd.Series( labels, index=label_names ) 36 | label_dict -= 1 37 | n_labels = len( label_dict ) 38 | 39 | image_paths_per_label = map(lambda one_dir: map(lambda one_file: os.path.join( dataset_path, one_dir, one_file ), os.listdir( os.path.join( dataset_path, one_dir))), image_dir_list) 40 | image_paths_train = np.hstack(map(lambda one_class: one_class[:-10], image_paths_per_label)) 41 | image_paths_test = np.hstack(map(lambda one_class: one_class[-10:], image_paths_per_label)) 42 | 43 | trainset = pd.DataFrame({'image_path': image_paths_train}) 44 | testset = pd.DataFrame({'image_path': image_paths_test }) 45 | 46 | trainset = trainset[ trainset['image_path'].map( lambda x: x.endswith('.jpg'))] 47 | trainset['label'] = trainset['image_path'].map(lambda x: int(x.split('/')[-2].split('.')[0]) - 1) 48 | trainset['label_name'] = trainset['image_path'].map(lambda x: x.split('/')[-2].split('.')[1]) 49 | 50 | testset = testset[ testset['image_path'].map( lambda x: x.endswith('.jpg'))] 51 | testset['label'] = testset['image_path'].map(lambda x: int(x.split('/')[-2].split('.')[0]) - 1) 52 | testset['label_name'] = testset['image_path'].map(lambda x: x.split('/')[-2].split('.')[1]) 53 | 54 | label_dict.to_pickle(label_dict_path) 55 | trainset.to_pickle(trainset_path) 56 | testset.to_pickle(testset_path) 57 | else: 58 | trainset = pd.read_pickle( trainset_path ) 59 | testset = pd.read_pickle( testset_path ) 60 | label_dict = pd.read_pickle( label_dict_path ) 61 | n_labels = len(label_dict) 62 | 63 | learning_rate = tf.placeholder( tf.float32, []) 64 | images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images") 65 | labels_tf = tf.placeholder( tf.int64, [None], name='labels') 66 | 67 | detector = Detector(weight_path, n_labels) 68 | 69 | p1,p2,p3,p4,conv5, conv6, gap, output = detector.inference(images_tf) 70 | loss_tf = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( output, labels_tf )) 71 | 72 | weights_only = filter( lambda x: x.name.endswith('W:0'), tf.trainable_variables() ) 73 | weight_decay = tf.reduce_sum(tf.pack([tf.nn.l2_loss(x) for x in weights_only])) * weight_decay_rate 74 | loss_tf += weight_decay 75 | 76 | sess = tf.InteractiveSession() 77 | saver = tf.train.Saver( max_to_keep=50 ) 78 | 79 | optimizer = tf.train.MomentumOptimizer( learning_rate, momentum ) 80 | grads_and_vars = optimizer.compute_gradients( loss_tf ) 81 | grads_and_vars = map(lambda gv: (gv[0], gv[1]) if ('conv6' in gv[1].name or 'GAP' in gv[1].name) else (gv[0]*0.1, gv[1]), grads_and_vars) 82 | #grads_and_vars = [(tf.clip_by_value(gv[0], -5., 5.), gv[1]) for gv in grads_and_vars] 83 | train_op = optimizer.apply_gradients( grads_and_vars ) 84 | tf.initialize_all_variables().run() 85 | 86 | if pretrained_model_path: 87 | print "Pretrained" 88 | saver.restore(sess, pretrained_model_path) 89 | 90 | testset.index = range( len(testset) ) 91 | #testset = testset.ix[np.random.permutation( len(testset) )]#[:1000] 92 | #trainset2 = testset[1000:] 93 | #testset = testset[:1000] 94 | 95 | #trainset = pd.concat( [trainset, trainset2] ) 96 | # We lack the number of training set. Let's use some of the test images 97 | 98 | f_log = open('../results/log.caltech256.txt', 'w') 99 | 100 | iterations = 0 101 | loss_list = [] 102 | for epoch in range(n_epochs): 103 | 104 | trainset.index = range( len(trainset) ) 105 | trainset = trainset.ix[ np.random.permutation( len(trainset) )] 106 | 107 | for start, end in zip( 108 | range( 0, len(trainset)+batch_size, batch_size), 109 | range(batch_size, len(trainset)+batch_size, batch_size)): 110 | 111 | current_data = trainset[start:end] 112 | current_image_paths = current_data['image_path'].values 113 | current_images = np.array(map(lambda x: load_image(x), current_image_paths)) 114 | 115 | good_index = np.array(map(lambda x: x is not None, current_images)) 116 | 117 | current_data = current_data[good_index] 118 | current_images = np.stack(current_images[good_index]) 119 | current_labels = current_data['label'].values 120 | 121 | _, loss_val, output_val = sess.run( 122 | [train_op, loss_tf, output], 123 | feed_dict={ 124 | learning_rate: init_learning_rate, 125 | images_tf: current_images, 126 | labels_tf: current_labels 127 | }) 128 | 129 | loss_list.append( loss_val ) 130 | 131 | iterations += 1 132 | if iterations % 5 == 0: 133 | print "======================================" 134 | print "Epoch", epoch, "Iteration", iterations 135 | print "Processed", start, '/', len(trainset) 136 | 137 | label_predictions = output_val.argmax(axis=1) 138 | acc = (label_predictions == current_labels).sum() 139 | 140 | print "Accuracy:", acc, '/', len(current_labels) 141 | print "Training Loss:", np.mean(loss_list) 142 | print "\n" 143 | loss_list = [] 144 | 145 | n_correct = 0 146 | n_data = 0 147 | for start, end in zip( 148 | range(0, len(testset)+batch_size, batch_size), 149 | range(batch_size, len(testset)+batch_size, batch_size) 150 | ): 151 | current_data = testset[start:end] 152 | current_image_paths = current_data['image_path'].values 153 | current_images = np.array(map(lambda x: load_image(x), current_image_paths)) 154 | 155 | good_index = np.array(map(lambda x: x is not None, current_images)) 156 | 157 | current_data = current_data[good_index] 158 | current_images = np.stack(current_images[good_index]) 159 | current_labels = current_data['label'].values 160 | 161 | output_vals = sess.run( 162 | output, 163 | feed_dict={images_tf:current_images}) 164 | 165 | label_predictions = output_vals.argmax(axis=1) 166 | acc = (label_predictions == current_labels).sum() 167 | 168 | n_correct += acc 169 | n_data += len(current_data) 170 | 171 | acc_all = n_correct / float(n_data) 172 | f_log.write('epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n') 173 | print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" 174 | print 'epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n' 175 | print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" 176 | 177 | saver.save( sess, os.path.join( model_path, 'model'), global_step=epoch) 178 | 179 | init_learning_rate *= 0.99 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /src/train.imagenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from detector import Detector 6 | from util import load_image 7 | import os 8 | import ipdb 9 | 10 | weight_path = '../data/caffe_layers_value.pickle' 11 | model_path = '../models/imagenet/' 12 | pretrained_model_path = None #'../models/imagenet/model-0' 13 | n_epochs = 10000 14 | init_learning_rate = 0.01 15 | weight_decay_rate = 0.0001 16 | momentum = 0.9 17 | batch_size = 60 18 | 19 | dataset_path = '/media/storage3/Study/data/imagenet' 20 | image_dir_list = label_names = filter(lambda x: x.startswith('n'), os.listdir( dataset_path )) 21 | 22 | label_dict = pd.Series( range(len(label_names)), index=label_names ) 23 | n_labels = len( label_dict ) 24 | 25 | image_paths_per_label = map(lambda one_dir: map(lambda one_file: os.path.join( dataset_path, one_dir, one_file ), os.listdir( os.path.join( dataset_path, one_dir))), image_dir_list) 26 | image_paths_train = np.hstack(map(lambda one_class: one_class[:-50], image_paths_per_label)) 27 | image_paths_test = np.hstack(map(lambda one_class: one_class[-50:], image_paths_per_label)) 28 | 29 | trainset = pd.DataFrame({'image_path': image_paths_train}) 30 | testset = pd.DataFrame({'image_path': image_paths_test }) 31 | 32 | trainset['label_name'] = trainset['image_path'].map(lambda x: x.split('/')[-2]) 33 | testset['label_name'] = testset['image_path'].map(lambda x: x.split('/')[-2]) 34 | 35 | trainset['label'] = trainset['label_name'].map( label_dict ) 36 | testset['label'] = testset['label_name'].map( label_dict ) 37 | 38 | train_phase = tf.placeholder( tf.bool ) 39 | learning_rate = tf.placeholder( tf.float32, []) 40 | images_tf = tf.placeholder( tf.float32, [None, 224, 224, 3], name="images") 41 | labels_tf = tf.placeholder( tf.int64, [None], name='labels') 42 | 43 | detector = Detector(weight_path, n_labels) 44 | 45 | p1,p2,p3,p4,conv5, conv6, gap, output = detector.inference(images_tf) 46 | loss_tf = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( output, labels_tf )) 47 | 48 | weights_only = filter( lambda x: x.name.endswith('W:0'), tf.trainable_variables() ) 49 | weight_decay = tf.reduce_sum(tf.pack([tf.nn.l2_loss(x) for x in weights_only])) * weight_decay_rate 50 | loss_tf += weight_decay 51 | 52 | sess = tf.InteractiveSession() 53 | saver = tf.train.Saver( max_to_keep=50 ) 54 | 55 | #optimizer = tf.train.RMSPropOptimizer( learning_rate ) 56 | optimizer = tf.train.MomentumOptimizer( learning_rate, momentum ) 57 | grads_and_vars = optimizer.compute_gradients( loss_tf ) 58 | grads_and_vars = [(tf.clip_by_value(gv[0], -5., 5.), gv[1]) for gv in grads_and_vars] 59 | grads_and_vars = map(lambda gv: (gv[0], gv[1]) if ('conv6' in gv[1].name or 'GAP' in gv[1].name) else (gv[0]*0.1, gv[1]), grads_and_vars) 60 | train_op = optimizer.apply_gradients( grads_and_vars ) 61 | tf.initialize_all_variables().run() 62 | 63 | if pretrained_model_path: 64 | print "Pretrained" 65 | saver.restore(sess, pretrained_model_path) 66 | 67 | testset.index = range( len(testset) ) 68 | # We lack the number of training set. Let's use some of the test images 69 | 70 | f_log = open('../results/log.imagenet.txt', 'w') 71 | 72 | iterations = 0 73 | loss_list = [] 74 | for epoch in range(n_epochs): 75 | 76 | trainset.index = range( len(trainset) ) 77 | trainset = trainset.ix[ np.random.permutation( len(trainset) )] 78 | 79 | for start, end in zip( 80 | range( 0, len(trainset)+batch_size, batch_size), 81 | range(batch_size, len(trainset)+batch_size, batch_size)): 82 | 83 | current_data = trainset[start:end] 84 | current_image_paths = current_data['image_path'].values 85 | current_images = np.array(map(lambda x: load_image(x), current_image_paths)) 86 | 87 | good_index = np.array(map(lambda x: x is not None, current_images)) 88 | 89 | current_data = current_data[good_index] 90 | current_images = np.stack(current_images[good_index]) 91 | current_labels = current_data['label'].values 92 | 93 | try: 94 | _, loss_val, output_val = sess.run( 95 | [train_op, loss_tf, output], 96 | feed_dict={ 97 | learning_rate: init_learning_rate, 98 | images_tf: current_images, 99 | labels_tf: current_labels 100 | }) 101 | #conv5, conv6, gap, output = detector.inference(images_tf) 102 | p1_val, p2_val, p3_val, p4_val, conv5_val, conv6_val, gap_val = sess.run( 103 | [p1,p2,p3,p4,conv5, conv6, gap], 104 | feed_dict={ 105 | images_tf: current_images 106 | }) 107 | 108 | except: 109 | continue 110 | 111 | loss_list.append( loss_val ) 112 | 113 | iterations += 1 114 | if iterations % 5 == 0: 115 | print "======================================" 116 | print "Epoch", epoch, "Iteration", iterations 117 | print "Processed", start, '/', len(trainset) 118 | print "Max output:", output_val.max() 119 | 120 | label_predictions = output_val.argmax(axis=1) 121 | acc = (label_predictions == current_labels).sum() 122 | 123 | print "Accuracy:", acc, '/', len(current_labels) 124 | print "Max prob class:", output_val.argmax(axis=1) 125 | print "Training Loss:", np.mean(loss_list) 126 | print "\n" 127 | loss_list = [] 128 | 129 | n_correct = 0 130 | n_data = 0 131 | 132 | for start, end in zip( 133 | range(0, len(testset)+batch_size, batch_size), 134 | range(batch_size, len(testset)+batch_size, batch_size) 135 | ): 136 | current_data = testset[start:end] 137 | current_image_paths = current_data['image_path'].values 138 | current_images = np.array(map(lambda x: load_image(x), current_image_paths)) 139 | 140 | good_index = np.array(map(lambda x: x is not None, current_images)) 141 | 142 | current_data = current_data[good_index] 143 | current_images = np.stack(current_images[good_index]) 144 | current_labels = current_data['label'].values 145 | 146 | output_vals = sess.run( 147 | output, 148 | feed_dict={images_tf:current_images}) 149 | 150 | label_predictions = output_vals.argmax(axis=1) 151 | acc = (label_predictions == current_labels).sum() 152 | 153 | n_correct += acc 154 | n_data += len(current_data) 155 | 156 | acc_all = n_correct / float(n_data) 157 | f_log.write('epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n') 158 | print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" 159 | print 'epoch:'+str(epoch)+'\tacc:'+str(acc_all) + '\n' 160 | print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" 161 | 162 | if epoch % 10 == 0: 163 | saver.save( sess, os.path.join( model_path, 'model'), global_step=epoch/10) 164 | 165 | init_learning_rate *= 0.9 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | import skimage.io 2 | import skimage.transform 3 | import ipdb 4 | 5 | import numpy as np 6 | 7 | def load_image( path ): 8 | try: 9 | img = skimage.io.imread( path ).astype( float ) 10 | except: 11 | return None 12 | 13 | if img is None: return None 14 | if len(img.shape) < 2: return None 15 | if len(img.shape) == 4: return None 16 | if len(img.shape) == 2: img=np.tile(img[:,:,None], 3) 17 | if img.shape[2] == 4: img=img[:,:,:3] 18 | if img.shape[2] > 4: return None 19 | 20 | img /= 255. 21 | 22 | short_edge = min( img.shape[:2] ) 23 | yy = int((img.shape[0] - short_edge) / 2) 24 | xx = int((img.shape[1] - short_edge) / 2) 25 | crop_img = img[yy:yy+short_edge, xx:xx+short_edge] 26 | resized_img = skimage.transform.resize( crop_img, [224,224] ) 27 | return resized_img 28 | 29 | -------------------------------------------------------------------------------- /src/util.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzsaxmafia/Weakly_detector/2da68d209a563c6373fbbb2659ddbbe18afb708c/src/util.pyc --------------------------------------------------------------------------------