├── .gitignore
├── README.md
├── caffenet.py
├── config.py
├── examples
│   ├── facescrub
│   │   ├── convert_facescrub_data.py
│   │   ├── deploy.prototxt
│   │   ├── deploy_with_latent_layer.prototxt
│   │   ├── generate_feature_mat.py
│   │   ├── retrieve.sh
│   │   ├── solver.prototxt
│   │   ├── solver_with_latent_layer.prototxt
│   │   ├── train.sh
│   │   ├── train_val.prototxt
│   │   └── train_val_with_latent_layer.prototxt
│   └── shoes7k
│       ├── convert_shoes7k_data.py
│       ├── deploy.prototxt
│       ├── deploy_with_latent_layer.prototxt
│       ├── eval.py
│       ├── generate_feature_mat.py
│       ├── retrieve.sh
│       ├── solver.prototxt
│       ├── solver_with_latent_layer.prototxt
│       ├── train.sh
│       ├── train_val.prototxt
│       └── train_val_with_latent_layer.prototxt
├── img
│   └── shoes7k_retrieval.png
├── layer_features.py
├── retrieve.py
└── tools
    └── convert_protomean.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# local config
config.cfg

# caffe model files
*.caffemodel
*.solverstate

# dataset files
*_lmdb/
*.binaryproto
*.npy

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# fast-image-retrieval
A lightweight framework using binary hash codes and deep learning for fast image retrieval.

## Configuration
To run the examples, you need to create a `config.cfg` file under the root folder of this project. Its section and key names must match what `config.py` reads; an example `config.cfg` looks like:

```
[shoes7k]
pos_path: /path/to/datasets/shoes7k/classification
neg_path: /path/to/datasets/shoes7k/classificationNeg
latent_num: 48
class_num: 2

[facescrub]
root: /path/to/facescrub
```

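## How retrieval works

`generate_feature_mat.py` stores two matrices for the whole dataset: binary hash codes taken from the 48-unit `latent` layer and 4096-d `fc7` features. `retrieve.py` (invoked by `retrieve.sh`) ranks dataset images against a query using these files. Below is a minimal sketch of the coarse-to-fine idea behind such hash-based retrieval; it is illustrative only, and names such as `retrieve` and `hamming_radius` are assumptions rather than the actual API of `retrieve.py`:

```python
import numpy as np

latent_codes = np.load('latent_features.npy')  # one binary code per image
fc7_feats = np.load('fc7_features.npy')        # one 4096-d vector per image

def retrieve(query_code, query_fc7, top_k=5, hamming_radius=10):
    # coarse stage: shortlist images whose hash codes lie within a small
    # Hamming distance of the query code
    hamming = np.count_nonzero(latent_codes != query_code, axis=1)
    candidates = np.where(hamming <= hamming_radius)[0]
    # fine stage: rank the shortlist by Euclidean distance in fc7 space
    dists = np.linalg.norm(fc7_feats[candidates] - query_fc7, axis=1)
    order = np.argsort(dists)[:top_k]
    return candidates[order], dists[order]
```
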
## Run Examples

### Shoes7k

To run the shoes7k example, first convert the shoes7k dataset to LMDB:

```
cd fast-image-retrieval/
python ./examples/shoes7k/convert_shoes7k_data.py
```

Then train the CNN model:

```
./examples/shoes7k/train.sh
```

Next, you can retrieve images similar to `target.jpg` using

```
./examples/shoes7k/retrieve.sh target.jpg
```

Note that the first retrieval may be slow because the program reads all shoes7k images and generates the feature matrices. Later retrievals are fast.

Evaluate over the whole dataset using

```
python ./examples/shoes7k/eval.py
```

Some retrieval results:

![shoes7k retrieval](img/shoes7k_retrieval.png)

--------------------------------------------------------------------------------
/caffenet.py:
--------------------------------------------------------------------------------
'''
Caffenet

adapted from $CAFFEROOT/examples/pycaffe/caffenet.py
'''
from __future__ import print_function
from caffe import layers as L, params as P, to_proto


# helper functions for common structures
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)


def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)


def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)


def caffenet(lmdb, batch_size=256, include_acc=False):
    data, label = L.Data(source=lmdb, backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2)

    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)

    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)


def make_net(train_lmdb, test_lmdb):
    with open('train.prototxt', 'w') as f:
        print(caffenet(train_lmdb), file=f)

    with open('test.prototxt', 'w') as f:
        print(caffenet(test_lmdb, batch_size=50,
                       include_acc=True), file=f)


if __name__ == '__main__':
    import sys

    if len(sys.argv) == 3:
        train_lmdb = sys.argv[1]
        test_lmdb = sys.argv[2]
        make_net(train_lmdb, test_lmdb)
    else:
        print('Usage: python caffenet.py train_lmdb_path test_lmdb_path')

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
'''
config
'''
try:
    import ConfigParser as cp  # Python 2
except ImportError:
    import configparser as cp  # Python 3


config = 
cp.RawConfigParser() 8 | config.read('./config.cfg') 9 | 10 | 11 | # config for example shoes7k 12 | eg_shoes7k_pos_path = config.get('shoes7k', 'pos_path') 13 | eg_shoes7k_neg_path = config.get('shoes7k', 'neg_path') 14 | eg_shoes7k_latent_num = config.getint('shoes7k', 'latent_num') 15 | eg_shoes7k_class_num = config.getint('shoes7k', 'class_num') 16 | 17 | # config for example facescrub 18 | eg_facescrub_folder = config.get('facescrub', 'root') 19 | -------------------------------------------------------------------------------- /examples/facescrub/convert_facescrub_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Convert facescrub dataset to train/test lmdb dataset 3 | 4 | ------------------------------------------------------ 5 | Two classes: actor, actress 6 | 7 | The dataset is downloaded using https://github.com/faceteam/facescrub 8 | Please keep the folder structure after downloaded, 9 | and configure the config.eg_facescrub_folder as path to `facescrub` 10 | where `download.py` exists. 11 | ''' 12 | import os 13 | import cv2 14 | import lmdb 15 | import numpy as np 16 | from caffe.proto import caffe_pb2 17 | 18 | import config 19 | 20 | 21 | IM_HEIGHT = 227 22 | IM_WIDTH = 227 23 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) 24 | TRAIN_LMDB = os.path.join(SCRIPT_PATH, 'facescrub_train_lmdb') 25 | TEST_LMDB = os.path.join(SCRIPT_PATH, 'facescrub_test_lmdb') 26 | ACTORS = os.path.join(config.eg_facescrub_folder, 'facescrub_actors.txt') 27 | ACTRESS = os.path.join(config.eg_facescrub_folder, 'facescrub_actresses.txt') 28 | DOWNLOAD = os.path.join(config.eg_facescrub_folder, 'download') 29 | 30 | 31 | def get_names(path): 32 | """get actor or actress names""" 33 | data = np.loadtxt(path, delimiter='\t', skiprows=1, dtype=str) 34 | return np.unique(data[:, 0]) 35 | 36 | 37 | def get_images(names, ratio, label, train_images, train_labels, 38 | test_images, test_labels): 39 | for name in names: 40 | folder = '_'.join(name.split()) 41 | folder = os.path.join(DOWNLOAD, folder, 'face') 42 | 43 | faces = os.listdir(folder) 44 | split = int(len(faces) * ratio) 45 | for idx, face in enumerate(faces): 46 | face = os.path.join(folder, face) 47 | if idx < split: 48 | train_images.append(face) 49 | train_labels.append(label) 50 | else: 51 | test_images.append(face) 52 | test_labels.append(label) 53 | 54 | 55 | def get_all_images(): 56 | actors = get_names(ACTORS) 57 | actresses = get_names(ACTRESS) 58 | names = np.concatenate((actors, actresses)) 59 | all_images = [] 60 | for name in names: 61 | folder = '_'.join(name.split()) 62 | folder = os.path.join(DOWNLOAD, folder, 'face') 63 | 64 | faces = os.listdir(folder) 65 | for face in faces: 66 | face = os.path.join(folder, face) 67 | all_images.append(face) 68 | 69 | return np.array(all_images) 70 | 71 | 72 | def get_tr_te_images(ratio): 73 | """get training and test images for two classes""" 74 | train_images, train_labels = [], [] 75 | test_images, test_labels = [], [] 76 | 77 | actors = get_names(ACTORS) 78 | actresses = get_names(ACTRESS) 79 | 80 | get_images(actors, ratio, 1, train_images, train_labels, test_images, 81 | test_labels) 82 | get_images(actresses, ratio, 0, train_images, train_labels, test_images, 83 | test_labels) 84 | 85 | train_images = np.array(train_images) 86 | train_labels = np.array(train_labels) 87 | test_images = np.array(test_images) 88 | test_labels = np.array(test_labels) 89 | 90 | # shuffle 91 | train_idxs = np.arange(train_images.shape[0]) 92 | 
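    # permute index arrays (rather than the image/label arrays themselves)
    # so that images and labels stay aligned after shuffling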
test_idxs = np.arange(test_images.shape[0]) 93 | np.random.shuffle(train_idxs) 94 | np.random.shuffle(test_idxs) 95 | 96 | return (train_images[train_idxs], train_labels[train_idxs], 97 | test_images[test_idxs], test_labels[test_idxs]) 98 | 99 | 100 | def save_to_lmdb(images, labels, lmdb_file): 101 | if not os.path.exists(lmdb_file): 102 | batch_size = 256 103 | lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12)) 104 | lmdb_txn = lmdb_env.begin(write=True) 105 | item_id = 0 106 | datum = caffe_pb2.Datum() 107 | 108 | for i in range(images.shape[0]): 109 | im = cv2.imread(images[i]) 110 | if im is None: 111 | continue 112 | im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH)) 113 | datum.channels = im.shape[2] 114 | datum.height = im.shape[0] 115 | datum.width = im.shape[1] 116 | datum.data = im.tobytes() 117 | datum.label = labels[i] 118 | keystr = '{:0>8d}'.format(item_id) 119 | lmdb_txn.put(keystr, datum.SerializeToString()) 120 | 121 | # write batch 122 | if (item_id + 1) % batch_size == 0: 123 | lmdb_txn.commit() 124 | lmdb_txn = lmdb_env.begin(write=True) 125 | print('converted {} images'.format(item_id + 1)) 126 | 127 | item_id += 1 128 | 129 | # write last batch 130 | if (item_id + 1) % batch_size != 0: 131 | lmdb_txn.commit() 132 | print('converted {} images'.format(item_id + 1)) 133 | print('Generated ' + lmdb_file) 134 | else: 135 | print(lmdb_file + ' already exists') 136 | 137 | 138 | def convert_facecrub_data(ratio=.8): 139 | tr_images, tr_labels, te_images, te_labels = get_tr_te_images(ratio) 140 | save_to_lmdb(tr_images, tr_labels, TRAIN_LMDB) 141 | save_to_lmdb(te_images, te_labels, TEST_LMDB) 142 | 143 | 144 | if __name__ == '__main__': 145 | convert_facecrub_data() 146 | -------------------------------------------------------------------------------- /examples/facescrub/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 96 15 | kernel_size: 11 16 | stride: 4 17 | } 18 | } 19 | layer { 20 | name: "relu1" 21 | type: "ReLU" 22 | bottom: "conv1" 23 | top: "conv1" 24 | } 25 | layer { 26 | name: "pool1" 27 | type: "Pooling" 28 | bottom: "conv1" 29 | top: "pool1" 30 | pooling_param { 31 | pool: MAX 32 | kernel_size: 3 33 | stride: 2 34 | } 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "pool1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0001 44 | beta: 0.75 45 | } 46 | } 47 | layer { 48 | name: "conv2" 49 | type: "Convolution" 50 | bottom: "norm1" 51 | top: "conv2" 52 | convolution_param { 53 | num_output: 256 54 | pad: 2 55 | kernel_size: 5 56 | group: 2 57 | } 58 | } 59 | layer { 60 | name: "relu2" 61 | type: "ReLU" 62 | bottom: "conv2" 63 | top: "conv2" 64 | } 65 | layer { 66 | name: "pool2" 67 | type: "Pooling" 68 | bottom: "conv2" 69 | top: "pool2" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 3 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "norm2" 78 | type: "LRN" 79 | bottom: "pool2" 80 | top: "norm2" 81 | lrn_param { 82 | local_size: 5 83 | alpha: 0.0001 84 | beta: 0.75 85 | } 86 | } 87 | layer { 88 | name: "conv3" 89 | type: "Convolution" 90 | bottom: "norm2" 91 | top: "conv3" 92 | convolution_param { 93 | num_output: 384 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | 
name: "relu3" 100 | type: "ReLU" 101 | bottom: "conv3" 102 | top: "conv3" 103 | } 104 | layer { 105 | name: "conv4" 106 | type: "Convolution" 107 | bottom: "conv3" 108 | top: "conv4" 109 | convolution_param { 110 | num_output: 384 111 | pad: 1 112 | kernel_size: 3 113 | group: 2 114 | } 115 | } 116 | layer { 117 | name: "relu4" 118 | type: "ReLU" 119 | bottom: "conv4" 120 | top: "conv4" 121 | } 122 | layer { 123 | name: "conv5" 124 | type: "Convolution" 125 | bottom: "conv4" 126 | top: "conv5" 127 | convolution_param { 128 | num_output: 256 129 | pad: 1 130 | kernel_size: 3 131 | group: 2 132 | } 133 | } 134 | layer { 135 | name: "relu5" 136 | type: "ReLU" 137 | bottom: "conv5" 138 | top: "conv5" 139 | } 140 | layer { 141 | name: "pool5" 142 | type: "Pooling" 143 | bottom: "conv5" 144 | top: "pool5" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "fc6" 153 | type: "InnerProduct" 154 | bottom: "pool5" 155 | top: "fc6" 156 | inner_product_param { 157 | num_output: 4096 158 | } 159 | } 160 | layer { 161 | name: "relu6" 162 | type: "ReLU" 163 | bottom: "fc6" 164 | top: "fc6" 165 | } 166 | layer { 167 | name: "drop6" 168 | type: "Dropout" 169 | bottom: "fc6" 170 | top: "fc6" 171 | dropout_param { 172 | dropout_ratio: 0.5 173 | } 174 | } 175 | layer { 176 | name: "fc7" 177 | type: "InnerProduct" 178 | bottom: "fc6" 179 | top: "fc7" 180 | inner_product_param { 181 | num_output: 4096 182 | } 183 | } 184 | layer { 185 | name: "relu7" 186 | type: "ReLU" 187 | bottom: "fc7" 188 | top: "fc7" 189 | } 190 | layer { 191 | name: "drop7" 192 | type: "Dropout" 193 | bottom: "fc7" 194 | top: "fc7" 195 | dropout_param { 196 | dropout_ratio: 0.5 197 | } 198 | } 199 | layer { 200 | name: "fc8" 201 | type: "InnerProduct" 202 | bottom: "fc7" 203 | top: "fc8" 204 | inner_product_param { 205 | num_output: 2 206 | } 207 | } 208 | layer { 209 | name: "prob" 210 | type: "Softmax" 211 | bottom: "fc8" 212 | top: "prob" 213 | } 214 | -------------------------------------------------------------------------------- /examples/facescrub/deploy_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 96 15 | kernel_size: 11 16 | stride: 4 17 | } 18 | } 19 | layer { 20 | name: "relu1" 21 | type: "ReLU" 22 | bottom: "conv1" 23 | top: "conv1" 24 | } 25 | layer { 26 | name: "pool1" 27 | type: "Pooling" 28 | bottom: "conv1" 29 | top: "pool1" 30 | pooling_param { 31 | pool: MAX 32 | kernel_size: 3 33 | stride: 2 34 | } 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "pool1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0001 44 | beta: 0.75 45 | } 46 | } 47 | layer { 48 | name: "conv2" 49 | type: "Convolution" 50 | bottom: "norm1" 51 | top: "conv2" 52 | convolution_param { 53 | num_output: 256 54 | pad: 2 55 | kernel_size: 5 56 | group: 2 57 | } 58 | } 59 | layer { 60 | name: "relu2" 61 | type: "ReLU" 62 | bottom: "conv2" 63 | top: "conv2" 64 | } 65 | layer { 66 | name: "pool2" 67 | type: "Pooling" 68 | bottom: "conv2" 69 | top: "pool2" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 3 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "norm2" 78 | type: "LRN" 79 | bottom: "pool2" 
80 | top: "norm2" 81 | lrn_param { 82 | local_size: 5 83 | alpha: 0.0001 84 | beta: 0.75 85 | } 86 | } 87 | layer { 88 | name: "conv3" 89 | type: "Convolution" 90 | bottom: "norm2" 91 | top: "conv3" 92 | convolution_param { 93 | num_output: 384 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | name: "relu3" 100 | type: "ReLU" 101 | bottom: "conv3" 102 | top: "conv3" 103 | } 104 | layer { 105 | name: "conv4" 106 | type: "Convolution" 107 | bottom: "conv3" 108 | top: "conv4" 109 | convolution_param { 110 | num_output: 384 111 | pad: 1 112 | kernel_size: 3 113 | group: 2 114 | } 115 | } 116 | layer { 117 | name: "relu4" 118 | type: "ReLU" 119 | bottom: "conv4" 120 | top: "conv4" 121 | } 122 | layer { 123 | name: "conv5" 124 | type: "Convolution" 125 | bottom: "conv4" 126 | top: "conv5" 127 | convolution_param { 128 | num_output: 256 129 | pad: 1 130 | kernel_size: 3 131 | group: 2 132 | } 133 | } 134 | layer { 135 | name: "relu5" 136 | type: "ReLU" 137 | bottom: "conv5" 138 | top: "conv5" 139 | } 140 | layer { 141 | name: "pool5" 142 | type: "Pooling" 143 | bottom: "conv5" 144 | top: "pool5" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "fc6" 153 | type: "InnerProduct" 154 | bottom: "pool5" 155 | top: "fc6" 156 | inner_product_param { 157 | num_output: 4096 158 | } 159 | } 160 | layer { 161 | name: "relu6" 162 | type: "ReLU" 163 | bottom: "fc6" 164 | top: "fc6" 165 | } 166 | layer { 167 | name: "drop6" 168 | type: "Dropout" 169 | bottom: "fc6" 170 | top: "fc6" 171 | dropout_param { 172 | dropout_ratio: 0.5 173 | } 174 | } 175 | layer { 176 | name: "fc7" 177 | type: "InnerProduct" 178 | bottom: "fc6" 179 | top: "fc7" 180 | inner_product_param { 181 | num_output: 4096 182 | } 183 | } 184 | layer { 185 | name: "relu7" 186 | type: "ReLU" 187 | bottom: "fc7" 188 | top: "fc7" 189 | } 190 | layer { 191 | name: "drop7" 192 | type: "Dropout" 193 | bottom: "fc7" 194 | top: "fc7" 195 | dropout_param { 196 | dropout_ratio: 0.5 197 | } 198 | } 199 | layer { 200 | name: "latent" 201 | type: "InnerProduct" 202 | bottom: "fc7" 203 | top: "latent" 204 | inner_product_param { 205 | num_output: 48 206 | } 207 | } 208 | layer { 209 | name: "fc8_new" 210 | type: "InnerProduct" 211 | bottom: "latent" 212 | top: "fc8_new" 213 | inner_product_param { 214 | num_output: 2 215 | } 216 | } 217 | layer { 218 | name: "prob" 219 | type: "Softmax" 220 | bottom: "fc8_new" 221 | top: "prob" 222 | } 223 | -------------------------------------------------------------------------------- /examples/facescrub/generate_feature_mat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Generate feature matrix 3 | 4 | Prepare files for image retrieval: 5 | image_files.npy 6 | fc7_features.npy 7 | latent_features.npy 8 | ''' 9 | import os 10 | import sys 11 | import numpy as np 12 | 13 | from convert_facescrub_data import get_all_images 14 | 15 | sys.path.append('../..') 16 | import config 17 | from layer_features import layer_features 18 | from retrieve import binary_hash_codes 19 | 20 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) 21 | 22 | 23 | def generate_feature_matrix(model_file, deploy_file, imagemean_file): 24 | """generate feature matrix of image dataset 25 | save the matrix as npy file""" 26 | image_files = get_all_images() 27 | np.random.shuffle(image_files) 28 | 29 | # feed the network and get feature vectors 30 | feature_mat = {'fc7': [], 'latent': []} 31 | 32 | batch = [] 33 | 
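    # accumulate image paths and flush them through the network in batches
    # of 1000; 'fc7' activations are kept as-is, while 'latent' activations
    # are converted to binary hash codes by binary_hash_codes() from
    # retrieve.py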
batch_size = 0 34 | for image in image_files: 35 | batch.append(image) 36 | batch_size += 1 37 | 38 | if batch_size == 1000: 39 | for layer, mat in layer_features(feature_mat.keys(), model_file, 40 | deploy_file, imagemean_file, 41 | batch): 42 | if layer == 'latent': 43 | mat = binary_hash_codes(mat) 44 | 45 | feature_mat[layer].extend(mat) 46 | 47 | batch = [] 48 | batch_size = 0 49 | 50 | if batch_size > 0: 51 | for layer, mat in layer_features(feature_mat.keys(), model_file, 52 | deploy_file, imagemean_file, batch): 53 | if layer == 'latent': 54 | mat = binary_hash_codes(mat) 55 | 56 | feature_mat[layer].extend(mat) 57 | 58 | # save to npy files 59 | np.save(os.path.join(SCRIPT_PATH, 'image_files.npy'), image_files) 60 | for layer in feature_mat.keys(): 61 | npy_file = os.path.join(SCRIPT_PATH, layer + '_features.npy') 62 | np.save(npy_file, np.array(feature_mat[layer])) 63 | 64 | 65 | if __name__ == '__main__': 66 | if len(sys.argv) != 4: 67 | usage = 'Usage: python generate_feature_mat.py' + \ 68 | ' model_file deploy_file imagemean_file' 69 | print(usage) 70 | else: 71 | model_file = sys.argv[1] 72 | deploy_file = sys.argv[2] 73 | imagemean_file = sys.argv[3] 74 | 75 | is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\ 76 | and os.path.exists(imagemean_file) 77 | 78 | if is_exists: 79 | generate_feature_matrix(model_file, deploy_file, imagemean_file) 80 | -------------------------------------------------------------------------------- /examples/facescrub/retrieve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # init env 4 | BASEDIR=$(dirname "$0") 5 | PROJROOT=$BASEDIR/../.. 6 | MODEL_FILE="$BASEDIR/facescrub_model_with_latent_layer_iter_10000.caffemodel" 7 | DEPLOY_FILE="$BASEDIR/deploy_with_latent_layer.prototxt" 8 | MEAN_FILE="$BASEDIR/facescrub_mean.npy" 9 | IMAGE_NPY="$BASEDIR/image_files.npy" 10 | FC7_NPY="$BASEDIR/fc7_features.npy" 11 | LATENT_NPY="$BASEDIR/latent_features.npy" 12 | TARGET="$1" 13 | 14 | # check model 15 | if [ ! -e $MODEL_FILE ] || [ ! -e $DEPLOY_FILE ] || [ ! -e $MEAN_FILE ]; then 16 | echo "Please train the model at first" 17 | echo "./train.sh" 18 | exit 19 | fi 20 | 21 | # parse parsemeters 22 | if [[ $TARGET == "-h" ]] || [[ $TARGET == "--help" ]]; then 23 | echo "Usage: ./retrieve.sh image_to_retrieve.jpg" 24 | exit 25 | fi 26 | 27 | if [ -e $IMAGE_NPY ] && [ -e $FC7_NPY ] && [ -e $LATENT_NPY ]; then 28 | python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET 29 | else 30 | echo "generate feature matrix..." 
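    # first run: build image_files.npy, fc7_features.npy and
    # latent_features.npy, then retrieve against the fresh matrices;
    # later runs take the branch above and reuse the cached .npy files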
31 | python $BASEDIR/generate_feature_mat.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE 32 | python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET 33 | fi 34 | 35 | -------------------------------------------------------------------------------- /examples/facescrub/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 5000 8 | display: 20 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 2000 13 | snapshot_prefix: "facescrub_model" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /examples/facescrub/solver_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val_with_latent_layer.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 5000 8 | display: 20 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 2000 13 | snapshot_prefix: "facescrub_model_with_latent_layer" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /examples/facescrub/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check caffe 4 | if [[ $CAFFEROOT == '' ]]; then 5 | echo "\$CAFFEROOT not found, please define it at first." 6 | echo "export CAFFEROOT=/path/to/your/caffe" 7 | exit 8 | fi 9 | 10 | # init env 11 | TOOLS=$CAFFEROOT/build/tools 12 | BASEDIR=$(dirname "$0") 13 | PROJROOT=$BASEDIR/../../ 14 | CURRDIR=$(pwd) 15 | 16 | # check dataset 17 | if [ ! -d $BASEDIR/facescrub_train_lmdb ] || [ ! 
-d $BASEDIR/facescrub_test_lmdb ] ; then 18 | echo "The facescrub dataset folder cannot found" 19 | echo "Please execute convert_facescrub_data.py at first" 20 | exit 21 | fi 22 | 23 | # compute mean 24 | $TOOLS/compute_image_mean $BASEDIR/facescrub_train_lmdb $BASEDIR/facescrub_mean.binaryproto 25 | python $PROJROOT/tools/convert_protomean.py $BASEDIR/facescrub_mean.binaryproto $BASEDIR/facescrub_mean.npy 26 | 27 | cd $BASEDIR 28 | 29 | # pretrain 30 | $TOOLS/caffe train --solver=solver.prototxt 2>&1 | tee pretrain.log 31 | 32 | # train net with latent layer 33 | $TOOLS/caffe train \ 34 | --solver=solver_with_latent_layer.prototxt \ 35 | --weights=facescrub_model_iter_10000.caffemodel 2>&1 | tee train.log 36 | 37 | cd $CURRDIR 38 | -------------------------------------------------------------------------------- /examples/facescrub/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | crop_size: 227 13 | mean_file: "facescrub_mean.binaryproto" 14 | } 15 | # mean pixel / channel-wise mean instead of mean image 16 | # transform_param { 17 | # crop_size: 227 18 | # mean_value: 104 19 | # mean_value: 117 20 | # mean_value: 123 21 | # mirror: true 22 | # } 23 | data_param { 24 | source: "facescrub_train_lmdb" 25 | batch_size: 256 26 | backend: LMDB 27 | } 28 | } 29 | layer { 30 | name: "data" 31 | type: "Data" 32 | top: "data" 33 | top: "label" 34 | include { 35 | phase: TEST 36 | } 37 | transform_param { 38 | mirror: false 39 | crop_size: 227 40 | mean_file: "facescrub_mean.binaryproto" 41 | } 42 | # mean pixel / channel-wise mean instead of mean image 43 | # transform_param { 44 | # crop_size: 227 45 | # mean_value: 104 46 | # mean_value: 117 47 | # mean_value: 123 48 | # mirror: false 49 | # } 50 | data_param { 51 | source: "facescrub_test_lmdb" 52 | batch_size: 50 53 | backend: LMDB 54 | } 55 | } 56 | layer { 57 | name: "conv1" 58 | type: "Convolution" 59 | bottom: "data" 60 | top: "conv1" 61 | param { 62 | lr_mult: 1 63 | decay_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 96 71 | kernel_size: 11 72 | stride: 4 73 | weight_filler { 74 | type: "gaussian" 75 | std: 0.01 76 | } 77 | bias_filler { 78 | type: "constant" 79 | value: 0 80 | } 81 | } 82 | } 83 | layer { 84 | name: "relu1" 85 | type: "ReLU" 86 | bottom: "conv1" 87 | top: "conv1" 88 | } 89 | layer { 90 | name: "pool1" 91 | type: "Pooling" 92 | bottom: "conv1" 93 | top: "pool1" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | layer { 101 | name: "norm1" 102 | type: "LRN" 103 | bottom: "pool1" 104 | top: "norm1" 105 | lrn_param { 106 | local_size: 5 107 | alpha: 0.0001 108 | beta: 0.75 109 | } 110 | } 111 | layer { 112 | name: "conv2" 113 | type: "Convolution" 114 | bottom: "norm1" 115 | top: "conv2" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 256 126 | pad: 2 127 | kernel_size: 5 128 | group: 2 129 | weight_filler { 130 | type: "gaussian" 131 | std: 0.01 132 | } 133 | bias_filler { 134 | type: "constant" 135 | value: 1 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2" 141 | type: "ReLU" 142 | bottom: "conv2" 143 | top: "conv2" 144 | } 145 | layer { 146 | name: "pool2" 147 | 
type: "Pooling" 148 | bottom: "conv2" 149 | top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 3 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "norm2" 158 | type: "LRN" 159 | bottom: "pool2" 160 | top: "norm2" 161 | lrn_param { 162 | local_size: 5 163 | alpha: 0.0001 164 | beta: 0.75 165 | } 166 | } 167 | layer { 168 | name: "conv3" 169 | type: "Convolution" 170 | bottom: "norm2" 171 | top: "conv3" 172 | param { 173 | lr_mult: 1 174 | decay_mult: 1 175 | } 176 | param { 177 | lr_mult: 2 178 | decay_mult: 0 179 | } 180 | convolution_param { 181 | num_output: 384 182 | pad: 1 183 | kernel_size: 3 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | } 194 | layer { 195 | name: "relu3" 196 | type: "ReLU" 197 | bottom: "conv3" 198 | top: "conv3" 199 | } 200 | layer { 201 | name: "conv4" 202 | type: "Convolution" 203 | bottom: "conv3" 204 | top: "conv4" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | convolution_param { 214 | num_output: 384 215 | pad: 1 216 | kernel_size: 3 217 | group: 2 218 | weight_filler { 219 | type: "gaussian" 220 | std: 0.01 221 | } 222 | bias_filler { 223 | type: "constant" 224 | value: 1 225 | } 226 | } 227 | } 228 | layer { 229 | name: "relu4" 230 | type: "ReLU" 231 | bottom: "conv4" 232 | top: "conv4" 233 | } 234 | layer { 235 | name: "conv5" 236 | type: "Convolution" 237 | bottom: "conv4" 238 | top: "conv5" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | param { 244 | lr_mult: 2 245 | decay_mult: 0 246 | } 247 | convolution_param { 248 | num_output: 256 249 | pad: 1 250 | kernel_size: 3 251 | group: 2 252 | weight_filler { 253 | type: "gaussian" 254 | std: 0.01 255 | } 256 | bias_filler { 257 | type: "constant" 258 | value: 1 259 | } 260 | } 261 | } 262 | layer { 263 | name: "relu5" 264 | type: "ReLU" 265 | bottom: "conv5" 266 | top: "conv5" 267 | } 268 | layer { 269 | name: "pool5" 270 | type: "Pooling" 271 | bottom: "conv5" 272 | top: "pool5" 273 | pooling_param { 274 | pool: MAX 275 | kernel_size: 3 276 | stride: 2 277 | } 278 | } 279 | layer { 280 | name: "fc6" 281 | type: "InnerProduct" 282 | bottom: "pool5" 283 | top: "fc6" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 4096 294 | weight_filler { 295 | type: "gaussian" 296 | std: 0.005 297 | } 298 | bias_filler { 299 | type: "constant" 300 | value: 1 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu6" 306 | type: "ReLU" 307 | bottom: "fc6" 308 | top: "fc6" 309 | } 310 | layer { 311 | name: "drop6" 312 | type: "Dropout" 313 | bottom: "fc6" 314 | top: "fc6" 315 | dropout_param { 316 | dropout_ratio: 0.5 317 | } 318 | } 319 | layer { 320 | name: "fc7" 321 | type: "InnerProduct" 322 | bottom: "fc6" 323 | top: "fc7" 324 | param { 325 | lr_mult: 1 326 | decay_mult: 1 327 | } 328 | param { 329 | lr_mult: 2 330 | decay_mult: 0 331 | } 332 | inner_product_param { 333 | num_output: 4096 334 | weight_filler { 335 | type: "gaussian" 336 | std: 0.005 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 1 341 | } 342 | } 343 | } 344 | layer { 345 | name: "relu7" 346 | type: "ReLU" 347 | bottom: "fc7" 348 | top: "fc7" 349 | } 350 | layer { 351 | name: "drop7" 352 | type: "Dropout" 353 | bottom: "fc7" 354 | top: "fc7" 355 | dropout_param { 356 | dropout_ratio: 0.5 357 | } 358 | 
} 359 | layer { 360 | name: "fc8" 361 | type: "InnerProduct" 362 | bottom: "fc7" 363 | top: "fc8" 364 | param { 365 | lr_mult: 1 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2 370 | decay_mult: 0 371 | } 372 | inner_product_param { 373 | num_output: 2 374 | weight_filler { 375 | type: "gaussian" 376 | std: 0.01 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0 381 | } 382 | } 383 | } 384 | layer { 385 | name: "accuracy" 386 | type: "Accuracy" 387 | bottom: "fc8" 388 | bottom: "label" 389 | top: "accuracy" 390 | include { 391 | phase: TEST 392 | } 393 | } 394 | layer { 395 | name: "loss" 396 | type: "SoftmaxWithLoss" 397 | bottom: "fc8" 398 | bottom: "label" 399 | top: "loss" 400 | } 401 | -------------------------------------------------------------------------------- /examples/facescrub/train_val_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | crop_size: 227 13 | mean_file: "facescrub_mean.binaryproto" 14 | } 15 | # mean pixel / channel-wise mean instead of mean image 16 | # transform_param { 17 | # crop_size: 227 18 | # mean_value: 104 19 | # mean_value: 117 20 | # mean_value: 123 21 | # mirror: true 22 | # } 23 | data_param { 24 | source: "facescrub_train_lmdb" 25 | batch_size: 256 26 | backend: LMDB 27 | } 28 | } 29 | layer { 30 | name: "data" 31 | type: "Data" 32 | top: "data" 33 | top: "label" 34 | include { 35 | phase: TEST 36 | } 37 | transform_param { 38 | mirror: false 39 | crop_size: 227 40 | mean_file: "facescrub_mean.binaryproto" 41 | } 42 | # mean pixel / channel-wise mean instead of mean image 43 | # transform_param { 44 | # crop_size: 227 45 | # mean_value: 104 46 | # mean_value: 117 47 | # mean_value: 123 48 | # mirror: false 49 | # } 50 | data_param { 51 | source: "facescrub_test_lmdb" 52 | batch_size: 50 53 | backend: LMDB 54 | } 55 | } 56 | layer { 57 | name: "conv1" 58 | type: "Convolution" 59 | bottom: "data" 60 | top: "conv1" 61 | param { 62 | lr_mult: 1 63 | decay_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 96 71 | kernel_size: 11 72 | stride: 4 73 | weight_filler { 74 | type: "gaussian" 75 | std: 0.01 76 | } 77 | bias_filler { 78 | type: "constant" 79 | value: 0 80 | } 81 | } 82 | } 83 | layer { 84 | name: "relu1" 85 | type: "ReLU" 86 | bottom: "conv1" 87 | top: "conv1" 88 | } 89 | layer { 90 | name: "pool1" 91 | type: "Pooling" 92 | bottom: "conv1" 93 | top: "pool1" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | layer { 101 | name: "norm1" 102 | type: "LRN" 103 | bottom: "pool1" 104 | top: "norm1" 105 | lrn_param { 106 | local_size: 5 107 | alpha: 0.0001 108 | beta: 0.75 109 | } 110 | } 111 | layer { 112 | name: "conv2" 113 | type: "Convolution" 114 | bottom: "norm1" 115 | top: "conv2" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 256 126 | pad: 2 127 | kernel_size: 5 128 | group: 2 129 | weight_filler { 130 | type: "gaussian" 131 | std: 0.01 132 | } 133 | bias_filler { 134 | type: "constant" 135 | value: 1 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2" 141 | type: "ReLU" 142 | bottom: "conv2" 143 | top: "conv2" 144 | } 145 | layer { 146 | name: "pool2" 147 | 
type: "Pooling" 148 | bottom: "conv2" 149 | top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 3 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "norm2" 158 | type: "LRN" 159 | bottom: "pool2" 160 | top: "norm2" 161 | lrn_param { 162 | local_size: 5 163 | alpha: 0.0001 164 | beta: 0.75 165 | } 166 | } 167 | layer { 168 | name: "conv3" 169 | type: "Convolution" 170 | bottom: "norm2" 171 | top: "conv3" 172 | param { 173 | lr_mult: 1 174 | decay_mult: 1 175 | } 176 | param { 177 | lr_mult: 2 178 | decay_mult: 0 179 | } 180 | convolution_param { 181 | num_output: 384 182 | pad: 1 183 | kernel_size: 3 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | } 194 | layer { 195 | name: "relu3" 196 | type: "ReLU" 197 | bottom: "conv3" 198 | top: "conv3" 199 | } 200 | layer { 201 | name: "conv4" 202 | type: "Convolution" 203 | bottom: "conv3" 204 | top: "conv4" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | convolution_param { 214 | num_output: 384 215 | pad: 1 216 | kernel_size: 3 217 | group: 2 218 | weight_filler { 219 | type: "gaussian" 220 | std: 0.01 221 | } 222 | bias_filler { 223 | type: "constant" 224 | value: 1 225 | } 226 | } 227 | } 228 | layer { 229 | name: "relu4" 230 | type: "ReLU" 231 | bottom: "conv4" 232 | top: "conv4" 233 | } 234 | layer { 235 | name: "conv5" 236 | type: "Convolution" 237 | bottom: "conv4" 238 | top: "conv5" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | param { 244 | lr_mult: 2 245 | decay_mult: 0 246 | } 247 | convolution_param { 248 | num_output: 256 249 | pad: 1 250 | kernel_size: 3 251 | group: 2 252 | weight_filler { 253 | type: "gaussian" 254 | std: 0.01 255 | } 256 | bias_filler { 257 | type: "constant" 258 | value: 1 259 | } 260 | } 261 | } 262 | layer { 263 | name: "relu5" 264 | type: "ReLU" 265 | bottom: "conv5" 266 | top: "conv5" 267 | } 268 | layer { 269 | name: "pool5" 270 | type: "Pooling" 271 | bottom: "conv5" 272 | top: "pool5" 273 | pooling_param { 274 | pool: MAX 275 | kernel_size: 3 276 | stride: 2 277 | } 278 | } 279 | layer { 280 | name: "fc6" 281 | type: "InnerProduct" 282 | bottom: "pool5" 283 | top: "fc6" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 4096 294 | weight_filler { 295 | type: "gaussian" 296 | std: 0.005 297 | } 298 | bias_filler { 299 | type: "constant" 300 | value: 1 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu6" 306 | type: "ReLU" 307 | bottom: "fc6" 308 | top: "fc6" 309 | } 310 | layer { 311 | name: "drop6" 312 | type: "Dropout" 313 | bottom: "fc6" 314 | top: "fc6" 315 | dropout_param { 316 | dropout_ratio: 0.5 317 | } 318 | } 319 | layer { 320 | name: "fc7" 321 | type: "InnerProduct" 322 | bottom: "fc6" 323 | top: "fc7" 324 | param { 325 | lr_mult: 1 326 | decay_mult: 1 327 | } 328 | param { 329 | lr_mult: 2 330 | decay_mult: 0 331 | } 332 | inner_product_param { 333 | num_output: 4096 334 | weight_filler { 335 | type: "gaussian" 336 | std: 0.005 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 1 341 | } 342 | } 343 | } 344 | layer { 345 | name: "relu7" 346 | type: "ReLU" 347 | bottom: "fc7" 348 | top: "fc7" 349 | } 350 | layer { 351 | name: "drop7" 352 | type: "Dropout" 353 | bottom: "fc7" 354 | top: "fc7" 355 | dropout_param { 356 | dropout_ratio: 0.5 357 | } 358 | 
} 359 | layer { 360 | name: "latent" 361 | type: "InnerProduct" 362 | bottom: "fc7" 363 | top: "latent" 364 | param { 365 | lr_mult: 1 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2 370 | decay_mult: 0 371 | } 372 | inner_product_param { 373 | num_output: 48 374 | weight_filler { 375 | type: "gaussian" 376 | std: 0.01 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0 381 | } 382 | } 383 | } 384 | layer { 385 | name: "fc8_new" 386 | type: "InnerProduct" 387 | bottom: "latent" 388 | top: "fc8_new" 389 | param { 390 | lr_mult: 1 391 | decay_mult: 1 392 | } 393 | param { 394 | lr_mult: 2 395 | decay_mult: 0 396 | } 397 | inner_product_param { 398 | num_output: 2 399 | weight_filler { 400 | type: "gaussian" 401 | std: 0.01 402 | } 403 | bias_filler { 404 | type: "constant" 405 | value: 0 406 | } 407 | } 408 | } 409 | layer { 410 | name: "accuracy" 411 | type: "Accuracy" 412 | bottom: "fc8_new" 413 | bottom: "label" 414 | top: "accuracy" 415 | include { 416 | phase: TEST 417 | } 418 | } 419 | layer { 420 | name: "loss" 421 | type: "SoftmaxWithLoss" 422 | bottom: "fc8_new" 423 | bottom: "label" 424 | top: "loss" 425 | } 426 | -------------------------------------------------------------------------------- /examples/shoes7k/convert_shoes7k_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Convert shoes7k dataset to train/test lmdb dataset 3 | ''' 4 | import os 5 | import cv2 6 | import lmdb 7 | import numpy as np 8 | from caffe.proto import caffe_pb2 9 | 10 | import config 11 | 12 | 13 | IM_HEIGHT = 227 14 | IM_WIDTH = 227 15 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) 16 | TRAIN_LMDB = os.path.join(SCRIPT_PATH, 'shoes7k_train_lmdb') 17 | TEST_LMDB = os.path.join(SCRIPT_PATH, 'shoes7k_test_lmdb') 18 | 19 | 20 | def get_images(path): 21 | """get images under path into a numpy array""" 22 | image_types = ['.jpg', '.png'] 23 | images = [os.path.join(path, i) for i in os.listdir(path) if i[-4:] 24 | in image_types] 25 | return np.array(images) 26 | 27 | 28 | def get_tr_te_images(ratio): 29 | """get training and test images""" 30 | pos_images = get_images(config.eg_shoes7k_pos_path) 31 | neg_images = get_images(config.eg_shoes7k_neg_path) 32 | np.random.shuffle(pos_images) 33 | np.random.shuffle(neg_images) 34 | 35 | pos_split = int(pos_images.shape[0] * ratio) 36 | pos_train_images = pos_images[:pos_split] 37 | pos_test_images = pos_images[pos_split:] 38 | pos_train_labels = np.ones(pos_train_images.shape[0]).astype('int') 39 | pos_test_labels = np.ones(pos_test_images.shape[0]).astype('int') 40 | 41 | neg_split = int(neg_images.shape[0] * ratio) 42 | neg_train_images = neg_images[:neg_split] 43 | neg_test_images = neg_images[neg_split:] 44 | neg_train_labels = np.zeros(neg_train_images.shape[0]).astype('int') 45 | neg_test_labels = np.zeros(neg_test_images.shape[0]).astype('int') 46 | 47 | train_images = np.concatenate((pos_train_images, neg_train_images)) 48 | train_labels = np.concatenate((pos_train_labels, neg_train_labels)) 49 | test_images = np.concatenate((pos_test_images, neg_test_images)) 50 | test_labels = np.concatenate((pos_test_labels, neg_test_labels)) 51 | 52 | # shuffle 53 | train_idxs = np.arange(train_images.shape[0]) 54 | test_idxs = np.arange(test_images.shape[0]) 55 | np.random.shuffle(train_idxs) 56 | np.random.shuffle(test_idxs) 57 | return (train_images[train_idxs], train_labels[train_idxs], 58 | test_images[test_idxs], test_labels[test_idxs]) 59 | 60 | 61 | def save_to_lmdb(images, 
labels, lmdb_file): 62 | if not os.path.exists(lmdb_file): 63 | batch_size = 256 64 | lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12)) 65 | lmdb_txn = lmdb_env.begin(write=True) 66 | item_id = 0 67 | datum = caffe_pb2.Datum() 68 | 69 | for i in range(images.shape[0]): 70 | im = cv2.imread(images[i]) 71 | im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH)) 72 | datum.channels = im.shape[2] 73 | datum.height = im.shape[0] 74 | datum.width = im.shape[1] 75 | datum.data = im.tobytes() 76 | datum.label = labels[i] 77 | keystr = '{:0>8d}'.format(item_id) 78 | lmdb_txn.put(keystr, datum.SerializeToString()) 79 | 80 | # write batch 81 | if (item_id + 1) % batch_size == 0: 82 | lmdb_txn.commit() 83 | lmdb_txn = lmdb_env.begin(write=True) 84 | print('converted {} images'.format(item_id + 1)) 85 | 86 | item_id += 1 87 | 88 | # write last batch 89 | if (item_id + 1) % batch_size != 0: 90 | lmdb_txn.commit() 91 | print('converted {} images'.format(item_id + 1)) 92 | print('Generated ' + lmdb_file) 93 | else: 94 | print(lmdb_file + ' already exists') 95 | 96 | 97 | def convert_shoes7k_data(ratio=.8): 98 | """read shoes7k pos and neg images and convert to lmdb""" 99 | tr_images, tr_labels, te_images, te_labels = get_tr_te_images(ratio) 100 | save_to_lmdb(tr_images, tr_labels, TRAIN_LMDB) 101 | save_to_lmdb(te_images, te_labels, TEST_LMDB) 102 | 103 | 104 | if __name__ == '__main__': 105 | convert_shoes7k_data() 106 | -------------------------------------------------------------------------------- /examples/shoes7k/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 96 15 | kernel_size: 11 16 | stride: 4 17 | } 18 | } 19 | layer { 20 | name: "relu1" 21 | type: "ReLU" 22 | bottom: "conv1" 23 | top: "conv1" 24 | } 25 | layer { 26 | name: "pool1" 27 | type: "Pooling" 28 | bottom: "conv1" 29 | top: "pool1" 30 | pooling_param { 31 | pool: MAX 32 | kernel_size: 3 33 | stride: 2 34 | } 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "pool1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0001 44 | beta: 0.75 45 | } 46 | } 47 | layer { 48 | name: "conv2" 49 | type: "Convolution" 50 | bottom: "norm1" 51 | top: "conv2" 52 | convolution_param { 53 | num_output: 256 54 | pad: 2 55 | kernel_size: 5 56 | group: 2 57 | } 58 | } 59 | layer { 60 | name: "relu2" 61 | type: "ReLU" 62 | bottom: "conv2" 63 | top: "conv2" 64 | } 65 | layer { 66 | name: "pool2" 67 | type: "Pooling" 68 | bottom: "conv2" 69 | top: "pool2" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 3 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "norm2" 78 | type: "LRN" 79 | bottom: "pool2" 80 | top: "norm2" 81 | lrn_param { 82 | local_size: 5 83 | alpha: 0.0001 84 | beta: 0.75 85 | } 86 | } 87 | layer { 88 | name: "conv3" 89 | type: "Convolution" 90 | bottom: "norm2" 91 | top: "conv3" 92 | convolution_param { 93 | num_output: 384 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | name: "relu3" 100 | type: "ReLU" 101 | bottom: "conv3" 102 | top: "conv3" 103 | } 104 | layer { 105 | name: "conv4" 106 | type: "Convolution" 107 | bottom: "conv3" 108 | top: "conv4" 109 | convolution_param { 110 | num_output: 384 111 | pad: 1 112 | kernel_size: 3 113 | group: 2 114 | } 115 | } 
116 | layer { 117 | name: "relu4" 118 | type: "ReLU" 119 | bottom: "conv4" 120 | top: "conv4" 121 | } 122 | layer { 123 | name: "conv5" 124 | type: "Convolution" 125 | bottom: "conv4" 126 | top: "conv5" 127 | convolution_param { 128 | num_output: 256 129 | pad: 1 130 | kernel_size: 3 131 | group: 2 132 | } 133 | } 134 | layer { 135 | name: "relu5" 136 | type: "ReLU" 137 | bottom: "conv5" 138 | top: "conv5" 139 | } 140 | layer { 141 | name: "pool5" 142 | type: "Pooling" 143 | bottom: "conv5" 144 | top: "pool5" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "fc6" 153 | type: "InnerProduct" 154 | bottom: "pool5" 155 | top: "fc6" 156 | inner_product_param { 157 | num_output: 4096 158 | } 159 | } 160 | layer { 161 | name: "relu6" 162 | type: "ReLU" 163 | bottom: "fc6" 164 | top: "fc6" 165 | } 166 | layer { 167 | name: "drop6" 168 | type: "Dropout" 169 | bottom: "fc6" 170 | top: "fc6" 171 | dropout_param { 172 | dropout_ratio: 0.5 173 | } 174 | } 175 | layer { 176 | name: "fc7" 177 | type: "InnerProduct" 178 | bottom: "fc6" 179 | top: "fc7" 180 | inner_product_param { 181 | num_output: 4096 182 | } 183 | } 184 | layer { 185 | name: "relu7" 186 | type: "ReLU" 187 | bottom: "fc7" 188 | top: "fc7" 189 | } 190 | layer { 191 | name: "drop7" 192 | type: "Dropout" 193 | bottom: "fc7" 194 | top: "fc7" 195 | dropout_param { 196 | dropout_ratio: 0.5 197 | } 198 | } 199 | layer { 200 | name: "fc8" 201 | type: "InnerProduct" 202 | bottom: "fc7" 203 | top: "fc8" 204 | inner_product_param { 205 | num_output: 2 206 | } 207 | } 208 | layer { 209 | name: "prob" 210 | type: "Softmax" 211 | bottom: "fc8" 212 | top: "prob" 213 | } 214 | -------------------------------------------------------------------------------- /examples/shoes7k/deploy_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | convolution_param { 14 | num_output: 96 15 | kernel_size: 11 16 | stride: 4 17 | } 18 | } 19 | layer { 20 | name: "relu1" 21 | type: "ReLU" 22 | bottom: "conv1" 23 | top: "conv1" 24 | } 25 | layer { 26 | name: "pool1" 27 | type: "Pooling" 28 | bottom: "conv1" 29 | top: "pool1" 30 | pooling_param { 31 | pool: MAX 32 | kernel_size: 3 33 | stride: 2 34 | } 35 | } 36 | layer { 37 | name: "norm1" 38 | type: "LRN" 39 | bottom: "pool1" 40 | top: "norm1" 41 | lrn_param { 42 | local_size: 5 43 | alpha: 0.0001 44 | beta: 0.75 45 | } 46 | } 47 | layer { 48 | name: "conv2" 49 | type: "Convolution" 50 | bottom: "norm1" 51 | top: "conv2" 52 | convolution_param { 53 | num_output: 256 54 | pad: 2 55 | kernel_size: 5 56 | group: 2 57 | } 58 | } 59 | layer { 60 | name: "relu2" 61 | type: "ReLU" 62 | bottom: "conv2" 63 | top: "conv2" 64 | } 65 | layer { 66 | name: "pool2" 67 | type: "Pooling" 68 | bottom: "conv2" 69 | top: "pool2" 70 | pooling_param { 71 | pool: MAX 72 | kernel_size: 3 73 | stride: 2 74 | } 75 | } 76 | layer { 77 | name: "norm2" 78 | type: "LRN" 79 | bottom: "pool2" 80 | top: "norm2" 81 | lrn_param { 82 | local_size: 5 83 | alpha: 0.0001 84 | beta: 0.75 85 | } 86 | } 87 | layer { 88 | name: "conv3" 89 | type: "Convolution" 90 | bottom: "norm2" 91 | top: "conv3" 92 | convolution_param { 93 | num_output: 384 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | 
layer { 99 | name: "relu3" 100 | type: "ReLU" 101 | bottom: "conv3" 102 | top: "conv3" 103 | } 104 | layer { 105 | name: "conv4" 106 | type: "Convolution" 107 | bottom: "conv3" 108 | top: "conv4" 109 | convolution_param { 110 | num_output: 384 111 | pad: 1 112 | kernel_size: 3 113 | group: 2 114 | } 115 | } 116 | layer { 117 | name: "relu4" 118 | type: "ReLU" 119 | bottom: "conv4" 120 | top: "conv4" 121 | } 122 | layer { 123 | name: "conv5" 124 | type: "Convolution" 125 | bottom: "conv4" 126 | top: "conv5" 127 | convolution_param { 128 | num_output: 256 129 | pad: 1 130 | kernel_size: 3 131 | group: 2 132 | } 133 | } 134 | layer { 135 | name: "relu5" 136 | type: "ReLU" 137 | bottom: "conv5" 138 | top: "conv5" 139 | } 140 | layer { 141 | name: "pool5" 142 | type: "Pooling" 143 | bottom: "conv5" 144 | top: "pool5" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "fc6" 153 | type: "InnerProduct" 154 | bottom: "pool5" 155 | top: "fc6" 156 | inner_product_param { 157 | num_output: 4096 158 | } 159 | } 160 | layer { 161 | name: "relu6" 162 | type: "ReLU" 163 | bottom: "fc6" 164 | top: "fc6" 165 | } 166 | layer { 167 | name: "drop6" 168 | type: "Dropout" 169 | bottom: "fc6" 170 | top: "fc6" 171 | dropout_param { 172 | dropout_ratio: 0.5 173 | } 174 | } 175 | layer { 176 | name: "fc7" 177 | type: "InnerProduct" 178 | bottom: "fc6" 179 | top: "fc7" 180 | inner_product_param { 181 | num_output: 4096 182 | } 183 | } 184 | layer { 185 | name: "relu7" 186 | type: "ReLU" 187 | bottom: "fc7" 188 | top: "fc7" 189 | } 190 | layer { 191 | name: "drop7" 192 | type: "Dropout" 193 | bottom: "fc7" 194 | top: "fc7" 195 | dropout_param { 196 | dropout_ratio: 0.5 197 | } 198 | } 199 | layer { 200 | name: "latent" 201 | type: "InnerProduct" 202 | bottom: "fc7" 203 | top: "latent" 204 | inner_product_param { 205 | num_output: 48 206 | } 207 | } 208 | layer { 209 | name: "fc8_new" 210 | type: "InnerProduct" 211 | bottom: "latent" 212 | top: "fc8_new" 213 | inner_product_param { 214 | num_output: 2 215 | } 216 | } 217 | layer { 218 | name: "prob" 219 | type: "Softmax" 220 | bottom: "fc8_new" 221 | top: "prob" 222 | } 223 | -------------------------------------------------------------------------------- /examples/shoes7k/eval.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Evaluation 3 | 4 | Find the top-5 similar images 5 | ''' 6 | import os 7 | import sys 8 | import subprocess 9 | 10 | from convert_shoes7k_data import get_images 11 | 12 | sys.path.append('../..') 13 | import config 14 | from retrieve import retrieve_image 15 | 16 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) 17 | EVAL_PATH = os.path.join(SCRIPT_PATH, 'evaluation') 18 | 19 | # define model params 20 | model_file = 'shoes7k_model_with_latent_layer_iter_10000.caffemodel' 21 | deploy_file = 'deploy_with_latent_layer.prototxt' 22 | imagemean_file = 'shoes7k_mean.npy' 23 | MODEL_FILE = os.path.join(SCRIPT_PATH, model_file) 24 | DEPLOY_FILE = os.path.join(SCRIPT_PATH, deploy_file) 25 | IMAGE_MEAN = os.path.join(SCRIPT_PATH, imagemean_file) 26 | 27 | 28 | def retrieve_single_image(image_file, d_threshold): 29 | """retrive similar images and copy 30 | the retrieved images to evaluation folder""" 31 | retrieved, dist = retrieve_image(image_file, MODEL_FILE, DEPLOY_FILE, 32 | IMAGE_MEAN) 33 | if dist[-1] < d_threshold and len(dist) > 1: 34 | # this is a image that has acceptable similar top-5 images 35 | print('Retrieved image ' + 
image_file) 36 | image_name = os.path.basename(image_file) 37 | image_name = image_name.split('.')[0] 38 | eval_res_dir = os.path.join(EVAL_PATH, image_name) 39 | if not os.path.exists(eval_res_dir): 40 | os.mkdir(eval_res_dir) 41 | 42 | image_id = 0 43 | for similar_img in retrieved: 44 | img_name = os.path.basename(similar_img) 45 | res_img = '_'.join([str(image_id), img_name]) 46 | res_img = os.path.join(eval_res_dir, res_img) 47 | subprocess.call(['cp', similar_img, res_img]) 48 | image_id += 1 49 | 50 | 51 | def eval_shoes7k(d_threshold=2): 52 | """Evaluate through all positive images""" 53 | if not os.path.exists(EVAL_PATH): 54 | os.mkdir(EVAL_PATH) 55 | 56 | images = get_images(config.eg_shoes7k_pos_path) 57 | for image_file in images: 58 | retrieve_single_image(image_file, d_threshold) 59 | 60 | 61 | if __name__ == '__main__': 62 | eval_shoes7k() 63 | -------------------------------------------------------------------------------- /examples/shoes7k/generate_feature_mat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Generate feature matrix 3 | 4 | Prepare files for image retrieval: 5 | image_files.npy 6 | fc7_features.npy 7 | latent_features.npy 8 | ''' 9 | import os 10 | import sys 11 | import numpy as np 12 | 13 | from convert_shoes7k_data import get_images 14 | 15 | sys.path.append('../..') 16 | import config 17 | from layer_features import layer_features 18 | from retrieve import binary_hash_codes 19 | 20 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) 21 | 22 | 23 | def generate_feature_matrix(model_file, deploy_file, imagemean_file): 24 | """generate feature matrix of image dataset 25 | save the matrix as npy file""" 26 | # get image files 27 | pos_images = get_images(config.eg_shoes7k_pos_path) 28 | neg_images = get_images(config.eg_shoes7k_neg_path) 29 | 30 | image_files = np.concatenate((pos_images, neg_images)) 31 | np.random.shuffle(image_files) 32 | 33 | # feed the network and get feature vectors 34 | feature_mat = {'fc7': [], 'latent': []} 35 | 36 | batch = [] 37 | batch_size = 0 38 | for image in image_files: 39 | batch.append(image) 40 | batch_size += 1 41 | 42 | if batch_size == 1000: 43 | for layer, mat in layer_features(feature_mat.keys(), model_file, 44 | deploy_file, imagemean_file, 45 | batch): 46 | if layer == 'latent': 47 | mat = binary_hash_codes(mat) 48 | 49 | feature_mat[layer].extend(mat) 50 | 51 | batch = [] 52 | batch_size = 0 53 | 54 | if batch_size > 0: 55 | for layer, mat in layer_features(feature_mat.keys(), model_file, 56 | deploy_file, imagemean_file, batch): 57 | if layer == 'latent': 58 | mat = binary_hash_codes(mat) 59 | 60 | feature_mat[layer].extend(mat) 61 | 62 | # save to npy files 63 | np.save(os.path.join(SCRIPT_PATH, 'image_files.npy'), image_files) 64 | for layer in feature_mat.keys(): 65 | npy_file = os.path.join(SCRIPT_PATH, layer + '_features.npy') 66 | np.save(npy_file, np.array(feature_mat[layer])) 67 | 68 | 69 | if __name__ == '__main__': 70 | if len(sys.argv) != 4: 71 | usage = 'Usage: python generate_feature_mat.py' + \ 72 | ' model_file deploy_file imagemean_file' 73 | print(usage) 74 | else: 75 | model_file = sys.argv[1] 76 | deploy_file = sys.argv[2] 77 | imagemean_file = sys.argv[3] 78 | 79 | is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\ 80 | and os.path.exists(imagemean_file) 81 | 82 | if is_exists: 83 | generate_feature_matrix(model_file, deploy_file, imagemean_file) 84 | 
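# NOTE: binary_hash_codes() is imported from retrieve.py, whose source is
# not part of this listing. A minimal sketch, assuming the latent
# activations are binarized by simple thresholding, would be:
#
#     def binary_hash_codes(mat):
#         return (np.array(mat) > 0.5).astype(int)
#
# i.e. each 48-d latent vector becomes a 48-bit binary code.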
-------------------------------------------------------------------------------- /examples/shoes7k/retrieve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # init env 4 | BASEDIR=$(dirname "$0") 5 | PROJROOT=$BASEDIR/../.. 6 | MODEL_FILE="$BASEDIR/shoes7k_model_with_latent_layer_iter_10000.caffemodel" 7 | DEPLOY_FILE="$BASEDIR/deploy_with_latent_layer.prototxt" 8 | MEAN_FILE="$BASEDIR/shoes7k_mean.npy" 9 | IMAGE_NPY="$BASEDIR/image_files.npy" 10 | FC7_NPY="$BASEDIR/fc7_features.npy" 11 | LATENT_NPY="$BASEDIR/latent_features.npy" 12 | TARGET="$1" 13 | 14 | # check model 15 | if [ ! -e $MODEL_FILE ] || [ ! -e $DEPLOY_FILE ] || [ ! -e $MEAN_FILE ]; then 16 | echo "Please train the model first" 17 | echo "./train.sh" 18 | exit 19 | fi 20 | 21 | # parse parameters 22 | if [[ $TARGET == "-h" ]] || [[ $TARGET == "--help" ]]; then 23 | echo "Usage: ./retrieve.sh image_to_retrieve.jpg" 24 | exit 25 | fi 26 | 27 | if [ -e $IMAGE_NPY ] && [ -e $FC7_NPY ] && [ -e $LATENT_NPY ]; then 28 | python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET 29 | else 30 | echo "generate feature matrix..." 31 | python $BASEDIR/generate_feature_mat.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE 32 | python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET 33 | fi 34 | 35 | -------------------------------------------------------------------------------- /examples/shoes7k/solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 5000 8 | display: 20 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 2000 13 | snapshot_prefix: "shoes7k_model" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /examples/shoes7k/solver_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | net: "train_val_with_latent_layer.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | base_lr: 0.001 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 5000 8 | display: 20 9 | max_iter: 10000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 2000 13 | snapshot_prefix: "shoes7k_model_with_latent_layer" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /examples/shoes7k/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check caffe 4 | if [[ $CAFFEROOT == '' ]]; then 5 | echo "\$CAFFEROOT not found, please define it first." 6 | echo "export CAFFEROOT=/path/to/your/caffe" 7 | exit 8 | fi 9 | 10 | # init env 11 | TOOLS=$CAFFEROOT/build/tools 12 | BASEDIR=$(dirname "$0") 13 | PROJROOT=$BASEDIR/../../ 14 | CURRDIR=$(pwd) 15 | 16 | # check dataset 17 | if [ ! -d $BASEDIR/shoes7k_train_lmdb ] || [ !
-d $BASEDIR/shoes7k_test_lmdb ] ; then 18 | echo "The shoes7k dataset folder cannot be found" 19 | echo "Please execute convert_shoes7k_data.py first" 20 | exit 21 | fi 22 | 23 | # compute mean 24 | $TOOLS/compute_image_mean $BASEDIR/shoes7k_train_lmdb $BASEDIR/shoes7k_mean.binaryproto 25 | python $PROJROOT/tools/convert_protomean.py $BASEDIR/shoes7k_mean.binaryproto $BASEDIR/shoes7k_mean.npy 26 | 27 | cd $BASEDIR 28 | 29 | # pretrain 30 | $TOOLS/caffe train --solver=solver.prototxt 2>&1 | tee pretrain.log 31 | 32 | # train net with latent layer 33 | $TOOLS/caffe train \ 34 | --solver=solver_with_latent_layer.prototxt \ 35 | --weights=shoes7k_model_iter_10000.caffemodel 2>&1 | tee train.log 36 | 37 | cd $CURRDIR 38 | -------------------------------------------------------------------------------- /examples/shoes7k/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | crop_size: 227 13 | mean_file: "shoes7k_mean.binaryproto" 14 | } 15 | # mean pixel / channel-wise mean instead of mean image 16 | # transform_param { 17 | # crop_size: 227 18 | # mean_value: 104 19 | # mean_value: 117 20 | # mean_value: 123 21 | # mirror: true 22 | # } 23 | data_param { 24 | source: "shoes7k_train_lmdb" 25 | batch_size: 256 26 | backend: LMDB 27 | } 28 | } 29 | layer { 30 | name: "data" 31 | type: "Data" 32 | top: "data" 33 | top: "label" 34 | include { 35 | phase: TEST 36 | } 37 | transform_param { 38 | mirror: false 39 | crop_size: 227 40 | mean_file: "shoes7k_mean.binaryproto" 41 | } 42 | # mean pixel / channel-wise mean instead of mean image 43 | # transform_param { 44 | # crop_size: 227 45 | # mean_value: 104 46 | # mean_value: 117 47 | # mean_value: 123 48 | # mirror: false 49 | # } 50 | data_param { 51 | source: "shoes7k_test_lmdb" 52 | batch_size: 50 53 | backend: LMDB 54 | } 55 | } 56 | layer { 57 | name: "conv1" 58 | type: "Convolution" 59 | bottom: "data" 60 | top: "conv1" 61 | param { 62 | lr_mult: 1 63 | decay_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 96 71 | kernel_size: 11 72 | stride: 4 73 | weight_filler { 74 | type: "gaussian" 75 | std: 0.01 76 | } 77 | bias_filler { 78 | type: "constant" 79 | value: 0 80 | } 81 | } 82 | } 83 | layer { 84 | name: "relu1" 85 | type: "ReLU" 86 | bottom: "conv1" 87 | top: "conv1" 88 | } 89 | layer { 90 | name: "pool1" 91 | type: "Pooling" 92 | bottom: "conv1" 93 | top: "pool1" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | layer { 101 | name: "norm1" 102 | type: "LRN" 103 | bottom: "pool1" 104 | top: "norm1" 105 | lrn_param { 106 | local_size: 5 107 | alpha: 0.0001 108 | beta: 0.75 109 | } 110 | } 111 | layer { 112 | name: "conv2" 113 | type: "Convolution" 114 | bottom: "norm1" 115 | top: "conv2" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 256 126 | pad: 2 127 | kernel_size: 5 128 | group: 2 129 | weight_filler { 130 | type: "gaussian" 131 | std: 0.01 132 | } 133 | bias_filler { 134 | type: "constant" 135 | value: 1 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2" 141 | type: "ReLU" 142 | bottom: "conv2" 143 | top: "conv2" 144 | } 145 | layer { 146 | name: "pool2" 147 | type: "Pooling" 148 | bottom:
"conv2" 149 | top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 3 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "norm2" 158 | type: "LRN" 159 | bottom: "pool2" 160 | top: "norm2" 161 | lrn_param { 162 | local_size: 5 163 | alpha: 0.0001 164 | beta: 0.75 165 | } 166 | } 167 | layer { 168 | name: "conv3" 169 | type: "Convolution" 170 | bottom: "norm2" 171 | top: "conv3" 172 | param { 173 | lr_mult: 1 174 | decay_mult: 1 175 | } 176 | param { 177 | lr_mult: 2 178 | decay_mult: 0 179 | } 180 | convolution_param { 181 | num_output: 384 182 | pad: 1 183 | kernel_size: 3 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | } 194 | layer { 195 | name: "relu3" 196 | type: "ReLU" 197 | bottom: "conv3" 198 | top: "conv3" 199 | } 200 | layer { 201 | name: "conv4" 202 | type: "Convolution" 203 | bottom: "conv3" 204 | top: "conv4" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | convolution_param { 214 | num_output: 384 215 | pad: 1 216 | kernel_size: 3 217 | group: 2 218 | weight_filler { 219 | type: "gaussian" 220 | std: 0.01 221 | } 222 | bias_filler { 223 | type: "constant" 224 | value: 1 225 | } 226 | } 227 | } 228 | layer { 229 | name: "relu4" 230 | type: "ReLU" 231 | bottom: "conv4" 232 | top: "conv4" 233 | } 234 | layer { 235 | name: "conv5" 236 | type: "Convolution" 237 | bottom: "conv4" 238 | top: "conv5" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | param { 244 | lr_mult: 2 245 | decay_mult: 0 246 | } 247 | convolution_param { 248 | num_output: 256 249 | pad: 1 250 | kernel_size: 3 251 | group: 2 252 | weight_filler { 253 | type: "gaussian" 254 | std: 0.01 255 | } 256 | bias_filler { 257 | type: "constant" 258 | value: 1 259 | } 260 | } 261 | } 262 | layer { 263 | name: "relu5" 264 | type: "ReLU" 265 | bottom: "conv5" 266 | top: "conv5" 267 | } 268 | layer { 269 | name: "pool5" 270 | type: "Pooling" 271 | bottom: "conv5" 272 | top: "pool5" 273 | pooling_param { 274 | pool: MAX 275 | kernel_size: 3 276 | stride: 2 277 | } 278 | } 279 | layer { 280 | name: "fc6" 281 | type: "InnerProduct" 282 | bottom: "pool5" 283 | top: "fc6" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 4096 294 | weight_filler { 295 | type: "gaussian" 296 | std: 0.005 297 | } 298 | bias_filler { 299 | type: "constant" 300 | value: 1 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu6" 306 | type: "ReLU" 307 | bottom: "fc6" 308 | top: "fc6" 309 | } 310 | layer { 311 | name: "drop6" 312 | type: "Dropout" 313 | bottom: "fc6" 314 | top: "fc6" 315 | dropout_param { 316 | dropout_ratio: 0.5 317 | } 318 | } 319 | layer { 320 | name: "fc7" 321 | type: "InnerProduct" 322 | bottom: "fc6" 323 | top: "fc7" 324 | param { 325 | lr_mult: 1 326 | decay_mult: 1 327 | } 328 | param { 329 | lr_mult: 2 330 | decay_mult: 0 331 | } 332 | inner_product_param { 333 | num_output: 4096 334 | weight_filler { 335 | type: "gaussian" 336 | std: 0.005 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 1 341 | } 342 | } 343 | } 344 | layer { 345 | name: "relu7" 346 | type: "ReLU" 347 | bottom: "fc7" 348 | top: "fc7" 349 | } 350 | layer { 351 | name: "drop7" 352 | type: "Dropout" 353 | bottom: "fc7" 354 | top: "fc7" 355 | dropout_param { 356 | dropout_ratio: 0.5 357 | } 358 | } 359 | layer { 360 | name: 
"fc8" 361 | type: "InnerProduct" 362 | bottom: "fc7" 363 | top: "fc8" 364 | param { 365 | lr_mult: 1 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2 370 | decay_mult: 0 371 | } 372 | inner_product_param { 373 | num_output: 2 374 | weight_filler { 375 | type: "gaussian" 376 | std: 0.01 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0 381 | } 382 | } 383 | } 384 | layer { 385 | name: "accuracy" 386 | type: "Accuracy" 387 | bottom: "fc8" 388 | bottom: "label" 389 | top: "accuracy" 390 | include { 391 | phase: TEST 392 | } 393 | } 394 | layer { 395 | name: "loss" 396 | type: "SoftmaxWithLoss" 397 | bottom: "fc8" 398 | bottom: "label" 399 | top: "loss" 400 | } 401 | -------------------------------------------------------------------------------- /examples/shoes7k/train_val_with_latent_layer.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | crop_size: 227 13 | mean_file: "shoes7k_mean.binaryproto" 14 | } 15 | # mean pixel / channel-wise mean instead of mean image 16 | # transform_param { 17 | # crop_size: 227 18 | # mean_value: 104 19 | # mean_value: 117 20 | # mean_value: 123 21 | # mirror: true 22 | # } 23 | data_param { 24 | source: "shoes7k_train_lmdb" 25 | batch_size: 256 26 | backend: LMDB 27 | } 28 | } 29 | layer { 30 | name: "data" 31 | type: "Data" 32 | top: "data" 33 | top: "label" 34 | include { 35 | phase: TEST 36 | } 37 | transform_param { 38 | mirror: false 39 | crop_size: 227 40 | mean_file: "shoes7k_mean.binaryproto" 41 | } 42 | # mean pixel / channel-wise mean instead of mean image 43 | # transform_param { 44 | # crop_size: 227 45 | # mean_value: 104 46 | # mean_value: 117 47 | # mean_value: 123 48 | # mirror: false 49 | # } 50 | data_param { 51 | source: "shoes7k_test_lmdb" 52 | batch_size: 50 53 | backend: LMDB 54 | } 55 | } 56 | layer { 57 | name: "conv1" 58 | type: "Convolution" 59 | bottom: "data" 60 | top: "conv1" 61 | param { 62 | lr_mult: 1 63 | decay_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | decay_mult: 0 68 | } 69 | convolution_param { 70 | num_output: 96 71 | kernel_size: 11 72 | stride: 4 73 | weight_filler { 74 | type: "gaussian" 75 | std: 0.01 76 | } 77 | bias_filler { 78 | type: "constant" 79 | value: 0 80 | } 81 | } 82 | } 83 | layer { 84 | name: "relu1" 85 | type: "ReLU" 86 | bottom: "conv1" 87 | top: "conv1" 88 | } 89 | layer { 90 | name: "pool1" 91 | type: "Pooling" 92 | bottom: "conv1" 93 | top: "pool1" 94 | pooling_param { 95 | pool: MAX 96 | kernel_size: 3 97 | stride: 2 98 | } 99 | } 100 | layer { 101 | name: "norm1" 102 | type: "LRN" 103 | bottom: "pool1" 104 | top: "norm1" 105 | lrn_param { 106 | local_size: 5 107 | alpha: 0.0001 108 | beta: 0.75 109 | } 110 | } 111 | layer { 112 | name: "conv2" 113 | type: "Convolution" 114 | bottom: "norm1" 115 | top: "conv2" 116 | param { 117 | lr_mult: 1 118 | decay_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | decay_mult: 0 123 | } 124 | convolution_param { 125 | num_output: 256 126 | pad: 2 127 | kernel_size: 5 128 | group: 2 129 | weight_filler { 130 | type: "gaussian" 131 | std: 0.01 132 | } 133 | bias_filler { 134 | type: "constant" 135 | value: 1 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2" 141 | type: "ReLU" 142 | bottom: "conv2" 143 | top: "conv2" 144 | } 145 | layer { 146 | name: "pool2" 147 | type: "Pooling" 148 | bottom: "conv2" 149 
| top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 3 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "norm2" 158 | type: "LRN" 159 | bottom: "pool2" 160 | top: "norm2" 161 | lrn_param { 162 | local_size: 5 163 | alpha: 0.0001 164 | beta: 0.75 165 | } 166 | } 167 | layer { 168 | name: "conv3" 169 | type: "Convolution" 170 | bottom: "norm2" 171 | top: "conv3" 172 | param { 173 | lr_mult: 1 174 | decay_mult: 1 175 | } 176 | param { 177 | lr_mult: 2 178 | decay_mult: 0 179 | } 180 | convolution_param { 181 | num_output: 384 182 | pad: 1 183 | kernel_size: 3 184 | weight_filler { 185 | type: "gaussian" 186 | std: 0.01 187 | } 188 | bias_filler { 189 | type: "constant" 190 | value: 0 191 | } 192 | } 193 | } 194 | layer { 195 | name: "relu3" 196 | type: "ReLU" 197 | bottom: "conv3" 198 | top: "conv3" 199 | } 200 | layer { 201 | name: "conv4" 202 | type: "Convolution" 203 | bottom: "conv3" 204 | top: "conv4" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 2 211 | decay_mult: 0 212 | } 213 | convolution_param { 214 | num_output: 384 215 | pad: 1 216 | kernel_size: 3 217 | group: 2 218 | weight_filler { 219 | type: "gaussian" 220 | std: 0.01 221 | } 222 | bias_filler { 223 | type: "constant" 224 | value: 1 225 | } 226 | } 227 | } 228 | layer { 229 | name: "relu4" 230 | type: "ReLU" 231 | bottom: "conv4" 232 | top: "conv4" 233 | } 234 | layer { 235 | name: "conv5" 236 | type: "Convolution" 237 | bottom: "conv4" 238 | top: "conv5" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | param { 244 | lr_mult: 2 245 | decay_mult: 0 246 | } 247 | convolution_param { 248 | num_output: 256 249 | pad: 1 250 | kernel_size: 3 251 | group: 2 252 | weight_filler { 253 | type: "gaussian" 254 | std: 0.01 255 | } 256 | bias_filler { 257 | type: "constant" 258 | value: 1 259 | } 260 | } 261 | } 262 | layer { 263 | name: "relu5" 264 | type: "ReLU" 265 | bottom: "conv5" 266 | top: "conv5" 267 | } 268 | layer { 269 | name: "pool5" 270 | type: "Pooling" 271 | bottom: "conv5" 272 | top: "pool5" 273 | pooling_param { 274 | pool: MAX 275 | kernel_size: 3 276 | stride: 2 277 | } 278 | } 279 | layer { 280 | name: "fc6" 281 | type: "InnerProduct" 282 | bottom: "pool5" 283 | top: "fc6" 284 | param { 285 | lr_mult: 1 286 | decay_mult: 1 287 | } 288 | param { 289 | lr_mult: 2 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 4096 294 | weight_filler { 295 | type: "gaussian" 296 | std: 0.005 297 | } 298 | bias_filler { 299 | type: "constant" 300 | value: 1 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu6" 306 | type: "ReLU" 307 | bottom: "fc6" 308 | top: "fc6" 309 | } 310 | layer { 311 | name: "drop6" 312 | type: "Dropout" 313 | bottom: "fc6" 314 | top: "fc6" 315 | dropout_param { 316 | dropout_ratio: 0.5 317 | } 318 | } 319 | layer { 320 | name: "fc7" 321 | type: "InnerProduct" 322 | bottom: "fc6" 323 | top: "fc7" 324 | param { 325 | lr_mult: 1 326 | decay_mult: 1 327 | } 328 | param { 329 | lr_mult: 2 330 | decay_mult: 0 331 | } 332 | inner_product_param { 333 | num_output: 4096 334 | weight_filler { 335 | type: "gaussian" 336 | std: 0.005 337 | } 338 | bias_filler { 339 | type: "constant" 340 | value: 1 341 | } 342 | } 343 | } 344 | layer { 345 | name: "relu7" 346 | type: "ReLU" 347 | bottom: "fc7" 348 | top: "fc7" 349 | } 350 | layer { 351 | name: "drop7" 352 | type: "Dropout" 353 | bottom: "fc7" 354 | top: "fc7" 355 | dropout_param { 356 | dropout_ratio: 0.5 357 | } 358 | } 359 | layer { 360 | name: "latent" 361 | 
type: "InnerProduct" 362 | bottom: "fc7" 363 | top: "latent" 364 | param { 365 | lr_mult: 1 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2 370 | decay_mult: 0 371 | } 372 | inner_product_param { 373 | num_output: 48 374 | weight_filler { 375 | type: "gaussian" 376 | std: 0.01 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0 381 | } 382 | } 383 | } 384 | layer { 385 | name: "fc8_new" 386 | type: "InnerProduct" 387 | bottom: "latent" 388 | top: "fc8_new" 389 | param { 390 | lr_mult: 1 391 | decay_mult: 1 392 | } 393 | param { 394 | lr_mult: 2 395 | decay_mult: 0 396 | } 397 | inner_product_param { 398 | num_output: 2 399 | weight_filler { 400 | type: "gaussian" 401 | std: 0.01 402 | } 403 | bias_filler { 404 | type: "constant" 405 | value: 0 406 | } 407 | } 408 | } 409 | layer { 410 | name: "accuracy" 411 | type: "Accuracy" 412 | bottom: "fc8_new" 413 | bottom: "label" 414 | top: "accuracy" 415 | include { 416 | phase: TEST 417 | } 418 | } 419 | layer { 420 | name: "loss" 421 | type: "SoftmaxWithLoss" 422 | bottom: "fc8_new" 423 | bottom: "label" 424 | top: "loss" 425 | } 426 | -------------------------------------------------------------------------------- /img/shoes7k_retrieval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xueeinstein/fast-image-retrieval/c952b34b9dc7833cd0bb3cc49a6b69cd81b7cae8/img/shoes7k_retrieval.png -------------------------------------------------------------------------------- /layer_features.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Extract FC7 4096 feature vector 3 | ''' 4 | import numpy as np 5 | import caffe 6 | 7 | 8 | def feed_net(model_file, deploy_file, imagemean_file, image_files, show_pred): 9 | """feed network""" 10 | n_files = len(image_files) 11 | net = caffe.Net(deploy_file, model_file, caffe.TEST) 12 | 13 | # define transformer for preprocessing 14 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) 15 | transformer.set_mean('data', np.load(imagemean_file).mean(1).mean(1)) 16 | transformer.set_transpose('data', (2, 0, 1)) 17 | transformer.set_channel_swap('data', (2, 1, 0)) 18 | transformer.set_raw_scale('data', 255.0) 19 | 20 | net.blobs['data'].reshape(n_files, 3, 227, 227) 21 | 22 | idx = 0 23 | for image in image_files: 24 | try: 25 | im = caffe.io.load_image(image) 26 | transformed_im = transformer.preprocess('data', im) 27 | net.blobs['data'].data[idx, :, :, :] = transformed_im 28 | idx += 1 29 | except Exception: 30 | pass 31 | 32 | out = net.forward() 33 | if show_pred: 34 | print(out['prob'].argmax()) 35 | return net 36 | 37 | 38 | def layer_features(layers, model_file, deploy_file, imagemean_file, 39 | image_files, gpu=True, gpu_id=0, show_pred=False): 40 | """extract features from various layers""" 41 | if gpu: 42 | caffe.set_device(gpu_id) 43 | caffe.set_mode_gpu() 44 | 45 | net = feed_net(model_file, deploy_file, imagemean_file, image_files, 46 | show_pred) 47 | 48 | #if type(layers) == str: 49 | #return net.blobs[layers].data 50 | 51 | for layer in layers: 52 | if layer not in net.blobs: 53 | raise TypeError('Invalid layer name: ' + layer) 54 | yield (layer, net.blobs[layer].data) 55 | -------------------------------------------------------------------------------- /retrieve.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Image retrieval 3 | ''' 4 | import os 5 | import subprocess 6 | import numpy as np 7 | 
from sklearn.neighbors import KDTree 8 | 9 | from layer_features import layer_features 10 | 11 | 12 | def binary_hash_codes(feature_mat): 13 | """convert feature matrix of latent layer to binary hash codes""" 14 | xs, ys = np.where(feature_mat > 0.5) 15 | code_mat = np.zeros(feature_mat.shape) 16 | 17 | for i in range(len(xs)): 18 | code_mat[xs[i]][ys[i]] = 1 19 | 20 | return code_mat 21 | 22 | 23 | def retrieve_image(target_image, model_file, deploy_file, imagemean_file, 24 | threshold=1): 25 | model_dir = os.path.dirname(model_file) 26 | image_files = np.load(os.path.join(model_dir, 'image_files.npy')) 27 | fc7_feature_mat = np.load(os.path.join(model_dir, 'fc7_features.npy')) 28 | latent_feature_file = os.path.join(model_dir, 'latent_features.npy') 29 | latent_feature_mat = np.load(latent_feature_file) 30 | 31 | candidates = [] 32 | dist = 0 33 | for layer, mat in layer_features(['latent', 'fc7'], model_file, 34 | deploy_file, imagemean_file, 35 | [target_image], show_pred=True): 36 | if layer == 'latent': 37 | # coarse-level search 38 | mat = binary_hash_codes(mat) 39 | mat = mat * np.ones((latent_feature_mat.shape[0], 1))  # tile the query code across all database rows 40 | dis_mat = np.abs(mat - latent_feature_mat) 41 | hamming_dis = np.sum(dis_mat, axis=1) 42 | distance_file = os.path.join(model_dir, 'hamming_dis.npy') 43 | np.save(distance_file, hamming_dis) 44 | candidates = np.where(hamming_dis < threshold)[0] 45 | 46 | if layer == 'fc7': 47 | # fine-level search 48 | kdt = KDTree(fc7_feature_mat[candidates], metric='euclidean') 49 | k = 6 50 | 51 | if candidates.shape[0] < k: 52 | k = candidates.shape[0] 53 | 54 | dist, idxs = kdt.query(mat, k=k) 55 | candidates = candidates[idxs] 56 | print(dist) 57 | 58 | return image_files[candidates][0], dist[0] 59 | 60 | 61 | if __name__ == '__main__': 62 | import sys 63 | if len(sys.argv) != 5: 64 | usage = 'Usage: python retrieve.py' + \ 65 | ' model_file deploy_file imagemean_file target_image.jpg' 66 | print(usage) 67 | else: 68 | model_file = sys.argv[1] 69 | deploy_file = sys.argv[2] 70 | imagemean_file = sys.argv[3] 71 | target_image = sys.argv[4] 72 | 73 | is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\ 74 | and os.path.exists(imagemean_file) 75 | 76 | if is_exists: 77 | res, _ = retrieve_image(target_image, model_file, deploy_file, 78 | imagemean_file, threshold=5) 79 | print(res) 80 | if not os.path.exists('results'): 81 | os.mkdir('results') 82 | for i in range(len(res)): 83 | subprocess.call(['cp', res[i], 'results/%s.jpg' % str(i)]) 84 | else: 85 | print('The model-related files may not exist') 86 | print('Please check files: {}, {}, {}' 87 | .format(model_file, deploy_file, imagemean_file)) 88 | -------------------------------------------------------------------------------- /tools/convert_protomean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | import caffe 5 | 6 | 7 | if len(sys.argv) != 3: 8 | print('Usage: python convert_protomean.py proto.mean out.npy') 9 | sys.exit() 10 | 11 | 12 | blob = caffe.proto.caffe_pb2.BlobProto() 13 | data = open(sys.argv[1], 'rb').read() 14 | blob.ParseFromString(data) 15 | arr = np.array(caffe.io.blobproto_to_array(blob)) 16 | out = arr[0] 17 | np.save(sys.argv[2], out) 18 | --------------------------------------------------------------------------------
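A closing note on the mean-file round trip: convert_protomean.py (above) unpacks Caffe's binaryproto mean into a `(3, H, W)` NumPy array, which layer_features.py then collapses to a per-channel mean via `.mean(1).mean(1)`. A short sketch of that step, assuming the shoes7k filenames used by train.sh:

```python
# Example invocation, matching the call in train.sh:
#   python tools/convert_protomean.py shoes7k_mean.binaryproto shoes7k_mean.npy
import numpy as np

mean = np.load('shoes7k_mean.npy')   # (3, H, W) mean image
channel_mean = mean.mean(1).mean(1)  # (3,) per-channel mean, as computed by
                                     # the transformer in layer_features.py
print(channel_mean)
```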