├── .gitignore
├── README.md
├── caffenet.py
├── config.py
├── examples
    ├── facescrub
    │   ├── convert_facescrub_data.py
    │   ├── deploy.prototxt
    │   ├── deploy_with_latent_layer.prototxt
    │   ├── generate_feature_mat.py
    │   ├── retrieve.sh
    │   ├── solver.prototxt
    │   ├── solver_with_latent_layer.prototxt
    │   ├── train.sh
    │   ├── train_val.prototxt
    │   └── train_val_with_latent_layer.prototxt
    └── shoes7k
    │   ├── convert_shoes7k_data.py
    │   ├── deploy.prototxt
    │   ├── deploy_with_latent_layer.prototxt
    │   ├── eval.py
    │   ├── generate_feature_mat.py
    │   ├── retrieve.sh
    │   ├── solver.prototxt
    │   ├── solver_with_latent_layer.prototxt
    │   ├── train.sh
    │   ├── train_val.prototxt
    │   └── train_val_with_latent_layer.prototxt
├── img
    └── shoes7k_retrieval.png
├── layer_features.py
├── retrieve.py
└── tools
    └── convert_protomean.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | # local config
104 | config.cfg
105 | 
106 | # caffe model files
107 | *.caffemodel
108 | *.solverstate
109 | 
110 | # dataset files
111 | *_lmdb/
112 | *.binaryproto
113 | *.npy


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # fast-image-retrieval
 2 | A lightweight framework using binary hash codes and deep learning for fast image retrieval.
 3 | 
 4 | ## Configuration
 5 | To run the examples, you need to create a `config.cfg` file under the root folder of this project. An example of `config.cfg` looks like:
 6 | 
 7 | ```
 8 | [examples]
 9 | shoes7k_pos_path: /path/to/datasets/shoes7k/classification
10 | shoes7k_neg_path: /path/to/datasets/shoes7k/classificationNeg
11 | ```
12 | 
13 | ## Run Examples
14 | 
15 | ### Shoes7k
16 | 
17 | To run example on shoes7k dataset, firstly, you need to convert shoes7k dataset to LMDB dataset.
18 | 
19 | ```
20 | cd fast-image-retrieval/
21 | python ./examples/shoes7k/convert_shoes7k_data.py
22 | ```
23 | 
24 | Then, you need to train the CNN model.
25 | 
26 | ```
27 | ./examples/shoes7k/train.sh
28 | ```
29 | 
30 | Next, you can retrieve similar image `target.jpg` using
31 | 
32 | ```
33 | ./examples/shoes7k/retrieve.sh target.jpg
34 | ```
35 | 
36 | Note that the first retrival procedure might be very slow because the program reads all shoes7k images and generate feature matrix. The later retrivals can be very fast.
37 | 
38 | Evaluate over the whole dataset by using
39 | 
40 | ```
41 | python ./examples/shoes7k/eval.py
42 | ```
43 | 
44 | Some retrieval results:
45 | 
46 | ![shoes7k retrieval](img/shoes7k_retrieval.png)
47 | 


--------------------------------------------------------------------------------
/caffenet.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Caffenet
 3 | 
 4 | edit from $CAFFEROOT/examples/pycaffe/caffenet.py
 5 | '''
 6 | from __future__ import print_function
 7 | from caffe import layers as L, params as P, to_proto
 8 | 
 9 | 
10 | # helper function for common structures
11 | def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
12 |     conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
13 |                          num_output=nout, pad=pad, group=group)
14 |     return conv, L.ReLU(conv, in_place=True)
15 | 
16 | 
17 | def fc_relu(bottom, nout):
18 |     fc = L.InnerProduct(bottom, num_output=nout)
19 |     return fc, L.ReLU(fc, in_place=True)
20 | 
21 | 
22 | def max_pool(bottom, ks, stride=1):
23 |     return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
24 | 
25 | 
26 | def caffenet(lmdb, batch_size=256, include_acc=False):
27 |     data, label = L.Data(source=lmdb, backend=P.Data.LMDB,
28 |                          batch_size=batch_size, ntop=2)
29 | 
30 |     # the net itself
31 |     conv1, relu1 = conv_relu(data, 11, 96, stride=4)
32 |     pool1 = max_pool(relu1, 3, stride=2)
33 |     norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
34 |     conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
35 |     pool2 = max_pool(relu2, 3, stride=2)
36 |     norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
37 |     conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
38 |     conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
39 |     conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
40 |     pool5 = max_pool(relu5, 3, stride=2)
41 |     fc6, relu6 = fc_relu(pool5, 4096)
42 |     drop6 = L.Dropout(relu6, in_place=True)
43 |     fc7, relu7 = fc_relu(drop6, 4096)
44 |     drop7 = L.Dropout(relu7, in_place=True)
45 |     fc8 = L.InnerProduct(drop7, num_output=1000)
46 |     loss = L.SoftmaxWithLoss(fc8, label)
47 | 
48 |     if include_acc:
49 |         acc = L.Accuracy(fc8, label)
50 |         return to_proto(loss, acc)
51 |     else:
52 |         return to_proto(loss)
53 | 
54 | 
55 | def make_net(train_lmdb, test_lmdb):
56 |     with open('train.prototxt', 'w') as f:
57 |         print(caffenet(train_lmdb), file=f)
58 | 
59 |     with open('test.prototxt', 'w') as f:
60 |         print(caffenet(test_lmdb, batch_size=50,
61 |                        include_acc=True), file=f)
62 | 
63 | 
64 | if __name__ == '__main__':
65 |     import sys
66 | 
67 |     if len(sys.argv) == 3:
68 |         train_lmdb = sys.argv[1]
69 |         test_lmdb = sys.argv[2]
70 |         make_net(train_lmdb, test_lmdb)
71 |     else:
72 |         print('Usage: python caffenet.py train_lmdb_path test_lmdb_path')
73 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | config
 3 | '''
 4 | import ConfigParser as cp
 5 | 
 6 | 
 7 | config = cp.RawConfigParser()
 8 | config.read('./config.cfg')
 9 | 
10 | 
11 | # config for example shoes7k
12 | eg_shoes7k_pos_path = config.get('shoes7k', 'pos_path')
13 | eg_shoes7k_neg_path = config.get('shoes7k', 'neg_path')
14 | eg_shoes7k_latent_num = config.getint('shoes7k', 'latent_num')
15 | eg_shoes7k_class_num = config.getint('shoes7k', 'class_num')
16 | 
17 | # config for example facescrub
18 | eg_facescrub_folder = config.get('facescrub', 'root')
19 | 


--------------------------------------------------------------------------------
/examples/facescrub/convert_facescrub_data.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Convert facescrub dataset to train/test lmdb dataset
  3 | 
  4 | ------------------------------------------------------
  5 | Two classes: actor, actress
  6 | 
  7 | The dataset is downloaded using https://github.com/faceteam/facescrub
  8 | Please keep the folder structure after downloaded,
  9 | and configure the config.eg_facescrub_folder as path to `facescrub`
 10 | where `download.py` exists.
 11 | '''
 12 | import os
 13 | import cv2
 14 | import lmdb
 15 | import numpy as np
 16 | from caffe.proto import caffe_pb2
 17 | 
 18 | import config
 19 | 
 20 | 
 21 | IM_HEIGHT = 227
 22 | IM_WIDTH = 227
 23 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 24 | TRAIN_LMDB = os.path.join(SCRIPT_PATH, 'facescrub_train_lmdb')
 25 | TEST_LMDB = os.path.join(SCRIPT_PATH, 'facescrub_test_lmdb')
 26 | ACTORS = os.path.join(config.eg_facescrub_folder, 'facescrub_actors.txt')
 27 | ACTRESS = os.path.join(config.eg_facescrub_folder, 'facescrub_actresses.txt')
 28 | DOWNLOAD = os.path.join(config.eg_facescrub_folder, 'download')
 29 | 
 30 | 
 31 | def get_names(path):
 32 |     """get actor or actress names"""
 33 |     data = np.loadtxt(path, delimiter='\t', skiprows=1, dtype=str)
 34 |     return np.unique(data[:, 0])
 35 | 
 36 | 
 37 | def get_images(names, ratio, label, train_images, train_labels,
 38 |                test_images, test_labels):
 39 |     for name in names:
 40 |         folder = '_'.join(name.split())
 41 |         folder = os.path.join(DOWNLOAD, folder, 'face')
 42 | 
 43 |         faces = os.listdir(folder)
 44 |         split = int(len(faces) * ratio)
 45 |         for idx, face in enumerate(faces):
 46 |             face = os.path.join(folder, face)
 47 |             if idx < split:
 48 |                 train_images.append(face)
 49 |                 train_labels.append(label)
 50 |             else:
 51 |                 test_images.append(face)
 52 |                 test_labels.append(label)
 53 | 
 54 | 
 55 | def get_all_images():
 56 |     actors = get_names(ACTORS)
 57 |     actresses = get_names(ACTRESS)
 58 |     names = np.concatenate((actors, actresses))
 59 |     all_images = []
 60 |     for name in names:
 61 |         folder = '_'.join(name.split())
 62 |         folder = os.path.join(DOWNLOAD, folder, 'face')
 63 | 
 64 |         faces = os.listdir(folder)
 65 |         for face in faces:
 66 |             face = os.path.join(folder, face)
 67 |             all_images.append(face)
 68 | 
 69 |     return np.array(all_images)
 70 | 
 71 | 
 72 | def get_tr_te_images(ratio):
 73 |     """get training and test images for two classes"""
 74 |     train_images, train_labels = [], []
 75 |     test_images, test_labels = [], []
 76 | 
 77 |     actors = get_names(ACTORS)
 78 |     actresses = get_names(ACTRESS)
 79 | 
 80 |     get_images(actors, ratio, 1, train_images, train_labels, test_images,
 81 |                test_labels)
 82 |     get_images(actresses, ratio, 0, train_images, train_labels, test_images,
 83 |                test_labels)
 84 | 
 85 |     train_images = np.array(train_images)
 86 |     train_labels = np.array(train_labels)
 87 |     test_images = np.array(test_images)
 88 |     test_labels = np.array(test_labels)
 89 | 
 90 |     # shuffle
 91 |     train_idxs = np.arange(train_images.shape[0])
 92 |     test_idxs = np.arange(test_images.shape[0])
 93 |     np.random.shuffle(train_idxs)
 94 |     np.random.shuffle(test_idxs)
 95 | 
 96 |     return (train_images[train_idxs], train_labels[train_idxs],
 97 |             test_images[test_idxs], test_labels[test_idxs])
 98 | 
 99 | 
100 | def save_to_lmdb(images, labels, lmdb_file):
101 |     if not os.path.exists(lmdb_file):
102 |         batch_size = 256
103 |         lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))
104 |         lmdb_txn = lmdb_env.begin(write=True)
105 |         item_id = 0
106 |         datum = caffe_pb2.Datum()
107 | 
108 |         for i in range(images.shape[0]):
109 |             im = cv2.imread(images[i])
110 |             if im is None:
111 |                 continue
112 |             im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH))
113 |             datum.channels = im.shape[2]
114 |             datum.height = im.shape[0]
115 |             datum.width = im.shape[1]
116 |             datum.data = im.tobytes()
117 |             datum.label = labels[i]
118 |             keystr = '{:0>8d}'.format(item_id)
119 |             lmdb_txn.put(keystr, datum.SerializeToString())
120 | 
121 |             # write batch
122 |             if (item_id + 1) % batch_size == 0:
123 |                 lmdb_txn.commit()
124 |                 lmdb_txn = lmdb_env.begin(write=True)
125 |                 print('converted {} images'.format(item_id + 1))
126 | 
127 |             item_id += 1
128 | 
129 |         # write last batch
130 |         if (item_id + 1) % batch_size != 0:
131 |             lmdb_txn.commit()
132 |             print('converted {} images'.format(item_id + 1))
133 |             print('Generated ' + lmdb_file)
134 |     else:
135 |         print(lmdb_file + ' already exists')
136 | 
137 | 
138 | def convert_facecrub_data(ratio=.8):
139 |     tr_images, tr_labels, te_images, te_labels = get_tr_te_images(ratio)
140 |     save_to_lmdb(tr_images, tr_labels, TRAIN_LMDB)
141 |     save_to_lmdb(te_images, te_labels, TEST_LMDB)
142 | 
143 | 
144 | if __name__ == '__main__':
145 |     convert_facecrub_data()
146 | 


--------------------------------------------------------------------------------
/examples/facescrub/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Input"
  5 |   top: "data"
  6 |   input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
  7 | }
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   convolution_param {
 14 |     num_output: 96
 15 |     kernel_size: 11
 16 |     stride: 4
 17 |   }
 18 | }
 19 | layer {
 20 |   name: "relu1"
 21 |   type: "ReLU"
 22 |   bottom: "conv1"
 23 |   top: "conv1"
 24 | }
 25 | layer {
 26 |   name: "pool1"
 27 |   type: "Pooling"
 28 |   bottom: "conv1"
 29 |   top: "pool1"
 30 |   pooling_param {
 31 |     pool: MAX
 32 |     kernel_size: 3
 33 |     stride: 2
 34 |   }
 35 | }
 36 | layer {
 37 |   name: "norm1"
 38 |   type: "LRN"
 39 |   bottom: "pool1"
 40 |   top: "norm1"
 41 |   lrn_param {
 42 |     local_size: 5
 43 |     alpha: 0.0001
 44 |     beta: 0.75
 45 |   }
 46 | }
 47 | layer {
 48 |   name: "conv2"
 49 |   type: "Convolution"
 50 |   bottom: "norm1"
 51 |   top: "conv2"
 52 |   convolution_param {
 53 |     num_output: 256
 54 |     pad: 2
 55 |     kernel_size: 5
 56 |     group: 2
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu2"
 61 |   type: "ReLU"
 62 |   bottom: "conv2"
 63 |   top: "conv2"
 64 | }
 65 | layer {
 66 |   name: "pool2"
 67 |   type: "Pooling"
 68 |   bottom: "conv2"
 69 |   top: "pool2"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 3
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "norm2"
 78 |   type: "LRN"
 79 |   bottom: "pool2"
 80 |   top: "norm2"
 81 |   lrn_param {
 82 |     local_size: 5
 83 |     alpha: 0.0001
 84 |     beta: 0.75
 85 |   }
 86 | }
 87 | layer {
 88 |   name: "conv3"
 89 |   type: "Convolution"
 90 |   bottom: "norm2"
 91 |   top: "conv3"
 92 |   convolution_param {
 93 |     num_output: 384
 94 |     pad: 1
 95 |     kernel_size: 3
 96 |   }
 97 | }
 98 | layer {
 99 |   name: "relu3"
100 |   type: "ReLU"
101 |   bottom: "conv3"
102 |   top: "conv3"
103 | }
104 | layer {
105 |   name: "conv4"
106 |   type: "Convolution"
107 |   bottom: "conv3"
108 |   top: "conv4"
109 |   convolution_param {
110 |     num_output: 384
111 |     pad: 1
112 |     kernel_size: 3
113 |     group: 2
114 |   }
115 | }
116 | layer {
117 |   name: "relu4"
118 |   type: "ReLU"
119 |   bottom: "conv4"
120 |   top: "conv4"
121 | }
122 | layer {
123 |   name: "conv5"
124 |   type: "Convolution"
125 |   bottom: "conv4"
126 |   top: "conv5"
127 |   convolution_param {
128 |     num_output: 256
129 |     pad: 1
130 |     kernel_size: 3
131 |     group: 2
132 |   }
133 | }
134 | layer {
135 |   name: "relu5"
136 |   type: "ReLU"
137 |   bottom: "conv5"
138 |   top: "conv5"
139 | }
140 | layer {
141 |   name: "pool5"
142 |   type: "Pooling"
143 |   bottom: "conv5"
144 |   top: "pool5"
145 |   pooling_param {
146 |     pool: MAX
147 |     kernel_size: 3
148 |     stride: 2
149 |   }
150 | }
151 | layer {
152 |   name: "fc6"
153 |   type: "InnerProduct"
154 |   bottom: "pool5"
155 |   top: "fc6"
156 |   inner_product_param {
157 |     num_output: 4096
158 |   }
159 | }
160 | layer {
161 |   name: "relu6"
162 |   type: "ReLU"
163 |   bottom: "fc6"
164 |   top: "fc6"
165 | }
166 | layer {
167 |   name: "drop6"
168 |   type: "Dropout"
169 |   bottom: "fc6"
170 |   top: "fc6"
171 |   dropout_param {
172 |     dropout_ratio: 0.5
173 |   }
174 | }
175 | layer {
176 |   name: "fc7"
177 |   type: "InnerProduct"
178 |   bottom: "fc6"
179 |   top: "fc7"
180 |   inner_product_param {
181 |     num_output: 4096
182 |   }
183 | }
184 | layer {
185 |   name: "relu7"
186 |   type: "ReLU"
187 |   bottom: "fc7"
188 |   top: "fc7"
189 | }
190 | layer {
191 |   name: "drop7"
192 |   type: "Dropout"
193 |   bottom: "fc7"
194 |   top: "fc7"
195 |   dropout_param {
196 |     dropout_ratio: 0.5
197 |   }
198 | }
199 | layer {
200 |   name: "fc8"
201 |   type: "InnerProduct"
202 |   bottom: "fc7"
203 |   top: "fc8"
204 |   inner_product_param {
205 |     num_output: 2
206 |   }
207 | }
208 | layer {
209 |   name: "prob"
210 |   type: "Softmax"
211 |   bottom: "fc8"
212 |   top: "prob"
213 | }
214 | 


--------------------------------------------------------------------------------
/examples/facescrub/deploy_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Input"
  5 |   top: "data"
  6 |   input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
  7 | }
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   convolution_param {
 14 |     num_output: 96
 15 |     kernel_size: 11
 16 |     stride: 4
 17 |   }
 18 | }
 19 | layer {
 20 |   name: "relu1"
 21 |   type: "ReLU"
 22 |   bottom: "conv1"
 23 |   top: "conv1"
 24 | }
 25 | layer {
 26 |   name: "pool1"
 27 |   type: "Pooling"
 28 |   bottom: "conv1"
 29 |   top: "pool1"
 30 |   pooling_param {
 31 |     pool: MAX
 32 |     kernel_size: 3
 33 |     stride: 2
 34 |   }
 35 | }
 36 | layer {
 37 |   name: "norm1"
 38 |   type: "LRN"
 39 |   bottom: "pool1"
 40 |   top: "norm1"
 41 |   lrn_param {
 42 |     local_size: 5
 43 |     alpha: 0.0001
 44 |     beta: 0.75
 45 |   }
 46 | }
 47 | layer {
 48 |   name: "conv2"
 49 |   type: "Convolution"
 50 |   bottom: "norm1"
 51 |   top: "conv2"
 52 |   convolution_param {
 53 |     num_output: 256
 54 |     pad: 2
 55 |     kernel_size: 5
 56 |     group: 2
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu2"
 61 |   type: "ReLU"
 62 |   bottom: "conv2"
 63 |   top: "conv2"
 64 | }
 65 | layer {
 66 |   name: "pool2"
 67 |   type: "Pooling"
 68 |   bottom: "conv2"
 69 |   top: "pool2"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 3
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "norm2"
 78 |   type: "LRN"
 79 |   bottom: "pool2"
 80 |   top: "norm2"
 81 |   lrn_param {
 82 |     local_size: 5
 83 |     alpha: 0.0001
 84 |     beta: 0.75
 85 |   }
 86 | }
 87 | layer {
 88 |   name: "conv3"
 89 |   type: "Convolution"
 90 |   bottom: "norm2"
 91 |   top: "conv3"
 92 |   convolution_param {
 93 |     num_output: 384
 94 |     pad: 1
 95 |     kernel_size: 3
 96 |   }
 97 | }
 98 | layer {
 99 |   name: "relu3"
100 |   type: "ReLU"
101 |   bottom: "conv3"
102 |   top: "conv3"
103 | }
104 | layer {
105 |   name: "conv4"
106 |   type: "Convolution"
107 |   bottom: "conv3"
108 |   top: "conv4"
109 |   convolution_param {
110 |     num_output: 384
111 |     pad: 1
112 |     kernel_size: 3
113 |     group: 2
114 |   }
115 | }
116 | layer {
117 |   name: "relu4"
118 |   type: "ReLU"
119 |   bottom: "conv4"
120 |   top: "conv4"
121 | }
122 | layer {
123 |   name: "conv5"
124 |   type: "Convolution"
125 |   bottom: "conv4"
126 |   top: "conv5"
127 |   convolution_param {
128 |     num_output: 256
129 |     pad: 1
130 |     kernel_size: 3
131 |     group: 2
132 |   }
133 | }
134 | layer {
135 |   name: "relu5"
136 |   type: "ReLU"
137 |   bottom: "conv5"
138 |   top: "conv5"
139 | }
140 | layer {
141 |   name: "pool5"
142 |   type: "Pooling"
143 |   bottom: "conv5"
144 |   top: "pool5"
145 |   pooling_param {
146 |     pool: MAX
147 |     kernel_size: 3
148 |     stride: 2
149 |   }
150 | }
151 | layer {
152 |   name: "fc6"
153 |   type: "InnerProduct"
154 |   bottom: "pool5"
155 |   top: "fc6"
156 |   inner_product_param {
157 |     num_output: 4096
158 |   }
159 | }
160 | layer {
161 |   name: "relu6"
162 |   type: "ReLU"
163 |   bottom: "fc6"
164 |   top: "fc6"
165 | }
166 | layer {
167 |   name: "drop6"
168 |   type: "Dropout"
169 |   bottom: "fc6"
170 |   top: "fc6"
171 |   dropout_param {
172 |     dropout_ratio: 0.5
173 |   }
174 | }
175 | layer {
176 |   name: "fc7"
177 |   type: "InnerProduct"
178 |   bottom: "fc6"
179 |   top: "fc7"
180 |   inner_product_param {
181 |     num_output: 4096
182 |   }
183 | }
184 | layer {
185 |   name: "relu7"
186 |   type: "ReLU"
187 |   bottom: "fc7"
188 |   top: "fc7"
189 | }
190 | layer {
191 |   name: "drop7"
192 |   type: "Dropout"
193 |   bottom: "fc7"
194 |   top: "fc7"
195 |   dropout_param {
196 |     dropout_ratio: 0.5
197 |   }
198 | }
199 | layer {
200 |   name: "latent"
201 |   type: "InnerProduct"
202 |   bottom: "fc7"
203 |   top: "latent"
204 |   inner_product_param {
205 |     num_output: 48
206 |   }
207 | }
208 | layer {
209 |   name: "fc8_new"
210 |   type: "InnerProduct"
211 |   bottom: "latent"
212 |   top: "fc8_new"
213 |   inner_product_param {
214 |     num_output: 2
215 |   }
216 | }
217 | layer {
218 |   name: "prob"
219 |   type: "Softmax"
220 |   bottom: "fc8_new"
221 |   top: "prob"
222 | }
223 | 


--------------------------------------------------------------------------------
/examples/facescrub/generate_feature_mat.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Generate feature matrix
 3 | 
 4 | Prepare files for image retrieval:
 5 | image_files.npy
 6 | fc7_features.npy
 7 | latent_features.npy
 8 | '''
 9 | import os
10 | import sys
11 | import numpy as np
12 | 
13 | from convert_facescrub_data import get_all_images
14 | 
15 | sys.path.append('../..')
16 | import config
17 | from layer_features import layer_features
18 | from retrieve import binary_hash_codes
19 | 
20 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
21 | 
22 | 
23 | def generate_feature_matrix(model_file, deploy_file, imagemean_file):
24 |     """generate feature matrix of image dataset
25 |     save the matrix as npy file"""
26 |     image_files = get_all_images()
27 |     np.random.shuffle(image_files)
28 | 
29 |     # feed the network and get feature vectors
30 |     feature_mat = {'fc7': [], 'latent': []}
31 | 
32 |     batch = []
33 |     batch_size = 0
34 |     for image in image_files:
35 |         batch.append(image)
36 |         batch_size += 1
37 | 
38 |         if batch_size == 1000:
39 |             for layer, mat in layer_features(feature_mat.keys(), model_file,
40 |                                              deploy_file, imagemean_file,
41 |                                              batch):
42 |                 if layer == 'latent':
43 |                     mat = binary_hash_codes(mat)
44 | 
45 |                 feature_mat[layer].extend(mat)
46 | 
47 |             batch = []
48 |             batch_size = 0
49 | 
50 |     if batch_size > 0:
51 |         for layer, mat in layer_features(feature_mat.keys(), model_file,
52 |                                          deploy_file, imagemean_file, batch):
53 |             if layer == 'latent':
54 |                 mat = binary_hash_codes(mat)
55 | 
56 |             feature_mat[layer].extend(mat)
57 | 
58 |     # save to npy files
59 |     np.save(os.path.join(SCRIPT_PATH, 'image_files.npy'), image_files)
60 |     for layer in feature_mat.keys():
61 |         npy_file = os.path.join(SCRIPT_PATH, layer + '_features.npy')
62 |         np.save(npy_file, np.array(feature_mat[layer]))
63 | 
64 | 
65 | if __name__ == '__main__':
66 |     if len(sys.argv) != 4:
67 |         usage = 'Usage: python generate_feature_mat.py' + \
68 |                 ' model_file deploy_file imagemean_file'
69 |         print(usage)
70 |     else:
71 |         model_file = sys.argv[1]
72 |         deploy_file = sys.argv[2]
73 |         imagemean_file = sys.argv[3]
74 | 
75 |         is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\
76 |             and os.path.exists(imagemean_file)
77 | 
78 |         if is_exists:
79 |             generate_feature_matrix(model_file, deploy_file, imagemean_file)
80 | 


--------------------------------------------------------------------------------
/examples/facescrub/retrieve.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # init env
 4 | BASEDIR=$(dirname "$0")
 5 | PROJROOT=$BASEDIR/../..
 6 | MODEL_FILE="$BASEDIR/facescrub_model_with_latent_layer_iter_10000.caffemodel"
 7 | DEPLOY_FILE="$BASEDIR/deploy_with_latent_layer.prototxt"
 8 | MEAN_FILE="$BASEDIR/facescrub_mean.npy"
 9 | IMAGE_NPY="$BASEDIR/image_files.npy"
10 | FC7_NPY="$BASEDIR/fc7_features.npy"
11 | LATENT_NPY="$BASEDIR/latent_features.npy"
12 | TARGET="$1"
13 | 
14 | # check model
15 | if [ ! -e $MODEL_FILE ] || [ ! -e $DEPLOY_FILE ] || [ ! -e $MEAN_FILE ]; then
16 |   echo "Please train the model at first"
17 |   echo "./train.sh"
18 |   exit
19 | fi
20 | 
21 | # parse parsemeters
22 | if [[ $TARGET == "-h" ]] || [[ $TARGET == "--help" ]]; then
23 |   echo "Usage: ./retrieve.sh image_to_retrieve.jpg"
24 |   exit
25 | fi
26 | 
27 | if [ -e $IMAGE_NPY ] && [ -e $FC7_NPY ] && [ -e $LATENT_NPY ]; then
28 |   python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET
29 | else
30 |   echo "generate feature matrix..."
31 |   python $BASEDIR/generate_feature_mat.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE
32 |   python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET
33 | fi
34 | 
35 | 


--------------------------------------------------------------------------------
/examples/facescrub/solver.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"
 2 | test_iter: 1000
 3 | test_interval: 1000
 4 | base_lr: 0.001
 5 | lr_policy: "step"
 6 | gamma: 0.1
 7 | stepsize: 5000
 8 | display: 20
 9 | max_iter: 10000
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: 2000
13 | snapshot_prefix: "facescrub_model"
14 | solver_mode: GPU
15 | 


--------------------------------------------------------------------------------
/examples/facescrub/solver_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val_with_latent_layer.prototxt"
 2 | test_iter: 1000
 3 | test_interval: 1000
 4 | base_lr: 0.001
 5 | lr_policy: "step"
 6 | gamma: 0.1
 7 | stepsize: 5000
 8 | display: 20
 9 | max_iter: 10000
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: 2000
13 | snapshot_prefix: "facescrub_model_with_latent_layer"
14 | solver_mode: GPU
15 | 


--------------------------------------------------------------------------------
/examples/facescrub/train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # check caffe
 4 | if [[ $CAFFEROOT == '' ]]; then
 5 |   echo "\$CAFFEROOT not found, please define it at first."
 6 |   echo "export CAFFEROOT=/path/to/your/caffe"
 7 |   exit
 8 | fi
 9 | 
10 | # init env
11 | TOOLS=$CAFFEROOT/build/tools
12 | BASEDIR=$(dirname "$0")
13 | PROJROOT=$BASEDIR/../../
14 | CURRDIR=$(pwd)
15 | 
16 | # check dataset
17 | if [ ! -d $BASEDIR/facescrub_train_lmdb ] || [ ! -d $BASEDIR/facescrub_test_lmdb ] ; then
18 |   echo "The facescrub dataset folder cannot found"
19 |   echo "Please execute convert_facescrub_data.py at first"
20 |   exit
21 | fi
22 | 
23 | # compute mean
24 | $TOOLS/compute_image_mean $BASEDIR/facescrub_train_lmdb $BASEDIR/facescrub_mean.binaryproto
25 | python $PROJROOT/tools/convert_protomean.py $BASEDIR/facescrub_mean.binaryproto $BASEDIR/facescrub_mean.npy
26 | 
27 | cd $BASEDIR
28 | 
29 | # pretrain
30 | $TOOLS/caffe train --solver=solver.prototxt 2>&1 | tee pretrain.log
31 | 
32 | # train net with latent layer
33 | $TOOLS/caffe train \
34 |   --solver=solver_with_latent_layer.prototxt \
35 |   --weights=facescrub_model_iter_10000.caffemodel 2>&1 | tee train.log
36 | 
37 | cd $CURRDIR
38 | 


--------------------------------------------------------------------------------
/examples/facescrub/train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Data"
  5 |   top: "data"
  6 |   top: "label"
  7 |   include {
  8 |     phase: TRAIN
  9 |   }
 10 |   transform_param {
 11 |     mirror: true
 12 |     crop_size: 227
 13 |     mean_file: "facescrub_mean.binaryproto"
 14 |   }
 15 | # mean pixel / channel-wise mean instead of mean image
 16 | #  transform_param {
 17 | #    crop_size: 227
 18 | #    mean_value: 104
 19 | #    mean_value: 117
 20 | #    mean_value: 123
 21 | #    mirror: true
 22 | #  }
 23 |   data_param {
 24 |     source: "facescrub_train_lmdb"
 25 |     batch_size: 256
 26 |     backend: LMDB
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "data"
 31 |   type: "Data"
 32 |   top: "data"
 33 |   top: "label"
 34 |   include {
 35 |     phase: TEST
 36 |   }
 37 |   transform_param {
 38 |     mirror: false
 39 |     crop_size: 227
 40 |     mean_file: "facescrub_mean.binaryproto"
 41 |   }
 42 | # mean pixel / channel-wise mean instead of mean image
 43 | #  transform_param {
 44 | #    crop_size: 227
 45 | #    mean_value: 104
 46 | #    mean_value: 117
 47 | #    mean_value: 123
 48 | #    mirror: false
 49 | #  }
 50 |   data_param {
 51 |     source: "facescrub_test_lmdb"
 52 |     batch_size: 50
 53 |     backend: LMDB
 54 |   }
 55 | }
 56 | layer {
 57 |   name: "conv1"
 58 |   type: "Convolution"
 59 |   bottom: "data"
 60 |   top: "conv1"
 61 |   param {
 62 |     lr_mult: 1
 63 |     decay_mult: 1
 64 |   }
 65 |   param {
 66 |     lr_mult: 2
 67 |     decay_mult: 0
 68 |   }
 69 |   convolution_param {
 70 |     num_output: 96
 71 |     kernel_size: 11
 72 |     stride: 4
 73 |     weight_filler {
 74 |       type: "gaussian"
 75 |       std: 0.01
 76 |     }
 77 |     bias_filler {
 78 |       type: "constant"
 79 |       value: 0
 80 |     }
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu1"
 85 |   type: "ReLU"
 86 |   bottom: "conv1"
 87 |   top: "conv1"
 88 | }
 89 | layer {
 90 |   name: "pool1"
 91 |   type: "Pooling"
 92 |   bottom: "conv1"
 93 |   top: "pool1"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     stride: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "norm1"
102 |   type: "LRN"
103 |   bottom: "pool1"
104 |   top: "norm1"
105 |   lrn_param {
106 |     local_size: 5
107 |     alpha: 0.0001
108 |     beta: 0.75
109 |   }
110 | }
111 | layer {
112 |   name: "conv2"
113 |   type: "Convolution"
114 |   bottom: "norm1"
115 |   top: "conv2"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 256
126 |     pad: 2
127 |     kernel_size: 5
128 |     group: 2
129 |     weight_filler {
130 |       type: "gaussian"
131 |       std: 0.01
132 |     }
133 |     bias_filler {
134 |       type: "constant"
135 |       value: 1
136 |     }
137 |   }
138 | }
139 | layer {
140 |   name: "relu2"
141 |   type: "ReLU"
142 |   bottom: "conv2"
143 |   top: "conv2"
144 | }
145 | layer {
146 |   name: "pool2"
147 |   type: "Pooling"
148 |   bottom: "conv2"
149 |   top: "pool2"
150 |   pooling_param {
151 |     pool: MAX
152 |     kernel_size: 3
153 |     stride: 2
154 |   }
155 | }
156 | layer {
157 |   name: "norm2"
158 |   type: "LRN"
159 |   bottom: "pool2"
160 |   top: "norm2"
161 |   lrn_param {
162 |     local_size: 5
163 |     alpha: 0.0001
164 |     beta: 0.75
165 |   }
166 | }
167 | layer {
168 |   name: "conv3"
169 |   type: "Convolution"
170 |   bottom: "norm2"
171 |   top: "conv3"
172 |   param {
173 |     lr_mult: 1
174 |     decay_mult: 1
175 |   }
176 |   param {
177 |     lr_mult: 2
178 |     decay_mult: 0
179 |   }
180 |   convolution_param {
181 |     num_output: 384
182 |     pad: 1
183 |     kernel_size: 3
184 |     weight_filler {
185 |       type: "gaussian"
186 |       std: 0.01
187 |     }
188 |     bias_filler {
189 |       type: "constant"
190 |       value: 0
191 |     }
192 |   }
193 | }
194 | layer {
195 |   name: "relu3"
196 |   type: "ReLU"
197 |   bottom: "conv3"
198 |   top: "conv3"
199 | }
200 | layer {
201 |   name: "conv4"
202 |   type: "Convolution"
203 |   bottom: "conv3"
204 |   top: "conv4"
205 |   param {
206 |     lr_mult: 1
207 |     decay_mult: 1
208 |   }
209 |   param {
210 |     lr_mult: 2
211 |     decay_mult: 0
212 |   }
213 |   convolution_param {
214 |     num_output: 384
215 |     pad: 1
216 |     kernel_size: 3
217 |     group: 2
218 |     weight_filler {
219 |       type: "gaussian"
220 |       std: 0.01
221 |     }
222 |     bias_filler {
223 |       type: "constant"
224 |       value: 1
225 |     }
226 |   }
227 | }
228 | layer {
229 |   name: "relu4"
230 |   type: "ReLU"
231 |   bottom: "conv4"
232 |   top: "conv4"
233 | }
234 | layer {
235 |   name: "conv5"
236 |   type: "Convolution"
237 |   bottom: "conv4"
238 |   top: "conv5"
239 |   param {
240 |     lr_mult: 1
241 |     decay_mult: 1
242 |   }
243 |   param {
244 |     lr_mult: 2
245 |     decay_mult: 0
246 |   }
247 |   convolution_param {
248 |     num_output: 256
249 |     pad: 1
250 |     kernel_size: 3
251 |     group: 2
252 |     weight_filler {
253 |       type: "gaussian"
254 |       std: 0.01
255 |     }
256 |     bias_filler {
257 |       type: "constant"
258 |       value: 1
259 |     }
260 |   }
261 | }
262 | layer {
263 |   name: "relu5"
264 |   type: "ReLU"
265 |   bottom: "conv5"
266 |   top: "conv5"
267 | }
268 | layer {
269 |   name: "pool5"
270 |   type: "Pooling"
271 |   bottom: "conv5"
272 |   top: "pool5"
273 |   pooling_param {
274 |     pool: MAX
275 |     kernel_size: 3
276 |     stride: 2
277 |   }
278 | }
279 | layer {
280 |   name: "fc6"
281 |   type: "InnerProduct"
282 |   bottom: "pool5"
283 |   top: "fc6"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   inner_product_param {
293 |     num_output: 4096
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.005
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 1
301 |     }
302 |   }
303 | }
304 | layer {
305 |   name: "relu6"
306 |   type: "ReLU"
307 |   bottom: "fc6"
308 |   top: "fc6"
309 | }
310 | layer {
311 |   name: "drop6"
312 |   type: "Dropout"
313 |   bottom: "fc6"
314 |   top: "fc6"
315 |   dropout_param {
316 |     dropout_ratio: 0.5
317 |   }
318 | }
319 | layer {
320 |   name: "fc7"
321 |   type: "InnerProduct"
322 |   bottom: "fc6"
323 |   top: "fc7"
324 |   param {
325 |     lr_mult: 1
326 |     decay_mult: 1
327 |   }
328 |   param {
329 |     lr_mult: 2
330 |     decay_mult: 0
331 |   }
332 |   inner_product_param {
333 |     num_output: 4096
334 |     weight_filler {
335 |       type: "gaussian"
336 |       std: 0.005
337 |     }
338 |     bias_filler {
339 |       type: "constant"
340 |       value: 1
341 |     }
342 |   }
343 | }
344 | layer {
345 |   name: "relu7"
346 |   type: "ReLU"
347 |   bottom: "fc7"
348 |   top: "fc7"
349 | }
350 | layer {
351 |   name: "drop7"
352 |   type: "Dropout"
353 |   bottom: "fc7"
354 |   top: "fc7"
355 |   dropout_param {
356 |     dropout_ratio: 0.5
357 |   }
358 | }
359 | layer {
360 |   name: "fc8"
361 |   type: "InnerProduct"
362 |   bottom: "fc7"
363 |   top: "fc8"
364 |   param {
365 |     lr_mult: 1
366 |     decay_mult: 1
367 |   }
368 |   param {
369 |     lr_mult: 2
370 |     decay_mult: 0
371 |   }
372 |   inner_product_param {
373 |     num_output: 2
374 |     weight_filler {
375 |       type: "gaussian"
376 |       std: 0.01
377 |     }
378 |     bias_filler {
379 |       type: "constant"
380 |       value: 0
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "accuracy"
386 |   type: "Accuracy"
387 |   bottom: "fc8"
388 |   bottom: "label"
389 |   top: "accuracy"
390 |   include {
391 |     phase: TEST
392 |   }
393 | }
394 | layer {
395 |   name: "loss"
396 |   type: "SoftmaxWithLoss"
397 |   bottom: "fc8"
398 |   bottom: "label"
399 |   top: "loss"
400 | }
401 | 


--------------------------------------------------------------------------------
/examples/facescrub/train_val_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Data"
  5 |   top: "data"
  6 |   top: "label"
  7 |   include {
  8 |     phase: TRAIN
  9 |   }
 10 |   transform_param {
 11 |     mirror: true
 12 |     crop_size: 227
 13 |     mean_file: "facescrub_mean.binaryproto"
 14 |   }
 15 | # mean pixel / channel-wise mean instead of mean image
 16 | #  transform_param {
 17 | #    crop_size: 227
 18 | #    mean_value: 104
 19 | #    mean_value: 117
 20 | #    mean_value: 123
 21 | #    mirror: true
 22 | #  }
 23 |   data_param {
 24 |     source: "facescrub_train_lmdb"
 25 |     batch_size: 256
 26 |     backend: LMDB
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "data"
 31 |   type: "Data"
 32 |   top: "data"
 33 |   top: "label"
 34 |   include {
 35 |     phase: TEST
 36 |   }
 37 |   transform_param {
 38 |     mirror: false
 39 |     crop_size: 227
 40 |     mean_file: "facescrub_mean.binaryproto"
 41 |   }
 42 | # mean pixel / channel-wise mean instead of mean image
 43 | #  transform_param {
 44 | #    crop_size: 227
 45 | #    mean_value: 104
 46 | #    mean_value: 117
 47 | #    mean_value: 123
 48 | #    mirror: false
 49 | #  }
 50 |   data_param {
 51 |     source: "facescrub_test_lmdb"
 52 |     batch_size: 50
 53 |     backend: LMDB
 54 |   }
 55 | }
 56 | layer {
 57 |   name: "conv1"
 58 |   type: "Convolution"
 59 |   bottom: "data"
 60 |   top: "conv1"
 61 |   param {
 62 |     lr_mult: 1
 63 |     decay_mult: 1
 64 |   }
 65 |   param {
 66 |     lr_mult: 2
 67 |     decay_mult: 0
 68 |   }
 69 |   convolution_param {
 70 |     num_output: 96
 71 |     kernel_size: 11
 72 |     stride: 4
 73 |     weight_filler {
 74 |       type: "gaussian"
 75 |       std: 0.01
 76 |     }
 77 |     bias_filler {
 78 |       type: "constant"
 79 |       value: 0
 80 |     }
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu1"
 85 |   type: "ReLU"
 86 |   bottom: "conv1"
 87 |   top: "conv1"
 88 | }
 89 | layer {
 90 |   name: "pool1"
 91 |   type: "Pooling"
 92 |   bottom: "conv1"
 93 |   top: "pool1"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     stride: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "norm1"
102 |   type: "LRN"
103 |   bottom: "pool1"
104 |   top: "norm1"
105 |   lrn_param {
106 |     local_size: 5
107 |     alpha: 0.0001
108 |     beta: 0.75
109 |   }
110 | }
111 | layer {
112 |   name: "conv2"
113 |   type: "Convolution"
114 |   bottom: "norm1"
115 |   top: "conv2"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 256
126 |     pad: 2
127 |     kernel_size: 5
128 |     group: 2
129 |     weight_filler {
130 |       type: "gaussian"
131 |       std: 0.01
132 |     }
133 |     bias_filler {
134 |       type: "constant"
135 |       value: 1
136 |     }
137 |   }
138 | }
139 | layer {
140 |   name: "relu2"
141 |   type: "ReLU"
142 |   bottom: "conv2"
143 |   top: "conv2"
144 | }
145 | layer {
146 |   name: "pool2"
147 |   type: "Pooling"
148 |   bottom: "conv2"
149 |   top: "pool2"
150 |   pooling_param {
151 |     pool: MAX
152 |     kernel_size: 3
153 |     stride: 2
154 |   }
155 | }
156 | layer {
157 |   name: "norm2"
158 |   type: "LRN"
159 |   bottom: "pool2"
160 |   top: "norm2"
161 |   lrn_param {
162 |     local_size: 5
163 |     alpha: 0.0001
164 |     beta: 0.75
165 |   }
166 | }
167 | layer {
168 |   name: "conv3"
169 |   type: "Convolution"
170 |   bottom: "norm2"
171 |   top: "conv3"
172 |   param {
173 |     lr_mult: 1
174 |     decay_mult: 1
175 |   }
176 |   param {
177 |     lr_mult: 2
178 |     decay_mult: 0
179 |   }
180 |   convolution_param {
181 |     num_output: 384
182 |     pad: 1
183 |     kernel_size: 3
184 |     weight_filler {
185 |       type: "gaussian"
186 |       std: 0.01
187 |     }
188 |     bias_filler {
189 |       type: "constant"
190 |       value: 0
191 |     }
192 |   }
193 | }
194 | layer {
195 |   name: "relu3"
196 |   type: "ReLU"
197 |   bottom: "conv3"
198 |   top: "conv3"
199 | }
200 | layer {
201 |   name: "conv4"
202 |   type: "Convolution"
203 |   bottom: "conv3"
204 |   top: "conv4"
205 |   param {
206 |     lr_mult: 1
207 |     decay_mult: 1
208 |   }
209 |   param {
210 |     lr_mult: 2
211 |     decay_mult: 0
212 |   }
213 |   convolution_param {
214 |     num_output: 384
215 |     pad: 1
216 |     kernel_size: 3
217 |     group: 2
218 |     weight_filler {
219 |       type: "gaussian"
220 |       std: 0.01
221 |     }
222 |     bias_filler {
223 |       type: "constant"
224 |       value: 1
225 |     }
226 |   }
227 | }
228 | layer {
229 |   name: "relu4"
230 |   type: "ReLU"
231 |   bottom: "conv4"
232 |   top: "conv4"
233 | }
234 | layer {
235 |   name: "conv5"
236 |   type: "Convolution"
237 |   bottom: "conv4"
238 |   top: "conv5"
239 |   param {
240 |     lr_mult: 1
241 |     decay_mult: 1
242 |   }
243 |   param {
244 |     lr_mult: 2
245 |     decay_mult: 0
246 |   }
247 |   convolution_param {
248 |     num_output: 256
249 |     pad: 1
250 |     kernel_size: 3
251 |     group: 2
252 |     weight_filler {
253 |       type: "gaussian"
254 |       std: 0.01
255 |     }
256 |     bias_filler {
257 |       type: "constant"
258 |       value: 1
259 |     }
260 |   }
261 | }
262 | layer {
263 |   name: "relu5"
264 |   type: "ReLU"
265 |   bottom: "conv5"
266 |   top: "conv5"
267 | }
268 | layer {
269 |   name: "pool5"
270 |   type: "Pooling"
271 |   bottom: "conv5"
272 |   top: "pool5"
273 |   pooling_param {
274 |     pool: MAX
275 |     kernel_size: 3
276 |     stride: 2
277 |   }
278 | }
279 | layer {
280 |   name: "fc6"
281 |   type: "InnerProduct"
282 |   bottom: "pool5"
283 |   top: "fc6"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   inner_product_param {
293 |     num_output: 4096
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.005
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 1
301 |     }
302 |   }
303 | }
304 | layer {
305 |   name: "relu6"
306 |   type: "ReLU"
307 |   bottom: "fc6"
308 |   top: "fc6"
309 | }
310 | layer {
311 |   name: "drop6"
312 |   type: "Dropout"
313 |   bottom: "fc6"
314 |   top: "fc6"
315 |   dropout_param {
316 |     dropout_ratio: 0.5
317 |   }
318 | }
319 | layer {
320 |   name: "fc7"
321 |   type: "InnerProduct"
322 |   bottom: "fc6"
323 |   top: "fc7"
324 |   param {
325 |     lr_mult: 1
326 |     decay_mult: 1
327 |   }
328 |   param {
329 |     lr_mult: 2
330 |     decay_mult: 0
331 |   }
332 |   inner_product_param {
333 |     num_output: 4096
334 |     weight_filler {
335 |       type: "gaussian"
336 |       std: 0.005
337 |     }
338 |     bias_filler {
339 |       type: "constant"
340 |       value: 1
341 |     }
342 |   }
343 | }
344 | layer {
345 |   name: "relu7"
346 |   type: "ReLU"
347 |   bottom: "fc7"
348 |   top: "fc7"
349 | }
350 | layer {
351 |   name: "drop7"
352 |   type: "Dropout"
353 |   bottom: "fc7"
354 |   top: "fc7"
355 |   dropout_param {
356 |     dropout_ratio: 0.5
357 |   }
358 | }
359 | layer {
360 |   name: "latent"
361 |   type: "InnerProduct"
362 |   bottom: "fc7"
363 |   top: "latent"
364 |   param {
365 |     lr_mult: 1
366 |     decay_mult: 1
367 |   }
368 |   param {
369 |     lr_mult: 2
370 |     decay_mult: 0
371 |   }
372 |   inner_product_param {
373 |     num_output: 48
374 |     weight_filler {
375 |       type: "gaussian"
376 |       std: 0.01
377 |     }
378 |     bias_filler {
379 |       type: "constant"
380 |       value: 0
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "fc8_new"
386 |   type: "InnerProduct"
387 |   bottom: "latent"
388 |   top: "fc8_new"
389 |   param {
390 |     lr_mult: 1
391 |     decay_mult: 1
392 |   }
393 |   param {
394 |     lr_mult: 2
395 |     decay_mult: 0
396 |   }
397 |   inner_product_param {
398 |     num_output: 2
399 |     weight_filler {
400 |       type: "gaussian"
401 |       std: 0.01
402 |     }
403 |     bias_filler {
404 |       type: "constant"
405 |       value: 0
406 |     }
407 |   }
408 | }
409 | layer {
410 |   name: "accuracy"
411 |   type: "Accuracy"
412 |   bottom: "fc8_new"
413 |   bottom: "label"
414 |   top: "accuracy"
415 |   include {
416 |     phase: TEST
417 |   }
418 | }
419 | layer {
420 |   name: "loss"
421 |   type: "SoftmaxWithLoss"
422 |   bottom: "fc8_new"
423 |   bottom: "label"
424 |   top: "loss"
425 | }
426 | 


--------------------------------------------------------------------------------
/examples/shoes7k/convert_shoes7k_data.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Convert shoes7k dataset to train/test lmdb dataset
  3 | '''
  4 | import os
  5 | import cv2
  6 | import lmdb
  7 | import numpy as np
  8 | from caffe.proto import caffe_pb2
  9 | 
 10 | import config
 11 | 
 12 | 
 13 | IM_HEIGHT = 227
 14 | IM_WIDTH = 227
 15 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 16 | TRAIN_LMDB = os.path.join(SCRIPT_PATH, 'shoes7k_train_lmdb')
 17 | TEST_LMDB = os.path.join(SCRIPT_PATH, 'shoes7k_test_lmdb')
 18 | 
 19 | 
 20 | def get_images(path):
 21 |     """get images under path into a numpy array"""
 22 |     image_types = ['.jpg', '.png']
 23 |     images = [os.path.join(path, i) for i in os.listdir(path) if i[-4:]
 24 |               in image_types]
 25 |     return np.array(images)
 26 | 
 27 | 
 28 | def get_tr_te_images(ratio):
 29 |     """get training and test images"""
 30 |     pos_images = get_images(config.eg_shoes7k_pos_path)
 31 |     neg_images = get_images(config.eg_shoes7k_neg_path)
 32 |     np.random.shuffle(pos_images)
 33 |     np.random.shuffle(neg_images)
 34 | 
 35 |     pos_split = int(pos_images.shape[0] * ratio)
 36 |     pos_train_images = pos_images[:pos_split]
 37 |     pos_test_images = pos_images[pos_split:]
 38 |     pos_train_labels = np.ones(pos_train_images.shape[0]).astype('int')
 39 |     pos_test_labels = np.ones(pos_test_images.shape[0]).astype('int')
 40 | 
 41 |     neg_split = int(neg_images.shape[0] * ratio)
 42 |     neg_train_images = neg_images[:neg_split]
 43 |     neg_test_images = neg_images[neg_split:]
 44 |     neg_train_labels = np.zeros(neg_train_images.shape[0]).astype('int')
 45 |     neg_test_labels = np.zeros(neg_test_images.shape[0]).astype('int')
 46 | 
 47 |     train_images = np.concatenate((pos_train_images, neg_train_images))
 48 |     train_labels = np.concatenate((pos_train_labels, neg_train_labels))
 49 |     test_images = np.concatenate((pos_test_images, neg_test_images))
 50 |     test_labels = np.concatenate((pos_test_labels, neg_test_labels))
 51 | 
 52 |     # shuffle
 53 |     train_idxs = np.arange(train_images.shape[0])
 54 |     test_idxs = np.arange(test_images.shape[0])
 55 |     np.random.shuffle(train_idxs)
 56 |     np.random.shuffle(test_idxs)
 57 |     return (train_images[train_idxs], train_labels[train_idxs],
 58 |             test_images[test_idxs], test_labels[test_idxs])
 59 | 
 60 | 
 61 | def save_to_lmdb(images, labels, lmdb_file):
 62 |     if not os.path.exists(lmdb_file):
 63 |         batch_size = 256
 64 |         lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))
 65 |         lmdb_txn = lmdb_env.begin(write=True)
 66 |         item_id = 0
 67 |         datum = caffe_pb2.Datum()
 68 | 
 69 |         for i in range(images.shape[0]):
 70 |             im = cv2.imread(images[i])
 71 |             im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH))
 72 |             datum.channels = im.shape[2]
 73 |             datum.height = im.shape[0]
 74 |             datum.width = im.shape[1]
 75 |             datum.data = im.tobytes()
 76 |             datum.label = labels[i]
 77 |             keystr = '{:0>8d}'.format(item_id)
 78 |             lmdb_txn.put(keystr, datum.SerializeToString())
 79 | 
 80 |             # write batch
 81 |             if (item_id + 1) % batch_size == 0:
 82 |                 lmdb_txn.commit()
 83 |                 lmdb_txn = lmdb_env.begin(write=True)
 84 |                 print('converted {} images'.format(item_id + 1))
 85 | 
 86 |             item_id += 1
 87 | 
 88 |         # write last batch
 89 |         if (item_id + 1) % batch_size != 0:
 90 |             lmdb_txn.commit()
 91 |             print('converted {} images'.format(item_id + 1))
 92 |             print('Generated ' + lmdb_file)
 93 |     else:
 94 |         print(lmdb_file + ' already exists')
 95 | 
 96 | 
 97 | def convert_shoes7k_data(ratio=.8):
 98 |     """read shoes7k pos and neg images and convert to lmdb"""
 99 |     tr_images, tr_labels, te_images, te_labels = get_tr_te_images(ratio)
100 |     save_to_lmdb(tr_images, tr_labels, TRAIN_LMDB)
101 |     save_to_lmdb(te_images, te_labels, TEST_LMDB)
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     convert_shoes7k_data()
106 | 


--------------------------------------------------------------------------------
/examples/shoes7k/deploy.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Input"
  5 |   top: "data"
  6 |   input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
  7 | }
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   convolution_param {
 14 |     num_output: 96
 15 |     kernel_size: 11
 16 |     stride: 4
 17 |   }
 18 | }
 19 | layer {
 20 |   name: "relu1"
 21 |   type: "ReLU"
 22 |   bottom: "conv1"
 23 |   top: "conv1"
 24 | }
 25 | layer {
 26 |   name: "pool1"
 27 |   type: "Pooling"
 28 |   bottom: "conv1"
 29 |   top: "pool1"
 30 |   pooling_param {
 31 |     pool: MAX
 32 |     kernel_size: 3
 33 |     stride: 2
 34 |   }
 35 | }
 36 | layer {
 37 |   name: "norm1"
 38 |   type: "LRN"
 39 |   bottom: "pool1"
 40 |   top: "norm1"
 41 |   lrn_param {
 42 |     local_size: 5
 43 |     alpha: 0.0001
 44 |     beta: 0.75
 45 |   }
 46 | }
 47 | layer {
 48 |   name: "conv2"
 49 |   type: "Convolution"
 50 |   bottom: "norm1"
 51 |   top: "conv2"
 52 |   convolution_param {
 53 |     num_output: 256
 54 |     pad: 2
 55 |     kernel_size: 5
 56 |     group: 2
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu2"
 61 |   type: "ReLU"
 62 |   bottom: "conv2"
 63 |   top: "conv2"
 64 | }
 65 | layer {
 66 |   name: "pool2"
 67 |   type: "Pooling"
 68 |   bottom: "conv2"
 69 |   top: "pool2"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 3
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "norm2"
 78 |   type: "LRN"
 79 |   bottom: "pool2"
 80 |   top: "norm2"
 81 |   lrn_param {
 82 |     local_size: 5
 83 |     alpha: 0.0001
 84 |     beta: 0.75
 85 |   }
 86 | }
 87 | layer {
 88 |   name: "conv3"
 89 |   type: "Convolution"
 90 |   bottom: "norm2"
 91 |   top: "conv3"
 92 |   convolution_param {
 93 |     num_output: 384
 94 |     pad: 1
 95 |     kernel_size: 3
 96 |   }
 97 | }
 98 | layer {
 99 |   name: "relu3"
100 |   type: "ReLU"
101 |   bottom: "conv3"
102 |   top: "conv3"
103 | }
104 | layer {
105 |   name: "conv4"
106 |   type: "Convolution"
107 |   bottom: "conv3"
108 |   top: "conv4"
109 |   convolution_param {
110 |     num_output: 384
111 |     pad: 1
112 |     kernel_size: 3
113 |     group: 2
114 |   }
115 | }
116 | layer {
117 |   name: "relu4"
118 |   type: "ReLU"
119 |   bottom: "conv4"
120 |   top: "conv4"
121 | }
122 | layer {
123 |   name: "conv5"
124 |   type: "Convolution"
125 |   bottom: "conv4"
126 |   top: "conv5"
127 |   convolution_param {
128 |     num_output: 256
129 |     pad: 1
130 |     kernel_size: 3
131 |     group: 2
132 |   }
133 | }
134 | layer {
135 |   name: "relu5"
136 |   type: "ReLU"
137 |   bottom: "conv5"
138 |   top: "conv5"
139 | }
140 | layer {
141 |   name: "pool5"
142 |   type: "Pooling"
143 |   bottom: "conv5"
144 |   top: "pool5"
145 |   pooling_param {
146 |     pool: MAX
147 |     kernel_size: 3
148 |     stride: 2
149 |   }
150 | }
151 | layer {
152 |   name: "fc6"
153 |   type: "InnerProduct"
154 |   bottom: "pool5"
155 |   top: "fc6"
156 |   inner_product_param {
157 |     num_output: 4096
158 |   }
159 | }
160 | layer {
161 |   name: "relu6"
162 |   type: "ReLU"
163 |   bottom: "fc6"
164 |   top: "fc6"
165 | }
166 | layer {
167 |   name: "drop6"
168 |   type: "Dropout"
169 |   bottom: "fc6"
170 |   top: "fc6"
171 |   dropout_param {
172 |     dropout_ratio: 0.5
173 |   }
174 | }
175 | layer {
176 |   name: "fc7"
177 |   type: "InnerProduct"
178 |   bottom: "fc6"
179 |   top: "fc7"
180 |   inner_product_param {
181 |     num_output: 4096
182 |   }
183 | }
184 | layer {
185 |   name: "relu7"
186 |   type: "ReLU"
187 |   bottom: "fc7"
188 |   top: "fc7"
189 | }
190 | layer {
191 |   name: "drop7"
192 |   type: "Dropout"
193 |   bottom: "fc7"
194 |   top: "fc7"
195 |   dropout_param {
196 |     dropout_ratio: 0.5
197 |   }
198 | }
199 | layer {
200 |   name: "fc8"
201 |   type: "InnerProduct"
202 |   bottom: "fc7"
203 |   top: "fc8"
204 |   inner_product_param {
205 |     num_output: 2
206 |   }
207 | }
208 | layer {
209 |   name: "prob"
210 |   type: "Softmax"
211 |   bottom: "fc8"
212 |   top: "prob"
213 | }
214 | 


--------------------------------------------------------------------------------
/examples/shoes7k/deploy_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Input"
  5 |   top: "data"
  6 |   input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
  7 | }
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   convolution_param {
 14 |     num_output: 96
 15 |     kernel_size: 11
 16 |     stride: 4
 17 |   }
 18 | }
 19 | layer {
 20 |   name: "relu1"
 21 |   type: "ReLU"
 22 |   bottom: "conv1"
 23 |   top: "conv1"
 24 | }
 25 | layer {
 26 |   name: "pool1"
 27 |   type: "Pooling"
 28 |   bottom: "conv1"
 29 |   top: "pool1"
 30 |   pooling_param {
 31 |     pool: MAX
 32 |     kernel_size: 3
 33 |     stride: 2
 34 |   }
 35 | }
 36 | layer {
 37 |   name: "norm1"
 38 |   type: "LRN"
 39 |   bottom: "pool1"
 40 |   top: "norm1"
 41 |   lrn_param {
 42 |     local_size: 5
 43 |     alpha: 0.0001
 44 |     beta: 0.75
 45 |   }
 46 | }
 47 | layer {
 48 |   name: "conv2"
 49 |   type: "Convolution"
 50 |   bottom: "norm1"
 51 |   top: "conv2"
 52 |   convolution_param {
 53 |     num_output: 256
 54 |     pad: 2
 55 |     kernel_size: 5
 56 |     group: 2
 57 |   }
 58 | }
 59 | layer {
 60 |   name: "relu2"
 61 |   type: "ReLU"
 62 |   bottom: "conv2"
 63 |   top: "conv2"
 64 | }
 65 | layer {
 66 |   name: "pool2"
 67 |   type: "Pooling"
 68 |   bottom: "conv2"
 69 |   top: "pool2"
 70 |   pooling_param {
 71 |     pool: MAX
 72 |     kernel_size: 3
 73 |     stride: 2
 74 |   }
 75 | }
 76 | layer {
 77 |   name: "norm2"
 78 |   type: "LRN"
 79 |   bottom: "pool2"
 80 |   top: "norm2"
 81 |   lrn_param {
 82 |     local_size: 5
 83 |     alpha: 0.0001
 84 |     beta: 0.75
 85 |   }
 86 | }
 87 | layer {
 88 |   name: "conv3"
 89 |   type: "Convolution"
 90 |   bottom: "norm2"
 91 |   top: "conv3"
 92 |   convolution_param {
 93 |     num_output: 384
 94 |     pad: 1
 95 |     kernel_size: 3
 96 |   }
 97 | }
 98 | layer {
 99 |   name: "relu3"
100 |   type: "ReLU"
101 |   bottom: "conv3"
102 |   top: "conv3"
103 | }
104 | layer {
105 |   name: "conv4"
106 |   type: "Convolution"
107 |   bottom: "conv3"
108 |   top: "conv4"
109 |   convolution_param {
110 |     num_output: 384
111 |     pad: 1
112 |     kernel_size: 3
113 |     group: 2
114 |   }
115 | }
116 | layer {
117 |   name: "relu4"
118 |   type: "ReLU"
119 |   bottom: "conv4"
120 |   top: "conv4"
121 | }
122 | layer {
123 |   name: "conv5"
124 |   type: "Convolution"
125 |   bottom: "conv4"
126 |   top: "conv5"
127 |   convolution_param {
128 |     num_output: 256
129 |     pad: 1
130 |     kernel_size: 3
131 |     group: 2
132 |   }
133 | }
134 | layer {
135 |   name: "relu5"
136 |   type: "ReLU"
137 |   bottom: "conv5"
138 |   top: "conv5"
139 | }
140 | layer {
141 |   name: "pool5"
142 |   type: "Pooling"
143 |   bottom: "conv5"
144 |   top: "pool5"
145 |   pooling_param {
146 |     pool: MAX
147 |     kernel_size: 3
148 |     stride: 2
149 |   }
150 | }
151 | layer {
152 |   name: "fc6"
153 |   type: "InnerProduct"
154 |   bottom: "pool5"
155 |   top: "fc6"
156 |   inner_product_param {
157 |     num_output: 4096
158 |   }
159 | }
160 | layer {
161 |   name: "relu6"
162 |   type: "ReLU"
163 |   bottom: "fc6"
164 |   top: "fc6"
165 | }
166 | layer {
167 |   name: "drop6"
168 |   type: "Dropout"
169 |   bottom: "fc6"
170 |   top: "fc6"
171 |   dropout_param {
172 |     dropout_ratio: 0.5
173 |   }
174 | }
175 | layer {
176 |   name: "fc7"
177 |   type: "InnerProduct"
178 |   bottom: "fc6"
179 |   top: "fc7"
180 |   inner_product_param {
181 |     num_output: 4096
182 |   }
183 | }
184 | layer {
185 |   name: "relu7"
186 |   type: "ReLU"
187 |   bottom: "fc7"
188 |   top: "fc7"
189 | }
190 | layer {
191 |   name: "drop7"
192 |   type: "Dropout"
193 |   bottom: "fc7"
194 |   top: "fc7"
195 |   dropout_param {
196 |     dropout_ratio: 0.5
197 |   }
198 | }
199 | layer {
200 |   name: "latent"
201 |   type: "InnerProduct"
202 |   bottom: "fc7"
203 |   top: "latent"
204 |   inner_product_param {
205 |     num_output: 48
206 |   }
207 | }
208 | layer {
209 |   name: "fc8_new"
210 |   type: "InnerProduct"
211 |   bottom: "latent"
212 |   top: "fc8_new"
213 |   inner_product_param {
214 |     num_output: 2
215 |   }
216 | }
217 | layer {
218 |   name: "prob"
219 |   type: "Softmax"
220 |   bottom: "fc8_new"
221 |   top: "prob"
222 | }
223 | 


--------------------------------------------------------------------------------
/examples/shoes7k/eval.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Evaluation
 3 | 
 4 | Find the top-5 similar images
 5 | '''
 6 | import os
 7 | import sys
 8 | import subprocess
 9 | 
10 | from convert_shoes7k_data import get_images
11 | 
12 | sys.path.append('../..')
13 | import config
14 | from retrieve import retrieve_image
15 | 
16 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
17 | EVAL_PATH = os.path.join(SCRIPT_PATH, 'evaluation')
18 | 
19 | # define model params
20 | model_file = 'shoes7k_model_with_latent_layer_iter_10000.caffemodel'
21 | deploy_file = 'deploy_with_latent_layer.prototxt'
22 | imagemean_file = 'shoes7k_mean.npy'
23 | MODEL_FILE = os.path.join(SCRIPT_PATH, model_file)
24 | DEPLOY_FILE = os.path.join(SCRIPT_PATH, deploy_file)
25 | IMAGE_MEAN = os.path.join(SCRIPT_PATH, imagemean_file)
26 | 
27 | 
28 | def retrieve_single_image(image_file, d_threshold):
29 |     """retrive similar images and copy
30 |     the retrieved images to evaluation folder"""
31 |     retrieved, dist = retrieve_image(image_file, MODEL_FILE, DEPLOY_FILE,
32 |                                      IMAGE_MEAN)
33 |     if dist[-1] < d_threshold and len(dist) > 1:
34 |         # this is a image that has acceptable similar top-5 images
35 |         print('Retrieved image ' + image_file)
36 |         image_name = os.path.basename(image_file)
37 |         image_name = image_name.split('.')[0]
38 |         eval_res_dir = os.path.join(EVAL_PATH, image_name)
39 |         if not os.path.exists(eval_res_dir):
40 |             os.mkdir(eval_res_dir)
41 | 
42 |         image_id = 0
43 |         for similar_img in retrieved:
44 |             img_name = os.path.basename(similar_img)
45 |             res_img = '_'.join([str(image_id), img_name])
46 |             res_img = os.path.join(eval_res_dir, res_img)
47 |             subprocess.call(['cp', similar_img, res_img])
48 |             image_id += 1
49 | 
50 | 
51 | def eval_shoes7k(d_threshold=2):
52 |     """Evaluate through all positive images"""
53 |     if not os.path.exists(EVAL_PATH):
54 |         os.mkdir(EVAL_PATH)
55 | 
56 |     images = get_images(config.eg_shoes7k_pos_path)
57 |     for image_file in images:
58 |         retrieve_single_image(image_file, d_threshold)
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     eval_shoes7k()
63 | 


--------------------------------------------------------------------------------
/examples/shoes7k/generate_feature_mat.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Generate feature matrix
 3 | 
 4 | Prepare files for image retrieval:
 5 | image_files.npy
 6 | fc7_features.npy
 7 | latent_features.npy
 8 | '''
 9 | import os
10 | import sys
11 | import numpy as np
12 | 
13 | from convert_shoes7k_data import get_images
14 | 
15 | sys.path.append('../..')
16 | import config
17 | from layer_features import layer_features
18 | from retrieve import binary_hash_codes
19 | 
20 | SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
21 | 
22 | 
23 | def generate_feature_matrix(model_file, deploy_file, imagemean_file):
24 |     """generate feature matrix of image dataset
25 |     save the matrix as npy file"""
26 |     # get image files
27 |     pos_images = get_images(config.eg_shoes7k_pos_path)
28 |     neg_images = get_images(config.eg_shoes7k_neg_path)
29 | 
30 |     image_files = np.concatenate((pos_images, neg_images))
31 |     np.random.shuffle(image_files)
32 | 
33 |     # feed the network and get feature vectors
34 |     feature_mat = {'fc7': [], 'latent': []}
35 | 
36 |     batch = []
37 |     batch_size = 0
38 |     for image in image_files:
39 |         batch.append(image)
40 |         batch_size += 1
41 | 
42 |         if batch_size == 1000:
43 |             for layer, mat in layer_features(feature_mat.keys(), model_file,
44 |                                              deploy_file, imagemean_file,
45 |                                              batch):
46 |                 if layer == 'latent':
47 |                     mat = binary_hash_codes(mat)
48 | 
49 |                 feature_mat[layer].extend(mat)
50 | 
51 |             batch = []
52 |             batch_size = 0
53 | 
54 |     if batch_size > 0:
55 |         for layer, mat in layer_features(feature_mat.keys(), model_file,
56 |                                          deploy_file, imagemean_file, batch):
57 |             if layer == 'latent':
58 |                 mat = binary_hash_codes(mat)
59 | 
60 |             feature_mat[layer].extend(mat)
61 | 
62 |     # save to npy files
63 |     np.save(os.path.join(SCRIPT_PATH, 'image_files.npy'), image_files)
64 |     for layer in feature_mat.keys():
65 |         npy_file = os.path.join(SCRIPT_PATH, layer + '_features.npy')
66 |         np.save(npy_file, np.array(feature_mat[layer]))
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     if len(sys.argv) != 4:
71 |         usage = 'Usage: python generate_feature_mat.py' + \
72 |                 ' model_file deploy_file imagemean_file'
73 |         print(usage)
74 |     else:
75 |         model_file = sys.argv[1]
76 |         deploy_file = sys.argv[2]
77 |         imagemean_file = sys.argv[3]
78 | 
79 |         is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\
80 |             and os.path.exists(imagemean_file)
81 | 
82 |         if is_exists:
83 |             generate_feature_matrix(model_file, deploy_file, imagemean_file)
84 | 


--------------------------------------------------------------------------------
/examples/shoes7k/retrieve.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # init env
 4 | BASEDIR=$(dirname "$0")
 5 | PROJROOT=$BASEDIR/../..
 6 | MODEL_FILE="$BASEDIR/shoes7k_model_with_latent_layer_iter_10000.caffemodel"
 7 | DEPLOY_FILE="$BASEDIR/deploy_with_latent_layer.prototxt"
 8 | MEAN_FILE="$BASEDIR/shoes7k_mean.npy"
 9 | IMAGE_NPY="$BASEDIR/image_files.npy"
10 | FC7_NPY="$BASEDIR/fc7_features.npy"
11 | LATENT_NPY="$BASEDIR/latent_features.npy"
12 | TARGET="$1"
13 | 
14 | # check model
15 | if [ ! -e $MODEL_FILE ] || [ ! -e $DEPLOY_FILE ] || [ ! -e $MEAN_FILE ]; then
16 |   echo "Please train the model at first"
17 |   echo "./train.sh"
18 |   exit
19 | fi
20 | 
21 | # parse parsemeters
22 | if [[ $TARGET == "-h" ]] || [[ $TARGET == "--help" ]]; then
23 |   echo "Usage: ./retrieve.sh image_to_retrieve.jpg"
24 |   exit
25 | fi
26 | 
27 | if [ -e $IMAGE_NPY ] && [ -e $FC7_NPY ] && [ -e $LATENT_NPY ]; then
28 |   python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET
29 | else
30 |   echo "generate feature matrix..."
31 |   python $BASEDIR/generate_feature_mat.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE
32 |   python $PROJROOT/retrieve.py $MODEL_FILE $DEPLOY_FILE $MEAN_FILE $TARGET
33 | fi
34 | 
35 | 


--------------------------------------------------------------------------------
/examples/shoes7k/solver.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"
 2 | test_iter: 1000
 3 | test_interval: 1000
 4 | base_lr: 0.001
 5 | lr_policy: "step"
 6 | gamma: 0.1
 7 | stepsize: 5000
 8 | display: 20
 9 | max_iter: 10000
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: 2000
13 | snapshot_prefix: "shoes7k_model"
14 | solver_mode: GPU
15 | 


--------------------------------------------------------------------------------
/examples/shoes7k/solver_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val_with_latent_layer.prototxt"
 2 | test_iter: 1000
 3 | test_interval: 1000
 4 | base_lr: 0.001
 5 | lr_policy: "step"
 6 | gamma: 0.1
 7 | stepsize: 5000
 8 | display: 20
 9 | max_iter: 10000
10 | momentum: 0.9
11 | weight_decay: 0.0005
12 | snapshot: 2000
13 | snapshot_prefix: "shoes7k_model_with_latent_layer"
14 | solver_mode: GPU
15 | 


--------------------------------------------------------------------------------
/examples/shoes7k/train.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # check caffe
 4 | if [[ $CAFFEROOT == '' ]]; then
 5 |   echo "\$CAFFEROOT not found, please define it at first."
 6 |   echo "export CAFFEROOT=/path/to/your/caffe"
 7 |   exit
 8 | fi
 9 | 
10 | # init env
11 | TOOLS=$CAFFEROOT/build/tools
12 | BASEDIR=$(dirname "$0")
13 | PROJROOT=$BASEDIR/../../
14 | CURRDIR=$(pwd)
15 | 
16 | # check dataset
17 | if [ ! -d $BASEDIR/shoes7k_train_lmdb ] || [ ! -d $BASEDIR/shoes7k_test_lmdb ] ; then
18 |   echo "The shoes7k dataset folder cannot found"
19 |   echo "Please execute convert_shoes7k_data.py at first"
20 |   exit
21 | fi
22 | 
23 | # compute mean
24 | $TOOLS/compute_image_mean $BASEDIR/shoes7k_train_lmdb $BASEDIR/shoes7k_mean.binaryproto
25 | python $PROJROOT/tools/convert_protomean.py $BASEDIR/shoes7k_mean.binaryproto $BASEDIR/shoes7k_mean.npy
26 | 
27 | cd $BASEDIR
28 | 
29 | # pretrain
30 | $TOOLS/caffe train --solver=solver.prototxt 2>&1 | tee pretrain.log
31 | 
32 | # train net with latent layer
33 | $TOOLS/caffe train \
34 |   --solver=solver_with_latent_layer.prototxt \
35 |   --weights=shoes7k_model_iter_10000.caffemodel 2>&1 | tee train.log
36 | 
37 | cd $CURRDIR
38 | 


--------------------------------------------------------------------------------
/examples/shoes7k/train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Data"
  5 |   top: "data"
  6 |   top: "label"
  7 |   include {
  8 |     phase: TRAIN
  9 |   }
 10 |   transform_param {
 11 |     mirror: true
 12 |     crop_size: 227
 13 |     mean_file: "shoes7k_mean.binaryproto"
 14 |   }
 15 | # mean pixel / channel-wise mean instead of mean image
 16 | #  transform_param {
 17 | #    crop_size: 227
 18 | #    mean_value: 104
 19 | #    mean_value: 117
 20 | #    mean_value: 123
 21 | #    mirror: true
 22 | #  }
 23 |   data_param {
 24 |     source: "shoes7k_train_lmdb"
 25 |     batch_size: 256
 26 |     backend: LMDB
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "data"
 31 |   type: "Data"
 32 |   top: "data"
 33 |   top: "label"
 34 |   include {
 35 |     phase: TEST
 36 |   }
 37 |   transform_param {
 38 |     mirror: false
 39 |     crop_size: 227
 40 |     mean_file: "shoes7k_mean.binaryproto"
 41 |   }
 42 | # mean pixel / channel-wise mean instead of mean image
 43 | #  transform_param {
 44 | #    crop_size: 227
 45 | #    mean_value: 104
 46 | #    mean_value: 117
 47 | #    mean_value: 123
 48 | #    mirror: false
 49 | #  }
 50 |   data_param {
 51 |     source: "shoes7k_test_lmdb"
 52 |     batch_size: 50
 53 |     backend: LMDB
 54 |   }
 55 | }
 56 | layer {
 57 |   name: "conv1"
 58 |   type: "Convolution"
 59 |   bottom: "data"
 60 |   top: "conv1"
 61 |   param {
 62 |     lr_mult: 1
 63 |     decay_mult: 1
 64 |   }
 65 |   param {
 66 |     lr_mult: 2
 67 |     decay_mult: 0
 68 |   }
 69 |   convolution_param {
 70 |     num_output: 96
 71 |     kernel_size: 11
 72 |     stride: 4
 73 |     weight_filler {
 74 |       type: "gaussian"
 75 |       std: 0.01
 76 |     }
 77 |     bias_filler {
 78 |       type: "constant"
 79 |       value: 0
 80 |     }
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu1"
 85 |   type: "ReLU"
 86 |   bottom: "conv1"
 87 |   top: "conv1"
 88 | }
 89 | layer {
 90 |   name: "pool1"
 91 |   type: "Pooling"
 92 |   bottom: "conv1"
 93 |   top: "pool1"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     stride: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "norm1"
102 |   type: "LRN"
103 |   bottom: "pool1"
104 |   top: "norm1"
105 |   lrn_param {
106 |     local_size: 5
107 |     alpha: 0.0001
108 |     beta: 0.75
109 |   }
110 | }
111 | layer {
112 |   name: "conv2"
113 |   type: "Convolution"
114 |   bottom: "norm1"
115 |   top: "conv2"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 256
126 |     pad: 2
127 |     kernel_size: 5
128 |     group: 2
129 |     weight_filler {
130 |       type: "gaussian"
131 |       std: 0.01
132 |     }
133 |     bias_filler {
134 |       type: "constant"
135 |       value: 1
136 |     }
137 |   }
138 | }
139 | layer {
140 |   name: "relu2"
141 |   type: "ReLU"
142 |   bottom: "conv2"
143 |   top: "conv2"
144 | }
145 | layer {
146 |   name: "pool2"
147 |   type: "Pooling"
148 |   bottom: "conv2"
149 |   top: "pool2"
150 |   pooling_param {
151 |     pool: MAX
152 |     kernel_size: 3
153 |     stride: 2
154 |   }
155 | }
156 | layer {
157 |   name: "norm2"
158 |   type: "LRN"
159 |   bottom: "pool2"
160 |   top: "norm2"
161 |   lrn_param {
162 |     local_size: 5
163 |     alpha: 0.0001
164 |     beta: 0.75
165 |   }
166 | }
167 | layer {
168 |   name: "conv3"
169 |   type: "Convolution"
170 |   bottom: "norm2"
171 |   top: "conv3"
172 |   param {
173 |     lr_mult: 1
174 |     decay_mult: 1
175 |   }
176 |   param {
177 |     lr_mult: 2
178 |     decay_mult: 0
179 |   }
180 |   convolution_param {
181 |     num_output: 384
182 |     pad: 1
183 |     kernel_size: 3
184 |     weight_filler {
185 |       type: "gaussian"
186 |       std: 0.01
187 |     }
188 |     bias_filler {
189 |       type: "constant"
190 |       value: 0
191 |     }
192 |   }
193 | }
194 | layer {
195 |   name: "relu3"
196 |   type: "ReLU"
197 |   bottom: "conv3"
198 |   top: "conv3"
199 | }
200 | layer {
201 |   name: "conv4"
202 |   type: "Convolution"
203 |   bottom: "conv3"
204 |   top: "conv4"
205 |   param {
206 |     lr_mult: 1
207 |     decay_mult: 1
208 |   }
209 |   param {
210 |     lr_mult: 2
211 |     decay_mult: 0
212 |   }
213 |   convolution_param {
214 |     num_output: 384
215 |     pad: 1
216 |     kernel_size: 3
217 |     group: 2
218 |     weight_filler {
219 |       type: "gaussian"
220 |       std: 0.01
221 |     }
222 |     bias_filler {
223 |       type: "constant"
224 |       value: 1
225 |     }
226 |   }
227 | }
228 | layer {
229 |   name: "relu4"
230 |   type: "ReLU"
231 |   bottom: "conv4"
232 |   top: "conv4"
233 | }
234 | layer {
235 |   name: "conv5"
236 |   type: "Convolution"
237 |   bottom: "conv4"
238 |   top: "conv5"
239 |   param {
240 |     lr_mult: 1
241 |     decay_mult: 1
242 |   }
243 |   param {
244 |     lr_mult: 2
245 |     decay_mult: 0
246 |   }
247 |   convolution_param {
248 |     num_output: 256
249 |     pad: 1
250 |     kernel_size: 3
251 |     group: 2
252 |     weight_filler {
253 |       type: "gaussian"
254 |       std: 0.01
255 |     }
256 |     bias_filler {
257 |       type: "constant"
258 |       value: 1
259 |     }
260 |   }
261 | }
262 | layer {
263 |   name: "relu5"
264 |   type: "ReLU"
265 |   bottom: "conv5"
266 |   top: "conv5"
267 | }
268 | layer {
269 |   name: "pool5"
270 |   type: "Pooling"
271 |   bottom: "conv5"
272 |   top: "pool5"
273 |   pooling_param {
274 |     pool: MAX
275 |     kernel_size: 3
276 |     stride: 2
277 |   }
278 | }
279 | layer {
280 |   name: "fc6"
281 |   type: "InnerProduct"
282 |   bottom: "pool5"
283 |   top: "fc6"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   inner_product_param {
293 |     num_output: 4096
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.005
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 1
301 |     }
302 |   }
303 | }
304 | layer {
305 |   name: "relu6"
306 |   type: "ReLU"
307 |   bottom: "fc6"
308 |   top: "fc6"
309 | }
310 | layer {
311 |   name: "drop6"
312 |   type: "Dropout"
313 |   bottom: "fc6"
314 |   top: "fc6"
315 |   dropout_param {
316 |     dropout_ratio: 0.5
317 |   }
318 | }
319 | layer {
320 |   name: "fc7"
321 |   type: "InnerProduct"
322 |   bottom: "fc6"
323 |   top: "fc7"
324 |   param {
325 |     lr_mult: 1
326 |     decay_mult: 1
327 |   }
328 |   param {
329 |     lr_mult: 2
330 |     decay_mult: 0
331 |   }
332 |   inner_product_param {
333 |     num_output: 4096
334 |     weight_filler {
335 |       type: "gaussian"
336 |       std: 0.005
337 |     }
338 |     bias_filler {
339 |       type: "constant"
340 |       value: 1
341 |     }
342 |   }
343 | }
344 | layer {
345 |   name: "relu7"
346 |   type: "ReLU"
347 |   bottom: "fc7"
348 |   top: "fc7"
349 | }
350 | layer {
351 |   name: "drop7"
352 |   type: "Dropout"
353 |   bottom: "fc7"
354 |   top: "fc7"
355 |   dropout_param {
356 |     dropout_ratio: 0.5
357 |   }
358 | }
359 | layer {
360 |   name: "fc8"
361 |   type: "InnerProduct"
362 |   bottom: "fc7"
363 |   top: "fc8"
364 |   param {
365 |     lr_mult: 1
366 |     decay_mult: 1
367 |   }
368 |   param {
369 |     lr_mult: 2
370 |     decay_mult: 0
371 |   }
372 |   inner_product_param {
373 |     num_output: 2
374 |     weight_filler {
375 |       type: "gaussian"
376 |       std: 0.01
377 |     }
378 |     bias_filler {
379 |       type: "constant"
380 |       value: 0
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "accuracy"
386 |   type: "Accuracy"
387 |   bottom: "fc8"
388 |   bottom: "label"
389 |   top: "accuracy"
390 |   include {
391 |     phase: TEST
392 |   }
393 | }
394 | layer {
395 |   name: "loss"
396 |   type: "SoftmaxWithLoss"
397 |   bottom: "fc8"
398 |   bottom: "label"
399 |   top: "loss"
400 | }
401 | 


--------------------------------------------------------------------------------
/examples/shoes7k/train_val_with_latent_layer.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CaffeNet"
  2 | layer {
  3 |   name: "data"
  4 |   type: "Data"
  5 |   top: "data"
  6 |   top: "label"
  7 |   include {
  8 |     phase: TRAIN
  9 |   }
 10 |   transform_param {
 11 |     mirror: true
 12 |     crop_size: 227
 13 |     mean_file: "shoes7k_mean.binaryproto"
 14 |   }
 15 | # mean pixel / channel-wise mean instead of mean image
 16 | #  transform_param {
 17 | #    crop_size: 227
 18 | #    mean_value: 104
 19 | #    mean_value: 117
 20 | #    mean_value: 123
 21 | #    mirror: true
 22 | #  }
 23 |   data_param {
 24 |     source: "shoes7k_train_lmdb"
 25 |     batch_size: 256
 26 |     backend: LMDB
 27 |   }
 28 | }
 29 | layer {
 30 |   name: "data"
 31 |   type: "Data"
 32 |   top: "data"
 33 |   top: "label"
 34 |   include {
 35 |     phase: TEST
 36 |   }
 37 |   transform_param {
 38 |     mirror: false
 39 |     crop_size: 227
 40 |     mean_file: "shoes7k_mean.binaryproto"
 41 |   }
 42 | # mean pixel / channel-wise mean instead of mean image
 43 | #  transform_param {
 44 | #    crop_size: 227
 45 | #    mean_value: 104
 46 | #    mean_value: 117
 47 | #    mean_value: 123
 48 | #    mirror: false
 49 | #  }
 50 |   data_param {
 51 |     source: "shoes7k_test_lmdb"
 52 |     batch_size: 50
 53 |     backend: LMDB
 54 |   }
 55 | }
 56 | layer {
 57 |   name: "conv1"
 58 |   type: "Convolution"
 59 |   bottom: "data"
 60 |   top: "conv1"
 61 |   param {
 62 |     lr_mult: 1
 63 |     decay_mult: 1
 64 |   }
 65 |   param {
 66 |     lr_mult: 2
 67 |     decay_mult: 0
 68 |   }
 69 |   convolution_param {
 70 |     num_output: 96
 71 |     kernel_size: 11
 72 |     stride: 4
 73 |     weight_filler {
 74 |       type: "gaussian"
 75 |       std: 0.01
 76 |     }
 77 |     bias_filler {
 78 |       type: "constant"
 79 |       value: 0
 80 |     }
 81 |   }
 82 | }
 83 | layer {
 84 |   name: "relu1"
 85 |   type: "ReLU"
 86 |   bottom: "conv1"
 87 |   top: "conv1"
 88 | }
 89 | layer {
 90 |   name: "pool1"
 91 |   type: "Pooling"
 92 |   bottom: "conv1"
 93 |   top: "pool1"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     stride: 2
 98 |   }
 99 | }
100 | layer {
101 |   name: "norm1"
102 |   type: "LRN"
103 |   bottom: "pool1"
104 |   top: "norm1"
105 |   lrn_param {
106 |     local_size: 5
107 |     alpha: 0.0001
108 |     beta: 0.75
109 |   }
110 | }
111 | layer {
112 |   name: "conv2"
113 |   type: "Convolution"
114 |   bottom: "norm1"
115 |   top: "conv2"
116 |   param {
117 |     lr_mult: 1
118 |     decay_mult: 1
119 |   }
120 |   param {
121 |     lr_mult: 2
122 |     decay_mult: 0
123 |   }
124 |   convolution_param {
125 |     num_output: 256
126 |     pad: 2
127 |     kernel_size: 5
128 |     group: 2
129 |     weight_filler {
130 |       type: "gaussian"
131 |       std: 0.01
132 |     }
133 |     bias_filler {
134 |       type: "constant"
135 |       value: 1
136 |     }
137 |   }
138 | }
139 | layer {
140 |   name: "relu2"
141 |   type: "ReLU"
142 |   bottom: "conv2"
143 |   top: "conv2"
144 | }
145 | layer {
146 |   name: "pool2"
147 |   type: "Pooling"
148 |   bottom: "conv2"
149 |   top: "pool2"
150 |   pooling_param {
151 |     pool: MAX
152 |     kernel_size: 3
153 |     stride: 2
154 |   }
155 | }
156 | layer {
157 |   name: "norm2"
158 |   type: "LRN"
159 |   bottom: "pool2"
160 |   top: "norm2"
161 |   lrn_param {
162 |     local_size: 5
163 |     alpha: 0.0001
164 |     beta: 0.75
165 |   }
166 | }
167 | layer {
168 |   name: "conv3"
169 |   type: "Convolution"
170 |   bottom: "norm2"
171 |   top: "conv3"
172 |   param {
173 |     lr_mult: 1
174 |     decay_mult: 1
175 |   }
176 |   param {
177 |     lr_mult: 2
178 |     decay_mult: 0
179 |   }
180 |   convolution_param {
181 |     num_output: 384
182 |     pad: 1
183 |     kernel_size: 3
184 |     weight_filler {
185 |       type: "gaussian"
186 |       std: 0.01
187 |     }
188 |     bias_filler {
189 |       type: "constant"
190 |       value: 0
191 |     }
192 |   }
193 | }
194 | layer {
195 |   name: "relu3"
196 |   type: "ReLU"
197 |   bottom: "conv3"
198 |   top: "conv3"
199 | }
200 | layer {
201 |   name: "conv4"
202 |   type: "Convolution"
203 |   bottom: "conv3"
204 |   top: "conv4"
205 |   param {
206 |     lr_mult: 1
207 |     decay_mult: 1
208 |   }
209 |   param {
210 |     lr_mult: 2
211 |     decay_mult: 0
212 |   }
213 |   convolution_param {
214 |     num_output: 384
215 |     pad: 1
216 |     kernel_size: 3
217 |     group: 2
218 |     weight_filler {
219 |       type: "gaussian"
220 |       std: 0.01
221 |     }
222 |     bias_filler {
223 |       type: "constant"
224 |       value: 1
225 |     }
226 |   }
227 | }
228 | layer {
229 |   name: "relu4"
230 |   type: "ReLU"
231 |   bottom: "conv4"
232 |   top: "conv4"
233 | }
234 | layer {
235 |   name: "conv5"
236 |   type: "Convolution"
237 |   bottom: "conv4"
238 |   top: "conv5"
239 |   param {
240 |     lr_mult: 1
241 |     decay_mult: 1
242 |   }
243 |   param {
244 |     lr_mult: 2
245 |     decay_mult: 0
246 |   }
247 |   convolution_param {
248 |     num_output: 256
249 |     pad: 1
250 |     kernel_size: 3
251 |     group: 2
252 |     weight_filler {
253 |       type: "gaussian"
254 |       std: 0.01
255 |     }
256 |     bias_filler {
257 |       type: "constant"
258 |       value: 1
259 |     }
260 |   }
261 | }
262 | layer {
263 |   name: "relu5"
264 |   type: "ReLU"
265 |   bottom: "conv5"
266 |   top: "conv5"
267 | }
268 | layer {
269 |   name: "pool5"
270 |   type: "Pooling"
271 |   bottom: "conv5"
272 |   top: "pool5"
273 |   pooling_param {
274 |     pool: MAX
275 |     kernel_size: 3
276 |     stride: 2
277 |   }
278 | }
279 | layer {
280 |   name: "fc6"
281 |   type: "InnerProduct"
282 |   bottom: "pool5"
283 |   top: "fc6"
284 |   param {
285 |     lr_mult: 1
286 |     decay_mult: 1
287 |   }
288 |   param {
289 |     lr_mult: 2
290 |     decay_mult: 0
291 |   }
292 |   inner_product_param {
293 |     num_output: 4096
294 |     weight_filler {
295 |       type: "gaussian"
296 |       std: 0.005
297 |     }
298 |     bias_filler {
299 |       type: "constant"
300 |       value: 1
301 |     }
302 |   }
303 | }
304 | layer {
305 |   name: "relu6"
306 |   type: "ReLU"
307 |   bottom: "fc6"
308 |   top: "fc6"
309 | }
310 | layer {
311 |   name: "drop6"
312 |   type: "Dropout"
313 |   bottom: "fc6"
314 |   top: "fc6"
315 |   dropout_param {
316 |     dropout_ratio: 0.5
317 |   }
318 | }
319 | layer {
320 |   name: "fc7"
321 |   type: "InnerProduct"
322 |   bottom: "fc6"
323 |   top: "fc7"
324 |   param {
325 |     lr_mult: 1
326 |     decay_mult: 1
327 |   }
328 |   param {
329 |     lr_mult: 2
330 |     decay_mult: 0
331 |   }
332 |   inner_product_param {
333 |     num_output: 4096
334 |     weight_filler {
335 |       type: "gaussian"
336 |       std: 0.005
337 |     }
338 |     bias_filler {
339 |       type: "constant"
340 |       value: 1
341 |     }
342 |   }
343 | }
344 | layer {
345 |   name: "relu7"
346 |   type: "ReLU"
347 |   bottom: "fc7"
348 |   top: "fc7"
349 | }
350 | layer {
351 |   name: "drop7"
352 |   type: "Dropout"
353 |   bottom: "fc7"
354 |   top: "fc7"
355 |   dropout_param {
356 |     dropout_ratio: 0.5
357 |   }
358 | }
359 | layer {
360 |   name: "latent"
361 |   type: "InnerProduct"
362 |   bottom: "fc7"
363 |   top: "latent"
364 |   param {
365 |     lr_mult: 1
366 |     decay_mult: 1
367 |   }
368 |   param {
369 |     lr_mult: 2
370 |     decay_mult: 0
371 |   }
372 |   inner_product_param {
373 |     num_output: 48
374 |     weight_filler {
375 |       type: "gaussian"
376 |       std: 0.01
377 |     }
378 |     bias_filler {
379 |       type: "constant"
380 |       value: 0
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "fc8_new"
386 |   type: "InnerProduct"
387 |   bottom: "latent"
388 |   top: "fc8_new"
389 |   param {
390 |     lr_mult: 1
391 |     decay_mult: 1
392 |   }
393 |   param {
394 |     lr_mult: 2
395 |     decay_mult: 0
396 |   }
397 |   inner_product_param {
398 |     num_output: 2
399 |     weight_filler {
400 |       type: "gaussian"
401 |       std: 0.01
402 |     }
403 |     bias_filler {
404 |       type: "constant"
405 |       value: 0
406 |     }
407 |   }
408 | }
409 | layer {
410 |   name: "accuracy"
411 |   type: "Accuracy"
412 |   bottom: "fc8_new"
413 |   bottom: "label"
414 |   top: "accuracy"
415 |   include {
416 |     phase: TEST
417 |   }
418 | }
419 | layer {
420 |   name: "loss"
421 |   type: "SoftmaxWithLoss"
422 |   bottom: "fc8_new"
423 |   bottom: "label"
424 |   top: "loss"
425 | }
426 | 


--------------------------------------------------------------------------------
/img/shoes7k_retrieval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xueeinstein/fast-image-retrieval/c952b34b9dc7833cd0bb3cc49a6b69cd81b7cae8/img/shoes7k_retrieval.png


--------------------------------------------------------------------------------
/layer_features.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Extract FC7 4096 feature vector
 3 | '''
 4 | import numpy as np
 5 | import caffe
 6 | 
 7 | 
 8 | def feed_net(model_file, deploy_file, imagemean_file, image_files, show_pred):
 9 |     """feed network"""
10 |     n_files = len(image_files)
11 |     net = caffe.Net(deploy_file, model_file, caffe.TEST)
12 | 
13 |     # define transformer for preprocessing
14 |     transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
15 |     transformer.set_mean('data', np.load(imagemean_file).mean(1).mean(1))
16 |     transformer.set_transpose('data', (2, 0, 1))
17 |     transformer.set_channel_swap('data', (2, 1, 0))
18 |     transformer.set_raw_scale('data', 255.0)
19 | 
20 |     net.blobs['data'].reshape(n_files, 3, 227, 227)
21 | 
22 |     idx = 0
23 |     for image in image_files:
24 |         try:
25 |             im = caffe.io.load_image(image)
26 |             transformed_im = transformer.preprocess('data', im)
27 |             net.blobs['data'].data[idx, :, :, :] = transformed_im
28 |             idx += 1
29 |         except Exception:
30 |             pass
31 | 
32 |     out = net.forward()
33 |     if show_pred:
34 |         print(out['prob'].argmax())
35 |     return net
36 | 
37 | 
38 | def layer_features(layers, model_file, deploy_file, imagemean_file,
39 |                    image_files, gpu=True, gpu_id=0, show_pred=False):
40 |     """extract features from various layers"""
41 |     if gpu:
42 |         caffe.set_device(gpu_id)
43 |         caffe.set_mode_gpu()
44 | 
45 |     net = feed_net(model_file, deploy_file, imagemean_file, image_files,
46 |                    show_pred)
47 | 
48 |     #if type(layers) == str:
49 |         #return net.blobs[layers].data
50 | 
51 |     for layer in layers:
52 |         if layer not in net.blobs:
53 |             raise TypeError('Invalid layer name: ' + layer)
54 |         yield (layer, net.blobs[layer].data)
55 | 


--------------------------------------------------------------------------------
/retrieve.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Image retrieval
 3 | '''
 4 | import os
 5 | import subprocess
 6 | import numpy as np
 7 | from sklearn.neighbors import KDTree
 8 | 
 9 | from layer_features import layer_features
10 | 
11 | 
12 | def binary_hash_codes(feature_mat):
13 |     """convert feature matrix of latent layer to binary hash codes"""
14 |     xs, ys = np.where(feature_mat > 0.5)
15 |     code_mat = np.zeros(feature_mat.shape)
16 | 
17 |     for i in range(len(xs)):
18 |         code_mat[xs[i]][ys[i]] = 1
19 | 
20 |     return code_mat
21 | 
22 | 
23 | def retrieve_image(target_image, model_file, deploy_file, imagemean_file,
24 |                    threshold=1):
25 |     model_dir = os.path.dirname(model_file)
26 |     image_files = np.load(os.path.join(model_dir, 'image_files.npy'))
27 |     fc7_feature_mat = np.load(os.path.join(model_dir, 'fc7_features.npy'))
28 |     latent_feature_file = os.path.join(model_dir, 'latent_features.npy')
29 |     latent_feature_mat = np.load(latent_feature_file)
30 | 
31 |     candidates = []
32 |     dist = 0
33 |     for layer, mat in layer_features(['latent', 'fc7'], model_file,
34 |                                      deploy_file, imagemean_file,
35 |                                      [target_image], show_pred=True):
36 |         if layer == 'latent':
37 |             # coarse-level search
38 |             mat = binary_hash_codes(mat)
39 |             mat = mat * np.ones((latent_feature_mat.shape[0], 1))
40 |             dis_mat = np.abs(mat - latent_feature_mat)
41 |             hamming_dis = np.sum(dis_mat, axis=1)
42 |             distance_file = os.path.join(model_dir, 'hamming_dis.npy')
43 |             np.save(distance_file, hamming_dis)
44 |             candidates = np.where(hamming_dis < threshold)[0]
45 | 
46 |         if layer == 'fc7':
47 |             # fine-level search
48 |             kdt = KDTree(fc7_feature_mat[candidates], metric='euclidean')
49 |             k = 6
50 | 
51 |             if not candidates.shape[0] > 6:
52 |                 k = candidates.shape[0]
53 | 
54 |             dist, idxs = kdt.query(mat, k=k)
55 |             candidates = candidates[idxs]
56 |             print(dist)
57 | 
58 |     return image_files[candidates][0], dist[0]
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     import sys
63 |     if len(sys.argv) != 5:
64 |         usage = 'Usage: python retrieve.py' + \
65 |                 ' model_file deploy_file imagemean_file target_image.jpg'
66 |         print(usage)
67 |     else:
68 |         model_file = sys.argv[1]
69 |         deploy_file = sys.argv[2]
70 |         imagemean_file = sys.argv[3]
71 |         target_image = sys.argv[4]
72 | 
73 |         is_exists = os.path.exists(model_file) and os.path.exists(deploy_file)\
74 |             and os.path.exists(imagemean_file)
75 | 
76 |         if is_exists:
77 |             res, _ = retrieve_image(target_image, model_file, deploy_file,
78 |                                     imagemean_file, threshold=5)
79 |             print(res)
80 |             if not os.path.exists('results'):
81 |                 os.mkdir('results')
82 |             for i in range(len(res)):
83 |                 subprocess.call(['cp', res[i], 'results/%s.jpg' % str(i)])
84 |         else:
85 |             print('The model related files may not exit')
86 |             print('Please check files: {}, {}, {}'
87 |                   .format(model_file, deploy_file, imagemean_file))
88 | 


--------------------------------------------------------------------------------
/tools/convert_protomean.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import sys
 3 | 
 4 | import caffe
 5 | 
 6 | 
 7 | if len(sys.argv) != 3:
 8 |     print('Usage: python convert_protomean.py proto.mean out.npy')
 9 |     sys.exit()
10 | 
11 | 
12 | blob = caffe.proto.caffe_pb2.BlobProto()
13 | data = open(sys.argv[1], 'rb').read()
14 | blob.ParseFromString(data)
15 | arr = np.array(caffe.io.blobproto_to_array(blob))
16 | out = arr[0]
17 | np.save(sys.argv[2], out)
18 | 


--------------------------------------------------------------------------------