├── __init__.py
├── model
│   ├── __init__.py
│   ├── dch.py
│   ├── dhn.py
│   ├── dhcs.py
│   ├── dqn.py
│   └── dvsq.py
├── distance
│   ├── __init__.py
│   ├── npversion.py
│   └── tfversion.py
├── examples
│   ├── __init__.py
│   ├── dch
│   │   ├── log.pkl
│   │   └── train_val_script.py
│   ├── dvsq
│   │   ├── train_val.sh
│   │   └── train_val_script.py
│   ├── dhn
│   │   └── train_val_script.py
│   ├── dtq
│   │   └── train_val_script.py
│   ├── dhcs
│   │   └── train_val_script.py
│   └── dqn
│       └── train_val_script.py
├── data_provider
│   ├── __init__.py
│   ├── pairwise.py
│   ├── pq.py
│   ├── text
│   │   └── __init__.py
│   ├── image
│   │   └── __init__.py
│   └── triplet.py
├── snapshot
├── util
│   ├── __init__.py
│   ├── plot.py
│   ├── tool.py
│   └── visualize.py
├── architecture
│   ├── __init__.py
│   ├── mlp.py
│   ├── vgg_f.py
│   ├── alexnet.py
│   └── vgg.py
├── .gitignore
├── README.md
├── evaluation
│   ├── load_and_predict.py
│   └── __init__.py
└── loss
    └── __init__.py

/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/distance/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/snapshot:
--------------------------------------------------------------------------------
1 | /media/disk1/chenshen/cachedir/DeepHash/snapshot
--------------------------------------------------------------------------------
/examples/dch/log.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenshen03/DeepHash-tensorflow/HEAD/examples/dch/log.pkl
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | from .tool import *
2 | from .visualize import plot_distance, plot_distribution, plot_tsne
--------------------------------------------------------------------------------
/architecture/__init__.py:
--------------------------------------------------------------------------------
1 | from .alexnet import img_alexnet_layers
2 | from .vgg import img_vgg16_layers
3 | from .mlp import txt_mlp_layers
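4 | 
5 | # A minimal wiring sketch (hypothetical, not from this repo) of how the
6 | # exported text branch plugs into a TF1 graph. The placeholder shapes and
7 | # txt_dim=1386 are assumptions; txt_mlp_layers is defined in
8 | # /architecture/mlp.py below:
9 | #
10 | #     import tensorflow as tf
11 | #     from architecture import txt_mlp_layers
12 | #
13 | #     txt = tf.placeholder(tf.float32, [None, 1386])   # bag-of-words input
14 | #     stage = tf.placeholder(tf.int32)                  # 0 = train (dropout on)
15 | #     codes, params, train_vars = txt_mlp_layers(
16 | #         txt, txt_dim=1386, output_dim=32, stage=stage, with_tanh=True)
--------------------------------------------------------------------------------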
/examples/dvsq/train_val.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TF_CPP_MIN_LOG_LEVEL=3
4 | 
5 | # baseline
6 | CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --gpu 0 > logs/log1 2>&1 &
7 | # CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --learning-rate 0.002 --batch-size 256 --output-dim 64\
8 | #   --cq-lambda 0.0001 --n-subspace 4 --n-subcenter 256 --R 54000 --dataset cifar10 --gpu 0 > logs/log1 2>&1 &
9 | 
10 | # CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --gpu 0 --n-subspace 4 --n-subcenter 256 > logs/log1 2>&1 &
11 | 
12 | # CUDA_VISIBLE_DEVICES=1 python -u train_val_script.py --gpu 1 --n-subspace 2 --n-subcenter 64 > logs/log2 2>&1 &
13 | 
14 | # CUDA_VISIBLE_DEVICES=2 python -u train_val_script.py --gpu 2 --n-subspace 3 --n-subcenter 16 > logs/log3 2>&1 &
15 | 
16 | # CUDA_VISIBLE_DEVICES=3 python -u train_val_script.py --gpu 3 --n-subspace 4 --n-subcenter 8 > logs/log4 2>&1 &
--------------------------------------------------------------------------------
/util/plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import matplotlib
4 | matplotlib.use('Agg')
5 | import matplotlib.pyplot as plt
6 | 
7 | import collections
8 | import pickle
9 | import os
10 | 
11 | _since_beginning = collections.defaultdict(lambda: {})
12 | _since_last_flush = collections.defaultdict(lambda: {})
13 | 
14 | _iter = 0
15 | 
16 | def tick():
17 |     # _iter is module-level state; without the global declaration the
18 |     # increment raises UnboundLocalError on the first call.
19 |     global _iter
20 |     _iter += 1
21 | 
22 | def plot(name, value):
23 |     _since_last_flush[name][_iter] = value
24 | 
25 | def flush(path=""):
26 |     prints = []
27 | 
28 |     for name, vals in list(_since_last_flush.items()):
29 |         prints.append("{}\t{}".format(name, np.mean(list(vals.values()))))
30 |         _since_beginning[name].update(vals)
31 | 
32 |         x_vals = np.sort(list(_since_beginning[name].keys()))
33 |         y_vals = [_since_beginning[name][x] for x in x_vals]
34 | 
35 |         plt.clf()
36 |         plt.plot(x_vals, y_vals)
37 |         plt.xlabel('iteration')
38 |         plt.ylabel(name)
39 |         plt.savefig(os.path.join(path, name.replace(' ', '_') + '.jpg'))
40 | 
41 |     print("iter {}\t{}".format(_iter, "\t".join(prints)))
42 |     _since_last_flush.clear()
43 | 
44 |     with open('log.pkl', 'wb') as f:
45 |         pickle.dump(dict(_since_beginning), f, pickle.HIGHEST_PROTOCOL)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | # *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | pretrained_model/ 104 | logs/ 105 | tflog/ 106 | models/ 107 | *.swp 108 | .vscode/ 109 | data/ 110 | *.log 111 | *.sh -------------------------------------------------------------------------------- /util/tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import errno 4 | import shutil 5 | import os.path as osp 6 | import argparse 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | 11 | def mkdir_if_missing(directory): 12 | if not osp.exists(directory): 13 | try: 14 | os.makedirs(directory) 15 | except OSError as e: 16 | if e.errno != errno.EEXIST: 17 | raise 18 | 19 | 20 | class Logger(object): 21 | """ 22 | Write console output to external text file. 23 | 24 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. 
25 |     """
26 |     def __init__(self, fpath=None):
27 |         self.console = sys.stdout
28 |         self.file = None
29 |         if fpath is not None:
30 |             mkdir_if_missing(os.path.dirname(fpath))
31 |             self.file = open(fpath, 'w')
32 | 
33 |     def __del__(self):
34 |         self.close()
35 | 
36 |     def __enter__(self):
37 |         pass
38 | 
39 |     def __exit__(self, *args):
40 |         self.close()
41 | 
42 |     def write(self, msg):
43 |         self.console.write(msg)
44 |         if self.file is not None:
45 |             self.file.write(msg)
46 | 
47 |     def flush(self):
48 |         self.console.flush()
49 |         if self.file is not None:
50 |             self.file.flush()
51 |             os.fsync(self.file.fileno())
52 | 
53 |     def close(self):
54 |         self.console.close()
55 |         if self.file is not None:
56 |             self.file.close()
57 | 
58 | 
59 | def str2bool(v):
60 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
61 |         return True
62 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
63 |         return False
64 |     else:
65 |         raise argparse.ArgumentTypeError('Unsupported value encountered.')
66 | 
67 | 
68 | # Return -1 if x < 0, 1 if x > 0, and a random choice of -1 or 1 if x == 0.
69 | def sign(x):
70 |     s = np.sign(x)
71 |     tmp = s[s == 0]
72 |     s[s == 0] = np.random.choice([-1, 1], tmp.shape)
73 |     return s
74 | 
75 | 
76 | def reduce_shaper(t):
77 |     return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])
--------------------------------------------------------------------------------
/data_provider/pairwise.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Dataset(object):
4 |     def __init__(self, dataset, output_dim):
5 |         print("Initializing Dataset")
6 |         self._dataset = dataset
7 |         self.n_samples = dataset.n_samples
8 |         self._train = dataset.train
9 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
10 | 
11 |         self._perm = np.arange(self.n_samples)
12 |         np.random.shuffle(self._perm)
13 |         self._index_in_epoch = 0
14 |         self._epochs_complete = 0
15 |         print("Dataset ready")
16 |         return
17 | 
18 |     def next_batch(self, batch_size):
19 |         """
20 |         Args:
21 |             batch_size
22 |         Returns:
23 |             [batch_size, (n_inputs)]: next batch images
24 |             [batch_size, n_class]: next batch labels
25 |         """
26 |         start = self._index_in_epoch
27 |         self._index_in_epoch += batch_size
28 |         # Another epoch has finished
29 |         if self._index_in_epoch > self.n_samples:
30 |             if self._train:
31 |                 # The training stage draws batches repeatedly
32 |                 self._epochs_complete += 1
33 |                 # Shuffle the data
34 |                 np.random.shuffle(self._perm)
35 |                 # Start next epoch
36 |                 start = 0
37 |                 self._index_in_epoch = batch_size
38 |             else:
39 |                 # The validation stage processes each sample only once
40 |                 start = self.n_samples - batch_size
41 |                 self._index_in_epoch = self.n_samples
42 |         end = self._index_in_epoch
43 | 
44 |         data, label = self._dataset.data(self._perm[start:end])
45 |         return (data, label)
46 | 
47 |     def feed_batch_output(self, batch_size, output):
48 |         """
49 |         Args:
50 |             batch_size
51 |             output: [batch_size, n_output]
52 |         """
53 |         start = self._index_in_epoch - batch_size
54 |         end = self._index_in_epoch
55 |         self.output[self._perm[start:end], :] = output
56 |         return
57 | 
58 |     @property
59 |     def output(self):
60 |         return self._output
61 | 
62 |     @property
63 |     def label(self):
64 |         return self._dataset.get_labels()
65 | 
66 |     def finish_epoch(self):
67 |         self._index_in_epoch = 0
68 |         np.random.shuffle(self._perm)
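69 | 
70 | # A minimal training-loop sketch (illustrative; `base_dataset` and
71 | # `net_forward` are hypothetical stand-ins, not part of this repo):
72 | #
73 | #     train_set = Dataset(base_dataset, output_dim=32)
74 | #     for _ in range(10000):
75 | #         images, labels = train_set.next_batch(batch_size=128)
76 | #         codes = net_forward(images)              # e.g. one sess.run() step
77 | #         train_set.feed_batch_output(128, codes)  # cache codes for this batch
--------------------------------------------------------------------------------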
/architecture/mlp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | def txt_mlp_layers(txt, txt_dim, output_dim, stage, model_weights=None, with_tanh=True):
7 |     deep_param_txt = {}
8 |     train_layers = []
9 | 
10 |     if model_weights is None:
11 |         dir_path = os.path.dirname(os.path.realpath(__file__))
12 |         model_weights = os.path.join(
13 |             dir_path, "pretrained_model/reference_pretrain.npy")
14 | 
15 |     net_data = dict(np.load(model_weights, encoding='bytes').item())
16 | 
17 |     # txt_fc1
18 |     with tf.name_scope('txt_fc1'):
19 |         if 'txt_fc1' not in net_data:
20 |             txt_fc1w = tf.Variable(tf.truncated_normal([txt_dim, 4096],
21 |                                                        dtype=tf.float32,
22 |                                                        stddev=1e-2), name='weights')
23 |             txt_fc1b = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
24 |                                    trainable=True, name='biases')
25 |         else:
26 |             txt_fc1w = tf.Variable(net_data['txt_fc1'][0], name='weights')
27 |             txt_fc1b = tf.Variable(net_data['txt_fc1'][1], name='biases')
28 |         txt_fc1l = tf.nn.bias_add(tf.matmul(txt, txt_fc1w), txt_fc1b)
29 | 
30 |         # Dropout is applied only in the training stage (stage == 0).
31 |         txt_fc1 = tf.cond(stage > 0, lambda: tf.nn.relu(txt_fc1l),
32 |                           lambda: tf.nn.dropout(tf.nn.relu(txt_fc1l), 0.5))
33 | 
34 |         train_layers += [txt_fc1w, txt_fc1b]
35 |         deep_param_txt['txt_fc1'] = [txt_fc1w, txt_fc1b]
36 | 
37 |     # txt_fc2
38 |     with tf.name_scope('txt_fc2'):
39 |         if 'txt_fc2' not in net_data:
40 |             txt_fc2w = tf.Variable(tf.truncated_normal([4096, output_dim],
41 |                                                        dtype=tf.float32,
42 |                                                        stddev=1e-2), name='weights')
43 |             txt_fc2b = tf.Variable(tf.constant(0.0, shape=[output_dim], dtype=tf.float32),
44 |                                    trainable=True, name='biases')
45 |         else:
46 |             txt_fc2w = tf.Variable(net_data['txt_fc2'][0], name='weights')
47 |             txt_fc2b = tf.Variable(net_data['txt_fc2'][1], name='biases')
48 | 
49 |         txt_fc2l = tf.nn.bias_add(tf.matmul(txt_fc1, txt_fc2w), txt_fc2b)
50 |         if with_tanh:
51 |             txt_fc2 = tf.nn.tanh(txt_fc2l)
52 |         else:
53 |             txt_fc2 = txt_fc2l
54 | 
55 |         train_layers += [txt_fc2w, txt_fc2b]
56 |         deep_param_txt['txt_fc2'] = [txt_fc2w, txt_fc2b]
57 | 
58 |     # return the output of the text layer
59 |     return txt_fc2, deep_param_txt, train_layers
--------------------------------------------------------------------------------
/examples/dch/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import warnings
4 | import numpy as np
5 | import scipy.io as sio
6 | import model.dch as model
7 | import data_provider.image as dataset
8 | 
9 | from pprint import pprint
10 | 
11 | warnings.filterwarnings("ignore", category=DeprecationWarning)
12 | warnings.filterwarnings("ignore", category=FutureWarning)
13 | 
14 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
15 | 
16 | parser = argparse.ArgumentParser(description='Deep Cauchy Hashing')
17 | parser.add_argument('--lr', '--learning-rate', default=0.005, type=float)
18 | parser.add_argument('--output-dim', default=32, type=int)  # 256, 128
19 | parser.add_argument('--alpha', default=0.5, type=float)
20 | parser.add_argument('--bias', default=0.0, type=float)
21 | parser.add_argument('--gamma', default=20, type=float)
22 | parser.add_argument('--iter-num', default=10000, type=int)
23 | parser.add_argument('--q-lambda', default=0.001, type=float)
24 | parser.add_argument('--dataset', default='cifar10', type=str)
25 | parser.add_argument('--gpus', default='0', type=str)
26 | parser.add_argument('--log-dir', default='tflog', type=str)
27 | parser.add_argument('-b', '--batch-size', default=128, type=int)
28 | parser.add_argument('-vb', 
'--val-batch-size', default=100, type=int) 29 | parser.add_argument('--decay-step', default=3000, type=int) 30 | parser.add_argument('--decay-factor', default=0.5, type=float) 31 | 32 | tanh_parser = parser.add_mutually_exclusive_group(required=False) 33 | tanh_parser.add_argument('--with-tanh', dest='with_tanh', action='store_true') 34 | tanh_parser.add_argument('--without-tanh', dest='with_tanh', action='store_false') 35 | parser.set_defaults(with_tanh=True) 36 | 37 | parser.add_argument('--img-model', default='alexnet', type=str) 38 | parser.add_argument('--model-weights', type=str) 39 | parser.add_argument('--finetune-all', default=True, type=bool) 40 | parser.add_argument('--save-dir', default="./models/", type=str) 41 | parser.add_argument('--data-dir', default="../../data/", type=str) 42 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true') 43 | 44 | args = parser.parse_args() 45 | 46 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus 47 | 48 | label_dims = {'cifar10': 10, 'cub': 200, 'nuswide_81': 81, 'coco': 80} 49 | Rs = {'cifar10': 54000, 'nuswide_81': 5000, 'coco': 5000} 50 | args.R = Rs[args.dataset] 51 | args.label_dim = label_dims[args.dataset] 52 | 53 | args.img_tr = os.path.join(args.data_dir, args.dataset, "train.txt") 54 | args.img_te = os.path.join(args.data_dir, args.dataset, "test.txt") 55 | args.img_db = os.path.join(args.data_dir, args.dataset, "database.txt") 56 | 57 | pprint(vars(args)) 58 | 59 | data_root = os.path.join(args.data_dir, args.dataset) 60 | query_img, database_img = dataset.import_validation(data_root, args.img_te, args.img_db) 61 | 62 | if not args.evaluate: 63 | train_img = dataset.import_train(data_root, args.img_tr) 64 | model_weights = model.train(train_img, args) 65 | args.model_weights = model_weights 66 | 67 | maps = model.validation(database_img, query_img, args) 68 | for key in maps: 69 | print(("{}\t{}".format(key, maps[key]))) 70 | 71 | pprint(vars(args)) 72 | -------------------------------------------------------------------------------- /distance/npversion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import util 4 | 5 | def norm(x, keepdims=False): 6 | ''' 7 | Param: 8 | x: matrix of shape (n1, n2, ..., nk) 9 | keepdims: Whether keep dims or not 10 | Return: norm of matrix of shape (n1, n2, ..., n_{k-1}) 11 | ''' 12 | return np.sqrt(np.sum(np.square(x), axis=-1, keepdims=keepdims)) 13 | 14 | def normed(x): 15 | ''' 16 | Param: matrix of shape (n1, n2, ..., nk) 17 | Return: normed matrix 18 | ''' 19 | return x / (1e-20 + norm(x, keepdims=True)) 20 | 21 | def euclidean2(x1, x2): 22 | return np.sum(np.square(x1 - x2), axis=-1) 23 | 24 | def euclidean(x1, x2): 25 | return np.sqrt(euclidean2(x1, x2)) 26 | 27 | def averaged_euclidean2(x1, x2): 28 | return np.mean(np.square(x1 - x2), axis=-1) 29 | 30 | def averaged_euclidean(x1, x2): 31 | return np.sqrt(averaged_euclidean2(x1, x2)) 32 | 33 | def normed_euclidean2(x1, x2): 34 | return euclidean2(normed(x1), normed(x2)) 35 | 36 | def inner_product(x1, x2, pair=False): 37 | if pair: 38 | return - np.inner(x1, x2) 39 | else: 40 | return - np.sum(x1 * x2, axis=-1) 41 | 42 | def cosine(x1, x2): 43 | return (1 + inner_product(normed(x1), normed(x2))) / 2 44 | 45 | def hamming(x1, x2): 46 | K = x1.shape[1] 47 | return (K - np.dot(x1, x2.transpose())) / 2 48 | 49 | def distance(x1, x2=None, pair=True, dist_type="euclidean2", ifsign=False): 50 | ''' 51 | Param: 52 | x2: if x2 is None, distance 
between x1 and x1 will be returned.
53 |         pair: if True, the distance between every x1_i and x2_j is calculated
54 |               if False, the distance between x1_i and x2_i is calculated, and it requires x1 and x2 to have the same dimension.
55 |         dist_type: distance type, can be euclidean2, normed_euclidean2, inner_product, cosine
56 |     '''
57 |     if x2 is None:
58 |         x2 = x1
59 |     if ifsign:
60 |         x1 = util.sign(x1)
61 |         x2 = util.sign(x2)
62 |     if dist_type == 'inner_product':
63 |         return inner_product(x1, x2, pair)
64 |     if pair:
65 |         x1 = np.expand_dims(x1, 1)
66 |         x2 = np.expand_dims(x2, 0)
67 |     return getattr(sys.modules[__name__], dist_type)(x1, x2)
68 | 
69 | if __name__ == "__main__":
70 |     def myAssert(x1, x2):
71 |         assert np.mean(x1 - x2) < 1e-8
72 |     x1 = 2 * np.array([[1, 1, 1], [1, 1, 0], [1, 0, 1], [0, 1, 1]])
73 |     x2 = 3 * np.eye(3)
74 |     myAssert(distance(x1, x2, pair=True, dist_type="euclidean2"),
75 |              np.array([[ 9.,  9.,  9.],
76 |                        [ 5.,  5., 17.],
77 |                        [ 5., 17.,  5.],
78 |                        [17.,  5.,  5.]]))
79 |     myAssert(distance(x1, x2, pair=True, dist_type="normed_euclidean2"),
80 |              np.array([[ 0.84529946,  0.84529946,  0.84529946],
81 |                        [ 0.58578644,  0.58578644,  2.        ],
82 |                        [ 0.58578644,  2.        ,  0.58578644],
83 |                        [ 2.        ,  0.58578644,  0.58578644]]))
84 |     assert distance(x1, x2, pair=True, dist_type="cosine").shape == (4, 3)
85 |     assert distance(x1, x2, pair=True, dist_type="inner_product").shape == (4, 3)
86 | 
87 |     assert np.all(distance(x1, x1[::-1], pair=False, dist_type="euclidean2") == np.array([4, 8, 8, 4]))
88 |     myAssert(distance(x1, x1[::-1], pair=False, dist_type="normed_euclidean2"), np.array([0.36700684, 1., 1., 0.36700684]))
89 |     myAssert(distance(x1, x1[::-1], pair=False, dist_type="cosine"), np.array([0.09175171, 0.25, 0.25, 0.09175171]))
90 |     assert np.all(distance(x1, x1[::-1], pair=False, dist_type="inner_product") == np.array([-8, -4, -4, -8]))
91 | 
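92 | # A quick sketch (illustrative, with made-up 4-bit codes) of how these
93 | # helpers combine for hashing: binarize real-valued codes with util.sign,
94 | # then compare them with the pairwise Hamming distance:
95 | #
96 | #     codes = util.sign(np.array([[0.9, -0.2, 0.4, -0.7],
97 | #                                 [0.8,  0.1, 0.3, -0.6]]))
98 | #     print(hamming(codes, codes))   # [[0, 1], [1, 0]]
--------------------------------------------------------------------------------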
/data_provider/pq.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Dataset(object):
4 |     def __init__(self, dataset, output_dim, code_dim):
5 |         print("Initializing Dataset")
6 |         self._dataset = dataset
7 |         self.n_samples = dataset.n_samples
8 |         self._train = dataset.train
9 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
10 |         self._codes = np.zeros((self.n_samples, code_dim), dtype=np.float32)
11 | 
12 |         self._perm = np.arange(self.n_samples)
13 |         np.random.shuffle(self._perm)
14 |         self._index_in_epoch = 0
15 |         self._epochs_complete = 0
16 |         print("Dataset ready")
17 |         return
18 | 
19 |     def next_batch(self, batch_size):
20 |         """
21 |         Args:
22 |             batch_size
23 |         Returns:
24 |             [batch_size, (n_inputs)]: next batch images
25 |             [batch_size, n_class]: next batch labels
26 |         """
27 |         start = self._index_in_epoch
28 |         self._index_in_epoch += batch_size
29 |         # Another epoch has finished
30 |         if self._index_in_epoch > self.n_samples:
31 |             if self._train:
32 |                 # The training stage draws batches repeatedly
33 |                 self._epochs_complete += 1
34 |                 # Shuffle the data
35 |                 np.random.shuffle(self._perm)
36 |                 # Start next epoch
37 |                 start = 0
38 |                 self._index_in_epoch = batch_size
39 |             else:
40 |                 # The validation stage processes each sample only once
41 |                 start = self.n_samples - batch_size
42 |                 self._index_in_epoch = self.n_samples
43 |         end = self._index_in_epoch
44 | 
45 |         data, label = self._dataset.data(self._perm[start:end])
46 |         return (data, label, self.codes[self._perm[start:end], :])
47 | 
48 |     def next_batch_output_codes(self, batch_size):
49 |         start = self._index_in_epoch
50 |         self._index_in_epoch += batch_size
51 |         # Another epoch has finished
52 |         if self._index_in_epoch > self.n_samples:
53 |             if self._train:
54 |                 # Shuffle the data
55 |                 np.random.shuffle(self._perm)
56 |                 # Start next epoch
57 |                 start = 0
58 |                 self._index_in_epoch = batch_size
59 |             else:
60 |                 # The validation stage processes each sample only once
61 |                 start = self.n_samples - batch_size
62 |                 self._index_in_epoch = self.n_samples
63 |         end = self._index_in_epoch
64 | 
65 |         return (self.output[self._perm[start:end], :],
66 |                 self.codes[self._perm[start:end], :])
67 | 
68 |     def feed_batch_output(self, batch_size, output):
69 |         start = self._index_in_epoch - batch_size
70 |         end = self._index_in_epoch
71 |         self.output[self._perm[start:end], :] = output
72 |         return
73 | 
74 |     def feed_batch_codes(self, batch_size, codes):
75 |         """
76 |         Args:
77 |             batch_size
78 |             codes: [batch_size, code_dim]
79 |         """
80 |         start = self._index_in_epoch - batch_size
81 |         end = self._index_in_epoch
82 |         self.codes[self._perm[start:end], :] = codes
83 |         return
84 | 
85 |     @property
86 |     def output(self):
87 |         return self._output
88 | 
89 |     @property
90 |     def codes(self):
91 |         return self._codes
92 | 
93 |     @property
94 |     def label(self):
95 |         return self._dataset.get_labels()
96 | 
97 |     def finish_epoch(self):
98 |         self._index_in_epoch = 0
99 |         np.random.shuffle(self._perm)
100 | 
--------------------------------------------------------------------------------
/distance/tfversion.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import sys
3 | import numpy as np
4 | 
5 | def norm(x, keepdims=False):
6 |     '''
7 |     Param:
8 |         x: matrix of shape (n1, n2, ..., nk)
9 |         keepdims: Whether keep dims or not
10 |     Return: norm of matrix of shape (n1, n2, ..., n_{k-1})
11 |     '''
12 |     return tf.sqrt(tf.reduce_sum(tf.square(x), axis=-1, keepdims=keepdims))
13 | 
14 | def normed(x):
15 |     '''
16 |     Param: matrix of shape (n1, n2, ..., nk)
17 |     Return: normed matrix
18 |     '''
19 |     return x / (1e-20 + norm(x, keepdims=True))  # epsilon guards zero vectors, as in npversion
20 | 
21 | def euclidean2(x1, x2):
22 |     return tf.reduce_sum(tf.square(x1 - x2), axis=-1)
23 | 
24 | def euclidean(x1, x2):
25 |     return tf.sqrt(euclidean2(x1, x2))
26 | 
27 | def averaged_euclidean2(x1, x2):
28 |     return tf.reduce_mean(tf.square(x1 - x2), axis=-1)
29 | 
30 | def averaged_euclidean(x1, x2):
31 |     return tf.sqrt(averaged_euclidean2(x1, x2))
32 | 
33 | def normed_euclidean2(x1, x2):
34 |     return euclidean2(normed(x1), normed(x2))
35 | 
36 | def inner_product(x1, x2):
37 |     return - tf.reduce_sum(x1 * x2, axis=-1)
38 | 
39 | def cosine(x1, x2):
40 |     return (1 + inner_product(normed(x1), normed(x2))) / 2
41 | 
42 | def my_inner_product(x1, x2):
43 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
44 |     return (1 + inner_product(x1, x2) / K) / 2
45 | 
46 | def my_euclidean(x1, x2):
47 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
48 |     return euclidean(x1, x2) / tf.sqrt(4 * K)
49 | 
50 | def my_euclidean2(x1, x2):
51 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
52 |     return euclidean2(x1, x2) / (4 * K)
53 | 
54 | def my_normed_euclidean2(x1, x2):
55 |     return normed_euclidean2(x1, x2) / 4
56 | 
57 | def cos(x1, x2):
58 |     return 1 - 2 * cosine(x1, x2)
59 | 
60 | def distance(x1, x2=None, pair=True, dist_type="euclidean2"):
61 |     '''
62 |     Param:
63 |         x2: if x2 is None, distance between x1 and x1 will be returned.
64 |         pair: if True, the distance between every x1_i and x2_j is calculated
65 |               if False, the distance between x1_i and x2_i is calculated, and it requires x1 and x2 to have the same dimension.
66 | dist_type: distance type, can be euclidean2, normed_euclidean2, inner_product, cosine 67 | ''' 68 | if x2 is None: 69 | x2 = x1 70 | if pair: 71 | x1 = tf.expand_dims(x1, 1) 72 | x2 = tf.expand_dims(x2, 0) 73 | return getattr(sys.modules[__name__], dist_type)(x1, x2) 74 | 75 | if __name__ == "__main__": 76 | sess = tf.InteractiveSession() 77 | def myAssert(x1, x2): 78 | assert np.mean(x1 - x2) < 1e-8 79 | x1 = 2 * np.array([[1, 1, 1], [1, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=float) 80 | x2 = 3 * np.eye(3, dtype=float) 81 | myAssert(distance(x1, x2, pair=True, dist_type="euclidean2").eval(), 82 | np.array([[ 9., 9., 9.], 83 | [ 5., 5., 17.], 84 | [ 5., 17., 5.], 85 | [ 17., 5., 5.]]) ) 86 | myAssert(distance(x1, x2, pair=True, dist_type="normed_euclidean2").eval(), 87 | np.array([[ 0.84529946, 0.84529946, 0.84529946], 88 | [ 0.58578644, 0.58578644, 2. ], 89 | [ 0.58578644, 2. , 0.58578644], 90 | [ 2. , 0.58578644, 0.58578644]])) 91 | assert distance(x1, x2, pair=True, dist_type="cosine").eval().shape == (4, 3) 92 | assert distance(x1, x2, pair=True, dist_type="inner_product").eval().shape == (4, 3) 93 | 94 | assert np.all(distance(x1, x1[::-1], pair=False, dist_type="euclidean2").eval() == np.array([4, 8, 8, 4])) 95 | myAssert(distance(x1, x1[::-1], pair=False, dist_type="normed_euclidean2").eval(), np.array([ 0.36700684, 1., 1., 0.36700684])) 96 | myAssert(distance(x1, x1[::-1], pair=False, dist_type="cosine").eval(), np.array([ 0.09175171, 0.25, 0.25, 0.09175171])) 97 | assert np.all(distance(x1, x1[::-1], pair=False, dist_type="inner_product").eval() == np.array([-8, -4, -4, -8])) 98 | -------------------------------------------------------------------------------- /examples/dhn/train_val_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import warnings 5 | import data_provider.image as dataset 6 | import model.dhn as model 7 | from pprint import pprint 8 | from util import Logger, str2bool 9 | 10 | 11 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21, 12 | 'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10} 13 | 14 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000, 15 | 'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000} 16 | 17 | 18 | def parse_args(argv): 19 | parser = argparse.ArgumentParser(description='Train and val model') 20 | 21 | # algorithm config 22 | algorithm_group = parser.add_argument_group(title='Algorithm config') 23 | algorithm_group.add_argument('--output-dim', type=int, default=32) 24 | algorithm_group.add_argument('--cq-lambda', type=float, default=0.01) 25 | algorithm_group.add_argument('--alpha', type=float, default=10) 26 | # network config 27 | network_group = parser.add_argument_group(title='Network config') 28 | network_group.add_argument('--gpu_id', type=str, default='0') 29 | network_group.add_argument('--max-iter', type=int, default=10000) 30 | network_group.add_argument('--batch-size', type=int, default=128) 31 | network_group.add_argument('--val-batch-size', type=int, default=100) 32 | network_group.add_argument('--decay-step', type=int, default=3000) 33 | network_group.add_argument('--learning-rate', type=float, default=0.0001) 34 | network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5) 35 | network_group.add_argument('--network', type=str, default='alexnet') 36 | network_group.add_argument('--network-weights', type=str) 37 | 
    network_group.add_argument('--finetune-all', type=str2bool, default=True)
38 |     network_group.add_argument('--test', default=False, action='store_true')
39 |     network_group.add_argument('--debug', default=False, action='store_true')
40 |     # dataset config
41 |     dataset_group = parser.add_argument_group(title='Dataset config')
42 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
43 |     dataset_group.add_argument('--prefix', type=str, default='1')
44 |     # config process
45 |     config, rest = parser.parse_known_args()
46 |     _dataset = config.dataset
47 |     _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dhn/' + \
48 |                 f'{config.prefix}_lambda{config.cq_lambda}_alpha{config.alpha}'
49 |     dataset_group.add_argument('--R', type=int, default=Rs[_dataset])
50 |     dataset_group.add_argument('--label-dim', type=int, default=label_dims[_dataset])
51 |     dataset_group.add_argument('--save-dir', type=str, default=_save_dir)
52 | 
53 |     return parser.parse_args(argv)
54 | 
55 | 
56 | def main(config):
57 |     warnings.filterwarnings("ignore", category=DeprecationWarning)
58 |     warnings.filterwarnings("ignore", category=FutureWarning)
59 |     os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
60 |     os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id
61 | 
62 |     if not os.path.exists(config.save_dir):
63 |         os.makedirs(config.save_dir)
64 |     sys.stdout = Logger(os.path.join(config.save_dir, 'train.log'))
65 | 
66 |     pprint(vars(config))
67 |     data_root = os.path.join('../../data', config.dataset)
68 |     img_tr = f'{data_root}/train.txt'
69 |     img_te = f'{data_root}/test.txt'
70 |     img_db = f'{data_root}/database.txt'
71 | 
72 |     if config.test:
73 |         config.network_weights = os.path.join(config.save_dir, 'network_weights.npy')
74 |     else:
75 |         train_img = dataset.import_train(data_root, img_tr)
76 |         network_weights = model.train(train_img, config)
77 |         config.network_weights = network_weights
78 | 
79 |     query_img, database_img = dataset.import_validation(data_root, img_te, img_db)
80 |     maps = model.validation(database_img, query_img, config)
81 | 
82 |     for key in maps:
83 |         print(f"{key}: {maps[key]}")
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     main(parse_args(sys.argv[1:]))
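88 | 
89 | # Typical invocations (illustrative; the list files under ../../data are
90 | # assumed to exist, as in the dvsq train_val.sh above):
91 | #
92 | #     python train_val_script.py --gpu_id 0 --output-dim 32 --dataset cifar10
93 | #     python train_val_script.py --test --prefix 1    # evaluate saved weights
--------------------------------------------------------------------------------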
/data_provider/text/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | #     http://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # ==============================================================================
14 | 
15 | """Routine for decoding the NUS-WIDE binary file format."""
16 | 
17 | import h5py
18 | import numpy as np
19 | 
20 | 
21 | # Process images of this size. Note that this differs from the original nus-wide
22 | # image size of 224 x 224. If one alters this number, then the entire model
23 | # architecture will change and any model would need to be retrained.
24 | 
25 | # Global constants describing the NUS-WIDE data set.
26 | 
27 | class Dataset(object):
28 |     def __init__(self, modal, path, train=True):
29 |         self.lines = open(path, 'r').readlines()
30 |         self.n_samples = len(self.lines)
31 |         self.train = train
32 |         if modal == 'txt':
33 |             self.modal = 'txt'
34 |             self._txt = [0] * self.n_samples
35 |             self._label = [0] * self.n_samples
36 |             self._load = [0] * self.n_samples
37 |             self._load_num = 0
38 |             self._status = 0
39 |             self.data = self.txt_data
40 |             self.all_data = self.txt_all_data
41 | 
42 |     def txt_data(self, index):
43 |         if self._status:
44 |             return (self._txt[index, :], self._label[index, :])
45 |         else:
46 |             ret_txt = []
47 |             ret_label = []
48 |             for i in index:
49 |                 try:
50 |                     if self.train:
51 |                         if not self._load[i]:
52 |                             self._txt[i] = h5py.File(self.lines[i].split('\n')[0], 'r')["data"][0]
53 |                             self._label[i] = [int(j) for j in h5py.File(self.lines[i].split('\n')[0], 'r')['label1'][0]]
54 |                             self._load[i] = 1
55 |                             self._load_num += 1
56 |                         ret_txt.append(self._txt[i])
57 |                         ret_label.append(self._label[i])
58 |                     else:
59 |                         # self._label[i] = [int(j) for j in h5py.File(self.lines[i].split('\n')[0], 'r')['label1'][0]]
60 |                         f = h5py.File(self.lines[i].split('\n')[0], 'r')
61 |                         ret_txt.append(f["data"][0])
62 |                         ret_label.append([int(j) for j in f['label1'][0]])
63 |                         f.close()
64 |                 except Exception as e:
65 |                     print('cannot open {}, exception: {}'.format(self.lines[i].split('\n')[0], e))
66 | 
67 |             if self._load_num == self.n_samples:
68 |                 self._status = 1
69 |                 self._txt = np.reshape(np.asarray(self._txt), (self.n_samples, len(self._txt[0])))
70 |                 self._label = np.asarray(self._label)
71 |             return np.reshape(np.asarray(ret_txt), (len(index), -1)), np.asarray(ret_label)
72 | 
73 |     def txt_all_data(self):
74 |         if self._status:
75 |             return (self._txt, self._label)
76 | 
77 |     def get_labels(self):
78 |         for i in range(self.n_samples):
79 |             if self._label[i] == 0:
80 |                 if self.modal == 'img':
81 |                     self._label[i] = [int(j) for j in self.lines[i].strip().split()[1:]]
82 |                 elif self.modal == 'txt':
83 |                     f = h5py.File(self.lines[i].split('\n')[0], 'r')
84 |                     self._label[i] = [int(j) for j in f['label1'][0]]
85 |                     f.close()
86 |         return np.asarray(self._label)
87 | 
88 | 
89 | def import_train(txt_tr):
90 |     return (Dataset('txt', txt_tr, train=True))
91 | 
92 | 
93 | def import_validation(txt_te, txt_db):
94 |     return (Dataset('txt', txt_te, train=False),
95 |             Dataset('txt', txt_db, train=False))
96 | 
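97 | # Usage sketch (illustrative; the list-file path is hypothetical and each
98 | # listed line is expected to point at an .h5 file with 'data' and 'label1'):
99 | #
100 | #     train_txt = import_train('../../data/nuswide_81/text_train.txt')
101 | #     vectors, labels = train_txt.data([0, 1, 2])   # first three samples
--------------------------------------------------------------------------------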
/examples/dtq/train_val_script.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.io as sio
3 | import warnings
4 | import data_provider.image as dataset
5 | import model.dtq as model
6 | from pprint import pprint
7 | import os
8 | import argparse
9 | 
10 | warnings.filterwarnings("ignore", category=DeprecationWarning)
11 | warnings.filterwarnings("ignore", category=FutureWarning)
12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
13 | 
14 | parser = argparse.ArgumentParser(description='Triplet Hashing')
15 | parser.add_argument('--lr', '--learning-rate', default=0.00003, type=float)
16 | parser.add_argument('--triplet-margin', default=30, type=float)
17 | parser.add_argument('--select-strategy', default='margin', choices=['hard', 'all', 'margin'])
18 | parser.add_argument('--output-dim', default=64, type=int)  # 256, 128
19 | parser.add_argument('--epochs', default=100, type=int)
20 | parser.add_argument('--cq-lambda', default=0, type=float)
21 | parser.add_argument('--n-subspace', default=4, type=int)
22 | parser.add_argument('--n-subcenter', default=256, type=int)
23 | parser.add_argument('--dataset', default='cifar10', type=str)
24 | parser.add_argument('--gpus', default='0,1,2,3', type=str)
25 | parser.add_argument('--log-dir', default='tflog', type=str)
26 | parser.add_argument('--dist-type', default='euclidean2', type=str,
27 |                     choices=['euclidean2', 'cosine', 'inner_product', 'euclidean'])
28 | parser.add_argument('-b', '--batch-size', default=128, type=int)
29 | parser.add_argument('-vb', '--val-batch-size', default=16, type=int)
30 | parser.add_argument('--decay-step', default=10000, type=int)
31 | parser.add_argument('--decay-factor', default=0.1, type=float)
32 | 
33 | tanh_parser = parser.add_mutually_exclusive_group(required=False)
34 | tanh_parser.add_argument('--with-tanh', dest='with_tanh', action='store_true')
35 | tanh_parser.add_argument('--without-tanh', dest='with_tanh', action='store_false')
36 | parser.set_defaults(with_tanh=True)
37 | 
38 | parser.add_argument('--img-model', default='alexnet', type=str)
39 | parser.add_argument('--model-weights', type=str,
40 |                     default='../../deephash/architecture/pretrained_model/reference_pretrain.npy')
41 | parser.add_argument('--finetune-all', default=True, type=bool)
42 | parser.add_argument('--max-iter-update-b', default=3, type=int)
43 | parser.add_argument('--max-iter-update-Cb', default=1, type=int)
44 | parser.add_argument('--code-batch-size', default=500, type=int)
45 | parser.add_argument('--n-part', default=20, type=int)
46 | parser.add_argument('--triplet-thresold', default=64000, type=int)
47 | parser.add_argument('--save-dir', default="./models/", type=str)
48 | parser.add_argument('--data-dir', default="../../data/", type=str)
49 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true')
50 | parser.add_argument('--val-freq', default=1, type=int)
51 | 
52 | args = parser.parse_args()
53 | 
54 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
55 | 
56 | label_dims = {'cifar10': 10, 'nuswide_81': 81, 'coco': 80, 'imagenet': 100}
57 | Rs = {'cifar10': 54000, 'nuswide_81': 5000, 'coco': 5000, 'imagenet': 5000}
58 | args.R = Rs[args.dataset]
59 | args.label_dim = label_dims[args.dataset]
60 | 
61 | args.img_tr = os.path.join(args.data_dir, args.dataset, "train.txt")
62 | args.img_te = os.path.join(args.data_dir, args.dataset, "test.txt")
63 | args.img_db = os.path.join(args.data_dir, args.dataset, "database.txt")
64 | 
65 | pprint(vars(args))
66 | 
67 | data_root = os.path.join(args.data_dir, args.dataset)
68 | query_img, database_img = dataset.import_validation(data_root, args.img_te, args.img_db)
69 | 
70 | # if not args.evaluate:
71 | #     train_img = dataset.import_train(data_root, args.img_tr)
72 | #     model_weights = model.train(train_img, database_img, query_img, args)
73 | #     args.model_weights = model_weights
74 | # else:
75 | #     maps = model.validation(database_img, query_img, args)
76 | #     for key in maps:
77 | #         print("{}\t{}".format(key, maps[key]))
78 | 
79 | train_img = dataset.import_train(data_root, args.img_tr)
80 | model_weights = model.train(train_img, query_img, database_img, args)
81 | 
82 | args.model_weights = model_weights
83 | maps = model.validation(query_img, database_img, args)
84 | for key in maps:
85 |     print("{}\t{}".format(key, maps[key]))
86 | 
87 | pprint(vars(args))
--------------------------------------------------------------------------------
/data_provider/image/__init__.py:
--------------------------------------------------------------------------------
1 | # ==============================================================================
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Routine for decoding the NUS-WIDE binary file format."""
17 | 
18 | import os
19 | import cv2
20 | import numpy as np
21 | 
22 | 
23 | # Process images of this size. Note that this differs from the original nus-wide
24 | # image size of 224 x 224. If one alters this number, then the entire model
25 | # architecture will change and any model would need to be retrained.
26 | 
27 | # Global constants describing the NUS-WIDE data set.
28 | 
29 | 
30 | class Dataset(object):
31 |     def __init__(self, modal, data_root, path, train=True):
32 |         self.lines = open(path, 'r').readlines()
33 |         self.data_root = data_root
34 |         self.n_samples = len(self.lines)
35 |         self.train = train
36 |         assert modal == 'img'
37 |         self.modal = 'img'
38 |         self._img = [0] * self.n_samples
39 |         self._label = [0] * self.n_samples
40 |         self._load = [0] * self.n_samples
41 |         self._load_num = 0
42 |         self._status = 0
43 |         self.data = self.img_data
44 |         self.all_data = self.img_all_data
45 | 
46 |     def get_img(self, i):
47 |         path = os.path.join(self.data_root, self.lines[i].strip().split()[0])
48 |         return cv2.resize(cv2.imread(path), (256, 256))
49 | 
50 |     def get_label(self, i):
51 |         return [int(j) for j in self.lines[i].strip().split()[1:]]
52 | 
53 |     def img_data(self, indexes):
54 |         if self._status:
55 |             return (self._img[indexes, :], self._label[indexes, :])
56 |         else:
57 |             ret_img = []
58 |             ret_label = []
59 |             for i in indexes:
60 |                 try:
61 |                     if self.train:
62 |                         if not self._load[i]:
63 |                             self._img[i] = self.get_img(i)
64 |                             self._label[i] = self.get_label(i)
65 |                             self._load[i] = 1
66 |                             self._load_num += 1
67 |                         ret_img.append(self._img[i])
68 |                         ret_label.append(self._label[i])
69 |                     else:
70 |                         self._label[i] = self.get_label(i)
71 |                         ret_img.append(self.get_img(i))
72 |                         ret_label.append(self._label[i])
73 |                 except Exception as e:
74 |                     print('cannot open {}, exception: {}'.format(self.lines[i].strip(), e))
75 | 
76 |             if self._load_num == self.n_samples:
77 |                 self._status = 1
78 |                 self._img = np.asarray(self._img)
79 |                 self._label = np.asarray(self._label)
80 |             return (np.asarray(ret_img), np.asarray(ret_label))
81 | 
82 |     def img_all_data(self):
83 |         if self._status:
84 |             return (self._img, self._label)
85 | 
86 |     def get_labels(self):
87 |         for i in range(self.n_samples):
88 |             if not isinstance(self._label[i], list):
89 |                 self._label[i] = [int(j)
90 |                                   for j in self.lines[i].strip().split()[1:]]
91 |         return np.asarray(self._label)
92 | 
93 | 
94 | def import_train(data_root, img_tr):
95 |     '''
96 |     return the img_tr training dataset
97 |     '''
98 |     return (Dataset('img', data_root, img_tr, train=True))
99 | 
100 | 
101 | def import_validation(data_root, img_te, img_db):
102 |     '''
103 |     return the (img_te, img_db) datasets
104 |     '''
105 |     return (Dataset('img', data_root, img_te, train=False),
106 |             Dataset('img', data_root, img_db, train=False))
107 | 
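108 | # Usage sketch (illustrative; assumes the list files referenced by the
109 | # example scripts, e.g. ../../data/cifar10/train.txt, are in place):
110 | #
111 | #     data_root = '../../data/cifar10'
112 | #     train_img = import_train(data_root, data_root + '/train.txt')
113 | #     imgs, labels = train_img.data([0, 1, 2])   # 256x256 BGR images + labels
--------------------------------------------------------------------------------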
/examples/dhcs/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import warnings
5 | import data_provider.image as dataset
6 | import model.dhcs as model
7 | from pprint import pprint
8 | from util import Logger, str2bool
9 | 
10 | 
11 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21,
12 |               'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10}
13 | 
14 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000,
15 |       'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000}
16 | 
17 | 
18 | def parse_args(argv):
19 |     parser = argparse.ArgumentParser(description='Train and val model')
20 | 
21 |     # algorithm config
22 |     algorithm_group = parser.add_argument_group(title='Algorithm config')
23 |     algorithm_group.add_argument('--bit', type=int, default=32)
24 |     algorithm_group.add_argument('--q-lambda', type=float, default=0.01)
25 |     algorithm_group.add_argument('--b-lambda', type=float, default=0.0)
26 |     algorithm_group.add_argument('--i-lambda', type=float, default=0.0)
27 |     algorithm_group.add_argument('--alpha', type=float, default=5)
28 |     # network config
29 |     network_group = parser.add_argument_group(title='Network config')
30 |     network_group.add_argument('--gpus', type=str, default='0')
31 |     network_group.add_argument('--max-iter', type=int, default=10000)
32 |     network_group.add_argument('--batch-size', type=int, default=128)
33 |     network_group.add_argument('--val-batch-size', type=int, default=100)
34 |     network_group.add_argument('--lr', type=float, default=0.0001)
35 |     network_group.add_argument('--lr-decay-factor', type=float, default=0.5)
36 |     network_group.add_argument('--decay-step', type=int, default=3000)
37 |     network_group.add_argument('--network', type=str, default='alexnet')
38 |     network_group.add_argument('--network-weights', type=str)
39 |     network_group.add_argument('--finetune-all', type=str2bool, default=True)
40 |     network_group.add_argument('--test', default=False, action='store_true')
41 |     network_group.add_argument('--debug', default=False, action='store_true')
42 |     # dataset config
43 |     dataset_group = parser.add_argument_group(title='Dataset config')
44 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
45 |     dataset_group.add_argument('--prefix', type=str, default='1')
46 |     dataset_group.add_argument('--suffix', type=str, default='exp')
47 |     # config process
48 |     config, rest = parser.parse_known_args()
49 |     _dataset = config.dataset
50 |     _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.bit}bit_{config.suffix}/' + \
51 |                 f'{config.prefix}_q{config.q_lambda}'
52 |     dataset_group.add_argument('--R', type=int, default=Rs[_dataset])
53 |     dataset_group.add_argument('--label-dim', type=int, default=label_dims[_dataset])
54 |     dataset_group.add_argument('--save-dir', type=str, default=_save_dir)
55 | 
56 |     return parser.parse_args(argv)
57 | 
58 | 
59 | def main(config):
60 |     warnings.filterwarnings("ignore", category=DeprecationWarning)
61 |     warnings.filterwarnings("ignore", category=FutureWarning)
62 |     os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
63 |     os.environ["CUDA_VISIBLE_DEVICES"] = config.gpus
64 | 
65 |     if not os.path.exists(config.save_dir):
66 |         os.makedirs(config.save_dir)
67 |     sys.stdout = Logger(os.path.join(config.save_dir, 
'train.log')) 68 | 69 | pprint(vars(config)) 70 | data_root = os.path.join('../../data', config.dataset) 71 | config.wordvec_dict = f'{data_root}/wordvec.txt' 72 | img_tr = f'{data_root}/train.txt' 73 | img_te = f'{data_root}/test.txt' 74 | img_db = f'{data_root}/database.txt' 75 | 76 | if config.test == True: 77 | # config.save_dir = '../snapshot/cifar10_alexnet_32bit_hyper_sigmoid/debug' 78 | config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 79 | else: 80 | train_img = dataset.import_train(data_root, img_tr) 81 | network_weights = model.train(train_img, config) 82 | config.network_weights = network_weights 83 | 84 | sys.stdout = Logger(os.path.join(config.save_dir, 'test.log')) 85 | query_img, database_img = dataset.import_validation(data_root, img_te, img_db) 86 | maps = model.validation(database_img, query_img, config) 87 | 88 | for key in maps: 89 | print(f"{key}: {maps[key]}") 90 | 91 | 92 | if __name__ == "__main__": 93 | main(parse_args(sys.argv[1:])) -------------------------------------------------------------------------------- /architecture/vgg_f.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | import scipy.misc 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | 8 | def img_vgg_f(input_image, bit, model_weights=None): 9 | if model_weights is None: 10 | dir_path = os.path.dirname(os.path.realpath(__file__)) 11 | model_weights = os.path.join(dir_path, "pretrained_model/imagenet-vgg-f.mat") 12 | 13 | print("loading model from ", model_weights) 14 | data = scipy.io.loadmat(model_weights) 15 | 16 | layers = ( 17 | 'conv1', 'relu1', 'norm1', 'pool1','conv2', 'relu2', 'norm2', 'pool2','conv3', 'relu3', 'conv4', 'relu4', 'conv5', 18 | 'relu5', 'pool5','fc6', 'relu6', 'fc7', 'relu7','fc8') 19 | weights = data['layers'][0] 20 | mean = data['normalization'][0][0][0] 21 | net = {} 22 | ops = [] 23 | current = tf.convert_to_tensor(input_image,dtype='float32') 24 | for i, name in enumerate(layers[:-1]): 25 | if name.startswith('conv'): 26 | kernels, bias = weights[i][0][0][0][0] 27 | # matconvnet: weights are [width, height, in_channels, out_channels] 28 | # tensorflow: weights are [height, width, in_channels, out_channels] 29 | #kernels = np.transpose(kernels, (1, 0, 2, 3)) 30 | 31 | bias = bias.reshape(-1) 32 | pad = weights[i][0][0][1] 33 | stride = weights[i][0][0][4] 34 | current = _conv_layer(current,kernels,bias,pad,stride,i,ops,net) 35 | elif name.startswith('relu'): 36 | current = tf.nn.relu(current) 37 | elif name.startswith('pool'): 38 | stride = weights[i][0][0][1] 39 | pad = weights[i][0][0][2] 40 | area = weights[i][0][0][5] 41 | current = _pool_layer(current,stride,pad,area) 42 | elif name.startswith('fc'): 43 | kernels, bias = weights[i][0][0][0][0] 44 | # matconvnet: weights are [width, height, in_channels, out_channels] 45 | # tensorflow: weights are [height, width, in_channels, out_channels] 46 | #kernels = np.transpose(kernels, (1, 0, 2, 3)) 47 | 48 | bias = bias.reshape(-1) 49 | current = _full_conv(current,kernels,bias,i,ops,net) 50 | elif name.startswith('norm'): 51 | current = tf.nn.local_response_normalization(current, depth_radius=2, bias=2.000, alpha=0.0001, beta=0.75) 52 | net[name] = current 53 | W_fc8 = tf.random_normal([4096, bit], stddev=1.0) * 0.01 54 | 55 | b_fc8 = tf.random_normal([bit],stddev = 1.0) * 0.01 56 | w = tf.Variable(W_fc8, name='w' + str(20)) 57 | b = tf.Variable(b_fc8, name='bias' + str(20)) 58 | 59 | ### debugging................... 
60 | # layer8 = scipy.io.loadmat('data/wb-image.mat') 61 | # 62 | # w = tf.Variable(np.squeeze(layer8['w8']) * 0.01, name='w' + str(20)) 63 | # b = tf.Variable(np.squeeze(layer8['b8']) * 0.01, name='bias' + str(20)) 64 | 65 | ops.append(w) 66 | ops.append(b) 67 | 68 | fc8 = tf.matmul(tf.squeeze(current),w) + b 69 | net['weigh21'] = w 70 | net['b21'] = b 71 | net[layers[-1]] = fc8 72 | return net, mean 73 | 74 | def _conv_layer(input, weights, bias,pad,stride,i,ops,net): 75 | pad = pad[0] 76 | stride= stride[0] 77 | input = tf.pad(input, [[0, 0], [pad[0], pad[1]], [pad[2], pad[3]], [0, 0]], "CONSTANT") 78 | w = tf.Variable(weights,name='w'+str(i),dtype='float32') 79 | b = tf.Variable(bias,name='bias'+str(i),dtype='float32') 80 | ops.append(w) 81 | ops.append(b) 82 | net['weights' + str(i)] = w 83 | net['b' + str(i)] = b 84 | conv = tf.nn.conv2d(input, w, strides=[1,stride[0],stride[1],1],padding='VALID',name='conv'+str(i)) 85 | return tf.nn.bias_add(conv, b,name='add'+str(i)) 86 | 87 | def _full_conv(input, weights, bias,i,ops,net): 88 | w = tf.Variable(weights, name='w' + str(i),dtype='float32') 89 | b = tf.Variable(bias, name='bias' + str(i),dtype='float32') 90 | ops.append(w) 91 | ops.append(b) 92 | net['weights' + str(i)] = w 93 | net['b' + str(i)] = b 94 | conv = tf.nn.conv2d(input, w,strides=[1,1,1,1],padding='VALID',name='fc'+str(i)) 95 | return tf.nn.bias_add(conv, b,name='add'+str(i)) 96 | 97 | def _pool_layer(input,stride,pad,area): 98 | pad = pad[0] 99 | area = area[0] 100 | stride = stride[0] 101 | input = tf.pad(input, [[0, 0], [pad[0], pad[1]], [pad[2], pad[3]], [0, 0]], "CONSTANT") 102 | return tf.nn.max_pool(input, ksize=[1, area[0], area[1], 1], strides=[1,stride[0],stride[1],1],padding='VALID') 103 | 104 | def preprocess(image, mean_pixel): 105 | return image - mean_pixel 106 | 107 | 108 | def unprocess(image, mean_pixel): 109 | return image + mean_pixel 110 | 111 | def get_meanpix(data_path): 112 | data = scipy.io.loadmat(data_path) 113 | mean = data['normalization'][0][0][0] 114 | return mean 115 | -------------------------------------------------------------------------------- /util/visualize.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import torch.optim as optim 6 | from scipy.spatial import distance 7 | from sklearn import manifold 8 | 9 | 10 | def plot_distribution(data, path): 11 | N, D = data.shape 12 | plt.figure(figsize=(32, D)); 13 | for i in range(1, D+1): 14 | plt.subplot(D//4, 4, i); 15 | commutes = pd.Series(data[:, i-1]) 16 | commutes.plot.hist(grid=True, bins=200, rwidth=0.9, color='#607c8e'); 17 | plt.title(f'{i}bit') 18 | plt.savefig(f"{path}/data_distribution.png") 19 | 20 | res = '' 21 | for i in range(1, 11): 22 | t = i / 10 23 | ratio = (np.sum(data.flatten()>=t) + np.sum(data.flatten()<=-t)) / (N * D) 24 | res += f'threshold: {t:.1f}, quantizaion ratio: {ratio:.5f}\n' 25 | return res 26 | 27 | 28 | def plot_distance(db_feats, db_label, query_feats, query_label, path): 29 | S = np.matmul(db_label, query_label.transpose()) 30 | N = np.sum(S==1) 31 | 32 | plt.figure(figsize=[16, 6]) 33 | plt.subplot(121) 34 | cosine_32bit = distance.cdist(db_feats, query_feats, metric='cosine') / 2 35 | plt.title('cosine distribution') 36 | commutes = pd.Series(np.random.choice(cosine_32bit[S==1].flatten(), N)) 37 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#607c8e', alpha=0.7); 38 | commutes = 
pd.Series(np.random.choice(cosine_32bit[S==0].flatten(), N)) 39 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#500c8e', alpha=0.7); 40 | 41 | plt.subplot(122) 42 | euclidean_32bit = distance.cdist(db_feats, query_feats, metric='euclidean') 43 | plt.title('euclidean distribution') 44 | commutes = pd.Series(np.random.choice(euclidean_32bit[S==1].flatten(), N)) 45 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#607c8e', alpha=0.7); 46 | commutes = pd.Series(np.random.choice(euclidean_32bit[S==0].flatten(), N)) 47 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#500c8e', alpha=0.7); 48 | 49 | plt.savefig(f"{path}/distance_distribution.png") 50 | 51 | 52 | def plot_tsne(data, label, path, R=2000): 53 | if label.shape[1] != 10: 54 | return 55 | label2name = np.array(['airplane', 'automobile', 'bird', 'cat', 'deer', \ 56 | 'dog', 'frog', 'horse', 'ship', 'truck']) 57 | label2color = np.array([(1,0,0), (0,1,0), (0,0,1), (1,0,1), (1,1,0), \ 58 | (0,1,1), (1,0.5,0), (0,0,0), (0.75,0.75,0.75), (0.25,0.5,0.5)]) 59 | if label.ndim > 1: 60 | label = label.argmax(axis=1) 61 | plt.figure(figsize=(16, 12)); 62 | embed = TSNE(n_components=2, perplexity=30, lr=1, eps=1e-9, n_iter=2000, device='cuda').fit_transform(data[:R]) 63 | plt.scatter(embed[:, 0], embed[:, 1], c=label2color[label[:R]], s=10) 64 | plt.savefig(f"{path}/data_t-SNE.png") 65 | 66 | 67 | class TSNE(object): 68 | 69 | def __init__(self, n_components=2, perplexity=30, lr=1, eps=1e-9, n_iter=2000, device='cpu'): 70 | self.perplexity = perplexity 71 | self.lr = lr 72 | self.eps = eps 73 | self.n_iter = n_iter 74 | self.device = device 75 | self.n_components = n_components 76 | 77 | def t_distribution(self, y): 78 | n = y.shape[0] 79 | dist = torch.sum((y.reshape(n, 1, -1) - y.reshape(1, n, -1)) ** 2, -1) 80 | affinity = 1 / (1 + dist) 81 | affinity *= (1 - torch.eye(n, device=self.device)) # set diag to zero 82 | q = affinity / affinity.sum() + self.eps 83 | return q 84 | 85 | def fit_transform(self, x): 86 | dist2 = distance.squareform(distance.pdist(x, metric='sqeuclidean')) 87 | p = distance.squareform(manifold.t_sne._joint_probabilities(dist2, self.perplexity, False)) + self.eps 88 | 89 | p = torch.tensor(p, device=self.device, dtype=torch.float32).reshape(-1) 90 | log_p = torch.log(p) 91 | 92 | y = torch.randn([dist2.shape[0], self.n_components], device=self.device, requires_grad=True) 93 | optimizer = optim.Adam([y], lr=self.lr) 94 | criterion = torch.nn.KLDivLoss() 95 | 96 | for i_iter in range(self.n_iter): 97 | q = self.t_distribution(y).reshape(-1) 98 | loss = (p * (log_p - torch.log(q))).sum() 99 | optimizer.zero_grad() 100 | loss.backward() 101 | optimizer.step() 102 | 103 | return y.detach().cpu().numpy() 104 | -------------------------------------------------------------------------------- /examples/dqn/train_val_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import warnings 5 | import data_provider.image as dataset 6 | import model.dqn as model 7 | from pprint import pprint 8 | from util import Logger, str2bool 9 | 10 | 11 | warnings.filterwarnings("ignore", category=DeprecationWarning) 12 | warnings.filterwarnings("ignore", category=FutureWarning) 13 | 14 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21, 15 | 'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10} 16 | 17 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 
5000, 18 | 'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000} 19 | 20 | 21 | def parse_args(argv): 22 | parser = argparse.ArgumentParser(description='Train and val') 23 | 24 | # algorithm config 25 | algorithm_group = parser.add_argument_group(title='Algorithm config') 26 | algorithm_group.add_argument('--output-dim', type=int, default=64) 27 | algorithm_group.add_argument('--max-iter-update-b', type=int, default=3) 28 | algorithm_group.add_argument('--max-iter-update-Cb', type=int, default=1) 29 | algorithm_group.add_argument('--cq-lambda', type=float, default=0.0001) 30 | algorithm_group.add_argument('--code-batch-size', type=int, default=500) 31 | algorithm_group.add_argument('--n-subspace', type=int, default=4) 32 | algorithm_group.add_argument('--n-subcenter', type=int, default=256) 33 | # network config 34 | network_group = parser.add_argument_group(title='Network config') 35 | network_group.add_argument('--gpu_id', type=str, default='0') 36 | network_group.add_argument('--max-iter', type=int, default=5000) 37 | network_group.add_argument('--batch-size', type=int, default=256) 38 | network_group.add_argument('--val-batch-size', type=int, default=100) 39 | network_group.add_argument('--decay-step', type=int, default=1000, help='Epochs after which learning rate decays') 40 | network_group.add_argument('--learning-rate', type=float, default=0.002) # 0.02 for DVSQ, 0.002 for DQN 41 | network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5, help='Learning rate decay factor') 42 | network_group.add_argument('--network', type=str, default='alexnet') 43 | network_group.add_argument('--network-weights', type=str) 44 | network_group.add_argument('--finetune-all', type=str2bool, default=True) 45 | network_group.add_argument('--test', default=False, action='store_true') 46 | network_group.add_argument('--debug', default=False, action='store_true') 47 | # dataset config 48 | dataset_group = parser.add_argument_group(title='Dataset config') 49 | dataset_group.add_argument('--dataset', type=str, default='cifar10') 50 | dataset_group.add_argument('--prefix', type=str, default='1') 51 | # config process 52 | config, rest = parser.parse_known_args() 53 | _dataset = config.dataset 54 | _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dqn/' + \ 55 | f'{config.prefix}_subspace{config.n_subspace}_subcenter{config.n_subcenter}' 56 | dataset_group.add_argument('--R', type=int, default=Rs[_dataset]) 57 | dataset_group.add_argument('--label-dim', type=str, default=label_dims[_dataset]) 58 | dataset_group.add_argument('--save-dir', type=str, default=_save_dir) 59 | 60 | return parser.parse_args(argv) 61 | 62 | 63 | def main(config): 64 | warnings.filterwarnings("ignore", category=DeprecationWarning) 65 | warnings.filterwarnings("ignore", category=FutureWarning) 66 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' 67 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id 68 | 69 | if not os.path.exists(config.save_dir): 70 | os.makedirs(config.save_dir) 71 | sys.stdout = Logger(os.path.join(config.save_dir, 'train.log')) 72 | 73 | pprint(vars(config)) 74 | data_root = os.path.join('../../data', config.dataset) 75 | img_tr = f'{data_root}/train.txt' 76 | img_te = f'{data_root}/test.txt' 77 | img_db = f'{data_root}/database.txt' 78 | 79 | if config.test == True: 80 | # config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 81 | config.network_weights = './models/lr0.002_cq0.0001_ss4_sc256_d64_cifar10.npy' 82 | else: 83 | 
train_img = dataset.import_train(data_root, img_tr)
84 |         network_weights = model.train(train_img, config)
85 |         config.network_weights = network_weights
86 | 
87 |     query_img, database_img = dataset.import_validation(data_root, img_te, img_db)
88 |     maps = model.validation(database_img, query_img, config)
89 | 
90 |     for key in maps:
91 |         print(f"{key}: {maps[key]}")
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main(parse_args(sys.argv[1:]))
--------------------------------------------------------------------------------
/examples/dvsq/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import warnings
5 | import data_provider.image as dataset
6 | import model.dvsq as model
7 | from pprint import pprint
8 | from util import Logger, str2bool
9 | 
10 | 
11 | warnings.filterwarnings("ignore", category=DeprecationWarning)
12 | warnings.filterwarnings("ignore", category=FutureWarning)
13 | 
14 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21,
15 |               'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10}
16 | 
17 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000,
18 |       'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000}
19 | 
20 | 
21 | def parse_args(argv):
22 |     parser = argparse.ArgumentParser(description='Train and val')
23 | 
24 |     # algorithm config
25 |     algorithm_group = parser.add_argument_group(title='Algorithm config')
26 |     algorithm_group.add_argument('--output-dim', type=int, default=300)
27 |     algorithm_group.add_argument('--max-iter-update-b', type=int, default=3)
28 |     algorithm_group.add_argument('--max-iter-update-Cb', type=int, default=1)
29 |     algorithm_group.add_argument('--cq-lambda', type=float, default=0.0001)
30 |     algorithm_group.add_argument('--code-batch-size', type=int, default=500)
31 |     algorithm_group.add_argument('--n-subspace', type=int, default=4)
32 |     algorithm_group.add_argument('--n-subcenter', type=int, default=256)
33 |     algorithm_group.add_argument('--margin', type=float, default=0.7)  # the margin is a float, not an int
34 |     # network config
35 |     network_group = parser.add_argument_group(title='Network config')
36 |     network_group.add_argument('--gpu_id', type=str, default='0')
37 |     network_group.add_argument('--max-iter', type=int, default=5000)
38 |     network_group.add_argument('--batch-size', type=int, default=256)
39 |     network_group.add_argument('--val-batch-size', type=int, default=100)
40 |     network_group.add_argument('--decay-step', type=int, default=1000, help='Iterations after which the learning rate decays')
41 |     network_group.add_argument('--learning-rate', type=float, default=0.02)  # 0.02 for DVSQ, 0.002 for DQN
42 |     network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5, help='Learning rate decay factor')
43 |     network_group.add_argument('--network', type=str, default='alexnet')
44 |     network_group.add_argument('--network-weights', type=str)
45 |     network_group.add_argument('--finetune-all', type=str2bool, default=True)
46 |     network_group.add_argument('--test', default=False, action='store_true')
47 |     network_group.add_argument('--debug', default=False, action='store_true')
48 |     # dataset config
49 |     dataset_group = parser.add_argument_group(title='Dataset config')
50 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
51 |     dataset_group.add_argument('--prefix', type=str, default='1')
52 |     # config process
53 |     config, rest = parser.parse_known_args()
54 |     _dataset = config.dataset
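    # (Two-phase parsing: parse_known_args() above reads the user-supplied
    # dataset and network first, so the dataset-dependent defaults registered
    # below (R, label-dim, save-dir, wordvec-dict) can be derived before the
    # final parse_args(argv) call.)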
55 | _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dvsq/' + \ 56 | f'{config.prefix}_margin{config.margin}_subspace{config.n_subspace}_subcenter{config.n_subcenter}' 57 | dataset_group.add_argument('--R', type=int, default=Rs[_dataset]) 58 | dataset_group.add_argument('--label-dim', type=str, default=label_dims[_dataset]) 59 | dataset_group.add_argument('--save-dir', type=str, default=_save_dir) 60 | dataset_group.add_argument('--wordvec-dict', type=str, default="../../data/cifar10/wordvec.txt") 61 | 62 | return parser.parse_args(argv) 63 | 64 | 65 | def main(config): 66 | warnings.filterwarnings("ignore", category=DeprecationWarning) 67 | warnings.filterwarnings("ignore", category=FutureWarning) 68 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' 69 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id 70 | 71 | if not os.path.exists(config.save_dir): 72 | os.makedirs(config.save_dir) 73 | sys.stdout = Logger(os.path.join(config.save_dir, 'train.log')) 74 | 75 | pprint(vars(config)) 76 | data_root = os.path.join('../../data', config.dataset) 77 | config.wordvec_dict = f'{data_root}/wordvec.txt' 78 | img_tr = f'{data_root}/train.txt' 79 | img_te = f'{data_root}/test.txt' 80 | img_db = f'{data_root}/database.txt' 81 | 82 | if config.test == True: 83 | config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 84 | else: 85 | train_img = dataset.import_train(data_root, img_tr) 86 | network_weights = model.train(train_img, config) 87 | config.network_weights = network_weights 88 | 89 | query_img, database_img = dataset.import_validation(data_root, img_te, img_db) 90 | maps = model.validation(database_img, query_img, config) 91 | 92 | for key in maps: 93 | print(f"{key}: {maps[key]}") 94 | 95 | 96 | if __name__ == "__main__": 97 | main(parse_args(sys.argv[1:])) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepHash-tensorflow 2 | 3 | This code repository is derived from [thulab/DeepHash](https://github.com/thulab/DeepHash) but adds more features. 4 | 5 | See the commits and update logs for details. 6 | 7 | ## Update logs: 8 | - Metrics 9 | - add WhRank 10 | - add mAP_finetune 11 | - add RAMAP 12 | - Backbone 13 | - add VGG-F network 14 | - Loss functions 15 | - add Independent loss 16 | - add balance loss 17 | - add orthogonal loss 18 | - add exp loss 19 | - add quantization losses, *i.e.*, L1 loss、L2 loss、cauchy loss 20 | - Tools 21 | - add visualization tool such as T-SNE, *etc* 22 | - add distance libs 23 | - Others 24 | - refactoring the `data_provider` code 25 | - refactoring the `model` code 26 | - refactoring `parser` code 27 | - training acceleration 28 | 29 | ## Contacts 30 | Maintainers of this library: 31 | * Shen Chen, Email: chenshen@stu.xmu.edu.cn 32 | 33 | --- 34 | DeepHash is a lightweight deep learning to hash library that implements state-of-the-art deep hashing/quantization algorithms. We will implement more representative deep hashing models continuously according to our released [deep hashing paper list](https://github.com/caoyue10/DeepHashingBaselines). Specifically, we welcome other researchers to contribute deep hashing models into this toolkit based on our framework. We will announce the contribution in this project. 
35 | 
36 | The implemented models include:
37 | 
38 | * DQN: [Deep Quantization Network for Efficient Image Retrieval](http://yue-cao.me/doc/deep-quantization-networks-dqn-aaai16.pdf), Yue Cao, Mingsheng Long, Jianmin Wang, Han Zhu, Qingfu Wen, AAAI Conference on Artificial Intelligence (AAAI), 2016
39 | * DHN: [Deep Hashing Network for Efficient Similarity Retrieval](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-hashing-network-aaai16.pdf), Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao, AAAI Conference on Artificial Intelligence (AAAI), 2016
40 | * DVSQ: [Deep Visual-Semantic Quantization for Efficient Image Retrieval](http://yue-cao.me/doc/deep-visual-semantic-quantization-cvpr17.pdf), Yue Cao, Mingsheng Long, Jianmin Wang, Shichen Liu, IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017
41 | * DCH: [Deep Cauchy Hashing for Hamming Space Retrieval](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf), Yue Cao, Mingsheng Long, Bin Liu, Jianmin Wang, IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2018
42 | * DTQ: [Deep Triplet Quantization](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-triplet-quantization-acmmm18.pdf), Bin Liu, Yue Cao, Mingsheng Long, Jianmin Wang, Jingdong Wang, ACM Multimedia (ACMMM), 2018
43 | 
44 | Note: DTQ and DCH are up to date, while DQN, DHN and DVSQ may be outdated; feel free to contact us if you have any questions. We welcome others to contribute!
45 | 
46 | ## Requirements
47 | 
48 | - Python3: Anaconda is recommended because it already contains a lot of packages:
49 | ```
50 | conda create -n DeepHash python=3.6 anaconda
51 | source activate DeepHash
52 | ```
53 | - Other packages:
54 | ```
55 | conda install -y tensorflow-gpu
56 | conda install -y -c conda-forge opencv
57 | ```
58 | 
59 | To import the packages implemented in `./DeepHash`, add the path of `./DeepHash` to the `PYTHONPATH` environment variable:
60 | 
61 | ```shell
62 | export PYTHONPATH=/path/to/project/DeepHash/DeepHash:$PYTHONPATH
63 | ```
64 | 
65 | ## Data Preparation
66 | In `data/cifar10/train.txt`, we give an example of how to prepare image training data. The lists of test and database images in `data/cifar10/test.txt` and `data/cifar10/database.txt` are processed during the prediction procedure. If you want to add another dataset as input, prepare `train.txt`, `test.txt` and `database.txt` in the same way as for the CIFAR-10 dataset.
67 | 
68 | In addition, we have put the whole CIFAR-10 dataset, including the images and data lists, on the [release page](https://github.com/thulab/DeepHash/releases/download/v0.1/cifar10.zip). You can download it directly and unzip it to the `data/cifar10` folder.
69 | 
70 | Make sure the tree of `/path/to/project/data/cifar10` looks like this:
71 | 
72 | ```
73 | .
74 | |-- database.txt
75 | |-- test
76 | |-- test.txt
77 | |-- train
78 | `-- train.txt
79 | ```
80 | 
81 | If you need to run on NUSWIDE_81 and COCO, we recommend following https://github.com/thuml/HashNet/tree/master/pytorch#datasets to prepare the NUSWIDE_81 and COCO images.
82 | 
83 | For the *DVSQ* model, you also need *word vectors* for the semantic labels. Here we use a word2vec model pretrained on the GoogleNews dataset (e.g. https://github.com/mmihaltz/word2vec-GoogleNews-vectors) to extract word embeddings for the image labels, e.g. dog, cat and so on.
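As a rough illustration, a minimal sketch (not part of this repo) of producing such a `wordvec.txt` for the CIFAR-10 labels; it assumes the `gensim` package and the GoogleNews binary linked above, and labels missing from the word2vec vocabulary would need a manual fallback. The models here read this file with `np.loadtxt`, i.e. one whitespace-separated embedding row per class:

```python
# Hypothetical helper: dump one 300-d GoogleNews embedding per class label.
import numpy as np
from gensim.models import KeyedVectors

labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']  # CIFAR-10 class names
kv = KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True)
vecs = np.stack([kv[w] for w in labels])  # raises KeyError if a label is OOV
np.savetxt('data/cifar10/wordvec.txt', vecs)  # loadable via np.loadtxt
```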
84 | 
85 | ## Get Started
86 | 
87 | ### Pre-trained model
88 | 
89 | You should manually download the model file of the ImageNet pre-trained AlexNet from [here](https://github.com/thulab/DeepHash/releases/download/v0.1/reference_pretrain.npy.zip) or from the release page, and unzip it to `/path/to/project/DeepHash/architecture/pretrained_model`.
90 | 
91 | Make sure the tree of `/path/to/project/DeepHash/architecture` looks like this:
92 | 
93 | ```
94 | ├── __init__.py
95 | ├── pretrained_model
96 |    └── reference_pretrain.npy
97 | ```
98 | 
99 | ### Training and Testing
100 | 
101 | The example for each `$method` (DCH and DTQ) can be run like:
102 | 
103 | ```shell
104 | cd examples/$method/
105 | python train_val_script.py --gpus "0,1" --data-dir $PWD/../../data [other parameters described in train_val_script.py]
106 | ```
107 | 
108 | For DVSQ, DQN and DHN, please refer to the `train_val.sh` and `train_val_script.py` in the examples folder.
109 | 
110 | ## Citations
111 | If you find *DeepHash* useful for your research, please consider citing the following papers:
112 | 
113 |     @InProceedings{cite:AAAI16DQN,
114 |       Author = {Yue Cao and Mingsheng Long and Jianmin Wang and Han Zhu and Qingfu Wen},
115 |       Publisher = {AAAI},
116 |       Title = {Deep Quantization Network for Efficient Image Retrieval},
117 |       Year = {2016}
118 |     }
119 | 
120 |     @InProceedings{cite:AAAI16DHN,
121 |       Author = {Han Zhu and Mingsheng Long and Jianmin Wang and Yue Cao},
122 |       Publisher = {AAAI},
123 |       Title = {Deep Hashing Network for Efficient Similarity Retrieval},
124 |       Year = {2016}
125 |     }
126 | 
127 |     @InProceedings{cite:CVPR17DVSQ,
128 |       Title={Deep visual-semantic quantization for efficient image retrieval},
129 |       Author={Cao, Yue and Long, Mingsheng and Wang, Jianmin and Liu, Shichen},
130 |       Booktitle={CVPR},
131 |       Year={2017}
132 |     }
133 | 
134 |     @InProceedings{cite:CVPR18DCH,
135 |       Title={Deep Cauchy Hashing for Hamming Space Retrieval},
136 |       Author={Cao, Yue and Long, Mingsheng and Liu, Bin and Wang, Jianmin},
137 |       Booktitle={CVPR},
138 |       Year={2018}
139 |     }
140 | 
141 |     @article{liu2018deep,
142 |       title={Deep triplet quantization},
143 |       author={Liu, Bin and Cao, Yue and Long, Mingsheng and Wang, Jianmin and Wang, Jingdong},
144 |       journal={MM, ACM},
145 |       year={2018}
146 |     }
147 | 
--------------------------------------------------------------------------------
/data_provider/triplet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from distance.npversion import distance
4 | 
5 | class Dataset(object):
6 |     def __init__(self, dataset, output_dim, code_dim):
7 |         self._dataset = dataset
8 |         self.n_samples = dataset.n_samples
9 |         self._train = dataset.train
10 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
11 |         self._codes = np.zeros((self.n_samples, code_dim), dtype=np.float32)
12 |         self._triplets = np.array([])
13 |         self._trip_index_in_epoch = 0
14 |         self._index_in_epoch = 0
15 |         self._epochs_complete = 0
16 |         self._perm = np.arange(self.n_samples)
17 |         np.random.shuffle(self._perm)
18 |         return
19 | 
20 |     def update_triplets(self, margin, n_part=10, dist_type='euclidean2', select_strategy='margin'):
21 |         """
22 |         :param select_strategy: hard, all, margin
23 |         :param dist_type: distance type, e.g. euclidean2, cosine
24 |         :param margin: triplet margin parameter
25 |         :param n_part: number of parts to split the data into
26 |         """
27 |         n_samples = self.n_samples
28 |         np.random.shuffle(self._perm)
29 |         embedding = self._output[self._perm[:n_samples]]
30 |         labels = self._dataset.get_labels()[self._perm[:n_samples]]
31 |         n_samples_per_part = int(math.ceil(n_samples / n_part))
32 |         triplets = []
33 |         for i in range(n_part):
34 |             start = n_samples_per_part * i
35 |             end = min(n_samples_per_part * (i+1), n_samples)
36 |             dist = distance(embedding[start:end], pair=True, dist_type=dist_type)
37 |             for idx_anchor in range(0, end - start):
38 |                 label_anchor = np.copy(labels[idx_anchor+start, :])
39 |                 label_anchor[label_anchor==0] = -1
40 |                 all_pos = np.where(np.any(labels[start:end] == label_anchor, axis=1))[0]
41 |                 all_neg = np.array(list(set(range(end-start)) - set(all_pos)))
42 | 
43 |                 if select_strategy == 'hard':
44 |                     idx_pos = all_pos[np.argmax(dist[idx_anchor, all_pos])]
45 |                     if idx_pos == idx_anchor:
46 |                         continue
47 |                     idx_neg = all_neg[np.argmin(dist[idx_anchor, all_neg])]
48 |                     triplets.append((idx_anchor + start, idx_pos + start, idx_neg + start))
49 |                     continue
50 | 
51 |                 for idx_pos in all_pos:
52 |                     if idx_pos == idx_anchor:
53 |                         continue
54 | 
55 |                     if select_strategy == 'all':
56 |                         selected_neg = all_neg
57 |                     elif select_strategy == 'margin':
58 |                         selected_neg = all_neg[np.where(dist[idx_anchor, all_neg] - dist[idx_anchor, idx_pos] < margin)[0]]
59 | 
60 |                     if selected_neg.shape[0] > 0:
61 |                         idx_neg = np.random.choice(selected_neg)
62 |                         triplets.append((idx_anchor + start, idx_pos + start, idx_neg + start))
63 |         self._triplets = np.array(triplets)
64 |         np.random.shuffle(self._triplets)
65 | 
66 |         # sanity check: positives share at least one label with the anchor, negatives share none
67 |         anchor = labels[self._triplets[:, 0]]
68 |         mapper = lambda anchor, other: np.any(anchor * (anchor == other), -1)
69 |         assert(np.all(mapper(anchor, labels[self._triplets[:, 1]])))
70 |         assert(np.all(np.invert(mapper(anchor, labels[self._triplets[:, 2]]))))
71 |         return
72 | 
73 |     def next_batch_triplet(self, batch_size):
74 |         """
75 |         Args:
76 |             batch_size
77 |         Returns:
78 |             data, label, codes
79 |         """
80 |         start = self._trip_index_in_epoch
81 |         self._trip_index_in_epoch += batch_size
82 |         if self._trip_index_in_epoch > self.triplets.shape[0]:
83 |             start = 0
84 |             self._trip_index_in_epoch = batch_size
85 |         end = self._trip_index_in_epoch
86 | 
87 |         # stack the indices of anchors, positives and negatives into one array
88 |         arr = self.triplets[start:end]
89 |         idx = self._perm[np.concatenate([arr[:, 0], arr[:, 1], arr[:, 2]], axis=0)]
90 |         data, label = self._dataset.data(idx)
91 | 
92 |         return data, label, self._codes[idx]
93 | 
94 |     def next_batch(self, batch_size):
95 |         """
96 |         Args:
97 |             batch_size
98 |         Returns:
99 |             [batch_size, (n_inputs)]: next batch images, built by stacking anchor, positive, negative
100 |             [batch_size, n_class]: next batch labels
101 |         """
102 |         start = self._index_in_epoch
103 |         self._index_in_epoch += batch_size
104 |         if self._index_in_epoch > self.n_samples:
105 |             if self._train:
106 |                 self._epochs_complete += 1
107 |                 start = 0
108 |                 self._index_in_epoch = batch_size
109 |             else:
110 |                 # Validation stage only process once
111 |                 start = self.n_samples - batch_size
112 |                 self._index_in_epoch = self.n_samples
113 |         end = self._index_in_epoch
114 | 
115 |         data, label = self._dataset.data(self._perm[start:end])
116 |         return (data, label, self._codes[self._perm[start: end], :])
117 | 
118 |     def next_batch_output_codes(self, batch_size):
119 |         start = self._index_in_epoch
120 |         self._index_in_epoch += batch_size
121 | 
# Another epoch finished
122 |         if self._index_in_epoch > self.n_samples:
123 |             if self._train:
124 |                 # Start next epoch
125 |                 start = 0
126 |                 self._index_in_epoch = batch_size
127 |             else:
128 |                 # Validation stage only process once
129 |                 start = self.n_samples - batch_size
130 |                 self._index_in_epoch = self.n_samples
131 |         end = self._index_in_epoch
132 | 
133 |         return (self._output[self._perm[start: end], :],
134 |                 self._codes[self._perm[start: end], :])
135 | 
136 |     def feed_batch_output(self, batch_size, output):
137 |         start = self._index_in_epoch - batch_size
138 |         end = self._index_in_epoch
139 |         self._output[self._perm[start:end], :] = output
140 |         return
141 | 
142 |     def feed_batch_triplet_output(self, batch_size, triplet_output):
143 |         anchor, pos, neg = np.split(triplet_output, 3, axis=0)
144 |         start = self._trip_index_in_epoch - batch_size
145 |         end = self._trip_index_in_epoch
146 |         idx = self._perm[self._triplets[start:end, :]]
147 |         self._output[idx[:, 0]] = anchor
148 |         self._output[idx[:, 1]] = pos
149 |         self._output[idx[:, 2]] = neg
150 |         return
151 | 
152 |     def feed_batch_codes(self, batch_size, codes):
153 |         """
154 |         Args:
155 |             batch_size
156 |             [batch_size, n_output]
157 |         """
158 |         start = self._index_in_epoch - batch_size
159 |         end = self._index_in_epoch
160 |         self._codes[self._perm[start:end], :] = codes
161 |         return
162 | 
163 |     @property
164 |     def output(self):
165 |         return self._output
166 | 
167 |     @property
168 |     def codes(self):
169 |         return self._codes
170 | 
171 |     @property
172 |     def triplets(self):
173 |         return self._triplets
174 | 
175 |     @property
176 |     def label(self):
177 |         return self._dataset.get_labels()
178 | 
179 |     def finish_epoch(self):
180 |         self._index_in_epoch = 0
181 | 
--------------------------------------------------------------------------------
/evaluation/load_and_predict.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.io as scio
3 | 
4 | def pr_curve(params):
5 |     database_code = np.array(params['database_code'])
6 |     validation_code = np.array(params['validation_code'])
7 |     database_labels = np.array(params['database_labels'])
8 |     validation_labels = np.array(params['validation_labels'])
9 | 
10 |     query_num = validation_code.shape[0]
11 |     database_num = database_code.shape[0]
12 | 
13 |     database_code = np.sign(database_code)
14 |     database_code[database_code == -1] = 0
15 |     database_code = database_code.astype(int)
16 | 
17 |     validation_code = np.sign(validation_code)
18 |     validation_code[validation_code == -1] = 0
19 |     validation_code = validation_code.astype(int)
20 | 
21 |     database_labels = database_labels.astype(int)  # astype returns a copy, so the result must be assigned
22 |     validation_labels = validation_labels.astype(int)
23 | 
24 |     WTrue = np.dot(validation_labels, database_labels.T)
25 |     WTrue[WTrue >= 1] = 1
26 |     WTrue[WTrue < 1] = 0
27 |     print(WTrue.shape)
28 |     print(np.max(WTrue))
29 |     print(np.min(WTrue))
30 | 
31 |     DHat = np.zeros((query_num, database_num))
32 | 
33 |     for i in range(query_num):
34 |         query = validation_code[i, :]
35 |         query_matrix = np.tile(query, (database_num, 1))
36 | 
37 |         distance = np.sum(np.absolute(query_matrix - database_code), axis=1)
38 |         DHat[i, :] = distance
39 |         print(i)
40 | 
41 |     print(DHat.shape)
42 |     print(np.max(DHat))
43 |     print(np.min(DHat))
44 | 
45 |     mat_dic = dict(
46 |         WTrue=WTrue,
47 |         DHat=DHat
48 |     )
49 |     scio.savemat('./data/data.mat', mat_dic)
50 | 
51 | 
52 | def precision_recall(params):
53 |     database_code = np.array(params['database_code'])
54 |     validation_code = np.array(params['validation_code'])
55 |     database_labels = np.array(params['database_labels'])
56 |     validation_labels = np.array(params['validation_labels'])
57 |     database_code = np.sign(database_code)
58 |     validation_code = np.sign(validation_code)
59 |     database_labels = database_labels.astype(int)  # assign the astype result
60 |     validation_labels = validation_labels.astype(int)
61 | 
62 |     sim = np.dot(database_code, validation_code.T)
63 |     ids = np.argsort(-sim, axis=0)
64 |     ones = np.ones((ids.shape[0], ids.shape[1]), dtype=int)
65 |     print(np.min(ids))
66 |     ids = ids + ones
67 |     print(np.min(ids))
68 |     mat_ids = dict(
69 |         ids=ids,
70 |         LBase=database_labels,
71 |         LTest=validation_labels
72 |     )
73 |     scio.savemat('./data/data.mat', mat_ids)
74 | 
75 | 
76 | def hamming_precision(params):
77 |     database_code = np.array(params['database_code'])
78 |     validation_code = np.array(params['validation_code'])
79 |     database_labels = np.array(params['database_labels'])
80 |     validation_labels = np.array(params['validation_labels'])
81 |     R = params['R']
82 |     query_num = validation_code.shape[0]
83 |     database_num = database_code.shape[0]
84 | 
85 |     database_code = np.sign(database_code)
86 |     database_code[database_code == -1] = 0
87 |     database_code = database_code.astype(int)
88 | 
89 |     validation_code = np.sign(validation_code)
90 |     validation_code[validation_code == -1] = 0
91 |     validation_code = validation_code.astype(int)
92 | 
93 |     APx = []
94 | 
95 |     for i in range(query_num):
96 |         query = validation_code[i, :]
97 |         query_matrix = np.tile(query, (database_num, 1))
98 | 
99 |         label = validation_labels[i, :]
100 |         label[label == 0] = -1
101 |         label_matrix = np.tile(label, (database_num, 1))
102 | 
103 |         distance = np.sum(np.absolute(query_matrix - database_code), axis=1)
104 |         similarity = np.sum(database_labels == label_matrix, axis=1)
105 |         similarity[similarity > 1] = 1
106 | 
107 |         total_rel_num = np.sum(distance <= R)
108 |         true_positive = np.sum((distance <= R) * similarity)
109 | 
110 |         print('--------')
111 |         print(i)
112 |         print(true_positive)
113 |         print(total_rel_num)
114 |         print('--------')
115 |         if total_rel_num != 0:
116 |             APx.append(float(true_positive) / total_rel_num)
117 |         else:
118 |             APx.append(float(0))
119 | 
120 |     print(np.sum(np.array(APx) != 0))
121 |     return np.mean(np.array(APx))
122 | 
123 | 
124 | def precision_curve(params):
125 |     database_code = np.array(params['database_code'])
126 |     validation_code = np.array(params['validation_code'])
127 |     database_labels = np.array(params['database_labels'])
128 |     validation_labels = np.array(params['validation_labels'])
129 |     query_num = validation_code.shape[0]
130 |     database_code = np.sign(database_code)
131 |     validation_code = np.sign(validation_code)
132 | 
133 |     sim = np.dot(database_code, validation_code.T)
134 |     ids = np.argsort(-sim, axis=0)
135 |     arr = []
136 | 
137 |     for it in range(10):  # renamed from `iter` to avoid shadowing the builtin
138 |         R = (it + 1) * 100
139 |         APx = []
140 |         for i in range(query_num):
141 |             label = validation_labels[i, :]
142 |             label[label == 0] = -1
143 |             idx = ids[:, i]
144 |             imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0
145 |             relevant_num = np.sum(imatch)
146 |             APx.append(float(relevant_num) / R)
147 |         arr.append(np.mean(np.array(APx)))
148 |         print(arr)
149 |     print(arr)
150 | 
151 | 
152 | def precision(params):
153 |     database_code = np.array(params['database_code'])
154 |     validation_code = np.array(params['validation_code'])
155 |     database_labels = np.array(params['database_labels'])
156 |     validation_labels = np.array(params['validation_labels'])
157 |     R = params['R']
158 |     query_num = validation_code.shape[0]
159 |     database_code 
= np.sign(database_code) 160 | validation_code = np.sign(validation_code) 161 | 162 | sim = np.dot(database_code, validation_code.T) 163 | ids = np.argsort(-sim, axis=0) 164 | APx = [] 165 | 166 | for i in range(query_num): 167 | label = validation_labels[i, :] 168 | label[label == 0] = -1 169 | idx = ids[:, i] 170 | imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0 171 | relevant_num = np.sum(imatch) 172 | APx.append(float(relevant_num) / R) 173 | 174 | return np.mean(np.array(APx)) 175 | 176 | 177 | def mean_average_precision(params): 178 | database_code = np.array(params['database_code']) 179 | validation_code = np.array(params['validation_code']) 180 | database_labels = np.array(params['database_labels']) 181 | validation_labels = np.array(params['validation_labels']) 182 | R = params['R'] 183 | query_num = validation_code.shape[0] 184 | database_code = np.sign(database_code) 185 | validation_code = np.sign(validation_code) 186 | 187 | sim = np.dot(database_code, validation_code.T) 188 | ids = np.argsort(-sim, axis=0) 189 | APx = [] 190 | 191 | for i in range(query_num): 192 | label = validation_labels[i, :] 193 | label[label == 0] = -1 194 | idx = ids[:, i] 195 | imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0 196 | relevant_num = np.sum(imatch) 197 | Lx = np.cumsum(imatch) 198 | Px = Lx.astype(float) / np.arange(1, R+1, 1) 199 | if relevant_num != 0: 200 | APx.append(np.sum(Px * imatch) / relevant_num) 201 | 202 | return np.mean(np.array(APx)) 203 | 204 | 205 | def statistic_prob(params): 206 | database_code = np.array(params['database_code']) 207 | validation_code = np.array(params['validation_code']) 208 | sim = np.dot(database_code, validation_code.T) 209 | query_num = validation_code.shape[0] 210 | database_num = database_code.shape[0] 211 | ones = np.ones((database_num, query_num)) 212 | exp_sim = np.exp(sim) 213 | prob = ones / (1 + 1 / exp_sim) 214 | useless = np.sum(prob >= 0.95) + np.sum(prob <= 0.05) 215 | useful = query_num * database_num - useless 216 | print("useful") 217 | print(useful) 218 | print("useless") 219 | print(useless) 220 | 221 | -------------------------------------------------------------------------------- /architecture/alexnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def img_alexnet_layers(img, batch_size, output_dim, stage, model_weights=None, val_batch_size=32, with_tanh=True): 7 | deep_param_img = {} 8 | train_layers = [] 9 | 10 | if model_weights is None: 11 | dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | model_weights = os.path.join(dir_path, "pretrained_model/alexnet_weights.npy") 13 | # model_weights = os.path.join(dir_path, "pretrained_model/alexnet_weights_trained.npy") 14 | 15 | print("loading img model from ", model_weights) 16 | net_data = dict(np.load(model_weights, encoding='bytes').item()) 17 | print(list(net_data.keys())) 18 | 19 | # swap(2,1,0), bgr -> rgb 20 | reshaped_image = tf.cast(img, tf.float32)[:, :, :, ::-1] 21 | 22 | height = 227 23 | width = 227 24 | 25 | # Randomly crop a [height, width] section of each image 26 | with tf.name_scope('preprocess'): 27 | def train_fn(): 28 | return tf.stack([tf.random_crop(tf.image.random_flip_left_right(each), [height, width, 3]) 29 | for each in tf.unstack(reshaped_image, batch_size)]) 30 | 31 | def val_fn(): 32 | def crop(img, x, y): return tf.image.crop_to_bounding_box( 33 | img, x, y, width, height) 34 | 35 | def 
distort(f, x, y): return tf.stack( 36 | [crop(f(each), x, y) for each in tf.unstack(reshaped_image, val_batch_size)]) 37 | 38 | def distort_raw(x, y): return distort(lambda x: x, x, y) 39 | 40 | def distort_fliped(x, y): return distort( 41 | tf.image.flip_left_right, x, y) 42 | 43 | distorted = tf.concat([distort_fliped(0, 0), distort_fliped(28, 0), 44 | distort_fliped( 45 | 0, 28), distort_fliped(28, 28), 46 | distort_fliped(14, 14), distort_raw(0, 0), 47 | distort_raw(28, 0), distort_raw(0, 28), 48 | distort_raw(28, 28), distort_raw(14, 14)], 0) 49 | return distorted 50 | 51 | distorted = tf.cond(stage > 0, val_fn, train_fn) 52 | 53 | # Zero-mean input 54 | mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[ 55 | 1, 1, 1, 3], name='img-mean') 56 | distorted = distorted - mean 57 | 58 | # Conv1 59 | # Output 96, kernel 11, stride 4 60 | with tf.name_scope('conv1') as scope: 61 | kernel = tf.Variable(net_data['conv1'][0], name='weights') 62 | conv = tf.nn.conv2d(distorted, kernel, [1, 4, 4, 1], padding='VALID') 63 | biases = tf.Variable(net_data['conv1'][1], name='biases') 64 | out = tf.nn.bias_add(conv, biases) 65 | conv1 = tf.nn.relu(out, name=scope) 66 | deep_param_img['conv1'] = [kernel, biases] 67 | train_layers += [kernel, biases] 68 | 69 | # Pool1 70 | pool1 = tf.nn.max_pool(conv1, 71 | ksize=[1, 3, 3, 1], 72 | strides=[1, 2, 2, 1], 73 | padding='VALID', 74 | name='pool1') 75 | 76 | # LRN1 77 | radius = 2 78 | alpha = 2e-05 79 | beta = 0.75 80 | bias = 1.0 81 | lrn1 = tf.nn.local_response_normalization(pool1, 82 | depth_radius=radius, 83 | alpha=alpha, 84 | beta=beta, 85 | bias=bias) 86 | 87 | # Conv2 88 | # Output 256, pad 2, kernel 5, group 2 89 | with tf.name_scope('conv2') as scope: 90 | kernel = tf.Variable(net_data['conv2'][0], name='weights') 91 | group = 2 92 | 93 | def convolve(i, k): return tf.nn.conv2d( 94 | i, k, [1, 1, 1, 1], padding='SAME') 95 | input_groups = tf.split(lrn1, group, 3) 96 | kernel_groups = tf.split(kernel, group, 3) 97 | output_groups = [convolve(i, k) 98 | for i, k in zip(input_groups, kernel_groups)] 99 | # Concatenate the groups 100 | conv = tf.concat(output_groups, 3) 101 | 102 | biases = tf.Variable(net_data['conv2'][1], name='biases') 103 | out = tf.nn.bias_add(conv, biases) 104 | conv2 = tf.nn.relu(out, name=scope) 105 | deep_param_img['conv2'] = [kernel, biases] 106 | train_layers += [kernel, biases] 107 | 108 | # Pool2 109 | pool2 = tf.nn.max_pool(conv2, 110 | ksize=[1, 3, 3, 1], 111 | strides=[1, 2, 2, 1], 112 | padding='VALID', 113 | name='pool2') 114 | 115 | # LRN2 116 | radius = 2 117 | alpha = 2e-05 118 | beta = 0.75 119 | bias = 1.0 120 | lrn2 = tf.nn.local_response_normalization(pool2, 121 | depth_radius=radius, 122 | alpha=alpha, 123 | beta=beta, 124 | bias=bias) 125 | 126 | # Conv3 127 | # Output 384, pad 1, kernel 3 128 | with tf.name_scope('conv3') as scope: 129 | kernel = tf.Variable(net_data['conv3'][0], name='weights') 130 | conv = tf.nn.conv2d(lrn2, kernel, [1, 1, 1, 1], padding='SAME') 131 | biases = tf.Variable(net_data['conv3'][1], name='biases') 132 | out = tf.nn.bias_add(conv, biases) 133 | conv3 = tf.nn.relu(out, name=scope) 134 | deep_param_img['conv3'] = [kernel, biases] 135 | train_layers += [kernel, biases] 136 | 137 | # Conv4 138 | # Output 384, pad 1, kernel 3, group 2 139 | with tf.name_scope('conv4') as scope: 140 | kernel = tf.Variable(net_data['conv4'][0], name='weights') 141 | group = 2 142 | 143 | def convolve(i, k): return tf.nn.conv2d( 144 | i, k, [1, 1, 1, 1], padding='SAME') 145 | 
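        # (This is AlexNet's grouped convolution: the feature maps and the
        # kernel are split into `group` chunks along the channel axis, each
        # chunk is convolved independently, and the partial outputs are
        # concatenated back together below.)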
input_groups = tf.split(conv3, group, 3) 146 | kernel_groups = tf.split(kernel, group, 3) 147 | output_groups = [convolve(i, k) 148 | for i, k in zip(input_groups, kernel_groups)] 149 | # Concatenate the groups 150 | conv = tf.concat(output_groups, 3) 151 | biases = tf.Variable(net_data['conv4'][1], name='biases') 152 | out = tf.nn.bias_add(conv, biases) 153 | conv4 = tf.nn.relu(out, name=scope) 154 | deep_param_img['conv4'] = [kernel, biases] 155 | train_layers += [kernel, biases] 156 | 157 | # Conv5 158 | # Output 256, pad 1, kernel 3, group 2 159 | with tf.name_scope('conv5') as scope: 160 | kernel = tf.Variable(net_data['conv5'][0], name='weights') 161 | group = 2 162 | 163 | def convolve(i, k): return tf.nn.conv2d( 164 | i, k, [1, 1, 1, 1], padding='SAME') 165 | input_groups = tf.split(conv4, group, 3) 166 | kernel_groups = tf.split(kernel, group, 3) 167 | output_groups = [convolve(i, k) 168 | for i, k in zip(input_groups, kernel_groups)] 169 | # Concatenate the groups 170 | conv = tf.concat(output_groups, 3) 171 | biases = tf.Variable(net_data['conv5'][1], name='biases') 172 | out = tf.nn.bias_add(conv, biases) 173 | conv5 = tf.nn.relu(out, name=scope) 174 | deep_param_img['conv5'] = [kernel, biases] 175 | train_layers += [kernel, biases] 176 | 177 | # Pool5 178 | pool5 = tf.nn.max_pool(conv5, 179 | ksize=[1, 3, 3, 1], 180 | strides=[1, 2, 2, 1], 181 | padding='VALID', 182 | name='pool5') 183 | 184 | # FC6 185 | # Output 4096 186 | with tf.name_scope('fc6'): 187 | shape = int(np.prod(pool5.get_shape()[1:])) 188 | fc6w = tf.Variable(net_data['fc6'][0], name='weights') 189 | fc6b = tf.Variable(net_data['fc6'][1], name='biases') 190 | pool5_flat = tf.reshape(pool5, [-1, shape]) 191 | fc6l = tf.nn.bias_add(tf.matmul(pool5_flat, fc6w), fc6b) 192 | fc6 = tf.nn.relu(fc6l) 193 | fc6 = tf.cond(stage > 0, lambda: fc6, lambda: tf.nn.dropout(fc6, 0.5)) 194 | fc6o = tf.nn.relu(fc6l) 195 | deep_param_img['fc6'] = [fc6w, fc6b] 196 | train_layers += [fc6w, fc6b] 197 | 198 | # FC7 199 | # Output 4096 200 | with tf.name_scope('fc7'): 201 | fc7w = tf.Variable(net_data['fc7'][0], name='weights') 202 | fc7b = tf.Variable(net_data['fc7'][1], name='biases') 203 | fc7l = tf.nn.bias_add(tf.matmul(fc6, fc7w), fc7b) 204 | fc7 = tf.nn.relu(fc7l) 205 | fc7 = tf.cond(stage > 0, lambda: fc7, lambda: tf.nn.dropout(fc7, 0.5)) 206 | deep_param_img['fc7'] = [fc7w, fc7b] 207 | train_layers += [fc7w, fc7b] 208 | 209 | # FC8 210 | # Output output_dim 211 | with tf.name_scope('fc8'): 212 | # Differ train and val stage by 'fc8' as key 213 | if 'fc8' in net_data: 214 | fc8w = tf.Variable(net_data['fc8'][0], name='weights') 215 | fc8b = tf.Variable(net_data['fc8'][1], name='biases') 216 | else: 217 | fc8w = tf.Variable(tf.random_normal([4096, output_dim], 218 | dtype=tf.float32, 219 | stddev=1e-2), name='weights') 220 | fc8b = tf.Variable(tf.constant(0.0, shape=[output_dim], 221 | dtype=tf.float32), name='biases') 222 | fc8l = tf.nn.bias_add(tf.matmul(fc7, fc8w), fc8b) 223 | if with_tanh: 224 | fc8_t = tf.nn.tanh(fc8l) 225 | else: 226 | fc8_t = fc8l 227 | 228 | def val_fn1(): 229 | concated = tf.concat([tf.expand_dims(i, 0) 230 | for i in tf.split(fc8_t, 10, 0)], 0) 231 | return tf.reduce_mean(concated, 0) 232 | fc8 = tf.cond(stage > 0, val_fn1, lambda: fc8_t) 233 | deep_param_img['fc8'] = [fc8w, fc8b] 234 | train_layers += [fc8w, fc8b] 235 | 236 | print("img model loading finished") 237 | # Return outputs 238 | return fc8, deep_param_img, train_layers 
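A minimal usage sketch, not part of the original file: this is how the models in this repo wire up `img_alexnet_layers`, assuming TensorFlow 1.x, the project on `PYTHONPATH`, and the pretrained `.npy` weights in place (the batch sizes and `output_dim` below are arbitrary assumptions):

```python
import tensorflow as tf
from architecture import img_alexnet_layers

# 256x256 inputs, matching the placeholders used by model/dch.py and model/dhn.py
img = tf.placeholder(tf.float32, [None, 256, 256, 3])
# stage defaults to 0 (training); feeding stage=1 takes the 10-crop validation path
stage = tf.placeholder_with_default(tf.constant(0), [])
fc8, params, train_layers = img_alexnet_layers(
    img, batch_size=256, output_dim=64, stage=stage,
    model_weights=None, val_batch_size=100, with_tanh=True)
```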
--------------------------------------------------------------------------------
/model/dch.py:
--------------------------------------------------------------------------------
1 | ##################################################################################
2 | # Deep Cauchy Hashing for Hamming Space Retrieval                                #
3 | # Authors: Yue Cao, Mingsheng Long, Bin Liu, Jianmin Wang                        #
4 | # Contact: caoyue10@gmail.com                                                    #
5 | ##################################################################################
6 | 
7 | import os
8 | import shutil
9 | import time
10 | from datetime import datetime
11 | from math import ceil
12 | 
13 | import numpy as np
14 | import tensorflow as tf
15 | 
16 | import util.plot as plot
17 | from architecture import img_alexnet_layers
18 | from evaluation import MAPs
19 | from data_provider.pairwise import Dataset
20 | from loss import *
21 | 
22 | 
23 | class DCH(object):
24 |     def __init__(self, config):
25 |         ### Initialize setting
26 |         print("initializing")
27 |         np.set_printoptions(precision=4)
28 | 
29 |         with tf.name_scope('stage'):
30 |             # 0 for training, 1 for validation
31 |             self.stage = tf.placeholder_with_default(tf.constant(0), [])
32 |         for k, v in vars(config).items():
33 |             setattr(self, k, v)
34 |         self.file_name = 'lr_{}_cqlambda_{}_alpha_{}_bias_{}_gamma_{}_dataset_{}'.format(
35 |             self.lr,
36 |             self.q_lambda,
37 |             self.alpha,
38 |             self.bias,
39 |             self.gamma,
40 |             self.dataset)
41 |         self.model_file = os.path.join(self.save_dir, self.file_name + '.npy')
42 | 
43 |         ### Setup session
44 |         print("launching session")
45 |         configProto = tf.ConfigProto()
46 |         configProto.gpu_options.allow_growth = True
47 |         configProto.allow_soft_placement = True
48 |         self.sess = tf.Session(config=configProto)
49 | 
50 |         ### Create variables and placeholders
51 |         self.img = tf.placeholder(tf.float32, [None, 256, 256, 3])
52 |         self.img_label = tf.placeholder(tf.float32, [None, self.label_dim])
53 |         self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model()
54 | 
55 |         self.global_step = tf.Variable(0, trainable=False)
56 |         self.train_op = self.apply_loss_function(self.global_step)
57 |         self.sess.run(tf.global_variables_initializer())
58 |         return
59 | 
60 |     def load_model(self):
61 |         if self.img_model == 'alexnet':
62 |             img_output = img_alexnet_layers(  # keyword args keep val_batch_size/with_tanh in the right slots
63 |                 self.img,
64 |                 self.batch_size,
65 |                 self.output_dim,
66 |                 self.stage,
67 |                 self.model_weights,
68 |                 val_batch_size=self.val_batch_size,
69 |                 with_tanh=self.with_tanh)
70 |         else:
71 |             raise Exception('cannot use such CNN model as ' + self.img_model)
72 |         return img_output
73 | 
74 |     def save_model(self, model_file=None):
75 |         if model_file is None:
76 |             model_file = self.model_file
77 |         model = {}
78 |         for layer in self.deep_param_img:
79 |             model[layer] = self.sess.run(self.deep_param_img[layer])
80 |         print("saving model to %s" % model_file)
81 |         if os.path.exists(self.save_dir) is False:
82 |             os.makedirs(self.save_dir)
83 | 
84 |         np.save(model_file, np.array(model))
85 |         return
86 | 
87 |     def apply_loss_function(self, global_step):
88 |         # loss function
89 |         self.cos_loss = cauchy_cross_entropy_loss(self.img_last_layer, self.img_label, gamma=self.gamma, normed=True)
90 |         self.q_loss = self.q_lambda * quantization_loss(self.img_last_layer)
91 |         self.loss = self.cos_loss + self.q_loss
92 | 
93 |         ### Last layer has a 10 times learning rate
94 |         lr = tf.train.exponential_decay(self.lr, global_step, self.decay_step, self.decay_factor, staircase=True)
95 |         opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
96 | 
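        # (apply_gradients below pairs each gradient with a per-parameter
        # learning-rate multiplier, Caffe-style: conv/fc weights keep the base
        # lr, their biases get 2x, and the freshly initialized hash layer fc8
        # gets 10x for the weight and 20x for the bias.)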
grads_and_vars = opt.compute_gradients(self.loss, self.train_layers+self.train_last_layer) 97 | fcgrad, _ = grads_and_vars[-2] 98 | fbgrad, _ = grads_and_vars[-1] 99 | 100 | self.grads_and_vars = grads_and_vars 101 | tf.summary.scalar('loss', self.loss) 102 | tf.summary.scalar('cos_loss', self.cos_loss) 103 | tf.summary.scalar('q_loss', self.q_loss) 104 | tf.summary.scalar('lr', lr) 105 | self.merged = tf.summary.merge_all() 106 | 107 | if self.finetune_all: 108 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 109 | (grads_and_vars[1][0]*2, self.train_layers[1]), 110 | (grads_and_vars[2][0], self.train_layers[2]), 111 | (grads_and_vars[3][0]*2, self.train_layers[3]), 112 | (grads_and_vars[4][0], self.train_layers[4]), 113 | (grads_and_vars[5][0]*2, self.train_layers[5]), 114 | (grads_and_vars[6][0], self.train_layers[6]), 115 | (grads_and_vars[7][0]*2, self.train_layers[7]), 116 | (grads_and_vars[8][0], self.train_layers[8]), 117 | (grads_and_vars[9][0]*2, self.train_layers[9]), 118 | (grads_and_vars[10][0], self.train_layers[10]), 119 | (grads_and_vars[11][0]*2, self.train_layers[11]), 120 | (grads_and_vars[12][0], self.train_layers[12]), 121 | (grads_and_vars[13][0]*2, self.train_layers[13]), 122 | (fcgrad*10, self.train_last_layer[0]), 123 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 124 | else: 125 | return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]), 126 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 127 | 128 | def train(self, img_dataset): 129 | print("%s #train# start training" % datetime.now()) 130 | 131 | ### tensorboard 132 | tflog_path = os.path.join(self.log_dir, self.file_name) 133 | if os.path.exists(tflog_path): 134 | shutil.rmtree(tflog_path) 135 | train_writer = tf.summary.FileWriter(tflog_path, self.sess.graph) 136 | 137 | for train_iter in range(self.iter_num): 138 | images, labels = img_dataset.next_batch(self.batch_size) 139 | start_time = time.time() 140 | 141 | _, loss, cos_loss, output, summary = self.sess.run([self.train_op, self.loss, self.cos_loss, self.img_last_layer, self.merged], 142 | feed_dict={self.img: images, 143 | self.img_label: labels}) 144 | 145 | train_writer.add_summary(summary, train_iter) 146 | 147 | img_dataset.feed_batch_output(self.batch_size, output) 148 | duration = time.time() - start_time 149 | 150 | if train_iter % 100 == 0: 151 | print("%s #train# step %4d, loss = %.4f, cross_entropy loss = %.4f, %.1f sec/batch" 152 | %(datetime.now(), train_iter+1, loss, cos_loss, duration)) 153 | 154 | print("%s #traing# finish training" % datetime.now()) 155 | self.save_model() 156 | print ("model saved") 157 | 158 | self.sess.close() 159 | 160 | def validation(self, img_query, img_database, R=100): 161 | print("%s #validation# start validation" % (datetime.now())) 162 | query_batch = int(ceil(img_query.n_samples / float(self.val_batch_size))) 163 | img_query.finish_epoch() 164 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 165 | for i in range(query_batch): 166 | images, labels = img_query.next_batch(self.val_batch_size) 167 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 168 | feed_dict={self.img: images, 169 | self.img_label: labels, 170 | self.stage: 1}) 171 | img_query.feed_batch_output(self.val_batch_size, output) 172 | print('Cosine Loss: %s'%loss) 173 | 174 | database_batch = int(ceil(img_database.n_samples / float(self.val_batch_size))) 175 | img_database.finish_epoch() 176 | 
print("%s #validation# totally %d database in %d batches" % (datetime.now(), img_database.n_samples, database_batch)) 177 | for i in range(database_batch): 178 | images, labels = img_database.next_batch(self.val_batch_size) 179 | 180 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 181 | feed_dict={self.img: images, 182 | self.img_label: labels, 183 | self.stage: 1}) 184 | img_database.feed_batch_output(self.val_batch_size, output) 185 | if i % 100 == 0: 186 | print('Cosine Loss[%d/%d]: %s'%(i, database_batch, loss)) 187 | 188 | mAPs = MAPs(R) 189 | 190 | self.sess.close() 191 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius_All(img_database, img_query) 192 | # for i in range(self.output_dim+1): 193 | # #prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, i) 194 | # plot.plot('prec', prec[i]) 195 | # plot.plot('rec', rec[i]) 196 | # plot.plot('mAP', mmap[i]) 197 | # plot.tick() 198 | # print('Results ham dist [%d], prec:%s, rec:%s, mAP:%s'%(i, prec[i], rec[i], mmap[i])) 199 | 200 | result_save_dir = os.path.join(self.save_dir, self.file_name) 201 | if os.path.exists(result_save_dir) is False: 202 | os.makedirs(result_save_dir) 203 | # plot.flush(result_save_dir) 204 | 205 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 206 | return { 207 | 'i2i_by_feature': mAPs.get_mAPs_by_feature(img_database, img_query), 208 | 'i2i_after_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 209 | 'i2i_map_radius_2': mmap, 210 | 'i2i_prec_radius_2': prec, 211 | 'i2i_recall_radius_2': rec 212 | } 213 | 214 | 215 | def train(train_img, config): 216 | model = DCH(config) 217 | img_dataset = Dataset(train_img, config.output_dim) 218 | model.train(img_dataset) 219 | return model.model_file 220 | 221 | 222 | def validation(database_img, query_img, config): 223 | model = DCH(config) 224 | img_database = Dataset(database_img, config.output_dim) 225 | img_query = Dataset(query_img, config.output_dim) 226 | return model.validation(img_query, img_database, config.R) 227 | -------------------------------------------------------------------------------- /model/dhn.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # Deep Hashing Network for Efficient Similarity Retrieval # 3 | # Authors: Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import shutil 9 | import time 10 | from datetime import datetime 11 | from math import ceil 12 | 13 | import numpy as np 14 | import tensorflow as tf 15 | 16 | from architecture import img_alexnet_layers 17 | from evaluation import MAPs 18 | from loss import cross_entropy_loss, quantization_loss 19 | from data_provider.pairwise import Dataset 20 | 21 | 22 | class DHN(object): 23 | def __init__(self, config): 24 | # Initialize setting 25 | print("initializing") 26 | np.set_printoptions(precision=4) 27 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 28 | self.device = '/gpu:' + config.gpu_id 29 | self.output_dim = config.output_dim 30 | self.n_class = config.label_dim 31 | self.cq_lambda = config.cq_lambda 32 | self.alpha = config.alpha 33 | 34 | self.batch_size = config.batch_size 35 | self.val_batch_size = config.val_batch_size 36 | self.max_iter = config.max_iter 37 | self.network = 
config.network 38 | self.learning_rate = config.learning_rate 39 | self.learning_rate_decay_factor = config.learning_rate_decay_factor 40 | self.decay_step = config.decay_step 41 | 42 | self.finetune_all = config.finetune_all 43 | 44 | self.model_file = os.path.join(config.save_dir, 'network_weights.npy') 45 | self.codes_file = os.path.join(config.save_dir, 'codes.npy') 46 | self.tflog_path = os.path.join(config.save_dir, 'tflog') 47 | 48 | # Setup session 49 | print("launching session") 50 | configProto = tf.ConfigProto() 51 | configProto.gpu_options.allow_growth = True 52 | configProto.allow_soft_placement = True 53 | self.sess = tf.Session(config=configProto) 54 | 55 | # Create variables and placeholders 56 | 57 | with tf.device(self.device): 58 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 59 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 60 | 61 | self.network_weights = config.network_weights 62 | self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model() 63 | 64 | self.global_step = tf.Variable(0, trainable=False) 65 | self.train_op = self.apply_loss_function(self.global_step) 66 | self.sess.run(tf.global_variables_initializer()) 67 | 68 | if config.debug == True: 69 | from tensorflow.python import debug as tf_debug 70 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 71 | 72 | def load_model(self): 73 | if self.network == 'alexnet': 74 | img_output = img_alexnet_layers( 75 | self.img, self.batch_size, self.output_dim, 76 | self.stage, self.network_weights, val_batch_size=self.val_batch_size) 77 | else: 78 | raise Exception('cannot use such CNN model as ' + self.network) 79 | return img_output 80 | 81 | def save_model(self, model_file=None): 82 | if model_file is None: 83 | model_file = self.model_file 84 | model = {} 85 | for layer in self.deep_param_img: 86 | model[layer] = self.sess.run(self.deep_param_img[layer]) 87 | print("saving model to %s" % model_file) 88 | folder = os.path.dirname(model_file) 89 | if os.path.exists(folder) is False: 90 | os.makedirs(folder) 91 | np.save(model_file, np.array(model)) 92 | return 93 | 94 | def load_codes(self, codes_file=None): 95 | if codes_file is None: 96 | codes_file = self.codes_file 97 | codes = np.load(codes_file).item() 98 | 99 | import collections 100 | mDataset = collections.namedtuple('Dataset', ['output', 'label']) 101 | database = mDataset(codes['db_features'], codes['db_label']) 102 | query = mDataset(codes['query_features'], codes['query_label']) 103 | return database, query 104 | 105 | def save_codes(self, database, query, codes_file=None): 106 | if codes_file is None: 107 | codes_file = self.codes_file 108 | codes = { 109 | 'db_features': database.output, 110 | 'db_label': database.label, 111 | 'query_features': query.output, 112 | 'query_label': query.label, 113 | } 114 | print("saving codes to %s" % codes_file) 115 | np.save(codes_file, np.array(codes)) 116 | 117 | def apply_loss_function(self, global_step): 118 | # loss function 119 | self.cos_loss = cross_entropy_loss(self.img_last_layer, self.img_label, self.alpha, normed=True, balanced=True) 120 | self.q_loss = self.cq_lambda * quantization_loss(self.img_last_layer) 121 | self.loss = self.cos_loss + self.q_loss 122 | 123 | # Last layer has a 10 times learning rate 124 | self.lr = tf.train.exponential_decay( 125 | self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True) 126 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, 
momentum=0.9) 127 | grads_and_vars = opt.compute_gradients( 128 | self.loss, self.train_layers + self.train_last_layer) 129 | fcgrad, _ = grads_and_vars[-2] 130 | fbgrad, _ = grads_and_vars[-1] 131 | 132 | # for debug 133 | self.grads_and_vars = grads_and_vars 134 | tf.summary.scalar('loss', self.loss) 135 | tf.summary.scalar('ce_loss', self.cos_loss) 136 | tf.summary.scalar('q_loss', self.q_loss) 137 | tf.summary.scalar('lr', self.lr) 138 | self.merged = tf.summary.merge_all() 139 | 140 | if self.finetune_all: 141 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 142 | (grads_and_vars[1][0]*2, self.train_layers[1]), 143 | (grads_and_vars[2][0], self.train_layers[2]), 144 | (grads_and_vars[3][0]*2, self.train_layers[3]), 145 | (grads_and_vars[4][0], self.train_layers[4]), 146 | (grads_and_vars[5][0]*2, self.train_layers[5]), 147 | (grads_and_vars[6][0], self.train_layers[6]), 148 | (grads_and_vars[7][0]*2, self.train_layers[7]), 149 | (grads_and_vars[8][0], self.train_layers[8]), 150 | (grads_and_vars[9][0]*2, self.train_layers[9]), 151 | (grads_and_vars[10][0], self.train_layers[10]), 152 | (grads_and_vars[11][0]*2, self.train_layers[11]), 153 | (grads_and_vars[12][0], self.train_layers[12]), 154 | (grads_and_vars[13][0]*2, self.train_layers[13]), 155 | (fcgrad*10, self.train_last_layer[0]), 156 | (fbgrad*20, self.train_last_layer[1])], 157 | global_step=global_step) 158 | else: 159 | return opt.apply_gradients([(fcgrad * 10, self.train_last_layer[0]), 160 | (fbgrad * 20, self.train_last_layer[1])], global_step=global_step) 161 | 162 | def train(self, img_dataset): 163 | print("%s #train# start training" % datetime.now()) 164 | 165 | # tensorboard 166 | if os.path.exists(self.tflog_path): 167 | shutil.rmtree(self.tflog_path) 168 | train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph) 169 | 170 | for train_iter in range(self.max_iter): 171 | images, labels = img_dataset.next_batch(self.batch_size) 172 | start_time = time.time() 173 | 174 | _, loss, cos_loss, q_loss, output, summary = self.sess.run( 175 | [self.train_op, self.loss, self.cos_loss, self.q_loss, self.img_last_layer, self.merged], 176 | feed_dict={self.img: images, 177 | self.img_label: labels}) 178 | 179 | img_dataset.feed_batch_output(self.batch_size, output) 180 | duration = time.time() - start_time 181 | 182 | if train_iter % 1 == 0: 183 | train_writer.add_summary(summary, train_iter) 184 | print("%s #train# step %4d, loss = %.4f, cross_entropy loss = %.4f, quantization loss = %.4f, %.1f sec/batch" 185 | % (datetime.now(), train_iter + 1, loss, cos_loss, q_loss, duration)) 186 | 187 | print("%s #traing# finish training" % datetime.now()) 188 | self.save_model() 189 | print("model saved") 190 | 191 | self.sess.close() 192 | 193 | def validation(self, img_query, img_database, R=100): 194 | if os.path.exists(self.codes_file): 195 | print("loading ", self.codes_file) 196 | img_database, img_query = self.load_codes(self.codes_file) 197 | else: 198 | print("%s #validation# start validation" % (datetime.now())) 199 | query_batch = int(ceil(img_query.n_samples / self.val_batch_size)) 200 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 201 | for i in range(query_batch): 202 | images, labels = img_query.next_batch(self.val_batch_size) 203 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 204 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 205 | 
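                # (stage=1 selects the validation path in img_alexnet_layers:
                # ten fixed crops/flips per image are evaluated and their
                # outputs averaged before being returned here.)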
img_query.feed_batch_output(self.val_batch_size, output) 206 | print('Cosine Loss: %s' % loss) 207 | 208 | database_batch = int(ceil(img_database.n_samples / self.val_batch_size)) 209 | print("%s #validation# totally %d database in %d batches" % 210 | (datetime.now(), img_database.n_samples, database_batch)) 211 | for i in range(database_batch): 212 | images, labels = img_database.next_batch(self.val_batch_size) 213 | 214 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 215 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 216 | img_database.feed_batch_output(self.val_batch_size, output) 217 | # print output[:10, :10] 218 | if i % 100 == 0: 219 | print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss)) 220 | # save features and codes 221 | self.save_codes(img_database, img_query) 222 | 223 | mAPs = MAPs(R) 224 | 225 | self.sess.close() 226 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 227 | return { 228 | 'i2i_by_feature': mAPs.get_mAPs_by_feature(img_database, img_query), 229 | 'i2i_after_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 230 | 'i2i_map_radius_2': mmap, 231 | 'i2i_prec_radius_2': prec, 232 | 'i2i_recall_radius_2': rec 233 | } 234 | 235 | 236 | def train(train_img, config): 237 | model = DHN(config) 238 | img_dataset = Dataset(train_img, config.output_dim) 239 | model.train(img_dataset) 240 | return model.model_file 241 | 242 | 243 | def validation(database_img, query_img, config): 244 | model = DHN(config) 245 | img_database = Dataset(database_img, config.output_dim) 246 | img_query = Dataset(query_img, config.output_dim) 247 | return model.validation(img_query, img_database, config.R) 248 | -------------------------------------------------------------------------------- /model/dhcs.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # Deep Hashing Network for Efficient Similarity Retrieval # 3 | # Authors: Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import shutil 9 | import time 10 | from datetime import datetime 11 | from math import ceil 12 | 13 | import numpy as np 14 | import tensorflow as tf 15 | 16 | from architecture import * 17 | from loss import * 18 | from util import * 19 | from evaluation import * 20 | from data_provider.pairwise import Dataset 21 | 22 | 23 | class DHCS(object): 24 | def __init__(self, config): 25 | # Initialize setting 26 | print("initializing") 27 | np.set_printoptions(precision=4) 28 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 29 | self.device = '/gpu:' + config.gpus 30 | self.bit = config.bit 31 | self.n_class = config.label_dim 32 | self.q_lambda = config.q_lambda 33 | self.b_lambda = config.b_lambda 34 | self.i_lambda = config.i_lambda 35 | self.alpha = config.alpha 36 | self.wordvec_dict = config.wordvec_dict 37 | 38 | self.batch_size = config.batch_size 39 | self.val_batch_size = config.val_batch_size 40 | self.max_iter = config.max_iter 41 | self.network = config.network 42 | self.learning_rate = config.lr 43 | self.lr_decay_factor = config.lr_decay_factor 44 | self.decay_step = config.decay_step 45 | self.finetune_all = config.finetune_all 46 | 47 | self.save_dir = config.save_dir 48 | self.model_file = os.path.join(self.save_dir, 
'network_weights.npy') 49 | self.codes_file = os.path.join(self.save_dir, 'codes.npy') 50 | self.tflog_path = os.path.join(self.save_dir, 'tflog') 51 | 52 | # Setup session 53 | print("launching session") 54 | configProto = tf.ConfigProto() 55 | configProto.gpu_options.allow_growth = True 56 | configProto.allow_soft_placement = True 57 | self.sess = tf.Session(config=configProto) 58 | 59 | # Create variables and placeholders 60 | 61 | with tf.device(self.device): 62 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 63 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 64 | try: 65 | self.wordvec = tf.constant(np.loadtxt(self.wordvec_dict), dtype=tf.float32) 66 | except OSError: 67 | print(f'{self.wordvec_dict} does not exist!') 68 | self.wordvec = None 69 | 70 | self.network_weights = config.network_weights 71 | self.img_last_layer, self.deep_param_img, self.train_layers = self.load_model() 72 | 73 | self.global_step = tf.Variable(0, trainable=False) 74 | self.train_op = self.apply_loss_function(self.global_step) 75 | self.sess.run(tf.global_variables_initializer()) 76 | 77 | if config.debug: 78 | from tensorflow.python import debug as tf_debug 79 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 80 | 81 | 82 | def load_model(self): 83 | networks = {'alexnet': img_alexnet_layers, 'vgg16': img_vgg16_layers} 84 | try: 85 | img_output = networks[self.network]( 86 | self.img, self.batch_size, self.bit, 87 | self.stage, self.network_weights, self.val_batch_size) 88 | except KeyError: 89 | raise Exception('cannot use such CNN model as ' + self.network) 90 | return img_output 91 | 92 | 93 | def save_model(self, model_file=None): 94 | if model_file is None: 95 | model_file = self.model_file 96 | model = {} 97 | for layer in self.deep_param_img: 98 | model[layer] = self.sess.run(self.deep_param_img[layer]) 99 | print("saving model to %s" % model_file) 100 | folder = os.path.dirname(model_file) 101 | if not os.path.exists(folder): 102 | os.makedirs(folder) 103 | np.save(model_file, np.array(model)) 104 | return 105 | 106 | 107 | def load_codes(self, codes_file=None): 108 | if codes_file is None: 109 | codes_file = self.codes_file 110 | codes = np.load(codes_file).item() 111 | 112 | import collections 113 | mDataset = collections.namedtuple('Dataset', ['output', 'label']) 114 | database = mDataset(codes['db_features'], codes['db_label']) 115 | query = mDataset(codes['query_features'], codes['query_label']) 116 | return database, query 117 | 118 | 119 | def save_codes(self, database, query, codes_file=None): 120 | if codes_file is None: 121 | codes_file = self.codes_file 122 | codes = { 123 | 'db_features': database.output, 124 | 'db_label': database.label, 125 | 'query_features': query.output, 126 | 'query_label': query.label, 127 | } 128 | print("saving codes to %s" % codes_file) 129 | np.save(codes_file, np.array(codes)) 130 | 131 | 132 | def apply_loss_function(self, global_step): 133 | # loss function 134 | self.S_loss = exp_loss(self.img_last_layer, self.img_label, self.alpha, self.wordvec) 135 | self.q_loss = quantization_loss(self.img_last_layer, q_type='L2') 136 | self.b_loss = balance_loss(self.img_last_layer) 137 | self.i_loss = independence_loss(self.img_last_layer) 138 | self.loss = self.S_loss + self.q_lambda * self.q_loss + \ 139 | self.b_lambda * self.b_loss + \ 140 | self.i_lambda * self.i_loss 141 | 
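# The composite objective above pairs one similarity term with three
# regularizers. A minimal NumPy sketch of what each regularizer measures,
# mirroring the definitions in loss/__init__.py (illustrative only, not
# part of the original file):
#
#   import numpy as np
#   u = np.random.uniform(-1, 1, (8, 16))  # batch of 8 continuous codes, 16 bits
#   H = np.sign(u)
#   q = np.mean((np.abs(u) - 1.0) ** 2)                 # quantization: |u| -> 1
#   b = np.mean(np.mean(H, axis=0) ** 2)                # balance: bit means -> 0
#   i = np.mean((H.T @ H / len(H) - np.eye(16)) ** 2)   # independence: H^T H / N -> I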
142 | # for debug 143 | tf.summary.scalar('loss', self.loss) 144 | tf.summary.scalar('similar_loss', self.S_loss) 145 | tf.summary.scalar('quantization_loss', self.q_loss) 146 | tf.summary.scalar('balance_loss', self.b_loss) 147 | tf.summary.scalar('independence_loss', self.i_loss) 148 | self.merged = tf.summary.merge_all() 149 | 150 | # Last layer has a 10 times learning rate 151 | lr = tf.train.exponential_decay( 152 | self.learning_rate, global_step, self.decay_step, self.lr_decay_factor, staircase=True) 153 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9) 154 | grads_and_vars = opt.compute_gradients(self.loss, self.train_layers) 155 | 156 | capped_grads_and_vars = [] 157 | if self.finetune_all: 158 | for i, grad in enumerate(grads_and_vars[:-2]): 159 | if i % 2 == 0: 160 | capped_grads_and_vars.append((grad[0], grad[1])) 161 | else: 162 | capped_grads_and_vars.append((grad[0]*2, grad[1])) 163 | capped_grads_and_vars.append((grads_and_vars[-2][0]*10, grads_and_vars[-2][1])) 164 | capped_grads_and_vars.append((grads_and_vars[-1][0]*20, grads_and_vars[-1][1])) 165 | 166 | return opt.apply_gradients(capped_grads_and_vars, global_step=global_step) 167 | 168 | 169 | def train(self, img_dataset): 170 | print("%s #train# start training" % datetime.now()) 171 | 172 | # tensorboard 173 | if os.path.exists(self.tflog_path): 174 | shutil.rmtree(self.tflog_path) 175 | train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph) 176 | 177 | for train_iter in range(self.max_iter): 178 | images, labels = img_dataset.next_batch(self.batch_size) 179 | 180 | start_time = time.time() 181 | 182 | _, loss, S_loss, q_loss, output, summary = self.sess.run( 183 | [self.train_op, self.loss, self.S_loss, self.q_loss, self.img_last_layer, self.merged], 184 | feed_dict={self.img: images, 185 | self.img_label: labels}) 186 | 187 | img_dataset.feed_batch_output(self.batch_size, output) 188 | duration = time.time() - start_time 189 | 190 | train_writer.add_summary(summary, train_iter) 191 | if train_iter % 100 == 0: 192 | print("%s #train# step %4d, loss = %.4f, similar loss = %.4f, quantization loss = %.4f, %.1f sec/batch" 193 | % (datetime.now(), train_iter + 1, loss, S_loss, q_loss, duration)) 194 | 195 | print("%s #train# finish training" % datetime.now()) 196 | self.save_model() 197 | print("model saved") 198 | 199 | self.sess.close() 200 | 201 | 
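# validation() below caches extracted features: the first run writes
# codes.npy via save_codes(), and later runs short-circuit into
# load_codes(). A minimal sketch of that load-or-compute pattern
# (load_or_compute is a hypothetical helper, not part of this file):
#
#   def load_or_compute(path, compute_fn):
#       if os.path.exists(path):
#           return np.load(path).item()   # dict saved via np.save(np.array(dict))
#       result = compute_fn()
#       np.save(path, np.array(result))
#       return result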
202 | def validation(self, img_database, img_query, R=100): 203 | if os.path.exists(self.codes_file): 204 | print("loading ", self.codes_file) 205 | img_database, img_query = self.load_codes(self.codes_file) 206 | else: 207 | print("%s #validation# start validation" % (datetime.now())) 208 | query_batch = int(ceil(img_query.n_samples / self.val_batch_size)) 209 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 210 | for i in range(query_batch): 211 | images, labels = img_query.next_batch(self.val_batch_size) 212 | output, loss = self.sess.run([self.img_last_layer, self.S_loss], 213 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 214 | img_query.feed_batch_output(self.val_batch_size, output) 215 | print('Cosine Loss: %s' % loss) 216 | 217 | database_batch = int(ceil(img_database.n_samples / self.val_batch_size)) 218 | print("%s #validation# totally %d database in %d batches" % 219 | (datetime.now(), img_database.n_samples, database_batch)) 220 | for i in range(database_batch): 221 | images, labels = img_database.next_batch(self.val_batch_size) 222 | 223 | output, loss = self.sess.run([self.img_last_layer, self.S_loss], 224 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 225 | img_database.feed_batch_output(self.val_batch_size, output) 226 | # print output[:10, :10] 227 | if i % 100 == 0: 228 | print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss)) 229 | # save features and codes 230 | self.save_codes(img_database, img_query) 231 | 232 | self.sess.close() 233 | 234 | db_feats = img_database.output 235 | db_codes = sign(img_database.output) 236 | db_labels = img_database.label 237 | q_feats = img_query.output 238 | q_codes = sign(img_query.output) 239 | q_labels = img_query.label 240 | 241 | print("visualizing data ...") 242 | plot_tsne(np.row_stack((db_codes, q_codes)), np.row_stack((db_labels, q_labels)), self.save_dir) 243 | plot_distance(db_feats, db_labels, q_feats, q_labels, self.save_dir) 244 | print(plot_distribution(db_feats, self.save_dir)) 245 | 246 | print("calculating metrics ...") 247 | mAPs = MAPs(R) 248 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 249 | return { 250 | 'mAP_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 251 | 'mAP_WhRank': get_whrank_mAP(q_feats, q_codes, q_labels, db_feats, db_codes, db_labels, Rs=R), 252 | 'mAP_finetune': get_finetune_mAP(q_feats, q_codes, q_labels, db_feats, db_codes, db_labels, Rs=R), 253 | 'mAP_feat': mAPs.get_mAPs_by_feature(img_database, img_query), 254 | 'RAMAP': get_RAMAP(q_codes, q_labels, db_codes, db_labels), 255 | 'mAP_radius2': mmap, 256 | 'prec_radius2': prec, 257 | 'recall_radius2': rec 258 | } 259 | 260 | 261 | def train(train_img, config): 262 | model = DHCS(config) 263 | img_dataset = Dataset(train_img, config.bit) 264 | model.train(img_dataset) 265 | return model.model_file 266 | 267 | 268 | def validation(database_img, query_img, config): 269 | model = DHCS(config) 270 | img_database = Dataset(database_img, config.bit) 271 | img_query = Dataset(query_img, config.bit) 272 | return model.validation(img_database, img_query, config.R) 273 | -------------------------------------------------------------------------------- /architecture/vgg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def img_vgg16_layers(img, batch_size, output_dim, stage, model_weights=None, val_batch_size=32, with_tanh=True): 7 | deep_param_img = {} 8 | train_layers = [] 9 | 10 | if model_weights is None: 11 | dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | model_weights = os.path.join(dir_path, "pretrained_model/vgg16_weights.npy") 13 | 14 | print("loading img model from ", model_weights) 15 | net_data = dict(np.load(model_weights, encoding='bytes').item()) 16 | print(list(net_data.keys())) 17 | 18 | # reverse channel order: rgb -> bgr (the pretrained VGG16 weights expect BGR input) 19 | reshaped_image = tf.cast(img, tf.float32)[:, :, :, ::-1] 20 | 21 | height = 224 22 | width = 224 23 | 24 | # Randomly crop a [height, width] section of each image 25 | with tf.name_scope('preprocess'): 26 | def train_fn(): 27 | return tf.stack([tf.random_crop(tf.image.random_flip_left_right(each), [height, width, 3]) 28 | for each in tf.unstack(reshaped_image, batch_size)]) 29 | 30 | def val_fn(): 31 | unstacked = tf.unstack(reshaped_image, val_batch_size) 32 | 33 | def crop(img, x, y): return tf.image.crop_to_bounding_box( 34 | img, x, y, width, height) 35 | 36 | def distort(f, x, y): return tf.stack( 37 | [crop(f(each), x, y) for each in unstacked]) 38 | 39 | def distort_raw(x, y): return distort(lambda x: x, x, y) 40 | 41 | def distort_fliped(x, y): return distort( 42 | 
tf.image.flip_left_right, x, y) 43 | distorted = tf.concat([distort_fliped(0, 0), distort_fliped(28, 0), 44 | distort_fliped( 45 | 0, 28), distort_fliped(28, 28), 46 | distort_fliped(14, 14), distort_raw(0, 0), 47 | distort_raw(28, 0), distort_raw(0, 28), 48 | distort_raw(28, 28), distort_raw(14, 14)], 0) 49 | 50 | return distorted 51 | distorted = tf.cond(stage > 0, val_fn, train_fn) 52 | 53 | # Zero-mean input 54 | mean = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32, shape=[ 55 | 1, 1, 1, 3], name='img-mean') 56 | distorted = distorted - mean 57 | 58 | # conv1_1 59 | with tf.name_scope('conv1_1') as scope: 60 | kernel = tf.Variable(net_data['conv1_1'][0], name='weights') 61 | conv = tf.nn.conv2d(distorted, kernel, [1, 1, 1, 1], padding='SAME') 62 | biases = tf.Variable(net_data['conv1_1'][1], trainable=True, name='biases') 63 | out = tf.nn.bias_add(conv, biases) 64 | conv1_1 = tf.nn.relu(out, name=scope) 65 | deep_param_img['conv1_1'] = [kernel, biases] 66 | train_layers += [kernel, biases] 67 | 68 | # conv1_2 69 | with tf.name_scope('conv1_2') as scope: 70 | kernel = tf.Variable(net_data['conv1_2'][0], name='weights') 71 | conv = tf.nn.conv2d(conv1_1, kernel, [1, 1, 1, 1], padding='SAME') 72 | biases = tf.Variable(net_data['conv1_2'][1], 73 | trainable=True, name='biases') 74 | out = tf.nn.bias_add(conv, biases) 75 | conv1_2 = tf.nn.relu(out, name=scope) 76 | deep_param_img['conv1_2'] = [kernel, biases] 77 | train_layers += [kernel, biases] 78 | 79 | # pool1 80 | pool1 = tf.nn.max_pool(conv1_2, 81 | ksize=[1, 2, 2, 1], 82 | strides=[1, 2, 2, 1], 83 | padding='SAME', 84 | name='pool1') 85 | 86 | # conv2_1 87 | with tf.name_scope('conv2_1') as scope: 88 | kernel = tf.Variable(net_data['conv2_1'][0], name='weights') 89 | conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME') 90 | biases = tf.Variable(net_data['conv2_1'][1], 91 | trainable=True, name='biases') 92 | out = tf.nn.bias_add(conv, biases) 93 | conv2_1 = tf.nn.relu(out, name=scope) 94 | deep_param_img['conv2_1'] = [kernel, biases] 95 | train_layers += [kernel, biases] 96 | 97 | # conv2_2 98 | with tf.name_scope('conv2_2') as scope: 99 | kernel = tf.Variable(net_data['conv2_2'][0], name='weights') 100 | conv = tf.nn.conv2d(conv2_1, kernel, [1, 1, 1, 1], padding='SAME') 101 | biases = tf.Variable(net_data['conv2_2'][1], 102 | trainable=True, name='biases') 103 | out = tf.nn.bias_add(conv, biases) 104 | conv2_2 = tf.nn.relu(out, name=scope) 105 | deep_param_img['conv2_2'] = [kernel, biases] 106 | train_layers += [kernel, biases] 107 | 108 | # pool2 109 | pool2 = tf.nn.max_pool(conv2_2, 110 | ksize=[1, 2, 2, 1], 111 | strides=[1, 2, 2, 1], 112 | padding='SAME', 113 | name='pool2') 114 | 115 | # conv3_1 116 | with tf.name_scope('conv3_1') as scope: 117 | kernel = tf.Variable(net_data['conv3_1'][0], name='weights') 118 | conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME') 119 | biases = tf.Variable(net_data['conv3_1'][1], 120 | trainable=True, name='biases') 121 | out = tf.nn.bias_add(conv, biases) 122 | conv3_1 = tf.nn.relu(out, name=scope) 123 | deep_param_img['conv3_1'] = [kernel, biases] 124 | train_layers += [kernel, biases] 125 | 126 | # conv3_2 127 | with tf.name_scope('conv3_2') as scope: 128 | kernel = tf.Variable(net_data['conv3_2'][0], name='weights') 129 | conv = tf.nn.conv2d(conv3_1, kernel, [1, 1, 1, 1], padding='SAME') 130 | biases = tf.Variable(net_data['conv3_2'][1], 131 | trainable=True, name='biases') 132 | out = tf.nn.bias_add(conv, biases) 133 | conv3_2 = tf.nn.relu(out, 
name=scope) 134 | deep_param_img['conv3_2'] = [kernel, biases] 135 | train_layers += [kernel, biases] 136 | 137 | # conv3_3 138 | with tf.name_scope('conv3_3') as scope: 139 | kernel = tf.Variable(net_data['conv3_3'][0], name='weights') 140 | conv = tf.nn.conv2d(conv3_2, kernel, [1, 1, 1, 1], padding='SAME') 141 | biases = tf.Variable(net_data['conv3_3'][1], 142 | trainable=True, name='biases') 143 | out = tf.nn.bias_add(conv, biases) 144 | conv3_3 = tf.nn.relu(out, name=scope) 145 | deep_param_img['conv3_3'] = [kernel, biases] 146 | train_layers += [kernel, biases] 147 | 148 | # pool3 149 | pool3 = tf.nn.max_pool(conv3_3, 150 | ksize=[1, 2, 2, 1], 151 | strides=[1, 2, 2, 1], 152 | padding='SAME', 153 | name='pool3') 154 | 155 | # conv4_1 156 | with tf.name_scope('conv4_1') as scope: 157 | kernel = tf.Variable(net_data['conv4_1'][0], name='weights') 158 | conv = tf.nn.conv2d(pool3, kernel, [1, 1, 1, 1], padding='SAME') 159 | biases = tf.Variable(net_data['conv4_1'][1], 160 | trainable=True, name='biases') 161 | out = tf.nn.bias_add(conv, biases) 162 | conv4_1 = tf.nn.relu(out, name=scope) 163 | deep_param_img['conv4_1'] = [kernel, biases] 164 | train_layers += [kernel, biases] 165 | 166 | # conv4_2 167 | with tf.name_scope('conv4_2') as scope: 168 | kernel = tf.Variable(net_data['conv4_2'][0], name='weights') 169 | conv = tf.nn.conv2d(conv4_1, kernel, [1, 1, 1, 1], padding='SAME') 170 | biases = tf.Variable(net_data['conv4_2'][1], 171 | trainable=True, name='biases') 172 | out = tf.nn.bias_add(conv, biases) 173 | conv4_2 = tf.nn.relu(out, name=scope) 174 | deep_param_img['conv4_2'] = [kernel, biases] 175 | train_layers += [kernel, biases] 176 | 177 | # conv4_3 178 | with tf.name_scope('conv4_3') as scope: 179 | kernel = tf.Variable(net_data['conv4_3'][0], name='weights') 180 | conv = tf.nn.conv2d(conv4_2, kernel, [1, 1, 1, 1], padding='SAME') 181 | biases = tf.Variable(net_data['conv4_3'][1], 182 | trainable=True, name='biases') 183 | out = tf.nn.bias_add(conv, biases) 184 | conv4_3 = tf.nn.relu(out, name=scope) 185 | deep_param_img['conv4_3'] = [kernel, biases] 186 | train_layers += [kernel, biases] 187 | 188 | # pool4 189 | pool4 = tf.nn.max_pool(conv4_3, 190 | ksize=[1, 2, 2, 1], 191 | strides=[1, 2, 2, 1], 192 | padding='SAME', 193 | name='pool4') 194 | 195 | # conv5_1 196 | with tf.name_scope('conv5_1') as scope: 197 | kernel = tf.Variable(net_data['conv5_1'][0], name='weights') 198 | conv = tf.nn.conv2d(pool4, kernel, [1, 1, 1, 1], padding='SAME') 199 | biases = tf.Variable(net_data['conv5_1'][1], 200 | trainable=True, name='biases') 201 | out = tf.nn.bias_add(conv, biases) 202 | conv5_1 = tf.nn.relu(out, name=scope) 203 | deep_param_img['conv5_1'] = [kernel, biases] 204 | train_layers += [kernel, biases] 205 | 206 | # conv5_2 207 | with tf.name_scope('conv5_2') as scope: 208 | kernel = tf.Variable(net_data['conv5_2'][0], name='weights') 209 | conv = tf.nn.conv2d(conv5_1, kernel, [1, 1, 1, 1], padding='SAME') 210 | biases = tf.Variable(net_data['conv5_2'][1], 211 | trainable=True, name='biases') 212 | out = tf.nn.bias_add(conv, biases) 213 | conv5_2 = tf.nn.relu(out, name=scope) 214 | deep_param_img['conv5_2'] = [kernel, biases] 215 | train_layers += [kernel, biases] 216 | 217 | # conv5_3 218 | with tf.name_scope('conv5_3') as scope: 219 | kernel = tf.Variable(net_data['conv5_3'][0], name='weights') 220 | conv = tf.nn.conv2d(conv5_2, kernel, [1, 1, 1, 1], padding='SAME') 221 | biases = tf.Variable(net_data['conv5_3'][1], 222 | trainable=True, name='biases') 223 | out = 
tf.nn.bias_add(conv, biases) 224 | conv5_3 = tf.nn.relu(out, name=scope) 225 | deep_param_img['conv5_3'] = [kernel, biases] 226 | train_layers += [kernel, biases] 227 | 228 | # pool5 229 | pool5 = tf.nn.max_pool(conv5_3, 230 | ksize=[1, 2, 2, 1], 231 | strides=[1, 2, 2, 1], 232 | padding='SAME', 233 | name='pool5') 234 | 235 | # fc6 236 | with tf.name_scope('fc6') as scope: 237 | shape = int(np.prod(pool5.get_shape()[1:])) 238 | fc6w = tf.Variable(net_data['fc6'][0], name='weights') 239 | fc6b = tf.Variable(net_data['fc6'][1], 240 | trainable=True, name='biases') 241 | pool5_flat = tf.reshape(pool5, [-1, shape]) 242 | fc6l = tf.nn.bias_add(tf.matmul(pool5_flat, fc6w), fc6b) 243 | fc6 = tf.nn.relu(fc6l) 244 | deep_param_img['fc6'] = [fc6w, fc6b] 245 | train_layers += [fc6w, fc6b] 246 | 247 | # fc7 248 | with tf.name_scope('fc7') as scope: 249 | fc7w = tf.Variable(net_data['fc7'][0], name='weights') 250 | fc7b = tf.Variable(net_data['fc7'][1], 251 | trainable=True, name='biases') 252 | fc7l = tf.nn.bias_add(tf.matmul(fc6, fc7w), fc7b) 253 | fc7 = tf.nn.relu(fc7l) 254 | deep_param_img['fc7'] = [fc7w, fc7b] 255 | train_layers += [fc7w, fc7b] 256 | 257 | # FC8 258 | # Output output_dim 259 | with tf.name_scope('fc8'): 260 | # Differ train and val stage by 'fc8' as key 261 | if 'fc8' in net_data: 262 | fc8w = tf.Variable(net_data['fc8'][0], name='weights') 263 | fc8b = tf.Variable(net_data['fc8'][1], name='biases') 264 | else: 265 | fc8w = tf.Variable(tf.random_normal([4096, output_dim], 266 | dtype=tf.float32, 267 | stddev=1e-2), name='weights') 268 | fc8b = tf.Variable(tf.constant(0.0, shape=[output_dim], 269 | dtype=tf.float32), name='biases') 270 | fc8l = tf.nn.bias_add(tf.matmul(fc7, fc8w), fc8b) 271 | 272 | if with_tanh: 273 | fc8_t = tf.nn.tanh(fc8l) 274 | else: 275 | fc8_t = fc8l 276 | 277 | def val_fn1(): 278 | concated = tf.concat([tf.expand_dims(i, 0) 279 | for i in tf.split(fc8_t, 10, 0)], 0) 280 | return tf.reduce_mean(concated, 0) 281 | fc8 = tf.cond(stage > 0, val_fn1, lambda: fc8_t) 282 | 283 | deep_param_img['fc8'] = [fc8w, fc8b] 284 | train_layers += [fc8w, fc8b] 285 | 286 | print("img model loading finished") 287 | 288 | return fc8, deep_param_img, train_layers 289 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from distance.npversion import distance 4 | from scipy.special import comb, erf 5 | from util import sign 6 | 7 | 8 | def get_RAMAP(q_output, q_labels, db_output, db_labels, cost=False): 9 | ''' 10 | - On the Evaluation Metric for Hashing 11 | ''' 12 | M, Q = q_output.shape 13 | R = Q 14 | RAAPs = [] 15 | time_costs = [comb(Q, r) for r in range(Q+1)] 16 | distH = distance(q_output, db_output, pair=False, dist_type='hamming') 17 | gnds = np.dot(q_labels, db_labels.transpose()) > 0 18 | for i in range(M): 19 | gnd = gnds[i,:] 20 | hamm = distH[i,:] 21 | RAAP = 0 22 | for r in range(R+1): 23 | hamm_r_idx = np.where(hamm<=r) 24 | rel = len(hamm_r_idx[0]) 25 | if(rel == 0): 26 | continue 27 | imatch = np.sum(gnd[hamm_r_idx]) 28 | if cost: 29 | time_cost = np.sum(time_costs[:r+1]) 30 | RAAP += (imatch / (rel * time_cost)) 31 | else: 32 | RAAP += (imatch / rel) 33 | RAAP = RAAP / (R + 1) 34 | RAAPs.append(RAAP) 35 | return np.mean(RAAPs) 36 | 37 | 
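# A minimal usage sketch for get_RAMAP with random binary codes and
# multi-hot labels, in the spirit of the self-test at the bottom of this
# file (illustrative only; all data here is synthetic):
#
#   q_codes = np.sign(np.random.rand(100, 16) - 0.5)
#   db_codes = np.sign(np.random.rand(1000, 16) - 0.5)
#   q_labels = (np.random.rand(100, 10) > 0.7).astype(float)
#   db_labels = (np.random.rand(1000, 10) > 0.7).astype(float)
#   print(get_RAMAP(q_codes, q_labels, db_codes, db_labels))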
38 | def whrank(features, labels): 39 | N, D = features.shape 40 | classes = np.unique(labels) 41 | pairnum = N 42 | diffvals = np.zeros((pairnum, D)) 43 | for i in range(pairnum): 44 | clsid = np.random.choice(classes, 1) 45 | sampids = np.where(labels == clsid)[0] 46 | samps = np.random.permutation(sampids)[:2] 47 | diffvals[i] = features[samps[0], :] - features[samps[1], :] 48 | fmu = np.mean(diffvals, axis=0) 49 | fstd = np.std(diffvals, axis=0) 50 | return fmu, fstd 51 | 52 | 53 | def whrankHamm(q_codes, db_codes, q_feats, fmu, fstd, w_type='ones'): 54 | if w_type == 'ones': 55 | weights = np.ones_like(q_feats) 56 | elif w_type == 'q': 57 | weights = np.abs(q_feats) 58 | elif w_type == 'std': 59 | weights = np.ones_like(q_feats) / fstd 60 | elif w_type == 'q_std': 61 | weights = np.abs(q_feats) / fstd 62 | elif w_type == 'erf': 63 | Pr = 0.5 * (1 + q_codes * erf((-q_feats-fmu) / (np.sqrt(2)*fstd))) 64 | weights = np.log((1 - Pr) / Pr) 65 | 66 | num1 = q_codes.shape[0] 67 | num2 = db_codes.shape[0] 68 | distMat = np.zeros((num1, num2)) 69 | for i in range(num1): 70 | codediff = np.abs(np.tile(q_codes[i], (num2, 1)) - db_codes) / 2 71 | distMat[i] = np.dot(weights[i], codediff.transpose()) 72 | return distMat 73 | 74 | 75 | def get_whrank_mAP(q_features, q_output, q_labels, db_features, db_output, db_labels, Rs=54000): 76 | fmu, fstd = whrank(db_features, np.argmax(db_labels, axis=1)) 77 | dist = whrankHamm(q_output, db_output, q_features, fmu, fstd, w_type='erf') 78 | unsorted_ids = np.argpartition(dist, Rs - 1)[:, :Rs] 79 | APx = [] 80 | for i in range(dist.shape[0]): 81 | label = q_labels[i, :] 82 | label[label == 0] = -1 83 | idx = unsorted_ids[i, :] 84 | idx = idx[np.argsort(dist[i, :][idx])] 85 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 86 | rel = np.sum(imatch) 87 | Lx = np.cumsum(imatch) 88 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 89 | if rel != 0: 90 | APx.append(np.sum(Px * imatch) / rel) 91 | return np.mean(np.array(APx)) 92 | 93 | 94 | def finetune_distID(dist, q_features, db_features): 95 | N, D = q_features.shape 96 | distID_finetune = np.zeros_like(dist) 97 | for i in range(N): 98 | cur = 0 99 | for j in range(D+1): 100 | idx = np.where(dist[i] == j)[0] 101 | num = len(idx) 102 | if num > 0: 103 | d = distance(q_features[i], db_features[idx], dist_type='inner_product', pair=True) 104 | idx = idx[np.argsort(d)] 105 | distID_finetune[i,cur:cur+num] = idx 106 | cur += num 107 | distID_finetune = distID_finetune.astype(int) 108 | return distID_finetune 109 | 110 | 111 | def get_finetune_mAP(q_features, q_output, q_labels, db_features, db_output, db_labels, Rs=54000): 112 | dist_raw = distance(q_output, db_output, pair=False, dist_type='hamming') 113 | dist_raw = np.partition(dist_raw, Rs - 1)[:, :Rs] 114 | dist_finetune_idx = finetune_distID(dist_raw, q_features, db_features) 115 | 116 | N = dist_raw.shape[0] 117 | dist_idx = dist_finetune_idx 118 | APx = [] 119 | for i in range(N): 120 | label = q_labels[i, :] 121 | label[label == 0] = -1 122 | idx = dist_idx[i, :] 123 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 124 | rel = np.sum(imatch) 125 | Lx = np.cumsum(imatch) 126 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 127 | if rel != 0: 128 | APx.append(np.sum(Px * imatch) / rel) 129 | mAP = np.mean(np.array(APx)) 130 | return mAP 131 | 132 | 133 | # optimized 134 | def get_mAPs(q_output, q_labels, db_output, db_labels, Rs, dist_type='inner_product'): 135 | dist = distance(q_output, db_output, dist_type=dist_type, pair=True) 136 | unsorted_ids = np.argpartition(dist, Rs - 1)[:, :Rs] 137 | APx = [] 138 | q_labels_tmp = np.copy(q_labels) 139 | for i in range(dist.shape[0]): 
140 | label = q_labels_tmp[i, :] 141 | label[label == 0] = -1 142 | idx = unsorted_ids[i, :] 143 | idx = idx[np.argsort(dist[i, :][idx])] 144 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 145 | rel = np.sum(imatch) 146 | Lx = np.cumsum(imatch) 147 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 148 | if rel != 0: 149 | APx.append(np.sum(Px * imatch) / rel) 150 | return np.mean(np.array(APx)) 151 | 152 | 153 | def get_mAPs_rerank(q_output, q_labels, db_output, db_labels, Rs, dist_type='inner_product'): 154 | query_output = sign(q_output) 155 | database_output = sign(db_output) 156 | 157 | bit_n = query_output.shape[1] 158 | 159 | ips = np.dot(query_output, database_output.T) 160 | ips = (bit_n - ips) / 2 161 | 162 | mAPX = [] 163 | query_labels = q_labels 164 | database_labels = db_labels 165 | for i in range(ips.shape[0]): 166 | label = query_labels[i, :] 167 | label[label == 0] = -1 168 | 169 | imatch = np.array([]) 170 | for j in range(bit_n): 171 | idx = np.reshape(np.argwhere(np.equal(ips[i, :], j)), (-1)) 172 | all_num = len(idx) 173 | 174 | if all_num != 0: 175 | ips_trad = np.dot(q_output[i, :], db_output[idx[:], :].T) 176 | ids_trad = np.argsort(-ips_trad, axis=0) 177 | db_labels_1 = database_labels[idx[:], :] 178 | 179 | imatch = np.append(imatch, np.sum( 180 | np.equal(db_labels_1[ids_trad, :], label), 1) > 0) 181 | if imatch.shape[0] > Rs: 182 | break 183 | 184 | imatch = imatch[0:Rs] 185 | rel = np.sum(imatch) 186 | Lx = np.cumsum(imatch) 187 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 188 | if rel != 0: 189 | mAPX.append(np.sum(Px * imatch) / rel) 190 | 191 | return np.mean(np.array(mAPX)) 192 | 193 | 194 | class MAPs: 195 | def __init__(self, R): 196 | self.R = R 197 | 198 | def get_mAPs_by_feature(self, database, query, Rs=None, dist_type='inner_product'): 199 | if Rs is None: 200 | Rs = self.R 201 | return get_mAPs(query.output, query.label, database.output, database.label, Rs, dist_type) 202 | 203 | def get_mAPs_after_sign(self, database, query, Rs=None, dist_type='inner_product'): 204 | if Rs is None: 205 | Rs = self.R 206 | q_output = sign(query.output) 207 | db_output = sign(database.output) 208 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 209 | 210 | def get_RAMAP_after_sign(self, database, query): 211 | q_output = sign(query.output) 212 | db_output = sign(database.output) 213 | return get_RAMAP(q_output, query.label, db_output, database.label) 214 | 215 | def get_mAPs_after_sign_with_feature_rerank(self, database, query, Rs=None, dist_type='inner_product'): 216 | if Rs is None: 217 | Rs = self.R 218 | return get_mAPs_rerank(query.output, query.label, database.output, database.label, Rs, dist_type) 219 | 220 | @staticmethod 221 | def get_precision_recall_by_Hamming_Radius(database, query, radius=2): 222 | query_output = sign(query.output) 223 | database_output = sign(database.output) 224 | 225 | bit_n = query_output.shape[1] 226 | 227 | ips = np.dot(query_output, database_output.T) 228 | ips = (bit_n - ips) / 2 229 | ids = np.argsort(ips, 1) 230 | 231 | precX = [] 232 | recX = [] 233 | mAPX = [] 234 | query_labels = query.label 235 | database_labels = database.label 236 | 237 | for i in range(ips.shape[0]): 238 | label = query_labels[i, :] 239 | label[label == 0] = -1 240 | idx = np.reshape(np.argwhere(ips[i, :] <= radius), (-1)) 241 | all_num = len(idx) 242 | 243 | if all_num != 0: 244 | imatch = np.sum(database_labels[idx[:], :] == label, 1) > 0 245 | match_num = np.sum(imatch) 246 | 
precX.append(float(match_num) / all_num) 247 | 248 | all_sim_num = np.sum( 249 | np.sum(database_labels[:, :] == label, 1) > 0) 250 | recX.append(float(match_num) / all_sim_num) 251 | 252 | if radius < 10: 253 | ips_trad = np.dot( 254 | query.output[i, :], database.output[ids[i, 0:all_num], :].T) 255 | ids_trad = np.argsort(-ips_trad, axis=0) 256 | db_labels = database_labels[ids[i, 0:all_num], :] 257 | 258 | rel = match_num 259 | imatch = np.sum(db_labels[ids_trad, :] == label, 1) > 0 260 | Lx = np.cumsum(imatch) 261 | Px = Lx.astype(float) / np.arange(1, all_num + 1, 1) 262 | if rel != 0: 263 | mAPX.append(np.sum(Px * imatch) / rel) 264 | else: 265 | mAPX.append(float(match_num) / all_num) 266 | 267 | else: 268 | precX.append(0.0) 269 | recX.append(0.0) 270 | mAPX.append(0.0) 271 | 272 | return np.mean(np.array(precX)), np.mean(np.array(recX)), np.mean(np.array(mAPX)) 273 | 274 | @staticmethod 275 | def get_precision_recall_by_Hamming_Radius_All(database, query): 276 | query_output = sign(query.output) 277 | database_output = sign(database.output) 278 | 279 | bit_n = query_output.shape[1] 280 | 281 | ips = np.dot(query_output, database_output.T) 282 | ips = (bit_n - ips) / 2 283 | precX = np.zeros((ips.shape[0], bit_n + 1)) 284 | recX = np.zeros((ips.shape[0], bit_n + 1)) 285 | mAPX = np.zeros((ips.shape[0], bit_n + 1)) 286 | 287 | query_labels = query.label 288 | database_labels = database.label 289 | 290 | ids = np.argsort(ips, 1) 291 | 292 | for i in range(ips.shape[0]): 293 | label = query_labels[i, :] 294 | label[label == 0] = -1 295 | 296 | idx = ids[i, :] 297 | imatch = np.sum(database_labels[idx[:], :] == label, 1) > 0 298 | all_sim_num = np.sum(imatch) 299 | 300 | counts = np.bincount(ips[i, :].astype(np.int64)) 301 | 302 | for r in range(bit_n + 1): 303 | if r >= len(counts): 304 | precX[i, r] = precX[i, r - 1] 305 | recX[i, r] = recX[i, r - 1] 306 | mAPX[i, r] = mAPX[i, r - 1] 307 | continue 308 | 309 | all_num = np.sum(counts[0:r + 1]) 310 | 311 | if all_num != 0: 312 | match_num = np.sum(imatch[0:all_num]) 313 | precX[i, r] = float(match_num) / all_num 314 | recX[i, r] = float(match_num) / all_sim_num 315 | 316 | rel = match_num 317 | Lx = np.cumsum(imatch[0:all_num]) 318 | Px = Lx.astype(float) / np.arange(1, all_num + 1, 1) 319 | if rel != 0: 320 | mAPX[i, r] = np.sum(Px * imatch[0:all_num]) / rel 321 | return np.mean(np.array(precX), 0), np.mean(np.array(recX), 0), np.mean(np.array(mAPX), 0) 322 | 323 | 324 | class MAPs_CQ: 325 | def __init__(self, C, subspace_num, subcenter_num, R): 326 | self.C = C 327 | self.subspace_num = subspace_num 328 | self.subcenter_num = subcenter_num 329 | self.R = R 330 | 331 | def get_mAPs_SQD(self, database, query, Rs=None, dist_type='inner_product'): 332 | if Rs is None: 333 | Rs = self.R 334 | q_output = np.dot(query.codes, self.C) 335 | db_output = np.dot(database.codes, self.C) 336 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 337 | 338 | def get_mAPs_AQD(self, database, query, Rs=None, dist_type='inner_product'): 339 | if Rs is None: 340 | Rs = self.R 341 | q_output = query.output 342 | db_output = np.dot(database.codes, self.C) 343 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 344 | 345 | def get_mAPs_by_feature(self, database, query, Rs=None, dist_type='inner_product'): 346 | if Rs is None: 347 | Rs = self.R 348 | q_output = query.output 349 | db_output = database.output 350 | return get_mAPs(q_output, query.label, 
db_output, database.label, Rs, dist_type) 351 | 352 | def get_mAPs_after_sign(self, database, query, Rs=None, dist_type='inner_product'): 353 | if Rs is None: 354 | Rs = self.R 355 | q_output = sign(query.output) 356 | db_output = sign(database.output) 357 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 358 | 359 | 360 | if __name__ == "__main__": 361 | m = MAPs(4) 362 | radius = 2 363 | 364 | class ds: 365 | def __init__(self): 366 | self.output = [] 367 | self.label = [] 368 | database = ds() 369 | query = ds() 370 | 371 | database.output = np.sign(np.random.rand(10000, 64) - 0.5) 372 | database.label = np.sign(np.random.rand(10000, 20) - 0.5) 373 | database.label[database.label < 0] = 0 374 | query.output = np.sign(np.random.rand(1000, 64) - 0.5) 375 | query.label = np.sign(np.random.rand(1000, 20) - 0.5) 376 | query.label[query.label < 0] = 0 377 | 378 | print(m.get_mAPs_after_sign_with_feature_rerank(database, query, 500)) 379 | print(m.get_mAPs_by_feature(database, query, 500)) 380 | prec, rec, maps = m.get_precision_recall_by_Hamming_Radius_All( 381 | database, query) 382 | print(prec) 383 | print(rec) 384 | print(maps) 385 | -------------------------------------------------------------------------------- /loss/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from distance.tfversion import distance 4 | from util import sign, reduce_shaper 5 | 6 | 7 | '''pairwise loss 8 | ''' 9 | 10 | def inner_product_loss(u, label_u, balanced=True): 11 | '''pairwise inner product loss 12 | - Hash with graph 13 | - Supervised Hashing for Image Retrieval via Image Representation Learning 14 | - Deep Discrete Supervised Hashing 15 | ''' 16 | with tf.name_scope('inner_product_loss'): 17 | B = tf.cast(tf.shape(u)[1], tf.float32) 18 | ip = tf.matmul(u, u, transpose_b=True) 19 | 20 | # let sim = {0, 1} to be {-1, 1} 21 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 22 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 23 | 24 | loss_1 = tf.square(tf.subtract(Sim, tf.div(ip, B))) 25 | 26 | if balanced: 27 | with tf.name_scope('balance'): 28 | sum_1 = tf.reduce_sum(S) 29 | sum_all = tf.reduce_sum(tf.abs(Sim)) 30 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 31 | tf.multiply(tf.div(sum_all, sum_1), S)) 32 | loss_1 = tf.multiply(loss_1, balance_param) 33 | 34 | loss = tf.reduce_mean(loss_1) 35 | return loss 36 | 37 | 38 | def cosine_loss(u, label_u, balanced=True): 39 | '''squared pairwise cosine loss 40 | - Deep Quantization Network for Efficient Image Retrieval 41 | ''' 42 | with tf.name_scope('cosine_loss'): 43 | ip_1 = tf.matmul(u, u, transpose_b=True) 44 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), reduce_shaper( 45 | tf.square(u)), transpose_b=True)) 46 | cos_1 = tf.div(ip_1, mod_1) 47 | 48 | # let Sim = {0, 1} to be {-1, 1} 49 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 50 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 51 | 52 | loss_1 = tf.square(tf.subtract(Sim, cos_1)) 53 | 54 | if balanced: 55 | with tf.name_scope('balance'): 56 | sum_1 = tf.reduce_sum(S) 57 | sum_all = tf.reduce_sum(tf.abs(Sim)) 58 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 59 | tf.multiply(tf.div(sum_all, sum_1), S)) 60 | loss_1 = tf.multiply(loss_1, balance_param) 61 | 62 | loss = tf.reduce_mean(loss_1) 63 | return loss 64 | 65 | 66 | def 
cross_entropy_loss(u, label_u, alpha=0.5, normed=True, balanced=True): 67 | '''cross entropy loss 68 | - Deep Hashing Network for Efficient Similarity Retrieval 69 | ''' 70 | with tf.name_scope('cross_entropy_loss'): 71 | if normed: 72 | ip_1 = tf.matmul(u, tf.transpose(u)) 73 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), 74 | reduce_shaper(tf.square(u)), transpose_b=True)) 75 | ip = tf.div(ip_1, mod_1) 76 | else: 77 | ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1) 78 | 79 | ones = tf.ones([tf.shape(u)[0], tf.shape(u)[0]]) 80 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 81 | 82 | loss_1 = tf.log(ones + tf.exp(alpha * ip)) - S * alpha * ip 83 | 84 | if balanced: 85 | with tf.name_scope('balance'): 86 | # let Sim \in {-1, 1} 87 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 88 | sum_1 = tf.reduce_sum(S) 89 | sum_all = tf.reduce_sum(tf.abs(Sim)) 90 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 91 | tf.multiply(tf.div(sum_all, sum_1), S)) 92 | loss_1 = tf.multiply(loss_1, balance_param) 93 | 94 | loss = tf.reduce_mean(loss_1) 95 | return loss 96 | 97 | 98 | def cauchy_cross_entropy_loss(u, label_u, gamma=16, normed=True): 99 | '''cauchy cross entropy loss 100 | - Deep Cauchy Hashing for Hamming Space Retrieval 101 | ''' 102 | with tf.name_scope('cauchy_cross_entropy_loss'): 103 | bit = tf.cast(tf.shape(u)[1], tf.float32) 104 | 105 | if normed: 106 | ip_1 = tf.matmul(u, tf.transpose(u)) 107 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), reduce_shaper( 108 | tf.square(u)) + tf.constant(1e-6), transpose_b=True)) 109 | dist = bit / 2.0 * (1.0 - tf.div(ip_1, mod_1) + tf.constant(1e-6)) 110 | else: 111 | r_u = tf.reshape(tf.reduce_sum(u * u, 1), [-1, 1]) 112 | r_v = tf.reshape(tf.reduce_sum(u * u, 1), [-1, 1]) 113 | 114 | dist = r_u - 2 * tf.matmul(u, tf.transpose(u)) + \ 115 | tf.transpose(r_v) + tf.constant(0.001) 116 | 117 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 118 | with tf.name_scope('balance'): 119 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 120 | sum_1 = tf.reduce_sum(S) 121 | sum_all = tf.reduce_sum(tf.abs(Sim)) 122 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 123 | tf.multiply(tf.div(sum_all, sum_1), S)) 124 | 125 | mask = tf.equal(tf.eye(tf.shape(u)[0]), tf.constant(0.0)) 126 | cauchy = gamma / (dist + gamma) 127 | cauchy_mask = tf.boolean_mask(cauchy, mask) 128 | s_mask = tf.boolean_mask(S, mask) 129 | balance_p_mask = tf.boolean_mask(balance_param, mask) 130 | 131 | all_loss = - s_mask * \ 132 | tf.log(cauchy_mask) - (tf.constant(1.0) - s_mask) * \ 133 | tf.log(tf.constant(1.0) - cauchy_mask) 134 | 135 | loss = tf.reduce_mean(tf.multiply(all_loss, balance_p_mask)) 136 | return loss 137 | 138 | 139 | def contrastive_loss(u, label_u, margin=4, balanced=False): 140 | '''contrastive loss 141 | - Deep Supervised Hashing for Fast Image Retrieval 142 | ''' 143 | with tf.name_scope('contrastive_loss'): 144 | batch_size = tf.cast(tf.shape(u)[0], tf.float32) 145 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 146 | dist = distance(u) 147 | 148 | loss_1 = S * dist + (1 - S) * tf.maximum(margin - dist, 0.0) 149 | 150 | if balanced: 151 | # TODO DELETE! In this setting, results will be worse. 152 | with tf.name_scope('balance'): 153 | # let Sim \in {-1, 1} 154 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 155 | sum_1 = tf.reduce_sum(S) 156 | sum_all = tf.reduce_sum(tf.abs(Sim)) 157 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 158 | tf.multiply(tf.div(sum_all, sum_1), S)) 159 | loss_1 = tf.multiply(loss_1, balance_param) 160 | 161 | loss = tf.reduce_sum(loss_1) / (batch_size*(batch_size-1)) 162 | return loss 163 | 164 | 
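# All pairwise losses above share the same similarity bookkeeping:
# S = clip(L L^T, 0, 1) marks pairs that share at least one label,
# Sim = 2S - 1 maps that to {-1, +1}, and the balance weight up-weights
# the rarer similar pairs. A small NumPy illustration (illustrative only):
#
#   L = np.array([[1., 0., 1.],   # multi-hot labels for 3 samples
#                 [0., 1., 1.],
#                 [0., 1., 0.]])
#   S = np.clip(L @ L.T, 0.0, 1.0)          # 1 where two samples share a label
#   Sim = 2.0 * S - 1.0                     # {0, 1} -> {-1, +1}
#   w = np.abs(S - 1.0) + (np.abs(Sim).sum() / S.sum()) * S   # balance weights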
165 | def exp_loss(u, label_u, alpha, wordvec=None, balanced=True): 166 | '''exponential loss 167 | ''' 168 | with tf.name_scope('exp_loss'): 169 | batch_size = tf.shape(u)[0] 170 | bit = tf.shape(u)[1] 171 | mask = tf.equal(tf.eye(batch_size), tf.constant(0.0)) 172 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 173 | S_m = tf.boolean_mask(S, mask) 174 | 175 | # word vector 176 | if wordvec is not None: 177 | wordvec_u = tf.matmul(label_u, wordvec) / tf.reduce_sum(label_u, axis=1, keepdims=True) 178 | W = distance(wordvec_u, dist_type='cosine') 179 | 180 | ## margin hinge-like loss 181 | # balanced = False 182 | # D = distance(u, dist_type='euclidean2') 183 | # E = D 184 | # E_m = tf.boolean_mask(E, mask) 185 | # loss_1 = S_m * E_m + (1 - S_m) * tf.maximum(alpha - E_m, 0.0) 186 | 187 | ## double margin hinge-like loss 188 | # balanced = False 189 | # D = distance(u, dist_type='cosine') 190 | # E = D 191 | # E_m = tf.boolean_mask(E, mask) 192 | # loss_1 = S_m * tf.maximum(E_m - 0.3, 0.0) + (1 - S_m) * tf.maximum(0.45 - E_m, 0.0) 193 | 194 | ## cauchy cross-entropy loss 195 | # D = distance(u, dist_type='cosine') 196 | # E = tf.log(1 + alpha * D) 197 | # E_m = tf.boolean_mask(E, mask) 198 | # loss_1 = S_m * E_m + (1 - S_m) * (E_m - tf.log(tf.exp(E_m) - 1 + 1e-6)) 199 | 200 | # sigmoid 201 | # D = distance(u, dist_type='cosine') 202 | # E = tf.log(1 + tf.exp(-alpha * (1-2*D))) 203 | # E_m = tf.boolean_mask(E, mask) 204 | # loss_1 = S_m * E_m + (1 - S_m) * (E_m - tf.log(tf.exp(E_m) - 1 + 1e-6)) 205 | 206 | ## hyper sigmoid 207 | balanced = False 208 | alpha = 9 209 | beta = 20 210 | gamma = 1.5 211 | margin = 0.25 212 | D = distance(u, dist_type='cosine') 213 | E1 = tf.log(1 + tf.exp(-alpha * (1-gamma*2*D))) 214 | E1_m = tf.boolean_mask(E1, mask) 215 | loss_s1 = S_m * E1_m 216 | E2 = tf.log(1 + tf.exp(-alpha * (1-gamma*2*(D-margin)))) 217 | E2_m = tf.boolean_mask(E2, mask) 218 | loss_s0 = (1 - S_m) * (E2_m - tf.log(tf.exp(E2_m) - 1 + 1e-6)) 219 | loss_1 = beta * loss_s1 + loss_s0 220 | 221 | ## margin exp loss 222 | # balanced = False 223 | # D = distance(u, dist_type='cosine') 224 | # E1 = tf.exp(2* D) - 1 225 | # E2 = tf.exp(2 * (1 - D)) - 1 226 | # E1_m = tf.boolean_mask(E1, mask) 227 | # E2_m = tf.boolean_mask(E2, mask) 228 | # loss_1 = S_m * E1_m + (1 - S_m) * E2_m 229 | 230 | ## post-tune 231 | # balanced = False 232 | # D = distance(u, dist_type='cosine') 233 | # E = D 234 | # E_m = tf.boolean_mask(E, mask) 235 | # margin = 0.05 236 | # loss_1 = S_m * tf.maximum(E_m - alpha + margin, 0.0) + (1 - S_m) * tf.maximum(alpha + margin - E_m, 0.0) 237 | # loss_1 = S_m * tf.maximum(E_m - alpha + margin, 0.0) 238 | # loss_1 = (1 - S_m) * tf.maximum(alpha + margin - E_m, 0.0) 239 | 240 | if balanced: 241 | S_all = tf.cast(batch_size * (batch_size - 1), tf.float32) 242 | S_1 = tf.reduce_sum(S) 243 | balance_param = (S_all / S_1) * S + (1 - S) 244 | B_m = tf.boolean_mask(balance_param, mask) 245 | loss_1 = B_m * loss_1 246 | 247 | loss = tf.reduce_mean(loss_1) 248 | return loss 249 | 250 | 
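# The active "hyper sigmoid" branch of exp_loss above shapes two logistic
# curves over cosine distance D in [0, 1]: similar pairs pay E1, which grows
# with D, while dissimilar pairs pay a mirrored term built from E2 that
# shrinks with D, offset by a margin. A quick NumPy look with the constants
# used above (illustrative only):
#
#   alpha, beta, gamma, margin = 9.0, 20.0, 1.5, 0.25
#   D = np.linspace(0.0, 1.0, 5)
#   E1 = np.log(1 + np.exp(-alpha * (1 - gamma * 2 * D)))
#   E2 = np.log(1 + np.exp(-alpha * (1 - gamma * 2 * (D - margin))))
#   loss_s0 = E2 - np.log(np.exp(E2) - 1 + 1e-6)   # dissimilar-pair penalty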
251 | '''triplet loss 252 | ''' 253 | 254 | def triplet_loss(anchor, pos, neg, margin, dist_type='euclidean2'): 255 | '''triplet loss 256 | - Deep Triplet Quantization 257 | ''' 258 | with tf.name_scope('triplet_loss'): 259 | pos_dist = distance(anchor, pos, pair=False, dist_type=dist_type) 260 | neg_dist = distance(anchor, neg, pair=False, dist_type=dist_type) 261 | basic_loss = tf.maximum(pos_dist - neg_dist + margin, 0.0) 262 | loss = tf.reduce_mean(basic_loss, 0) 263 | 264 | tf.summary.histogram('pos_dist', pos_dist) 265 | tf.summary.histogram('neg_dist', neg_dist) 266 | tf.summary.histogram('pos_dist - neg_dist', pos_dist - neg_dist) 267 | return loss 268 | 269 | 270 | def cos_margin_multi_label_loss(u, label_u, wordvec, bit=300, soft=True, margin=0.7): 271 | '''cosine margin multi label loss 272 | - Deep Visual-Semantic Quantization for Efficient Image Retrieval 273 | ''' 274 | # N: batchsize, L: label_dim, D: 300 275 | # u: N * D 276 | # label_u: N * L 277 | # wordvec: L * D 278 | with tf.name_scope('cos_margin_multi_label_loss'): 279 | assert bit == 300 280 | 281 | batch_size = tf.cast(tf.shape(label_u)[0], tf.int32) 282 | n_class = tf.cast(tf.shape(label_u)[1], tf.int32) 283 | if soft: 284 | ip_2 = tf.matmul(u, wordvec, transpose_b=True) 285 | # multiply ids to inner product 286 | mod_2 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 287 | u)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 288 | # cos_2: N * L 289 | cos_2 = tf.div(ip_2, mod_2) 290 | 291 | # ip_3: L * L 292 | # compute soft margin 293 | ip_3 = tf.matmul(wordvec, wordvec, transpose_b=True) 294 | # use word_dic to avoid 0 in / 295 | mod_3 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 296 | wordvec)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 297 | margin_param = tf.subtract(tf.constant( 298 | 1.0, dtype=tf.float32), tf.div(ip_3, mod_3)) 299 | 300 | # cos - cos: N * L * L 301 | cos_cos_1 = tf.subtract(tf.expand_dims(margin_param, 0), tf.subtract( 302 | tf.expand_dims(cos_2, 2), tf.expand_dims(cos_2, 1))) 303 | # we need to let the wrong place be 0 304 | cos_cos = tf.multiply(cos_cos_1, tf.expand_dims(label_u, 2)) 305 | 306 | cos_loss = tf.reduce_sum(tf.maximum( 307 | tf.constant(0, dtype=tf.float32), cos_cos)) 308 | loss = tf.div(cos_loss, tf.multiply(tf.cast( 309 | n_class, dtype=tf.float32), tf.reduce_sum(label_u))) 310 | else: 311 | margin_param = tf.constant(margin, dtype=tf.float32) 312 | 313 | # v_label: N * L * D 314 | v_label = tf.multiply(tf.expand_dims(label_u, 2), tf.expand_dims(wordvec, 0)) 315 | # ip_1: N * L 316 | ip_1 = tf.reduce_sum(tf.multiply(tf.expand_dims(u, 1), v_label), 2) 317 | # mod_1: N * L 318 | v_label_mod = tf.multiply(tf.expand_dims( 319 | tf.ones([batch_size, n_class]), 2), tf.expand_dims(wordvec, 0)) 320 | mod_1 = tf.sqrt(tf.multiply(tf.expand_dims(tf.reduce_sum( 321 | tf.square(u), 1), 1), tf.reduce_sum(tf.square(v_label_mod), 2))) 322 | # cos_1: N * L 323 | cos_1 = tf.div(ip_1, mod_1) 324 | 325 | ip_2 = tf.matmul(u, wordvec, transpose_b=True) 326 | # multiply ids to inner product 327 | mod_2 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 328 | u)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 329 | # cos_2: N * L 330 | cos_2 = tf.div(ip_2, mod_2) 331 | 332 | # cos - cos: N * L * L 333 | cos_cos_1 = tf.subtract(margin_param, tf.subtract( 334 | tf.expand_dims(cos_1, 2), tf.expand_dims(cos_2, 1))) 335 | # we need to let the wrong place be 0 336 | cos_cos = tf.multiply(cos_cos_1, tf.expand_dims(label_u, 2)) 337 | 338 | cos_loss = tf.reduce_sum(tf.maximum( 339 | tf.constant(0, dtype=tf.float32), cos_cos)) 340 | loss = tf.div(cos_loss, tf.multiply(tf.cast( 341 | n_class, dtype=tf.float32), tf.reduce_sum(label_u))) 342 | return loss 343 | 344 | 
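# quantization_loss below offers several penalties that all pull a
# continuous code u toward binary {-1, +1}. A scalar NumPy comparison of
# the variants (illustrative only):
#
#   u = np.array([0.2, -0.7, 0.95])
#   l2 = np.mean((np.abs(u) - 1.0) ** 2)                          # 'L2'
#   l1 = np.mean(np.abs(np.abs(u) - 1.0))                         # 'L1'
#   cauchy = np.mean(np.log(1 + np.abs(np.abs(u) - 1.0) / 0.58))  # 'cauchy'
#   margin = np.mean(np.maximum(0.5 - np.abs(u), 0.0))            # 'margin'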
345 | '''quantization loss 346 | ''' 347 | 348 | def quantization_loss(u, q_type='L2'): 349 | '''quantization loss 350 | - Deep Hashing Network for Efficient Similarity Retrieval 351 | - Deep Supervised Hashing for Fast Image Retrieval 352 | - Deep Cauchy Hashing for Hamming Space Retrieval 353 | - Deep Visual-Semantic Hashing for Cross-Modal Retrieval 354 | - Correlation Hashing Network for Efficient Cross-Modal Retrieval 355 | ''' 356 | with tf.name_scope('quantization_loss'): 357 | if q_type == 'L2': 358 | loss = tf.reduce_mean(tf.square(tf.abs(u) - tf.constant(1.0))) 359 | elif q_type == 'L1': 360 | loss = tf.reduce_mean(tf.abs(tf.abs(u) - tf.constant(1.0))) 361 | elif q_type == 'cauchy': 362 | epsilon = 0.58 363 | loss = tf.reduce_mean(tf.log(1 + tf.abs((tf.abs(u) - tf.constant(1.0))) / epsilon)) 364 | elif q_type == 'margin': 365 | margin = 0.5 366 | loss = tf.reduce_mean(tf.maximum(margin - tf.abs(u), 0.0)) 367 | elif q_type == 'max_margin': 368 | bit = tf.shape(u)[1] 369 | margin = 0.95 370 | D = distance(tf.abs(u), tf.ones(bit), dist_type='cos') 371 | loss = tf.reduce_mean(tf.maximum(margin - D, 0.0)) 372 | return loss 373 | 374 | 375 | def pq_loss(u, h, C, wordvec=None, squared=True): 376 | '''product quantization loss 377 | - Deep Quantization Network for Efficient Image Retrieval 378 | - Deep Visual-Semantic Quantization for Efficient Image Retrieval 379 | - Deep Triplet Quantization 380 | ''' 381 | with tf.name_scope('pq_loss'): 382 | dist = u - tf.matmul(h, C) 383 | 384 | if wordvec is not None: 385 | dist = tf.matmul(dist, wordvec, transpose_b=True) 386 | 387 | if squared: 388 | dist = tf.square(dist) 389 | 390 | loss = tf.reduce_mean(tf.reduce_sum(dist, 1)) 391 | return loss 392 | 393 | 394 | '''balance and independence loss 395 | - Deep semantic ranking based hashing for multi-label image retrieval 396 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 397 | ''' 398 | 399 | def balance_loss(u): 400 | '''balance loss 401 | 402 | Each bit should be half 0 and half 1. 
403 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 404 | ''' 405 | with tf.name_scope('balance_loss'): 406 | H = tf.sign(u) 407 | H_mean = tf.reduce_mean(H, axis=0) 408 | loss = tf.reduce_mean(tf.square(H_mean)) 409 | return loss 410 | 411 | 412 | def independence_loss(u): 413 | '''independence loss 414 | - Deep Triplet Quantization 415 | ''' 416 | with tf.name_scope('independence_loss'): 417 | batch_size = tf.shape(u)[0] 418 | bit = tf.shape(u)[1] 419 | H = tf.sign(u) 420 | I = tf.eye(bit) 421 | loss = tf.reduce_mean(tf.square(tf.matmul( 422 | H, H, transpose_a=True) / tf.cast(batch_size, tf.float32) - I)) 423 | return loss 424 | 425 | 426 | '''listwise loss 427 | - Hashing as Tie-Aware Learning to Rank 428 | ''' 429 | 430 | 431 | '''classification loss 432 | - Deep Semantic Hashing with Generative Adversarial Networks 433 | - Deep Supervised Discrete Hashing 434 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 435 | - Deep Supervised Cross-modal Retrieval 436 | ''' 437 | -------------------------------------------------------------------------------- /model/dqn.py: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | # Deep Quantization Network for Efficient Image Retrieval # 3 | # Authors: Yue Cao, Mingsheng Long, Jianmin Wang, Han Zhu, Qingfu Wen # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import random 9 | import shutil 10 | import time 11 | from datetime import datetime 12 | from math import ceil 13 | 14 | import numpy as np 15 | import tensorflow as tf 16 | from sklearn.cluster import MiniBatchKMeans 17 | 18 | from architecture import img_alexnet_layers 19 | from evaluation import MAPs_CQ 20 | from data_provider.pq import Dataset 21 | from loss import cosine_loss, pq_loss 22 | 23 | 24 | class DQN(object): 25 | def __init__(self, config): 26 | # Initialize setting 27 | print("initializing") 28 | np.set_printoptions(precision=4) 29 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 30 | self.device = '/gpu:' + config.gpu_id 31 | self.output_dim = config.output_dim 32 | self.n_class = config.label_dim 33 | 34 | self.subspace_num = config.n_subspace 35 | self.subcenter_num = config.n_subcenter 36 | self.code_batch_size = config.code_batch_size 37 | self.cq_lambda = config.cq_lambda 38 | self.max_iter_update_Cb = config.max_iter_update_Cb 39 | self.max_iter_update_b = config.max_iter_update_b 40 | 41 | self.batch_size = config.batch_size 42 | self.val_batch_size = config.val_batch_size 43 | self.max_iter = config.max_iter 44 | self.network = config.network 45 | self.learning_rate = config.learning_rate 46 | self.learning_rate_decay_factor = config.learning_rate_decay_factor 47 | self.decay_step = config.decay_step 48 | 49 | self.finetune_all = config.finetune_all 50 | 51 | self.model_file = os.path.join(config.save_dir, 'network_weights.npy') 52 | self.codes_file = os.path.join(config.save_dir, 'codes.npy') 53 | self.tflog_path = os.path.join(config.save_dir, 'tflog') 54 | 55 | # Setup session 56 | print("launching session") 57 | configProto = tf.ConfigProto() 58 | configProto.gpu_options.allow_growth = True 59 | configProto.allow_soft_placement = True 60 | self.sess = tf.Session(config=configProto) 61 | 62 | # Create variables and 
placeholders 63 | 64 | with tf.device(self.device): 65 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 66 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 67 | 68 | self.network_weights = config.network_weights 69 | self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model() 70 | 71 | self.C = tf.Variable(tf.random_uniform([self.subspace_num * self.subcenter_num, self.output_dim], 72 | minval=-1, maxval=1, dtype=tf.float32, name='centers')) 73 | self.deep_param_img['C'] = self.C 74 | 75 | # Centers shared in different modalities (image & text) 76 | # Binary codes for different modalities (image & text) 77 | self.img_output_all = tf.placeholder(tf.float32, [None, self.output_dim]) 78 | self.img_b_all = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num]) 79 | 80 | self.b_img = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num]) 81 | self.ICM_m = tf.placeholder(tf.int32, []) 82 | self.ICM_b_m = tf.placeholder(tf.float32, [None, self.subcenter_num]) 83 | self.ICM_b_all = tf.placeholder(tf.float32, [None, self.subcenter_num * self.subspace_num]) 84 | self.ICM_X = tf.placeholder(tf.float32, [self.code_batch_size, self.output_dim]) 85 | self.ICM_C_m = tf.slice(self.C, [self.ICM_m * self.subcenter_num, 0], [self.subcenter_num, self.output_dim]) 86 | self.ICM_X_residual = self.ICM_X - tf.matmul(self.ICM_b_all, self.C) + tf.matmul(self.ICM_b_m, self.ICM_C_m) 87 | ICM_X_expand = tf.expand_dims(self.ICM_X_residual, 1) # N * 1 * D 88 | ICM_C_m_expand = tf.expand_dims(self.ICM_C_m, 0) # 1 * M * D 89 | # N*sc*D * D*n 90 | ICM_sum_squares = tf.reduce_sum(tf.square(tf.squeeze( 91 | tf.subtract(ICM_X_expand, ICM_C_m_expand))), reduction_indices=2) 92 | ICM_best_centers = tf.argmin(ICM_sum_squares, 1) 93 | self.ICM_best_centers_one_hot = tf.one_hot( 94 | ICM_best_centers, self.subcenter_num, dtype=tf.float32) 95 | 96 | self.global_step = tf.Variable(0, trainable=False) 97 | self.train_op = self.apply_loss_function(self.global_step) 98 | self.sess.run(tf.global_variables_initializer()) 99 | 100 | if config.debug == True: 101 | from tensorflow.python import debug as tf_debug 102 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 103 | 104 | def load_model(self): 105 | if self.network == 'alexnet': 106 | img_output = img_alexnet_layers( 107 | self.img, self.batch_size, self.output_dim, 108 | self.stage, self.network_weights, val_batch_size=self.val_batch_size) 109 | else: 110 | raise Exception('cannot use such CNN model as ' + self.network) 111 | return img_output 112 | 113 | def save_model(self, model_file=None): 114 | if model_file is None: 115 | model_file = self.model_file 116 | model = {} 117 | for layer in self.deep_param_img: 118 | model[layer] = self.sess.run(self.deep_param_img[layer]) 119 | print("saving model to %s" % model_file) 120 | folder = os.path.dirname(model_file) 121 | if os.path.exists(folder) is False: 122 | os.makedirs(folder) 123 | np.save(model_file, np.array(model)) 124 | return 125 | 126 | def load_codes(self, codes_file=None): 127 | if codes_file is None: 128 | codes_file = self.codes_file 129 | codes = np.load(codes_file).item() 130 | 131 | import collections 132 | mDataset = collections.namedtuple('Dataset', ['output', 'codes', 'label']) 133 | database = mDataset(codes['db_features'], codes['db_codes'], codes['db_label']) 134 | query = mDataset(codes['val_features'], codes['val_codes'], codes['val_label']) 135 | C = codes['C'] 136 | return database, 
query, C 137 | 138 | def save_codes(self, database, query, C, codes_file=None): 139 | if codes_file is None: 140 | codes_file = self.codes_file 141 | codes = { 142 | 'db_features': database.output, 143 | 'db_codes': database.codes, 144 | 'db_label': database.label, 145 | 'val_features': query.output, 146 | 'val_codes': query.codes, 147 | 'val_label': query.label, 148 | 'C': C, 149 | } 150 | print("saving codes to %s" % codes_file) 151 | np.save(codes_file, np.array(codes)) 152 | return 153 | 154 | def apply_loss_function(self, global_step): 155 | # loss function 156 | self.cos_loss = cosine_loss(self.img_last_layer, self.img_label) 157 | self.q_loss = self.cq_lambda * pq_loss(self.img_last_layer, self.b_img, self.C) 158 | self.loss = self.cos_loss + self.q_loss 159 | 160 | # Last layer has a 10 times learning rate 161 | self.lr = tf.train.exponential_decay( 162 | self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True) 163 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9) 164 | grads_and_vars = opt.compute_gradients( 165 | self.loss, self.train_layers + self.train_last_layer) 166 | fcgrad, _ = grads_and_vars[-2] 167 | fbgrad, _ = grads_and_vars[-1] 168 | 169 | # for debug 170 | self.grads_and_vars = grads_and_vars 171 | tf.summary.scalar('loss', self.loss) 172 | tf.summary.scalar('cosine_loss', self.cos_loss) 173 | tf.summary.scalar('quantization_loss', self.q_loss) 174 | tf.summary.scalar('lr', self.lr) 175 | self.merged = tf.summary.merge_all() 176 | 177 | if self.finetune_all: 178 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 179 | (grads_and_vars[1][0]*2, self.train_layers[1]), 180 | (grads_and_vars[2][0], self.train_layers[2]), 181 | (grads_and_vars[3][0]*2, self.train_layers[3]), 182 | (grads_and_vars[4][0], self.train_layers[4]), 183 | (grads_and_vars[5][0]*2, self.train_layers[5]), 184 | (grads_and_vars[6][0], self.train_layers[6]), 185 | (grads_and_vars[7][0]*2, self.train_layers[7]), 186 | (grads_and_vars[8][0], self.train_layers[8]), 187 | (grads_and_vars[9][0]*2, self.train_layers[9]), 188 | (grads_and_vars[10][0], self.train_layers[10]), 189 | (grads_and_vars[11][0]*2, self.train_layers[11]), 190 | (grads_and_vars[12][0], self.train_layers[12]), 191 | (grads_and_vars[13][0]*2, self.train_layers[13]), 192 | (fcgrad*10, self.train_last_layer[0]), 193 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 194 | else: 195 | return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]), 196 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 197 | 198 | def initial_centers(self, img_output): 199 | C_init = np.zeros( 200 | [self.subspace_num * self.subcenter_num, self.output_dim]) 201 | print("#DQN train# initializing Centers") 202 | all_output = img_output 203 | div = int(self.output_dim / self.subspace_num) 204 | for i in range(self.subspace_num): 205 | kmeans = MiniBatchKMeans(n_clusters=self.subcenter_num).fit( 206 | all_output[:, i * div: (i + 1) * div]) 207 | C_init[i * self.subcenter_num: (i + 1) * self.subcenter_num, i * div: (i + 1) * div] = kmeans.cluster_centers_ 208 | print("step: ", i, " finish") 209 | return C_init 210 | 211 | def update_centers(self, img_dataset): 212 | ''' 213 | Optimize: 214 | self.C = (U * hu^T + V * hv^T) (hu * hu^T + hv * hv^T)^{-1} 215 | self.C^T = (hu * hu^T + hv * hv^T)^{-1} (hu * U^T + hv * V^T) 216 | but all the C need to be replaced with C^T: 217 | self.C = (hu * hu^T + hv * hv^T)^{-1} (hu^T * U + hv^T * V) 218 | '''
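# A minimal NumPy rendering of the closed-form update described in the
# docstring above, using the same 0.001 ridge term as the TensorFlow code
# below (img_codes and img_features are hypothetical names for the one-hot
# codes and the network outputs; illustrative only):
#
#   h = img_codes      # [n, subspace_num * subcenter_num]
#   U = img_features   # [n, output_dim]
#   ridge = 0.001 * np.eye(h.shape[1])
#   C_new = np.linalg.inv(h.T @ h + ridge) @ (h.T @ U)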

    def update_centers(self, img_dataset):
        '''
        Closed-form update of the shared centers C. With one-hot codes
        h [n, M*K] and network outputs U [n, D], solve
            min_C || U - h C ||^2  =>  C = (h^T h + eps * I)^{-1} h^T U,
        where the eps * I ridge term keeps h^T h invertible. (The cross-modal
        form C = (hu^T hu + hv^T hv)^{-1} (hu^T U + hv^T V) reduces to this
        when only the image modality is present.)
        '''
        print("#DQN train# updating centers")
        old_C_value = self.sess.run(self.C)

        h = self.img_b_all
        U = self.img_output_all
        smallResidual = tf.constant(
            np.eye(self.subcenter_num * self.subspace_num, dtype=np.float32) * 0.001)
        Uh = tf.matmul(tf.transpose(h), U)
        hh = tf.add(tf.matmul(tf.transpose(h), h), smallResidual)
        compute_centers = tf.matmul(tf.matrix_inverse(hh), Uh)

        update_C = self.C.assign(compute_centers)
        C_value = self.sess.run(update_C, feed_dict={
            self.img_output_all: img_dataset.output,
            self.img_b_all: img_dataset.codes,
        })

        # keep the old value for any center that no point was assigned to
        C_sums = np.sum(np.square(C_value), axis=1)
        C_zeros_ids = np.where(C_sums < 1e-8)
        C_value[C_zeros_ids, :] = old_C_value[C_zeros_ids, :]
        self.sess.run(self.C.assign(C_value))

    def update_codes_ICM(self, output, code):
        '''
        Iterated conditional modes (ICM):
            min_codes || output - codes * self.C ||^2
        args:
            output: [n_train, n_output]
            self.C: [n_subspace * n_subcenter, n_output] = [C_1; C_2; ...; C_M]
            codes:  [n_train, n_subspace * n_subcenter]
        One subspace at a time is re-assigned while the others are held fixed.
        '''

        # restart from all-zero codes; the incoming values are discarded
        code = np.zeros(code.shape)

        for iterate in range(self.max_iter_update_b):
            sub_list = [i for i in range(self.subspace_num)]
            random.shuffle(sub_list)
            for m in sub_list:
                best_centers_one_hot_val = self.sess.run(self.ICM_best_centers_one_hot, feed_dict={
                    self.ICM_b_m: code[:, m * self.subcenter_num: (m + 1) * self.subcenter_num],
                    self.ICM_b_all: code,
                    self.ICM_m: m,
                    self.ICM_X: output,
                })

                code[:, m * self.subcenter_num: (m + 1) *
                     self.subcenter_num] = best_centers_one_hot_val
        return code

    def update_codes_batch(self, dataset, batch_size):
        '''
        Update the codes of the whole dataset, one batch at a time.
        '''
        total_batch = int(ceil(dataset.n_samples / batch_size))
        dataset.finish_epoch()

        for i in range(total_batch):
            output_val, code_val = dataset.next_batch_output_codes(batch_size)
            codes_val = self.update_codes_ICM(output_val, code_val)
            dataset.feed_batch_codes(batch_size, codes_val)
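
    # A NumPy sketch of one ICM pass over subspace m, assuming one-hot codes
    # B [n, M*K], centers C [M*K, D] and outputs X [n, D]:
    #
    #     keep = B @ C - B[:, m*K:(m+1)*K] @ C[m*K:(m+1)*K]  # other subspaces
    #     residual = X - keep
    #     dist = ((residual[:, None, :] - C[None, m*K:(m+1)*K, :]) ** 2).sum(2)
    #     B[:, m*K:(m+1)*K] = np.eye(K, dtype=B.dtype)[dist.argmin(axis=1)]
    #
    # i.e. each point keeps the other subspaces' contributions fixed and snaps
    # to its nearest subcenter within subspace m.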

    def train(self, img_dataset):
        print("%s #train# start training" % datetime.now())
        epoch = 0
        epoch_iter = int(ceil(img_dataset.n_samples / self.batch_size))

        # tensorboard
        if os.path.exists(self.tflog_path):
            shutil.rmtree(self.tflog_path)
        train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph)

        for train_iter in range(self.max_iter):
            images, labels, codes = img_dataset.next_batch(self.batch_size)
            start_time = time.time()

            _, loss, output, summary = self.sess.run([self.train_op, self.loss, self.img_last_layer, self.merged],
                                                     feed_dict={self.img: images,
                                                                self.img_label: labels,
                                                                self.b_img: codes})

            img_dataset.feed_batch_output(self.batch_size, output)
            duration = time.time() - start_time

            # every epoch: update codes and centers
            if train_iter % epoch_iter == 0 and train_iter != 0:
                if epoch == 0:
                    with tf.device(self.device):
                        for i in range(self.max_iter_update_Cb):
                            self.sess.run(self.C.assign(
                                self.initial_centers(img_dataset.output)))

                epoch = epoch + 1
                for i in range(self.max_iter_update_Cb):
                    self.update_codes_batch(img_dataset, self.code_batch_size)
                    self.update_centers(img_dataset)
                    # self.sess.run(self.C.assign(self.initial_centers(img_dataset.output)))

            # log every iteration
            if train_iter % 1 == 0:
                train_writer.add_summary(summary, train_iter)
                print("%s #train# epoch %2d step %4d, loss = %.4f, %.1f sec/batch"
                      % (datetime.now(), epoch, train_iter + 1, loss, duration))

        print("%s #train# finished training" % datetime.now())
        self.save_model()
        print("model saved")

        self.sess.close()

    def validation(self, img_query, img_database, R=100):
        if os.path.exists(self.codes_file):
            print("loading ", self.codes_file)
            img_database, img_query, C_tmp = self.load_codes(self.codes_file)
        else:
            print("%s #validation# start validation" % (datetime.now()))
            query_batch = int(ceil(img_query.n_samples / self.val_batch_size))
            print("%s #validation# %d query images in %d batches" %
                  (datetime.now(), img_query.n_samples, query_batch))
            for i in range(query_batch):
                images, labels, codes = img_query.next_batch(self.val_batch_size)
                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_query.feed_batch_output(self.val_batch_size, output)
                print('Cosine Loss: %s' % loss)

            database_batch = int(ceil(img_database.n_samples / self.val_batch_size))
            print("%s #validation# %d database images in %d batches" %
                  (datetime.now(), img_database.n_samples, database_batch))
            for i in range(database_batch):
                images, labels, codes = img_database.next_batch(self.val_batch_size)

                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_database.feed_batch_output(self.val_batch_size, output)
                if i % 100 == 0:
                    print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss))

            self.update_codes_batch(img_query, self.code_batch_size)
            self.update_codes_batch(img_database, self.code_batch_size)

            print("%s #validation# calculating MAP@%d" % (datetime.now(), R))
            C_tmp = self.sess.run(self.C)
            # save features and codes
            self.save_codes(img_database, img_query, C_tmp)

        mAPs = MAPs_CQ(C_tmp, self.subspace_num, self.subcenter_num, R)

        self.sess.close()
        return {
            'i2i_nocq': mAPs.get_mAPs_by_feature(img_database, img_query),
            'i2i_AQD': mAPs.get_mAPs_AQD(img_database, img_query),
            'i2i_SQD': mAPs.get_mAPs_SQD(img_database, img_query)
        }


def train(train_img, config):
    model = DQN(config)
    img_dataset = Dataset(train_img, config.output_dim, config.n_subspace * config.n_subcenter)
    model.train(img_dataset)
    return model.model_file


def validation(database_img, query_img, config):
    model = DQN(config)
    img_database = Dataset(database_img, config.output_dim, config.n_subspace * config.n_subcenter)
    img_query = Dataset(query_img, config.output_dim, config.n_subspace * config.n_subcenter)
    return model.validation(img_query, img_database, config.R)
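
# A minimal usage sketch for this module; `train_img`, `database_img`,
# `query_img` and the config fields mirror examples/dqn/train_val_script.py
# and are assumptions here, not a fixed API:
#
#     from model import dqn
#     model_file = dqn.train(train_img, config)   # SGD + ICM + center updates
#     maps = dqn.validation(database_img, query_img, config)
#     print(maps['i2i_AQD'])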

--------------------------------------------------------------------------------
/model/dvsq.py:
--------------------------------------------------------------------------------
##################################################################################
# Deep Visual-Semantic Quantization for Efficient Image Retrieval                #
# Authors: Yue Cao, Mingsheng Long, Jianmin Wang, Shichen Liu                    #
# Contact: caoyue10@gmail.com                                                    #
##################################################################################

import os
import random
import shutil
import time
from datetime import datetime
from math import ceil

import numpy as np
import tensorflow as tf
from sklearn.cluster import MiniBatchKMeans

from architecture import img_alexnet_layers
from evaluation import MAPs_CQ
from data_provider.pq import Dataset
from loss import cos_margin_multi_label_loss, pq_loss


class DVSQ(object):
    def __init__(self, config):
        # Initialize settings
        print("initializing")
        np.set_printoptions(precision=4)
        self.stage = tf.placeholder_with_default(tf.constant(0), [])
        self.device = '/gpu:' + config.gpu_id
        self.output_dim = config.output_dim
        self.n_class = config.label_dim

        self.subspace_num = config.n_subspace
        self.subcenter_num = config.n_subcenter
        self.code_batch_size = config.code_batch_size
        self.cq_lambda = config.cq_lambda
        self.max_iter_update_Cb = config.max_iter_update_Cb
        self.max_iter_update_b = config.max_iter_update_b

        self.batch_size = config.batch_size
        self.val_batch_size = config.val_batch_size
        self.max_iter = config.max_iter
        self.network = config.network
        self.learning_rate = config.learning_rate
        self.learning_rate_decay_factor = config.learning_rate_decay_factor
        self.decay_step = config.decay_step

        self.finetune_all = config.finetune_all

        self.wordvec_dict = config.wordvec_dict

        self.model_file = os.path.join(config.save_dir, 'network_weights.npy')
        self.codes_file = os.path.join(config.save_dir, 'codes.npy')
        self.tflog_path = os.path.join(config.save_dir, 'tflog')

        # Set up the session
        print("launching session")
        configProto = tf.ConfigProto()
        configProto.gpu_options.allow_growth = True
        configProto.allow_soft_placement = True
        self.sess = tf.Session(config=configProto)

        # Create variables and placeholders
        with tf.device(self.device):
            self.img = tf.placeholder(tf.float32, [None, 256, 256, 3])
            self.img_label = tf.placeholder(tf.float32, [None, self.n_class])

            self.network_weights = config.network_weights
            self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model()

            self.C = tf.Variable(tf.random_uniform([self.subspace_num * self.subcenter_num, self.output_dim],
                                                   minval=-1, maxval=1, dtype=tf.float32), name='centers')
            self.deep_param_img['C'] = self.C

            # Network outputs and binary codes of the whole training set,
            # fed in when the shared centers are updated
            self.img_output_all = tf.placeholder(tf.float32, [None, self.output_dim])
            self.img_b_all = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num])

            self.b_img = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num])
            self.ICM_m = tf.placeholder(tf.int32, [])
            self.ICM_b_m = tf.placeholder(tf.float32, [None, self.subcenter_num])
            self.ICM_b_all = tf.placeholder(tf.float32, [None, self.subcenter_num * self.subspace_num])
            self.ICM_X = tf.placeholder(tf.float32, [self.code_batch_size, self.output_dim])
            self.ICM_C_m = tf.slice(self.C, [self.ICM_m * self.subcenter_num, 0], [self.subcenter_num, self.output_dim])
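            # DVSQ's ICM step differs from DQN's: the subspace-m residual is
            # first projected onto the label word vectors (wordvec: [n_class, D]),
            # and the nearest subcenter is chosen by distance in that semantic
            # space rather than in the raw feature space.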
            self.ICM_X_residual = self.ICM_X - tf.matmul(self.ICM_b_all, self.C) + tf.matmul(self.ICM_b_m, self.ICM_C_m)
            ICM_X_expand = tf.expand_dims(self.ICM_X_residual, 1)  # N * 1 * D
            ICM_C_m_expand = tf.expand_dims(self.ICM_C_m, 0)  # 1 * M * D
            self.wordvec = tf.constant(np.loadtxt(self.wordvec_dict), dtype=tf.float32)
            # residuals projected through the word vectors: N * M * n_class
            ICM_word_dict = tf.reshape(tf.matmul(tf.reshape(
                ICM_X_expand - ICM_C_m_expand, [self.code_batch_size * self.subcenter_num, self.output_dim]),
                tf.transpose(self.wordvec)), [self.code_batch_size, self.subcenter_num, self.n_class])
            ICM_sum_squares = tf.reduce_sum(
                tf.square(ICM_word_dict), axis=2)
            ICM_best_centers = tf.argmin(ICM_sum_squares, 1)
            self.ICM_best_centers_one_hot = tf.one_hot(
                ICM_best_centers, self.subcenter_num, dtype=tf.float32)

            self.global_step = tf.Variable(0, trainable=False)
            self.train_op = self.apply_loss_function(self.global_step)
            self.sess.run(tf.global_variables_initializer())

        if config.debug:
            from tensorflow.python import debug as tf_debug
            self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)

    def load_model(self):
        if self.network == 'alexnet':
            img_output = img_alexnet_layers(
                self.img, self.batch_size, self.output_dim,
                self.stage, self.network_weights, val_batch_size=self.val_batch_size)
        else:
            raise Exception('unsupported CNN architecture: ' + self.network)
        return img_output

    def save_model(self, model_file=None):
        if model_file is None:
            model_file = self.model_file
        model = {}
        for layer in self.deep_param_img:
            model[layer] = self.sess.run(self.deep_param_img[layer])
        print("saving model to %s" % model_file)
        folder = os.path.dirname(model_file)
        if not os.path.exists(folder):
            os.makedirs(folder)
        np.save(model_file, np.array(model))
        return

    def load_codes(self, codes_file=None):
        if codes_file is None:
            codes_file = self.codes_file
        codes = np.load(codes_file).item()

        import collections
        mDataset = collections.namedtuple('Dataset', ['output', 'codes', 'label'])
        database = mDataset(codes['db_features'], codes['db_codes'], codes['db_label'])
        query = mDataset(codes['val_features'], codes['val_codes'], codes['val_label'])
        C = codes['C']
        return database, query, C

    def save_codes(self, database, query, C, codes_file=None):
        if codes_file is None:
            codes_file = self.codes_file
        codes = {
            'db_features': database.output,
            'db_codes': database.codes,
            'db_label': database.label,
            'val_features': query.output,
            'val_codes': query.codes,
            'val_label': query.label,
            'C': C,
        }
        print("saving codes to %s" % codes_file)
        np.save(codes_file, np.array(codes))
        return
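
    # apply_loss_function below reproduces Caffe-style lr_mult fine-tuning by
    # scaling per-variable gradients: backbone biases train at 2x the weight
    # rate, the freshly initialized last layer at 10x / 20x. A generic TF1
    # sketch of the same idea (the lr_mult keys are hypothetical names):
    #
    #     lr_mult = {'conv1_W': 1., 'conv1_b': 2., 'fc8_W': 10., 'fc8_b': 20.}
    #     gvs = opt.compute_gradients(loss, var_list)
    #     train_op = opt.apply_gradients(
    #         [(g * lr_mult[v.op.name], v) for g, v in gvs], global_step=step)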

    def apply_loss_function(self, global_step):
        # loss function: cosine margin multi-label loss plus weighted quantization loss
        self.cos_loss = cos_margin_multi_label_loss(self.img_last_layer, self.img_label, self.wordvec, self.output_dim, soft=False)
        self.q_loss = self.cq_lambda * pq_loss(self.img_last_layer, self.b_img, self.C, self.wordvec)
        self.loss = self.cos_loss + self.q_loss

        # The freshly initialized last layer trains with a 10x (weights) /
        # 20x (biases) learning rate; backbone biases get 2x (Caffe-style lr_mult)
        self.lr = tf.train.exponential_decay(
            self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True)
        opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
        grads_and_vars = opt.compute_gradients(
            self.loss, self.train_layers + self.train_last_layer)
        fcgrad, _ = grads_and_vars[-2]
        fbgrad, _ = grads_and_vars[-1]

        # for debugging
        self.grads_and_vars = grads_and_vars
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('cosine_loss', self.cos_loss)
        tf.summary.scalar('quantization_loss', self.q_loss)
        tf.summary.scalar('lr', self.lr)
        self.merged = tf.summary.merge_all()

        if self.finetune_all:
            return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]),
                                        (grads_and_vars[1][0]*2, self.train_layers[1]),
                                        (grads_and_vars[2][0], self.train_layers[2]),
                                        (grads_and_vars[3][0]*2, self.train_layers[3]),
                                        (grads_and_vars[4][0], self.train_layers[4]),
                                        (grads_and_vars[5][0]*2, self.train_layers[5]),
                                        (grads_and_vars[6][0], self.train_layers[6]),
                                        (grads_and_vars[7][0]*2, self.train_layers[7]),
                                        (grads_and_vars[8][0], self.train_layers[8]),
                                        (grads_and_vars[9][0]*2, self.train_layers[9]),
                                        (grads_and_vars[10][0], self.train_layers[10]),
                                        (grads_and_vars[11][0]*2, self.train_layers[11]),
                                        (grads_and_vars[12][0], self.train_layers[12]),
                                        (grads_and_vars[13][0]*2, self.train_layers[13]),
                                        (fcgrad*10, self.train_last_layer[0]),
                                        (fbgrad*20, self.train_last_layer[1])], global_step=global_step)
        else:
            return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]),
                                        (fbgrad*20, self.train_last_layer[1])], global_step=global_step)

    def initial_centers(self, img_output):
        C_init = np.zeros(
            [self.subspace_num * self.subcenter_num, self.output_dim])
        print("#DVSQ train# initializing centers")
        all_output = img_output
        div = int(self.output_dim / self.subspace_num)
        for i in range(self.subspace_num):
            kmeans = MiniBatchKMeans(n_clusters=self.subcenter_num).fit(
                all_output[:, i * div: (i + 1) * div])
            C_init[i * self.subcenter_num: (i + 1) * self.subcenter_num, i * div: (i + 1) * div] = kmeans.cluster_centers_
            print("subspace %d done" % i)
        return C_init

    def update_centers(self, img_dataset):
        '''
        Closed-form update of the shared centers C. With one-hot codes
        h [n, M*K] and network outputs U [n, D], solve
            min_C || U - h C ||^2  =>  C = (h^T h + eps * I)^{-1} h^T U,
        where the eps * I ridge term keeps h^T h invertible. (The cross-modal
        form C = (hu^T hu + hv^T hv)^{-1} (hu^T U + hv^T V) reduces to this
        when only the image modality is present.)
        '''
        print("#DVSQ train# updating centers")
        old_C_value = self.sess.run(self.C)

        h = self.img_b_all
        U = self.img_output_all
        smallResidual = tf.constant(
            np.eye(self.subcenter_num * self.subspace_num, dtype=np.float32) * 0.001)
        Uh = tf.matmul(tf.transpose(h), U)
        hh = tf.add(tf.matmul(tf.transpose(h), h), smallResidual)
        compute_centers = tf.matmul(tf.matrix_inverse(hh), Uh)

        update_C = self.C.assign(compute_centers)
        C_value = self.sess.run(update_C, feed_dict={
            self.img_output_all: img_dataset.output,
            self.img_b_all: img_dataset.codes,
        })

        # keep the old value for any center that no point was assigned to
        C_sums = np.sum(np.square(C_value), axis=1)
        C_zeros_ids = np.where(C_sums < 1e-8)
        C_value[C_zeros_ids, :] = old_C_value[C_zeros_ids, :]
        self.sess.run(self.C.assign(C_value))

    def update_codes_ICM(self, output, code):
        '''
        Iterated conditional modes (ICM):
            min_codes || output - codes * self.C ||^2
        args:
            output: [n_train, n_output]
            self.C: [n_subspace * n_subcenter, n_output] = [C_1; C_2; ...; C_M]
            codes:  [n_train, n_subspace * n_subcenter]
        One subspace at a time is re-assigned while the others are held fixed.
        '''

        # restart from all-zero codes; the incoming values are discarded
        code = np.zeros(code.shape)

        for iterate in range(self.max_iter_update_b):
            sub_list = [i for i in range(self.subspace_num)]
            random.shuffle(sub_list)
            for m in sub_list:
                best_centers_one_hot_val = self.sess.run(self.ICM_best_centers_one_hot, feed_dict={
                    self.ICM_b_m: code[:, m * self.subcenter_num: (m + 1) * self.subcenter_num],
                    self.ICM_b_all: code,
                    self.ICM_m: m,
                    self.ICM_X: output,
                })

                code[:, m * self.subcenter_num: (m + 1) *
                     self.subcenter_num] = best_centers_one_hot_val
        return code

    def update_codes_batch(self, dataset, batch_size):
        '''
        Update the codes of the whole dataset, one batch at a time.
        '''
        total_batch = int(ceil(dataset.n_samples / batch_size))
        dataset.finish_epoch()

        for i in range(total_batch):
            output_val, code_val = dataset.next_batch_output_codes(batch_size)
            codes_val = self.update_codes_ICM(output_val, code_val)
            dataset.feed_batch_codes(batch_size, codes_val)

    def train(self, img_dataset):
        print("%s #train# start training" % datetime.now())
        epoch = 0
        epoch_iter = int(ceil(img_dataset.n_samples / self.batch_size))

        # tensorboard
        if os.path.exists(self.tflog_path):
            shutil.rmtree(self.tflog_path)
        train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph)

        for train_iter in range(self.max_iter):
            images, labels, codes = img_dataset.next_batch(self.batch_size)
            start_time = time.time()

            _, loss, output, summary = self.sess.run([self.train_op, self.loss, self.img_last_layer, self.merged],
                                                     feed_dict={self.img: images,
                                                                self.img_label: labels,
                                                                self.b_img: codes})

            img_dataset.feed_batch_output(self.batch_size, output)
            duration = time.time() - start_time

            # every epoch: update codes and centers
            if train_iter % epoch_iter == 0 and train_iter != 0:
                if epoch == 0:
                    with tf.device(self.device):
                        for i in range(self.max_iter_update_Cb):
                            self.sess.run(self.C.assign(
                                self.initial_centers(img_dataset.output)))

                epoch = epoch + 1
                for i in range(self.max_iter_update_Cb):
                    self.update_codes_batch(img_dataset, self.code_batch_size)
                    self.update_centers(img_dataset)
                    # self.sess.run(self.C.assign(self.initial_centers(img_dataset.output)))

            # log every iteration
            if train_iter % 1 == 0:
                train_writer.add_summary(summary, train_iter)
                print("%s #train# epoch %2d step %4d, loss = %.4f, %.1f sec/batch"
                      % (datetime.now(), epoch, train_iter + 1, loss, duration))

        print("%s #train# finished training" % datetime.now())
        self.save_model()
        print("model saved")

        self.sess.close()
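
    # validation reports three mAP@R variants computed by evaluation.MAPs_CQ:
    # 'i2i_nocq' ranks by raw features (no quantization), 'i2i_AQD' uses the
    # asymmetric quantizer distance (raw query vs. quantized database), and
    # 'i2i_SQD' the symmetric quantizer distance (both sides quantized).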

    def validation(self, img_query, img_database, R=100):
        if os.path.exists(self.codes_file):
            print("loading ", self.codes_file)
            img_database, img_query, C_tmp = self.load_codes(self.codes_file)
        else:
            print("%s #validation# start validation" % (datetime.now()))
            query_batch = int(ceil(img_query.n_samples / self.val_batch_size))
            print("%s #validation# %d query images in %d batches" %
                  (datetime.now(), img_query.n_samples, query_batch))
            for i in range(query_batch):
                images, labels, codes = img_query.next_batch(self.val_batch_size)
                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_query.feed_batch_output(self.val_batch_size, output)
                print('Cosine Loss: %s' % loss)

            database_batch = int(ceil(img_database.n_samples / self.val_batch_size))
            print("%s #validation# %d database images in %d batches" %
                  (datetime.now(), img_database.n_samples, database_batch))
            for i in range(database_batch):
                images, labels, codes = img_database.next_batch(self.val_batch_size)

                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_database.feed_batch_output(self.val_batch_size, output)
                if i % 100 == 0:
                    print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss))

            self.update_codes_batch(img_query, self.code_batch_size)
            self.update_codes_batch(img_database, self.code_batch_size)

            print("%s #validation# calculating MAP@%d" % (datetime.now(), R))
            C_tmp = self.sess.run(self.C)
            # save features and codes
            self.save_codes(img_database, img_query, C_tmp)

        mAPs = MAPs_CQ(C_tmp, self.subspace_num, self.subcenter_num, R)

        self.sess.close()
        return {
            'i2i_nocq': mAPs.get_mAPs_by_feature(img_database, img_query),
            'i2i_AQD': mAPs.get_mAPs_AQD(img_database, img_query),
            'i2i_SQD': mAPs.get_mAPs_SQD(img_database, img_query)
        }


def train(train_img, config):
    model = DVSQ(config)
    img_dataset = Dataset(train_img, config.output_dim, config.n_subspace * config.n_subcenter)
    model.train(img_dataset)
    return model.model_file


def validation(database_img, query_img, config):
    model = DVSQ(config)
    img_database = Dataset(database_img, config.output_dim, config.n_subspace * config.n_subcenter)
    img_query = Dataset(query_img, config.output_dim, config.n_subspace * config.n_subcenter)
    return model.validation(img_query, img_database, config.R)
--------------------------------------------------------------------------------