├── __init__.py
├── model
│   ├── __init__.py
│   ├── dch.py
│   ├── dhn.py
│   ├── dhcs.py
│   ├── dqn.py
│   └── dvsq.py
├── distance
│   ├── __init__.py
│   ├── npversion.py
│   └── tfversion.py
├── examples
│   ├── __init__.py
│   ├── dch
│   │   ├── log.pkl
│   │   └── train_val_script.py
│   ├── dvsq
│   │   ├── train_val.sh
│   │   └── train_val_script.py
│   ├── dhn
│   │   └── train_val_script.py
│   ├── dtq
│   │   └── train_val_script.py
│   ├── dhcs
│   │   └── train_val_script.py
│   └── dqn
│       └── train_val_script.py
├── data_provider
│   ├── __init__.py
│   ├── pairwise.py
│   ├── pq.py
│   ├── text
│   │   └── __init__.py
│   ├── image
│   │   └── __init__.py
│   └── triplet.py
├── snapshot
├── util
│   ├── __init__.py
│   ├── plot.py
│   ├── tool.py
│   └── visualize.py
├── architecture
│   ├── __init__.py
│   ├── mlp.py
│   ├── vgg_f.py
│   ├── alexnet.py
│   └── vgg.py
├── .gitignore
├── README.md
├── evaluation
│   ├── load_and_predict.py
│   └── __init__.py
└── loss
    └── __init__.py

/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/distance/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/snapshot:
--------------------------------------------------------------------------------
1 | /media/disk1/chenshen/cachedir/DeepHash/snapshot
--------------------------------------------------------------------------------
/examples/dch/log.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenshen03/DeepHash-tensorflow/HEAD/examples/dch/log.pkl
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | from .tool import *
2 | from .visualize import plot_distance, plot_distribution, plot_tsne
--------------------------------------------------------------------------------
/architecture/__init__.py:
--------------------------------------------------------------------------------
1 | from .alexnet import img_alexnet_layers
2 | from .vgg import img_vgg16_layers
3 | from .mlp import txt_mlp_layers
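4 | 
5 | # A minimal wiring sketch (hypothetical, not from this repo) of how the
6 | # exported text branch plugs into a TF1 graph. The placeholder shapes and
7 | # txt_dim=1386 are assumptions; txt_mlp_layers is defined in
8 | # /architecture/mlp.py below:
9 | #
10 | #     import tensorflow as tf
11 | #     from architecture import txt_mlp_layers
12 | #
13 | #     txt = tf.placeholder(tf.float32, [None, 1386])   # bag-of-words input
14 | #     stage = tf.placeholder(tf.int32)                  # 0 = train (dropout on)
15 | #     codes, params, train_vars = txt_mlp_layers(
16 | #         txt, txt_dim=1386, output_dim=32, stage=stage, with_tanh=True)
--------------------------------------------------------------------------------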
/examples/dvsq/train_val.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TF_CPP_MIN_LOG_LEVEL=3
4 | 
5 | # baseline
6 | CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --gpu 0 > logs/log1 2>&1 &
7 | # CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --learning-rate 0.002 --batch-size 256 --output-dim 64\
8 | #   --cq-lambda 0.0001 --n-subspace 4 --n-subcenter 256 --R 54000 --dataset cifar10 --gpu 0 > logs/log1 2>&1 &
9 | 
10 | # CUDA_VISIBLE_DEVICES=0 python -u train_val_script.py --gpu 0 --n-subspace 4 --n-subcenter 256 > logs/log1 2>&1 &
11 | 
12 | # CUDA_VISIBLE_DEVICES=1 python -u train_val_script.py --gpu 1 --n-subspace 2 --n-subcenter 64 > logs/log2 2>&1 &
13 | 
14 | # CUDA_VISIBLE_DEVICES=2 python -u train_val_script.py --gpu 2 --n-subspace 3 --n-subcenter 16 > logs/log3 2>&1 &
15 | 
16 | # CUDA_VISIBLE_DEVICES=3 python -u train_val_script.py --gpu 3 --n-subspace 4 --n-subcenter 8 > logs/log4 2>&1 &
--------------------------------------------------------------------------------
/util/plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import matplotlib
4 | matplotlib.use('Agg')
5 | import matplotlib.pyplot as plt
6 | 
7 | import collections
8 | import pickle
9 | import os
10 | 
11 | _since_beginning = collections.defaultdict(lambda: {})
12 | _since_last_flush = collections.defaultdict(lambda: {})
13 | 
14 | _iter = 0
15 | 
16 | def tick():
17 |     # _iter is module-level state; without the global declaration the
18 |     # increment raises UnboundLocalError on the first call.
19 |     global _iter
20 |     _iter += 1
21 | 
22 | def plot(name, value):
23 |     _since_last_flush[name][_iter] = value
24 | 
25 | def flush(path=""):
26 |     prints = []
27 | 
28 |     for name, vals in list(_since_last_flush.items()):
29 |         prints.append("{}\t{}".format(name, np.mean(list(vals.values()))))
30 |         _since_beginning[name].update(vals)
31 | 
32 |         x_vals = np.sort(list(_since_beginning[name].keys()))
33 |         y_vals = [_since_beginning[name][x] for x in x_vals]
34 | 
35 |         plt.clf()
36 |         plt.plot(x_vals, y_vals)
37 |         plt.xlabel('iteration')
38 |         plt.ylabel(name)
39 |         plt.savefig(os.path.join(path, name.replace(' ', '_') + '.jpg'))
40 | 
41 |     print("iter {}\t{}".format(_iter, "\t".join(prints)))
42 |     _since_last_flush.clear()
43 | 
44 |     with open('log.pkl', 'wb') as f:
45 |         pickle.dump(dict(_since_beginning), f, pickle.HIGHEST_PROTOCOL)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | #  Usually these files are written by a python script from a template
30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | # *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | pretrained_model/ 104 | logs/ 105 | tflog/ 106 | models/ 107 | *.swp 108 | .vscode/ 109 | data/ 110 | *.log 111 | *.sh -------------------------------------------------------------------------------- /util/tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import errno 4 | import shutil 5 | import os.path as osp 6 | import argparse 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | 11 | def mkdir_if_missing(directory): 12 | if not osp.exists(directory): 13 | try: 14 | os.makedirs(directory) 15 | except OSError as e: 16 | if e.errno != errno.EEXIST: 17 | raise 18 | 19 | 20 | class Logger(object): 21 | """ 22 | Write console output to external text file. 23 | 24 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. 
25 |     """
26 |     def __init__(self, fpath=None):
27 |         self.console = sys.stdout
28 |         self.file = None
29 |         if fpath is not None:
30 |             mkdir_if_missing(os.path.dirname(fpath))
31 |             self.file = open(fpath, 'w')
32 | 
33 |     def __del__(self):
34 |         self.close()
35 | 
36 |     def __enter__(self):
37 |         pass
38 | 
39 |     def __exit__(self, *args):
40 |         self.close()
41 | 
42 |     def write(self, msg):
43 |         self.console.write(msg)
44 |         if self.file is not None:
45 |             self.file.write(msg)
46 | 
47 |     def flush(self):
48 |         self.console.flush()
49 |         if self.file is not None:
50 |             self.file.flush()
51 |             os.fsync(self.file.fileno())
52 | 
53 |     def close(self):
54 |         self.console.close()
55 |         if self.file is not None:
56 |             self.file.close()
57 | 
58 | 
59 | def str2bool(v):
60 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
61 |         return True
62 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
63 |         return False
64 |     else:
65 |         raise argparse.ArgumentTypeError('Unsupported value encountered.')
66 | 
67 | 
68 | # Return -1 if x < 0, 1 if x > 0, and a random choice of -1 or 1 if x == 0.
69 | def sign(x):
70 |     s = np.sign(x)
71 |     tmp = s[s == 0]
72 |     s[s == 0] = np.random.choice([-1, 1], tmp.shape)
73 |     return s
74 | 
75 | 
76 | def reduce_shaper(t):
77 |     return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])
--------------------------------------------------------------------------------
/data_provider/pairwise.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Dataset(object):
4 |     def __init__(self, dataset, output_dim):
5 |         print("Initializing Dataset")
6 |         self._dataset = dataset
7 |         self.n_samples = dataset.n_samples
8 |         self._train = dataset.train
9 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
10 | 
11 |         self._perm = np.arange(self.n_samples)
12 |         np.random.shuffle(self._perm)
13 |         self._index_in_epoch = 0
14 |         self._epochs_complete = 0
15 |         print("Dataset ready")
16 |         return
17 | 
18 |     def next_batch(self, batch_size):
19 |         """
20 |         Args:
21 |             batch_size
22 |         Returns:
23 |             [batch_size, (n_inputs)]: next batch images
24 |             [batch_size, n_class]: next batch labels
25 |         """
26 |         start = self._index_in_epoch
27 |         self._index_in_epoch += batch_size
28 |         # Another epoch has finished
29 |         if self._index_in_epoch > self.n_samples:
30 |             if self._train:
31 |                 # The training stage draws batches repeatedly
32 |                 self._epochs_complete += 1
33 |                 # Shuffle the data
34 |                 np.random.shuffle(self._perm)
35 |                 # Start next epoch
36 |                 start = 0
37 |                 self._index_in_epoch = batch_size
38 |             else:
39 |                 # The validation stage processes each sample only once
40 |                 start = self.n_samples - batch_size
41 |                 self._index_in_epoch = self.n_samples
42 |         end = self._index_in_epoch
43 | 
44 |         data, label = self._dataset.data(self._perm[start:end])
45 |         return (data, label)
46 | 
47 |     def feed_batch_output(self, batch_size, output):
48 |         """
49 |         Args:
50 |             batch_size
51 |             output: [batch_size, n_output]
52 |         """
53 |         start = self._index_in_epoch - batch_size
54 |         end = self._index_in_epoch
55 |         self.output[self._perm[start:end], :] = output
56 |         return
57 | 
58 |     @property
59 |     def output(self):
60 |         return self._output
61 | 
62 |     @property
63 |     def label(self):
64 |         return self._dataset.get_labels()
65 | 
66 |     def finish_epoch(self):
67 |         self._index_in_epoch = 0
68 |         np.random.shuffle(self._perm)
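69 | 
70 | # A minimal training-loop sketch (illustrative; `base_dataset` and
71 | # `net_forward` are hypothetical stand-ins, not part of this repo):
72 | #
73 | #     train_set = Dataset(base_dataset, output_dim=32)
74 | #     for _ in range(10000):
75 | #         images, labels = train_set.next_batch(batch_size=128)
76 | #         codes = net_forward(images)              # e.g. one sess.run() step
77 | #         train_set.feed_batch_output(128, codes)  # cache codes for this batch
--------------------------------------------------------------------------------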
/architecture/mlp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | def txt_mlp_layers(txt, txt_dim, output_dim, stage, model_weights=None, with_tanh=True):
7 |     deep_param_txt = {}
8 |     train_layers = []
9 | 
10 |     if model_weights is None:
11 |         dir_path = os.path.dirname(os.path.realpath(__file__))
12 |         model_weights = os.path.join(
13 |             dir_path, "pretrained_model/reference_pretrain.npy")
14 | 
15 |     net_data = dict(np.load(model_weights, encoding='bytes').item())
16 | 
17 |     # txt_fc1
18 |     with tf.name_scope('txt_fc1'):
19 |         if 'txt_fc1' not in net_data:
20 |             txt_fc1w = tf.Variable(tf.truncated_normal([txt_dim, 4096],
21 |                                                        dtype=tf.float32,
22 |                                                        stddev=1e-2), name='weights')
23 |             txt_fc1b = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
24 |                                    trainable=True, name='biases')
25 |         else:
26 |             txt_fc1w = tf.Variable(net_data['txt_fc1'][0], name='weights')
27 |             txt_fc1b = tf.Variable(net_data['txt_fc1'][1], name='biases')
28 |         txt_fc1l = tf.nn.bias_add(tf.matmul(txt, txt_fc1w), txt_fc1b)
29 | 
30 |         # Dropout is applied only in the training stage (stage == 0).
31 |         txt_fc1 = tf.cond(stage > 0, lambda: tf.nn.relu(txt_fc1l),
32 |                           lambda: tf.nn.dropout(tf.nn.relu(txt_fc1l), 0.5))
33 | 
34 |         train_layers += [txt_fc1w, txt_fc1b]
35 |         deep_param_txt['txt_fc1'] = [txt_fc1w, txt_fc1b]
36 | 
37 |     # txt_fc2
38 |     with tf.name_scope('txt_fc2'):
39 |         if 'txt_fc2' not in net_data:
40 |             txt_fc2w = tf.Variable(tf.truncated_normal([4096, output_dim],
41 |                                                        dtype=tf.float32,
42 |                                                        stddev=1e-2), name='weights')
43 |             txt_fc2b = tf.Variable(tf.constant(0.0, shape=[output_dim], dtype=tf.float32),
44 |                                    trainable=True, name='biases')
45 |         else:
46 |             txt_fc2w = tf.Variable(net_data['txt_fc2'][0], name='weights')
47 |             txt_fc2b = tf.Variable(net_data['txt_fc2'][1], name='biases')
48 | 
49 |         txt_fc2l = tf.nn.bias_add(tf.matmul(txt_fc1, txt_fc2w), txt_fc2b)
50 |         if with_tanh:
51 |             txt_fc2 = tf.nn.tanh(txt_fc2l)
52 |         else:
53 |             txt_fc2 = txt_fc2l
54 | 
55 |         train_layers += [txt_fc2w, txt_fc2b]
56 |         deep_param_txt['txt_fc2'] = [txt_fc2w, txt_fc2b]
57 | 
58 |     # return the output of the text layer
59 |     return txt_fc2, deep_param_txt, train_layers
--------------------------------------------------------------------------------
/examples/dch/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import warnings
4 | import numpy as np
5 | import scipy.io as sio
6 | import model.dch as model
7 | import data_provider.image as dataset
8 | 
9 | from pprint import pprint
10 | 
11 | warnings.filterwarnings("ignore", category=DeprecationWarning)
12 | warnings.filterwarnings("ignore", category=FutureWarning)
13 | 
14 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
15 | 
16 | parser = argparse.ArgumentParser(description='Deep Cauchy Hashing')
17 | parser.add_argument('--lr', '--learning-rate', default=0.005, type=float)
18 | parser.add_argument('--output-dim', default=32, type=int)  # 256, 128
19 | parser.add_argument('--alpha', default=0.5, type=float)
20 | parser.add_argument('--bias', default=0.0, type=float)
21 | parser.add_argument('--gamma', default=20, type=float)
22 | parser.add_argument('--iter-num', default=10000, type=int)
23 | parser.add_argument('--q-lambda', default=0.001, type=float)
24 | parser.add_argument('--dataset', default='cifar10', type=str)
25 | parser.add_argument('--gpus', default='0', type=str)
26 | parser.add_argument('--log-dir', default='tflog', type=str)
27 | parser.add_argument('-b', '--batch-size', default=128, type=int)
28 | parser.add_argument('-vb', 
'--val-batch-size', default=100, type=int) 29 | parser.add_argument('--decay-step', default=3000, type=int) 30 | parser.add_argument('--decay-factor', default=0.5, type=float) 31 | 32 | tanh_parser = parser.add_mutually_exclusive_group(required=False) 33 | tanh_parser.add_argument('--with-tanh', dest='with_tanh', action='store_true') 34 | tanh_parser.add_argument('--without-tanh', dest='with_tanh', action='store_false') 35 | parser.set_defaults(with_tanh=True) 36 | 37 | parser.add_argument('--img-model', default='alexnet', type=str) 38 | parser.add_argument('--model-weights', type=str) 39 | parser.add_argument('--finetune-all', default=True, type=bool) 40 | parser.add_argument('--save-dir', default="./models/", type=str) 41 | parser.add_argument('--data-dir', default="../../data/", type=str) 42 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true') 43 | 44 | args = parser.parse_args() 45 | 46 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus 47 | 48 | label_dims = {'cifar10': 10, 'cub': 200, 'nuswide_81': 81, 'coco': 80} 49 | Rs = {'cifar10': 54000, 'nuswide_81': 5000, 'coco': 5000} 50 | args.R = Rs[args.dataset] 51 | args.label_dim = label_dims[args.dataset] 52 | 53 | args.img_tr = os.path.join(args.data_dir, args.dataset, "train.txt") 54 | args.img_te = os.path.join(args.data_dir, args.dataset, "test.txt") 55 | args.img_db = os.path.join(args.data_dir, args.dataset, "database.txt") 56 | 57 | pprint(vars(args)) 58 | 59 | data_root = os.path.join(args.data_dir, args.dataset) 60 | query_img, database_img = dataset.import_validation(data_root, args.img_te, args.img_db) 61 | 62 | if not args.evaluate: 63 | train_img = dataset.import_train(data_root, args.img_tr) 64 | model_weights = model.train(train_img, args) 65 | args.model_weights = model_weights 66 | 67 | maps = model.validation(database_img, query_img, args) 68 | for key in maps: 69 | print(("{}\t{}".format(key, maps[key]))) 70 | 71 | pprint(vars(args)) 72 | -------------------------------------------------------------------------------- /distance/npversion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import util 4 | 5 | def norm(x, keepdims=False): 6 | ''' 7 | Param: 8 | x: matrix of shape (n1, n2, ..., nk) 9 | keepdims: Whether keep dims or not 10 | Return: norm of matrix of shape (n1, n2, ..., n_{k-1}) 11 | ''' 12 | return np.sqrt(np.sum(np.square(x), axis=-1, keepdims=keepdims)) 13 | 14 | def normed(x): 15 | ''' 16 | Param: matrix of shape (n1, n2, ..., nk) 17 | Return: normed matrix 18 | ''' 19 | return x / (1e-20 + norm(x, keepdims=True)) 20 | 21 | def euclidean2(x1, x2): 22 | return np.sum(np.square(x1 - x2), axis=-1) 23 | 24 | def euclidean(x1, x2): 25 | return np.sqrt(euclidean2(x1, x2)) 26 | 27 | def averaged_euclidean2(x1, x2): 28 | return np.mean(np.square(x1 - x2), axis=-1) 29 | 30 | def averaged_euclidean(x1, x2): 31 | return np.sqrt(averaged_euclidean2(x1, x2)) 32 | 33 | def normed_euclidean2(x1, x2): 34 | return euclidean2(normed(x1), normed(x2)) 35 | 36 | def inner_product(x1, x2, pair=False): 37 | if pair: 38 | return - np.inner(x1, x2) 39 | else: 40 | return - np.sum(x1 * x2, axis=-1) 41 | 42 | def cosine(x1, x2): 43 | return (1 + inner_product(normed(x1), normed(x2))) / 2 44 | 45 | def hamming(x1, x2): 46 | K = x1.shape[1] 47 | return (K - np.dot(x1, x2.transpose())) / 2 48 | 49 | def distance(x1, x2=None, pair=True, dist_type="euclidean2", ifsign=False): 50 | ''' 51 | Param: 52 | x2: if x2 is None, distance 
between x1 and x1 will be returned.
53 |         pair: if True, the distance between every x1_i and x2_j is calculated
54 |               if False, the distance between x1_i and x2_i is calculated, and it requires x1 and x2 to have the same dimension.
55 |         dist_type: distance type, can be euclidean2, normed_euclidean2, inner_product, cosine
56 |     '''
57 |     if x2 is None:
58 |         x2 = x1
59 |     if ifsign:
60 |         x1 = util.sign(x1)
61 |         x2 = util.sign(x2)
62 |     if dist_type == 'inner_product':
63 |         return inner_product(x1, x2, pair)
64 |     if pair:
65 |         x1 = np.expand_dims(x1, 1)
66 |         x2 = np.expand_dims(x2, 0)
67 |     return getattr(sys.modules[__name__], dist_type)(x1, x2)
68 | 
69 | if __name__ == "__main__":
70 |     def myAssert(x1, x2):
71 |         assert np.mean(x1 - x2) < 1e-8
72 |     x1 = 2 * np.array([[1, 1, 1], [1, 1, 0], [1, 0, 1], [0, 1, 1]])
73 |     x2 = 3 * np.eye(3)
74 |     myAssert(distance(x1, x2, pair=True, dist_type="euclidean2"),
75 |              np.array([[ 9.,  9.,  9.],
76 |                        [ 5.,  5., 17.],
77 |                        [ 5., 17.,  5.],
78 |                        [17.,  5.,  5.]]))
79 |     myAssert(distance(x1, x2, pair=True, dist_type="normed_euclidean2"),
80 |              np.array([[ 0.84529946,  0.84529946,  0.84529946],
81 |                        [ 0.58578644,  0.58578644,  2.        ],
82 |                        [ 0.58578644,  2.        ,  0.58578644],
83 |                        [ 2.        ,  0.58578644,  0.58578644]]))
84 |     assert distance(x1, x2, pair=True, dist_type="cosine").shape == (4, 3)
85 |     assert distance(x1, x2, pair=True, dist_type="inner_product").shape == (4, 3)
86 | 
87 |     assert np.all(distance(x1, x1[::-1], pair=False, dist_type="euclidean2") == np.array([4, 8, 8, 4]))
88 |     myAssert(distance(x1, x1[::-1], pair=False, dist_type="normed_euclidean2"), np.array([0.36700684, 1., 1., 0.36700684]))
89 |     myAssert(distance(x1, x1[::-1], pair=False, dist_type="cosine"), np.array([0.09175171, 0.25, 0.25, 0.09175171]))
90 |     assert np.all(distance(x1, x1[::-1], pair=False, dist_type="inner_product") == np.array([-8, -4, -4, -8]))
91 | 
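92 | # A quick sketch (illustrative, with made-up 4-bit codes) of how these
93 | # helpers combine for hashing: binarize real-valued codes with util.sign,
94 | # then compare them with the pairwise Hamming distance:
95 | #
96 | #     codes = util.sign(np.array([[0.9, -0.2, 0.4, -0.7],
97 | #                                 [0.8,  0.1, 0.3, -0.6]]))
98 | #     print(hamming(codes, codes))   # [[0, 1], [1, 0]]
--------------------------------------------------------------------------------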
/data_provider/pq.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Dataset(object):
4 |     def __init__(self, dataset, output_dim, code_dim):
5 |         print("Initializing Dataset")
6 |         self._dataset = dataset
7 |         self.n_samples = dataset.n_samples
8 |         self._train = dataset.train
9 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
10 |         self._codes = np.zeros((self.n_samples, code_dim), dtype=np.float32)
11 | 
12 |         self._perm = np.arange(self.n_samples)
13 |         np.random.shuffle(self._perm)
14 |         self._index_in_epoch = 0
15 |         self._epochs_complete = 0
16 |         print("Dataset ready")
17 |         return
18 | 
19 |     def next_batch(self, batch_size):
20 |         """
21 |         Args:
22 |             batch_size
23 |         Returns:
24 |             [batch_size, (n_inputs)]: next batch images
25 |             [batch_size, n_class]: next batch labels
26 |         """
27 |         start = self._index_in_epoch
28 |         self._index_in_epoch += batch_size
29 |         # Another epoch has finished
30 |         if self._index_in_epoch > self.n_samples:
31 |             if self._train:
32 |                 # The training stage draws batches repeatedly
33 |                 self._epochs_complete += 1
34 |                 # Shuffle the data
35 |                 np.random.shuffle(self._perm)
36 |                 # Start next epoch
37 |                 start = 0
38 |                 self._index_in_epoch = batch_size
39 |             else:
40 |                 # The validation stage processes each sample only once
41 |                 start = self.n_samples - batch_size
42 |                 self._index_in_epoch = self.n_samples
43 |         end = self._index_in_epoch
44 | 
45 |         data, label = self._dataset.data(self._perm[start:end])
46 |         return (data, label, self.codes[self._perm[start:end], :])
47 | 
48 |     def next_batch_output_codes(self, batch_size):
49 |         start = self._index_in_epoch
50 |         self._index_in_epoch += batch_size
51 |         # Another epoch has finished
52 |         if self._index_in_epoch > self.n_samples:
53 |             if self._train:
54 |                 # Shuffle the data
55 |                 np.random.shuffle(self._perm)
56 |                 # Start next epoch
57 |                 start = 0
58 |                 self._index_in_epoch = batch_size
59 |             else:
60 |                 # The validation stage processes each sample only once
61 |                 start = self.n_samples - batch_size
62 |                 self._index_in_epoch = self.n_samples
63 |         end = self._index_in_epoch
64 | 
65 |         return (self.output[self._perm[start:end], :],
66 |                 self.codes[self._perm[start:end], :])
67 | 
68 |     def feed_batch_output(self, batch_size, output):
69 |         start = self._index_in_epoch - batch_size
70 |         end = self._index_in_epoch
71 |         self.output[self._perm[start:end], :] = output
72 |         return
73 | 
74 |     def feed_batch_codes(self, batch_size, codes):
75 |         """
76 |         Args:
77 |             batch_size
78 |             codes: [batch_size, code_dim]
79 |         """
80 |         start = self._index_in_epoch - batch_size
81 |         end = self._index_in_epoch
82 |         self.codes[self._perm[start:end], :] = codes
83 |         return
84 | 
85 |     @property
86 |     def output(self):
87 |         return self._output
88 | 
89 |     @property
90 |     def codes(self):
91 |         return self._codes
92 | 
93 |     @property
94 |     def label(self):
95 |         return self._dataset.get_labels()
96 | 
97 |     def finish_epoch(self):
98 |         self._index_in_epoch = 0
99 |         np.random.shuffle(self._perm)
100 | 
--------------------------------------------------------------------------------
/distance/tfversion.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import sys
3 | import numpy as np
4 | 
5 | def norm(x, keepdims=False):
6 |     '''
7 |     Param:
8 |         x: matrix of shape (n1, n2, ..., nk)
9 |         keepdims: Whether keep dims or not
10 |     Return: norm of matrix of shape (n1, n2, ..., n_{k-1})
11 |     '''
12 |     return tf.sqrt(tf.reduce_sum(tf.square(x), axis=-1, keepdims=keepdims))
13 | 
14 | def normed(x):
15 |     '''
16 |     Param: matrix of shape (n1, n2, ..., nk)
17 |     Return: normed matrix
18 |     '''
19 |     return x / (1e-20 + norm(x, keepdims=True))  # epsilon guards zero vectors, as in npversion
20 | 
21 | def euclidean2(x1, x2):
22 |     return tf.reduce_sum(tf.square(x1 - x2), axis=-1)
23 | 
24 | def euclidean(x1, x2):
25 |     return tf.sqrt(euclidean2(x1, x2))
26 | 
27 | def averaged_euclidean2(x1, x2):
28 |     return tf.reduce_mean(tf.square(x1 - x2), axis=-1)
29 | 
30 | def averaged_euclidean(x1, x2):
31 |     return tf.sqrt(averaged_euclidean2(x1, x2))
32 | 
33 | def normed_euclidean2(x1, x2):
34 |     return euclidean2(normed(x1), normed(x2))
35 | 
36 | def inner_product(x1, x2):
37 |     return - tf.reduce_sum(x1 * x2, axis=-1)
38 | 
39 | def cosine(x1, x2):
40 |     return (1 + inner_product(normed(x1), normed(x2))) / 2
41 | 
42 | def my_inner_product(x1, x2):
43 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
44 |     return (1 + inner_product(x1, x2) / K) / 2
45 | 
46 | def my_euclidean(x1, x2):
47 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
48 |     return euclidean(x1, x2) / tf.sqrt(4 * K)
49 | 
50 | def my_euclidean2(x1, x2):
51 |     K = tf.cast(tf.shape(x1)[-1], tf.float32)
52 |     return euclidean2(x1, x2) / (4 * K)
53 | 
54 | def my_normed_euclidean2(x1, x2):
55 |     return normed_euclidean2(x1, x2) / 4
56 | 
57 | def cos(x1, x2):
58 |     return 1 - 2 * cosine(x1, x2)
59 | 
60 | def distance(x1, x2=None, pair=True, dist_type="euclidean2"):
61 |     '''
62 |     Param:
63 |         x2: if x2 is None, distance between x1 and x1 will be returned.
64 |         pair: if True, the distance between every x1_i and x2_j is calculated
65 |               if False, the distance between x1_i and x2_i is calculated, and it requires x1 and x2 to have the same dimension.
66 | dist_type: distance type, can be euclidean2, normed_euclidean2, inner_product, cosine 67 | ''' 68 | if x2 is None: 69 | x2 = x1 70 | if pair: 71 | x1 = tf.expand_dims(x1, 1) 72 | x2 = tf.expand_dims(x2, 0) 73 | return getattr(sys.modules[__name__], dist_type)(x1, x2) 74 | 75 | if __name__ == "__main__": 76 | sess = tf.InteractiveSession() 77 | def myAssert(x1, x2): 78 | assert np.mean(x1 - x2) < 1e-8 79 | x1 = 2 * np.array([[1, 1, 1], [1, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=float) 80 | x2 = 3 * np.eye(3, dtype=float) 81 | myAssert(distance(x1, x2, pair=True, dist_type="euclidean2").eval(), 82 | np.array([[ 9., 9., 9.], 83 | [ 5., 5., 17.], 84 | [ 5., 17., 5.], 85 | [ 17., 5., 5.]]) ) 86 | myAssert(distance(x1, x2, pair=True, dist_type="normed_euclidean2").eval(), 87 | np.array([[ 0.84529946, 0.84529946, 0.84529946], 88 | [ 0.58578644, 0.58578644, 2. ], 89 | [ 0.58578644, 2. , 0.58578644], 90 | [ 2. , 0.58578644, 0.58578644]])) 91 | assert distance(x1, x2, pair=True, dist_type="cosine").eval().shape == (4, 3) 92 | assert distance(x1, x2, pair=True, dist_type="inner_product").eval().shape == (4, 3) 93 | 94 | assert np.all(distance(x1, x1[::-1], pair=False, dist_type="euclidean2").eval() == np.array([4, 8, 8, 4])) 95 | myAssert(distance(x1, x1[::-1], pair=False, dist_type="normed_euclidean2").eval(), np.array([ 0.36700684, 1., 1., 0.36700684])) 96 | myAssert(distance(x1, x1[::-1], pair=False, dist_type="cosine").eval(), np.array([ 0.09175171, 0.25, 0.25, 0.09175171])) 97 | assert np.all(distance(x1, x1[::-1], pair=False, dist_type="inner_product").eval() == np.array([-8, -4, -4, -8])) 98 | -------------------------------------------------------------------------------- /examples/dhn/train_val_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import warnings 5 | import data_provider.image as dataset 6 | import model.dhn as model 7 | from pprint import pprint 8 | from util import Logger, str2bool 9 | 10 | 11 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21, 12 | 'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10} 13 | 14 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000, 15 | 'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000} 16 | 17 | 18 | def parse_args(argv): 19 | parser = argparse.ArgumentParser(description='Train and val model') 20 | 21 | # algorithm config 22 | algorithm_group = parser.add_argument_group(title='Algorithm config') 23 | algorithm_group.add_argument('--output-dim', type=int, default=32) 24 | algorithm_group.add_argument('--cq-lambda', type=float, default=0.01) 25 | algorithm_group.add_argument('--alpha', type=float, default=10) 26 | # network config 27 | network_group = parser.add_argument_group(title='Network config') 28 | network_group.add_argument('--gpu_id', type=str, default='0') 29 | network_group.add_argument('--max-iter', type=int, default=10000) 30 | network_group.add_argument('--batch-size', type=int, default=128) 31 | network_group.add_argument('--val-batch-size', type=int, default=100) 32 | network_group.add_argument('--decay-step', type=int, default=3000) 33 | network_group.add_argument('--learning-rate', type=float, default=0.0001) 34 | network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5) 35 | network_group.add_argument('--network', type=str, default='alexnet') 36 | network_group.add_argument('--network-weights', type=str) 37 | 
    network_group.add_argument('--finetune-all', type=str2bool, default=True)
38 |     network_group.add_argument('--test', default=False, action='store_true')
39 |     network_group.add_argument('--debug', default=False, action='store_true')
40 |     # dataset config
41 |     dataset_group = parser.add_argument_group(title='Dataset config')
42 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
43 |     dataset_group.add_argument('--prefix', type=str, default='1')
44 |     # config process
45 |     config, rest = parser.parse_known_args()
46 |     _dataset = config.dataset
47 |     _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dhn/' + \
48 |                 f'{config.prefix}_lambda{config.cq_lambda}_alpha{config.alpha}'
49 |     dataset_group.add_argument('--R', type=int, default=Rs[_dataset])
50 |     dataset_group.add_argument('--label-dim', type=int, default=label_dims[_dataset])
51 |     dataset_group.add_argument('--save-dir', type=str, default=_save_dir)
52 | 
53 |     return parser.parse_args(argv)
54 | 
55 | 
56 | def main(config):
57 |     warnings.filterwarnings("ignore", category=DeprecationWarning)
58 |     warnings.filterwarnings("ignore", category=FutureWarning)
59 |     os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
60 |     os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id
61 | 
62 |     if not os.path.exists(config.save_dir):
63 |         os.makedirs(config.save_dir)
64 |     sys.stdout = Logger(os.path.join(config.save_dir, 'train.log'))
65 | 
66 |     pprint(vars(config))
67 |     data_root = os.path.join('../../data', config.dataset)
68 |     img_tr = f'{data_root}/train.txt'
69 |     img_te = f'{data_root}/test.txt'
70 |     img_db = f'{data_root}/database.txt'
71 | 
72 |     if config.test:
73 |         config.network_weights = os.path.join(config.save_dir, 'network_weights.npy')
74 |     else:
75 |         train_img = dataset.import_train(data_root, img_tr)
76 |         network_weights = model.train(train_img, config)
77 |         config.network_weights = network_weights
78 | 
79 |     query_img, database_img = dataset.import_validation(data_root, img_te, img_db)
80 |     maps = model.validation(database_img, query_img, config)
81 | 
82 |     for key in maps:
83 |         print(f"{key}: {maps[key]}")
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     main(parse_args(sys.argv[1:]))
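88 | 
89 | # Typical invocations (illustrative; the list files under ../../data are
90 | # assumed to exist, as in the dvsq train_val.sh above):
91 | #
92 | #     python train_val_script.py --gpu_id 0 --output-dim 32 --dataset cifar10
93 | #     python train_val_script.py --test --prefix 1    # evaluate saved weights
--------------------------------------------------------------------------------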
/data_provider/text/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | #     http://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # ==============================================================================
14 | 
15 | """Routine for decoding the NUS-WIDE binary file format."""
16 | 
17 | import h5py
18 | import numpy as np
19 | 
20 | 
21 | # Process images of this size. Note that this differs from the original nus-wide
22 | # image size of 224 x 224. If one alters this number, then the entire model
23 | # architecture will change and any model would need to be retrained.
24 | 
25 | # Global constants describing the NUS-WIDE data set.
26 | 
27 | class Dataset(object):
28 |     def __init__(self, modal, path, train=True):
29 |         self.lines = open(path, 'r').readlines()
30 |         self.n_samples = len(self.lines)
31 |         self.train = train
32 |         if modal == 'txt':
33 |             self.modal = 'txt'
34 |             self._txt = [0] * self.n_samples
35 |             self._label = [0] * self.n_samples
36 |             self._load = [0] * self.n_samples
37 |             self._load_num = 0
38 |             self._status = 0
39 |             self.data = self.txt_data
40 |             self.all_data = self.txt_all_data
41 | 
42 |     def txt_data(self, index):
43 |         if self._status:
44 |             return (self._txt[index, :], self._label[index, :])
45 |         else:
46 |             ret_txt = []
47 |             ret_label = []
48 |             for i in index:
49 |                 try:
50 |                     if self.train:
51 |                         if not self._load[i]:
52 |                             self._txt[i] = h5py.File(self.lines[i].split('\n')[0], 'r')["data"][0]
53 |                             self._label[i] = [int(j) for j in h5py.File(self.lines[i].split('\n')[0], 'r')['label1'][0]]
54 |                             self._load[i] = 1
55 |                             self._load_num += 1
56 |                         ret_txt.append(self._txt[i])
57 |                         ret_label.append(self._label[i])
58 |                     else:
59 |                         # self._label[i] = [int(j) for j in h5py.File(self.lines[i].split('\n')[0], 'r')['label1'][0]]
60 |                         f = h5py.File(self.lines[i].split('\n')[0], 'r')
61 |                         ret_txt.append(f["data"][0])
62 |                         ret_label.append([int(j) for j in f['label1'][0]])
63 |                         f.close()
64 |                 except Exception as e:
65 |                     print('cannot open {}, exception: {}'.format(self.lines[i].split('\n')[0], e))
66 | 
67 |             if self._load_num == self.n_samples:
68 |                 self._status = 1
69 |                 self._txt = np.reshape(np.asarray(self._txt), (self.n_samples, len(self._txt[0])))
70 |                 self._label = np.asarray(self._label)
71 |             return np.reshape(np.asarray(ret_txt), (len(index), -1)), np.asarray(ret_label)
72 | 
73 |     def txt_all_data(self):
74 |         if self._status:
75 |             return (self._txt, self._label)
76 | 
77 |     def get_labels(self):
78 |         for i in range(self.n_samples):
79 |             if self._label[i] == 0:
80 |                 if self.modal == 'img':
81 |                     self._label[i] = [int(j) for j in self.lines[i].strip().split()[1:]]
82 |                 elif self.modal == 'txt':
83 |                     f = h5py.File(self.lines[i].split('\n')[0], 'r')
84 |                     self._label[i] = [int(j) for j in f['label1'][0]]
85 |                     f.close()
86 |         return np.asarray(self._label)
87 | 
88 | 
89 | def import_train(txt_tr):
90 |     return (Dataset('txt', txt_tr, train=True))
91 | 
92 | 
93 | def import_validation(txt_te, txt_db):
94 |     return (Dataset('txt', txt_te, train=False),
95 |             Dataset('txt', txt_db, train=False))
96 | 
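97 | # Usage sketch (illustrative; the list-file path is hypothetical and each
98 | # listed line is expected to point at an .h5 file with 'data' and 'label1'):
99 | #
100 | #     train_txt = import_train('../../data/nuswide_81/text_train.txt')
101 | #     vectors, labels = train_txt.data([0, 1, 2])   # first three samples
--------------------------------------------------------------------------------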
/examples/dtq/train_val_script.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.io as sio
3 | import warnings
4 | import data_provider.image as dataset
5 | import model.dtq as model
6 | from pprint import pprint
7 | import os
8 | import argparse
9 | 
10 | warnings.filterwarnings("ignore", category=DeprecationWarning)
11 | warnings.filterwarnings("ignore", category=FutureWarning)
12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
13 | 
14 | parser = argparse.ArgumentParser(description='Triplet Hashing')
15 | parser.add_argument('--lr', '--learning-rate', default=0.00003, type=float)
16 | parser.add_argument('--triplet-margin', default=30, type=float)
17 | parser.add_argument('--select-strategy', default='margin', choices=['hard', 'all', 'margin'])
18 | parser.add_argument('--output-dim', default=64, type=int)  # 256, 128
19 | parser.add_argument('--epochs', default=100, type=int)
20 | parser.add_argument('--cq-lambda', default=0, type=float)
21 | parser.add_argument('--n-subspace', default=4, type=int)
22 | parser.add_argument('--n-subcenter', default=256, type=int)
23 | parser.add_argument('--dataset', default='cifar10', type=str)
24 | parser.add_argument('--gpus', default='0,1,2,3', type=str)
25 | parser.add_argument('--log-dir', default='tflog', type=str)
26 | parser.add_argument('--dist-type', default='euclidean2', type=str,
27 |                     choices=['euclidean2', 'cosine', 'inner_product', 'euclidean'])
28 | parser.add_argument('-b', '--batch-size', default=128, type=int)
29 | parser.add_argument('-vb', '--val-batch-size', default=16, type=int)
30 | parser.add_argument('--decay-step', default=10000, type=int)
31 | parser.add_argument('--decay-factor', default=0.1, type=float)
32 | 
33 | tanh_parser = parser.add_mutually_exclusive_group(required=False)
34 | tanh_parser.add_argument('--with-tanh', dest='with_tanh', action='store_true')
35 | tanh_parser.add_argument('--without-tanh', dest='with_tanh', action='store_false')
36 | parser.set_defaults(with_tanh=True)
37 | 
38 | parser.add_argument('--img-model', default='alexnet', type=str)
39 | parser.add_argument('--model-weights', type=str,
40 |                     default='../../deephash/architecture/pretrained_model/reference_pretrain.npy')
41 | parser.add_argument('--finetune-all', default=True, type=bool)
42 | parser.add_argument('--max-iter-update-b', default=3, type=int)
43 | parser.add_argument('--max-iter-update-Cb', default=1, type=int)
44 | parser.add_argument('--code-batch-size', default=500, type=int)
45 | parser.add_argument('--n-part', default=20, type=int)
46 | parser.add_argument('--triplet-thresold', default=64000, type=int)
47 | parser.add_argument('--save-dir', default="./models/", type=str)
48 | parser.add_argument('--data-dir', default="../../data/", type=str)
49 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true')
50 | parser.add_argument('--val-freq', default=1, type=int)
51 | 
52 | args = parser.parse_args()
53 | 
54 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
55 | 
56 | label_dims = {'cifar10': 10, 'nuswide_81': 81, 'coco': 80, 'imagenet': 100}
57 | Rs = {'cifar10': 54000, 'nuswide_81': 5000, 'coco': 5000, 'imagenet': 5000}
58 | args.R = Rs[args.dataset]
59 | args.label_dim = label_dims[args.dataset]
60 | 
61 | args.img_tr = os.path.join(args.data_dir, args.dataset, "train.txt")
62 | args.img_te = os.path.join(args.data_dir, args.dataset, "test.txt")
63 | args.img_db = os.path.join(args.data_dir, args.dataset, "database.txt")
64 | 
65 | pprint(vars(args))
66 | 
67 | data_root = os.path.join(args.data_dir, args.dataset)
68 | query_img, database_img = dataset.import_validation(data_root, args.img_te, args.img_db)
69 | 
70 | # if not args.evaluate:
71 | #     train_img = dataset.import_train(data_root, args.img_tr)
72 | #     model_weights = model.train(train_img, database_img, query_img, args)
73 | #     args.model_weights = model_weights
74 | # else:
75 | #     maps = model.validation(database_img, query_img, args)
76 | #     for key in maps:
77 | #         print("{}\t{}".format(key, maps[key]))
78 | 
79 | train_img = dataset.import_train(data_root, args.img_tr)
80 | model_weights = model.train(train_img, query_img, database_img, args)
81 | 
82 | args.model_weights = model_weights
83 | maps = model.validation(query_img, database_img, args)
84 | for key in maps:
85 |     print("{}\t{}".format(key, maps[key]))
86 | 
87 | pprint(vars(args))
--------------------------------------------------------------------------------
/data_provider/image/__init__.py:
--------------------------------------------------------------------------------
1 | # ==============================================================================
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Routine for decoding the NUS-WIDE binary file format."""
17 | 
18 | import os
19 | import cv2
20 | import numpy as np
21 | 
22 | 
23 | # Process images of this size. Note that this differs from the original nus-wide
24 | # image size of 224 x 224. If one alters this number, then the entire model
25 | # architecture will change and any model would need to be retrained.
26 | 
27 | # Global constants describing the NUS-WIDE data set.
28 | 
29 | 
30 | class Dataset(object):
31 |     def __init__(self, modal, data_root, path, train=True):
32 |         self.lines = open(path, 'r').readlines()
33 |         self.data_root = data_root
34 |         self.n_samples = len(self.lines)
35 |         self.train = train
36 |         assert modal == 'img'
37 |         self.modal = 'img'
38 |         self._img = [0] * self.n_samples
39 |         self._label = [0] * self.n_samples
40 |         self._load = [0] * self.n_samples
41 |         self._load_num = 0
42 |         self._status = 0
43 |         self.data = self.img_data
44 |         self.all_data = self.img_all_data
45 | 
46 |     def get_img(self, i):
47 |         path = os.path.join(self.data_root, self.lines[i].strip().split()[0])
48 |         return cv2.resize(cv2.imread(path), (256, 256))
49 | 
50 |     def get_label(self, i):
51 |         return [int(j) for j in self.lines[i].strip().split()[1:]]
52 | 
53 |     def img_data(self, indexes):
54 |         if self._status:
55 |             return (self._img[indexes, :], self._label[indexes, :])
56 |         else:
57 |             ret_img = []
58 |             ret_label = []
59 |             for i in indexes:
60 |                 try:
61 |                     if self.train:
62 |                         if not self._load[i]:
63 |                             self._img[i] = self.get_img(i)
64 |                             self._label[i] = self.get_label(i)
65 |                             self._load[i] = 1
66 |                             self._load_num += 1
67 |                         ret_img.append(self._img[i])
68 |                         ret_label.append(self._label[i])
69 |                     else:
70 |                         self._label[i] = self.get_label(i)
71 |                         ret_img.append(self.get_img(i))
72 |                         ret_label.append(self._label[i])
73 |                 except Exception as e:
74 |                     print('cannot open {}, exception: {}'.format(self.lines[i].strip(), e))
75 | 
76 |             if self._load_num == self.n_samples:
77 |                 self._status = 1
78 |                 self._img = np.asarray(self._img)
79 |                 self._label = np.asarray(self._label)
80 |             return (np.asarray(ret_img), np.asarray(ret_label))
81 | 
82 |     def img_all_data(self):
83 |         if self._status:
84 |             return (self._img, self._label)
85 | 
86 |     def get_labels(self):
87 |         for i in range(self.n_samples):
88 |             if not isinstance(self._label[i], list):
89 |                 self._label[i] = [int(j)
90 |                                   for j in self.lines[i].strip().split()[1:]]
91 |         return np.asarray(self._label)
92 | 
93 | 
94 | def import_train(data_root, img_tr):
95 |     '''
96 |     return the img_tr training dataset
97 |     '''
98 |     return (Dataset('img', data_root, img_tr, train=True))
99 | 
100 | 
101 | def import_validation(data_root, img_te, img_db):
102 |     '''
103 |     return the (img_te, img_db) datasets
104 |     '''
105 |     return (Dataset('img', data_root, img_te, train=False),
106 |             Dataset('img', data_root, img_db, train=False))
107 | 
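108 | # Usage sketch (illustrative; assumes the list files referenced by the
109 | # example scripts, e.g. ../../data/cifar10/train.txt, are in place):
110 | #
111 | #     data_root = '../../data/cifar10'
112 | #     train_img = import_train(data_root, data_root + '/train.txt')
113 | #     imgs, labels = train_img.data([0, 1, 2])   # 256x256 BGR images + labels
--------------------------------------------------------------------------------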
/examples/dhcs/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import warnings
5 | import data_provider.image as dataset
6 | import model.dhcs as model
7 | from pprint import pprint
8 | from util import Logger, str2bool
9 | 
10 | 
11 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21,
12 |               'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10}
13 | 
14 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000,
15 |       'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000}
16 | 
17 | 
18 | def parse_args(argv):
19 |     parser = argparse.ArgumentParser(description='Train and val model')
20 | 
21 |     # algorithm config
22 |     algorithm_group = parser.add_argument_group(title='Algorithm config')
23 |     algorithm_group.add_argument('--bit', type=int, default=32)
24 |     algorithm_group.add_argument('--q-lambda', type=float, default=0.01)
25 |     algorithm_group.add_argument('--b-lambda', type=float, default=0.0)
26 |     algorithm_group.add_argument('--i-lambda', type=float, default=0.0)
27 |     algorithm_group.add_argument('--alpha', type=float, default=5)
28 |     # network config
29 |     network_group = parser.add_argument_group(title='Network config')
30 |     network_group.add_argument('--gpus', type=str, default='0')
31 |     network_group.add_argument('--max-iter', type=int, default=10000)
32 |     network_group.add_argument('--batch-size', type=int, default=128)
33 |     network_group.add_argument('--val-batch-size', type=int, default=100)
34 |     network_group.add_argument('--lr', type=float, default=0.0001)
35 |     network_group.add_argument('--lr-decay-factor', type=float, default=0.5)
36 |     network_group.add_argument('--decay-step', type=int, default=3000)
37 |     network_group.add_argument('--network', type=str, default='alexnet')
38 |     network_group.add_argument('--network-weights', type=str)
39 |     network_group.add_argument('--finetune-all', type=str2bool, default=True)
40 |     network_group.add_argument('--test', default=False, action='store_true')
41 |     network_group.add_argument('--debug', default=False, action='store_true')
42 |     # dataset config
43 |     dataset_group = parser.add_argument_group(title='Dataset config')
44 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
45 |     dataset_group.add_argument('--prefix', type=str, default='1')
46 |     dataset_group.add_argument('--suffix', type=str, default='exp')
47 |     # config process
48 |     config, rest = parser.parse_known_args()
49 |     _dataset = config.dataset
50 |     _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.bit}bit_{config.suffix}/' + \
51 |                 f'{config.prefix}_q{config.q_lambda}'
52 |     dataset_group.add_argument('--R', type=int, default=Rs[_dataset])
53 |     dataset_group.add_argument('--label-dim', type=int, default=label_dims[_dataset])
54 |     dataset_group.add_argument('--save-dir', type=str, default=_save_dir)
55 | 
56 |     return parser.parse_args(argv)
57 | 
58 | 
59 | def main(config):
60 |     warnings.filterwarnings("ignore", category=DeprecationWarning)
61 |     warnings.filterwarnings("ignore", category=FutureWarning)
62 |     os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
63 |     os.environ["CUDA_VISIBLE_DEVICES"] = config.gpus
64 | 
65 |     if not os.path.exists(config.save_dir):
66 |         os.makedirs(config.save_dir)
67 |     sys.stdout = Logger(os.path.join(config.save_dir, 
'train.log')) 68 | 69 | pprint(vars(config)) 70 | data_root = os.path.join('../../data', config.dataset) 71 | config.wordvec_dict = f'{data_root}/wordvec.txt' 72 | img_tr = f'{data_root}/train.txt' 73 | img_te = f'{data_root}/test.txt' 74 | img_db = f'{data_root}/database.txt' 75 | 76 | if config.test == True: 77 | # config.save_dir = '../snapshot/cifar10_alexnet_32bit_hyper_sigmoid/debug' 78 | config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 79 | else: 80 | train_img = dataset.import_train(data_root, img_tr) 81 | network_weights = model.train(train_img, config) 82 | config.network_weights = network_weights 83 | 84 | sys.stdout = Logger(os.path.join(config.save_dir, 'test.log')) 85 | query_img, database_img = dataset.import_validation(data_root, img_te, img_db) 86 | maps = model.validation(database_img, query_img, config) 87 | 88 | for key in maps: 89 | print(f"{key}: {maps[key]}") 90 | 91 | 92 | if __name__ == "__main__": 93 | main(parse_args(sys.argv[1:])) -------------------------------------------------------------------------------- /architecture/vgg_f.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | import scipy.misc 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | 8 | def img_vgg_f(input_image, bit, model_weights=None): 9 | if model_weights is None: 10 | dir_path = os.path.dirname(os.path.realpath(__file__)) 11 | model_weights = os.path.join(dir_path, "pretrained_model/imagenet-vgg-f.mat") 12 | 13 | print("loading model from ", model_weights) 14 | data = scipy.io.loadmat(model_weights) 15 | 16 | layers = ( 17 | 'conv1', 'relu1', 'norm1', 'pool1','conv2', 'relu2', 'norm2', 'pool2','conv3', 'relu3', 'conv4', 'relu4', 'conv5', 18 | 'relu5', 'pool5','fc6', 'relu6', 'fc7', 'relu7','fc8') 19 | weights = data['layers'][0] 20 | mean = data['normalization'][0][0][0] 21 | net = {} 22 | ops = [] 23 | current = tf.convert_to_tensor(input_image,dtype='float32') 24 | for i, name in enumerate(layers[:-1]): 25 | if name.startswith('conv'): 26 | kernels, bias = weights[i][0][0][0][0] 27 | # matconvnet: weights are [width, height, in_channels, out_channels] 28 | # tensorflow: weights are [height, width, in_channels, out_channels] 29 | #kernels = np.transpose(kernels, (1, 0, 2, 3)) 30 | 31 | bias = bias.reshape(-1) 32 | pad = weights[i][0][0][1] 33 | stride = weights[i][0][0][4] 34 | current = _conv_layer(current,kernels,bias,pad,stride,i,ops,net) 35 | elif name.startswith('relu'): 36 | current = tf.nn.relu(current) 37 | elif name.startswith('pool'): 38 | stride = weights[i][0][0][1] 39 | pad = weights[i][0][0][2] 40 | area = weights[i][0][0][5] 41 | current = _pool_layer(current,stride,pad,area) 42 | elif name.startswith('fc'): 43 | kernels, bias = weights[i][0][0][0][0] 44 | # matconvnet: weights are [width, height, in_channels, out_channels] 45 | # tensorflow: weights are [height, width, in_channels, out_channels] 46 | #kernels = np.transpose(kernels, (1, 0, 2, 3)) 47 | 48 | bias = bias.reshape(-1) 49 | current = _full_conv(current,kernels,bias,i,ops,net) 50 | elif name.startswith('norm'): 51 | current = tf.nn.local_response_normalization(current, depth_radius=2, bias=2.000, alpha=0.0001, beta=0.75) 52 | net[name] = current 53 | W_fc8 = tf.random_normal([4096, bit], stddev=1.0) * 0.01 54 | 55 | b_fc8 = tf.random_normal([bit],stddev = 1.0) * 0.01 56 | w = tf.Variable(W_fc8, name='w' + str(20)) 57 | b = tf.Variable(b_fc8, name='bias' + str(20)) 58 | 59 | ### debugging................... 
60 | # layer8 = scipy.io.loadmat('data/wb-image.mat') 61 | # 62 | # w = tf.Variable(np.squeeze(layer8['w8']) * 0.01, name='w' + str(20)) 63 | # b = tf.Variable(np.squeeze(layer8['b8']) * 0.01, name='bias' + str(20)) 64 | 65 | ops.append(w) 66 | ops.append(b) 67 | 68 | fc8 = tf.matmul(tf.squeeze(current),w) + b 69 | net['weigh21'] = w 70 | net['b21'] = b 71 | net[layers[-1]] = fc8 72 | return net, mean 73 | 74 | def _conv_layer(input, weights, bias,pad,stride,i,ops,net): 75 | pad = pad[0] 76 | stride= stride[0] 77 | input = tf.pad(input, [[0, 0], [pad[0], pad[1]], [pad[2], pad[3]], [0, 0]], "CONSTANT") 78 | w = tf.Variable(weights,name='w'+str(i),dtype='float32') 79 | b = tf.Variable(bias,name='bias'+str(i),dtype='float32') 80 | ops.append(w) 81 | ops.append(b) 82 | net['weights' + str(i)] = w 83 | net['b' + str(i)] = b 84 | conv = tf.nn.conv2d(input, w, strides=[1,stride[0],stride[1],1],padding='VALID',name='conv'+str(i)) 85 | return tf.nn.bias_add(conv, b,name='add'+str(i)) 86 | 87 | def _full_conv(input, weights, bias,i,ops,net): 88 | w = tf.Variable(weights, name='w' + str(i),dtype='float32') 89 | b = tf.Variable(bias, name='bias' + str(i),dtype='float32') 90 | ops.append(w) 91 | ops.append(b) 92 | net['weights' + str(i)] = w 93 | net['b' + str(i)] = b 94 | conv = tf.nn.conv2d(input, w,strides=[1,1,1,1],padding='VALID',name='fc'+str(i)) 95 | return tf.nn.bias_add(conv, b,name='add'+str(i)) 96 | 97 | def _pool_layer(input,stride,pad,area): 98 | pad = pad[0] 99 | area = area[0] 100 | stride = stride[0] 101 | input = tf.pad(input, [[0, 0], [pad[0], pad[1]], [pad[2], pad[3]], [0, 0]], "CONSTANT") 102 | return tf.nn.max_pool(input, ksize=[1, area[0], area[1], 1], strides=[1,stride[0],stride[1],1],padding='VALID') 103 | 104 | def preprocess(image, mean_pixel): 105 | return image - mean_pixel 106 | 107 | 108 | def unprocess(image, mean_pixel): 109 | return image + mean_pixel 110 | 111 | def get_meanpix(data_path): 112 | data = scipy.io.loadmat(data_path) 113 | mean = data['normalization'][0][0][0] 114 | return mean 115 | -------------------------------------------------------------------------------- /util/visualize.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import torch.optim as optim 6 | from scipy.spatial import distance 7 | from sklearn import manifold 8 | 9 | 10 | def plot_distribution(data, path): 11 | N, D = data.shape 12 | plt.figure(figsize=(32, D)); 13 | for i in range(1, D+1): 14 | plt.subplot(D//4, 4, i); 15 | commutes = pd.Series(data[:, i-1]) 16 | commutes.plot.hist(grid=True, bins=200, rwidth=0.9, color='#607c8e'); 17 | plt.title(f'{i}bit') 18 | plt.savefig(f"{path}/data_distribution.png") 19 | 20 | res = '' 21 | for i in range(1, 11): 22 | t = i / 10 23 | ratio = (np.sum(data.flatten()>=t) + np.sum(data.flatten()<=-t)) / (N * D) 24 | res += f'threshold: {t:.1f}, quantizaion ratio: {ratio:.5f}\n' 25 | return res 26 | 27 | 28 | def plot_distance(db_feats, db_label, query_feats, query_label, path): 29 | S = np.matmul(db_label, query_label.transpose()) 30 | N = np.sum(S==1) 31 | 32 | plt.figure(figsize=[16, 6]) 33 | plt.subplot(121) 34 | cosine_32bit = distance.cdist(db_feats, query_feats, metric='cosine') / 2 35 | plt.title('cosine distribution') 36 | commutes = pd.Series(np.random.choice(cosine_32bit[S==1].flatten(), N)) 37 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#607c8e', alpha=0.7); 38 | commutes = 
pd.Series(np.random.choice(cosine_32bit[S==0].flatten(), N)) 39 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#500c8e', alpha=0.7); 40 | 41 | plt.subplot(122) 42 | euclidean_32bit = distance.cdist(db_feats, query_feats, metric='euclidean') 43 | plt.title('euclidean distribution') 44 | commutes = pd.Series(np.random.choice(euclidean_32bit[S==1].flatten(), N)) 45 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#607c8e', alpha=0.7); 46 | commutes = pd.Series(np.random.choice(euclidean_32bit[S==0].flatten(), N)) 47 | commutes.plot.hist(grid=True, bins=200, rwidth=1.5, color='#500c8e', alpha=0.7); 48 | 49 | plt.savefig(f"{path}/distance_distribution.png") 50 | 51 | 52 | def plot_tsne(data, label, path, R=2000): 53 | if label.shape[1] != 10: 54 | return 55 | label2name = np.array(['airplane', 'automobile', 'bird', 'cat', 'deer', \ 56 | 'dog', 'frog', 'horse', 'ship', 'truck']) 57 | label2color = np.array([(1,0,0), (0,1,0), (0,0,1), (1,0,1), (1,1,0), \ 58 | (0,1,1), (1,0.5,0), (0,0,0), (0.75,0.75,0.75), (0.25,0.5,0.5)]) 59 | if label.ndim > 1: 60 | label = label.argmax(axis=1) 61 | plt.figure(figsize=(16, 12)); 62 | embed = TSNE(n_components=2, perplexity=30, lr=1, eps=1e-9, n_iter=2000, device='cuda').fit_transform(data[:R]) 63 | plt.scatter(embed[:, 0], embed[:, 1], c=label2color[label[:R]], s=10) 64 | plt.savefig(f"{path}/data_t-SNE.png") 65 | 66 | 67 | class TSNE(object): 68 | 69 | def __init__(self, n_components=2, perplexity=30, lr=1, eps=1e-9, n_iter=2000, device='cpu'): 70 | self.perplexity = perplexity 71 | self.lr = lr 72 | self.eps = eps 73 | self.n_iter = n_iter 74 | self.device = device 75 | self.n_components = n_components 76 | 77 | def t_distribution(self, y): 78 | n = y.shape[0] 79 | dist = torch.sum((y.reshape(n, 1, -1) - y.reshape(1, n, -1)) ** 2, -1) 80 | affinity = 1 / (1 + dist) 81 | affinity *= (1 - torch.eye(n, device=self.device)) # set diag to zero 82 | q = affinity / affinity.sum() + self.eps 83 | return q 84 | 85 | def fit_transform(self, x): 86 | dist2 = distance.squareform(distance.pdist(x, metric='sqeuclidean')) 87 | p = distance.squareform(manifold.t_sne._joint_probabilities(dist2, self.perplexity, False)) + self.eps 88 | 89 | p = torch.tensor(p, device=self.device, dtype=torch.float32).reshape(-1) 90 | log_p = torch.log(p) 91 | 92 | y = torch.randn([dist2.shape[0], self.n_components], device=self.device, requires_grad=True) 93 | optimizer = optim.Adam([y], lr=self.lr) 94 | criterion = torch.nn.KLDivLoss() 95 | 96 | for i_iter in range(self.n_iter): 97 | q = self.t_distribution(y).reshape(-1) 98 | loss = (p * (log_p - torch.log(q))).sum() 99 | optimizer.zero_grad() 100 | loss.backward() 101 | optimizer.step() 102 | 103 | return y.detach().cpu().numpy() 104 | -------------------------------------------------------------------------------- /examples/dqn/train_val_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import warnings 5 | import data_provider.image as dataset 6 | import model.dqn as model 7 | from pprint import pprint 8 | from util import Logger, str2bool 9 | 10 | 11 | warnings.filterwarnings("ignore", category=DeprecationWarning) 12 | warnings.filterwarnings("ignore", category=FutureWarning) 13 | 14 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21, 15 | 'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10} 16 | 17 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 
5000, 18 | 'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000} 19 | 20 | 21 | def parse_args(argv): 22 | parser = argparse.ArgumentParser(description='Train and val') 23 | 24 | # algorithm config 25 | algorithm_group = parser.add_argument_group(title='Algorithm config') 26 | algorithm_group.add_argument('--output-dim', type=int, default=64) 27 | algorithm_group.add_argument('--max-iter-update-b', type=int, default=3) 28 | algorithm_group.add_argument('--max-iter-update-Cb', type=int, default=1) 29 | algorithm_group.add_argument('--cq-lambda', type=float, default=0.0001) 30 | algorithm_group.add_argument('--code-batch-size', type=int, default=500) 31 | algorithm_group.add_argument('--n-subspace', type=int, default=4) 32 | algorithm_group.add_argument('--n-subcenter', type=int, default=256) 33 | # network config 34 | network_group = parser.add_argument_group(title='Network config') 35 | network_group.add_argument('--gpu_id', type=str, default='0') 36 | network_group.add_argument('--max-iter', type=int, default=5000) 37 | network_group.add_argument('--batch-size', type=int, default=256) 38 | network_group.add_argument('--val-batch-size', type=int, default=100) 39 | network_group.add_argument('--decay-step', type=int, default=1000, help='Epochs after which learning rate decays') 40 | network_group.add_argument('--learning-rate', type=float, default=0.002) # 0.02 for DVSQ, 0.002 for DQN 41 | network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5, help='Learning rate decay factor') 42 | network_group.add_argument('--network', type=str, default='alexnet') 43 | network_group.add_argument('--network-weights', type=str) 44 | network_group.add_argument('--finetune-all', type=str2bool, default=True) 45 | network_group.add_argument('--test', default=False, action='store_true') 46 | network_group.add_argument('--debug', default=False, action='store_true') 47 | # dataset config 48 | dataset_group = parser.add_argument_group(title='Dataset config') 49 | dataset_group.add_argument('--dataset', type=str, default='cifar10') 50 | dataset_group.add_argument('--prefix', type=str, default='1') 51 | # config process 52 | config, rest = parser.parse_known_args() 53 | _dataset = config.dataset 54 | _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dqn/' + \ 55 | f'{config.prefix}_subspace{config.n_subspace}_subcenter{config.n_subcenter}' 56 | dataset_group.add_argument('--R', type=int, default=Rs[_dataset]) 57 | dataset_group.add_argument('--label-dim', type=str, default=label_dims[_dataset]) 58 | dataset_group.add_argument('--save-dir', type=str, default=_save_dir) 59 | 60 | return parser.parse_args(argv) 61 | 62 | 63 | def main(config): 64 | warnings.filterwarnings("ignore", category=DeprecationWarning) 65 | warnings.filterwarnings("ignore", category=FutureWarning) 66 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' 67 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id 68 | 69 | if not os.path.exists(config.save_dir): 70 | os.makedirs(config.save_dir) 71 | sys.stdout = Logger(os.path.join(config.save_dir, 'train.log')) 72 | 73 | pprint(vars(config)) 74 | data_root = os.path.join('../../data', config.dataset) 75 | img_tr = f'{data_root}/train.txt' 76 | img_te = f'{data_root}/test.txt' 77 | img_db = f'{data_root}/database.txt' 78 | 79 | if config.test == True: 80 | # config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 81 | config.network_weights = './models/lr0.002_cq0.0001_ss4_sc256_d64_cifar10.npy' 82 | else: 83 | 
train_img = dataset.import_train(data_root, img_tr)
84 |         network_weights = model.train(train_img, config)
85 |         config.network_weights = network_weights
86 | 
87 |     query_img, database_img = dataset.import_validation(data_root, img_te, img_db)
88 |     maps = model.validation(database_img, query_img, config)
89 | 
90 |     for key in maps:
91 |         print(f"{key}: {maps[key]}")
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main(parse_args(sys.argv[1:]))
--------------------------------------------------------------------------------
/examples/dvsq/train_val_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import warnings
5 | import data_provider.image as dataset
6 | import model.dvsq as model
7 | from pprint import pprint
8 | from util import Logger, str2bool
9 | 
10 | 
11 | warnings.filterwarnings("ignore", category=DeprecationWarning)
12 | warnings.filterwarnings("ignore", category=FutureWarning)
13 | 
14 | label_dims = {'cifar10': 10, 'cifar10-s1': 10, 'cub': 200, 'nuswide_21': 21,
15 |               'nuswide_81': 81, 'coco': 80, 'imagenet': 100, 'cifar10_zero_shot': 10}
16 | 
17 | Rs = {'cifar10': 54000, 'cifar10-s1': 50000, 'nuswide_81': 5000, 'coco': 5000,
18 |       'nuswide_21': 5000, 'imagenet': 5000, 'cifar10_zero_shot': 15000}
19 | 
20 | 
21 | def parse_args(argv):
22 |     parser = argparse.ArgumentParser(description='Train and val')
23 | 
24 |     # algorithm config
25 |     algorithm_group = parser.add_argument_group(title='Algorithm config')
26 |     algorithm_group.add_argument('--output-dim', type=int, default=300)
27 |     algorithm_group.add_argument('--max-iter-update-b', type=int, default=3)
28 |     algorithm_group.add_argument('--max-iter-update-Cb', type=int, default=1)
29 |     algorithm_group.add_argument('--cq-lambda', type=float, default=0.0001)
30 |     algorithm_group.add_argument('--code-batch-size', type=int, default=500)
31 |     algorithm_group.add_argument('--n-subspace', type=int, default=4)
32 |     algorithm_group.add_argument('--n-subcenter', type=int, default=256)
33 |     algorithm_group.add_argument('--margin', type=float, default=0.7)  # the margin is a float, not an int
34 |     # network config
35 |     network_group = parser.add_argument_group(title='Network config')
36 |     network_group.add_argument('--gpu_id', type=str, default='0')
37 |     network_group.add_argument('--max-iter', type=int, default=5000)
38 |     network_group.add_argument('--batch-size', type=int, default=256)
39 |     network_group.add_argument('--val-batch-size', type=int, default=100)
40 |     network_group.add_argument('--decay-step', type=int, default=1000, help='Iterations after which the learning rate decays')
41 |     network_group.add_argument('--learning-rate', type=float, default=0.02)  # 0.02 for DVSQ, 0.002 for DQN
42 |     network_group.add_argument('--learning-rate-decay-factor', type=float, default=0.5, help='Learning rate decay factor')
43 |     network_group.add_argument('--network', type=str, default='alexnet')
44 |     network_group.add_argument('--network-weights', type=str)
45 |     network_group.add_argument('--finetune-all', type=str2bool, default=True)
46 |     network_group.add_argument('--test', default=False, action='store_true')
47 |     network_group.add_argument('--debug', default=False, action='store_true')
48 |     # dataset config
49 |     dataset_group = parser.add_argument_group(title='Dataset config')
50 |     dataset_group.add_argument('--dataset', type=str, default='cifar10')
51 |     dataset_group.add_argument('--prefix', type=str, default='1')
52 |     # config process
53 |     config, rest = parser.parse_known_args()
54 |     _dataset = config.dataset
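    # (Two-phase parsing: parse_known_args() above reads the user-supplied
    # dataset and network first, so the dataset-dependent defaults registered
    # below (R, label-dim, save-dir, wordvec-dict) can be derived before the
    # final parse_args(argv) call.)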
55 | _save_dir = f'../snapshot/{config.dataset}_{config.network}_{config.output_dim}bit_dvsq/' + \ 56 | f'{config.prefix}_margin{config.margin}_subspace{config.n_subspace}_subcenter{config.n_subcenter}' 57 | dataset_group.add_argument('--R', type=int, default=Rs[_dataset]) 58 | dataset_group.add_argument('--label-dim', type=str, default=label_dims[_dataset]) 59 | dataset_group.add_argument('--save-dir', type=str, default=_save_dir) 60 | dataset_group.add_argument('--wordvec-dict', type=str, default="../../data/cifar10/wordvec.txt") 61 | 62 | return parser.parse_args(argv) 63 | 64 | 65 | def main(config): 66 | warnings.filterwarnings("ignore", category=DeprecationWarning) 67 | warnings.filterwarnings("ignore", category=FutureWarning) 68 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' 69 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_id 70 | 71 | if not os.path.exists(config.save_dir): 72 | os.makedirs(config.save_dir) 73 | sys.stdout = Logger(os.path.join(config.save_dir, 'train.log')) 74 | 75 | pprint(vars(config)) 76 | data_root = os.path.join('../../data', config.dataset) 77 | config.wordvec_dict = f'{data_root}/wordvec.txt' 78 | img_tr = f'{data_root}/train.txt' 79 | img_te = f'{data_root}/test.txt' 80 | img_db = f'{data_root}/database.txt' 81 | 82 | if config.test == True: 83 | config.network_weights = os.path.join(config.save_dir, 'network_weights.npy') 84 | else: 85 | train_img = dataset.import_train(data_root, img_tr) 86 | network_weights = model.train(train_img, config) 87 | config.network_weights = network_weights 88 | 89 | query_img, database_img = dataset.import_validation(data_root, img_te, img_db) 90 | maps = model.validation(database_img, query_img, config) 91 | 92 | for key in maps: 93 | print(f"{key}: {maps[key]}") 94 | 95 | 96 | if __name__ == "__main__": 97 | main(parse_args(sys.argv[1:])) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepHash-tensorflow 2 | 3 | This code repository is derived from [thulab/DeepHash](https://github.com/thulab/DeepHash) but adds more features. 4 | 5 | See the commits and update logs for details. 6 | 7 | ## Update logs: 8 | - Metrics 9 | - add WhRank 10 | - add mAP_finetune 11 | - add RAMAP 12 | - Backbone 13 | - add VGG-F network 14 | - Loss functions 15 | - add Independent loss 16 | - add balance loss 17 | - add orthogonal loss 18 | - add exp loss 19 | - add quantization losses, *i.e.*, L1 loss、L2 loss、cauchy loss 20 | - Tools 21 | - add visualization tool such as T-SNE, *etc* 22 | - add distance libs 23 | - Others 24 | - refactoring the `data_provider` code 25 | - refactoring the `model` code 26 | - refactoring `parser` code 27 | - training acceleration 28 | 29 | ## Contacts 30 | Maintainers of this library: 31 | * Shen Chen, Email: chenshen@stu.xmu.edu.cn 32 | 33 | --- 34 | DeepHash is a lightweight deep learning to hash library that implements state-of-the-art deep hashing/quantization algorithms. We will implement more representative deep hashing models continuously according to our released [deep hashing paper list](https://github.com/caoyue10/DeepHashingBaselines). Specifically, we welcome other researchers to contribute deep hashing models into this toolkit based on our framework. We will announce the contribution in this project. 
35 | 
36 | The implemented models include:
37 | 
38 | * DQN: [Deep Quantization Network for Efficient Image Retrieval](http://yue-cao.me/doc/deep-quantization-networks-dqn-aaai16.pdf), Yue Cao, Mingsheng Long, Jianmin Wang, Han Zhu, Qingfu Wen, AAAI Conference on Artificial Intelligence (AAAI), 2016
39 | * DHN: [Deep Hashing Network for Efficient Similarity Retrieval](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-hashing-network-aaai16.pdf), Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao, AAAI Conference on Artificial Intelligence (AAAI), 2016
40 | * DVSQ: [Deep Visual-Semantic Quantization for Efficient Image Retrieval](http://yue-cao.me/doc/deep-visual-semantic-quantization-cvpr17.pdf), Yue Cao, Mingsheng Long, Jianmin Wang, Shichen Liu, IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017
41 | * DCH: [Deep Cauchy Hashing for Hamming Space Retrieval](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf), Yue Cao, Mingsheng Long, Bin Liu, Jianmin Wang, IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2018
42 | * DTQ: [Deep Triplet Quantization](http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-triplet-quantization-acmmm18.pdf), Bin Liu, Yue Cao, Mingsheng Long, Jianmin Wang, Jingdong Wang, ACM Multimedia (ACMMM), 2018
43 | 
44 | Note: DTQ and DCH are up to date, while DQN, DHN and DVSQ may be outdated; feel free to contact us if you have any questions. We welcome others to contribute!
45 | 
46 | ## Requirements
47 | 
48 | - Python3: Anaconda is recommended because it already contains a lot of packages:
49 | ```
50 | conda create -n DeepHash python=3.6 anaconda
51 | source activate DeepHash
52 | ```
53 | - Other packages:
54 | ```
55 | conda install -y tensorflow-gpu
56 | conda install -y -c conda-forge opencv
57 | ```
58 | 
59 | To import the packages implemented in `./DeepHash`, add the path of `./DeepHash` to the `PYTHONPATH` environment variable:
60 | 
61 | ```shell
62 | export PYTHONPATH=/path/to/project/DeepHash/DeepHash:$PYTHONPATH
63 | ```
64 | 
65 | ## Data Preparation
66 | In `data/cifar10/train.txt`, we give an example of how to prepare image training data. The lists of test and database images in `data/cifar10/test.txt` and `data/cifar10/database.txt` are processed during the prediction procedure. If you want to add another dataset as input, prepare `train.txt`, `test.txt` and `database.txt` in the same way as for the CIFAR-10 dataset.
67 | 
68 | In addition, we have put the whole CIFAR-10 dataset, including the images and data lists, on the [release page](https://github.com/thulab/DeepHash/releases/download/v0.1/cifar10.zip). You can download it directly and unzip it to the `data/cifar10` folder.
69 | 
70 | Make sure the tree of `/path/to/project/data/cifar10` looks like this:
71 | 
72 | ```
73 | .
74 | |-- database.txt
75 | |-- test
76 | |-- test.txt
77 | |-- train
78 | `-- train.txt
79 | ```
80 | 
81 | If you need to run on NUSWIDE_81 and COCO, we recommend following https://github.com/thuml/HashNet/tree/master/pytorch#datasets to prepare the NUSWIDE_81 and COCO images.
82 | 
83 | For the *DVSQ* model, you also need *word vectors* for the semantic labels. Here we use a word2vec model pretrained on the GoogleNews dataset (e.g. https://github.com/mmihaltz/word2vec-GoogleNews-vectors) to extract word embeddings for the image labels, e.g. dog, cat and so on.
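As a rough illustration, a minimal sketch (not part of this repo) of producing such a `wordvec.txt` for the CIFAR-10 labels; it assumes the `gensim` package and the GoogleNews binary linked above, and labels missing from the word2vec vocabulary would need a manual fallback. The models here read this file with `np.loadtxt`, i.e. one whitespace-separated embedding row per class:

```python
# Hypothetical helper: dump one 300-d GoogleNews embedding per class label.
import numpy as np
from gensim.models import KeyedVectors

labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']  # CIFAR-10 class names
kv = KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True)
vecs = np.stack([kv[w] for w in labels])  # raises KeyError if a label is OOV
np.savetxt('data/cifar10/wordvec.txt', vecs)  # loadable via np.loadtxt
```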
84 | 
85 | ## Get Started
86 | 
87 | ### Pre-trained model
88 | 
89 | You should manually download the model file of the ImageNet pre-trained AlexNet from [here](https://github.com/thulab/DeepHash/releases/download/v0.1/reference_pretrain.npy.zip) or from the release page, and unzip it to `/path/to/project/DeepHash/architecture/pretrained_model`.
90 | 
91 | Make sure the tree of `/path/to/project/DeepHash/architecture` looks like this:
92 | 
93 | ```
94 | ├── __init__.py
95 | ├── pretrained_model
96 |    └── reference_pretrain.npy
97 | ```
98 | 
99 | ### Training and Testing
100 | 
101 | The example for each `$method` (DCH and DTQ) can be run like:
102 | 
103 | ```shell
104 | cd examples/$method/
105 | python train_val_script.py --gpus "0,1" --data-dir $PWD/../../data [other parameters described in train_val_script.py]
106 | ```
107 | 
108 | For DVSQ, DQN and DHN, please refer to the `train_val.sh` and `train_val_script.py` in the examples folder.
109 | 
110 | ## Citations
111 | If you find *DeepHash* useful for your research, please consider citing the following papers:
112 | 
113 |     @InProceedings{cite:AAAI16DQN,
114 |       Author = {Yue Cao and Mingsheng Long and Jianmin Wang and Han Zhu and Qingfu Wen},
115 |       Publisher = {AAAI},
116 |       Title = {Deep Quantization Network for Efficient Image Retrieval},
117 |       Year = {2016}
118 |     }
119 | 
120 |     @InProceedings{cite:AAAI16DHN,
121 |       Author = {Han Zhu and Mingsheng Long and Jianmin Wang and Yue Cao},
122 |       Publisher = {AAAI},
123 |       Title = {Deep Hashing Network for Efficient Similarity Retrieval},
124 |       Year = {2016}
125 |     }
126 | 
127 |     @InProceedings{cite:CVPR17DVSQ,
128 |       Title={Deep visual-semantic quantization for efficient image retrieval},
129 |       Author={Cao, Yue and Long, Mingsheng and Wang, Jianmin and Liu, Shichen},
130 |       Booktitle={CVPR},
131 |       Year={2017}
132 |     }
133 | 
134 |     @InProceedings{cite:CVPR18DCH,
135 |       Title={Deep Cauchy Hashing for Hamming Space Retrieval},
136 |       Author={Cao, Yue and Long, Mingsheng and Liu, Bin and Wang, Jianmin},
137 |       Booktitle={CVPR},
138 |       Year={2018}
139 |     }
140 | 
141 |     @article{liu2018deep,
142 |       title={Deep triplet quantization},
143 |       author={Liu, Bin and Cao, Yue and Long, Mingsheng and Wang, Jianmin and Wang, Jingdong},
144 |       journal={MM, ACM},
145 |       year={2018}
146 |     }
147 | 
--------------------------------------------------------------------------------
/data_provider/triplet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from distance.npversion import distance
4 | 
5 | class Dataset(object):
6 |     def __init__(self, dataset, output_dim, code_dim):
7 |         self._dataset = dataset
8 |         self.n_samples = dataset.n_samples
9 |         self._train = dataset.train
10 |         self._output = np.zeros((self.n_samples, output_dim), dtype=np.float32)
11 |         self._codes = np.zeros((self.n_samples, code_dim), dtype=np.float32)
12 |         self._triplets = np.array([])
13 |         self._trip_index_in_epoch = 0
14 |         self._index_in_epoch = 0
15 |         self._epochs_complete = 0
16 |         self._perm = np.arange(self.n_samples)
17 |         np.random.shuffle(self._perm)
18 |         return
19 | 
20 |     def update_triplets(self, margin, n_part=10, dist_type='euclidean2', select_strategy='margin'):
21 |         """
22 |         :param select_strategy: hard, all, margin
23 |         :param dist_type: distance type, e.g. euclidean2, cosine
24 |         :param margin: triplet margin parameter
25 |         :param n_part: number of parts to split the data into
26 |         """
27 |         n_samples = self.n_samples
28 |         np.random.shuffle(self._perm)
29 |         embedding = self._output[self._perm[:n_samples]]
30 |         labels = self._dataset.get_labels()[self._perm[:n_samples]]
31 |         n_samples_per_part = int(math.ceil(n_samples / n_part))
32 |         triplets = []
33 |         for i in range(n_part):
34 |             start = n_samples_per_part * i
35 |             end = min(n_samples_per_part * (i+1), n_samples)
36 |             dist = distance(embedding[start:end], pair=True, dist_type=dist_type)
37 |             for idx_anchor in range(0, end - start):
38 |                 label_anchor = np.copy(labels[idx_anchor+start, :])
39 |                 label_anchor[label_anchor==0] = -1
40 |                 all_pos = np.where(np.any(labels[start:end] == label_anchor, axis=1))[0]
41 |                 all_neg = np.array(list(set(range(end-start)) - set(all_pos)))
42 | 
43 |                 if select_strategy == 'hard':
44 |                     idx_pos = all_pos[np.argmax(dist[idx_anchor, all_pos])]
45 |                     if idx_pos == idx_anchor:
46 |                         continue
47 |                     idx_neg = all_neg[np.argmin(dist[idx_anchor, all_neg])]
48 |                     triplets.append((idx_anchor + start, idx_pos + start, idx_neg + start))
49 |                     continue
50 | 
51 |                 for idx_pos in all_pos:
52 |                     if idx_pos == idx_anchor:
53 |                         continue
54 | 
55 |                     if select_strategy == 'all':
56 |                         selected_neg = all_neg
57 |                     elif select_strategy == 'margin':
58 |                         selected_neg = all_neg[np.where(dist[idx_anchor, all_neg] - dist[idx_anchor, idx_pos] < margin)[0]]
59 | 
60 |                     if selected_neg.shape[0] > 0:
61 |                         idx_neg = np.random.choice(selected_neg)
62 |                         triplets.append((idx_anchor + start, idx_pos + start, idx_neg + start))
63 |         self._triplets = np.array(triplets)
64 |         np.random.shuffle(self._triplets)
65 | 
66 |         # sanity check: positives share at least one label with the anchor, negatives share none
67 |         anchor = labels[self._triplets[:, 0]]
68 |         mapper = lambda anchor, other: np.any(anchor * (anchor == other), -1)
69 |         assert(np.all(mapper(anchor, labels[self._triplets[:, 1]])))
70 |         assert(np.all(np.invert(mapper(anchor, labels[self._triplets[:, 2]]))))
71 |         return
72 | 
73 |     def next_batch_triplet(self, batch_size):
74 |         """
75 |         Args:
76 |             batch_size
77 |         Returns:
78 |             data, label, codes
79 |         """
80 |         start = self._trip_index_in_epoch
81 |         self._trip_index_in_epoch += batch_size
82 |         if self._trip_index_in_epoch > self.triplets.shape[0]:
83 |             start = 0
84 |             self._trip_index_in_epoch = batch_size
85 |         end = self._trip_index_in_epoch
86 | 
87 |         # stack the indices of anchors, positives and negatives into one array
88 |         arr = self.triplets[start:end]
89 |         idx = self._perm[np.concatenate([arr[:, 0], arr[:, 1], arr[:, 2]], axis=0)]
90 |         data, label = self._dataset.data(idx)
91 | 
92 |         return data, label, self._codes[idx]
93 | 
94 |     def next_batch(self, batch_size):
95 |         """
96 |         Args:
97 |             batch_size
98 |         Returns:
99 |             [batch_size, (n_inputs)]: next batch images, built by stacking anchor, positive, negative
100 |             [batch_size, n_class]: next batch labels
101 |         """
102 |         start = self._index_in_epoch
103 |         self._index_in_epoch += batch_size
104 |         if self._index_in_epoch > self.n_samples:
105 |             if self._train:
106 |                 self._epochs_complete += 1
107 |                 start = 0
108 |                 self._index_in_epoch = batch_size
109 |             else:
110 |                 # Validation stage only process once
111 |                 start = self.n_samples - batch_size
112 |                 self._index_in_epoch = self.n_samples
113 |         end = self._index_in_epoch
114 | 
115 |         data, label = self._dataset.data(self._perm[start:end])
116 |         return (data, label, self._codes[self._perm[start: end], :])
117 | 
118 |     def next_batch_output_codes(self, batch_size):
119 |         start = self._index_in_epoch
120 |         self._index_in_epoch += batch_size
121 | 
# Another epoch finished
122 |         if self._index_in_epoch > self.n_samples:
123 |             if self._train:
124 |                 # Start next epoch
125 |                 start = 0
126 |                 self._index_in_epoch = batch_size
127 |             else:
128 |                 # Validation stage only process once
129 |                 start = self.n_samples - batch_size
130 |                 self._index_in_epoch = self.n_samples
131 |         end = self._index_in_epoch
132 | 
133 |         return (self._output[self._perm[start: end], :],
134 |                 self._codes[self._perm[start: end], :])
135 | 
136 |     def feed_batch_output(self, batch_size, output):
137 |         start = self._index_in_epoch - batch_size
138 |         end = self._index_in_epoch
139 |         self._output[self._perm[start:end], :] = output
140 |         return
141 | 
142 |     def feed_batch_triplet_output(self, batch_size, triplet_output):
143 |         anchor, pos, neg = np.split(triplet_output, 3, axis=0)
144 |         start = self._trip_index_in_epoch - batch_size
145 |         end = self._trip_index_in_epoch
146 |         idx = self._perm[self._triplets[start:end, :]]
147 |         self._output[idx[:, 0]] = anchor
148 |         self._output[idx[:, 1]] = pos
149 |         self._output[idx[:, 2]] = neg
150 |         return
151 | 
152 |     def feed_batch_codes(self, batch_size, codes):
153 |         """
154 |         Args:
155 |             batch_size
156 |             [batch_size, n_output]
157 |         """
158 |         start = self._index_in_epoch - batch_size
159 |         end = self._index_in_epoch
160 |         self._codes[self._perm[start:end], :] = codes
161 |         return
162 | 
163 |     @property
164 |     def output(self):
165 |         return self._output
166 | 
167 |     @property
168 |     def codes(self):
169 |         return self._codes
170 | 
171 |     @property
172 |     def triplets(self):
173 |         return self._triplets
174 | 
175 |     @property
176 |     def label(self):
177 |         return self._dataset.get_labels()
178 | 
179 |     def finish_epoch(self):
180 |         self._index_in_epoch = 0
181 | 
--------------------------------------------------------------------------------
/evaluation/load_and_predict.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.io as scio
3 | 
4 | def pr_curve(params):
5 |     database_code = np.array(params['database_code'])
6 |     validation_code = np.array(params['validation_code'])
7 |     database_labels = np.array(params['database_labels'])
8 |     validation_labels = np.array(params['validation_labels'])
9 | 
10 |     query_num = validation_code.shape[0]
11 |     database_num = database_code.shape[0]
12 | 
13 |     database_code = np.sign(database_code)
14 |     database_code[database_code == -1] = 0
15 |     database_code = database_code.astype(int)
16 | 
17 |     validation_code = np.sign(validation_code)
18 |     validation_code[validation_code == -1] = 0
19 |     validation_code = validation_code.astype(int)
20 | 
21 |     database_labels = database_labels.astype(int)  # astype returns a copy, so the result must be assigned
22 |     validation_labels = validation_labels.astype(int)
23 | 
24 |     WTrue = np.dot(validation_labels, database_labels.T)
25 |     WTrue[WTrue >= 1] = 1
26 |     WTrue[WTrue < 1] = 0
27 |     print(WTrue.shape)
28 |     print(np.max(WTrue))
29 |     print(np.min(WTrue))
30 | 
31 |     DHat = np.zeros((query_num, database_num))
32 | 
33 |     for i in range(query_num):
34 |         query = validation_code[i, :]
35 |         query_matrix = np.tile(query, (database_num, 1))
36 | 
37 |         distance = np.sum(np.absolute(query_matrix - database_code), axis=1)
38 |         DHat[i, :] = distance
39 |         print(i)
40 | 
41 |     print(DHat.shape)
42 |     print(np.max(DHat))
43 |     print(np.min(DHat))
44 | 
45 |     mat_dic = dict(
46 |         WTrue=WTrue,
47 |         DHat=DHat
48 |     )
49 |     scio.savemat('./data/data.mat', mat_dic)
50 | 
51 | 
52 | def precision_recall(params):
53 |     database_code = np.array(params['database_code'])
54 |     validation_code = np.array(params['validation_code'])
55 |     database_labels = np.array(params['database_labels'])
56 |     validation_labels = np.array(params['validation_labels'])
57 |     database_code = np.sign(database_code)
58 |     validation_code = np.sign(validation_code)
59 |     database_labels = database_labels.astype(int)  # assign the astype result
60 |     validation_labels = validation_labels.astype(int)
61 | 
62 |     sim = np.dot(database_code, validation_code.T)
63 |     ids = np.argsort(-sim, axis=0)
64 |     ones = np.ones((ids.shape[0], ids.shape[1]), dtype=int)
65 |     print(np.min(ids))
66 |     ids = ids + ones
67 |     print(np.min(ids))
68 |     mat_ids = dict(
69 |         ids=ids,
70 |         LBase=database_labels,
71 |         LTest=validation_labels
72 |     )
73 |     scio.savemat('./data/data.mat', mat_ids)
74 | 
75 | 
76 | def hamming_precision(params):
77 |     database_code = np.array(params['database_code'])
78 |     validation_code = np.array(params['validation_code'])
79 |     database_labels = np.array(params['database_labels'])
80 |     validation_labels = np.array(params['validation_labels'])
81 |     R = params['R']
82 |     query_num = validation_code.shape[0]
83 |     database_num = database_code.shape[0]
84 | 
85 |     database_code = np.sign(database_code)
86 |     database_code[database_code == -1] = 0
87 |     database_code = database_code.astype(int)
88 | 
89 |     validation_code = np.sign(validation_code)
90 |     validation_code[validation_code == -1] = 0
91 |     validation_code = validation_code.astype(int)
92 | 
93 |     APx = []
94 | 
95 |     for i in range(query_num):
96 |         query = validation_code[i, :]
97 |         query_matrix = np.tile(query, (database_num, 1))
98 | 
99 |         label = validation_labels[i, :]
100 |         label[label == 0] = -1
101 |         label_matrix = np.tile(label, (database_num, 1))
102 | 
103 |         distance = np.sum(np.absolute(query_matrix - database_code), axis=1)
104 |         similarity = np.sum(database_labels == label_matrix, axis=1)
105 |         similarity[similarity > 1] = 1
106 | 
107 |         total_rel_num = np.sum(distance <= R)
108 |         true_positive = np.sum((distance <= R) * similarity)
109 | 
110 |         print('--------')
111 |         print(i)
112 |         print(true_positive)
113 |         print(total_rel_num)
114 |         print('--------')
115 |         if total_rel_num != 0:
116 |             APx.append(float(true_positive) / total_rel_num)
117 |         else:
118 |             APx.append(float(0))
119 | 
120 |     print(np.sum(np.array(APx) != 0))
121 |     return np.mean(np.array(APx))
122 | 
123 | 
124 | def precision_curve(params):
125 |     database_code = np.array(params['database_code'])
126 |     validation_code = np.array(params['validation_code'])
127 |     database_labels = np.array(params['database_labels'])
128 |     validation_labels = np.array(params['validation_labels'])
129 |     query_num = validation_code.shape[0]
130 |     database_code = np.sign(database_code)
131 |     validation_code = np.sign(validation_code)
132 | 
133 |     sim = np.dot(database_code, validation_code.T)
134 |     ids = np.argsort(-sim, axis=0)
135 |     arr = []
136 | 
137 |     for it in range(10):  # renamed from `iter` to avoid shadowing the builtin
138 |         R = (it + 1) * 100
139 |         APx = []
140 |         for i in range(query_num):
141 |             label = validation_labels[i, :]
142 |             label[label == 0] = -1
143 |             idx = ids[:, i]
144 |             imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0
145 |             relevant_num = np.sum(imatch)
146 |             APx.append(float(relevant_num) / R)
147 |         arr.append(np.mean(np.array(APx)))
148 |         print(arr)
149 |     print(arr)
150 | 
151 | 
152 | def precision(params):
153 |     database_code = np.array(params['database_code'])
154 |     validation_code = np.array(params['validation_code'])
155 |     database_labels = np.array(params['database_labels'])
156 |     validation_labels = np.array(params['validation_labels'])
157 |     R = params['R']
158 |     query_num = validation_code.shape[0]
159 |     database_code 
= np.sign(database_code) 160 | validation_code = np.sign(validation_code) 161 | 162 | sim = np.dot(database_code, validation_code.T) 163 | ids = np.argsort(-sim, axis=0) 164 | APx = [] 165 | 166 | for i in range(query_num): 167 | label = validation_labels[i, :] 168 | label[label == 0] = -1 169 | idx = ids[:, i] 170 | imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0 171 | relevant_num = np.sum(imatch) 172 | APx.append(float(relevant_num) / R) 173 | 174 | return np.mean(np.array(APx)) 175 | 176 | 177 | def mean_average_precision(params): 178 | database_code = np.array(params['database_code']) 179 | validation_code = np.array(params['validation_code']) 180 | database_labels = np.array(params['database_labels']) 181 | validation_labels = np.array(params['validation_labels']) 182 | R = params['R'] 183 | query_num = validation_code.shape[0] 184 | database_code = np.sign(database_code) 185 | validation_code = np.sign(validation_code) 186 | 187 | sim = np.dot(database_code, validation_code.T) 188 | ids = np.argsort(-sim, axis=0) 189 | APx = [] 190 | 191 | for i in range(query_num): 192 | label = validation_labels[i, :] 193 | label[label == 0] = -1 194 | idx = ids[:, i] 195 | imatch = np.sum(database_labels[idx[0:R], :] == label, axis=1) > 0 196 | relevant_num = np.sum(imatch) 197 | Lx = np.cumsum(imatch) 198 | Px = Lx.astype(float) / np.arange(1, R+1, 1) 199 | if relevant_num != 0: 200 | APx.append(np.sum(Px * imatch) / relevant_num) 201 | 202 | return np.mean(np.array(APx)) 203 | 204 | 205 | def statistic_prob(params): 206 | database_code = np.array(params['database_code']) 207 | validation_code = np.array(params['validation_code']) 208 | sim = np.dot(database_code, validation_code.T) 209 | query_num = validation_code.shape[0] 210 | database_num = database_code.shape[0] 211 | ones = np.ones((database_num, query_num)) 212 | exp_sim = np.exp(sim) 213 | prob = ones / (1 + 1 / exp_sim) 214 | useless = np.sum(prob >= 0.95) + np.sum(prob <= 0.05) 215 | useful = query_num * database_num - useless 216 | print("useful") 217 | print(useful) 218 | print("useless") 219 | print(useless) 220 | 221 | -------------------------------------------------------------------------------- /architecture/alexnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def img_alexnet_layers(img, batch_size, output_dim, stage, model_weights=None, val_batch_size=32, with_tanh=True): 7 | deep_param_img = {} 8 | train_layers = [] 9 | 10 | if model_weights is None: 11 | dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | model_weights = os.path.join(dir_path, "pretrained_model/alexnet_weights.npy") 13 | # model_weights = os.path.join(dir_path, "pretrained_model/alexnet_weights_trained.npy") 14 | 15 | print("loading img model from ", model_weights) 16 | net_data = dict(np.load(model_weights, encoding='bytes').item()) 17 | print(list(net_data.keys())) 18 | 19 | # swap(2,1,0), bgr -> rgb 20 | reshaped_image = tf.cast(img, tf.float32)[:, :, :, ::-1] 21 | 22 | height = 227 23 | width = 227 24 | 25 | # Randomly crop a [height, width] section of each image 26 | with tf.name_scope('preprocess'): 27 | def train_fn(): 28 | return tf.stack([tf.random_crop(tf.image.random_flip_left_right(each), [height, width, 3]) 29 | for each in tf.unstack(reshaped_image, batch_size)]) 30 | 31 | def val_fn(): 32 | def crop(img, x, y): return tf.image.crop_to_bounding_box( 33 | img, x, y, width, height) 34 | 35 | def 
distort(f, x, y): return tf.stack( 36 | [crop(f(each), x, y) for each in tf.unstack(reshaped_image, val_batch_size)]) 37 | 38 | def distort_raw(x, y): return distort(lambda x: x, x, y) 39 | 40 | def distort_fliped(x, y): return distort( 41 | tf.image.flip_left_right, x, y) 42 | 43 | distorted = tf.concat([distort_fliped(0, 0), distort_fliped(28, 0), 44 | distort_fliped( 45 | 0, 28), distort_fliped(28, 28), 46 | distort_fliped(14, 14), distort_raw(0, 0), 47 | distort_raw(28, 0), distort_raw(0, 28), 48 | distort_raw(28, 28), distort_raw(14, 14)], 0) 49 | return distorted 50 | 51 | distorted = tf.cond(stage > 0, val_fn, train_fn) 52 | 53 | # Zero-mean input 54 | mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[ 55 | 1, 1, 1, 3], name='img-mean') 56 | distorted = distorted - mean 57 | 58 | # Conv1 59 | # Output 96, kernel 11, stride 4 60 | with tf.name_scope('conv1') as scope: 61 | kernel = tf.Variable(net_data['conv1'][0], name='weights') 62 | conv = tf.nn.conv2d(distorted, kernel, [1, 4, 4, 1], padding='VALID') 63 | biases = tf.Variable(net_data['conv1'][1], name='biases') 64 | out = tf.nn.bias_add(conv, biases) 65 | conv1 = tf.nn.relu(out, name=scope) 66 | deep_param_img['conv1'] = [kernel, biases] 67 | train_layers += [kernel, biases] 68 | 69 | # Pool1 70 | pool1 = tf.nn.max_pool(conv1, 71 | ksize=[1, 3, 3, 1], 72 | strides=[1, 2, 2, 1], 73 | padding='VALID', 74 | name='pool1') 75 | 76 | # LRN1 77 | radius = 2 78 | alpha = 2e-05 79 | beta = 0.75 80 | bias = 1.0 81 | lrn1 = tf.nn.local_response_normalization(pool1, 82 | depth_radius=radius, 83 | alpha=alpha, 84 | beta=beta, 85 | bias=bias) 86 | 87 | # Conv2 88 | # Output 256, pad 2, kernel 5, group 2 89 | with tf.name_scope('conv2') as scope: 90 | kernel = tf.Variable(net_data['conv2'][0], name='weights') 91 | group = 2 92 | 93 | def convolve(i, k): return tf.nn.conv2d( 94 | i, k, [1, 1, 1, 1], padding='SAME') 95 | input_groups = tf.split(lrn1, group, 3) 96 | kernel_groups = tf.split(kernel, group, 3) 97 | output_groups = [convolve(i, k) 98 | for i, k in zip(input_groups, kernel_groups)] 99 | # Concatenate the groups 100 | conv = tf.concat(output_groups, 3) 101 | 102 | biases = tf.Variable(net_data['conv2'][1], name='biases') 103 | out = tf.nn.bias_add(conv, biases) 104 | conv2 = tf.nn.relu(out, name=scope) 105 | deep_param_img['conv2'] = [kernel, biases] 106 | train_layers += [kernel, biases] 107 | 108 | # Pool2 109 | pool2 = tf.nn.max_pool(conv2, 110 | ksize=[1, 3, 3, 1], 111 | strides=[1, 2, 2, 1], 112 | padding='VALID', 113 | name='pool2') 114 | 115 | # LRN2 116 | radius = 2 117 | alpha = 2e-05 118 | beta = 0.75 119 | bias = 1.0 120 | lrn2 = tf.nn.local_response_normalization(pool2, 121 | depth_radius=radius, 122 | alpha=alpha, 123 | beta=beta, 124 | bias=bias) 125 | 126 | # Conv3 127 | # Output 384, pad 1, kernel 3 128 | with tf.name_scope('conv3') as scope: 129 | kernel = tf.Variable(net_data['conv3'][0], name='weights') 130 | conv = tf.nn.conv2d(lrn2, kernel, [1, 1, 1, 1], padding='SAME') 131 | biases = tf.Variable(net_data['conv3'][1], name='biases') 132 | out = tf.nn.bias_add(conv, biases) 133 | conv3 = tf.nn.relu(out, name=scope) 134 | deep_param_img['conv3'] = [kernel, biases] 135 | train_layers += [kernel, biases] 136 | 137 | # Conv4 138 | # Output 384, pad 1, kernel 3, group 2 139 | with tf.name_scope('conv4') as scope: 140 | kernel = tf.Variable(net_data['conv4'][0], name='weights') 141 | group = 2 142 | 143 | def convolve(i, k): return tf.nn.conv2d( 144 | i, k, [1, 1, 1, 1], padding='SAME') 145 | 
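        # (This is AlexNet's grouped convolution: the feature maps and the
        # kernel are split into `group` chunks along the channel axis, each
        # chunk is convolved independently, and the partial outputs are
        # concatenated back together below.)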
input_groups = tf.split(conv3, group, 3) 146 | kernel_groups = tf.split(kernel, group, 3) 147 | output_groups = [convolve(i, k) 148 | for i, k in zip(input_groups, kernel_groups)] 149 | # Concatenate the groups 150 | conv = tf.concat(output_groups, 3) 151 | biases = tf.Variable(net_data['conv4'][1], name='biases') 152 | out = tf.nn.bias_add(conv, biases) 153 | conv4 = tf.nn.relu(out, name=scope) 154 | deep_param_img['conv4'] = [kernel, biases] 155 | train_layers += [kernel, biases] 156 | 157 | # Conv5 158 | # Output 256, pad 1, kernel 3, group 2 159 | with tf.name_scope('conv5') as scope: 160 | kernel = tf.Variable(net_data['conv5'][0], name='weights') 161 | group = 2 162 | 163 | def convolve(i, k): return tf.nn.conv2d( 164 | i, k, [1, 1, 1, 1], padding='SAME') 165 | input_groups = tf.split(conv4, group, 3) 166 | kernel_groups = tf.split(kernel, group, 3) 167 | output_groups = [convolve(i, k) 168 | for i, k in zip(input_groups, kernel_groups)] 169 | # Concatenate the groups 170 | conv = tf.concat(output_groups, 3) 171 | biases = tf.Variable(net_data['conv5'][1], name='biases') 172 | out = tf.nn.bias_add(conv, biases) 173 | conv5 = tf.nn.relu(out, name=scope) 174 | deep_param_img['conv5'] = [kernel, biases] 175 | train_layers += [kernel, biases] 176 | 177 | # Pool5 178 | pool5 = tf.nn.max_pool(conv5, 179 | ksize=[1, 3, 3, 1], 180 | strides=[1, 2, 2, 1], 181 | padding='VALID', 182 | name='pool5') 183 | 184 | # FC6 185 | # Output 4096 186 | with tf.name_scope('fc6'): 187 | shape = int(np.prod(pool5.get_shape()[1:])) 188 | fc6w = tf.Variable(net_data['fc6'][0], name='weights') 189 | fc6b = tf.Variable(net_data['fc6'][1], name='biases') 190 | pool5_flat = tf.reshape(pool5, [-1, shape]) 191 | fc6l = tf.nn.bias_add(tf.matmul(pool5_flat, fc6w), fc6b) 192 | fc6 = tf.nn.relu(fc6l) 193 | fc6 = tf.cond(stage > 0, lambda: fc6, lambda: tf.nn.dropout(fc6, 0.5)) 194 | fc6o = tf.nn.relu(fc6l) 195 | deep_param_img['fc6'] = [fc6w, fc6b] 196 | train_layers += [fc6w, fc6b] 197 | 198 | # FC7 199 | # Output 4096 200 | with tf.name_scope('fc7'): 201 | fc7w = tf.Variable(net_data['fc7'][0], name='weights') 202 | fc7b = tf.Variable(net_data['fc7'][1], name='biases') 203 | fc7l = tf.nn.bias_add(tf.matmul(fc6, fc7w), fc7b) 204 | fc7 = tf.nn.relu(fc7l) 205 | fc7 = tf.cond(stage > 0, lambda: fc7, lambda: tf.nn.dropout(fc7, 0.5)) 206 | deep_param_img['fc7'] = [fc7w, fc7b] 207 | train_layers += [fc7w, fc7b] 208 | 209 | # FC8 210 | # Output output_dim 211 | with tf.name_scope('fc8'): 212 | # Differ train and val stage by 'fc8' as key 213 | if 'fc8' in net_data: 214 | fc8w = tf.Variable(net_data['fc8'][0], name='weights') 215 | fc8b = tf.Variable(net_data['fc8'][1], name='biases') 216 | else: 217 | fc8w = tf.Variable(tf.random_normal([4096, output_dim], 218 | dtype=tf.float32, 219 | stddev=1e-2), name='weights') 220 | fc8b = tf.Variable(tf.constant(0.0, shape=[output_dim], 221 | dtype=tf.float32), name='biases') 222 | fc8l = tf.nn.bias_add(tf.matmul(fc7, fc8w), fc8b) 223 | if with_tanh: 224 | fc8_t = tf.nn.tanh(fc8l) 225 | else: 226 | fc8_t = fc8l 227 | 228 | def val_fn1(): 229 | concated = tf.concat([tf.expand_dims(i, 0) 230 | for i in tf.split(fc8_t, 10, 0)], 0) 231 | return tf.reduce_mean(concated, 0) 232 | fc8 = tf.cond(stage > 0, val_fn1, lambda: fc8_t) 233 | deep_param_img['fc8'] = [fc8w, fc8b] 234 | train_layers += [fc8w, fc8b] 235 | 236 | print("img model loading finished") 237 | # Return outputs 238 | return fc8, deep_param_img, train_layers 
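A minimal usage sketch, not part of the original file: this is how the models in this repo wire up `img_alexnet_layers`, assuming TensorFlow 1.x, the project on `PYTHONPATH`, and the pretrained `.npy` weights in place (the batch sizes and `output_dim` below are arbitrary assumptions):

```python
import tensorflow as tf
from architecture import img_alexnet_layers

# 256x256 inputs, matching the placeholders used by model/dch.py and model/dhn.py
img = tf.placeholder(tf.float32, [None, 256, 256, 3])
# stage defaults to 0 (training); feeding stage=1 takes the 10-crop validation path
stage = tf.placeholder_with_default(tf.constant(0), [])
fc8, params, train_layers = img_alexnet_layers(
    img, batch_size=256, output_dim=64, stage=stage,
    model_weights=None, val_batch_size=100, with_tanh=True)
```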
--------------------------------------------------------------------------------
/model/dch.py:
--------------------------------------------------------------------------------
1 | ##################################################################################
2 | # Deep Cauchy Hashing for Hamming Space Retrieval                                #
3 | # Authors: Yue Cao, Mingsheng Long, Bin Liu, Jianmin Wang                        #
4 | # Contact: caoyue10@gmail.com                                                    #
5 | ##################################################################################
6 | 
7 | import os
8 | import shutil
9 | import time
10 | from datetime import datetime
11 | from math import ceil
12 | 
13 | import numpy as np
14 | import tensorflow as tf
15 | 
16 | import util.plot as plot
17 | from architecture import img_alexnet_layers
18 | from evaluation import MAPs
19 | from data_provider.pairwise import Dataset
20 | from loss import *
21 | 
22 | 
23 | class DCH(object):
24 |     def __init__(self, config):
25 |         ### Initialize setting
26 |         print("initializing")
27 |         np.set_printoptions(precision=4)
28 | 
29 |         with tf.name_scope('stage'):
30 |             # 0 for training, 1 for validation
31 |             self.stage = tf.placeholder_with_default(tf.constant(0), [])
32 |         for k, v in vars(config).items():
33 |             setattr(self, k, v)
34 |         self.file_name = 'lr_{}_cqlambda_{}_alpha_{}_bias_{}_gamma_{}_dataset_{}'.format(
35 |             self.lr,
36 |             self.q_lambda,
37 |             self.alpha,
38 |             self.bias,
39 |             self.gamma,
40 |             self.dataset)
41 |         self.model_file = os.path.join(self.save_dir, self.file_name + '.npy')
42 | 
43 |         ### Setup session
44 |         print("launching session")
45 |         configProto = tf.ConfigProto()
46 |         configProto.gpu_options.allow_growth = True
47 |         configProto.allow_soft_placement = True
48 |         self.sess = tf.Session(config=configProto)
49 | 
50 |         ### Create variables and placeholders
51 |         self.img = tf.placeholder(tf.float32, [None, 256, 256, 3])
52 |         self.img_label = tf.placeholder(tf.float32, [None, self.label_dim])
53 |         self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model()
54 | 
55 |         self.global_step = tf.Variable(0, trainable=False)
56 |         self.train_op = self.apply_loss_function(self.global_step)
57 |         self.sess.run(tf.global_variables_initializer())
58 |         return
59 | 
60 |     def load_model(self):
61 |         if self.img_model == 'alexnet':
62 |             img_output = img_alexnet_layers(  # keyword args keep val_batch_size/with_tanh in the right slots
63 |                 self.img,
64 |                 self.batch_size,
65 |                 self.output_dim,
66 |                 self.stage,
67 |                 self.model_weights,
68 |                 val_batch_size=self.val_batch_size,
69 |                 with_tanh=self.with_tanh)
70 |         else:
71 |             raise Exception('cannot use such CNN model as ' + self.img_model)
72 |         return img_output
73 | 
74 |     def save_model(self, model_file=None):
75 |         if model_file is None:
76 |             model_file = self.model_file
77 |         model = {}
78 |         for layer in self.deep_param_img:
79 |             model[layer] = self.sess.run(self.deep_param_img[layer])
80 |         print("saving model to %s" % model_file)
81 |         if os.path.exists(self.save_dir) is False:
82 |             os.makedirs(self.save_dir)
83 | 
84 |         np.save(model_file, np.array(model))
85 |         return
86 | 
87 |     def apply_loss_function(self, global_step):
88 |         # loss function
89 |         self.cos_loss = cauchy_cross_entropy_loss(self.img_last_layer, self.img_label, gamma=self.gamma, normed=True)
90 |         self.q_loss = self.q_lambda * quantization_loss(self.img_last_layer)
91 |         self.loss = self.cos_loss + self.q_loss
92 | 
93 |         ### Last layer has a 10 times learning rate
94 |         lr = tf.train.exponential_decay(self.lr, global_step, self.decay_step, self.decay_factor, staircase=True)
95 |         opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
96 | 
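        # (apply_gradients below pairs each gradient with a per-parameter
        # learning-rate multiplier, Caffe-style: conv/fc weights keep the base
        # lr, their biases get 2x, and the freshly initialized hash layer fc8
        # gets 10x for the weight and 20x for the bias.)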
grads_and_vars = opt.compute_gradients(self.loss, self.train_layers+self.train_last_layer) 97 | fcgrad, _ = grads_and_vars[-2] 98 | fbgrad, _ = grads_and_vars[-1] 99 | 100 | self.grads_and_vars = grads_and_vars 101 | tf.summary.scalar('loss', self.loss) 102 | tf.summary.scalar('cos_loss', self.cos_loss) 103 | tf.summary.scalar('q_loss', self.q_loss) 104 | tf.summary.scalar('lr', lr) 105 | self.merged = tf.summary.merge_all() 106 | 107 | if self.finetune_all: 108 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 109 | (grads_and_vars[1][0]*2, self.train_layers[1]), 110 | (grads_and_vars[2][0], self.train_layers[2]), 111 | (grads_and_vars[3][0]*2, self.train_layers[3]), 112 | (grads_and_vars[4][0], self.train_layers[4]), 113 | (grads_and_vars[5][0]*2, self.train_layers[5]), 114 | (grads_and_vars[6][0], self.train_layers[6]), 115 | (grads_and_vars[7][0]*2, self.train_layers[7]), 116 | (grads_and_vars[8][0], self.train_layers[8]), 117 | (grads_and_vars[9][0]*2, self.train_layers[9]), 118 | (grads_and_vars[10][0], self.train_layers[10]), 119 | (grads_and_vars[11][0]*2, self.train_layers[11]), 120 | (grads_and_vars[12][0], self.train_layers[12]), 121 | (grads_and_vars[13][0]*2, self.train_layers[13]), 122 | (fcgrad*10, self.train_last_layer[0]), 123 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 124 | else: 125 | return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]), 126 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 127 | 128 | def train(self, img_dataset): 129 | print("%s #train# start training" % datetime.now()) 130 | 131 | ### tensorboard 132 | tflog_path = os.path.join(self.log_dir, self.file_name) 133 | if os.path.exists(tflog_path): 134 | shutil.rmtree(tflog_path) 135 | train_writer = tf.summary.FileWriter(tflog_path, self.sess.graph) 136 | 137 | for train_iter in range(self.iter_num): 138 | images, labels = img_dataset.next_batch(self.batch_size) 139 | start_time = time.time() 140 | 141 | _, loss, cos_loss, output, summary = self.sess.run([self.train_op, self.loss, self.cos_loss, self.img_last_layer, self.merged], 142 | feed_dict={self.img: images, 143 | self.img_label: labels}) 144 | 145 | train_writer.add_summary(summary, train_iter) 146 | 147 | img_dataset.feed_batch_output(self.batch_size, output) 148 | duration = time.time() - start_time 149 | 150 | if train_iter % 100 == 0: 151 | print("%s #train# step %4d, loss = %.4f, cross_entropy loss = %.4f, %.1f sec/batch" 152 | %(datetime.now(), train_iter+1, loss, cos_loss, duration)) 153 | 154 | print("%s #traing# finish training" % datetime.now()) 155 | self.save_model() 156 | print ("model saved") 157 | 158 | self.sess.close() 159 | 160 | def validation(self, img_query, img_database, R=100): 161 | print("%s #validation# start validation" % (datetime.now())) 162 | query_batch = int(ceil(img_query.n_samples / float(self.val_batch_size))) 163 | img_query.finish_epoch() 164 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 165 | for i in range(query_batch): 166 | images, labels = img_query.next_batch(self.val_batch_size) 167 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 168 | feed_dict={self.img: images, 169 | self.img_label: labels, 170 | self.stage: 1}) 171 | img_query.feed_batch_output(self.val_batch_size, output) 172 | print('Cosine Loss: %s'%loss) 173 | 174 | database_batch = int(ceil(img_database.n_samples / float(self.val_batch_size))) 175 | img_database.finish_epoch() 176 | 
print("%s #validation# totally %d database in %d batches" % (datetime.now(), img_database.n_samples, database_batch)) 177 | for i in range(database_batch): 178 | images, labels = img_database.next_batch(self.val_batch_size) 179 | 180 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 181 | feed_dict={self.img: images, 182 | self.img_label: labels, 183 | self.stage: 1}) 184 | img_database.feed_batch_output(self.val_batch_size, output) 185 | if i % 100 == 0: 186 | print('Cosine Loss[%d/%d]: %s'%(i, database_batch, loss)) 187 | 188 | mAPs = MAPs(R) 189 | 190 | self.sess.close() 191 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius_All(img_database, img_query) 192 | # for i in range(self.output_dim+1): 193 | # #prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, i) 194 | # plot.plot('prec', prec[i]) 195 | # plot.plot('rec', rec[i]) 196 | # plot.plot('mAP', mmap[i]) 197 | # plot.tick() 198 | # print('Results ham dist [%d], prec:%s, rec:%s, mAP:%s'%(i, prec[i], rec[i], mmap[i])) 199 | 200 | result_save_dir = os.path.join(self.save_dir, self.file_name) 201 | if os.path.exists(result_save_dir) is False: 202 | os.makedirs(result_save_dir) 203 | # plot.flush(result_save_dir) 204 | 205 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 206 | return { 207 | 'i2i_by_feature': mAPs.get_mAPs_by_feature(img_database, img_query), 208 | 'i2i_after_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 209 | 'i2i_map_radius_2': mmap, 210 | 'i2i_prec_radius_2': prec, 211 | 'i2i_recall_radius_2': rec 212 | } 213 | 214 | 215 | def train(train_img, config): 216 | model = DCH(config) 217 | img_dataset = Dataset(train_img, config.output_dim) 218 | model.train(img_dataset) 219 | return model.model_file 220 | 221 | 222 | def validation(database_img, query_img, config): 223 | model = DCH(config) 224 | img_database = Dataset(database_img, config.output_dim) 225 | img_query = Dataset(query_img, config.output_dim) 226 | return model.validation(img_query, img_database, config.R) 227 | -------------------------------------------------------------------------------- /model/dhn.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # Deep Hashing Network for Efficient Similarity Retrieval # 3 | # Authors: Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import shutil 9 | import time 10 | from datetime import datetime 11 | from math import ceil 12 | 13 | import numpy as np 14 | import tensorflow as tf 15 | 16 | from architecture import img_alexnet_layers 17 | from evaluation import MAPs 18 | from loss import cross_entropy_loss, quantization_loss 19 | from data_provider.pairwise import Dataset 20 | 21 | 22 | class DHN(object): 23 | def __init__(self, config): 24 | # Initialize setting 25 | print("initializing") 26 | np.set_printoptions(precision=4) 27 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 28 | self.device = '/gpu:' + config.gpu_id 29 | self.output_dim = config.output_dim 30 | self.n_class = config.label_dim 31 | self.cq_lambda = config.cq_lambda 32 | self.alpha = config.alpha 33 | 34 | self.batch_size = config.batch_size 35 | self.val_batch_size = config.val_batch_size 36 | self.max_iter = config.max_iter 37 | self.network = 
config.network 38 | self.learning_rate = config.learning_rate 39 | self.learning_rate_decay_factor = config.learning_rate_decay_factor 40 | self.decay_step = config.decay_step 41 | 42 | self.finetune_all = config.finetune_all 43 | 44 | self.model_file = os.path.join(config.save_dir, 'network_weights.npy') 45 | self.codes_file = os.path.join(config.save_dir, 'codes.npy') 46 | self.tflog_path = os.path.join(config.save_dir, 'tflog') 47 | 48 | # Setup session 49 | print("launching session") 50 | configProto = tf.ConfigProto() 51 | configProto.gpu_options.allow_growth = True 52 | configProto.allow_soft_placement = True 53 | self.sess = tf.Session(config=configProto) 54 | 55 | # Create variables and placeholders 56 | 57 | with tf.device(self.device): 58 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 59 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 60 | 61 | self.network_weights = config.network_weights 62 | self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model() 63 | 64 | self.global_step = tf.Variable(0, trainable=False) 65 | self.train_op = self.apply_loss_function(self.global_step) 66 | self.sess.run(tf.global_variables_initializer()) 67 | 68 | if config.debug == True: 69 | from tensorflow.python import debug as tf_debug 70 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 71 | 72 | def load_model(self): 73 | if self.network == 'alexnet': 74 | img_output = img_alexnet_layers( 75 | self.img, self.batch_size, self.output_dim, 76 | self.stage, self.network_weights, val_batch_size=self.val_batch_size) 77 | else: 78 | raise Exception('cannot use such CNN model as ' + self.network) 79 | return img_output 80 | 81 | def save_model(self, model_file=None): 82 | if model_file is None: 83 | model_file = self.model_file 84 | model = {} 85 | for layer in self.deep_param_img: 86 | model[layer] = self.sess.run(self.deep_param_img[layer]) 87 | print("saving model to %s" % model_file) 88 | folder = os.path.dirname(model_file) 89 | if os.path.exists(folder) is False: 90 | os.makedirs(folder) 91 | np.save(model_file, np.array(model)) 92 | return 93 | 94 | def load_codes(self, codes_file=None): 95 | if codes_file is None: 96 | codes_file = self.codes_file 97 | codes = np.load(codes_file).item() 98 | 99 | import collections 100 | mDataset = collections.namedtuple('Dataset', ['output', 'label']) 101 | database = mDataset(codes['db_features'], codes['db_label']) 102 | query = mDataset(codes['query_features'], codes['query_label']) 103 | return database, query 104 | 105 | def save_codes(self, database, query, codes_file=None): 106 | if codes_file is None: 107 | codes_file = self.codes_file 108 | codes = { 109 | 'db_features': database.output, 110 | 'db_label': database.label, 111 | 'query_features': query.output, 112 | 'query_label': query.label, 113 | } 114 | print("saving codes to %s" % codes_file) 115 | np.save(codes_file, np.array(codes)) 116 | 117 | def apply_loss_function(self, global_step): 118 | # loss function 119 | self.cos_loss = cross_entropy_loss(self.img_last_layer, self.img_label, self.alpha, normed=True, balanced=True) 120 | self.q_loss = self.cq_lambda * quantization_loss(self.img_last_layer) 121 | self.loss = self.cos_loss + self.q_loss 122 | 123 | # Last layer has a 10 times learning rate 124 | self.lr = tf.train.exponential_decay( 125 | self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True) 126 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, 
momentum=0.9) 127 | grads_and_vars = opt.compute_gradients( 128 | self.loss, self.train_layers + self.train_last_layer) 129 | fcgrad, _ = grads_and_vars[-2] 130 | fbgrad, _ = grads_and_vars[-1] 131 | 132 | # for debug 133 | self.grads_and_vars = grads_and_vars 134 | tf.summary.scalar('loss', self.loss) 135 | tf.summary.scalar('ce_loss', self.cos_loss) 136 | tf.summary.scalar('q_loss', self.q_loss) 137 | tf.summary.scalar('lr', self.lr) 138 | self.merged = tf.summary.merge_all() 139 | 140 | if self.finetune_all: 141 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 142 | (grads_and_vars[1][0]*2, self.train_layers[1]), 143 | (grads_and_vars[2][0], self.train_layers[2]), 144 | (grads_and_vars[3][0]*2, self.train_layers[3]), 145 | (grads_and_vars[4][0], self.train_layers[4]), 146 | (grads_and_vars[5][0]*2, self.train_layers[5]), 147 | (grads_and_vars[6][0], self.train_layers[6]), 148 | (grads_and_vars[7][0]*2, self.train_layers[7]), 149 | (grads_and_vars[8][0], self.train_layers[8]), 150 | (grads_and_vars[9][0]*2, self.train_layers[9]), 151 | (grads_and_vars[10][0], self.train_layers[10]), 152 | (grads_and_vars[11][0]*2, self.train_layers[11]), 153 | (grads_and_vars[12][0], self.train_layers[12]), 154 | (grads_and_vars[13][0]*2, self.train_layers[13]), 155 | (fcgrad*10, self.train_last_layer[0]), 156 | (fbgrad*20, self.train_last_layer[1])], 157 | global_step=global_step) 158 | else: 159 | return opt.apply_gradients([(fcgrad * 10, self.train_last_layer[0]), 160 | (fbgrad * 20, self.train_last_layer[1])], global_step=global_step) 161 | 162 | def train(self, img_dataset): 163 | print("%s #train# start training" % datetime.now()) 164 | 165 | # tensorboard 166 | if os.path.exists(self.tflog_path): 167 | shutil.rmtree(self.tflog_path) 168 | train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph) 169 | 170 | for train_iter in range(self.max_iter): 171 | images, labels = img_dataset.next_batch(self.batch_size) 172 | start_time = time.time() 173 | 174 | _, loss, cos_loss, q_loss, output, summary = self.sess.run( 175 | [self.train_op, self.loss, self.cos_loss, self.q_loss, self.img_last_layer, self.merged], 176 | feed_dict={self.img: images, 177 | self.img_label: labels}) 178 | 179 | img_dataset.feed_batch_output(self.batch_size, output) 180 | duration = time.time() - start_time 181 | 182 | if train_iter % 1 == 0: 183 | train_writer.add_summary(summary, train_iter) 184 | print("%s #train# step %4d, loss = %.4f, cross_entropy loss = %.4f, quantization loss = %.4f, %.1f sec/batch" 185 | % (datetime.now(), train_iter + 1, loss, cos_loss, q_loss, duration)) 186 | 187 | print("%s #traing# finish training" % datetime.now()) 188 | self.save_model() 189 | print("model saved") 190 | 191 | self.sess.close() 192 | 193 | def validation(self, img_query, img_database, R=100): 194 | if os.path.exists(self.codes_file): 195 | print("loading ", self.codes_file) 196 | img_database, img_query = self.load_codes(self.codes_file) 197 | else: 198 | print("%s #validation# start validation" % (datetime.now())) 199 | query_batch = int(ceil(img_query.n_samples / self.val_batch_size)) 200 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 201 | for i in range(query_batch): 202 | images, labels = img_query.next_batch(self.val_batch_size) 203 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 204 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 205 | 
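                # (stage=1 selects the validation path in img_alexnet_layers:
                # ten fixed crops/flips per image are evaluated and their
                # outputs averaged before being returned here.)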
img_query.feed_batch_output(self.val_batch_size, output) 206 | print('Cosine Loss: %s' % loss) 207 | 208 | database_batch = int(ceil(img_database.n_samples / self.val_batch_size)) 209 | print("%s #validation# totally %d database in %d batches" % 210 | (datetime.now(), img_database.n_samples, database_batch)) 211 | for i in range(database_batch): 212 | images, labels = img_database.next_batch(self.val_batch_size) 213 | 214 | output, loss = self.sess.run([self.img_last_layer, self.cos_loss], 215 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 216 | img_database.feed_batch_output(self.val_batch_size, output) 217 | # print output[:10, :10] 218 | if i % 100 == 0: 219 | print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss)) 220 | # save features and codes 221 | self.save_codes(img_database, img_query) 222 | 223 | mAPs = MAPs(R) 224 | 225 | self.sess.close() 226 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 227 | return { 228 | 'i2i_by_feature': mAPs.get_mAPs_by_feature(img_database, img_query), 229 | 'i2i_after_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 230 | 'i2i_map_radius_2': mmap, 231 | 'i2i_prec_radius_2': prec, 232 | 'i2i_recall_radius_2': rec 233 | } 234 | 235 | 236 | def train(train_img, config): 237 | model = DHN(config) 238 | img_dataset = Dataset(train_img, config.output_dim) 239 | model.train(img_dataset) 240 | return model.model_file 241 | 242 | 243 | def validation(database_img, query_img, config): 244 | model = DHN(config) 245 | img_database = Dataset(database_img, config.output_dim) 246 | img_query = Dataset(query_img, config.output_dim) 247 | return model.validation(img_query, img_database, config.R) 248 | -------------------------------------------------------------------------------- /model/dhcs.py: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | # Deep Hashing Network for Efficient Similarity Retrieval # 3 | # Authors: Han Zhu, Mingsheng Long, Jianmin Wang, Yue Cao # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import shutil 9 | import time 10 | from datetime import datetime 11 | from math import ceil 12 | 13 | import numpy as np 14 | import tensorflow as tf 15 | 16 | from architecture import * 17 | from loss import * 18 | from util import * 19 | from evaluation import * 20 | from data_provider.pairwise import Dataset 21 | 22 | 23 | class DHCS(object): 24 | def __init__(self, config): 25 | # Initialize setting 26 | print("initializing") 27 | np.set_printoptions(precision=4) 28 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 29 | self.device = '/gpu:' + config.gpus 30 | self.bit = config.bit 31 | self.n_class = config.label_dim 32 | self.q_lambda = config.q_lambda 33 | self.b_lambda = config.b_lambda 34 | self.i_lambda = config.i_lambda 35 | self.alpha = config.alpha 36 | self.wordvec_dict = config.wordvec_dict 37 | 38 | self.batch_size = config.batch_size 39 | self.val_batch_size = config.val_batch_size 40 | self.max_iter = config.max_iter 41 | self.network = config.network 42 | self.learning_rate = config.lr 43 | self.lr_decay_factor = config.lr_decay_factor 44 | self.decay_step = config.decay_step 45 | self.finetune_all = config.finetune_all 46 | 47 | self.save_dir = config.save_dir 48 | self.model_file = os.path.join(self.save_dir, 
'network_weights.npy') 49 | self.codes_file = os.path.join(self.save_dir, 'codes.npy') 50 | self.tflog_path = os.path.join(self.save_dir, 'tflog') 51 | 52 | # Setup session 53 | print("launching session") 54 | configProto = tf.ConfigProto() 55 | configProto.gpu_options.allow_growth = True 56 | configProto.allow_soft_placement = True 57 | self.sess = tf.Session(config=configProto) 58 | 59 | # Create variables and placeholders 60 | 61 | with tf.device(self.device): 62 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 63 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 64 | try: 65 | self.wordvec = tf.constant(np.loadtxt(self.wordvec_dict), dtype=tf.float32) 66 | except OSError: 67 | print(f'{self.wordvec_dict} does not exist!') 68 | self.wordvec = None 69 | 70 | self.network_weights = config.network_weights 71 | self.img_last_layer, self.deep_param_img, self.train_layers = self.load_model() 72 | 73 | self.global_step = tf.Variable(0, trainable=False) 74 | self.train_op = self.apply_loss_function(self.global_step) 75 | self.sess.run(tf.global_variables_initializer()) 76 | 77 | if config.debug: 78 | from tensorflow.python import debug as tf_debug 79 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 80 | 81 | 82 | def load_model(self): 83 | networks = {'alexnet': img_alexnet_layers, 'vgg16': img_vgg16_layers} 84 | try: 85 | img_output = networks[self.network]( 86 | self.img, self.batch_size, self.bit, 87 | self.stage, self.network_weights, self.val_batch_size) 88 | except KeyError: 89 | raise Exception('cannot use such CNN model as ' + self.network) 90 | return img_output 91 | 92 | 93 | def save_model(self, model_file=None): 94 | if model_file is None: 95 | model_file = self.model_file 96 | model = {} 97 | for layer in self.deep_param_img: 98 | model[layer] = self.sess.run(self.deep_param_img[layer]) 99 | print("saving model to %s" % model_file) 100 | folder = os.path.dirname(model_file) 101 | if not os.path.exists(folder): 102 | os.makedirs(folder) 103 | np.save(model_file, np.array(model)) 104 | return 105 | 106 | 107 | def load_codes(self, codes_file=None): 108 | if codes_file is None: 109 | codes_file = self.codes_file 110 | codes = np.load(codes_file).item() 111 | 112 | import collections 113 | mDataset = collections.namedtuple('Dataset', ['output', 'label']) 114 | database = mDataset(codes['db_features'], codes['db_label']) 115 | query = mDataset(codes['query_features'], codes['query_label']) 116 | return database, query 117 | 118 | 119 | def save_codes(self, database, query, codes_file=None): 120 | if codes_file is None: 121 | codes_file = self.codes_file 122 | codes = { 123 | 'db_features': database.output, 124 | 'db_label': database.label, 125 | 'query_features': query.output, 126 | 'query_label': query.label, 127 | } 128 | print("saving codes to %s" % codes_file) 129 | np.save(codes_file, np.array(codes)) 130 | 131 | 132 | def apply_loss_function(self, global_step): 133 | # loss function 134 | self.S_loss = exp_loss(self.img_last_layer, self.img_label, self.alpha, self.wordvec) 135 | self.q_loss = quantization_loss(self.img_last_layer, q_type='L2') 136 | self.b_loss = balance_loss(self.img_last_layer) 137 | self.i_loss = independence_loss(self.img_last_layer) 138 | self.loss = self.S_loss + self.q_lambda * self.q_loss + \ 139 | self.b_lambda * self.b_loss + \ 140 | self.i_lambda * self.i_loss 141 | 
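# The composite objective above pairs one similarity term with three
# regularizers. A minimal NumPy sketch of what each regularizer measures,
# mirroring the definitions in loss/__init__.py (illustrative only, not
# part of the original file):
#
#   import numpy as np
#   u = np.random.uniform(-1, 1, (8, 16))  # batch of 8 continuous codes, 16 bits
#   H = np.sign(u)
#   q = np.mean((np.abs(u) - 1.0) ** 2)                 # quantization: |u| -> 1
#   b = np.mean(np.mean(H, axis=0) ** 2)                # balance: bit means -> 0
#   i = np.mean((H.T @ H / len(H) - np.eye(16)) ** 2)   # independence: H^T H / N -> I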
142 | # for debug 143 | tf.summary.scalar('loss', self.loss) 144 | tf.summary.scalar('similar_loss', self.S_loss) 145 | tf.summary.scalar('quantization_loss', self.q_loss) 146 | tf.summary.scalar('balance_loss', self.b_loss) 147 | tf.summary.scalar('independence_loss', self.i_loss) 148 | self.merged = tf.summary.merge_all() 149 | 150 | # Last layer has a 10 times learning rate 151 | lr = tf.train.exponential_decay( 152 | self.learning_rate, global_step, self.decay_step, self.lr_decay_factor, staircase=True) 153 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9) 154 | grads_and_vars = opt.compute_gradients(self.loss, self.train_layers) 155 | 156 | capped_grads_and_vars = [] 157 | if self.finetune_all: 158 | for i, grad in enumerate(grads_and_vars[:-2]): 159 | if i % 2 == 0: 160 | capped_grads_and_vars.append((grad[0], grad[1])) 161 | else: 162 | capped_grads_and_vars.append((grad[0]*2, grad[1])) 163 | capped_grads_and_vars.append((grads_and_vars[-2][0]*10, grads_and_vars[-2][1])) 164 | capped_grads_and_vars.append((grads_and_vars[-1][0]*20, grads_and_vars[-1][1])) 165 | 166 | return opt.apply_gradients(capped_grads_and_vars, global_step=global_step) 167 | 168 | 169 | def train(self, img_dataset): 170 | print("%s #train# start training" % datetime.now()) 171 | 172 | # tensorboard 173 | if os.path.exists(self.tflog_path): 174 | shutil.rmtree(self.tflog_path) 175 | train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph) 176 | 177 | for train_iter in range(self.max_iter): 178 | images, labels = img_dataset.next_batch(self.batch_size) 179 | 180 | start_time = time.time() 181 | 182 | _, loss, S_loss, q_loss, output, summary = self.sess.run( 183 | [self.train_op, self.loss, self.S_loss, self.q_loss, self.img_last_layer, self.merged], 184 | feed_dict={self.img: images, 185 | self.img_label: labels}) 186 | 187 | img_dataset.feed_batch_output(self.batch_size, output) 188 | duration = time.time() - start_time 189 | 190 | train_writer.add_summary(summary, train_iter) 191 | if train_iter % 100 == 0: 192 | print("%s #train# step %4d, loss = %.4f, similar loss = %.4f, quantization loss = %.4f, %.1f sec/batch" 193 | % (datetime.now(), train_iter + 1, loss, S_loss, q_loss, duration)) 194 | 195 | print("%s #train# finish training" % datetime.now()) 196 | self.save_model() 197 | print("model saved") 198 | 199 | self.sess.close() 200 | 201 | 
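# validation() below caches extracted features: the first run writes
# codes.npy via save_codes(), and later runs short-circuit into
# load_codes(). A minimal sketch of that load-or-compute pattern
# (load_or_compute is a hypothetical helper, not part of this file):
#
#   def load_or_compute(path, compute_fn):
#       if os.path.exists(path):
#           return np.load(path).item()   # dict saved via np.save(np.array(dict))
#       result = compute_fn()
#       np.save(path, np.array(result))
#       return result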
202 | def validation(self, img_database, img_query, R=100): 203 | if os.path.exists(self.codes_file): 204 | print("loading ", self.codes_file) 205 | img_database, img_query = self.load_codes(self.codes_file) 206 | else: 207 | print("%s #validation# start validation" % (datetime.now())) 208 | query_batch = int(ceil(img_query.n_samples / self.val_batch_size)) 209 | print("%s #validation# totally %d query in %d batches" % (datetime.now(), img_query.n_samples, query_batch)) 210 | for i in range(query_batch): 211 | images, labels = img_query.next_batch(self.val_batch_size) 212 | output, loss = self.sess.run([self.img_last_layer, self.S_loss], 213 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 214 | img_query.feed_batch_output(self.val_batch_size, output) 215 | print('Cosine Loss: %s' % loss) 216 | 217 | database_batch = int(ceil(img_database.n_samples / self.val_batch_size)) 218 | print("%s #validation# totally %d database in %d batches" % 219 | (datetime.now(), img_database.n_samples, database_batch)) 220 | for i in range(database_batch): 221 | images, labels = img_database.next_batch(self.val_batch_size) 222 | 223 | output, loss = self.sess.run([self.img_last_layer, self.S_loss], 224 | feed_dict={self.img: images, self.img_label: labels, self.stage: 1}) 225 | img_database.feed_batch_output(self.val_batch_size, output) 226 | # print output[:10, :10] 227 | if i % 100 == 0: 228 | print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss)) 229 | # save features and codes 230 | self.save_codes(img_database, img_query) 231 | 232 | self.sess.close() 233 | 234 | db_feats = img_database.output 235 | db_codes = sign(img_database.output) 236 | db_labels = img_database.label 237 | q_feats = img_query.output 238 | q_codes = sign(img_query.output) 239 | q_labels = img_query.label 240 | 241 | print("visualizing data ...") 242 | plot_tsne(np.row_stack((db_codes, q_codes)), np.row_stack((db_labels, q_labels)), self.save_dir) 243 | plot_distance(db_feats, db_labels, q_feats, q_labels, self.save_dir) 244 | print(plot_distribution(db_feats, self.save_dir)) 245 | 246 | print("calculating metrics ...") 247 | mAPs = MAPs(R) 248 | prec, rec, mmap = mAPs.get_precision_recall_by_Hamming_Radius(img_database, img_query, 2) 249 | return { 250 | 'mAP_sign': mAPs.get_mAPs_after_sign(img_database, img_query), 251 | 'mAP_WhRank': get_whrank_mAP(q_feats, q_codes, q_labels, db_feats, db_codes, db_labels, Rs=R), 252 | 'mAP_finetune': get_finetune_mAP(q_feats, q_codes, q_labels, db_feats, db_codes, db_labels, Rs=R), 253 | 'mAP_feat': mAPs.get_mAPs_by_feature(img_database, img_query), 254 | 'RAMAP': get_RAMAP(q_codes, q_labels, db_codes, db_labels), 255 | 'mAP_radius2': mmap, 256 | 'prec_radius2': prec, 257 | 'recall_radius2': rec 258 | } 259 | 260 | 261 | def train(train_img, config): 262 | model = DHCS(config) 263 | img_dataset = Dataset(train_img, config.bit) 264 | model.train(img_dataset) 265 | return model.model_file 266 | 267 | 268 | def validation(database_img, query_img, config): 269 | model = DHCS(config) 270 | img_database = Dataset(database_img, config.bit) 271 | img_query = Dataset(query_img, config.bit) 272 | return model.validation(img_database, img_query, config.R) 273 | -------------------------------------------------------------------------------- /architecture/vgg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | def img_vgg16_layers(img, batch_size, output_dim, stage, model_weights=None, val_batch_size=32, with_tanh=True): 7 | deep_param_img = {} 8 | train_layers = [] 9 | 10 | if model_weights is None: 11 | dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | model_weights = os.path.join(dir_path, "pretrained_model/vgg16_weights.npy") 13 | 14 | print("loading img model from ", model_weights) 15 | net_data = dict(np.load(model_weights, encoding='bytes').item()) 16 | print(list(net_data.keys())) 17 | 18 | # reverse channel order: rgb -> bgr (the pretrained VGG16 weights expect BGR input) 19 | reshaped_image = tf.cast(img, tf.float32)[:, :, :, ::-1] 20 | 21 | height = 224 22 | width = 224 23 | 24 | # Randomly crop a [height, width] section of each image 25 | with tf.name_scope('preprocess'): 26 | def train_fn(): 27 | return tf.stack([tf.random_crop(tf.image.random_flip_left_right(each), [height, width, 3]) 28 | for each in tf.unstack(reshaped_image, batch_size)]) 29 | 30 | def val_fn(): 31 | unstacked = tf.unstack(reshaped_image, val_batch_size) 32 | 33 | def crop(img, x, y): return tf.image.crop_to_bounding_box( 34 | img, x, y, width, height) 35 | 36 | def distort(f, x, y): return tf.stack( 37 | [crop(f(each), x, y) for each in unstacked]) 38 | 39 | def distort_raw(x, y): return distort(lambda x: x, x, y) 40 | 41 | def distort_fliped(x, y): return distort( 42 | 
tf.image.flip_left_right, x, y) 43 | distorted = tf.concat([distort_fliped(0, 0), distort_fliped(28, 0), 44 | distort_fliped( 45 | 0, 28), distort_fliped(28, 28), 46 | distort_fliped(14, 14), distort_raw(0, 0), 47 | distort_raw(28, 0), distort_raw(0, 28), 48 | distort_raw(28, 28), distort_raw(14, 14)], 0) 49 | 50 | return distorted 51 | distorted = tf.cond(stage > 0, val_fn, train_fn) 52 | 53 | # Zero-mean input 54 | mean = tf.constant([103.939, 116.779, 123.68], dtype=tf.float32, shape=[ 55 | 1, 1, 1, 3], name='img-mean') 56 | distorted = distorted - mean 57 | 58 | # conv1_1 59 | with tf.name_scope('conv1_1') as scope: 60 | kernel = tf.Variable(net_data['conv1_1'][0], name='weights') 61 | conv = tf.nn.conv2d(distorted, kernel, [1, 1, 1, 1], padding='SAME') 62 | biases = tf.Variable(net_data['conv1_1'][1], trainable=True, name='biases') 63 | out = tf.nn.bias_add(conv, biases) 64 | conv1_1 = tf.nn.relu(out, name=scope) 65 | deep_param_img['conv1_1'] = [kernel, biases] 66 | train_layers += [kernel, biases] 67 | 68 | # conv1_2 69 | with tf.name_scope('conv1_2') as scope: 70 | kernel = tf.Variable(net_data['conv1_2'][0], name='weights') 71 | conv = tf.nn.conv2d(conv1_1, kernel, [1, 1, 1, 1], padding='SAME') 72 | biases = tf.Variable(net_data['conv1_2'][1], 73 | trainable=True, name='biases') 74 | out = tf.nn.bias_add(conv, biases) 75 | conv1_2 = tf.nn.relu(out, name=scope) 76 | deep_param_img['conv1_2'] = [kernel, biases] 77 | train_layers += [kernel, biases] 78 | 79 | # pool1 80 | pool1 = tf.nn.max_pool(conv1_2, 81 | ksize=[1, 2, 2, 1], 82 | strides=[1, 2, 2, 1], 83 | padding='SAME', 84 | name='pool1') 85 | 86 | # conv2_1 87 | with tf.name_scope('conv2_1') as scope: 88 | kernel = tf.Variable(net_data['conv2_1'][0], name='weights') 89 | conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME') 90 | biases = tf.Variable(net_data['conv2_1'][1], 91 | trainable=True, name='biases') 92 | out = tf.nn.bias_add(conv, biases) 93 | conv2_1 = tf.nn.relu(out, name=scope) 94 | deep_param_img['conv2_1'] = [kernel, biases] 95 | train_layers += [kernel, biases] 96 | 97 | # conv2_2 98 | with tf.name_scope('conv2_2') as scope: 99 | kernel = tf.Variable(net_data['conv2_2'][0], name='weights') 100 | conv = tf.nn.conv2d(conv2_1, kernel, [1, 1, 1, 1], padding='SAME') 101 | biases = tf.Variable(net_data['conv2_2'][1], 102 | trainable=True, name='biases') 103 | out = tf.nn.bias_add(conv, biases) 104 | conv2_2 = tf.nn.relu(out, name=scope) 105 | deep_param_img['conv2_2'] = [kernel, biases] 106 | train_layers += [kernel, biases] 107 | 108 | # pool2 109 | pool2 = tf.nn.max_pool(conv2_2, 110 | ksize=[1, 2, 2, 1], 111 | strides=[1, 2, 2, 1], 112 | padding='SAME', 113 | name='pool2') 114 | 115 | # conv3_1 116 | with tf.name_scope('conv3_1') as scope: 117 | kernel = tf.Variable(net_data['conv3_1'][0], name='weights') 118 | conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME') 119 | biases = tf.Variable(net_data['conv3_1'][1], 120 | trainable=True, name='biases') 121 | out = tf.nn.bias_add(conv, biases) 122 | conv3_1 = tf.nn.relu(out, name=scope) 123 | deep_param_img['conv3_1'] = [kernel, biases] 124 | train_layers += [kernel, biases] 125 | 126 | # conv3_2 127 | with tf.name_scope('conv3_2') as scope: 128 | kernel = tf.Variable(net_data['conv3_2'][0], name='weights') 129 | conv = tf.nn.conv2d(conv3_1, kernel, [1, 1, 1, 1], padding='SAME') 130 | biases = tf.Variable(net_data['conv3_2'][1], 131 | trainable=True, name='biases') 132 | out = tf.nn.bias_add(conv, biases) 133 | conv3_2 = tf.nn.relu(out, 
name=scope) 134 | deep_param_img['conv3_2'] = [kernel, biases] 135 | train_layers += [kernel, biases] 136 | 137 | # conv3_3 138 | with tf.name_scope('conv3_3') as scope: 139 | kernel = tf.Variable(net_data['conv3_3'][0], name='weights') 140 | conv = tf.nn.conv2d(conv3_2, kernel, [1, 1, 1, 1], padding='SAME') 141 | biases = tf.Variable(net_data['conv3_3'][1], 142 | trainable=True, name='biases') 143 | out = tf.nn.bias_add(conv, biases) 144 | conv3_3 = tf.nn.relu(out, name=scope) 145 | deep_param_img['conv3_3'] = [kernel, biases] 146 | train_layers += [kernel, biases] 147 | 148 | # pool3 149 | pool3 = tf.nn.max_pool(conv3_3, 150 | ksize=[1, 2, 2, 1], 151 | strides=[1, 2, 2, 1], 152 | padding='SAME', 153 | name='pool3') 154 | 155 | # conv4_1 156 | with tf.name_scope('conv4_1') as scope: 157 | kernel = tf.Variable(net_data['conv4_1'][0], name='weights') 158 | conv = tf.nn.conv2d(pool3, kernel, [1, 1, 1, 1], padding='SAME') 159 | biases = tf.Variable(net_data['conv4_1'][1], 160 | trainable=True, name='biases') 161 | out = tf.nn.bias_add(conv, biases) 162 | conv4_1 = tf.nn.relu(out, name=scope) 163 | deep_param_img['conv4_1'] = [kernel, biases] 164 | train_layers += [kernel, biases] 165 | 166 | # conv4_2 167 | with tf.name_scope('conv4_2') as scope: 168 | kernel = tf.Variable(net_data['conv4_2'][0], name='weights') 169 | conv = tf.nn.conv2d(conv4_1, kernel, [1, 1, 1, 1], padding='SAME') 170 | biases = tf.Variable(net_data['conv4_2'][1], 171 | trainable=True, name='biases') 172 | out = tf.nn.bias_add(conv, biases) 173 | conv4_2 = tf.nn.relu(out, name=scope) 174 | deep_param_img['conv4_2'] = [kernel, biases] 175 | train_layers += [kernel, biases] 176 | 177 | # conv4_3 178 | with tf.name_scope('conv4_3') as scope: 179 | kernel = tf.Variable(net_data['conv4_3'][0], name='weights') 180 | conv = tf.nn.conv2d(conv4_2, kernel, [1, 1, 1, 1], padding='SAME') 181 | biases = tf.Variable(net_data['conv4_3'][1], 182 | trainable=True, name='biases') 183 | out = tf.nn.bias_add(conv, biases) 184 | conv4_3 = tf.nn.relu(out, name=scope) 185 | deep_param_img['conv4_3'] = [kernel, biases] 186 | train_layers += [kernel, biases] 187 | 188 | # pool4 189 | pool4 = tf.nn.max_pool(conv4_3, 190 | ksize=[1, 2, 2, 1], 191 | strides=[1, 2, 2, 1], 192 | padding='SAME', 193 | name='pool4') 194 | 195 | # conv5_1 196 | with tf.name_scope('conv5_1') as scope: 197 | kernel = tf.Variable(net_data['conv5_1'][0], name='weights') 198 | conv = tf.nn.conv2d(pool4, kernel, [1, 1, 1, 1], padding='SAME') 199 | biases = tf.Variable(net_data['conv5_1'][1], 200 | trainable=True, name='biases') 201 | out = tf.nn.bias_add(conv, biases) 202 | conv5_1 = tf.nn.relu(out, name=scope) 203 | deep_param_img['conv5_1'] = [kernel, biases] 204 | train_layers += [kernel, biases] 205 | 206 | # conv5_2 207 | with tf.name_scope('conv5_2') as scope: 208 | kernel = tf.Variable(net_data['conv5_2'][0], name='weights') 209 | conv = tf.nn.conv2d(conv5_1, kernel, [1, 1, 1, 1], padding='SAME') 210 | biases = tf.Variable(net_data['conv5_2'][1], 211 | trainable=True, name='biases') 212 | out = tf.nn.bias_add(conv, biases) 213 | conv5_2 = tf.nn.relu(out, name=scope) 214 | deep_param_img['conv5_2'] = [kernel, biases] 215 | train_layers += [kernel, biases] 216 | 217 | # conv5_3 218 | with tf.name_scope('conv5_3') as scope: 219 | kernel = tf.Variable(net_data['conv5_3'][0], name='weights') 220 | conv = tf.nn.conv2d(conv5_2, kernel, [1, 1, 1, 1], padding='SAME') 221 | biases = tf.Variable(net_data['conv5_3'][1], 222 | trainable=True, name='biases') 223 | out = 
tf.nn.bias_add(conv, biases) 224 | conv5_3 = tf.nn.relu(out, name=scope) 225 | deep_param_img['conv5_3'] = [kernel, biases] 226 | train_layers += [kernel, biases] 227 | 228 | # pool5 229 | pool5 = tf.nn.max_pool(conv5_3, 230 | ksize=[1, 2, 2, 1], 231 | strides=[1, 2, 2, 1], 232 | padding='SAME', 233 | name='pool5') 234 | 235 | # fc6 236 | with tf.name_scope('fc6') as scope: 237 | shape = int(np.prod(pool5.get_shape()[1:])) 238 | fc6w = tf.Variable(net_data['fc6'][0], name='weights') 239 | fc6b = tf.Variable(net_data['fc6'][1], 240 | trainable=True, name='biases') 241 | pool5_flat = tf.reshape(pool5, [-1, shape]) 242 | fc6l = tf.nn.bias_add(tf.matmul(pool5_flat, fc6w), fc6b) 243 | fc6 = tf.nn.relu(fc6l) 244 | deep_param_img['fc6'] = [fc6w, fc6b] 245 | train_layers += [fc6w, fc6b] 246 | 247 | # fc7 248 | with tf.name_scope('fc7') as scope: 249 | fc7w = tf.Variable(net_data['fc7'][0], name='weights') 250 | fc7b = tf.Variable(net_data['fc7'][1], 251 | trainable=True, name='biases') 252 | fc7l = tf.nn.bias_add(tf.matmul(fc6, fc7w), fc7b) 253 | fc7 = tf.nn.relu(fc7l) 254 | deep_param_img['fc7'] = [fc7w, fc7b] 255 | train_layers += [fc7w, fc7b] 256 | 257 | # FC8 258 | # Output output_dim 259 | with tf.name_scope('fc8'): 260 | # Differ train and val stage by 'fc8' as key 261 | if 'fc8' in net_data: 262 | fc8w = tf.Variable(net_data['fc8'][0], name='weights') 263 | fc8b = tf.Variable(net_data['fc8'][1], name='biases') 264 | else: 265 | fc8w = tf.Variable(tf.random_normal([4096, output_dim], 266 | dtype=tf.float32, 267 | stddev=1e-2), name='weights') 268 | fc8b = tf.Variable(tf.constant(0.0, shape=[output_dim], 269 | dtype=tf.float32), name='biases') 270 | fc8l = tf.nn.bias_add(tf.matmul(fc7, fc8w), fc8b) 271 | 272 | if with_tanh: 273 | fc8_t = tf.nn.tanh(fc8l) 274 | else: 275 | fc8_t = fc8l 276 | 277 | def val_fn1(): 278 | concated = tf.concat([tf.expand_dims(i, 0) 279 | for i in tf.split(fc8_t, 10, 0)], 0) 280 | return tf.reduce_mean(concated, 0) 281 | fc8 = tf.cond(stage > 0, val_fn1, lambda: fc8_t) 282 | 283 | deep_param_img['fc8'] = [fc8w, fc8b] 284 | train_layers += [fc8w, fc8b] 285 | 286 | print("img model loading finished") 287 | 288 | return fc8, deep_param_img, train_layers 289 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from distance.npversion import distance 4 | from scipy.special import comb, erf 5 | from util import sign 6 | 7 | 8 | def get_RAMAP(q_output, q_labels, db_output, db_labels, cost=False): 9 | ''' 10 | - On the Evaluation Metric for Hashing 11 | ''' 12 | M, Q = q_output.shape 13 | R = Q 14 | RAAPs = [] 15 | time_costs = [comb(Q, r) for r in range(Q+1)] 16 | distH = distance(q_output, db_output, pair=False, dist_type='hamming') 17 | gnds = np.dot(q_labels, db_labels.transpose()) > 0 18 | for i in range(M): 19 | gnd = gnds[i,:] 20 | hamm = distH[i,:] 21 | RAAP = 0 22 | for r in range(R+1): 23 | hamm_r_idx = np.where(hamm<=r) 24 | rel = len(hamm_r_idx[0]) 25 | if(rel == 0): 26 | continue 27 | imatch = np.sum(gnd[hamm_r_idx]) 28 | if cost: 29 | time_cost = np.sum(time_costs[:r+1]) 30 | RAAP += (imatch / (rel * time_cost)) 31 | else: 32 | RAAP += (imatch / rel) 33 | RAAP = RAAP / (R + 1) 34 | RAAPs.append(RAAP) 35 | return np.mean(RAAPs) 36 | 37 | 
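# A minimal usage sketch for get_RAMAP with random binary codes and
# multi-hot labels, in the spirit of the self-test at the bottom of this
# file (illustrative only; all data here is synthetic):
#
#   q_codes = np.sign(np.random.rand(100, 16) - 0.5)
#   db_codes = np.sign(np.random.rand(1000, 16) - 0.5)
#   q_labels = (np.random.rand(100, 10) > 0.7).astype(float)
#   db_labels = (np.random.rand(1000, 10) > 0.7).astype(float)
#   print(get_RAMAP(q_codes, q_labels, db_codes, db_labels))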
38 | def whrank(features, labels): 39 | N, D = features.shape 40 | classes = np.unique(labels) 41 | pairnum = N 42 | diffvals = np.zeros((pairnum, D)) 43 | for i in range(pairnum): 44 | clsid = np.random.choice(classes, 1) 45 | sampids = np.where(labels == clsid)[0] 46 | samps = np.random.permutation(sampids)[:2] 47 | diffvals[i] = features[samps[0], :] - features[samps[1], :] 48 | fmu = np.mean(diffvals, axis=0) 49 | fstd = np.std(diffvals, axis=0) 50 | return fmu, fstd 51 | 52 | 53 | def whrankHamm(q_codes, db_codes, q_feats, fmu, fstd, w_type='ones'): 54 | if w_type == 'ones': 55 | weights = np.ones_like(q_feats) 56 | elif w_type == 'q': 57 | weights = np.abs(q_feats) 58 | elif w_type == 'std': 59 | weights = np.ones_like(q_feats) / fstd 60 | elif w_type == 'q_std': 61 | weights = np.abs(q_feats) / fstd 62 | elif w_type == 'erf': 63 | Pr = 0.5 * (1 + q_codes * erf((-q_feats-fmu) / (np.sqrt(2)*fstd))) 64 | weights = np.log((1 - Pr) / Pr) 65 | 66 | num1 = q_codes.shape[0] 67 | num2 = db_codes.shape[0] 68 | distMat = np.zeros((num1, num2)) 69 | for i in range(num1): 70 | codediff = np.abs(np.tile(q_codes[i], (num2, 1)) - db_codes) / 2 71 | distMat[i] = np.dot(weights[i], codediff.transpose()) 72 | return distMat 73 | 74 | 75 | def get_whrank_mAP(q_features, q_output, q_labels, db_features, db_output, db_labels, Rs=54000): 76 | fmu, fstd = whrank(db_features, np.argmax(db_labels, axis=1)) 77 | dist = whrankHamm(q_output, db_output, q_features, fmu, fstd, w_type='erf') 78 | unsorted_ids = np.argpartition(dist, Rs - 1)[:, :Rs] 79 | APx = [] 80 | for i in range(dist.shape[0]): 81 | label = q_labels[i, :] 82 | label[label == 0] = -1 83 | idx = unsorted_ids[i, :] 84 | idx = idx[np.argsort(dist[i, :][idx])] 85 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 86 | rel = np.sum(imatch) 87 | Lx = np.cumsum(imatch) 88 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 89 | if rel != 0: 90 | APx.append(np.sum(Px * imatch) / rel) 91 | return np.mean(np.array(APx)) 92 | 93 | 94 | def finetune_distID(dist, q_features, db_features): 95 | N, D = q_features.shape 96 | distID_finetune = np.zeros_like(dist) 97 | for i in range(N): 98 | cur = 0 99 | for j in range(D+1): 100 | idx = np.where(dist[i] == j)[0] 101 | num = len(idx) 102 | if num > 0: 103 | d = distance(q_features[i], db_features[idx], dist_type='inner_product', pair=True) 104 | idx = idx[np.argsort(d)] 105 | distID_finetune[i,cur:cur+num] = idx 106 | cur += num 107 | distID_finetune = distID_finetune.astype(int) 108 | return distID_finetune 109 | 110 | 111 | def get_finetune_mAP(q_features, q_output, q_labels, db_features, db_output, db_labels, Rs=54000): 112 | dist_raw = distance(q_output, db_output, pair=False, dist_type='hamming') 113 | dist_raw = np.partition(dist_raw, Rs - 1)[:, :Rs] 114 | dist_finetune_idx = finetune_distID(dist_raw, q_features, db_features) 115 | 116 | N = dist_raw.shape[0] 117 | dist_idx = dist_finetune_idx 118 | APx = [] 119 | for i in range(N): 120 | label = q_labels[i, :] 121 | label[label == 0] = -1 122 | idx = dist_idx[i, :] 123 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 124 | rel = np.sum(imatch) 125 | Lx = np.cumsum(imatch) 126 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 127 | if rel != 0: 128 | APx.append(np.sum(Px * imatch) / rel) 129 | mAP = np.mean(np.array(APx)) 130 | return mAP 131 | 132 | 133 | # optimized 134 | def get_mAPs(q_output, q_labels, db_output, db_labels, Rs, dist_type='inner_product'): 135 | dist = distance(q_output, db_output, dist_type=dist_type, pair=True) 136 | unsorted_ids = np.argpartition(dist, Rs - 1)[:, :Rs] 137 | APx = [] 138 | q_labels_tmp = np.copy(q_labels) 139 | for i in range(dist.shape[0]): 
140 | label = q_labels_tmp[i, :] 141 | label[label == 0] = -1 142 | idx = unsorted_ids[i, :] 143 | idx = idx[np.argsort(dist[i, :][idx])] 144 | imatch = np.sum(np.equal(db_labels[idx[0: Rs], :], label), 1) > 0 145 | rel = np.sum(imatch) 146 | Lx = np.cumsum(imatch) 147 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 148 | if rel != 0: 149 | APx.append(np.sum(Px * imatch) / rel) 150 | return np.mean(np.array(APx)) 151 | 152 | 153 | def get_mAPs_rerank(q_output, q_labels, db_output, db_labels, Rs, dist_type='inner_product'): 154 | query_output = sign(q_output) 155 | database_output = sign(db_output) 156 | 157 | bit_n = query_output.shape[1] 158 | 159 | ips = np.dot(query_output, database_output.T) 160 | ips = (bit_n - ips) / 2 161 | 162 | mAPX = [] 163 | query_labels = q_labels 164 | database_labels = db_labels 165 | for i in range(ips.shape[0]): 166 | label = query_labels[i, :] 167 | label[label == 0] = -1 168 | 169 | imatch = np.array([]) 170 | for j in range(bit_n): 171 | idx = np.reshape(np.argwhere(np.equal(ips[i, :], j)), (-1)) 172 | all_num = len(idx) 173 | 174 | if all_num != 0: 175 | ips_trad = np.dot(q_output[i, :], db_output[idx[:], :].T) 176 | ids_trad = np.argsort(-ips_trad, axis=0) 177 | db_labels_1 = database_labels[idx[:], :] 178 | 179 | imatch = np.append(imatch, np.sum( 180 | np.equal(db_labels_1[ids_trad, :], label), 1) > 0) 181 | if imatch.shape[0] > Rs: 182 | break 183 | 184 | imatch = imatch[0:Rs] 185 | rel = np.sum(imatch) 186 | Lx = np.cumsum(imatch) 187 | Px = Lx.astype(float) / np.arange(1, Rs + 1, 1) 188 | if rel != 0: 189 | mAPX.append(np.sum(Px * imatch) / rel) 190 | 191 | return np.mean(np.array(mAPX)) 192 | 193 | 194 | class MAPs: 195 | def __init__(self, R): 196 | self.R = R 197 | 198 | def get_mAPs_by_feature(self, database, query, Rs=None, dist_type='inner_product'): 199 | if Rs is None: 200 | Rs = self.R 201 | return get_mAPs(query.output, query.label, database.output, database.label, Rs, dist_type) 202 | 203 | def get_mAPs_after_sign(self, database, query, Rs=None, dist_type='inner_product'): 204 | if Rs is None: 205 | Rs = self.R 206 | q_output = sign(query.output) 207 | db_output = sign(database.output) 208 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 209 | 210 | def get_RAMAP_after_sign(self, database, query): 211 | q_output = sign(query.output) 212 | db_output = sign(database.output) 213 | return get_RAMAP(q_output, query.label, db_output, database.label) 214 | 215 | def get_mAPs_after_sign_with_feature_rerank(self, database, query, Rs=None, dist_type='inner_product'): 216 | if Rs is None: 217 | Rs = self.R 218 | return get_mAPs_rerank(query.output, query.label, database.output, database.label, Rs, dist_type) 219 | 220 | @staticmethod 221 | def get_precision_recall_by_Hamming_Radius(database, query, radius=2): 222 | query_output = sign(query.output) 223 | database_output = sign(database.output) 224 | 225 | bit_n = query_output.shape[1] 226 | 227 | ips = np.dot(query_output, database_output.T) 228 | ips = (bit_n - ips) / 2 229 | ids = np.argsort(ips, 1) 230 | 231 | precX = [] 232 | recX = [] 233 | mAPX = [] 234 | query_labels = query.label 235 | database_labels = database.label 236 | 237 | for i in range(ips.shape[0]): 238 | label = query_labels[i, :] 239 | label[label == 0] = -1 240 | idx = np.reshape(np.argwhere(ips[i, :] <= radius), (-1)) 241 | all_num = len(idx) 242 | 243 | if all_num != 0: 244 | imatch = np.sum(database_labels[idx[:], :] == label, 1) > 0 245 | match_num = np.sum(imatch) 246 | 
precX.append(float(match_num) / all_num) 247 | 248 | all_sim_num = np.sum( 249 | np.sum(database_labels[:, :] == label, 1) > 0) 250 | recX.append(float(match_num) / all_sim_num) 251 | 252 | if radius < 10: 253 | ips_trad = np.dot( 254 | query.output[i, :], database.output[ids[i, 0:all_num], :].T) 255 | ids_trad = np.argsort(-ips_trad, axis=0) 256 | db_labels = database_labels[ids[i, 0:all_num], :] 257 | 258 | rel = match_num 259 | imatch = np.sum(db_labels[ids_trad, :] == label, 1) > 0 260 | Lx = np.cumsum(imatch) 261 | Px = Lx.astype(float) / np.arange(1, all_num + 1, 1) 262 | if rel != 0: 263 | mAPX.append(np.sum(Px * imatch) / rel) 264 | else: 265 | mAPX.append(float(match_num) / all_num) 266 | 267 | else: 268 | precX.append(0.0) 269 | recX.append(0.0) 270 | mAPX.append(0.0) 271 | 272 | return np.mean(np.array(precX)), np.mean(np.array(recX)), np.mean(np.array(mAPX)) 273 | 274 | @staticmethod 275 | def get_precision_recall_by_Hamming_Radius_All(database, query): 276 | query_output = sign(query.output) 277 | database_output = sign(database.output) 278 | 279 | bit_n = query_output.shape[1] 280 | 281 | ips = np.dot(query_output, database_output.T) 282 | ips = (bit_n - ips) / 2 283 | precX = np.zeros((ips.shape[0], bit_n + 1)) 284 | recX = np.zeros((ips.shape[0], bit_n + 1)) 285 | mAPX = np.zeros((ips.shape[0], bit_n + 1)) 286 | 287 | query_labels = query.label 288 | database_labels = database.label 289 | 290 | ids = np.argsort(ips, 1) 291 | 292 | for i in range(ips.shape[0]): 293 | label = query_labels[i, :] 294 | label[label == 0] = -1 295 | 296 | idx = ids[i, :] 297 | imatch = np.sum(database_labels[idx[:], :] == label, 1) > 0 298 | all_sim_num = np.sum(imatch) 299 | 300 | counts = np.bincount(ips[i, :].astype(np.int64)) 301 | 302 | for r in range(bit_n + 1): 303 | if r >= len(counts): 304 | precX[i, r] = precX[i, r - 1] 305 | recX[i, r] = recX[i, r - 1] 306 | mAPX[i, r] = mAPX[i, r - 1] 307 | continue 308 | 309 | all_num = np.sum(counts[0:r + 1]) 310 | 311 | if all_num != 0: 312 | match_num = np.sum(imatch[0:all_num]) 313 | precX[i, r] = float(match_num) / all_num 314 | recX[i, r] = float(match_num) / all_sim_num 315 | 316 | rel = match_num 317 | Lx = np.cumsum(imatch[0:all_num]) 318 | Px = Lx.astype(float) / np.arange(1, all_num + 1, 1) 319 | if rel != 0: 320 | mAPX[i, r] = np.sum(Px * imatch[0:all_num]) / rel 321 | return np.mean(np.array(precX), 0), np.mean(np.array(recX), 0), np.mean(np.array(mAPX), 0) 322 | 323 | 324 | class MAPs_CQ: 325 | def __init__(self, C, subspace_num, subcenter_num, R): 326 | self.C = C 327 | self.subspace_num = subspace_num 328 | self.subcenter_num = subcenter_num 329 | self.R = R 330 | 331 | def get_mAPs_SQD(self, database, query, Rs=None, dist_type='inner_product'): 332 | if Rs is None: 333 | Rs = self.R 334 | q_output = np.dot(query.codes, self.C) 335 | db_output = np.dot(database.codes, self.C) 336 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 337 | 338 | def get_mAPs_AQD(self, database, query, Rs=None, dist_type='inner_product'): 339 | if Rs is None: 340 | Rs = self.R 341 | q_output = query.output 342 | db_output = np.dot(database.codes, self.C) 343 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 344 | 345 | def get_mAPs_by_feature(self, database, query, Rs=None, dist_type='inner_product'): 346 | if Rs is None: 347 | Rs = self.R 348 | q_output = query.output 349 | db_output = database.output 350 | return get_mAPs(q_output, query.label, 
db_output, database.label, Rs, dist_type) 351 | 352 | def get_mAPs_after_sign(self, database, query, Rs=None, dist_type='inner_product'): 353 | if Rs is None: 354 | Rs = self.R 355 | q_output = sign(query.output) 356 | db_output = sign(database.output) 357 | return get_mAPs(q_output, query.label, db_output, database.label, Rs, dist_type) 358 | 359 | 360 | if __name__ == "__main__": 361 | m = MAPs(4) 362 | radius = 2 363 | 364 | class ds: 365 | def __init__(self): 366 | self.output = [] 367 | self.label = [] 368 | database = ds() 369 | query = ds() 370 | 371 | database.output = np.sign(np.random.rand(10000, 64) - 0.5) 372 | database.label = np.sign(np.random.rand(10000, 20) - 0.5) 373 | database.label[database.label < 0] = 0 374 | query.output = np.sign(np.random.rand(1000, 64) - 0.5) 375 | query.label = np.sign(np.random.rand(1000, 20) - 0.5) 376 | query.label[query.label < 0] = 0 377 | 378 | print(m.get_mAPs_after_sign_with_feature_rerank(database, query, 500)) 379 | print(m.get_mAPs_by_feature(database, query, 500)) 380 | prec, rec, maps = m.get_precision_recall_by_Hamming_Radius_All( 381 | database, query) 382 | print(prec) 383 | print(rec) 384 | print(maps) 385 | -------------------------------------------------------------------------------- /loss/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from distance.tfversion import distance 4 | from util import sign, reduce_shaper 5 | 6 | 7 | '''pairwise loss 8 | ''' 9 | 10 | def inner_product_loss(u, label_u, balanced=True): 11 | '''pairwise inner product loss 12 | - Hash with graph 13 | - Supervised Hashing for Image Retrieval via Image Representation Learning 14 | - Deep Discrete Supervised Hashing 15 | ''' 16 | with tf.name_scope('inner_product_loss'): 17 | B = tf.cast(tf.shape(u)[1], tf.float32) 18 | ip = tf.matmul(u, u, transpose_b=True) 19 | 20 | # let sim = {0, 1} to be {-1, 1} 21 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 22 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 23 | 24 | loss_1 = tf.square(tf.subtract(Sim, tf.div(ip, B))) 25 | 26 | if balanced: 27 | with tf.name_scope('balance'): 28 | sum_1 = tf.reduce_sum(S) 29 | sum_all = tf.reduce_sum(tf.abs(Sim)) 30 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 31 | tf.multiply(tf.div(sum_all, sum_1), S)) 32 | loss_1 = tf.multiply(loss_1, balance_param) 33 | 34 | loss = tf.reduce_mean(loss_1) 35 | return loss 36 | 37 | 38 | def cosine_loss(u, label_u, balanced=True): 39 | '''squared pairwise cosine loss 40 | - Deep Quantization Network for Efficient Image Retrieval 41 | ''' 42 | with tf.name_scope('cosine_loss'): 43 | ip_1 = tf.matmul(u, u, transpose_b=True) 44 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), reduce_shaper( 45 | tf.square(u)), transpose_b=True)) 46 | cos_1 = tf.div(ip_1, mod_1) 47 | 48 | # let Sim = {0, 1} to be {-1, 1} 49 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 50 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 51 | 52 | loss_1 = tf.square(tf.subtract(Sim, cos_1)) 53 | 54 | if balanced: 55 | with tf.name_scope('balance'): 56 | sum_1 = tf.reduce_sum(S) 57 | sum_all = tf.reduce_sum(tf.abs(Sim)) 58 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 59 | tf.multiply(tf.div(sum_all, sum_1), S)) 60 | loss_1 = tf.multiply(loss_1, balance_param) 61 | 62 | loss = tf.reduce_mean(loss_1) 63 | return loss 64 | 65 | 66 | def 
cross_entropy_loss(u, label_u, alpha=0.5, normed=True, balanced=True): 67 | '''cross entropy loss 68 | - Deep Hashing Network for Efficient Similarity Retrieval 69 | ''' 70 | with tf.name_scope('cross_entropy_loss'): 71 | if normed: 72 | ip_1 = tf.matmul(u, tf.transpose(u)) 73 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), 74 | reduce_shaper(tf.square(u)), transpose_b=True)) 75 | ip = tf.div(ip_1, mod_1) 76 | else: 77 | ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1) 78 | 79 | ones = tf.ones([tf.shape(u)[0], tf.shape(u)[0]]) 80 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 81 | 82 | loss_1 = tf.log(ones + tf.exp(alpha * ip)) - S * alpha * ip 83 | 84 | if balanced: 85 | with tf.name_scope('balance'): 86 | # let Sim \in {-1, 1} 87 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 88 | sum_1 = tf.reduce_sum(S) 89 | sum_all = tf.reduce_sum(tf.abs(Sim)) 90 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 91 | tf.multiply(tf.div(sum_all, sum_1), S)) 92 | loss_1 = tf.multiply(loss_1, balance_param) 93 | 94 | loss = tf.reduce_mean(loss_1) 95 | return loss 96 | 97 | 98 | def cauchy_cross_entropy_loss(u, label_u, gamma=16, normed=True): 99 | '''cauchy cross entropy loss 100 | - Deep Cauchy Hashing for Hamming Space Retrieval 101 | ''' 102 | with tf.name_scope('cauchy_cross_entropy_loss'): 103 | bit = tf.cast(tf.shape(u)[1], tf.float32) 104 | 105 | if normed: 106 | ip_1 = tf.matmul(u, tf.transpose(u)) 107 | mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)), reduce_shaper( 108 | tf.square(u)) + tf.constant(1e-6), transpose_b=True)) 109 | dist = bit / 2.0 * (1.0 - tf.div(ip_1, mod_1) + tf.constant(1e-6)) 110 | else: 111 | r_u = tf.reshape(tf.reduce_sum(u * u, 1), [-1, 1]) 112 | r_v = tf.reshape(tf.reduce_sum(u * u, 1), [-1, 1]) 113 | 114 | dist = r_u - 2 * tf.matmul(u, tf.transpose(u)) + \ 115 | tf.transpose(r_v) + tf.constant(0.001) 116 | 117 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 118 | with tf.name_scope('balance'): 119 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 120 | sum_1 = tf.reduce_sum(S) 121 | sum_all = tf.reduce_sum(tf.abs(Sim)) 122 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 123 | tf.multiply(tf.div(sum_all, sum_1), S)) 124 | 125 | mask = tf.equal(tf.eye(tf.shape(u)[0]), tf.constant(0.0)) 126 | cauchy = gamma / (dist + gamma) 127 | cauchy_mask = tf.boolean_mask(cauchy, mask) 128 | s_mask = tf.boolean_mask(S, mask) 129 | balance_p_mask = tf.boolean_mask(balance_param, mask) 130 | 131 | all_loss = - s_mask * \ 132 | tf.log(cauchy_mask) - (tf.constant(1.0) - s_mask) * \ 133 | tf.log(tf.constant(1.0) - cauchy_mask) 134 | 135 | loss = tf.reduce_mean(tf.multiply(all_loss, balance_p_mask)) 136 | return loss 137 | 138 | 139 | def contrastive_loss(u, label_u, margin=4, balanced=False): 140 | '''contrastive loss 141 | - Deep Supervised Hashing for Fast Image Retrieval 142 | ''' 143 | with tf.name_scope('contrastive_loss'): 144 | batch_size = tf.cast(tf.shape(u)[0], tf.float32) 145 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 146 | dist = distance(u) 147 | 148 | loss_1 = S * dist + (1 - S) * tf.maximum(margin - dist, 0.0) 149 | 150 | if balanced: 151 | # TODO DELETE! In this setting, results will be worse. 152 | with tf.name_scope('balance'): 153 | # let Sim \in {-1, 1} 154 | Sim = tf.multiply(tf.add(S, tf.constant(-0.5)), tf.constant(2.0)) 155 | sum_1 = tf.reduce_sum(S) 156 | sum_all = tf.reduce_sum(tf.abs(Sim)) 157 | balance_param = tf.add(tf.abs(tf.add(S, tf.constant(-1.0))), 158 | tf.multiply(tf.div(sum_all, sum_1), S)) 159 | loss_1 = tf.multiply(loss_1, balance_param) 160 | 161 | loss = tf.reduce_sum(loss_1) / (batch_size*(batch_size-1)) 162 | return loss 163 | 164 | 
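# All pairwise losses above share the same similarity bookkeeping:
# S = clip(L L^T, 0, 1) marks pairs that share at least one label,
# Sim = 2S - 1 maps that to {-1, +1}, and the balance weight up-weights
# the rarer similar pairs. A small NumPy illustration (illustrative only):
#
#   L = np.array([[1., 0., 1.],   # multi-hot labels for 3 samples
#                 [0., 1., 1.],
#                 [0., 1., 0.]])
#   S = np.clip(L @ L.T, 0.0, 1.0)          # 1 where two samples share a label
#   Sim = 2.0 * S - 1.0                     # {0, 1} -> {-1, +1}
#   w = np.abs(S - 1.0) + (np.abs(Sim).sum() / S.sum()) * S   # balance weights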
165 | def exp_loss(u, label_u, alpha, wordvec=None, balanced=True): 166 | '''exponential loss 167 | ''' 168 | with tf.name_scope('exp_loss'): 169 | batch_size = tf.shape(u)[0] 170 | bit = tf.shape(u)[1] 171 | mask = tf.equal(tf.eye(batch_size), tf.constant(0.0)) 172 | S = tf.clip_by_value(tf.matmul(label_u, tf.transpose(label_u)), 0.0, 1.0) 173 | S_m = tf.boolean_mask(S, mask) 174 | 175 | # word vector 176 | if wordvec is not None: 177 | wordvec_u = tf.matmul(label_u, wordvec) / tf.reduce_sum(label_u, axis=1, keepdims=True) 178 | W = distance(wordvec_u, dist_type='cosine') 179 | 180 | ## margin hinge-like loss 181 | # balanced = False 182 | # D = distance(u, dist_type='euclidean2') 183 | # E = D 184 | # E_m = tf.boolean_mask(E, mask) 185 | # loss_1 = S_m * E_m + (1 - S_m) * tf.maximum(alpha - E_m, 0.0) 186 | 187 | ## double margin hinge-like loss 188 | # balanced = False 189 | # D = distance(u, dist_type='cosine') 190 | # E = D 191 | # E_m = tf.boolean_mask(E, mask) 192 | # loss_1 = S_m * tf.maximum(E_m - 0.3, 0.0) + (1 - S_m) * tf.maximum(0.45 - E_m, 0.0) 193 | 194 | ## cauchy cross-entropy loss 195 | # D = distance(u, dist_type='cosine') 196 | # E = tf.log(1 + alpha * D) 197 | # E_m = tf.boolean_mask(E, mask) 198 | # loss_1 = S_m * E_m + (1 - S_m) * (E_m - tf.log(tf.exp(E_m) - 1 + 1e-6)) 199 | 200 | # sigmoid 201 | # D = distance(u, dist_type='cosine') 202 | # E = tf.log(1 + tf.exp(-alpha * (1-2*D))) 203 | # E_m = tf.boolean_mask(E, mask) 204 | # loss_1 = S_m * E_m + (1 - S_m) * (E_m - tf.log(tf.exp(E_m) - 1 + 1e-6)) 205 | 206 | ## hyper sigmoid 207 | balanced = False 208 | alpha = 9 209 | beta = 20 210 | gamma = 1.5 211 | margin = 0.25 212 | D = distance(u, dist_type='cosine') 213 | E1 = tf.log(1 + tf.exp(-alpha * (1-gamma*2*D))) 214 | E1_m = tf.boolean_mask(E1, mask) 215 | loss_s1 = S_m * E1_m 216 | E2 = tf.log(1 + tf.exp(-alpha * (1-gamma*2*(D-margin)))) 217 | E2_m = tf.boolean_mask(E2, mask) 218 | loss_s0 = (1 - S_m) * (E2_m - tf.log(tf.exp(E2_m) - 1 + 1e-6)) 219 | loss_1 = beta * loss_s1 + loss_s0 220 | 221 | ## margin exp loss 222 | # balanced = False 223 | # D = distance(u, dist_type='cosine') 224 | # E1 = tf.exp(2* D) - 1 225 | # E2 = tf.exp(2 * (1 - D)) - 1 226 | # E1_m = tf.boolean_mask(E1, mask) 227 | # E2_m = tf.boolean_mask(E2, mask) 228 | # loss_1 = S_m * E1_m + (1 - S_m) * E2_m 229 | 230 | ## post-tune 231 | # balanced = False 232 | # D = distance(u, dist_type='cosine') 233 | # E = D 234 | # E_m = tf.boolean_mask(E, mask) 235 | # margin = 0.05 236 | # loss_1 = S_m * tf.maximum(E_m - alpha + margin, 0.0) + (1 - S_m) * tf.maximum(alpha + margin - E_m, 0.0) 237 | # loss_1 = S_m * tf.maximum(E_m - alpha + margin, 0.0) 238 | # loss_1 = (1 - S_m) * tf.maximum(alpha + margin - E_m, 0.0) 239 | 240 | if balanced: 241 | S_all = tf.cast(batch_size * (batch_size - 1), tf.float32) 242 | S_1 = tf.reduce_sum(S) 243 | balance_param = (S_all / S_1) * S + (1 - S) 244 | B_m = tf.boolean_mask(balance_param, mask) 245 | loss_1 = B_m * loss_1 246 | 247 | loss = tf.reduce_mean(loss_1) 248 | return loss 249 | 250 | 
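# The active "hyper sigmoid" branch of exp_loss above shapes two logistic
# curves over cosine distance D in [0, 1]: similar pairs pay E1, which grows
# with D, while dissimilar pairs pay a mirrored term built from E2 that
# shrinks with D, offset by a margin. A quick NumPy look with the constants
# used above (illustrative only):
#
#   alpha, beta, gamma, margin = 9.0, 20.0, 1.5, 0.25
#   D = np.linspace(0.0, 1.0, 5)
#   E1 = np.log(1 + np.exp(-alpha * (1 - gamma * 2 * D)))
#   E2 = np.log(1 + np.exp(-alpha * (1 - gamma * 2 * (D - margin))))
#   loss_s0 = E2 - np.log(np.exp(E2) - 1 + 1e-6)   # dissimilar-pair penalty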
251 | '''triplet loss 252 | ''' 253 | 254 | def triplet_loss(anchor, pos, neg, margin, dist_type='euclidean2'): 255 | '''triplet loss 256 | - Deep Triplet Quantization 257 | ''' 258 | with tf.name_scope('triplet_loss'): 259 | pos_dist = distance(anchor, pos, pair=False, dist_type=dist_type) 260 | neg_dist = distance(anchor, neg, pair=False, dist_type=dist_type) 261 | basic_loss = tf.maximum(pos_dist - neg_dist + margin, 0.0) 262 | loss = tf.reduce_mean(basic_loss, 0) 263 | 264 | tf.summary.histogram('pos_dist', pos_dist) 265 | tf.summary.histogram('neg_dist', neg_dist) 266 | tf.summary.histogram('pos_dist - neg_dist', pos_dist - neg_dist) 267 | return loss 268 | 269 | 270 | def cos_margin_multi_label_loss(u, label_u, wordvec, bit=300, soft=True, margin=0.7): 271 | '''cosine margin multi label loss 272 | - Deep Visual-Semantic Quantization for Efficient Image Retrieval 273 | ''' 274 | # N: batchsize, L: label_dim, D: 300 275 | # u: N * D 276 | # label_u: N * L 277 | # wordvec: L * D 278 | with tf.name_scope('cos_margin_multi_label_loss'): 279 | assert bit == 300 280 | 281 | batch_size = tf.cast(tf.shape(label_u)[0], tf.int32) 282 | n_class = tf.cast(tf.shape(label_u)[1], tf.int32) 283 | if soft: 284 | ip_2 = tf.matmul(u, wordvec, transpose_b=True) 285 | # multiply ids to inner product 286 | mod_2 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 287 | u)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 288 | # cos_2: N * L 289 | cos_2 = tf.div(ip_2, mod_2) 290 | 291 | # ip_3: L * L 292 | # compute soft margin 293 | ip_3 = tf.matmul(wordvec, wordvec, transpose_b=True) 294 | # use word_dic to avoid 0 in / 295 | mod_3 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 296 | wordvec)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 297 | margin_param = tf.subtract(tf.constant( 298 | 1.0, dtype=tf.float32), tf.div(ip_3, mod_3)) 299 | 300 | # cos - cos: N * L * L 301 | cos_cos_1 = tf.subtract(tf.expand_dims(margin_param, 0), tf.subtract( 302 | tf.expand_dims(cos_2, 2), tf.expand_dims(cos_2, 1))) 303 | # we need to let the wrong place be 0 304 | cos_cos = tf.multiply(cos_cos_1, tf.expand_dims(label_u, 2)) 305 | 306 | cos_loss = tf.reduce_sum(tf.maximum( 307 | tf.constant(0, dtype=tf.float32), cos_cos)) 308 | loss = tf.div(cos_loss, tf.multiply(tf.cast( 309 | n_class, dtype=tf.float32), tf.reduce_sum(label_u))) 310 | else: 311 | margin_param = tf.constant(margin, dtype=tf.float32) 312 | 313 | # v_label: N * L * D 314 | v_label = tf.multiply(tf.expand_dims(label_u, 2), tf.expand_dims(wordvec, 0)) 315 | # ip_1: N * L 316 | ip_1 = tf.reduce_sum(tf.multiply(tf.expand_dims(u, 1), v_label), 2) 317 | # mod_1: N * L 318 | v_label_mod = tf.multiply(tf.expand_dims( 319 | tf.ones([batch_size, n_class]), 2), tf.expand_dims(wordvec, 0)) 320 | mod_1 = tf.sqrt(tf.multiply(tf.expand_dims(tf.reduce_sum( 321 | tf.square(u), 1), 1), tf.reduce_sum(tf.square(v_label_mod), 2))) 322 | # cos_1: N * L 323 | cos_1 = tf.div(ip_1, mod_1) 324 | 325 | ip_2 = tf.matmul(u, wordvec, transpose_b=True) 326 | # multiply ids to inner product 327 | mod_2 = tf.sqrt(tf.matmul(reduce_shaper(tf.square( 328 | u)), reduce_shaper(tf.square(wordvec)), transpose_b=True)) 329 | # cos_2: N * L 330 | cos_2 = tf.div(ip_2, mod_2) 331 | 332 | # cos - cos: N * L * L 333 | cos_cos_1 = tf.subtract(margin_param, tf.subtract( 334 | tf.expand_dims(cos_1, 2), tf.expand_dims(cos_2, 1))) 335 | # we need to let the wrong place be 0 336 | cos_cos = tf.multiply(cos_cos_1, tf.expand_dims(label_u, 2)) 337 | 338 | cos_loss = tf.reduce_sum(tf.maximum( 339 | tf.constant(0, dtype=tf.float32), cos_cos)) 340 | loss = tf.div(cos_loss, tf.multiply(tf.cast( 341 | n_class, dtype=tf.float32), tf.reduce_sum(label_u))) 342 | return loss 343 | 344 | 
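# quantization_loss below offers several penalties that all pull a
# continuous code u toward binary {-1, +1}. A scalar NumPy comparison of
# the variants (illustrative only):
#
#   u = np.array([0.2, -0.7, 0.95])
#   l2 = np.mean((np.abs(u) - 1.0) ** 2)                          # 'L2'
#   l1 = np.mean(np.abs(np.abs(u) - 1.0))                         # 'L1'
#   cauchy = np.mean(np.log(1 + np.abs(np.abs(u) - 1.0) / 0.58))  # 'cauchy'
#   margin = np.mean(np.maximum(0.5 - np.abs(u), 0.0))            # 'margin'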
345 | '''quantization loss 346 | ''' 347 | 348 | def quantization_loss(u, q_type='L2'): 349 | '''quantization loss 350 | - Deep Hashing Network for Efficient Similarity Retrieval 351 | - Deep Supervised Hashing for Fast Image Retrieval 352 | - Deep Cauchy Hashing for Hamming Space Retrieval 353 | - Deep Visual-Semantic Hashing for Cross-Modal Retrieval 354 | - Correlation Hashing Network for Efficient Cross-Modal Retrieval 355 | ''' 356 | with tf.name_scope('quantization_loss'): 357 | if q_type == 'L2': 358 | loss = tf.reduce_mean(tf.square(tf.abs(u) - tf.constant(1.0))) 359 | elif q_type == 'L1': 360 | loss = tf.reduce_mean(tf.abs(tf.abs(u) - tf.constant(1.0))) 361 | elif q_type == 'cauchy': 362 | epsilon = 0.58 363 | loss = tf.reduce_mean(tf.log(1 + tf.abs((tf.abs(u) - tf.constant(1.0))) / epsilon)) 364 | elif q_type == 'margin': 365 | margin = 0.5 366 | loss = tf.reduce_mean(tf.maximum(margin - tf.abs(u), 0.0)) 367 | elif q_type == 'max_margin': 368 | bit = tf.shape(u)[1] 369 | margin = 0.95 370 | D = distance(tf.abs(u), tf.ones(bit), dist_type='cos') 371 | loss = tf.reduce_mean(tf.maximum(margin - D, 0.0)) 372 | return loss 373 | 374 | 375 | def pq_loss(u, h, C, wordvec=None, squared=True): 376 | '''product quantization loss 377 | - Deep Quantization Network for Efficient Image Retrieval 378 | - Deep Visual-Semantic Quantization for Efficient Image Retrieval 379 | - Deep Triplet Quantization 380 | ''' 381 | with tf.name_scope('pq_loss'): 382 | dist = u - tf.matmul(h, C) 383 | 384 | if wordvec is not None: 385 | dist = tf.matmul(dist, wordvec, transpose_b=True) 386 | 387 | if squared: 388 | dist = tf.square(dist) 389 | 390 | loss = tf.reduce_mean(tf.reduce_sum(dist, 1)) 391 | return loss 392 | 393 | 394 | '''balance and independence loss 395 | - Deep semantic ranking based hashing for multi-label image retrieval 396 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 397 | ''' 398 | 399 | def balance_loss(u): 400 | '''balance loss 401 | 402 | Each bit should be half 0 and half 1. 
403 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 404 | ''' 405 | with tf.name_scope('balance_loss'): 406 | H = tf.sign(u) 407 | H_mean = tf.reduce_mean(H, axis=0) 408 | loss = tf.reduce_mean(tf.square(H_mean)) 409 | return loss 410 | 411 | 412 | def independence_loss(u): 413 | '''independence loss 414 | - Deep Triplet Quantization 415 | ''' 416 | with tf.name_scope('independence_loss'): 417 | batch_size = tf.shape(u)[0] 418 | bit = tf.shape(u)[1] 419 | H = tf.sign(u) 420 | I = tf.eye(bit) 421 | loss = tf.reduce_mean(tf.square(tf.matmul( 422 | H, H, transpose_a=True) / tf.cast(batch_size, tf.float32) - I)) 423 | return loss 424 | 425 | 426 | '''listwise loss 427 | - Hashing as Tie-Aware Learning to Rank 428 | ''' 429 | 430 | 431 | '''classification loss 432 | - Deep Semantic Hashing with Generative Adversarial Networks 433 | - Deep Supervised Discrete Hashing 434 | - Supervised Learning of Semantics-preserving Hashing via Deep Neural Networks for Large-scale Image Search 435 | - Deep Supervised Cross-modal Retrieval 436 | ''' 437 | -------------------------------------------------------------------------------- /model/dqn.py: -------------------------------------------------------------------------------- 1 | ################################################################################# 2 | # Deep Quantization Network for Efficient Image Retrieval # 3 | # Authors: Yue Cao, Mingsheng Long, Jianmin Wang, Han Zhu, Qingfu Wen # 4 | # Contact: caoyue10@gmail.com # 5 | ################################################################################## 6 | 7 | import os 8 | import random 9 | import shutil 10 | import time 11 | from datetime import datetime 12 | from math import ceil 13 | 14 | import numpy as np 15 | import tensorflow as tf 16 | from sklearn.cluster import MiniBatchKMeans 17 | 18 | from architecture import img_alexnet_layers 19 | from evaluation import MAPs_CQ 20 | from data_provider.pq import Dataset 21 | from loss import cosine_loss, pq_loss 22 | 23 | 24 | class DQN(object): 25 | def __init__(self, config): 26 | # Initialize setting 27 | print("initializing") 28 | np.set_printoptions(precision=4) 29 | self.stage = tf.placeholder_with_default(tf.constant(0), []) 30 | self.device = '/gpu:' + config.gpu_id 31 | self.output_dim = config.output_dim 32 | self.n_class = config.label_dim 33 | 34 | self.subspace_num = config.n_subspace 35 | self.subcenter_num = config.n_subcenter 36 | self.code_batch_size = config.code_batch_size 37 | self.cq_lambda = config.cq_lambda 38 | self.max_iter_update_Cb = config.max_iter_update_Cb 39 | self.max_iter_update_b = config.max_iter_update_b 40 | 41 | self.batch_size = config.batch_size 42 | self.val_batch_size = config.val_batch_size 43 | self.max_iter = config.max_iter 44 | self.network = config.network 45 | self.learning_rate = config.learning_rate 46 | self.learning_rate_decay_factor = config.learning_rate_decay_factor 47 | self.decay_step = config.decay_step 48 | 49 | self.finetune_all = config.finetune_all 50 | 51 | self.model_file = os.path.join(config.save_dir, 'network_weights.npy') 52 | self.codes_file = os.path.join(config.save_dir, 'codes.npy') 53 | self.tflog_path = os.path.join(config.save_dir, 'tflog') 54 | 55 | # Setup session 56 | print("launching session") 57 | configProto = tf.ConfigProto() 58 | configProto.gpu_options.allow_growth = True 59 | configProto.allow_soft_placement = True 60 | self.sess = tf.Session(config=configProto) 61 | 62 | # Create variables and 
placeholders 63 | 64 | with tf.device(self.device): 65 | self.img = tf.placeholder(tf.float32, [None, 256, 256, 3]) 66 | self.img_label = tf.placeholder(tf.float32, [None, self.n_class]) 67 | 68 | self.network_weights = config.network_weights 69 | self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model() 70 | 71 | self.C = tf.Variable(tf.random_uniform([self.subspace_num * self.subcenter_num, self.output_dim], 72 | minval=-1, maxval=1, dtype=tf.float32, name='centers')) 73 | self.deep_param_img['C'] = self.C 74 | 75 | # Centers shared in different modalities (image & text) 76 | # Binary codes for different modalities (image & text) 77 | self.img_output_all = tf.placeholder(tf.float32, [None, self.output_dim]) 78 | self.img_b_all = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num]) 79 | 80 | self.b_img = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num]) 81 | self.ICM_m = tf.placeholder(tf.int32, []) 82 | self.ICM_b_m = tf.placeholder(tf.float32, [None, self.subcenter_num]) 83 | self.ICM_b_all = tf.placeholder(tf.float32, [None, self.subcenter_num * self.subspace_num]) 84 | self.ICM_X = tf.placeholder(tf.float32, [self.code_batch_size, self.output_dim]) 85 | self.ICM_C_m = tf.slice(self.C, [self.ICM_m * self.subcenter_num, 0], [self.subcenter_num, self.output_dim]) 86 | self.ICM_X_residual = self.ICM_X - tf.matmul(self.ICM_b_all, self.C) + tf.matmul(self.ICM_b_m, self.ICM_C_m) 87 | ICM_X_expand = tf.expand_dims(self.ICM_X_residual, 1) # N * 1 * D 88 | ICM_C_m_expand = tf.expand_dims(self.ICM_C_m, 0) # 1 * M * D 89 | # N*sc*D * D*n 90 | ICM_sum_squares = tf.reduce_sum(tf.square(tf.squeeze( 91 | tf.subtract(ICM_X_expand, ICM_C_m_expand))), reduction_indices=2) 92 | ICM_best_centers = tf.argmin(ICM_sum_squares, 1) 93 | self.ICM_best_centers_one_hot = tf.one_hot( 94 | ICM_best_centers, self.subcenter_num, dtype=tf.float32) 95 | 96 | self.global_step = tf.Variable(0, trainable=False) 97 | self.train_op = self.apply_loss_function(self.global_step) 98 | self.sess.run(tf.global_variables_initializer()) 99 | 100 | if config.debug == True: 101 | from tensorflow.python import debug as tf_debug 102 | self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess) 103 | 104 | def load_model(self): 105 | if self.network == 'alexnet': 106 | img_output = img_alexnet_layers( 107 | self.img, self.batch_size, self.output_dim, 108 | self.stage, self.network_weights, val_batch_size=self.val_batch_size) 109 | else: 110 | raise Exception('cannot use such CNN model as ' + self.network) 111 | return img_output 112 | 113 | def save_model(self, model_file=None): 114 | if model_file is None: 115 | model_file = self.model_file 116 | model = {} 117 | for layer in self.deep_param_img: 118 | model[layer] = self.sess.run(self.deep_param_img[layer]) 119 | print("saving model to %s" % model_file) 120 | folder = os.path.dirname(model_file) 121 | if os.path.exists(folder) is False: 122 | os.makedirs(folder) 123 | np.save(model_file, np.array(model)) 124 | return 125 | 126 | def load_codes(self, codes_file=None): 127 | if codes_file is None: 128 | codes_file = self.codes_file 129 | codes = np.load(codes_file).item() 130 | 131 | import collections 132 | mDataset = collections.namedtuple('Dataset', ['output', 'codes', 'label']) 133 | database = mDataset(codes['db_features'], codes['db_codes'], codes['db_label']) 134 | query = mDataset(codes['val_features'], codes['val_codes'], codes['val_label']) 135 | C = codes['C'] 136 | return database, 
query, C 137 | 138 | def save_codes(self, database, query, C, codes_file=None): 139 | if codes_file is None: 140 | codes_file = self.codes_file 141 | codes = { 142 | 'db_features': database.output, 143 | 'db_codes': database.codes, 144 | 'db_label': database.label, 145 | 'val_features': query.output, 146 | 'val_codes': query.codes, 147 | 'val_label': query.label, 148 | 'C': C, 149 | } 150 | print("saving codes to %s" % codes_file) 151 | np.save(codes_file, np.array(codes)) 152 | return 153 | 154 | def apply_loss_function(self, global_step): 155 | # loss function 156 | self.cos_loss = cosine_loss(self.img_last_layer, self.img_label) 157 | self.q_loss = self.cq_lambda * pq_loss(self.img_last_layer, self.b_img, self.C) 158 | self.loss = self.cos_loss + self.q_loss 159 | 160 | # Last layer has a 10 times learning rate 161 | self.lr = tf.train.exponential_decay( 162 | self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True) 163 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9) 164 | grads_and_vars = opt.compute_gradients( 165 | self.loss, self.train_layers + self.train_last_layer) 166 | fcgrad, _ = grads_and_vars[-2] 167 | fbgrad, _ = grads_and_vars[-1] 168 | 169 | # for debug 170 | self.grads_and_vars = grads_and_vars 171 | tf.summary.scalar('loss', self.loss) 172 | tf.summary.scalar('cosine_loss', self.cos_loss) 173 | tf.summary.scalar('quantization_loss', self.q_loss) 174 | tf.summary.scalar('lr', self.lr) 175 | self.merged = tf.summary.merge_all() 176 | 177 | if self.finetune_all: 178 | return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]), 179 | (grads_and_vars[1][0]*2, self.train_layers[1]), 180 | (grads_and_vars[2][0], self.train_layers[2]), 181 | (grads_and_vars[3][0]*2, self.train_layers[3]), 182 | (grads_and_vars[4][0], self.train_layers[4]), 183 | (grads_and_vars[5][0]*2, self.train_layers[5]), 184 | (grads_and_vars[6][0], self.train_layers[6]), 185 | (grads_and_vars[7][0]*2, self.train_layers[7]), 186 | (grads_and_vars[8][0], self.train_layers[8]), 187 | (grads_and_vars[9][0]*2, self.train_layers[9]), 188 | (grads_and_vars[10][0], self.train_layers[10]), 189 | (grads_and_vars[11][0]*2, self.train_layers[11]), 190 | (grads_and_vars[12][0], self.train_layers[12]), 191 | (grads_and_vars[13][0]*2, self.train_layers[13]), 192 | (fcgrad*10, self.train_last_layer[0]), 193 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 194 | else: 195 | return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]), 196 | (fbgrad*20, self.train_last_layer[1])], global_step=global_step) 197 | 198 | def initial_centers(self, img_output): 199 | C_init = np.zeros( 200 | [self.subspace_num * self.subcenter_num, self.output_dim]) 201 | print("#DQN train# initializing Centers") 202 | all_output = img_output 203 | div = int(self.output_dim / self.subspace_num) 204 | for i in range(self.subspace_num): 205 | kmeans = MiniBatchKMeans(n_clusters=self.subcenter_num).fit( 206 | all_output[:, i * div: (i + 1) * div]) 207 | C_init[i * self.subcenter_num: (i + 1) * self.subcenter_num, i * div: (i + 1) * div] = kmeans.cluster_centers_ 208 | print("step: ", i, " finish") 209 | return C_init 210 | 211 | def update_centers(self, img_dataset): 212 | ''' 213 | Optimize: 214 | self.C = (U * hu^T + V * hv^T) (hu * hu^T + hv * hv^T)^{-1} 215 | self.C^T = (hu * hu^T + hv * hv^T)^{-1} (hu * U^T + hv * V^T) 216 | but all the C need to be replaced with C^T: 217 | self.C = (hu * hu^T + hv * hv^T)^{-1} (hu^T * U + hv^T * V) 218 | '''
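# A minimal NumPy rendering of the closed-form update described in the
# docstring above, using the same 0.001 ridge term as the TensorFlow code
# below (img_codes and img_features are hypothetical names for the one-hot
# codes and the network outputs; illustrative only):
#
#   h = img_codes      # [n, subspace_num * subcenter_num]
#   U = img_features   # [n, output_dim]
#   ridge = 0.001 * np.eye(h.shape[1])
#   C_new = np.linalg.inv(h.T @ h + ridge) @ (h.T @ U)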

    def update_centers(self, img_dataset):
        '''
        Closed-form update of the shared centers C. With one-hot codes
        h [n, M*K] and network outputs U [n, D], solve
            min_C || U - h C ||^2  =>  C = (h^T h + eps * I)^{-1} h^T U,
        where the eps * I ridge term keeps h^T h invertible. (The cross-modal
        form C = (hu^T hu + hv^T hv)^{-1} (hu^T U + hv^T V) reduces to this
        when only the image modality is present.)
        '''
        print("#DQN train# updating centers")
        old_C_value = self.sess.run(self.C)

        h = self.img_b_all
        U = self.img_output_all
        smallResidual = tf.constant(
            np.eye(self.subcenter_num * self.subspace_num, dtype=np.float32) * 0.001)
        Uh = tf.matmul(tf.transpose(h), U)
        hh = tf.add(tf.matmul(tf.transpose(h), h), smallResidual)
        compute_centers = tf.matmul(tf.matrix_inverse(hh), Uh)

        update_C = self.C.assign(compute_centers)
        C_value = self.sess.run(update_C, feed_dict={
            self.img_output_all: img_dataset.output,
            self.img_b_all: img_dataset.codes,
        })

        # keep the old value for any center that no point was assigned to
        C_sums = np.sum(np.square(C_value), axis=1)
        C_zeros_ids = np.where(C_sums < 1e-8)
        C_value[C_zeros_ids, :] = old_C_value[C_zeros_ids, :]
        self.sess.run(self.C.assign(C_value))

    def update_codes_ICM(self, output, code):
        '''
        Iterated conditional modes (ICM):
            min_codes || output - codes * self.C ||^2
        args:
            output: [n_train, n_output]
            self.C: [n_subspace * n_subcenter, n_output] = [C_1; C_2; ...; C_M]
            codes:  [n_train, n_subspace * n_subcenter]
        One subspace at a time is re-assigned while the others are held fixed.
        '''

        # restart from all-zero codes; the incoming values are discarded
        code = np.zeros(code.shape)

        for iterate in range(self.max_iter_update_b):
            sub_list = [i for i in range(self.subspace_num)]
            random.shuffle(sub_list)
            for m in sub_list:
                best_centers_one_hot_val = self.sess.run(self.ICM_best_centers_one_hot, feed_dict={
                    self.ICM_b_m: code[:, m * self.subcenter_num: (m + 1) * self.subcenter_num],
                    self.ICM_b_all: code,
                    self.ICM_m: m,
                    self.ICM_X: output,
                })

                code[:, m * self.subcenter_num: (m + 1) *
                     self.subcenter_num] = best_centers_one_hot_val
        return code

    def update_codes_batch(self, dataset, batch_size):
        '''
        Update the codes of the whole dataset, one batch at a time.
        '''
        total_batch = int(ceil(dataset.n_samples / batch_size))
        dataset.finish_epoch()

        for i in range(total_batch):
            output_val, code_val = dataset.next_batch_output_codes(batch_size)
            codes_val = self.update_codes_ICM(output_val, code_val)
            dataset.feed_batch_codes(batch_size, codes_val)
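
    # A NumPy sketch of one ICM pass over subspace m, assuming one-hot codes
    # B [n, M*K], centers C [M*K, D] and outputs X [n, D]:
    #
    #     keep = B @ C - B[:, m*K:(m+1)*K] @ C[m*K:(m+1)*K]  # other subspaces
    #     residual = X - keep
    #     dist = ((residual[:, None, :] - C[None, m*K:(m+1)*K, :]) ** 2).sum(2)
    #     B[:, m*K:(m+1)*K] = np.eye(K, dtype=B.dtype)[dist.argmin(axis=1)]
    #
    # i.e. each point keeps the other subspaces' contributions fixed and snaps
    # to its nearest subcenter within subspace m.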

    def train(self, img_dataset):
        print("%s #train# start training" % datetime.now())
        epoch = 0
        epoch_iter = int(ceil(img_dataset.n_samples / self.batch_size))

        # tensorboard
        if os.path.exists(self.tflog_path):
            shutil.rmtree(self.tflog_path)
        train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph)

        for train_iter in range(self.max_iter):
            images, labels, codes = img_dataset.next_batch(self.batch_size)
            start_time = time.time()

            _, loss, output, summary = self.sess.run([self.train_op, self.loss, self.img_last_layer, self.merged],
                                                     feed_dict={self.img: images,
                                                                self.img_label: labels,
                                                                self.b_img: codes})

            img_dataset.feed_batch_output(self.batch_size, output)
            duration = time.time() - start_time

            # every epoch: update codes and centers
            if train_iter % epoch_iter == 0 and train_iter != 0:
                if epoch == 0:
                    with tf.device(self.device):
                        for i in range(self.max_iter_update_Cb):
                            self.sess.run(self.C.assign(
                                self.initial_centers(img_dataset.output)))

                epoch = epoch + 1
                for i in range(self.max_iter_update_Cb):
                    self.update_codes_batch(img_dataset, self.code_batch_size)
                    self.update_centers(img_dataset)
                    # self.sess.run(self.C.assign(self.initial_centers(img_dataset.output)))

            # log every iteration
            if train_iter % 1 == 0:
                train_writer.add_summary(summary, train_iter)
                print("%s #train# epoch %2d step %4d, loss = %.4f, %.1f sec/batch"
                      % (datetime.now(), epoch, train_iter + 1, loss, duration))

        print("%s #train# finished training" % datetime.now())
        self.save_model()
        print("model saved")

        self.sess.close()

    def validation(self, img_query, img_database, R=100):
        if os.path.exists(self.codes_file):
            print("loading ", self.codes_file)
            img_database, img_query, C_tmp = self.load_codes(self.codes_file)
        else:
            print("%s #validation# start validation" % (datetime.now()))
            query_batch = int(ceil(img_query.n_samples / self.val_batch_size))
            print("%s #validation# %d query images in %d batches" %
                  (datetime.now(), img_query.n_samples, query_batch))
            for i in range(query_batch):
                images, labels, codes = img_query.next_batch(self.val_batch_size)
                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_query.feed_batch_output(self.val_batch_size, output)
                print('Cosine Loss: %s' % loss)

            database_batch = int(ceil(img_database.n_samples / self.val_batch_size))
            print("%s #validation# %d database images in %d batches" %
                  (datetime.now(), img_database.n_samples, database_batch))
            for i in range(database_batch):
                images, labels, codes = img_database.next_batch(self.val_batch_size)

                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_database.feed_batch_output(self.val_batch_size, output)
                if i % 100 == 0:
                    print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss))

            self.update_codes_batch(img_query, self.code_batch_size)
            self.update_codes_batch(img_database, self.code_batch_size)

            print("%s #validation# calculating MAP@%d" % (datetime.now(), R))
            C_tmp = self.sess.run(self.C)
            # save features and codes
            self.save_codes(img_database, img_query, C_tmp)

        mAPs = MAPs_CQ(C_tmp, self.subspace_num, self.subcenter_num, R)

        self.sess.close()
        return {
            'i2i_nocq': mAPs.get_mAPs_by_feature(img_database, img_query),
            'i2i_AQD': mAPs.get_mAPs_AQD(img_database, img_query),
            'i2i_SQD': mAPs.get_mAPs_SQD(img_database, img_query)
        }


def train(train_img, config):
    model = DQN(config)
    img_dataset = Dataset(train_img, config.output_dim, config.n_subspace * config.n_subcenter)
    model.train(img_dataset)
    return model.model_file


def validation(database_img, query_img, config):
    model = DQN(config)
    img_database = Dataset(database_img, config.output_dim, config.n_subspace * config.n_subcenter)
    img_query = Dataset(query_img, config.output_dim, config.n_subspace * config.n_subcenter)
    return model.validation(img_query, img_database, config.R)
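
# A minimal usage sketch for this module; `train_img`, `database_img`,
# `query_img` and the config fields mirror examples/dqn/train_val_script.py
# and are assumptions here, not a fixed API:
#
#     from model import dqn
#     model_file = dqn.train(train_img, config)   # SGD + ICM + center updates
#     maps = dqn.validation(database_img, query_img, config)
#     print(maps['i2i_AQD'])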

--------------------------------------------------------------------------------
/model/dvsq.py:
--------------------------------------------------------------------------------
##################################################################################
# Deep Visual-Semantic Quantization for Efficient Image Retrieval                #
# Authors: Yue Cao, Mingsheng Long, Jianmin Wang, Shichen Liu                    #
# Contact: caoyue10@gmail.com                                                    #
##################################################################################

import os
import random
import shutil
import time
from datetime import datetime
from math import ceil

import numpy as np
import tensorflow as tf
from sklearn.cluster import MiniBatchKMeans

from architecture import img_alexnet_layers
from evaluation import MAPs_CQ
from data_provider.pq import Dataset
from loss import cos_margin_multi_label_loss, pq_loss


class DVSQ(object):
    def __init__(self, config):
        # Initialize settings
        print("initializing")
        np.set_printoptions(precision=4)
        self.stage = tf.placeholder_with_default(tf.constant(0), [])
        self.device = '/gpu:' + config.gpu_id
        self.output_dim = config.output_dim
        self.n_class = config.label_dim

        self.subspace_num = config.n_subspace
        self.subcenter_num = config.n_subcenter
        self.code_batch_size = config.code_batch_size
        self.cq_lambda = config.cq_lambda
        self.max_iter_update_Cb = config.max_iter_update_Cb
        self.max_iter_update_b = config.max_iter_update_b

        self.batch_size = config.batch_size
        self.val_batch_size = config.val_batch_size
        self.max_iter = config.max_iter
        self.network = config.network
        self.learning_rate = config.learning_rate
        self.learning_rate_decay_factor = config.learning_rate_decay_factor
        self.decay_step = config.decay_step

        self.finetune_all = config.finetune_all

        self.wordvec_dict = config.wordvec_dict

        self.model_file = os.path.join(config.save_dir, 'network_weights.npy')
        self.codes_file = os.path.join(config.save_dir, 'codes.npy')
        self.tflog_path = os.path.join(config.save_dir, 'tflog')

        # Set up the session
        print("launching session")
        configProto = tf.ConfigProto()
        configProto.gpu_options.allow_growth = True
        configProto.allow_soft_placement = True
        self.sess = tf.Session(config=configProto)

        # Create variables and placeholders
        with tf.device(self.device):
            self.img = tf.placeholder(tf.float32, [None, 256, 256, 3])
            self.img_label = tf.placeholder(tf.float32, [None, self.n_class])

            self.network_weights = config.network_weights
            self.img_last_layer, self.deep_param_img, self.train_layers, self.train_last_layer = self.load_model()

            self.C = tf.Variable(tf.random_uniform([self.subspace_num * self.subcenter_num, self.output_dim],
                                                   minval=-1, maxval=1, dtype=tf.float32), name='centers')
            self.deep_param_img['C'] = self.C

            # Network outputs and binary codes of the whole training set,
            # fed in when the shared centers are updated
            self.img_output_all = tf.placeholder(tf.float32, [None, self.output_dim])
            self.img_b_all = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num])

            self.b_img = tf.placeholder(tf.float32, [None, self.subspace_num * self.subcenter_num])
            self.ICM_m = tf.placeholder(tf.int32, [])
            self.ICM_b_m = tf.placeholder(tf.float32, [None, self.subcenter_num])
            self.ICM_b_all = tf.placeholder(tf.float32, [None, self.subcenter_num * self.subspace_num])
            self.ICM_X = tf.placeholder(tf.float32, [self.code_batch_size, self.output_dim])
            self.ICM_C_m = tf.slice(self.C, [self.ICM_m * self.subcenter_num, 0], [self.subcenter_num, self.output_dim])
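            # DVSQ's ICM step differs from DQN's: the subspace-m residual is
            # first projected onto the label word vectors (wordvec: [n_class, D]),
            # and the nearest subcenter is chosen by distance in that semantic
            # space rather than in the raw feature space.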
            self.ICM_X_residual = self.ICM_X - tf.matmul(self.ICM_b_all, self.C) + tf.matmul(self.ICM_b_m, self.ICM_C_m)
            ICM_X_expand = tf.expand_dims(self.ICM_X_residual, 1)  # N * 1 * D
            ICM_C_m_expand = tf.expand_dims(self.ICM_C_m, 0)  # 1 * M * D
            self.wordvec = tf.constant(np.loadtxt(self.wordvec_dict), dtype=tf.float32)
            # residuals projected through the word vectors: N * M * n_class
            ICM_word_dict = tf.reshape(tf.matmul(tf.reshape(
                ICM_X_expand - ICM_C_m_expand, [self.code_batch_size * self.subcenter_num, self.output_dim]),
                tf.transpose(self.wordvec)), [self.code_batch_size, self.subcenter_num, self.n_class])
            ICM_sum_squares = tf.reduce_sum(
                tf.square(ICM_word_dict), axis=2)
            ICM_best_centers = tf.argmin(ICM_sum_squares, 1)
            self.ICM_best_centers_one_hot = tf.one_hot(
                ICM_best_centers, self.subcenter_num, dtype=tf.float32)

            self.global_step = tf.Variable(0, trainable=False)
            self.train_op = self.apply_loss_function(self.global_step)
            self.sess.run(tf.global_variables_initializer())

        if config.debug:
            from tensorflow.python import debug as tf_debug
            self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)

    def load_model(self):
        if self.network == 'alexnet':
            img_output = img_alexnet_layers(
                self.img, self.batch_size, self.output_dim,
                self.stage, self.network_weights, val_batch_size=self.val_batch_size)
        else:
            raise Exception('unsupported CNN architecture: ' + self.network)
        return img_output

    def save_model(self, model_file=None):
        if model_file is None:
            model_file = self.model_file
        model = {}
        for layer in self.deep_param_img:
            model[layer] = self.sess.run(self.deep_param_img[layer])
        print("saving model to %s" % model_file)
        folder = os.path.dirname(model_file)
        if not os.path.exists(folder):
            os.makedirs(folder)
        np.save(model_file, np.array(model))
        return

    def load_codes(self, codes_file=None):
        if codes_file is None:
            codes_file = self.codes_file
        codes = np.load(codes_file).item()

        import collections
        mDataset = collections.namedtuple('Dataset', ['output', 'codes', 'label'])
        database = mDataset(codes['db_features'], codes['db_codes'], codes['db_label'])
        query = mDataset(codes['val_features'], codes['val_codes'], codes['val_label'])
        C = codes['C']
        return database, query, C

    def save_codes(self, database, query, C, codes_file=None):
        if codes_file is None:
            codes_file = self.codes_file
        codes = {
            'db_features': database.output,
            'db_codes': database.codes,
            'db_label': database.label,
            'val_features': query.output,
            'val_codes': query.codes,
            'val_label': query.label,
            'C': C,
        }
        print("saving codes to %s" % codes_file)
        np.save(codes_file, np.array(codes))
        return
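
    # apply_loss_function below reproduces Caffe-style lr_mult fine-tuning by
    # scaling per-variable gradients: backbone biases train at 2x the weight
    # rate, the freshly initialized last layer at 10x / 20x. A generic TF1
    # sketch of the same idea (the lr_mult keys are hypothetical names):
    #
    #     lr_mult = {'conv1_W': 1., 'conv1_b': 2., 'fc8_W': 10., 'fc8_b': 20.}
    #     gvs = opt.compute_gradients(loss, var_list)
    #     train_op = opt.apply_gradients(
    #         [(g * lr_mult[v.op.name], v) for g, v in gvs], global_step=step)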

    def apply_loss_function(self, global_step):
        # loss function: cosine margin multi-label loss plus weighted quantization loss
        self.cos_loss = cos_margin_multi_label_loss(self.img_last_layer, self.img_label, self.wordvec, self.output_dim, soft=False)
        self.q_loss = self.cq_lambda * pq_loss(self.img_last_layer, self.b_img, self.C, self.wordvec)
        self.loss = self.cos_loss + self.q_loss

        # The freshly initialized last layer trains with a 10x (weights) /
        # 20x (biases) learning rate; backbone biases get 2x (Caffe-style lr_mult)
        self.lr = tf.train.exponential_decay(
            self.learning_rate, global_step, self.decay_step, self.learning_rate_decay_factor, staircase=True)
        opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
        grads_and_vars = opt.compute_gradients(
            self.loss, self.train_layers + self.train_last_layer)
        fcgrad, _ = grads_and_vars[-2]
        fbgrad, _ = grads_and_vars[-1]

        # for debugging
        self.grads_and_vars = grads_and_vars
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('cosine_loss', self.cos_loss)
        tf.summary.scalar('quantization_loss', self.q_loss)
        tf.summary.scalar('lr', self.lr)
        self.merged = tf.summary.merge_all()

        if self.finetune_all:
            return opt.apply_gradients([(grads_and_vars[0][0], self.train_layers[0]),
                                        (grads_and_vars[1][0]*2, self.train_layers[1]),
                                        (grads_and_vars[2][0], self.train_layers[2]),
                                        (grads_and_vars[3][0]*2, self.train_layers[3]),
                                        (grads_and_vars[4][0], self.train_layers[4]),
                                        (grads_and_vars[5][0]*2, self.train_layers[5]),
                                        (grads_and_vars[6][0], self.train_layers[6]),
                                        (grads_and_vars[7][0]*2, self.train_layers[7]),
                                        (grads_and_vars[8][0], self.train_layers[8]),
                                        (grads_and_vars[9][0]*2, self.train_layers[9]),
                                        (grads_and_vars[10][0], self.train_layers[10]),
                                        (grads_and_vars[11][0]*2, self.train_layers[11]),
                                        (grads_and_vars[12][0], self.train_layers[12]),
                                        (grads_and_vars[13][0]*2, self.train_layers[13]),
                                        (fcgrad*10, self.train_last_layer[0]),
                                        (fbgrad*20, self.train_last_layer[1])], global_step=global_step)
        else:
            return opt.apply_gradients([(fcgrad*10, self.train_last_layer[0]),
                                        (fbgrad*20, self.train_last_layer[1])], global_step=global_step)

    def initial_centers(self, img_output):
        C_init = np.zeros(
            [self.subspace_num * self.subcenter_num, self.output_dim])
        print("#DVSQ train# initializing centers")
        all_output = img_output
        div = int(self.output_dim / self.subspace_num)
        for i in range(self.subspace_num):
            kmeans = MiniBatchKMeans(n_clusters=self.subcenter_num).fit(
                all_output[:, i * div: (i + 1) * div])
            C_init[i * self.subcenter_num: (i + 1) * self.subcenter_num, i * div: (i + 1) * div] = kmeans.cluster_centers_
            print("subspace %d done" % i)
        return C_init

    def update_centers(self, img_dataset):
        '''
        Closed-form update of the shared centers C. With one-hot codes
        h [n, M*K] and network outputs U [n, D], solve
            min_C || U - h C ||^2  =>  C = (h^T h + eps * I)^{-1} h^T U,
        where the eps * I ridge term keeps h^T h invertible. (The cross-modal
        form C = (hu^T hu + hv^T hv)^{-1} (hu^T U + hv^T V) reduces to this
        when only the image modality is present.)
        '''
        print("#DVSQ train# updating centers")
        old_C_value = self.sess.run(self.C)

        h = self.img_b_all
        U = self.img_output_all
        smallResidual = tf.constant(
            np.eye(self.subcenter_num * self.subspace_num, dtype=np.float32) * 0.001)
        Uh = tf.matmul(tf.transpose(h), U)
        hh = tf.add(tf.matmul(tf.transpose(h), h), smallResidual)
        compute_centers = tf.matmul(tf.matrix_inverse(hh), Uh)

        update_C = self.C.assign(compute_centers)
        C_value = self.sess.run(update_C, feed_dict={
            self.img_output_all: img_dataset.output,
            self.img_b_all: img_dataset.codes,
        })

        # keep the old value for any center that no point was assigned to
        C_sums = np.sum(np.square(C_value), axis=1)
        C_zeros_ids = np.where(C_sums < 1e-8)
        C_value[C_zeros_ids, :] = old_C_value[C_zeros_ids, :]
        self.sess.run(self.C.assign(C_value))

    def update_codes_ICM(self, output, code):
        '''
        Iterated conditional modes (ICM):
            min_codes || output - codes * self.C ||^2
        args:
            output: [n_train, n_output]
            self.C: [n_subspace * n_subcenter, n_output] = [C_1; C_2; ...; C_M]
            codes:  [n_train, n_subspace * n_subcenter]
        One subspace at a time is re-assigned while the others are held fixed.
        '''

        # restart from all-zero codes; the incoming values are discarded
        code = np.zeros(code.shape)

        for iterate in range(self.max_iter_update_b):
            sub_list = [i for i in range(self.subspace_num)]
            random.shuffle(sub_list)
            for m in sub_list:
                best_centers_one_hot_val = self.sess.run(self.ICM_best_centers_one_hot, feed_dict={
                    self.ICM_b_m: code[:, m * self.subcenter_num: (m + 1) * self.subcenter_num],
                    self.ICM_b_all: code,
                    self.ICM_m: m,
                    self.ICM_X: output,
                })

                code[:, m * self.subcenter_num: (m + 1) *
                     self.subcenter_num] = best_centers_one_hot_val
        return code

    def update_codes_batch(self, dataset, batch_size):
        '''
        Update the codes of the whole dataset, one batch at a time.
        '''
        total_batch = int(ceil(dataset.n_samples / batch_size))
        dataset.finish_epoch()

        for i in range(total_batch):
            output_val, code_val = dataset.next_batch_output_codes(batch_size)
            codes_val = self.update_codes_ICM(output_val, code_val)
            dataset.feed_batch_codes(batch_size, codes_val)

    def train(self, img_dataset):
        print("%s #train# start training" % datetime.now())
        epoch = 0
        epoch_iter = int(ceil(img_dataset.n_samples / self.batch_size))

        # tensorboard
        if os.path.exists(self.tflog_path):
            shutil.rmtree(self.tflog_path)
        train_writer = tf.summary.FileWriter(self.tflog_path, self.sess.graph)

        for train_iter in range(self.max_iter):
            images, labels, codes = img_dataset.next_batch(self.batch_size)
            start_time = time.time()

            _, loss, output, summary = self.sess.run([self.train_op, self.loss, self.img_last_layer, self.merged],
                                                     feed_dict={self.img: images,
                                                                self.img_label: labels,
                                                                self.b_img: codes})

            img_dataset.feed_batch_output(self.batch_size, output)
            duration = time.time() - start_time

            # every epoch: update codes and centers
            if train_iter % epoch_iter == 0 and train_iter != 0:
                if epoch == 0:
                    with tf.device(self.device):
                        for i in range(self.max_iter_update_Cb):
                            self.sess.run(self.C.assign(
                                self.initial_centers(img_dataset.output)))

                epoch = epoch + 1
                for i in range(self.max_iter_update_Cb):
                    self.update_codes_batch(img_dataset, self.code_batch_size)
                    self.update_centers(img_dataset)
                    # self.sess.run(self.C.assign(self.initial_centers(img_dataset.output)))

            # log every iteration
            if train_iter % 1 == 0:
                train_writer.add_summary(summary, train_iter)
                print("%s #train# epoch %2d step %4d, loss = %.4f, %.1f sec/batch"
                      % (datetime.now(), epoch, train_iter + 1, loss, duration))

        print("%s #train# finished training" % datetime.now())
        self.save_model()
        print("model saved")

        self.sess.close()
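
    # validation reports three mAP@R variants computed by evaluation.MAPs_CQ:
    # 'i2i_nocq' ranks by raw features (no quantization), 'i2i_AQD' uses the
    # asymmetric quantizer distance (raw query vs. quantized database), and
    # 'i2i_SQD' the symmetric quantizer distance (both sides quantized).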

    def validation(self, img_query, img_database, R=100):
        if os.path.exists(self.codes_file):
            print("loading ", self.codes_file)
            img_database, img_query, C_tmp = self.load_codes(self.codes_file)
        else:
            print("%s #validation# start validation" % (datetime.now()))
            query_batch = int(ceil(img_query.n_samples / self.val_batch_size))
            print("%s #validation# %d query images in %d batches" %
                  (datetime.now(), img_query.n_samples, query_batch))
            for i in range(query_batch):
                images, labels, codes = img_query.next_batch(self.val_batch_size)
                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_query.feed_batch_output(self.val_batch_size, output)
                print('Cosine Loss: %s' % loss)

            database_batch = int(ceil(img_database.n_samples / self.val_batch_size))
            print("%s #validation# %d database images in %d batches" %
                  (datetime.now(), img_database.n_samples, database_batch))
            for i in range(database_batch):
                images, labels, codes = img_database.next_batch(self.val_batch_size)

                output, loss = self.sess.run([self.img_last_layer, self.cos_loss],
                                             feed_dict={self.img: images, self.img_label: labels, self.stage: 1})
                img_database.feed_batch_output(self.val_batch_size, output)
                if i % 100 == 0:
                    print('Cosine Loss[%d/%d]: %s' % (i, database_batch, loss))

            self.update_codes_batch(img_query, self.code_batch_size)
            self.update_codes_batch(img_database, self.code_batch_size)

            print("%s #validation# calculating MAP@%d" % (datetime.now(), R))
            C_tmp = self.sess.run(self.C)
            # save features and codes
            self.save_codes(img_database, img_query, C_tmp)

        mAPs = MAPs_CQ(C_tmp, self.subspace_num, self.subcenter_num, R)

        self.sess.close()
        return {
            'i2i_nocq': mAPs.get_mAPs_by_feature(img_database, img_query),
            'i2i_AQD': mAPs.get_mAPs_AQD(img_database, img_query),
            'i2i_SQD': mAPs.get_mAPs_SQD(img_database, img_query)
        }


def train(train_img, config):
    model = DVSQ(config)
    img_dataset = Dataset(train_img, config.output_dim, config.n_subspace * config.n_subcenter)
    model.train(img_dataset)
    return model.model_file


def validation(database_img, query_img, config):
    model = DVSQ(config)
    img_database = Dataset(database_img, config.output_dim, config.n_subspace * config.n_subcenter)
    img_query = Dataset(query_img, config.output_dim, config.n_subspace * config.n_subcenter)
    return model.validation(img_query, img_database, config.R)
--------------------------------------------------------------------------------