├── Logger.py ├── OPTS.py ├── README.md ├── data ├── Youtube_ID.txt ├── audio_feature_extractor │ ├── librosa_feature.py │ └── youtube-8M_audio.py ├── mp4-to-mp3 │ └── mp4tomp3.py └── video_feature_extractor │ └── READ_ME.txt ├── embed_loss.py ├── embed_network_music.py ├── embed_network_video.py ├── embed_structure_loss.py ├── eval.py ├── figure ├── concept.JPG ├── demo.JPG ├── framework.JPG ├── retrieval_result.JPG └── video_sample.JPG ├── flip_gradient.py ├── network.py ├── network_structure.py ├── test.py └── train.py /Logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os.path 4 | from datetime import datetime 5 | 6 | class Logger: 7 | def __init__(self, path): 8 | if path.endswith('.txt'): 9 | file_path = path 10 | else: 11 | file_path = os.path.join(path,'log.txt') 12 | self.log_file = open(file_path,'a') 13 | 14 | def write(self, str): 15 | current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 16 | log_format = '[%s] %s'%(current_time, str) 17 | self.log_file.write(log_format + '\n') 18 | print (log_format) 19 | 20 | def __del__(self): 21 | self.log_file.close() -------------------------------------------------------------------------------- /OPTS.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | class OPTS: 4 | def __init__(self, name): 5 | self.__name = name 6 | 7 | def get_keys(self): 8 | return self.__dict__.keys() 9 | 10 | def assert_all_keys_valid(self): 11 | for k in self.get_keys(): 12 | try: 13 | assert self.__dict__[k] is not None 14 | except AssertionError: 15 | print('%s: Option "%s" is not valid'%(self.__name, k)) 16 | exit() 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | VM-NET in Tensorflow 2 | ==== 3 | 4 | Tensorflow implementation of [SSPP-DAN: Deep Domain Adaptation Network for Face Recognition with Single Sample Per Person](https://arxiv.org/abs/1704.06761) 5 | 6 | ![Alt text](./figure/concept.JPG) 7 | 8 | Demo 9 | ------------- 10 | [![Alt text](./figure/demo.JPG)](https://www.youtube.com/watch?v=ZyINqDMo3Fg) 11 | 12 | 13 | Prerequisites 14 | ------------- 15 | * [Python 2.7](https://www.python.org/downloads/) 16 | * [Tensorflow 0.12](https://www.tensorflow.org/versions/r0.12/) 17 | * [OpenCV 2.4.9](http://opencv.org/releases.html) 18 | * [NumPy](http://www.numpy.org/) 19 | * [SciPy](https://www.scipy.org/install.html) 20 | 21 | Usage 22 | ------------- 23 | ![Alt text](./figure/video_sample.JPG) 24 | First, download video dataset from "data/YoutubeID.txt." Then, extract audio features and video features from the videos. 25 | 26 | 27 | Network structure 28 | ------------- 29 | ![Alt text](./figure/framework.JPG) 30 | 31 | 32 | To train a model with downloaded dataset: 33 | ``` 34 | $ python train.py 35 | ``` 36 | 37 | To test with an existing model: 38 | ``` 39 | $ python test.py 40 | ``` 41 | 42 | Results 43 | ------------- 44 | ![Alt text](./figure/retrieval_result.JPG) 45 | 46 | -------------------------------------------------------------------------------- /data/audio_feature_extractor/librosa_feature.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | # We'll need numpy for some mathematical operations 4 | import numpy as np 5 | import scipy.io 6 | import pickle 7 | import os 8 | import librosa 9 | # And the display module for visualization 10 | import librosa.display 11 | 12 | # mel-spectrogram parameters 13 | # SR = 12000 14 | # N_FFT = 512 15 | # HOP_LEN = 256 16 | # DURA = 29.12 17 | 18 | def feature_extraction(indir, audio_name, ext, outdir, SR, N_FFT, HOP_LEN, DURA): 19 | 20 | # Load audio 21 | src, sr = librosa.load(indir+'/'+audio_name+ext, sr=SR) 22 | 23 | 24 | # Trim audio 25 | n_sample = src.shape[0] 26 | n_sample_wanted = int(DURA * SR) 27 | if n_sample < n_sample_wanted: # if too short 28 | src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,)))) 29 | elif n_sample > n_sample_wanted: # if too long 30 | src = src[(n_sample - n_sample_wanted) / 2:(n_sample + n_sample_wanted) / 2] 31 | 32 | 33 | # Perform harmonic percussive source separation (HSS) 34 | y_harmonic, y_percussive = librosa.effects.hpss(src) 35 | logam = librosa.logamplitude 36 | 37 | # for Spectral features 38 | for i in range(2): 39 | if i == 0: 40 | y = y_harmonic 41 | else: 42 | y = y_percussive 43 | 44 | fv = logam(librosa.feature.chroma_stft(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 45 | if i == 0: 46 | fv_total = fv 47 | else: 48 | fv_total = np.vstack((fv_total, fv)) 49 | 50 | fv = logam(librosa.feature.chroma_cens(y=y, sr=SR, hop_length=HOP_LEN), ref_power=np.max) 51 | fv_total = np.vstack((fv_total, fv)) 52 | 53 | fv = logam(librosa.feature.melspectrogram(y=y, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=96), ref_power=np.max) 54 | fv_total = np.vstack((fv_total, fv)) 55 | 56 | fv_mfcc = librosa.feature.mfcc(y=y, sr=SR, hop_length=HOP_LEN) 57 | fv = logam(fv_mfcc, ref_power=np.max) 58 | fv_total = np.vstack((fv_total, fv)) 59 | fv = logam(librosa.feature.delta(fv_mfcc), ref_power=np.max) 60 | fv_total = np.vstack((fv_total, fv)) 61 | fv = logam(librosa.feature.delta(fv_mfcc, order=2), ref_power=np.max) 62 | fv_total = np.vstack((fv_total, fv)) 63 | 64 | fv = logam(librosa.feature.rmse(y=y, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 65 | fv_total = np.vstack((fv_total, fv)) 66 | 67 | fv = logam(librosa.feature.spectral_centroid(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 68 | fv_total = np.vstack((fv_total, fv)) 69 | 70 | fv = logam(librosa.feature.spectral_bandwidth(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 71 | fv_total = np.vstack((fv_total, fv)) 72 | 73 | fv = logam(librosa.feature.spectral_rolloff(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 74 | fv_total = np.vstack((fv_total, fv)) 75 | 76 | fv = logam(librosa.feature.poly_features(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max) 77 | fv_total = np.vstack((fv_total, fv)) 78 | 79 | fv = logam(librosa.feature.poly_features(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT, order=2), ref_power=np.max) 80 | fv_total = np.vstack((fv_total, fv)) 81 | 82 | fv = logam(librosa.feature.zero_crossing_rate(y=y, hop_length=HOP_LEN, frame_length=N_FFT), ref_power=np.max) 83 | fv_total = np.vstack((fv_total, fv)) 84 | 85 | # Feature aggregation 86 | fv_mean = np.mean(fv_total, axis=1) 87 | fv_var = np.var(fv_total, axis=1) 88 | fv_amax = np.amax(fv_total, axis=1) 89 | fv_aggregated = np.hstack((fv_mean, fv_var)) 90 | fv_aggregated = np.hstack((fv_aggregated, fv_amax)) 91 | 92 | # # for tempo features 93 | # tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=SR) 94 | # fv_aggregated = np.hstack((fv_aggregated, tempo)) 95 | 96 | # # for Rhythm features 97 | # oenv = librosa.onset.onset_strength(y=y_percussive, sr=SR) 98 | # tempo = librosa.feature.tempogram(onset_envelope=oenv, sr=SR) 99 | # tempo_mean = np.mean(tempo, axis=1) 100 | # tempo_var = np.var(tempo, axis=1) 101 | # tempo_amax = np.amax(tempo, axis=1) 102 | # tempo_aggregated = np.hstack((tempo_mean, tempo_var)) 103 | # tempo_aggregated = np.hstack((tempo_aggregated, tempo_amax)) 104 | 105 | # for pickle 106 | pklName = outdir + "/" + audio_name + '.pkl' 107 | f = open(pklName, 'wb') 108 | pickle.dump(fv_aggregated, f) 109 | f.close() 110 | 111 | 112 | print(audio_name) 113 | -------------------------------------------------------------------------------- /data/audio_feature_extractor/youtube-8M_audio.py: -------------------------------------------------------------------------------- 1 | from sys import argv # allows user to specify input and output directories 2 | import os # help with file handling 3 | import librosa_feature 4 | 5 | SR = 12000 6 | N_FFT = 512 7 | HOP_LEN = 256 8 | DURA = 29.12 9 | 10 | def check_file_exists(directory, filename, extension): 11 | path = directory + "/" + filename + extension 12 | return os.path.isfile(path) 13 | 14 | 15 | def main(indir, ext, outdir): 16 | try: 17 | # check specified folders exist 18 | if not os.path.exists(indir): 19 | exit("Error: Input directory \'" + indir + "\' does not exist. (try prepending './')") 20 | if not os.path.exists(outdir): 21 | exit("Error: Output directory \'" + outdir + "\' does not exist.") 22 | if not os.access(outdir, os.W_OK): 23 | exit("Error: Output directory \'" + outdir + "\' is not writeable.") 24 | 25 | print "[%s/*.mp3] --> [%s/feature]" % (indir, outdir) 26 | files = [] # files for exporting 27 | 28 | # get a list of all convertible files in the input directory 29 | filelist = [f for f in os.listdir(indir) if f.endswith(ext)] 30 | for path in filelist: 31 | basename = os.path.basename(path) 32 | filename = os.path.splitext(basename)[0] 33 | files.append(filename) 34 | # remove files that have already been outputted from the list 35 | files[:] = [f for f in files if not check_file_exists(outdir + "/pickle", f, ".pkl")] 36 | files[:] = [f for f in files if not check_file_exists(outdir + "/matlab", f, ".mat")] 37 | except OSError as e: 38 | exit(e) 39 | 40 | if len(files) == 0: 41 | exit("Could not find any files to convert that have not already been converted.") 42 | 43 | # convert all unconverted files 44 | for filename in files: 45 | librosa_feature.feature_extraction(indir, filename, ext, outdir, SR, N_FFT, HOP_LEN, DURA) 46 | 47 | 48 | # # set the default directories and try to get input directories 49 | # args = [".", "."] 50 | # for i in range(1, min(len(argv), 3)): 51 | # args[i - 1] = argv[i] 52 | # 53 | # # if only input directory is set, make the output directory the same 54 | # if len(argv) == 2: 55 | # args[1] = args[0] 56 | 57 | ext = '.mp3' 58 | indir = '/media/mass/D/wbim/project/youtube-8m_general/audio' 59 | outdir = '/media/mass/D/wbim/project/youtube-8m_general/ge_audio_feature' 60 | 61 | main(indir, ext, outdir) 62 | -------------------------------------------------------------------------------- /data/mp4-to-mp3/mp4tomp3.py: -------------------------------------------------------------------------------- 1 | # MP4 TO MP3 CONVERSION SCRIPT 2 | # script to convert mp4 video files to mp3 audio 3 | # useful for turning video from sites such as www.ted.com into audio files useable 4 | # on any old mp3 player. 5 | # 6 | # usage: python mp4tomp3.py [input directory [output directory]] 7 | # input directory (optional) - set directory containing mp4 files to convert (defaults to current folder) 8 | # output directory (optional) - set directory to export mp3 files to (defaults to input) 9 | # 10 | # NOTE: you will need python 2, mplayer and lame for this script to work 11 | # sudo apt-get install lame 12 | # sudo apt-get install mplayer 13 | # sudo apt-get install python2.7 14 | 15 | 16 | from subprocess import call # for calling mplayer and lame 17 | from sys import argv # allows user to specify input and output directories 18 | import os # help with file handling 19 | import pickle 20 | 21 | def check_file_exists(directory, filename, extension): 22 | path = directory + "/" + filename + extension 23 | return os.path.isfile(path) 24 | 25 | def main(indir, outdir): 26 | 27 | 28 | try: 29 | # get a list of all convertible files in the input directory 30 | 31 | files = [] 32 | filelist = [ indir + "/" + f + ".mp4" for f in list_mv ] 33 | for path in filelist: 34 | basename = os.path.basename(path) 35 | filename = os.path.splitext(basename)[0] 36 | files.append(filename) 37 | # remove files that have already been outputted from the list 38 | files[:] = [f for f in files if not check_file_exists(outdir, f, ".mp3")] 39 | except OSError as e: 40 | exit(e) 41 | 42 | if len(files) == 0: 43 | exit("Could not find any files to convert that have not already been converted.") 44 | 45 | # convert all unconverted files 46 | for filename in files: 47 | print "-- converting %s.mp4 to %s.mp3 --" % (indir + "/" + filename, outdir + "/" + filename) 48 | call(["mplayer", "-novideo", "-nocorrect-pts", "-ao", "pcm:waveheader", indir + "/" + filename + ".mp4"]) 49 | call(["lame", "-h", "-b", "128", "audiodump.wav", outdir + "/" + filename + ".mp3"]) 50 | try: 51 | os.remove("audiodump.wav") 52 | except: 53 | continue 54 | 55 | # set the default directories and try to get input directories 56 | args = [".", "."] 57 | for i in range(1, min(len(argv), 3)): 58 | args[i - 1] = argv[i] 59 | 60 | # if only input directory is set, make the output directory the same 61 | if len(argv) == 2: 62 | args[1] = args[0] 63 | 64 | f = open('no_list.pkl', 'rb') 65 | list_mv = pickle.load(f) 66 | 67 | indir = '/media/mass/D/wbim/project/youtube-8m-music-video/video' 68 | outdir = '/home/csehong/PycharmProjects/audio_feature_extraction/audio' 69 | 70 | main(indir, outdir) 71 | -------------------------------------------------------------------------------- /data/video_feature_extractor/READ_ME.txt: -------------------------------------------------------------------------------- 1 | We extract frame-level features using an Inception network trained on the large-scale ImageNet dataset following starter code provided by YouTube-8M dataset: https://github.com/google/youtube-8m#video-level-models 2 | 3 | You can also download the frame-level features from https://research.google.com/youtube8m/download.html 4 | 5 | -------------------------------------------------------------------------------- /embed_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | import OPTS 7 | 8 | class Triplet: 9 | class OPTS(OPTS.OPTS): 10 | class DIST: 11 | L2 = 0 12 | COS = 1 13 | 14 | def __init__(self): 15 | OPTS.OPTS.__init__(self,'Triplet OPTS') 16 | self.network_name = None 17 | self.margin = 0.1 18 | self.distance = self.DIST.L2 19 | self.is_l2_norm_already = True 20 | self.num_max_pos_pair = 1 21 | 22 | def __init__(self, opts=None): 23 | if opts is None: 24 | opts = self.OPTS() 25 | self.opts = opts 26 | self.opts.assert_all_keys_valid() 27 | 28 | def construct(self, embed_x, embed_y, aff_xy, K, name): 29 | with tf.variable_scope(name): 30 | if not self.opts.is_l2_norm_already: 31 | embed_x = tf.nn.l2_normalize(embed_x, 1) 32 | embed_y = tf.nn.l2_normalize(embed_y, 1) 33 | 34 | # convert simliarity to distance by (-1) >> high value indicates the distance between the samples is long 35 | dist_xy = (-1) *tf.matmul(embed_x, embed_y,transpose_a = False, transpose_b = True) 36 | self.dist_xy = dist_xy 37 | 38 | dist_pos_pair = tf.select(aff_xy, dist_xy, tf.ones_like(dist_xy,dtype=tf.float32)*(-1e+6)) 39 | dist_neg_pair = tf.select(tf.logical_not(aff_xy), dist_xy, tf.ones_like(dist_xy,dtype=tf.float32)*(1e+6)) 40 | 41 | # for top violating postive samples 42 | top_k_pos_pair_xy, _ = tf.nn.top_k(dist_pos_pair, k=self.opts.num_max_pos_pair) 43 | top_k_pos_pair_yx, _ = tf.nn.top_k(tf.transpose(dist_pos_pair),k=self.opts.num_max_pos_pair) 44 | top_k_pos_pair_yx = tf.transpose(top_k_pos_pair_yx) 45 | 46 | # for top violating negative samples 47 | top_k_neg_pair_xy, _ = tf.nn.top_k(tf.negative(dist_neg_pair),k=K) 48 | top_k_neg_pair_xy = tf.negative(top_k_neg_pair_xy) 49 | top_k_neg_pair_yx, _ = tf.nn.top_k(tf.transpose(tf.negative(dist_neg_pair)),k=K) 50 | top_k_neg_pair_yx = tf.negative(tf.transpose(top_k_neg_pair_yx)) 51 | 52 | top_k_pos_pair_xy = tf.tile(top_k_pos_pair_xy, [1,K]) 53 | top_k_pos_pair_yx = tf.tile(top_k_pos_pair_yx, [K,1]) 54 | shape_xy = tf.shape(aff_xy) 55 | 56 | top_k_pos_pair_xy = tf.reshape(top_k_pos_pair_xy, [shape_xy[0],K,-1]) 57 | top_k_pos_pair_yx = tf.reshape(top_k_pos_pair_yx, [-1,K,shape_xy[1]]) 58 | 59 | loss_xy = tf.maximum(0.,self.opts.margin + top_k_pos_pair_xy - tf.expand_dims(top_k_neg_pair_xy,2)) 60 | loss_yx = tf.maximum(0.,self.opts.margin + top_k_pos_pair_yx - tf.expand_dims(top_k_neg_pair_yx,0)) 61 | 62 | loss_xy = tf.reduce_mean(tf.reshape(loss_xy,[-1])) 63 | loss_yx = tf.reduce_mean(tf.reshape(loss_yx,[-1])) 64 | 65 | # xy_nonzero = tf.reduce_sum(tf.cast(tf.greater(loss_xy, 0.), tf.float32)) + 1e-13 66 | # yx_nonzero = tf.reduce_sum(tf.cast(tf.greater(loss_yx, 0.), tf.float32)) + 1e-13 67 | # 68 | # loss_xy = loss_xy / xy_nonzero 69 | # loss_yx = loss_yx / yx_nonzero 70 | 71 | return loss_xy, loss_yx 72 | -------------------------------------------------------------------------------- /embed_network_music.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | import OPTS 7 | 8 | 9 | class Music_Model: 10 | class OPTS(OPTS.OPTS): 11 | def __init__(self): 12 | OPTS.OPTS.__init__(self, 'Music_Model OPTS') 13 | self.network_name = None 14 | self.num_layer = 2 15 | 16 | def __init__(self, opts=None): 17 | if opts is None: 18 | opts = self.OPTS() 19 | self.opts = opts 20 | self.opts.assert_all_keys_valid() 21 | self.__is_constructed = False 22 | self.is_training = True # move into opts!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 23 | 24 | 25 | def construct(self, x_data, keep_prob, is_linear = False, is_training=None): 26 | with tf.variable_scope(self.opts.network_name): 27 | self.is_training = is_training 28 | self.fc1 = self.fc_layer(x_data, 2048, name='fc1') 29 | self.relu1 = tf.nn.relu(self.fc1, name='relu1') 30 | if (is_linear == False): 31 | self.relu1_drop = tf.nn.dropout(self.relu1, keep_prob=keep_prob, name='relu1_drop') 32 | else: 33 | self.relu1_drop = tf.nn.dropout(self.fc1, keep_prob=keep_prob, name='fc1_drop') 34 | 35 | self.fc2 = self.fc_layer(self.relu1_drop, 1024, name='fc2') 36 | self.relu2 = tf.nn.relu(self.fc2, name='relu2') 37 | if (is_linear == False): 38 | self.relu2_drop = tf.nn.dropout(self.relu2, keep_prob=keep_prob, name='relu2_drop') 39 | else: 40 | self.relu2_drop = tf.nn.dropout(self.fc2, keep_prob=keep_prob, name='fc2_drop') 41 | 42 | if (self.opts.num_layer == 3): # We only consier that the number of layers for music is only 3... 43 | self.relu_last = self.relu2_drop 44 | else: 45 | self.fc3 = self.fc_layer(self.relu2_drop, 1024, name='fc3') 46 | self.relu3 = tf.nn.relu(self.fc3, name='relu3') 47 | if (is_linear == False): 48 | self.relu3_drop = tf.nn.dropout(self.relu3, keep_prob=keep_prob, name='relu3_drop') 49 | else: 50 | self.relu3_drop = tf.nn.dropout(self.fc3, keep_prob=keep_prob, name='fc3_drop') 51 | 52 | self.fc4 = self.fc_layer(self.relu3_drop, 512, name='fc4') 53 | self.relu4 = tf.nn.relu(self.fc4, name='relu4') 54 | if (is_linear == False): 55 | self.relu4_drop = tf.nn.dropout(self.relu4, keep_prob=keep_prob, name='relu4_drop') #orignally I mistake relu4_drop -> relu_3_drop... 56 | else: 57 | self.relu4_drop = tf.nn.dropout(self.fc4, keep_prob=keep_prob, name='fc4_drop') 58 | 59 | if (self.opts.num_layer == 1): 60 | self.relu_last = x_data 61 | elif (self.opts.num_layer == 2): 62 | self.relu_last = self.relu1_drop 63 | elif (self.opts.num_layer == 4): 64 | self.relu_last = self.relu3_drop 65 | elif (self.opts.num_layer == 5): 66 | self.relu_last = self.relu4_drop 67 | else: 68 | print ("invalid layer, num layer should be 2~5") 69 | return 70 | 71 | self.fc5 = self.fc_layer(self.relu_last, 512, name='fc5') 72 | # self.bn = self.batch_normalization(self.fc2, 512, name='batch_normalize') 73 | self.bn = tf.contrib.layers.batch_norm(inputs=self.fc5, decay=0.99, center=True, scale=True, epsilon=1e-3, 74 | is_training=self.is_training, scope='BN') 75 | self.l2_norm = tf.nn.l2_normalize(self.bn, dim=1, name='l2_normalize') 76 | return self.l2_norm 77 | 78 | def fc_layer(self, bottom, output_dim, name, reuse=False): 79 | with tf.variable_scope(name, reuse=reuse): 80 | shape = bottom.get_shape().as_list() 81 | dim = 1 82 | for d in shape[1:]: 83 | dim *= d 84 | x = tf.reshape(bottom, [-1, dim]) 85 | 86 | weights = tf.get_variable(name='W', shape=[dim, output_dim], 87 | initializer=self.initializeWeight(shape=[dim, output_dim])) 88 | # fc = tf.matmul(x, weights) # to avoid conflict from beta of batch normalization 89 | bias = tf.get_variable(name='b', shape=[output_dim], 90 | initializer=tf.constant_initializer(value=0., dtype=tf.float32)) 91 | fc = tf.nn.xw_plus_b(x, weights, bias, name="fc") 92 | return fc 93 | 94 | # http://r2rt.com/implementing-batch-normalization-in-tensorflow.html 95 | def batch_normalization(self, x, dim, name, reuse=False): 96 | with tf.variable_scope(name): 97 | batch_mean, batch_var = tf.nn.moments(x, [0]) 98 | 99 | scale = tf.get_variable(name='scale', shape=[dim], 100 | initializer=tf.constant_initializer(value=1, dtype=tf.float32)) 101 | beta = tf.get_variable(name='beta', shape=[dim], 102 | initializer=tf.constant_initializer(value=0, dtype=tf.float32)) 103 | 104 | BN = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, 1e-3) 105 | return BN 106 | 107 | # xavier 108 | def initializeWeight(self, shape, method='xavier2'): 109 | inputsize = reduce(lambda x, y: x * y, shape[0:-1]) 110 | if method == 'xavier2': 111 | stddev = np.sqrt(2.0 / (inputsize)) 112 | else: 113 | outsize = shape[-1] 114 | stddev = np.sqrt(3.0 / (inputsize + outsize)) 115 | return tf.truncated_normal_initializer(stddev=stddev) 116 | -------------------------------------------------------------------------------- /embed_network_video.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | import OPTS 7 | 8 | 9 | class Video_Model: 10 | class OPTS(OPTS.OPTS): 11 | def __init__(self): 12 | OPTS.OPTS.__init__(self, 'Video_Model OPTS') 13 | self.network_name = None 14 | self.num_layer = 2 15 | 16 | def __init__(self, opts=None): 17 | if opts is None: 18 | opts = self.OPTS() 19 | self.opts = opts 20 | self.opts.assert_all_keys_valid() 21 | self.__is_constructed = False 22 | self.is_training = True 23 | 24 | 25 | def construct(self, x_data, keep_prob, is_linear = False, is_training=None): 26 | with tf.variable_scope(self.opts.network_name): 27 | self.is_training = is_training 28 | self.fc1 = self.fc_layer(x_data, 2048, name='fc1') 29 | self.relu1 = tf.nn.relu(self.fc1, name='relu1') 30 | if(is_linear==False): 31 | self.relu1_drop = tf.nn.dropout(self.relu1, keep_prob=keep_prob, name='relu1_drop') 32 | else: 33 | self.relu1_drop = tf.nn.dropout(self.fc1, keep_prob=keep_prob, name='fc1_drop') 34 | 35 | self.fc2 = self.fc_layer(self.relu1_drop, 1024, name='fc2') 36 | self.relu2 = tf.nn.relu(self.fc2, name='relu2') 37 | if (is_linear == False): 38 | self.relu2_drop = tf.nn.dropout(self.relu2, keep_prob=keep_prob, name='relu2_drop') 39 | else: 40 | self.relu2_drop = tf.nn.dropout(self.fc2, keep_prob=keep_prob, name='fc2_drop') 41 | 42 | self.fc3 = self.fc_layer(self.relu2_drop, 1024, name='fc3') 43 | self.relu3 = tf.nn.relu(self.fc3, name='relu3') 44 | if (is_linear == False): 45 | self.relu3_drop = tf.nn.dropout(self.relu3, keep_prob=keep_prob, name='relu3_drop') 46 | else: 47 | self.relu3_drop = tf.nn.dropout(self.fc3, keep_prob=keep_prob, name='fc3_drop') 48 | 49 | self.fc4 = self.fc_layer(self.relu3_drop, 512, name='fc4') 50 | self.relu4 = tf.nn.relu(self.fc4, name='relu4') 51 | if (is_linear == False): 52 | self.relu4_drop = tf.nn.dropout(self.relu4, keep_prob=keep_prob, name='relu4_drop') #orignally I mistake relu4_drop -> relu_3_drop... 53 | else: 54 | self.relu4_drop = tf.nn.dropout(self.fc4, keep_prob=keep_prob, name='fc4_drop') 55 | 56 | 57 | if (self.opts.num_layer == 1): 58 | self.relu_last = x_data 59 | elif (self.opts.num_layer == 2): 60 | self.relu_last = self.relu1_drop 61 | elif (self.opts.num_layer == 3): 62 | self.relu_last = self.relu2_drop 63 | elif (self.opts.num_layer == 4): 64 | self.relu_last = self.relu3_drop 65 | elif (self.opts.num_layer == 5): 66 | self.relu_last = self.relu4_drop 67 | else: 68 | print ("invalid layer, num layer should be 2~5") 69 | return 70 | 71 | self.fc5 = self.fc_layer(self.relu_last , 512, name='fc5') 72 | # self.bn = self.batch_normalization(self.fc2, 512, name='batch_normalize') 73 | self.bn = tf.contrib.layers.batch_norm(inputs=self.fc5, decay=0.99, center=True, scale=True, epsilon=1e-3, 74 | is_training=self.is_training, scope='BN') 75 | self.l2_norm = tf.nn.l2_normalize(self.bn, dim=1, name='l2_normalize') 76 | return self.l2_norm 77 | 78 | def fc_layer(self, bottom, output_dim, name, reuse=False): 79 | with tf.variable_scope(name, reuse=reuse): 80 | shape = bottom.get_shape().as_list() 81 | dim = 1 82 | for d in shape[1:]: 83 | dim *= d 84 | x = tf.reshape(bottom, [-1, dim]) 85 | 86 | weights = tf.get_variable(name='W', shape=[dim, output_dim], 87 | initializer=self.initializeWeight(shape=[dim, output_dim])) 88 | # fc = tf.matmul(x, weights) # to avoid conflict from beta of batch normalization 89 | bias = tf.get_variable(name='b', shape=[output_dim], 90 | initializer=tf.constant_initializer(value=0., dtype=tf.float32)) 91 | fc = tf.nn.xw_plus_b(x, weights, bias, name="fc") 92 | return fc 93 | 94 | # http://r2rt.com/implementing-batch-normalization-in-tensorflow.html 95 | def batch_normalization(self, x, dim, name, reuse=False): 96 | with tf.variable_scope(name): 97 | batch_mean, batch_var = tf.nn.moments(x, [0]) 98 | 99 | scale = tf.get_variable(name='scale', shape=[dim], 100 | initializer=tf.constant_initializer(value=1, dtype=tf.float32)) 101 | beta = tf.get_variable(name='beta', shape=[dim], 102 | initializer=tf.constant_initializer(value=0, dtype=tf.float32)) 103 | 104 | BN = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, 1e-3) 105 | return BN 106 | 107 | # xavier 108 | def initializeWeight(self, shape, method='xavier2'): 109 | inputsize = reduce(lambda x, y: x * y, shape[0:-1]) 110 | if method == 'xavier2': 111 | stddev = np.sqrt(2.0 / (inputsize)) 112 | else: 113 | outsize = shape[-1] 114 | stddev = np.sqrt(3.0 / (inputsize + outsize)) 115 | return tf.truncated_normal_initializer(stddev=stddev) 116 | -------------------------------------------------------------------------------- /embed_structure_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | import random 6 | import OPTS 7 | 8 | 9 | 10 | 11 | class Triplet_Structure: 12 | class OPTS(OPTS.OPTS): 13 | class DIST: 14 | L2 = 0 15 | COS = 1 16 | 17 | def __init__(self): 18 | OPTS.OPTS.__init__(self,'Triplet Structure_OPTS') 19 | self.network_name = None 20 | self.margin = 0.1 21 | self.distance = self.DIST.L2 22 | self.is_l2_norm_already = True 23 | self.num_max_pos_pair = 1 24 | 25 | def __init__(self, opts=None): 26 | if opts is None: 27 | opts = self.OPTS() 28 | self.opts = opts 29 | self.opts.assert_all_keys_valid() 30 | 31 | def construct(self, low_feature, embed_feature, K, name): 32 | with tf.variable_scope(name): 33 | 34 | low_feature = tf.nn.l2_normalize(low_feature, 1) 35 | 36 | 37 | # convert simliarity to distance by (-1) >> high value indicates the distance between the samples is long 38 | dist_low = (-1) * tf.matmul(low_feature, low_feature, transpose_a=False, transpose_b=True) 39 | dist_embed = (-1) * tf.matmul(embed_feature, embed_feature, transpose_a=False, transpose_b=True) 40 | 41 | # d1_low = tf.slice(dist_low, [2, 0], [K, 1]) 42 | # d2_low = tf.slice(dist_low, [2, 1], [K, 1]) 43 | # d1_embed = tf.slice(dist_embed, [2, 0], [K, 1]) 44 | # d2_embed = tf.slice(dist_embed, [2, 1], [K, 1]) 45 | # coeff = tf.sign(d1_low - d2_low) - tf.sign(d1_embed - d2_embed) 46 | # dist_inverse = (d2_embed - d1_embed) # -1 * (d1_embed - d2_embed) 47 | # dist_tensor = tf.multiply(coeff, dist_inverse) 48 | # loss_structure = tf.reduce_mean(dist_tensor) 49 | 50 | d1_low = tf.slice(dist_low, [0, 0], [K, K-1]) 51 | d2_low = tf.slice(dist_low, [0, 1], [K, K-1]) 52 | d1_embed = tf.slice(dist_embed, [0, 0], [K, K-1]) 53 | d2_embed = tf.slice(dist_embed, [0, 1], [K, K-1]) 54 | coeff = tf.sign(d1_low - d2_low) - tf.sign(d1_embed - d2_embed) 55 | dist_inverse = (d2_embed - d1_embed) # -1 * (d1_embed - d2_embed) 56 | dist_tensor = tf.multiply(coeff, dist_inverse) 57 | loss_structure = tf.reduce_mean(dist_tensor) 58 | 59 | 60 | return loss_structure 61 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | import OPTS 5 | 6 | class Recall: 7 | class OPTS(OPTS.OPTS): 8 | class DIST: 9 | L2 = 0 10 | COS = 1 11 | def __init__(self): 12 | OPTS.OPTS.__init__(self,'Recall OPTS') 13 | self.network_name = None 14 | self.distance = self.DIST.COS 15 | 16 | def __init__(self, opts=None): 17 | if opts is None: 18 | opts = self.OPTS() 19 | self.opts = opts 20 | self.opts.assert_all_keys_valid() 21 | 22 | def construct(self, embed_x, embed_y, aff_xy, K_list): 23 | embed_x = tf.nn.l2_normalize(embed_x, 1) 24 | embed_y = tf.nn.l2_normalize(embed_y, 1) 25 | 26 | dist_xy = tf.matmul(embed_x, embed_y,transpose_a = False, transpose_b = True) 27 | 28 | self.to_debug = [] 29 | 30 | shape_xy = tf.shape(aff_xy) 31 | recall_k_xy = [] 32 | recall_k_yx = [] 33 | recall_k_xy_idx = 0 34 | recall_k_yx_idx = 0 35 | for K in K_list: 36 | indice_mat_xy = tf.tile(tf.expand_dims(tf.range(0,shape_xy[0]),1),[1,K]) 37 | indice_mat_xy = tf.expand_dims(indice_mat_xy, dim=1) 38 | indice_mat_yx = tf.tile(tf.expand_dims(tf.range(0,shape_xy[1]),1),[1,K]) 39 | indice_mat_yx = tf.expand_dims(indice_mat_yx, dim=1) 40 | 41 | _, xy_idx = tf.nn.top_k(dist_xy,k=K) 42 | _, yx_idx = tf.nn.top_k(tf.transpose(dist_xy),k=K) 43 | 44 | # variable for recall result 45 | recall_k_xy_idx = xy_idx 46 | recall_k_yx_idx = yx_idx 47 | 48 | xy_idx = tf.expand_dims(xy_idx, dim=1) 49 | yx_idx = tf.expand_dims(yx_idx, dim=1) 50 | xy_idx = tf.concat(1, [indice_mat_xy,xy_idx]) 51 | yx_idx = tf.concat(1, [yx_idx,indice_mat_yx]) 52 | 53 | xy_idx = tf.transpose(xy_idx, perm=[0,2,1]) 54 | yx_idx = tf.transpose(yx_idx, perm=[0,2,1]) 55 | 56 | xy_search = tf.cast(tf.gather_nd(aff_xy, xy_idx),dtype=tf.float32) 57 | yx_search = tf.cast(tf.gather_nd(aff_xy, yx_idx),dtype=tf.float32) 58 | 59 | xy_result = tf.cast(tf.not_equal(tf.reduce_sum(xy_search,1), 0.),tf.float32) 60 | yx_result = tf.cast(tf.not_equal(tf.reduce_sum(yx_search,1), 0.),tf.float32) 61 | 62 | recall_k_xy.append(tf.reduce_mean(xy_result)) 63 | recall_k_yx.append(tf.reduce_mean(yx_result)) 64 | 65 | return recall_k_xy, recall_k_yx, recall_k_xy_idx, recall_k_yx_idx 66 | 67 | 68 | if __name__ == '__main__': 69 | rec_opts = Recall.OPTS() 70 | rec_opts.network_name = 'Test_Object' 71 | 72 | recall = Recall(rec_opts) 73 | 74 | embed_x = np.random.normal(size=[20,10]) 75 | embed_y = np.random.normal(size=[20,10]) 76 | aff_xy = np.eye(20,20) 77 | K_list = [3] 78 | r_xy, r_yx = recall.construct(embed_x, embed_y, aff_xy, K_list) 79 | import scipy.io 80 | with tf.Session() as sess: 81 | sess.run(tf.global_variables_initializer()) 82 | d, r1, r2 = sess.run([recall.to_debug, r_xy, r_yx]) 83 | print r1, r2 84 | scipy.io.savemat('recall_test.mat',mdict={'d':d}) 85 | 86 | -------------------------------------------------------------------------------- /figure/concept.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/concept.JPG -------------------------------------------------------------------------------- /figure/demo.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/demo.JPG -------------------------------------------------------------------------------- /figure/framework.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/framework.JPG -------------------------------------------------------------------------------- /figure/retrieval_result.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/retrieval_result.JPG -------------------------------------------------------------------------------- /figure/video_sample.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/video_sample.JPG -------------------------------------------------------------------------------- /flip_gradient.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | 4 | class FlipGradientBuilder(object): 5 | def __init__(self): 6 | self.num_calls = 0 7 | 8 | def __call__(self, x, l=1.0): 9 | grad_name = "FlipGradient%d" % self.num_calls 10 | @ops.RegisterGradient(grad_name) 11 | def _flip_gradients(op, grad): 12 | return [tf.neg(grad) * l] 13 | 14 | g = tf.get_default_graph() 15 | with g.gradient_override_map({"Identity": grad_name}): 16 | y = tf.identity(x) 17 | 18 | self.num_calls += 1 19 | return y 20 | 21 | flip_gradient = FlipGradientBuilder() 22 | 23 | 24 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | from flip_gradient import * 7 | import embed_network as en 8 | import embed_network_music as enm 9 | import embed_network_video as env 10 | import embed_loss as el 11 | import eval 12 | 13 | import OPTS 14 | 15 | 16 | class Model: 17 | class OPTS(OPTS.OPTS): 18 | def __init__(self): 19 | OPTS.OPTS.__init__(self, 'Model OPTS') 20 | self.network_name = None 21 | self.x_dim = None 22 | self.y_dim = None 23 | self.x_num_layer = 2 24 | self.y_num_layer = 2 25 | self.constraint_weights = [2, 1] 26 | self.batch_size = 1024 27 | self.is_linear = False 28 | 29 | def __init__(self, opts): 30 | if opts is None: 31 | opts = self.OPTS() 32 | self.opts = opts 33 | self.opts.assert_all_keys_valid() 34 | 35 | def construct(self): 36 | 37 | self.x_data = tf.placeholder(tf.float32, [None, self.opts.x_dim], name='X_data') 38 | self.y_data = tf.placeholder(tf.float32, [None, self.opts.y_dim], name='Y_data') 39 | 40 | # Build embedding 41 | self.aff_xy = tf.placeholder(tf.bool, [None, None], name='aff_xy') 42 | self.K = tf.placeholder(tf.int32, name='K') 43 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') 44 | self.is_training = tf.placeholder(tf.bool, name='is_training') 45 | self.l = tf.placeholder(tf.float32, name='lambda_for_adv') 46 | 47 | x_net_opts = enm.Music_Model.OPTS() 48 | x_net_opts.network_name = 'X_network' 49 | x_net_opts.num_layer = self.opts.x_num_layer 50 | self.x_net = enm.Music_Model(x_net_opts) 51 | 52 | if(self.opts.x_num_layer == 3 and self.opts.y_num_layer == 2): 53 | y_net_opts = en.Wang_Model.OPTS() 54 | y_net_opts.network_name = 'Y_network' 55 | self.y_net = en.Wang_Model(y_net_opts) 56 | else: 57 | y_net_opts = env.Video_Model.OPTS() 58 | y_net_opts.network_name = 'Y_network' 59 | y_net_opts.num_layer = self.opts.y_num_layer 60 | self.y_net = env.Video_Model(y_net_opts) 61 | 62 | self.x_embed = self.x_net.construct(self.x_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training) 63 | self.y_embed = self.y_net.construct(self.y_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training) 64 | 65 | 66 | 67 | # Cross-modal Triplet loss 68 | el_opts = el.Triplet.OPTS() 69 | el_opts.network_name = 'Triplet' 70 | el_opts.distance = el_opts.DIST.COS 71 | 72 | el_net = el.Triplet(el_opts) 73 | self.loss_xy, self.loss_yx = el_net.construct(self.x_embed, self.y_embed, self.aff_xy, self.K, 'Triplet_Net') 74 | 75 | # Adversarial loss 76 | self.concat_feat = tf.concat(0, [self.x_embed, self.y_embed]) 77 | self.dom_label = tf.concat(0, [tf.tile([0.], [tf.shape(self.x_embed)[0]]), 78 | tf.tile([1.], [tf.shape(self.y_embed)[0]])]) 79 | self.dom_label = tf.expand_dims(self.dom_label, 1) 80 | self.dom_loss, self.dom_acc = self.dom_classifier(self.concat_feat, self.dom_label, self.l, self.keep_prob) 81 | 82 | self.loss = self.loss_xy * self.opts.constraint_weights[0] + self.loss_yx * self.opts.constraint_weights[1] 83 | 84 | # calculate gradients 85 | _, self.xy_rank_x = tf.nn.moments(tf.gradients(self.loss_xy, [self.x_embed])[0], [0, 1]) 86 | _, self.yx_rank_x = tf.nn.moments(tf.gradients(self.loss_yx, [self.x_embed])[0], [0, 1]) 87 | _, self.dom_x = tf.nn.moments(tf.gradients(self.dom_loss, [self.x_embed])[0], [0, 1]) 88 | _, self.xy_rank_y = tf.nn.moments(tf.gradients(self.loss_xy, [self.y_embed])[0], [0, 1]) 89 | _, self.yx_rank_y = tf.nn.moments(tf.gradients(self.loss_yx, [self.y_embed])[0], [0, 1]) 90 | _, self.dom_y = tf.nn.moments(tf.gradients(self.dom_loss, [self.y_embed])[0], [0, 1]) 91 | 92 | 93 | 94 | eval_opts = eval.Recall.OPTS() 95 | eval_opts.network_name = 'Recall' 96 | 97 | eval_net = eval.Recall(eval_opts) 98 | 99 | self.recall_xy, self.recall_yx, self.xy_idx, self.yx_idx = eval_net.construct(self.x_embed, self.y_embed, 100 | self.aff_xy, 101 | [1, 10, 25, 50, 100, 1000]) 102 | 103 | # self.debug_list = el_net.debug_list 104 | 105 | def dom_classifier(self, tensor, labels, l=1., keep_prob=1.): 106 | with tf.name_scope("dom_classifier"): 107 | feature = flip_gradient(tensor, l) 108 | 109 | d_fc1 = tf.nn.relu(self.fc_layer(feature, 512, "dom_fc1")) 110 | d_fc1 = tf.nn.dropout(d_fc1, keep_prob) 111 | 112 | d_fc2 = tf.nn.relu(self.fc_layer(d_fc1, 512, "dom_fc2")) 113 | d_fc2 = tf.nn.dropout(d_fc2, keep_prob) 114 | 115 | # d_logits = self.fc_layer(d_fc2, 2, "dom_logits") 116 | d_logit = self.fc_layer(d_fc2, 1, "dom_logit") 117 | 118 | with tf.name_scope("domain_acc_and_loss"): 119 | # self.domain_prediction = tf.nn.softmax(d_logits) 120 | domain_prediction = tf.sigmoid(d_logit) 121 | 122 | # self.domain_loss = tf.nn.softmax_cross_entropy_with_logits(d_logits, self.domain) 123 | domain_loss = tf.nn.sigmoid_cross_entropy_with_logits(d_logit, labels) 124 | domain_loss = tf.reduce_mean(domain_loss) 125 | 126 | # domain acc 127 | correct_domain_prediction = tf.equal(tf.round(domain_prediction), labels) 128 | domain_acc = tf.reduce_mean(tf.cast(correct_domain_prediction, tf.float32)) 129 | return domain_loss, domain_acc 130 | 131 | def fc_layer(self, tensor, dim, name, reuse=False): 132 | in_shape = tensor.get_shape() 133 | 134 | with tf.variable_scope(name, reuse=reuse): 135 | weights = tf.get_variable("weights", shape=[in_shape[1], dim], 136 | initializer=tf.contrib.layers.xavier_initializer()) 137 | biases = tf.get_variable("biases", shape=[dim], 138 | initializer=tf.constant_initializer(0.0)) 139 | fc = tf.nn.xw_plus_b(tensor, weights, biases) 140 | return fc 141 | 142 | -------------------------------------------------------------------------------- /network_structure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | from flip_gradient import * 7 | import embed_network as en 8 | import embed_network_music as enm 9 | import embed_network_video as env 10 | import embed_loss as el 11 | import embed_structure_loss as esl 12 | import eval 13 | 14 | import OPTS 15 | 16 | 17 | class Model_structure: 18 | class OPTS(OPTS.OPTS): 19 | def __init__(self): 20 | OPTS.OPTS.__init__(self, 'Model OPTS') 21 | self.network_name = None 22 | self.x_dim = None 23 | self.y_dim = None 24 | self.x_num_layer = 2 25 | self.y_num_layer = 2 26 | self.constraint_weights = [2, 1] 27 | self.batch_size = 1024 28 | self.is_linear = False 29 | 30 | def __init__(self, opts): 31 | if opts is None: 32 | opts = self.OPTS() 33 | self.opts = opts 34 | self.opts.assert_all_keys_valid() 35 | 36 | def construct(self): 37 | 38 | self.x_data = tf.placeholder(tf.float32, [None, self.opts.x_dim], name='X_data') 39 | self.y_data = tf.placeholder(tf.float32, [None, self.opts.y_dim], name='Y_data') 40 | 41 | # Build embedding 42 | self.aff_xy = tf.placeholder(tf.bool, [None, None], name='aff_xy') 43 | self.K = tf.placeholder(tf.int32, name='K') 44 | self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') 45 | self.is_training = tf.placeholder(tf.bool, name='is_training') 46 | self.l = tf.placeholder(tf.float32, name='lambda_for_adv') 47 | 48 | x_net_opts = enm.Music_Model.OPTS() 49 | x_net_opts.network_name = 'X_network' 50 | x_net_opts.num_layer = self.opts.x_num_layer 51 | self.x_net = enm.Music_Model(x_net_opts) 52 | 53 | if(self.opts.x_num_layer == 3 and self.opts.y_num_layer == 2): 54 | y_net_opts = en.Wang_Model.OPTS() 55 | y_net_opts.network_name = 'Y_network' 56 | self.y_net = en.Wang_Model(y_net_opts) 57 | else: 58 | y_net_opts = env.Video_Model.OPTS() 59 | y_net_opts.network_name = 'Y_network' 60 | y_net_opts.num_layer = self.opts.y_num_layer 61 | self.y_net = env.Video_Model(y_net_opts) 62 | 63 | self.x_embed = self.x_net.construct(self.x_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training) 64 | self.y_embed = self.y_net.construct(self.y_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training) 65 | 66 | 67 | 68 | # Cross-modal Triplet loss 69 | el_opts = el.Triplet.OPTS() 70 | el_opts.network_name = 'Triplet' 71 | el_opts.distance = el_opts.DIST.COS 72 | 73 | el_net = el.Triplet(el_opts) 74 | self.loss_cross_xy, self.loss_cross_yx = el_net.construct(self.x_embed, self.y_embed, self.aff_xy, self.K, 'Triplet_Net') 75 | 76 | # Single-modal structure loss 77 | esl_opts = esl.Triplet_Structure.OPTS() 78 | esl_opts.network_name = 'Triplet_Structure' 79 | esl_opts.distance = el_opts.DIST.COS 80 | 81 | esl_net = esl.Triplet_Structure(esl_opts) 82 | self.loss_single_x = esl_net.construct(self.x_data, self.x_embed, self.K, 'Triplet_Strcture_x_Net') 83 | self.loss_single_y = esl_net.construct(self.y_data, self.y_embed, self.K, 'Triplet_Strcture_y_Net') 84 | 85 | # Adversarial loss 86 | self.concat_feat = tf.concat(0, [self.x_embed, self.y_embed]) 87 | self.dom_label = tf.concat(0, [tf.tile([0.], [tf.shape(self.x_embed)[0]]), 88 | tf.tile([1.], [tf.shape(self.y_embed)[0]])]) 89 | self.dom_label = tf.expand_dims(self.dom_label, 1) 90 | self.dom_loss, self.dom_acc = self.dom_classifier(self.concat_feat, self.dom_label, self.l, self.keep_prob) 91 | 92 | 93 | # Final loss 94 | self.loss = self.loss_cross_xy * self.opts.constraint_weights[0] + self.loss_cross_yx * self.opts.constraint_weights[1] \ 95 | + self.loss_single_x * self.opts.constraint_weights[2] + self.loss_single_y * self.opts.constraint_weights[3] 96 | 97 | # calculate gradients 98 | _, self.xy_rank_x = tf.nn.moments(tf.gradients(self.loss_cross_xy, [self.x_embed])[0], [0, 1]) 99 | _, self.yx_rank_x = tf.nn.moments(tf.gradients(self.loss_cross_yx, [self.x_embed])[0], [0, 1]) 100 | _, self.x_rank_x = tf.nn.moments(tf.gradients(self.loss_single_x, [self.x_embed])[0], [0, 1]) 101 | _, self.xy_rank_y = tf.nn.moments(tf.gradients(self.loss_cross_xy, [self.y_embed])[0], [0, 1]) 102 | _, self.yx_rank_y = tf.nn.moments(tf.gradients(self.loss_cross_yx, [self.y_embed])[0], [0, 1]) 103 | _, self.y_rank_y = tf.nn.moments(tf.gradients(self.loss_single_y, [self.y_embed])[0], [0, 1]) 104 | 105 | 106 | eval_opts = eval.Recall.OPTS() 107 | eval_opts.network_name = 'Recall' 108 | 109 | eval_net = eval.Recall(eval_opts) 110 | 111 | self.recall_xy, self.recall_yx, self.xy_idx, self.yx_idx = eval_net.construct(self.x_embed, self.y_embed, 112 | self.aff_xy, 113 | [1, 10, 25, 50, 100, 1000]) 114 | 115 | # self.debug_list = el_net.debug_list 116 | 117 | def dom_classifier(self, tensor, labels, l=1., keep_prob=1.): 118 | with tf.name_scope("dom_classifier"): 119 | feature = flip_gradient(tensor, l) 120 | 121 | d_fc1 = tf.nn.relu(self.fc_layer(feature, 512, "dom_fc1")) 122 | d_fc1 = tf.nn.dropout(d_fc1, keep_prob) 123 | 124 | d_fc2 = tf.nn.relu(self.fc_layer(d_fc1, 512, "dom_fc2")) 125 | d_fc2 = tf.nn.dropout(d_fc2, keep_prob) 126 | 127 | # d_logits = self.fc_layer(d_fc2, 2, "dom_logits") 128 | d_logit = self.fc_layer(d_fc2, 1, "dom_logit") 129 | 130 | with tf.name_scope("domain_acc_and_loss"): 131 | # self.domain_prediction = tf.nn.softmax(d_logits) 132 | domain_prediction = tf.sigmoid(d_logit) 133 | 134 | # self.domain_loss = tf.nn.softmax_cross_entropy_with_logits(d_logits, self.domain) 135 | domain_loss = tf.nn.sigmoid_cross_entropy_with_logits(d_logit, labels) 136 | domain_loss = tf.reduce_mean(domain_loss) 137 | 138 | # domain acc 139 | correct_domain_prediction = tf.equal(tf.round(domain_prediction), labels) 140 | domain_acc = tf.reduce_mean(tf.cast(correct_domain_prediction, tf.float32)) 141 | return domain_loss, domain_acc 142 | 143 | def fc_layer(self, tensor, dim, name, reuse=False): 144 | in_shape = tensor.get_shape() 145 | 146 | with tf.variable_scope(name, reuse=reuse): 147 | weights = tf.get_variable("weights", shape=[in_shape[1], dim], 148 | initializer=tf.contrib.layers.xavier_initializer()) 149 | biases = tf.get_variable("biases", shape=[dim], 150 | initializer=tf.constant_initializer(0.0)) 151 | fc = tf.nn.xw_plus_b(tensor, weights, biases) 152 | return fc 153 | 154 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | import scipy.io 6 | 7 | from network import Model 8 | 9 | flags = tf.app.flags 10 | FLAGS = flags.FLAGS 11 | 12 | flags.DEFINE_integer('num_layer_x', 3, 'Constraint Weight xy') 13 | flags.DEFINE_integer('num_layer_y',2, 'Constraint Weight yx') 14 | flags.DEFINE_integer('test_batch_size', 1086, 'Test batch size.') #flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.') 15 | flags.DEFINE_string('summaries_dir', 'expr_ware/lr_0.0003 dr_0.9 nx_3 ny_2 xy_3 yx_1 K_100 ba_2000', 'Directory to put the summary and log data.') 16 | 17 | net_opts = Model.OPTS() 18 | net_opts.network_name = 'Wrapping Network' 19 | net_opts.x_dim = 1140 #4096 20 | net_opts.y_dim = 1024 #6000 21 | net_opts.x_num_layer = FLAGS.num_layer_x 22 | net_opts.y_num_layer = FLAGS.num_layer_y 23 | net = Model(net_opts) 24 | net.construct() 25 | 26 | saver = tf.train.Saver(tf.global_variables()) 27 | with tf.Session() as sess: 28 | sess.run(tf.global_variables_initializer()) 29 | checkpoint_dir = os.path.join(FLAGS.summaries_dir, 'checkpoints') 30 | checkpoint_prefix = os.path.join(checkpoint_dir, "model.ckpt") 31 | if not os.path.exists(checkpoint_dir): 32 | os.makedirs(checkpoint_dir) 33 | 34 | step = 0 35 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 36 | if ckpt and ckpt.model_checkpoint_path: 37 | saver.restore(sess, ckpt.model_checkpoint_path) 38 | #saver.restore(sess, os.path.join(checkpoint_dir, 'model.ckpt-9399')) 39 | step = int(ckpt.model_checkpoint_path.split('-')[-1]) 40 | step += 1 41 | print('Session restored successfully. step: {0}'.format(step)) 42 | 43 | import data.himv as himv 44 | num_test = FLAGS.test_batch_size 45 | data_manager = himv.Data_Manager() 46 | data_name = data_manager.data_name 47 | data_name_test = data_name[-num_test:len(data_name)] 48 | test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.validation_pair_list, is_train=False) 49 | # test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.test_pair_list, is_train=False) 50 | 51 | K = 100 52 | x_batch, y_batch, aff_xy = test_batch.next() 53 | xy, yx, xy_idx, yx_idx = sess.run([net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx], feed_dict={ 54 | net.x_data: x_batch, 55 | net.y_data: y_batch, 56 | net.K: K, 57 | net.aff_xy: aff_xy, 58 | net.keep_prob: 1., net.is_training: False}) # actually, False 59 | 60 | 61 | print("[iter %d] xy: %s, yx: %s, " % (step, xy, yx)) 62 | 63 | 64 | 65 | import csv 66 | with open('./recall_xy(music-video)_general_vali.csv', 'wb') as csvfile: 67 | writer = csv.writer(csvfile, delimiter=',') 68 | top = ["Query Name"] + [str(i) for i in range(0, num_test)] 69 | writer.writerow(top) 70 | for idx_input in range(len(xy_idx)): 71 | recall_mv = [data_name_test[idx_recall] for idx_recall in xy_idx[idx_input]] 72 | result = [data_name_test[idx_input]] + recall_mv 73 | writer.writerow(result) 74 | 75 | with open('./recall_yx(video-music)_general_vali.csv', 'wb') as csvfile: 76 | writer = csv.writer(csvfile, delimiter=',') 77 | top = ["Query Name"] + [str(i) for i in range(0, num_test)] 78 | writer.writerow(top) 79 | for idx_input in range(len(yx_idx)): 80 | recall_mv = [data_name_test[idx_recall] for idx_recall in yx_idx[idx_input]] 81 | result = [data_name_test[idx_input]] + recall_mv 82 | writer.writerow(result) 83 | 84 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | import scipy.io 7 | 8 | from network import Model 9 | from network_structure import Model_structure 10 | from Logger import Logger 11 | 12 | flags = tf.app.flags 13 | FLAGS = flags.FLAGS 14 | flags.DEFINE_float('learning_rate', 3e-4 15 | , 'Initial learning rate.') 16 | flags.DEFINE_float('keep_prob', 0.9, 'Dropout keep probability rate.') 17 | flags.DEFINE_integer('num_layer_x', 3, 'Constraint Weight xy') 18 | flags.DEFINE_integer('num_layer_y',2, 'Constraint Weight yx') 19 | flags.DEFINE_integer('constraint_xy', 3, 'Constraint Weight xy') 20 | flags.DEFINE_integer('constraint_yx',1, 'Constraint Weight yx') 21 | flags.DEFINE_integer('constraint_x', 0.2, 'Constraint Structure Weight x') 22 | flags.DEFINE_integer('constraint_y', 0.2, 'Constraint Structure Weight y') 23 | flags.DEFINE_integer('top_K', 100, 'Top mosk K number for violation') 24 | flags.DEFINE_float('weight_decay', 0, 'Weight decay.') 25 | flags.DEFINE_integer('max_steps', 700000, 'Number of steps to run trainer.') 26 | flags.DEFINE_integer('train_batch_size', 1000, 'Train Batch size.') #flags.DEFINE_integer('batch_size', 1500, 'Batch size.') 27 | flags.DEFINE_integer('validation_batch_size', 4000, 'Validation Batch size.') #flags.DEFINE_integer('batch_size', 1500, 'Batch size.') 28 | flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.') #flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.') 29 | flags.DEFINE_integer('display_step', 50, 'Train display step.') 30 | flags.DEFINE_integer('test_step', 200, 'Test step.') 31 | flags.DEFINE_integer('save_step', 200, 'Checkpoint saving step.') 32 | flags.DEFINE_string('summaries_dir', "dir_name", 'Directory to put the summary and log data.') 33 | FLAGS.summaries_dir = "expr/Structure_test(K2_Linear)_" + "lr_" + str(FLAGS.learning_rate) + " dr_" + str(FLAGS.keep_prob) + " nx_" + str(FLAGS.num_layer_x) + " ny_" + str(FLAGS.num_layer_y) \ 34 | + " xy_" + str(FLAGS.constraint_xy) +" yx_" + str(FLAGS.constraint_yx) + " x_" + str(FLAGS.constraint_x) +" y_" + str(FLAGS.constraint_y) \ 35 | + " K_" + str(FLAGS.top_K) + " ba_" + str(FLAGS.train_batch_size) 36 | 37 | 38 | 39 | # Structure_test(K2_dom)_lr_0.0003 dr_0.7 nx_3 ny_2 xy_3 yx_1 x_0.2 y_0.2 K_100 ba_1000 40 | 41 | # net_opts = Model.OPTS() 42 | # net_opts.network_name = 'Wrapping Network' 43 | # net_opts.x_dim = 1140 44 | # net_opts.y_dim = 1024 45 | # net_opts.x_num_layer = FLAGS.num_layer_x 46 | # net_opts.y_num_layer = FLAGS.num_layer_y 47 | # net_opts.constraint_weights = [FLAGS.constraint_xy, FLAGS.constraint_yx] 48 | # net_opts.is_linear = False 49 | # net = Model(net_opts) 50 | # net.construct() 51 | 52 | # net_opts = Model.OPTS() 53 | net_opts = Model_structure.OPTS() 54 | net_opts.network_name = 'Wrapping Network' 55 | net_opts.x_dim = 1140 56 | net_opts.y_dim = 1024 57 | net_opts.x_num_layer = FLAGS.num_layer_x 58 | net_opts.y_num_layer = FLAGS.num_layer_y 59 | net_opts.constraint_weights = [FLAGS.constraint_xy, FLAGS.constraint_yx, FLAGS.constraint_x, FLAGS.constraint_y] 60 | net_opts.is_linear = True 61 | # net = Model(net_opts) 62 | net = Model_structure(net_opts) 63 | net.construct() 64 | 65 | 66 | lr = tf.placeholder(tf.float32, name='learning_rate') 67 | loss = net.loss 68 | # loss = net.loss + net.dom_loss 69 | 70 | 71 | global_step_ = tf.Variable(0, trainable=False) 72 | decay_step = 1000 73 | # learning_rate_ = tf.train.exponential_decay(FLAGS.learning_rate, global_step_, 74 | # decay_step, 0.96, staircase=True) 75 | learning_rate = FLAGS.learning_rate 76 | 77 | # top_k_ = tf.train.exponential_decay(float(FLAGS.top_K), global_step_, 78 | # decay_step, 0.96, staircase=True) 79 | top_k = FLAGS.top_K 80 | 81 | 82 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 83 | # Ensures that we execute the update_ops before performing the train_step (ref: http://ruishu.io/2016/12/27/batchnorm/ ) 84 | with tf.control_dependencies(update_ops): 85 | optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) 86 | # optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(loss, global_step=global_step_) 87 | 88 | config = tf.ConfigProto() 89 | config.gpu_options.allow_growth=True 90 | saver = tf.train.Saver(tf.all_variables(), max_to_keep = None) 91 | with tf.Session(config=config) as sess: 92 | sess.run(tf.global_variables_initializer()) 93 | 94 | train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',sess.graph) 95 | validation_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/validation') 96 | test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test') 97 | 98 | checkpoint_dir = os.path.join(FLAGS.summaries_dir, 'checkpoints') 99 | checkpoint_prefix = os.path.join(checkpoint_dir, "model.ckpt") 100 | if not os.path.exists(checkpoint_dir): 101 | os.makedirs(checkpoint_dir) 102 | 103 | logger = Logger(FLAGS.summaries_dir) 104 | logger.write(str(FLAGS.__flags)) 105 | import data.himv as himv 106 | data_manager = himv.Data_Manager() 107 | train_batch = data_manager.batch_iterator_thread(FLAGS.train_batch_size, data_manager.train_pair_list) 108 | validation_batch = data_manager.batch_iterator_thread(FLAGS.validation_batch_size, data_manager.validation_pair_list, is_train=False) 109 | test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.test_pair_list, is_train=False) 110 | 111 | step = 0 112 | ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 113 | if ckpt and ckpt.model_checkpoint_path: 114 | saver.restore(sess, ckpt.model_checkpoint_path) 115 | step = int(ckpt.model_checkpoint_path.split('-')[-1]) 116 | print('Session restored successfully. step: {0}'.format(step)) 117 | step = step + 1 118 | 119 | max_step_acc_xy = [0, -5e3] 120 | max_step_acc_yx = [0, -5e3] 121 | for i in range(step,FLAGS.max_steps): 122 | p = float(i) / FLAGS.max_steps 123 | lamb = 0 124 | # lamb = 2. / (1. + np.exp(-20. * p)) - 1 # lamb = 2. / (1. + np.exp(-10. * p)) - 1 125 | # lamb *= 10 126 | 127 | x_batch, y_batch, aff_xy = train_batch.next() 128 | # K = FLAGS.top_K 129 | 130 | # learning_rate = sess.run(learning_rate_) 131 | 132 | 133 | 134 | 135 | 136 | sess.run(optimizer,feed_dict={ 137 | net.x_data:x_batch, 138 | net.y_data:y_batch, 139 | net.K:int(top_k), 140 | net.aff_xy:aff_xy, 141 | net.keep_prob:FLAGS.keep_prob, 142 | lr: learning_rate, net.is_training: True, net.l: lamb}) 143 | 144 | if (i+1) % FLAGS.display_step == 0: 145 | loss_cross_xy, loss_single_x, loss_cross_yx, loss_single_y, xy_rank_x, yx_rank_x, x_rank_x, xy_rank_y, yx_rank_y, y_rank_y, l,xy,yx,dl,da = sess.run([net.loss_cross_xy, net.loss_single_x, net.loss_cross_yx, net.loss_single_y, net.xy_rank_x, net.yx_rank_x, net.x_rank_x, net.xy_rank_y, net.yx_rank_y, net.y_rank_y, loss,net.recall_xy, net.recall_yx, net.dom_loss, net.dom_acc],feed_dict={ 146 | net.x_data:x_batch, 147 | net.y_data:y_batch, 148 | net.K:int(top_k), 149 | net.aff_xy:aff_xy, 150 | net.keep_prob:1., net.is_training: True, net.l: lamb}) 151 | 152 | 153 | logger.write("[iter %d] (%s %s, %s %s)(%s %s, %s) (%s %s, %s) loss=%.4g, %s, %s "%(i+1, loss_cross_xy, loss_cross_yx, loss_single_x, loss_single_y, xy_rank_x, yx_rank_x, x_rank_x, xy_rank_y, yx_rank_y, y_rank_y, l, xy, yx)) 154 | short_summary = tf.Summary(value=[ 155 | tf.Summary.Value(tag="triplet_loss", simple_value=float(l)), 156 | tf.Summary.Value(tag="K", simple_value=float(top_k)), 157 | tf.Summary.Value(tag="lambda", simple_value=float(lamb)), 158 | tf.Summary.Value(tag="dom_loss", simple_value=float(dl)), 159 | tf.Summary.Value(tag="dom_acc", simple_value=float(da)), 160 | tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)), 161 | tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])), 162 | tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])), 163 | tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])), 164 | tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])), 165 | tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])), 166 | tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])), 167 | tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])), 168 | tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])), 169 | tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])), 170 | tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])), 171 | tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])), 172 | tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])), 173 | ]) 174 | train_writer.add_summary(short_summary, i) 175 | #scipy.io.savemat('debug.mat', mdict={'d':dl}) 176 | #exit() 177 | 178 | 179 | if (i+1) % FLAGS.test_step == 0: 180 | # # for validation 181 | # x_batch, y_batch, aff_xy = validation_batch.next() 182 | # l, xy, yx, xy_idx, yx_idx, dl, da = sess.run( 183 | # [loss, net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx, net.dom_loss, net.dom_acc], feed_dict={ 184 | # net.x_data: x_batch, 185 | # net.y_data: y_batch, 186 | # net.K: int(top_k), 187 | # net.aff_xy: aff_xy, 188 | # net.keep_prob: 1., net.is_training: False, net.l: lamb}) # actually, False 189 | # 190 | # logger.write("[Validation iter %d] loss=%.4g, %s, %s" % (i + 1, l, xy, yx)) 191 | # short_summary = tf.Summary(value=[ 192 | # tf.Summary.Value(tag="triplet_loss", simple_value=float(l)), 193 | # tf.Summary.Value(tag="K", simple_value=float(top_k)), 194 | # tf.Summary.Value(tag="lambda", simple_value=float(lamb)), 195 | # tf.Summary.Value(tag="dom_loss", simple_value=float(dl)), 196 | # tf.Summary.Value(tag="dom_acc", simple_value=float(da)), 197 | # tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)), 198 | # tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])), 199 | # tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])), 200 | # tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])), 201 | # tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])), 202 | # tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])), 203 | # tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])), 204 | # tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])), 205 | # tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])), 206 | # tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])), 207 | # tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])), 208 | # tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])), 209 | # tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])), 210 | # ]) 211 | # validation_writer.add_summary(short_summary, i) 212 | 213 | # for test 214 | x_batch, y_batch, aff_xy = test_batch.next() 215 | l,xy,yx, xy_idx, yx_idx,dl,da = sess.run([loss,net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx, net.dom_loss, net.dom_acc],feed_dict={ 216 | net.x_data:x_batch, 217 | net.y_data:y_batch, 218 | net.K:int(top_k), # caution: We currently use topK as test batch size 219 | net.aff_xy:aff_xy, 220 | net.keep_prob:1., net.is_training: False, net.l: lamb}) #actually, False 221 | 222 | logger.write("[TEST iter %d] loss=%.4g, %s, %s"%(i+1, l, xy, yx)) 223 | short_summary = tf.Summary(value=[ 224 | tf.Summary.Value(tag="triplet_loss", simple_value=float(l)), 225 | tf.Summary.Value(tag="K", simple_value=float(top_k)), 226 | tf.Summary.Value(tag="lambda", simple_value=float(lamb)), 227 | tf.Summary.Value(tag="dom_loss", simple_value=float(dl)), 228 | tf.Summary.Value(tag="dom_acc", simple_value=float(da)), 229 | tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)), 230 | tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])), 231 | tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])), 232 | tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])), 233 | tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])), 234 | tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])), 235 | tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])), 236 | tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])), 237 | tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])), 238 | tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])), 239 | tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])), 240 | tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])), 241 | tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])), 242 | ]) 243 | 244 | # if (float(xy[0]) > max_step_acc_xy[1]): 245 | # max_step_acc_xy[1] = float(xy[0]) 246 | # max_step_acc_xy[0] = i + 1 247 | # if (float(yx[0]) > max_step_acc_yx[1]): 248 | # max_step_acc_yx[1] = float(yx[0]) 249 | # max_step_acc_yx[0] = i + 1 250 | 251 | test_writer.add_summary(short_summary, i) 252 | if (i+1) % FLAGS.save_step == 0: 253 | saver.save(sess,checkpoint_prefix,global_step=i) 254 | logger.write("[Checkpoint at step %d saved]"%(i+1)) 255 | logger.write(FLAGS.summaries_dir) 256 | # logger.write("*** xy (%d, %f), yx (%d, %f) ***" %(max_step_acc_xy[0], max_step_acc_xy[1], max_step_acc_yx[0], max_step_acc_yx[1])) 257 | 258 | 259 | 260 | --------------------------------------------------------------------------------