├── Logger.py
├── OPTS.py
├── README.md
├── data
    ├── Youtube_ID.txt
    ├── audio_feature_extractor
    │   ├── librosa_feature.py
    │   └── youtube-8M_audio.py
    ├── mp4-to-mp3
    │   └── mp4tomp3.py
    └── video_feature_extractor
    │   └── READ_ME.txt
├── embed_loss.py
├── embed_network_music.py
├── embed_network_video.py
├── embed_structure_loss.py
├── eval.py
├── figure
    ├── concept.JPG
    ├── demo.JPG
    ├── framework.JPG
    ├── retrieval_result.JPG
    └── video_sample.JPG
├── flip_gradient.py
├── network.py
├── network_structure.py
├── test.py
└── train.py


/Logger.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import os.path
 4 | from datetime import datetime
 5 | 
 6 | class Logger:
 7 |   def __init__(self, path):
 8 |     if path.endswith('.txt'):
 9 |       file_path = path
10 |     else:
11 |       file_path = os.path.join(path,'log.txt')
12 |     self.log_file = open(file_path,'a')
13 |   
14 |   def write(self, str):
15 |     current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
16 |     log_format = '[%s] %s'%(current_time, str)
17 |     self.log_file.write(log_format + '\n')
18 |     print (log_format)
19 |   
20 |   def __del__(self):
21 |     self.log_file.close()


--------------------------------------------------------------------------------
/OPTS.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | class OPTS:
 4 |   def __init__(self, name):
 5 |     self.__name = name
 6 |     
 7 |   def get_keys(self):
 8 |     return self.__dict__.keys()
 9 |   
10 |   def assert_all_keys_valid(self):
11 |     for k in self.get_keys():
12 |       try:
13 |         assert self.__dict__[k] is not None
14 |       except AssertionError:
15 |         print('%s: Option "%s" is not valid'%(self.__name, k))
16 |         exit()
17 |     


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | VM-NET in Tensorflow
 2 | ====
 3 | 
 4 | TensorFlow implementation of [Content-Based Video-Music Retrieval Using Soft Intra-Modal Structure Constraint](https://arxiv.org/abs/1704.06761)
 5 | 
 6 | ![Alt text](./figure/concept.JPG)
 7 | 
 8 | Demo
 9 | -------------
10 | [![Alt text](./figure/demo.JPG)](https://www.youtube.com/watch?v=ZyINqDMo3Fg)
11 | 
12 | 
13 | Prerequisites
14 | -------------
15 | * [Python 2.7](https://www.python.org/downloads/)
16 | * [Tensorflow 0.12](https://www.tensorflow.org/versions/r0.12/)
17 | * [OpenCV 2.4.9](http://opencv.org/releases.html)
18 | * [NumPy](http://www.numpy.org/)
19 | * [SciPy](https://www.scipy.org/install.html)
20 | 
21 | Usage
22 | -------------
23 | ![Alt text](./figure/video_sample.JPG)
24 | First, download the video dataset listed in `data/Youtube_ID.txt`. Then extract audio features and video features from the videos (see `data/audio_feature_extractor` and `data/video_feature_extractor`).
25 | 
26 | 
27 | Network structure
28 | -------------
29 | ![Alt text](./figure/framework.JPG)
30 | 
31 | 
32 | To train a model with downloaded dataset:
33 | ```
34 | $ python train.py
35 | ```
36 | 
37 | To test with an existing model:
38 | ```
39 | $ python test.py
40 | ```
41 | 
42 | Results
43 | -------------
44 | ![Alt text](./figure/retrieval_result.JPG)
45 | 
46 | 


--------------------------------------------------------------------------------
/data/audio_feature_extractor/librosa_feature.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | # We'll need numpy for some mathematical operations
  4 | import numpy as np
  5 | import scipy.io
  6 | import pickle
  7 | import os
  8 | import librosa
  9 | # And the display module for visualization
 10 | import librosa.display
 11 | 
 12 | # mel-spectrogram parameters
 13 | # SR = 12000
 14 | # N_FFT = 512
 15 | # HOP_LEN = 256
 16 | # DURA = 29.12
 17 | 
 18 | def feature_extraction(indir, audio_name, ext, outdir, SR, N_FFT, HOP_LEN, DURA):
 19 | 
 20 |     # Load audio
 21 |     src, sr = librosa.load(indir+'/'+audio_name+ext, sr=SR)
 22 | 
 23 | 
 24 |     # Trim audio
 25 |     n_sample = src.shape[0]
 26 |     n_sample_wanted = int(DURA * SR)
 27 |     if n_sample < n_sample_wanted:  # if too short
 28 |         src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
 29 |     elif n_sample > n_sample_wanted:  # if too long
 30 |         src = src[(n_sample - n_sample_wanted) / 2:(n_sample + n_sample_wanted) / 2]
 31 | 
 32 | 
 33 |     # Perform harmonic percussive source separation (HSS)
 34 |     y_harmonic, y_percussive = librosa.effects.hpss(src)
 35 |     logam = librosa.logamplitude
 36 | 
 37 |     # for Spectral features
 38 |     for i in range(2):
 39 |         if i == 0:
 40 |             y = y_harmonic
 41 |         else:
 42 |             y = y_percussive
 43 | 
 44 |         fv = logam(librosa.feature.chroma_stft(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 45 |         if i == 0:
 46 |             fv_total = fv
 47 |         else:
 48 |             fv_total = np.vstack((fv_total, fv))
 49 | 
 50 |         fv = logam(librosa.feature.chroma_cens(y=y, sr=SR, hop_length=HOP_LEN), ref_power=np.max)
 51 |         fv_total = np.vstack((fv_total, fv))
 52 | 
 53 |         fv = logam(librosa.feature.melspectrogram(y=y, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=96), ref_power=np.max)
 54 |         fv_total = np.vstack((fv_total, fv))
 55 | 
 56 |         fv_mfcc = librosa.feature.mfcc(y=y, sr=SR, hop_length=HOP_LEN)
 57 |         fv = logam(fv_mfcc, ref_power=np.max)
 58 |         fv_total = np.vstack((fv_total, fv))
 59 |         fv = logam(librosa.feature.delta(fv_mfcc), ref_power=np.max)
 60 |         fv_total = np.vstack((fv_total, fv))
 61 |         fv = logam(librosa.feature.delta(fv_mfcc, order=2), ref_power=np.max)
 62 |         fv_total = np.vstack((fv_total, fv))
 63 | 
 64 |         fv = logam(librosa.feature.rmse(y=y, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 65 |         fv_total = np.vstack((fv_total, fv))
 66 | 
 67 |         fv = logam(librosa.feature.spectral_centroid(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 68 |         fv_total = np.vstack((fv_total, fv))
 69 | 
 70 |         fv = logam(librosa.feature.spectral_bandwidth(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 71 |         fv_total = np.vstack((fv_total, fv))
 72 | 
 73 |         fv = logam(librosa.feature.spectral_rolloff(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 74 |         fv_total = np.vstack((fv_total, fv))
 75 | 
 76 |         fv = logam(librosa.feature.poly_features(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT), ref_power=np.max)
 77 |         fv_total = np.vstack((fv_total, fv))
 78 | 
 79 |         fv = logam(librosa.feature.poly_features(y=y, sr=SR, hop_length=HOP_LEN, n_fft=N_FFT, order=2), ref_power=np.max)
 80 |         fv_total = np.vstack((fv_total, fv))
 81 | 
 82 |         fv = logam(librosa.feature.zero_crossing_rate(y=y, hop_length=HOP_LEN, frame_length=N_FFT), ref_power=np.max)
 83 |         fv_total = np.vstack((fv_total, fv))
 84 | 
 85 |     # Feature aggregation
 86 |     fv_mean = np.mean(fv_total, axis=1)
 87 |     fv_var = np.var(fv_total, axis=1)
 88 |     fv_amax = np.amax(fv_total, axis=1)
 89 |     fv_aggregated = np.hstack((fv_mean, fv_var))
 90 |     fv_aggregated = np.hstack((fv_aggregated, fv_amax))
 91 | 
 92 |     # # for tempo features
 93 |     # tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=SR)
 94 |     # fv_aggregated = np.hstack((fv_aggregated, tempo))
 95 | 
 96 |     # # for Rhythm features
 97 |     # oenv = librosa.onset.onset_strength(y=y_percussive, sr=SR)
 98 |     # tempo = librosa.feature.tempogram(onset_envelope=oenv, sr=SR)
 99 |     # tempo_mean = np.mean(tempo, axis=1)
100 |     # tempo_var = np.var(tempo, axis=1)
101 |     # tempo_amax = np.amax(tempo, axis=1)
102 |     # tempo_aggregated = np.hstack((tempo_mean, tempo_var))
103 |     # tempo_aggregated = np.hstack((tempo_aggregated, tempo_amax))
104 | 
105 |     # for pickle
106 |     pklName = outdir + "/" +  audio_name + '.pkl'
107 |     f = open(pklName, 'wb')
108 |     pickle.dump(fv_aggregated, f)
109 |     f.close()
110 | 
111 |    
112 |     print(audio_name)
113 | 


--------------------------------------------------------------------------------
/data/audio_feature_extractor/youtube-8M_audio.py:
--------------------------------------------------------------------------------
 1 | from sys import argv  # allows user to specify input and output directories
 2 | import os  # help with file handling
 3 | import librosa_feature
 4 | 
 5 | SR = 12000
 6 | N_FFT = 512
 7 | HOP_LEN = 256
 8 | DURA = 29.12
 9 | 
def check_file_exists(directory, filename, extension):
    """Return True when ``directory/filename + extension`` is an existing file."""
    candidate = "/".join((directory, filename + extension))
    return os.path.isfile(candidate)
13 | 
14 | 
def main(indir, ext, outdir):
    """Extract audio features for every not-yet-processed file in ``indir``.

    Args:
        indir: Directory scanned for input files.
        ext: Extension (including the dot) of the files to process.
        outdir: Existing, writable directory that receives the feature files.

    Raises:
        SystemExit: if a directory is missing or not writable, if listing
            ``indir`` fails, or if no unconverted file is left.
    """
    # check specified folders exist
    if not os.path.exists(indir):
        raise SystemExit("Error: Input directory '" + indir + "' does not exist. (try prepending './')")
    if not os.path.exists(outdir):
        raise SystemExit("Error: Output directory '" + outdir + "' does not exist.")
    if not os.access(outdir, os.W_OK):
        raise SystemExit("Error: Output directory '" + outdir + "' is not writeable.")

    # Call form of print works on both Python 2 and Python 3.
    print("[%s/*%s] --> [%s/feature]" % (indir, ext, outdir))

    try:
        # names (without extension) of all convertible files in the input directory
        names = [os.path.splitext(os.path.basename(f))[0]
                 for f in os.listdir(indir) if f.endswith(ext)]
    except OSError as e:
        raise SystemExit(e)

    def already_converted(name):
        # feature_extraction() writes <outdir>/<name>.pkl, so that location is
        # checked first; the 'pickle' / 'matlab' sub-folder checks of the
        # earlier layout are kept for compatibility.
        return (check_file_exists(outdir, name, ".pkl")
                or check_file_exists(outdir + "/pickle", name, ".pkl")
                or check_file_exists(outdir + "/matlab", name, ".mat"))

    # remove files that have already been outputted from the list
    files = [name for name in names if not already_converted(name)]

    if not files:
        raise SystemExit("Could not find any files to convert that have not already been converted.")

    # convert all unconverted files
    for filename in files:
        librosa_feature.feature_extraction(indir, filename, ext, outdir, SR, N_FFT, HOP_LEN, DURA)
46 | 
47 | 
48 | # # set the default directories and try to get input directories
49 | # args = [".", "."]
50 | # for i in range(1, min(len(argv), 3)):
51 | #     args[i - 1] = argv[i]
52 | #
53 | # # if only input directory is set, make the output directory the same
54 | # if len(argv) == 2:
55 | #     args[1] = args[0]
56 | 
# Hard-coded job configuration for the extraction run.
ext = '.mp3'
indir = '/media/mass/D/wbim/project/youtube-8m_general/audio'
outdir = '/media/mass/D/wbim/project/youtube-8m_general/ge_audio_feature'

# Start the extraction only when executed as a script, so that importing this
# module has no side effects.
if __name__ == "__main__":
    main(indir, ext, outdir)
62 | 


--------------------------------------------------------------------------------
/data/mp4-to-mp3/mp4tomp3.py:
--------------------------------------------------------------------------------
 1 | # MP4 TO MP3 CONVERSION SCRIPT
 2 | # script to convert mp4 video files to mp3 audio
 3 | # useful for turning video from sites such as www.ted.com into audio files useable
 4 | # on any old mp3 player.
 5 | #
 6 | # usage: python mp4tomp3.py [input directory [output directory]]
 7 | # input directory (optional)  - set directory containing mp4 files to convert (defaults to current folder)
 8 | # output directory (optional) - set directory to export mp3 files to (defaults to input)
 9 | #
10 | # NOTE: you will need python 2, mplayer and lame for this script to work
11 | # sudo apt-get install lame
12 | # sudo apt-get install mplayer
13 | # sudo apt-get install python2.7
14 | 
15 | 
16 | from subprocess import call     # for calling mplayer and lame
17 | from sys import argv            # allows user to specify input and output directories
18 | import os                       # help with file handling
19 | import pickle
20 | 
def check_file_exists(directory, filename, extension):
    """Tell whether the file ``directory/filename + extension`` exists."""
    target = "".join([directory, "/", filename, extension])
    return os.path.isfile(target)
24 | 
def main(indir, outdir):
    """Convert the listed mp4 videos in ``indir`` to mp3 files in ``outdir``.

    The video names come from the module-level ``list_mv``. Names that already
    have an ``.mp3`` in ``outdir`` are skipped. For each remaining name,
    ``mplayer`` dumps the audio track to ``audiodump.wav`` in the current
    working directory and ``lame`` encodes it to ``outdir/<name>.mp3``.

    Args:
        indir: Directory containing the ``.mp4`` files.
        outdir: Directory that receives the ``.mp3`` files.

    Raises:
        SystemExit: if there is nothing left to convert.
    """
    try:
        # get a list of all convertible files in the input directory
        files = []
        for path in [indir + "/" + f + ".mp4" for f in list_mv]:
            basename = os.path.basename(path)
            files.append(os.path.splitext(basename)[0])
        # remove files that have already been outputted from the list
        files = [f for f in files if not check_file_exists(outdir, f, ".mp3")]
    except OSError as e:
        raise SystemExit(e)

    if not files:
        raise SystemExit("Could not find any files to convert that have not already been converted.")

    # convert all unconverted files
    for filename in files:
        # Call form of print works on both Python 2 and Python 3.
        print("-- converting %s.mp4 to %s.mp3 --" % (indir + "/" + filename, outdir + "/" + filename))
        call(["mplayer", "-novideo", "-nocorrect-pts", "-ao", "pcm:waveheader", indir + "/" + filename + ".mp4"])
        call(["lame", "-h", "-b", "128", "audiodump.wav", outdir + "/" + filename + ".mp3"])
        try:
            os.remove("audiodump.wav")
        except OSError:
            # Best effort: the dump may be missing if mplayer produced nothing.
            # Only file-system errors are ignored, so Ctrl-C still interrupts.
            pass
54 | 
# set the default directories and try to get input directories
# NOTE(review): `args` is parsed here but never used below; the hard-coded
# indir/outdir always win. Confirm whether the command-line directories
# described in the header comment should be honoured.
args = [".", "."]
for i in range(1, min(len(argv), 3)):
    args[i - 1] = argv[i]

# if only input directory is set, make the output directory the same
if len(argv) == 2:
    args[1] = args[0]

indir = '/media/mass/D/wbim/project/youtube-8m-music-video/video'
outdir = '/home/csehong/PycharmProjects/audio_feature_extraction/audio'

# Run only when executed as a script, so that importing this module has no
# side effects.
if __name__ == "__main__":
    # NOTE: unpickling can execute arbitrary code; only use a trusted
    # no_list.pkl. `with` closes the handle, which the bare open() never did.
    with open('no_list.pkl', 'rb') as f:
        list_mv = pickle.load(f)

    main(indir, outdir)
71 | 


--------------------------------------------------------------------------------
/data/video_feature_extractor/READ_ME.txt:
--------------------------------------------------------------------------------
1 | We extract frame-level features using an Inception network trained on the large-scale ImageNet dataset, following the starter code provided with the YouTube-8M dataset: https://github.com/google/youtube-8m#video-level-models
2 | 
3 | You can also download the frame-level features from https://research.google.com/youtube8m/download.html
4 | 
5 | 


--------------------------------------------------------------------------------
/embed_loss.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import tensorflow as tf
 4 | import numpy as np
 5 | 
 6 | import OPTS
 7 | 
class Triplet:
  """Two-directional hinge (triplet) ranking loss between two embedding sets."""
  class OPTS(OPTS.OPTS):
    """Options for Triplet.

    `network_name` defaults to None, so it has to be set by the caller;
    otherwise assert_all_keys_valid() rejects the options.
    """
    class DIST:
      # Identifiers for the distance type.
      L2 = 0
      COS = 1
      
    def __init__(self):
      OPTS.OPTS.__init__(self,'Triplet OPTS')
      self.network_name = None
      # Margin added inside the hinge in construct().
      self.margin = 0.1
      # NOTE(review): construct() does not read this option; it always uses the
      # negated dot product as distance.
      self.distance = self.DIST.L2
      # When False, construct() L2-normalizes both inputs along axis 1 first.
      self.is_l2_norm_already = True
      # k used for the top_k over positive pairs.
      self.num_max_pos_pair = 1

  def __init__(self, opts=None):
    """Store `opts` (a fresh OPTS() when None) and validate that no option is None."""
    if opts is None:
      opts = self.OPTS()
    self.opts = opts
    self.opts.assert_all_keys_valid()
  
  def construct(self, embed_x, embed_y, aff_xy, K, name):
    """Build the loss ops inside variable scope `name`.

    Args:
      embed_x, embed_y: embedding tensors; presumably [Nx, D] and [Ny, D],
        since they are combined via embed_x * embed_y^T (TODO confirm).
      aff_xy: boolean tensor selecting the positive (x, y) pairs; presumably
        [Nx, Ny] to match dist_xy (TODO confirm).
      K: number of negatives taken per row (x->y) / per column (y->x).
      name: variable scope name.

    Returns:
      (loss_xy, loss_yx): scalar means of the hinge terms for both directions.
    """
    with tf.variable_scope(name):
      if not self.opts.is_l2_norm_already:
        embed_x = tf.nn.l2_normalize(embed_x, 1)
        embed_y = tf.nn.l2_normalize(embed_y, 1)

      # convert similarity to distance by (-1) >> high value indicates the distance between the samples is long
      dist_xy = (-1) *tf.matmul(embed_x, embed_y,transpose_a = False, transpose_b = True)
      # Kept on the instance so callers can inspect the distance matrix.
      self.dist_xy = dist_xy

      # Mask out the other kind of pair with a sentinel that can never win the
      # following top_k: -1e6 for positives (largest wanted), +1e6 for
      # negatives (smallest wanted).
      dist_pos_pair = tf.select(aff_xy, dist_xy, tf.ones_like(dist_xy,dtype=tf.float32)*(-1e+6))
      dist_neg_pair = tf.select(tf.logical_not(aff_xy), dist_xy, tf.ones_like(dist_xy,dtype=tf.float32)*(1e+6))

      # for top violating positive samples: largest positive distances per row
      # (x->y) and, via the transposes, per column (y->x)
      top_k_pos_pair_xy, _ = tf.nn.top_k(dist_pos_pair, k=self.opts.num_max_pos_pair)
      top_k_pos_pair_yx, _ = tf.nn.top_k(tf.transpose(dist_pos_pair),k=self.opts.num_max_pos_pair)
      top_k_pos_pair_yx = tf.transpose(top_k_pos_pair_yx)

      # for top violating negative samples: top_k on the negated distances and
      # negating back yields the K smallest negative distances
      top_k_neg_pair_xy, _ = tf.nn.top_k(tf.negative(dist_neg_pair),k=K)
      top_k_neg_pair_xy = tf.negative(top_k_neg_pair_xy)
      top_k_neg_pair_yx, _ = tf.nn.top_k(tf.transpose(tf.negative(dist_neg_pair)),k=K)
      top_k_neg_pair_yx = tf.negative(tf.transpose(top_k_neg_pair_yx))

      # Repeat the positive distances K times so that they can be paired with
      # each of the K negatives below.
      top_k_pos_pair_xy = tf.tile(top_k_pos_pair_xy, [1,K])
      top_k_pos_pair_yx = tf.tile(top_k_pos_pair_yx, [K,1])
      shape_xy = tf.shape(aff_xy)
      
      # Give the tiled positives an explicit K axis; the remaining (-1) axis
      # presumably has size num_max_pos_pair (TODO confirm).
      top_k_pos_pair_xy = tf.reshape(top_k_pos_pair_xy, [shape_xy[0],K,-1])
      top_k_pos_pair_yx = tf.reshape(top_k_pos_pair_yx, [-1,K,shape_xy[1]])

      # Hinge: margin + d(positive) - d(negative), clipped at zero; expand_dims
      # lets the negatives broadcast against the positive axis.
      loss_xy = tf.maximum(0.,self.opts.margin + top_k_pos_pair_xy - tf.expand_dims(top_k_neg_pair_xy,2))
      loss_yx = tf.maximum(0.,self.opts.margin + top_k_pos_pair_yx - tf.expand_dims(top_k_neg_pair_yx,0))

      # Average over all hinge terms, including those that are zero.
      loss_xy = tf.reduce_mean(tf.reshape(loss_xy,[-1]))
      loss_yx = tf.reduce_mean(tf.reshape(loss_yx,[-1]))

      # Disabled alternative: normalize by the number of non-zero terms.
      # xy_nonzero = tf.reduce_sum(tf.cast(tf.greater(loss_xy, 0.), tf.float32)) + 1e-13
      # yx_nonzero = tf.reduce_sum(tf.cast(tf.greater(loss_yx, 0.), tf.float32)) + 1e-13
      #
      # loss_xy = loss_xy / xy_nonzero
      # loss_yx = loss_yx / yx_nonzero

      return loss_xy, loss_yx
72 |       


--------------------------------------------------------------------------------
/embed_network_music.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import tensorflow as tf
  4 | import numpy as np
  5 | 
  6 | import OPTS
  7 | 
  8 | 
  9 | class Music_Model:
 10 |     class OPTS(OPTS.OPTS):
 11 |         def __init__(self):
 12 |             OPTS.OPTS.__init__(self, 'Music_Model OPTS')
 13 |             self.network_name = None
 14 |             self.num_layer = 2
 15 | 
 16 |     def __init__(self, opts=None):
 17 |         if opts is None:
 18 |             opts = self.OPTS()
 19 |         self.opts = opts
 20 |         self.opts.assert_all_keys_valid()
 21 |         self.__is_constructed = False
 22 |         self.is_training = True  # move into opts!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 23 | 
 24 | 
 25 |     def construct(self, x_data, keep_prob, is_linear = False, is_training=None):
 26 |         with tf.variable_scope(self.opts.network_name):
 27 |             self.is_training = is_training
 28 |             self.fc1 = self.fc_layer(x_data, 2048, name='fc1')
 29 |             self.relu1 = tf.nn.relu(self.fc1, name='relu1')
 30 |             if (is_linear == False):
 31 |                 self.relu1_drop = tf.nn.dropout(self.relu1, keep_prob=keep_prob, name='relu1_drop')
 32 |             else:
 33 |                 self.relu1_drop = tf.nn.dropout(self.fc1, keep_prob=keep_prob, name='fc1_drop')
 34 | 
 35 |             self.fc2 = self.fc_layer(self.relu1_drop, 1024, name='fc2')
 36 |             self.relu2 = tf.nn.relu(self.fc2, name='relu2')
 37 |             if (is_linear == False):
 38 |                 self.relu2_drop = tf.nn.dropout(self.relu2, keep_prob=keep_prob, name='relu2_drop')
 39 |             else:
 40 |                 self.relu2_drop = tf.nn.dropout(self.fc2, keep_prob=keep_prob, name='fc2_drop')
 41 | 
 42 |             if (self.opts.num_layer == 3): # We only consier that the number of layers for music is only 3...
 43 |                 self.relu_last = self.relu2_drop
 44 |             else:
 45 |                 self.fc3 = self.fc_layer(self.relu2_drop, 1024, name='fc3')
 46 |                 self.relu3 = tf.nn.relu(self.fc3, name='relu3')
 47 |                 if (is_linear == False):
 48 |                     self.relu3_drop = tf.nn.dropout(self.relu3, keep_prob=keep_prob, name='relu3_drop')
 49 |                 else:
 50 |                     self.relu3_drop = tf.nn.dropout(self.fc3, keep_prob=keep_prob, name='fc3_drop')
 51 | 
 52 |                 self.fc4 = self.fc_layer(self.relu3_drop, 512, name='fc4')
 53 |                 self.relu4 = tf.nn.relu(self.fc4, name='relu4')
 54 |                 if (is_linear == False):
 55 |                     self.relu4_drop = tf.nn.dropout(self.relu4, keep_prob=keep_prob, name='relu4_drop') #orignally I mistake relu4_drop -> relu_3_drop...
 56 |                 else:
 57 |                     self.relu4_drop = tf.nn.dropout(self.fc4, keep_prob=keep_prob, name='fc4_drop')
 58 | 
 59 |                 if (self.opts.num_layer == 1):
 60 |                     self.relu_last = x_data
 61 |                 elif (self.opts.num_layer == 2):
 62 |                     self.relu_last = self.relu1_drop
 63 |                 elif (self.opts.num_layer == 4):
 64 |                     self.relu_last = self.relu3_drop
 65 |                 elif (self.opts.num_layer == 5):
 66 |                     self.relu_last = self.relu4_drop
 67 |                 else:
 68 |                     print ("invalid layer, num layer should be 2~5")
 69 |                     return
 70 | 
 71 |             self.fc5 = self.fc_layer(self.relu_last, 512, name='fc5')
 72 |             # self.bn = self.batch_normalization(self.fc2, 512, name='batch_normalize')
 73 |             self.bn = tf.contrib.layers.batch_norm(inputs=self.fc5, decay=0.99, center=True, scale=True, epsilon=1e-3,
 74 |                                                    is_training=self.is_training, scope='BN')
 75 |             self.l2_norm = tf.nn.l2_normalize(self.bn, dim=1, name='l2_normalize')
 76 |             return self.l2_norm
 77 | 
 78 |     def fc_layer(self, bottom, output_dim, name, reuse=False):
 79 |         with tf.variable_scope(name, reuse=reuse):
 80 |             shape = bottom.get_shape().as_list()
 81 |             dim = 1
 82 |             for d in shape[1:]:
 83 |                 dim *= d
 84 |             x = tf.reshape(bottom, [-1, dim])
 85 | 
 86 |             weights = tf.get_variable(name='W', shape=[dim, output_dim],
 87 |                                       initializer=self.initializeWeight(shape=[dim, output_dim]))
 88 |             # fc = tf.matmul(x, weights)  # to avoid conflict from beta of  batch normalization
 89 |             bias = tf.get_variable(name='b', shape=[output_dim],
 90 |                                    initializer=tf.constant_initializer(value=0., dtype=tf.float32))
 91 |             fc = tf.nn.xw_plus_b(x, weights, bias, name="fc")
 92 |             return fc
 93 | 
 94 |     # http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
 95 |     def batch_normalization(self, x, dim, name, reuse=False):
 96 |         with tf.variable_scope(name):
 97 |             batch_mean, batch_var = tf.nn.moments(x, [0])
 98 | 
 99 |             scale = tf.get_variable(name='scale', shape=[dim],
100 |                                     initializer=tf.constant_initializer(value=1, dtype=tf.float32))
101 |             beta = tf.get_variable(name='beta', shape=[dim],
102 |                                    initializer=tf.constant_initializer(value=0, dtype=tf.float32))
103 | 
104 |             BN = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, 1e-3)
105 |             return BN
106 | 
107 |     # xavier
108 |     def initializeWeight(self, shape, method='xavier2'):
109 |         inputsize = reduce(lambda x, y: x * y, shape[0:-1])
110 |         if method == 'xavier2':
111 |             stddev = np.sqrt(2.0 / (inputsize))
112 |         else:
113 |             outsize = shape[-1]
114 |             stddev = np.sqrt(3.0 / (inputsize + outsize))
115 |         return tf.truncated_normal_initializer(stddev=stddev)
116 | 


--------------------------------------------------------------------------------
/embed_network_video.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import tensorflow as tf
  4 | import numpy as np
  5 | 
  6 | import OPTS
  7 | 
  8 | 
  9 | class Video_Model:
 10 |     class OPTS(OPTS.OPTS):
 11 |         def __init__(self):
 12 |             OPTS.OPTS.__init__(self, 'Video_Model OPTS')
 13 |             self.network_name = None
 14 |             self.num_layer = 2
 15 | 
 16 |     def __init__(self, opts=None):
 17 |         if opts is None:
 18 |             opts = self.OPTS()
 19 |         self.opts = opts
 20 |         self.opts.assert_all_keys_valid()
 21 |         self.__is_constructed = False
 22 |         self.is_training = True
 23 | 
 24 | 
 25 |     def construct(self, x_data, keep_prob, is_linear = False, is_training=None):
 26 |         with tf.variable_scope(self.opts.network_name):
 27 |             self.is_training = is_training
 28 |             self.fc1 = self.fc_layer(x_data, 2048, name='fc1')
 29 |             self.relu1 = tf.nn.relu(self.fc1, name='relu1')
 30 |             if(is_linear==False):
 31 |                 self.relu1_drop = tf.nn.dropout(self.relu1, keep_prob=keep_prob, name='relu1_drop')
 32 |             else:
 33 |                 self.relu1_drop = tf.nn.dropout(self.fc1, keep_prob=keep_prob, name='fc1_drop')
 34 | 
 35 |             self.fc2 = self.fc_layer(self.relu1_drop, 1024, name='fc2')
 36 |             self.relu2 = tf.nn.relu(self.fc2, name='relu2')
 37 |             if (is_linear == False):
 38 |                 self.relu2_drop = tf.nn.dropout(self.relu2, keep_prob=keep_prob, name='relu2_drop')
 39 |             else:
 40 |                 self.relu2_drop = tf.nn.dropout(self.fc2, keep_prob=keep_prob, name='fc2_drop')
 41 | 
 42 |             self.fc3 = self.fc_layer(self.relu2_drop, 1024, name='fc3')
 43 |             self.relu3 = tf.nn.relu(self.fc3, name='relu3')
 44 |             if (is_linear == False):
 45 |                 self.relu3_drop = tf.nn.dropout(self.relu3, keep_prob=keep_prob, name='relu3_drop')
 46 |             else:
 47 |                 self.relu3_drop = tf.nn.dropout(self.fc3, keep_prob=keep_prob, name='fc3_drop')
 48 | 
 49 |             self.fc4 = self.fc_layer(self.relu3_drop, 512, name='fc4')
 50 |             self.relu4 = tf.nn.relu(self.fc4, name='relu4')
 51 |             if (is_linear == False):
 52 |                 self.relu4_drop = tf.nn.dropout(self.relu4, keep_prob=keep_prob, name='relu4_drop')  #orignally I mistake relu4_drop -> relu_3_drop...
 53 |             else:
 54 |                 self.relu4_drop = tf.nn.dropout(self.fc4, keep_prob=keep_prob, name='fc4_drop')
 55 | 
 56 | 
 57 |             if (self.opts.num_layer == 1):
 58 |                 self.relu_last = x_data
 59 |             elif (self.opts.num_layer == 2):
 60 |                 self.relu_last = self.relu1_drop
 61 |             elif (self.opts.num_layer == 3):
 62 |                 self.relu_last = self.relu2_drop
 63 |             elif (self.opts.num_layer == 4):
 64 |                 self.relu_last = self.relu3_drop
 65 |             elif (self.opts.num_layer == 5):
 66 |                 self.relu_last = self.relu4_drop
 67 |             else:
 68 |                 print ("invalid layer, num layer should be 2~5")
 69 |                 return
 70 | 
 71 |             self.fc5 = self.fc_layer(self.relu_last , 512, name='fc5')
 72 |             # self.bn = self.batch_normalization(self.fc2, 512, name='batch_normalize')
 73 |             self.bn = tf.contrib.layers.batch_norm(inputs=self.fc5, decay=0.99, center=True, scale=True, epsilon=1e-3,
 74 |                                                    is_training=self.is_training, scope='BN')
 75 |             self.l2_norm = tf.nn.l2_normalize(self.bn, dim=1, name='l2_normalize')
 76 |             return self.l2_norm
 77 | 
 78 |     def fc_layer(self, bottom, output_dim, name, reuse=False):
 79 |         with tf.variable_scope(name, reuse=reuse):
 80 |             shape = bottom.get_shape().as_list()
 81 |             dim = 1
 82 |             for d in shape[1:]:
 83 |                 dim *= d
 84 |             x = tf.reshape(bottom, [-1, dim])
 85 | 
 86 |             weights = tf.get_variable(name='W', shape=[dim, output_dim],
 87 |                                       initializer=self.initializeWeight(shape=[dim, output_dim]))
 88 |             # fc = tf.matmul(x, weights)  # to avoid conflict from beta of  batch normalization
 89 |             bias = tf.get_variable(name='b', shape=[output_dim],
 90 |                                    initializer=tf.constant_initializer(value=0., dtype=tf.float32))
 91 |             fc = tf.nn.xw_plus_b(x, weights, bias, name="fc")
 92 |             return fc
 93 | 
 94 |     # http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
 95 |     def batch_normalization(self, x, dim, name, reuse=False):
 96 |         with tf.variable_scope(name):
 97 |             batch_mean, batch_var = tf.nn.moments(x, [0])
 98 | 
 99 |             scale = tf.get_variable(name='scale', shape=[dim],
100 |                                     initializer=tf.constant_initializer(value=1, dtype=tf.float32))
101 |             beta = tf.get_variable(name='beta', shape=[dim],
102 |                                    initializer=tf.constant_initializer(value=0, dtype=tf.float32))
103 | 
104 |             BN = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, 1e-3)
105 |             return BN
106 | 
107 |     # xavier
108 |     def initializeWeight(self, shape, method='xavier2'):
109 |         inputsize = reduce(lambda x, y: x * y, shape[0:-1])
110 |         if method == 'xavier2':
111 |             stddev = np.sqrt(2.0 / (inputsize))
112 |         else:
113 |             outsize = shape[-1]
114 |             stddev = np.sqrt(3.0 / (inputsize + outsize))
115 |         return tf.truncated_normal_initializer(stddev=stddev)
116 | 


--------------------------------------------------------------------------------
/embed_structure_loss.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import tensorflow as tf
 4 | import numpy as np
 5 | import random
 6 | import OPTS
 7 | 
 8 | 
 9 | 
10 | 
class Triplet_Structure:
  """Structure loss comparing neighbour ordering before and after embedding."""

  class OPTS(OPTS.OPTS):
    """Options container; `network_name` has to be set by the caller."""

    class DIST:
      L2 = 0
      COS = 1

    def __init__(self):
      OPTS.OPTS.__init__(self,'Triplet Structure_OPTS')
      self.network_name = None
      self.margin = 0.1
      self.distance = self.DIST.L2
      self.is_l2_norm_already = True
      self.num_max_pos_pair = 1

  def __init__(self, opts=None):
    """Keep `opts` (default-constructed when None) and validate it."""
    self.opts = self.OPTS() if opts is None else opts
    self.opts.assert_all_keys_valid()

  def construct(self, low_feature, embed_feature, K, name):
    """Build the structure loss inside variable scope `name`.

    Pairwise negated dot products are computed for the L2-normalized
    `low_feature` and for `embed_feature` as given. Two K x (K-1) windows of
    each matrix, shifted by one column against each other, are compared: the
    difference of the signs of (left - right) in both spaces weights the
    embedded difference (right - left), and the mean of that product is
    returned.
    """
    with tf.variable_scope(name):
      unit_low = tf.nn.l2_normalize(low_feature, 1)

      # similarity -> distance by multiplying with (-1)
      neg_sim_low = (-1) * tf.matmul(unit_low, unit_low, transpose_a=False, transpose_b=True)
      neg_sim_embed = (-1) * tf.matmul(embed_feature, embed_feature, transpose_a=False, transpose_b=True)

      window = [K, K-1]
      left_low = tf.slice(neg_sim_low, [0, 0], window)
      right_low = tf.slice(neg_sim_low, [0, 1], window)
      left_embed = tf.slice(neg_sim_embed, [0, 0], window)
      right_embed = tf.slice(neg_sim_embed, [0, 1], window)

      # zero wherever both spaces order the two neighbours the same way
      order_mismatch = tf.sign(left_low - right_low) - tf.sign(left_embed - right_embed)
      weighted = tf.multiply(order_mismatch, right_embed - left_embed)
      return tf.reduce_mean(weighted)
61 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | import OPTS
 5 | 
 6 | class Recall:
 7 |   class OPTS(OPTS.OPTS):
 8 |     class DIST:
 9 |       L2 = 0
10 |       COS = 1
11 |     def __init__(self):
12 |       OPTS.OPTS.__init__(self,'Recall OPTS')
13 |       self.network_name = None
14 |       self.distance = self.DIST.COS
15 |     
16 |   def __init__(self, opts=None):
17 |     if opts is None:
18 |       opts = self.OPTS()
19 |     self.opts = opts
20 |     self.opts.assert_all_keys_valid()
21 |     
22 |   def construct(self, embed_x, embed_y, aff_xy, K_list):
23 |     embed_x = tf.nn.l2_normalize(embed_x, 1)
24 |     embed_y = tf.nn.l2_normalize(embed_y, 1)
25 |         
26 |     dist_xy = tf.matmul(embed_x, embed_y,transpose_a = False, transpose_b = True)
27 |     
28 |     self.to_debug = []
29 |     
30 |     shape_xy = tf.shape(aff_xy)
31 |     recall_k_xy = []
32 |     recall_k_yx = []
33 |     recall_k_xy_idx = 0
34 |     recall_k_yx_idx = 0
35 |     for K in K_list:
36 |       indice_mat_xy = tf.tile(tf.expand_dims(tf.range(0,shape_xy[0]),1),[1,K])
37 |       indice_mat_xy = tf.expand_dims(indice_mat_xy, dim=1)
38 |       indice_mat_yx = tf.tile(tf.expand_dims(tf.range(0,shape_xy[1]),1),[1,K])
39 |       indice_mat_yx = tf.expand_dims(indice_mat_yx, dim=1)
40 |       
41 |       _, xy_idx = tf.nn.top_k(dist_xy,k=K)
42 |       _, yx_idx = tf.nn.top_k(tf.transpose(dist_xy),k=K)
43 | 
44 |       # variable for recall result
45 |       recall_k_xy_idx = xy_idx
46 |       recall_k_yx_idx = yx_idx
47 | 
48 |       xy_idx = tf.expand_dims(xy_idx, dim=1)
49 |       yx_idx = tf.expand_dims(yx_idx, dim=1)
50 |       xy_idx = tf.concat(1, [indice_mat_xy,xy_idx])
51 |       yx_idx = tf.concat(1, [yx_idx,indice_mat_yx])
52 | 
53 |       xy_idx = tf.transpose(xy_idx, perm=[0,2,1])
54 |       yx_idx = tf.transpose(yx_idx, perm=[0,2,1])
55 | 
56 |       xy_search = tf.cast(tf.gather_nd(aff_xy, xy_idx),dtype=tf.float32)
57 |       yx_search = tf.cast(tf.gather_nd(aff_xy, yx_idx),dtype=tf.float32)
58 | 
59 |       xy_result = tf.cast(tf.not_equal(tf.reduce_sum(xy_search,1), 0.),tf.float32)
60 |       yx_result = tf.cast(tf.not_equal(tf.reduce_sum(yx_search,1), 0.),tf.float32)
61 | 
62 |       recall_k_xy.append(tf.reduce_mean(xy_result))
63 |       recall_k_yx.append(tf.reduce_mean(yx_result))
64 | 
65 |     return recall_k_xy, recall_k_yx, recall_k_xy_idx, recall_k_yx_idx
66 |   
67 |   
if __name__ == '__main__':
  # Smoke test: recall@3 on random embeddings with an identity affinity matrix.
  rec_opts = Recall.OPTS()
  rec_opts.network_name = 'Test_Object'

  recall = Recall(rec_opts)

  embed_x = np.random.normal(size=[20,10])
  embed_y = np.random.normal(size=[20,10])
  aff_xy = np.eye(20,20)
  K_list = [3]
  # construct() returns four values (two recall lists and two index tensors);
  # unpacking only two raised ValueError. The index tensors are not needed here.
  r_xy, r_yx, _, _ = recall.construct(embed_x, embed_y, aff_xy, K_list)
  import scipy.io
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    d, r1, r2 = sess.run([recall.to_debug, r_xy, r_yx])
    # Formatted so the output text is the same on Python 2 and Python 3.
    print('%s %s' % (r1, r2))
    scipy.io.savemat('recall_test.mat',mdict={'d':d})
85 |   
86 |   


--------------------------------------------------------------------------------
/figure/concept.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/concept.JPG


--------------------------------------------------------------------------------
/figure/demo.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/demo.JPG


--------------------------------------------------------------------------------
/figure/framework.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/framework.JPG


--------------------------------------------------------------------------------
/figure/retrieval_result.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/retrieval_result.JPG


--------------------------------------------------------------------------------
/figure/video_sample.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csehong/VM-NET/73c719bd7eecb0516fbb2809b7f828a7bc17421a/figure/video_sample.JPG


--------------------------------------------------------------------------------
/flip_gradient.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.python.framework import ops
 3 | 
 4 | class FlipGradientBuilder(object):
 5 | 	def __init__(self):
 6 | 		self.num_calls = 0
 7 | 
 8 | 	def __call__(self, x, l=1.0):
 9 | 		grad_name = "FlipGradient%d" % self.num_calls
10 | 		@ops.RegisterGradient(grad_name)
11 | 		def _flip_gradients(op, grad):
12 | 			return [tf.neg(grad) * l]
13 | 
14 | 		g = tf.get_default_graph()
15 | 		with g.gradient_override_map({"Identity": grad_name}):
16 | 			y = tf.identity(x)
17 | 
18 | 		self.num_calls += 1
19 | 		return y
20 | 
21 | flip_gradient = FlipGradientBuilder()
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/network.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import tensorflow as tf
  4 | import numpy as np
  5 | 
  6 | from flip_gradient import *
  7 | import embed_network as en
  8 | import embed_network_music as enm
  9 | import embed_network_video as env
 10 | import embed_loss as el
 11 | import eval
 12 | 
 13 | import OPTS
 14 | 
 15 | 
 16 | class Model:
 17 |     class OPTS(OPTS.OPTS):
 18 |         def __init__(self):
 19 |             OPTS.OPTS.__init__(self, 'Model OPTS')
 20 |             self.network_name = None
 21 |             self.x_dim = None
 22 |             self.y_dim = None
 23 |             self.x_num_layer = 2
 24 |             self.y_num_layer = 2
 25 |             self.constraint_weights = [2, 1]
 26 |             self.batch_size = 1024
 27 |             self.is_linear = False
 28 | 
 29 |     def __init__(self, opts):
 30 |         if opts is None:
 31 |             opts = self.OPTS()
 32 |         self.opts = opts
 33 |         self.opts.assert_all_keys_valid()
 34 | 
 35 |     def construct(self):
 36 | 
 37 |         self.x_data = tf.placeholder(tf.float32, [None, self.opts.x_dim], name='X_data')
 38 |         self.y_data = tf.placeholder(tf.float32, [None, self.opts.y_dim], name='Y_data')
 39 | 
 40 |          # Build embedding
 41 |         self.aff_xy = tf.placeholder(tf.bool, [None, None], name='aff_xy')
 42 |         self.K = tf.placeholder(tf.int32, name='K')
 43 |         self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
 44 |         self.is_training = tf.placeholder(tf.bool, name='is_training')
 45 |         self.l = tf.placeholder(tf.float32, name='lambda_for_adv')
 46 | 
 47 |         x_net_opts = enm.Music_Model.OPTS()
 48 |         x_net_opts.network_name = 'X_network'
 49 |         x_net_opts.num_layer = self.opts.x_num_layer
 50 |         self.x_net = enm.Music_Model(x_net_opts)
 51 | 
 52 |         if(self.opts.x_num_layer == 3 and self.opts.y_num_layer == 2):
 53 |             y_net_opts = en.Wang_Model.OPTS()
 54 |             y_net_opts.network_name = 'Y_network'
 55 |             self.y_net = en.Wang_Model(y_net_opts)
 56 |         else:
 57 |             y_net_opts = env.Video_Model.OPTS()
 58 |             y_net_opts.network_name = 'Y_network'
 59 |             y_net_opts.num_layer = self.opts.y_num_layer
 60 |             self.y_net = env.Video_Model(y_net_opts)
 61 | 
 62 |         self.x_embed = self.x_net.construct(self.x_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training)
 63 |         self.y_embed = self.y_net.construct(self.y_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training)
 64 | 
 65 | 
 66 | 
 67 |         # Cross-modal Triplet loss
 68 |         el_opts = el.Triplet.OPTS()
 69 |         el_opts.network_name = 'Triplet'
 70 |         el_opts.distance = el_opts.DIST.COS
 71 | 
 72 |         el_net = el.Triplet(el_opts)
 73 |         self.loss_xy, self.loss_yx = el_net.construct(self.x_embed, self.y_embed, self.aff_xy, self.K, 'Triplet_Net')
 74 | 
 75 |         # Adversarial loss
 76 |         self.concat_feat = tf.concat(0, [self.x_embed, self.y_embed])
 77 |         self.dom_label = tf.concat(0, [tf.tile([0.], [tf.shape(self.x_embed)[0]]),
 78 |                                        tf.tile([1.], [tf.shape(self.y_embed)[0]])])
 79 |         self.dom_label = tf.expand_dims(self.dom_label, 1)
 80 |         self.dom_loss, self.dom_acc = self.dom_classifier(self.concat_feat, self.dom_label, self.l, self.keep_prob)
 81 | 
 82 |         self.loss = self.loss_xy * self.opts.constraint_weights[0] + self.loss_yx * self.opts.constraint_weights[1]
 83 | 
 84 |         # calculate gradients
 85 |         _, self.xy_rank_x = tf.nn.moments(tf.gradients(self.loss_xy, [self.x_embed])[0], [0, 1])
 86 |         _, self.yx_rank_x = tf.nn.moments(tf.gradients(self.loss_yx, [self.x_embed])[0], [0, 1])
 87 |         _, self.dom_x = tf.nn.moments(tf.gradients(self.dom_loss, [self.x_embed])[0], [0, 1])
 88 |         _, self.xy_rank_y = tf.nn.moments(tf.gradients(self.loss_xy, [self.y_embed])[0], [0, 1])
 89 |         _, self.yx_rank_y = tf.nn.moments(tf.gradients(self.loss_yx, [self.y_embed])[0], [0, 1])
 90 |         _, self.dom_y = tf.nn.moments(tf.gradients(self.dom_loss, [self.y_embed])[0], [0, 1])
 91 | 
 92 | 
 93 | 
 94 |         eval_opts = eval.Recall.OPTS()
 95 |         eval_opts.network_name = 'Recall'
 96 | 
 97 |         eval_net = eval.Recall(eval_opts)
 98 | 
 99 |         self.recall_xy, self.recall_yx, self.xy_idx, self.yx_idx = eval_net.construct(self.x_embed, self.y_embed,
100 |                                                                                       self.aff_xy,
101 |                                                                                       [1, 10, 25, 50, 100, 1000])
102 | 
103 |         # self.debug_list = el_net.debug_list
104 | 
105 |     def dom_classifier(self, tensor, labels, l=1., keep_prob=1.):
106 |         with tf.name_scope("dom_classifier"):
107 |             feature = flip_gradient(tensor, l)
108 | 
109 |             d_fc1 = tf.nn.relu(self.fc_layer(feature, 512, "dom_fc1"))
110 |             d_fc1 = tf.nn.dropout(d_fc1, keep_prob)
111 | 
112 |             d_fc2 = tf.nn.relu(self.fc_layer(d_fc1, 512, "dom_fc2"))
113 |             d_fc2 = tf.nn.dropout(d_fc2, keep_prob)
114 | 
115 |             # d_logits = self.fc_layer(d_fc2, 2, "dom_logits")
116 |             d_logit = self.fc_layer(d_fc2, 1, "dom_logit")
117 | 
118 |         with tf.name_scope("domain_acc_and_loss"):
119 |             # self.domain_prediction = tf.nn.softmax(d_logits)
120 |             domain_prediction = tf.sigmoid(d_logit)
121 | 
122 |             # self.domain_loss = tf.nn.softmax_cross_entropy_with_logits(d_logits, self.domain)
123 |             domain_loss = tf.nn.sigmoid_cross_entropy_with_logits(d_logit, labels)
124 |             domain_loss = tf.reduce_mean(domain_loss)
125 | 
126 |             # domain acc
127 |             correct_domain_prediction = tf.equal(tf.round(domain_prediction), labels)
128 |             domain_acc = tf.reduce_mean(tf.cast(correct_domain_prediction, tf.float32))
129 |         return domain_loss, domain_acc
130 | 
131 |     def fc_layer(self, tensor, dim, name, reuse=False):
132 |         in_shape = tensor.get_shape()
133 | 
134 |         with tf.variable_scope(name, reuse=reuse):
135 |             weights = tf.get_variable("weights", shape=[in_shape[1], dim],
136 |                                       initializer=tf.contrib.layers.xavier_initializer())
137 |             biases = tf.get_variable("biases", shape=[dim],
138 |                                      initializer=tf.constant_initializer(0.0))
139 |             fc = tf.nn.xw_plus_b(tensor, weights, biases)
140 |             return fc
141 | 
142 | 


--------------------------------------------------------------------------------
/network_structure.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import tensorflow as tf
  4 | import numpy as np
  5 | 
  6 | from flip_gradient import *
  7 | import embed_network as en
  8 | import embed_network_music as enm
  9 | import embed_network_video as env
 10 | import embed_loss as el
 11 | import embed_structure_loss as esl
 12 | import eval
 13 | 
 14 | import OPTS
 15 | 
 16 | 
 17 | class Model_structure:
 18 |     class OPTS(OPTS.OPTS):
 19 |         def __init__(self):
 20 |             OPTS.OPTS.__init__(self, 'Model OPTS')
 21 |             self.network_name = None
 22 |             self.x_dim = None
 23 |             self.y_dim = None
 24 |             self.x_num_layer = 2
 25 |             self.y_num_layer = 2
 26 |             self.constraint_weights = [2, 1]
 27 |             self.batch_size = 1024
 28 |             self.is_linear = False
 29 | 
 30 |     def __init__(self, opts):
 31 |         if opts is None:
 32 |             opts = self.OPTS()
 33 |         self.opts = opts
 34 |         self.opts.assert_all_keys_valid()
 35 | 
 36 |     def construct(self):
 37 | 
 38 |         self.x_data = tf.placeholder(tf.float32, [None, self.opts.x_dim], name='X_data')
 39 |         self.y_data = tf.placeholder(tf.float32, [None, self.opts.y_dim], name='Y_data')
 40 | 
 41 |         # Build embedding
 42 |         self.aff_xy = tf.placeholder(tf.bool, [None, None], name='aff_xy')
 43 |         self.K = tf.placeholder(tf.int32, name='K')
 44 |         self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
 45 |         self.is_training = tf.placeholder(tf.bool, name='is_training')
 46 |         self.l = tf.placeholder(tf.float32, name='lambda_for_adv')
 47 | 
 48 |         x_net_opts = enm.Music_Model.OPTS()
 49 |         x_net_opts.network_name = 'X_network'
 50 |         x_net_opts.num_layer = self.opts.x_num_layer
 51 |         self.x_net = enm.Music_Model(x_net_opts)
 52 | 
 53 |         if(self.opts.x_num_layer == 3 and self.opts.y_num_layer == 2):
 54 |             y_net_opts = en.Wang_Model.OPTS()
 55 |             y_net_opts.network_name = 'Y_network'
 56 |             self.y_net = en.Wang_Model(y_net_opts)
 57 |         else:
 58 |             y_net_opts = env.Video_Model.OPTS()
 59 |             y_net_opts.network_name = 'Y_network'
 60 |             y_net_opts.num_layer = self.opts.y_num_layer
 61 |             self.y_net = env.Video_Model(y_net_opts)
 62 | 
 63 |         self.x_embed = self.x_net.construct(self.x_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training)
 64 |         self.y_embed = self.y_net.construct(self.y_data, keep_prob=self.keep_prob, is_linear=self.opts.is_linear, is_training=self.is_training)
 65 | 
 66 | 
 67 | 
 68 |         # Cross-modal Triplet loss
 69 |         el_opts = el.Triplet.OPTS()
 70 |         el_opts.network_name = 'Triplet'
 71 |         el_opts.distance = el_opts.DIST.COS
 72 | 
 73 |         el_net = el.Triplet(el_opts)
 74 |         self.loss_cross_xy, self.loss_cross_yx = el_net.construct(self.x_embed, self.y_embed, self.aff_xy, self.K, 'Triplet_Net')
 75 | 
 76 |         # Single-modal structure loss
 77 |         esl_opts = esl.Triplet_Structure.OPTS()
 78 |         esl_opts.network_name = 'Triplet_Structure'
 79 |         esl_opts.distance = el_opts.DIST.COS
 80 | 
 81 |         esl_net = esl.Triplet_Structure(esl_opts)
 82 |         self.loss_single_x = esl_net.construct(self.x_data, self.x_embed, self.K, 'Triplet_Strcture_x_Net')
 83 |         self.loss_single_y = esl_net.construct(self.y_data, self.y_embed, self.K, 'Triplet_Strcture_y_Net')
 84 | 
 85 |         # Adversarial loss
 86 |         self.concat_feat = tf.concat(0, [self.x_embed, self.y_embed])
 87 |         self.dom_label = tf.concat(0, [tf.tile([0.], [tf.shape(self.x_embed)[0]]),
 88 |                                        tf.tile([1.], [tf.shape(self.y_embed)[0]])])
 89 |         self.dom_label = tf.expand_dims(self.dom_label, 1)
 90 |         self.dom_loss, self.dom_acc = self.dom_classifier(self.concat_feat, self.dom_label, self.l, self.keep_prob)
 91 | 
 92 | 
 93 |         # Final loss
 94 |         self.loss = self.loss_cross_xy * self.opts.constraint_weights[0] + self.loss_cross_yx * self.opts.constraint_weights[1] \
 95 |                         + self.loss_single_x * self.opts.constraint_weights[2] + self.loss_single_y * self.opts.constraint_weights[3]
 96 | 
 97 |         # calculate gradients
 98 |         _, self.xy_rank_x = tf.nn.moments(tf.gradients(self.loss_cross_xy, [self.x_embed])[0], [0, 1])
 99 |         _, self.yx_rank_x = tf.nn.moments(tf.gradients(self.loss_cross_yx, [self.x_embed])[0], [0, 1])
100 |         _, self.x_rank_x = tf.nn.moments(tf.gradients(self.loss_single_x, [self.x_embed])[0], [0, 1])
101 |         _, self.xy_rank_y = tf.nn.moments(tf.gradients(self.loss_cross_xy, [self.y_embed])[0], [0, 1])
102 |         _, self.yx_rank_y = tf.nn.moments(tf.gradients(self.loss_cross_yx, [self.y_embed])[0], [0, 1])
103 |         _, self.y_rank_y = tf.nn.moments(tf.gradients(self.loss_single_y, [self.y_embed])[0], [0, 1])
104 | 
105 | 
106 |         eval_opts = eval.Recall.OPTS()
107 |         eval_opts.network_name = 'Recall'
108 | 
109 |         eval_net = eval.Recall(eval_opts)
110 | 
111 |         self.recall_xy, self.recall_yx, self.xy_idx, self.yx_idx = eval_net.construct(self.x_embed, self.y_embed,
112 |                                                                                       self.aff_xy,
113 |                                                                                       [1, 10, 25, 50, 100, 1000])
114 | 
115 |         # self.debug_list = el_net.debug_list
116 | 
117 |     def dom_classifier(self, tensor, labels, l=1., keep_prob=1.):
118 |         with tf.name_scope("dom_classifier"):
119 |             feature = flip_gradient(tensor, l)
120 | 
121 |             d_fc1 = tf.nn.relu(self.fc_layer(feature, 512, "dom_fc1"))
122 |             d_fc1 = tf.nn.dropout(d_fc1, keep_prob)
123 | 
124 |             d_fc2 = tf.nn.relu(self.fc_layer(d_fc1, 512, "dom_fc2"))
125 |             d_fc2 = tf.nn.dropout(d_fc2, keep_prob)
126 | 
127 |             # d_logits = self.fc_layer(d_fc2, 2, "dom_logits")
128 |             d_logit = self.fc_layer(d_fc2, 1, "dom_logit")
129 | 
130 |         with tf.name_scope("domain_acc_and_loss"):
131 |             # self.domain_prediction = tf.nn.softmax(d_logits)
132 |             domain_prediction = tf.sigmoid(d_logit)
133 | 
134 |             # self.domain_loss = tf.nn.softmax_cross_entropy_with_logits(d_logits, self.domain)
135 |             domain_loss = tf.nn.sigmoid_cross_entropy_with_logits(d_logit, labels)
136 |             domain_loss = tf.reduce_mean(domain_loss)
137 | 
138 |             # domain acc
139 |             correct_domain_prediction = tf.equal(tf.round(domain_prediction), labels)
140 |             domain_acc = tf.reduce_mean(tf.cast(correct_domain_prediction, tf.float32))
141 |         return domain_loss, domain_acc
142 | 
143 |     def fc_layer(self, tensor, dim, name, reuse=False):
144 |         in_shape = tensor.get_shape()
145 | 
146 |         with tf.variable_scope(name, reuse=reuse):
147 |             weights = tf.get_variable("weights", shape=[in_shape[1], dim],
148 |                                       initializer=tf.contrib.layers.xavier_initializer())
149 |             biases = tf.get_variable("biases", shape=[dim],
150 |                                      initializer=tf.constant_initializer(0.0))
151 |             fc = tf.nn.xw_plus_b(tensor, weights, biases)
152 |             return fc
153 | 
154 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import tensorflow as tf
 3 | import numpy as np
 4 | import os
 5 | import scipy.io
 6 | 
 7 | from network import Model
 8 | 
 9 | flags = tf.app.flags
10 | FLAGS = flags.FLAGS
11 | 
12 | flags.DEFINE_integer('num_layer_x', 3, 'Constraint Weight xy')
13 | flags.DEFINE_integer('num_layer_y',2, 'Constraint Weight yx')
14 | flags.DEFINE_integer('test_batch_size', 1086, 'Test batch size.') #flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.')
15 | flags.DEFINE_string('summaries_dir', 'expr_ware/lr_0.0003   dr_0.9   nx_3   ny_2   xy_3   yx_1   K_100   ba_2000', 'Directory to put the summary and log data.')
16 | 
17 | net_opts = Model.OPTS()
18 | net_opts.network_name = 'Wrapping Network'
19 | net_opts.x_dim = 1140 #4096
20 | net_opts.y_dim = 1024 #6000
21 | net_opts.x_num_layer = FLAGS.num_layer_x
22 | net_opts.y_num_layer = FLAGS.num_layer_y
23 | net = Model(net_opts)
24 | net.construct()
25 | 
26 | saver = tf.train.Saver(tf.global_variables())
27 | with tf.Session() as sess:
28 |   sess.run(tf.global_variables_initializer())
29 |   checkpoint_dir = os.path.join(FLAGS.summaries_dir, 'checkpoints')
30 |   checkpoint_prefix = os.path.join(checkpoint_dir, "model.ckpt")
31 |   if not os.path.exists(checkpoint_dir):
32 |     os.makedirs(checkpoint_dir)
33 | 
34 |   step = 0
35 |   ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
36 |   if ckpt and ckpt.model_checkpoint_path:
37 |     saver.restore(sess, ckpt.model_checkpoint_path)
38 |     #saver.restore(sess, os.path.join(checkpoint_dir, 'model.ckpt-9399'))
39 |     step = int(ckpt.model_checkpoint_path.split('-')[-1])
40 |     step += 1
41 |     print('Session restored successfully. step: {0}'.format(step))
42 | 
43 |   import data.himv as himv
44 |   num_test = FLAGS.test_batch_size
45 |   data_manager = himv.Data_Manager()
46 |   data_name = data_manager.data_name
47 |   data_name_test = data_name[-num_test:len(data_name)]
48 |   test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.validation_pair_list, is_train=False)
49 |   # test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.test_pair_list, is_train=False)
50 | 
51 |   K = 100
52 |   x_batch, y_batch, aff_xy = test_batch.next()
53 |   xy, yx, xy_idx, yx_idx = sess.run([net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx], feed_dict={
54 |       net.x_data: x_batch,
55 |       net.y_data: y_batch,
56 |       net.K: K,
57 |       net.aff_xy: aff_xy,
58 |       net.keep_prob: 1., net.is_training: False})  # actually, False
59 | 
60 | 
61 |   print("[iter %d] xy: %s, yx: %s, " % (step, xy, yx))
62 | 
63 | 
64 | 
65 |   import csv
66 |   with open('./recall_xy(music-video)_general_vali.csv', 'wb') as csvfile:
67 |       writer = csv.writer(csvfile, delimiter=',')
68 |       top = ["Query Name"] + [str(i) for i in range(0, num_test)]
69 |       writer.writerow(top)
70 |       for idx_input in range(len(xy_idx)):
71 |           recall_mv = [data_name_test[idx_recall]  for idx_recall in xy_idx[idx_input]]
72 |           result = [data_name_test[idx_input]] + recall_mv
73 |           writer.writerow(result)
74 | 
75 |   with open('./recall_yx(video-music)_general_vali.csv', 'wb') as csvfile:
76 |       writer = csv.writer(csvfile, delimiter=',')
77 |       top = ["Query Name"] + [str(i) for i in range(0, num_test)]
78 |       writer.writerow(top)
79 |       for idx_input in range(len(yx_idx)):
80 |           recall_mv = [data_name_test[idx_recall] for idx_recall in yx_idx[idx_input]]
81 |           result = [data_name_test[idx_input]] + recall_mv
82 |           writer.writerow(result)
83 | 
84 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import tensorflow as tf
  4 | import numpy as np
  5 | import os
  6 | import scipy.io
  7 | 
  8 | from network import Model
  9 | from network_structure import Model_structure
 10 | from Logger import Logger
 11 | 
 12 | flags = tf.app.flags
 13 | FLAGS = flags.FLAGS
 14 | flags.DEFINE_float('learning_rate', 3e-4
 15 |                    , 'Initial learning rate.')
 16 | flags.DEFINE_float('keep_prob', 0.9, 'Dropout keep probability rate.')
 17 | flags.DEFINE_integer('num_layer_x', 3, 'Constraint Weight xy')
 18 | flags.DEFINE_integer('num_layer_y',2, 'Constraint Weight yx')
 19 | flags.DEFINE_integer('constraint_xy', 3, 'Constraint Weight xy')
 20 | flags.DEFINE_integer('constraint_yx',1, 'Constraint Weight yx')
 21 | flags.DEFINE_integer('constraint_x', 0.2, 'Constraint Structure Weight x')
 22 | flags.DEFINE_integer('constraint_y', 0.2, 'Constraint Structure Weight y')
 23 | flags.DEFINE_integer('top_K', 100, 'Top mosk K number for violation')
 24 | flags.DEFINE_float('weight_decay', 0, 'Weight decay.')
 25 | flags.DEFINE_integer('max_steps', 700000, 'Number of steps to run trainer.')
 26 | flags.DEFINE_integer('train_batch_size', 1000, 'Train Batch size.')  #flags.DEFINE_integer('batch_size', 1500, 'Batch size.')
 27 | flags.DEFINE_integer('validation_batch_size', 4000, 'Validation Batch size.')  #flags.DEFINE_integer('batch_size', 1500, 'Batch size.')
 28 | flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.') #flags.DEFINE_integer('test_batch_size', 1000, 'Test batch size.')
 29 | flags.DEFINE_integer('display_step', 50, 'Train display step.')
 30 | flags.DEFINE_integer('test_step', 200, 'Test step.')
 31 | flags.DEFINE_integer('save_step', 200, 'Checkpoint saving step.')
 32 | flags.DEFINE_string('summaries_dir', "dir_name", 'Directory to put the summary and log data.')
 33 | FLAGS.summaries_dir = "expr/Structure_test(K2_Linear)_" + "lr_" + str(FLAGS.learning_rate) + "   dr_" + str(FLAGS.keep_prob) +  "   nx_" + str(FLAGS.num_layer_x) +   "   ny_" + str(FLAGS.num_layer_y)  \
 34 |                     +  "   xy_" + str(FLAGS.constraint_xy) +"   yx_" + str(FLAGS.constraint_yx) + "   x_" + str(FLAGS.constraint_x) +"   y_" + str(FLAGS.constraint_y) \
 35 |                     + "   K_" + str(FLAGS.top_K) + "   ba_" + str(FLAGS.train_batch_size)
 36 | 
 37 | 
 38 | 
 39 | # Structure_test(K2_dom)_lr_0.0003   dr_0.7   nx_3   ny_2   xy_3   yx_1   x_0.2   y_0.2   K_100   ba_1000
 40 | 
 41 | # net_opts = Model.OPTS()
 42 | # net_opts.network_name = 'Wrapping Network'
 43 | # net_opts.x_dim = 1140
 44 | # net_opts.y_dim = 1024
 45 | # net_opts.x_num_layer = FLAGS.num_layer_x
 46 | # net_opts.y_num_layer = FLAGS.num_layer_y
 47 | # net_opts.constraint_weights = [FLAGS.constraint_xy, FLAGS.constraint_yx]
 48 | # net_opts.is_linear = False
 49 | # net = Model(net_opts)
 50 | # net.construct()
 51 | 
 52 | # net_opts = Model.OPTS()
 53 | net_opts = Model_structure.OPTS()
 54 | net_opts.network_name = 'Wrapping Network'
 55 | net_opts.x_dim = 1140
 56 | net_opts.y_dim = 1024
 57 | net_opts.x_num_layer = FLAGS.num_layer_x
 58 | net_opts.y_num_layer = FLAGS.num_layer_y
 59 | net_opts.constraint_weights = [FLAGS.constraint_xy, FLAGS.constraint_yx, FLAGS.constraint_x, FLAGS.constraint_y]
 60 | net_opts.is_linear = True
 61 | # net = Model(net_opts)
 62 | net = Model_structure(net_opts)
 63 | net.construct()
 64 | 
 65 | 
 66 | lr = tf.placeholder(tf.float32, name='learning_rate')
 67 | loss = net.loss
 68 | # loss = net.loss + net.dom_loss
 69 | 
 70 | 
 71 | global_step_ = tf.Variable(0, trainable=False)
 72 | decay_step = 1000
 73 | # learning_rate_ = tf.train.exponential_decay(FLAGS.learning_rate, global_step_,
 74 | #                                            decay_step, 0.96, staircase=True)
 75 | learning_rate = FLAGS.learning_rate
 76 | 
 77 | # top_k_ = tf.train.exponential_decay(float(FLAGS.top_K), global_step_,
 78 | #                                            decay_step, 0.96, staircase=True)
 79 | top_k = FLAGS.top_K
 80 | 
 81 | 
 82 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
 83 | # Ensures that we execute the update_ops before performing the train_step (ref: http://ruishu.io/2016/12/27/batchnorm/ )
 84 | with tf.control_dependencies(update_ops):
 85 |     optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
 86 |     # optimizer = tf.train.AdamOptimizer(learning_rate_).minimize(loss, global_step=global_step_)
 87 | 
 88 | config = tf.ConfigProto()
 89 | config.gpu_options.allow_growth=True
 90 | saver = tf.train.Saver(tf.all_variables(), max_to_keep = None)
 91 | with tf.Session(config=config) as sess:
 92 |   sess.run(tf.global_variables_initializer())
 93 |   
 94 |   train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',sess.graph)
 95 |   validation_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/validation')
 96 |   test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
 97 |   
 98 |   checkpoint_dir = os.path.join(FLAGS.summaries_dir, 'checkpoints')
 99 |   checkpoint_prefix = os.path.join(checkpoint_dir, "model.ckpt")
100 |   if not os.path.exists(checkpoint_dir):
101 |     os.makedirs(checkpoint_dir)
102 |   
103 |   logger = Logger(FLAGS.summaries_dir)
104 |   logger.write(str(FLAGS.__flags))
105 |   import data.himv as himv
106 |   data_manager = himv.Data_Manager()
107 |   train_batch = data_manager.batch_iterator_thread(FLAGS.train_batch_size, data_manager.train_pair_list)
108 |   validation_batch = data_manager.batch_iterator_thread(FLAGS.validation_batch_size, data_manager.validation_pair_list, is_train=False)
109 |   test_batch = data_manager.batch_iterator_thread(FLAGS.test_batch_size, data_manager.test_pair_list, is_train=False)
110 |   
111 |   step = 0
112 |   ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
113 |   if ckpt and ckpt.model_checkpoint_path:
114 |     saver.restore(sess, ckpt.model_checkpoint_path)
115 |     step = int(ckpt.model_checkpoint_path.split('-')[-1])
116 |     print('Session restored successfully. step: {0}'.format(step))
117 |     step = step + 1
118 | 
119 |   max_step_acc_xy = [0, -5e3]
120 |   max_step_acc_yx = [0, -5e3]
121 |   for i in range(step,FLAGS.max_steps):
122 |     p = float(i) / FLAGS.max_steps
123 |     lamb = 0
124 |     # lamb = 2. / (1. + np.exp(-20. * p)) - 1   # lamb = 2. / (1. + np.exp(-10. * p)) - 1
125 |     # lamb *= 10
126 | 
127 |     x_batch, y_batch, aff_xy = train_batch.next()
128 |     # K = FLAGS.top_K
129 | 
130 |     # learning_rate = sess.run(learning_rate_)
131 | 
132 | 
133 | 
134 | 
135 | 
136 |     sess.run(optimizer,feed_dict={
137 |                       net.x_data:x_batch,
138 |                       net.y_data:y_batch,
139 |                       net.K:int(top_k),
140 |                       net.aff_xy:aff_xy,
141 |                       net.keep_prob:FLAGS.keep_prob,
142 |                       lr: learning_rate, net.is_training: True, net.l: lamb})
143 |     
144 |     if (i+1) % FLAGS.display_step == 0:
145 |       loss_cross_xy, loss_single_x, loss_cross_yx, loss_single_y, xy_rank_x, yx_rank_x, x_rank_x, xy_rank_y, yx_rank_y, y_rank_y, l,xy,yx,dl,da = sess.run([net.loss_cross_xy, net.loss_single_x, net.loss_cross_yx, net.loss_single_y, net.xy_rank_x, net.yx_rank_x, net.x_rank_x, net.xy_rank_y, net.yx_rank_y, net.y_rank_y, loss,net.recall_xy, net.recall_yx, net.dom_loss, net.dom_acc],feed_dict={
146 |                       net.x_data:x_batch,
147 |                       net.y_data:y_batch,
148 |                       net.K:int(top_k),
149 |                       net.aff_xy:aff_xy,
150 |                       net.keep_prob:1., net.is_training: True, net.l: lamb})
151 | 
152 | 
153 |       logger.write("[iter %d] (%s %s, %s %s)(%s %s, %s) (%s %s, %s) loss=%.4g, %s, %s "%(i+1, loss_cross_xy, loss_cross_yx, loss_single_x, loss_single_y,  xy_rank_x, yx_rank_x, x_rank_x, xy_rank_y, yx_rank_y, y_rank_y, l, xy, yx))
154 |       short_summary = tf.Summary(value=[
155 |         tf.Summary.Value(tag="triplet_loss", simple_value=float(l)),
156 |         tf.Summary.Value(tag="K", simple_value=float(top_k)),
157 |         tf.Summary.Value(tag="lambda", simple_value=float(lamb)),
158 |         tf.Summary.Value(tag="dom_loss", simple_value=float(dl)),
159 |         tf.Summary.Value(tag="dom_acc", simple_value=float(da)),
160 |         tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)),
161 |         tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])),
162 |         tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])),
163 |         tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])),
164 |         tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])),
165 |         tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])),
166 |         tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])),
167 |         tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])),
168 |         tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])),
169 |         tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])),
170 |         tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])),
171 |         tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])),
172 |         tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])),
173 |       ])
174 |       train_writer.add_summary(short_summary, i)
175 |       #scipy.io.savemat('debug.mat', mdict={'d':dl})
176 |       #exit()
177 | 
178 | 
179 |     if (i+1) % FLAGS.test_step == 0:
180 |       # # for validation
181 |       # x_batch, y_batch, aff_xy = validation_batch.next()
182 |       # l, xy, yx, xy_idx, yx_idx, dl, da = sess.run(
183 |       #   [loss, net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx, net.dom_loss, net.dom_acc], feed_dict={
184 |       #     net.x_data: x_batch,
185 |       #     net.y_data: y_batch,
186 |       #     net.K: int(top_k),
187 |       #     net.aff_xy: aff_xy,
188 |       #     net.keep_prob: 1., net.is_training: False, net.l: lamb})  # actually, False
189 |       #
190 |       # logger.write("[Validation iter %d] loss=%.4g, %s, %s" % (i + 1, l, xy, yx))
191 |       # short_summary = tf.Summary(value=[
192 |       #   tf.Summary.Value(tag="triplet_loss", simple_value=float(l)),
193 |       #   tf.Summary.Value(tag="K", simple_value=float(top_k)),
194 |       #   tf.Summary.Value(tag="lambda", simple_value=float(lamb)),
195 |       #   tf.Summary.Value(tag="dom_loss", simple_value=float(dl)),
196 |       #   tf.Summary.Value(tag="dom_acc", simple_value=float(da)),
197 |       #   tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)),
198 |       #   tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])),
199 |       #   tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])),
200 |       #   tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])),
201 |       #   tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])),
202 |       #   tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])),
203 |       #   tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])),
204 |       #   tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])),
205 |       #   tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])),
206 |       #   tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])),
207 |       #   tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])),
208 |       #   tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])),
209 |       #   tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])),
210 |       # ])
211 |       # validation_writer.add_summary(short_summary, i)
212 | 
213 |       # for test
214 |       x_batch, y_batch, aff_xy = test_batch.next()
215 |       l,xy,yx, xy_idx, yx_idx,dl,da = sess.run([loss,net.recall_xy, net.recall_yx, net.xy_idx, net.yx_idx, net.dom_loss, net.dom_acc],feed_dict={
216 |                       net.x_data:x_batch,
217 |                       net.y_data:y_batch,
218 |                       net.K:int(top_k),  # caution: We currently use topK as test batch size
219 |                       net.aff_xy:aff_xy,
220 |                       net.keep_prob:1., net.is_training: False, net.l: lamb})  #actually, False
221 | 
222 |       logger.write("[TEST iter %d] loss=%.4g, %s, %s"%(i+1, l, xy, yx))
223 |       short_summary = tf.Summary(value=[
224 |         tf.Summary.Value(tag="triplet_loss", simple_value=float(l)),
225 |         tf.Summary.Value(tag="K", simple_value=float(top_k)),
226 |         tf.Summary.Value(tag="lambda", simple_value=float(lamb)),
227 |         tf.Summary.Value(tag="dom_loss", simple_value=float(dl)),
228 |         tf.Summary.Value(tag="dom_acc", simple_value=float(da)),
229 |         tf.Summary.Value(tag="learning_rate", simple_value=float(learning_rate)),
230 |         tf.Summary.Value(tag="recall/xy_R@1", simple_value=float(xy[0])),
231 |         tf.Summary.Value(tag="recall/xy_R@10", simple_value=float(xy[1])),
232 |         tf.Summary.Value(tag="recall/xy_R@25", simple_value=float(xy[2])),
233 |         tf.Summary.Value(tag="recall/xy_R@50", simple_value=float(xy[3])),
234 |         tf.Summary.Value(tag="recall/xy_R@100", simple_value=float(xy[4])),
235 |         tf.Summary.Value(tag="recall/xy_R@1000", simple_value=float(xy[5])),
236 |         tf.Summary.Value(tag="recall/yx_R@1", simple_value=float(yx[0])),
237 |         tf.Summary.Value(tag="recall/yx_R@10", simple_value=float(yx[1])),
238 |         tf.Summary.Value(tag="recall/yx_R@25", simple_value=float(yx[2])),
239 |         tf.Summary.Value(tag="recall/yx_R@50", simple_value=float(yx[3])),
240 |         tf.Summary.Value(tag="recall/yx_R@100", simple_value=float(yx[4])),
241 |         tf.Summary.Value(tag="recall/yx_R@1000", simple_value=float(yx[5])),
242 |       ])
243 | 
244 |       # if (float(xy[0]) > max_step_acc_xy[1]):
245 |       #   max_step_acc_xy[1] = float(xy[0])
246 |       #   max_step_acc_xy[0] = i + 1
247 |       # if (float(yx[0]) > max_step_acc_yx[1]):
248 |       #   max_step_acc_yx[1] = float(yx[0])
249 |       #   max_step_acc_yx[0] = i + 1
250 | 
251 |       test_writer.add_summary(short_summary, i)
252 |     if (i+1) % FLAGS.save_step == 0:
253 |       saver.save(sess,checkpoint_prefix,global_step=i)
254 |       logger.write("[Checkpoint at step %d saved]"%(i+1))
255 |       logger.write(FLAGS.summaries_dir)
256 |       # logger.write("*** xy (%d, %f), yx (%d, %f) ***" %(max_step_acc_xy[0], max_step_acc_xy[1], max_step_acc_yx[0], max_step_acc_yx[1]))
257 | 
258 |   
259 |   
260 |       


--------------------------------------------------------------------------------