├── README.md
├── RRC.m
├── SDH.m
├── cat_apcal.m
├── compactbit.m
├── demo_SDH.m
├── evaluate_macro.m
├── hammingDist.m
├── normalize.m
├── prepare_dataset.m
├── sqdist.m
└── testbed
    ├── cifar_10_gist.part1.rar
    └── cifar_10_gist.part2.rar

/README.md:
--------------------------------------------------------------------------------
# Discrete Hashing

Code for the following paper:

- Fumin Shen, Chunhua Shen, Wei Liu, Heng Tao Shen, "Supervised Discrete Hashing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015.

1. The folder './testbed' contains the .mat files used by this demo code. You can also download the files via http://pan.baidu.com/s/1ntHYDVb.

2. Run 'demo_SDH.m'.

--------------------------------------------------------------------------------
/RRC.m:
--------------------------------------------------------------------------------
function [W, labels, E] = RRC(tr_dat, tr_labels, lambda)
% Regularized ridge regression / classification:
%   W = argmin_W ||Y - X*W||_F^2 + lambda*||W||_F^2
% where Y is the (one-hot) label matrix built from tr_labels.

% compute the projection matrix, using whichever Gram matrix is smaller
if size(tr_dat,1) < size(tr_dat,2)
    Proj_M = tr_dat'/(tr_dat*tr_dat' + lambda*eye(size(tr_dat,1)));
else
    Proj_M = (tr_dat'*tr_dat + lambda*eye(size(tr_dat,2)))\tr_dat';
end

% build the target matrix: one-hot encode vector labels, otherwise use as-is
if isvector(tr_labels)
    Y = sparse(1:length(tr_labels), double(tr_labels), 1); Y = full(Y);
else
    Y = tr_labels;
end
W = Proj_M * Y;

%-------------------------------------------------------------------------
% testing
if nargout > 1
    [~, labels] = max(tr_dat*W, [], 2);   % predicted class = argmax of the scores
end
if nargout > 2
    E = sum(sum((Y - tr_dat*W).^2)) + lambda*sum(sum(W.^2));   % objective value
end
--------------------------------------------------------------------------------
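For reference, a minimal usage sketch of RRC on synthetic data (everything below is illustrative and not part of the repository):

```matlab
% Illustrative only: fit a ridge classifier on synthetic two-class data.
rng(0);
X = randn(200, 16);                 % 200 samples, 16-dimensional features
y = 1 + double(X(:,1) > 0);         % class labels in {1,2}, separable on dimension 1
[W, pred, E] = RRC(X, y, 1);        % lambda = 1
fprintf('train accuracy = %.3f, objective = %.3f\n', mean(pred == y), E);
```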
/SDH.m:
--------------------------------------------------------------------------------
function [G, F, B] = SDH(X, y, B, gmap, Fmap, tol, maxItr, debug)
% Supervised Discrete Hashing (Shen et al., CVPR 2015)
% X      : N x d feature matrix (e.g. the RBF anchor features built in demo_SDH.m)
% y      : N x 1 label vector, or N x c label matrix
% B      : N x nbits initial binary codes (+1/-1)
% gmap   : G-step options (gmap.loss = 'L2' or 'Hinge', gmap.lambda)
% Fmap   : F-step options (Fmap.nu, Fmap.lambda)
% tol    : convergence tolerance (default 1e-5)
% maxItr : maximum number of iterations (default 1000)
% debug  : print per-iteration progress (default 1)

% ---------- Argument defaults ----------
if ~exist('debug','var') || isempty(debug)
    debug = 1;
end
if ~exist('tol','var') || isempty(tol)
    tol = 1e-5;
end
if ~exist('maxItr','var') || isempty(maxItr)
    maxItr = 1000;
end
nu = Fmap.nu;
delta = 1/nu;
% ---------- End ----------

% label matrix, N x c
if isvector(y)
    Y = sparse(1:length(y), double(y), 1); Y = full(Y);
else
    Y = y;
end

% G-step: regress the label matrix from the current codes
switch gmap.loss
    case 'L2'
        [Wg, ~, ~] = RRC(B, Y, gmap.lambda); % (B'*B + gmap.lambda*eye(nbits))\B'*Y
    case 'Hinge'
        % requires LIBLINEAR's train() on the path
        svm_option = ['-q -s 4 -c ', num2str(1/gmap.lambda)];
        model = train(double(y), sparse(B), svm_option);
        Wg = model.w';
end
G.W = Wg;

% F-step: regress the codes from the features
[WF, ~, ~] = RRC(X, B, Fmap.lambda);
F.W = WF; F.nu = nu;

i = 0;
while i < maxItr
    i = i + 1;
    if debug, fprintf('Iteration %03d: ', i); end

    % B-step: update the binary codes with G and F fixed
    XF = X*WF;

    switch gmap.loss
        case 'L2'
            Q = nu*XF + Y*Wg';
            % B = zeros(size(B));
            % discrete cyclic coordinate descent: update one bit column at a time
            for time = 1:10
                Z0 = B;
                for k = 1:size(B,2)
                    Zk = B; Zk(:,k) = [];
                    Wkk = Wg(k,:); Wk = Wg; Wk(k,:) = [];
                    B(:,k) = sign(Q(:,k) - Zk*Wk*Wkk');
                end
                if norm(B-Z0,'fro') < 1e-6 * norm(Z0,'fro')
                    break
                end
            end
        case 'Hinge'
            for ix_z = 1:size(B,1)
                w_ix_z = bsxfun(@minus, Wg(:,y(ix_z)), Wg);
                B(ix_z,:) = sign(2*nu*XF(ix_z,:) + delta*sum(w_ix_z,2)');
            end
    end

    % G-step
    switch gmap.loss
        case 'L2'
            [Wg, ~, ~] = RRC(B, Y, gmap.lambda);
        case 'Hinge'
            model = train(double(y), sparse(B), svm_option);
            Wg = model.w';
    end
    G.W = Wg;

    % F-step
    WF0 = WF;
    [WF, ~, ~] = RRC(X, B, Fmap.lambda);
    F.W = WF; F.nu = nu;

    % convergence checks: quantization residual and change in F
    bias = norm(B - X*WF, 'fro');
    if debug, fprintf(' bias=%g\n', bias); end

    if bias < tol*norm(B,'fro')
        break;
    end
    if norm(WF-WF0,'fro') < tol * norm(WF0,'fro')
        break;
    end
end
--------------------------------------------------------------------------------
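A minimal call sketch on random data, mirroring the parameter layout used in demo_SDH.m (all data and values below are illustrative, not from the repository):

```matlab
% Illustrative only: learn 8-bit codes for toy data with the L2 G-step.
rng(0);
X = randn(500, 32);                 % toy feature matrix
y = randi(5, 500, 1);               % toy labels from 5 classes
gmap.lambda = 1;    gmap.loss = 'L2';
Fmap.nu = 1e-5;     Fmap.lambda = 1e-2;
Binit = sign(randn(500, 8));        % random +1/-1 initial codes
[G, F, B] = SDH(X, y, Binit, gmap, Fmap, [], 5, 0);
codes = X*F.W > 0;                  % binary codes from the learned hash function
```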
/cat_apcal.m:
--------------------------------------------------------------------------------
function [ap] = cat_apcal(traingnd, testgnd, IX)
% Mean average precision (MAP) over all test queries.
% IX(:,i) is the ranked list of training indices for test query i.

[numtrain, numtest] = size(IX);

apall = zeros(1, numtest);
for i = 1:numtest
    y = IX(:,i);
    x = 0;
    p = 0;
    new_label = zeros(1, numtrain);
    new_label(traingnd == testgnd(i)) = 1;   % relevant training items share the query label

    num_return_NN = numtrain; % set to e.g. 5000 to compute MAP only on the top returned neighbours
    for j = 1:num_return_NN
        if new_label(y(j)) == 1
            x = x + 1;           % number of relevant items retrieved so far
            p = p + x/j;         % running sum of precision at each relevant position
        end
    end
    if p == 0
        apall(i) = 0;
    else
        apall(i) = p/x;
    end
end

ap = mean(apall);
--------------------------------------------------------------------------------
/compactbit.m:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function cb = compactbit(b)
%
% Written by Rob Fergus
% b  = bits array (one row per sample, one column per bit)
% cb = compacted string of bits, packed into uint8 words (8 bits per word)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[nSamples, nbits] = size(b);
nwords = ceil(nbits/8);
cb = zeros([nSamples nwords], 'uint8');

for j = 1:nbits
    w = ceil(j/8);
    cb(:,w) = bitset(cb(:,w), mod(j-1,8)+1, b(:,j));
end
--------------------------------------------------------------------------------
/demo_SDH.m:
--------------------------------------------------------------------------------
clear; close all;

% addpath('liblinear-1.91/windows'); % required for the hinge loss option

dataset = 'cifar_10_gist';

% prepare_dataset(dataset);

load(['./testbed/', dataset]);
traindata = double(traindata);
testdata = double(testdata);

% make labels 1-based
if sum(traingnd == 0)
    traingnd = traingnd + 1;
    testgnd = testgnd + 1;
end

Ntrain = size(traindata,1);
% Use all the training data
X = traindata;
label = double(traingnd);

% get anchors
n_anchors = 1000;
% rand('seed',1);
anchor = X(randsample(Ntrain, n_anchors), :);

% % determine the RBF width sigma from the data
% Dis = EuDist2(X,anchor,0);
% % sigma = mean(mean(Dis)).^0.5;
% sigma = mean(min(Dis,[],2).^0.5);
% clear Dis
sigma = 0.4; % for normalized data

% RBF anchor features, with an appended bias term
PhiX = exp(-sqdist(X,anchor)/(2*sigma*sigma));
PhiX = [PhiX, ones(Ntrain,1)];

Phi_testdata = exp(-sqdist(testdata,anchor)/(2*sigma*sigma)); clear testdata
Phi_testdata = [Phi_testdata, ones(size(Phi_testdata,1),1)];
Phi_traindata = exp(-sqdist(traindata,anchor)/(2*sigma*sigma)); clear traindata;
Phi_traindata = [Phi_traindata, ones(size(Phi_traindata,1),1)];

% learn G and F
maxItr = 5;
gmap.lambda = 1; gmap.loss = 'L2';
Fmap.type = 'RBF';
Fmap.nu = 1e-5; % penalty parameter for the F term
Fmap.lambda = 1e-2;

%% run algo
nbits = 32;

% Init Z
randn('seed', 3);
Zinit = sign(randn(Ntrain, nbits));

debug = 0;
[~, F, H] = SDH(PhiX, label, Zinit, gmap, Fmap, [], maxItr, debug);

%% evaluation
disp('Evaluation...');

AsymDist = 0; % Use asymmetric hashing or not

if AsymDist
    H = H > 0; % directly use the learned bits for training data
else
    H = Phi_traindata*F.W > 0;
end

tH = Phi_testdata*F.W > 0;

hammRadius = 2;

B = compactbit(H);
tB = compactbit(tH);

hammTrainTest = hammingDist(tB, B)';

% hash lookup: precision and recall within the Hamming radius
Ret = (hammTrainTest <= hammRadius + 0.00001);
[Pre, Rec] = evaluate_macro(cateTrainTest, Ret)

% hamming ranking: MAP
[~, HammingRank] = sort(hammTrainTest, 1);
MAP = cat_apcal(traingnd, testgnd, HammingRank)
--------------------------------------------------------------------------------
/evaluate_macro.m:
--------------------------------------------------------------------------------
function [p, r] = evaluate_macro(Rel, Ret)
% evaluate macro-averaged performance
% Input:
%   Rel = relevant train documents for each test document
%   Ret = retrieved train documents for each test document
% Output:
%   p = macro-averaged precision
%   r = macro-averaged recall

numTest = size(Rel,2);
precisions = zeros(1, numTest);
recalls = zeros(1, numTest);

retrieved_relevant_pairs = (Rel & Ret);

for j = 1:numTest
    retrieved_relevant_num = nnz(retrieved_relevant_pairs(:,j));
    retrieved_num = nnz(Ret(:,j));
    relevant_num = nnz(Rel(:,j));
    if retrieved_num
        precisions(j) = retrieved_relevant_num / retrieved_num;
    else
        precisions(j) = 0;
    end
    if relevant_num
        recalls(j) = retrieved_relevant_num / relevant_num;
    else
        recalls(j) = 0;
    end
end

p = mean(precisions);
r = mean(recalls);

end
--------------------------------------------------------------------------------
/hammingDist.m:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Dh = hammingDist(B1, B2)
%
% Written by Rob Fergus
% Compute hamming distance between two sets of samples (B1, B2)
%
% Dh = hammingDist(B1, B2);
%
% Input
%    B1, B2: compact bit vectors. Each datapoint is one row.
%    size(B1) = [ndatapoints1, nwords]
%    size(B2) = [ndatapoints2, nwords]
%    It is faster if ndatapoints1 < ndatapoints2
%
% Output
%    Dh = hamming distance.
%    size(Dh) = [ndatapoints1, ndatapoints2]
%
% example query
%    Dhamm = hammingDist(B2, B1);
% this will give the same result as:
%    Dhamm = distMat(U2>0, U1>0).^2;
% the size of the distance matrix is:
%    size(Dhamm) = [Ntest x Ntraining]
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% look-up table: number of set bits in each byte value 0..255
bit_in_char = uint16([...
    0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 1 2 2 3 2 3 ...
    3 4 2 3 3 4 3 4 4 5 1 2 2 3 2 3 3 4 2 3 3 4 ...
    3 4 4 5 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 1 2 ...
    2 3 2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 ...
    3 4 4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 ...
    5 6 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 1 2 2 3 ...
    2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 3 4 ...
    4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 ...
    3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 2 3 3 4 3 4 ...
    4 5 3 4 4 5 4 5 5 6 3 4 4 5 4 5 5 6 4 5 5 6 ...
    5 6 6 7 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 4 5 ...
    5 6 5 6 6 7 5 6 6 7 6 7 7 8]);

n1 = size(B1,1);
[n2, nwords] = size(B2);

Dh = zeros([n1 n2], 'uint16');
for j = 1:n1
    for n = 1:nwords
        % cast to double so that y+1 does not saturate at 255 for uint8 inputs
        y = double(bitxor(B1(j,n), B2(:,n)));
        Dh(j,:) = Dh(j,:) + bit_in_char(y+1);
    end
end
--------------------------------------------------------------------------------
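A quick self-check of the compactbit / hammingDist pair against a direct XOR count (illustrative only; the variable names below are not from the repository):

```matlab
% Illustrative only: packed Hamming distances should match a direct bit count.
rng(0);
H1 = double(rand(5, 32) > 0.5);              % 5 binary codes of 32 bits
H2 = double(rand(7, 32) > 0.5);              % 7 binary codes of 32 bits
D  = hammingDist(compactbit(H1), compactbit(H2));
Dref = zeros(5, 7);
for a = 1:5
    for b = 1:7
        Dref(a,b) = sum(xor(H1(a,:), H2(b,:)));
    end
end
assert(isequal(double(D), Dref));            % both give the same 5 x 7 distance matrix
```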
/normalize.m:
--------------------------------------------------------------------------------
function Xn = normalize(X)
% Normalize all feature vectors (rows of X) to unit L2 length
X = double(X);
n = size(X,1);                 % the number of samples
Xt = X';
l = sqrt(sum(Xt.^2));          % L2 norm of each sample
Ni = sparse(1:n, 1:n, l);
Ni(Ni>0) = 1./Ni(Ni>0);        % invert the non-zero norms
Xn = (Xt*Ni)';

end
--------------------------------------------------------------------------------
/prepare_dataset.m:
--------------------------------------------------------------------------------
function prepare_dataset(dataset)
% The dataset is stored as row-wise matrices (one sample per row).
%%
load(['./datasets/', dataset]);

% Normalize all feature vectors to unit length
traindata = normalize(double(traindata));
testdata = normalize(double(testdata));

% ground-truth relevance matrix; traingnd and testgnd are the labels
cateTrainTest = bsxfun(@eq, traingnd, testgnd');

save(['testbed/', dataset], 'traindata', 'testdata', 'traingnd', 'testgnd', 'cateTrainTest', '-v7.3');

clear;
--------------------------------------------------------------------------------
/sqdist.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/sqdist.m
--------------------------------------------------------------------------------
/testbed/cifar_10_gist.part1.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/testbed/cifar_10_gist.part1.rar
--------------------------------------------------------------------------------
/testbed/cifar_10_gist.part2.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/testbed/cifar_10_gist.part2.rar
--------------------------------------------------------------------------------
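sqdist.m is only linked above, not inlined. Based on how demo_SDH.m uses it (sqdist(X, anchor) inside an RBF kernel), it is expected to return pairwise squared Euclidean distances between rows; a hypothetical stand-in, not the repository's implementation, could look like:

```matlab
function D = sqdist_sketch(A, B)
% Hypothetical stand-in for sqdist.m (illustrative only).
% Pairwise squared Euclidean distances between the rows of A (n x d) and B (m x d).
D = bsxfun(@plus, sum(A.^2, 2), sum(B.^2, 2)') - 2*(A*B');
D = max(D, 0);   % clamp tiny negative values caused by round-off
end
```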