├── README.md
├── RRC.m
├── SDH.m
├── cat_apcal.m
├── compactbit.m
├── demo_SDH.m
├── evaluate_macro.m
├── hammingDist.m
├── normalize.m
├── prepare_dataset.m
├── sqdist.m
└── testbed
    ├── cifar_10_gist.part1.rar
    └── cifar_10_gist.part2.rar

/README.md:
--------------------------------------------------------------------------------
# Discrete Hashing

Code for the following paper:

- Fumin Shen, Chunhua Shen, Wei Liu, Heng Tao Shen, "Supervised Discrete Hashing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015.

1. The folder './testbed' contains the .mat files used by this demo code. You can also download the files via http://pan.baidu.com/s/1ntHYDVb.

2. Run 'demo_SDH.m'.

--------------------------------------------------------------------------------
/RRC.m:
--------------------------------------------------------------------------------
function [W, labels, E] = RRC(tr_dat, tr_labels, lambda)
% Regularized ridge regression / classification:
%   W = argmin_W ||Y - X*W||_F^2 + lambda*||W||_F^2
% where Y is the (one-hot) label matrix built from tr_labels.

% compute the projection matrix, using whichever Gram matrix is smaller
if size(tr_dat,1) < size(tr_dat,2)
    Proj_M = tr_dat'/(tr_dat*tr_dat' + lambda*eye(size(tr_dat,1)));
else
    Proj_M = (tr_dat'*tr_dat + lambda*eye(size(tr_dat,2)))\tr_dat';
end

% build the target matrix: one-hot encode vector labels, otherwise use as-is
if isvector(tr_labels)
    Y = sparse(1:length(tr_labels), double(tr_labels), 1); Y = full(Y);
else
    Y = tr_labels;
end
W = Proj_M * Y;

%-------------------------------------------------------------------------
% testing
if nargout > 1
    [~, labels] = max(tr_dat*W, [], 2);   % predicted class = argmax of the scores
end
if nargout > 2
    E = sum(sum((Y - tr_dat*W).^2)) + lambda*sum(sum(W.^2));   % objective value
end
--------------------------------------------------------------------------------
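For reference, a minimal usage sketch of RRC on synthetic data (everything below is illustrative and not part of the repository):

```matlab
% Illustrative only: fit a ridge classifier on synthetic two-class data.
rng(0);
X = randn(200, 16);                 % 200 samples, 16-dimensional features
y = 1 + double(X(:,1) > 0);         % class labels in {1,2}, separable on dimension 1
[W, pred, E] = RRC(X, y, 1);        % lambda = 1
fprintf('train accuracy = %.3f, objective = %.3f\n', mean(pred == y), E);
```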
/SDH.m:
--------------------------------------------------------------------------------
function [G, F, B] = SDH(X, y, B, gmap, Fmap, tol, maxItr, debug)
% Supervised Discrete Hashing (Shen et al., CVPR 2015)
% X      : N x d feature matrix (e.g. the RBF anchor features built in demo_SDH.m)
% y      : N x 1 label vector, or N x c label matrix
% B      : N x nbits initial binary codes (+1/-1)
% gmap   : G-step options (gmap.loss = 'L2' or 'Hinge', gmap.lambda)
% Fmap   : F-step options (Fmap.nu, Fmap.lambda)
% tol    : convergence tolerance (default 1e-5)
% maxItr : maximum number of iterations (default 1000)
% debug  : print per-iteration progress (default 1)

% ---------- Argument defaults ----------
if ~exist('debug','var') || isempty(debug)
    debug = 1;
end
if ~exist('tol','var') || isempty(tol)
    tol = 1e-5;
end
if ~exist('maxItr','var') || isempty(maxItr)
    maxItr = 1000;
end
nu = Fmap.nu;
delta = 1/nu;
% ---------- End ----------

% label matrix, N x c
if isvector(y)
    Y = sparse(1:length(y), double(y), 1); Y = full(Y);
else
    Y = y;
end

% G-step: regress the label matrix from the current codes
switch gmap.loss
    case 'L2'
        [Wg, ~, ~] = RRC(B, Y, gmap.lambda); % (B'*B + gmap.lambda*eye(nbits))\B'*Y
    case 'Hinge'
        % requires LIBLINEAR's train() on the path
        svm_option = ['-q -s 4 -c ', num2str(1/gmap.lambda)];
        model = train(double(y), sparse(B), svm_option);
        Wg = model.w';
end
G.W = Wg;

% F-step: regress the codes from the features
[WF, ~, ~] = RRC(X, B, Fmap.lambda);
F.W = WF; F.nu = nu;

i = 0;
while i < maxItr
    i = i + 1;
    if debug, fprintf('Iteration %03d: ', i); end

    % B-step: update the binary codes with G and F fixed
    XF = X*WF;

    switch gmap.loss
        case 'L2'
            Q = nu*XF + Y*Wg';
            % B = zeros(size(B));
            % discrete cyclic coordinate descent: update one bit column at a time
            for time = 1:10
                Z0 = B;
                for k = 1:size(B,2)
                    Zk = B; Zk(:,k) = [];
                    Wkk = Wg(k,:); Wk = Wg; Wk(k,:) = [];
                    B(:,k) = sign(Q(:,k) - Zk*Wk*Wkk');
                end
                if norm(B-Z0,'fro') < 1e-6 * norm(Z0,'fro')
                    break
                end
            end
        case 'Hinge'
            for ix_z = 1:size(B,1)
                w_ix_z = bsxfun(@minus, Wg(:,y(ix_z)), Wg);
                B(ix_z,:) = sign(2*nu*XF(ix_z,:) + delta*sum(w_ix_z,2)');
            end
    end

    % G-step
    switch gmap.loss
        case 'L2'
            [Wg, ~, ~] = RRC(B, Y, gmap.lambda);
        case 'Hinge'
            model = train(double(y), sparse(B), svm_option);
            Wg = model.w';
    end
    G.W = Wg;

    % F-step
    WF0 = WF;
    [WF, ~, ~] = RRC(X, B, Fmap.lambda);
    F.W = WF; F.nu = nu;

    % convergence checks: quantization residual and change in F
    bias = norm(B - X*WF, 'fro');
    if debug, fprintf(' bias=%g\n', bias); end

    if bias < tol*norm(B,'fro')
        break;
    end
    if norm(WF-WF0,'fro') < tol * norm(WF0,'fro')
        break;
    end
end
--------------------------------------------------------------------------------
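A minimal call sketch on random data, mirroring the parameter layout used in demo_SDH.m (all data and values below are illustrative, not from the repository):

```matlab
% Illustrative only: learn 8-bit codes for toy data with the L2 G-step.
rng(0);
X = randn(500, 32);                 % toy feature matrix
y = randi(5, 500, 1);               % toy labels from 5 classes
gmap.lambda = 1;    gmap.loss = 'L2';
Fmap.nu = 1e-5;     Fmap.lambda = 1e-2;
Binit = sign(randn(500, 8));        % random +1/-1 initial codes
[G, F, B] = SDH(X, y, Binit, gmap, Fmap, [], 5, 0);
codes = X*F.W > 0;                  % binary codes from the learned hash function
```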
/cat_apcal.m:
--------------------------------------------------------------------------------
function [ap] = cat_apcal(traingnd, testgnd, IX)
% Mean average precision (MAP) over all test queries.
% IX(:,i) is the ranked list of training indices for test query i.

[numtrain, numtest] = size(IX);

apall = zeros(1, numtest);
for i = 1:numtest
    y = IX(:,i);
    x = 0;
    p = 0;
    new_label = zeros(1, numtrain);
    new_label(traingnd == testgnd(i)) = 1;   % relevant training items share the query label

    num_return_NN = numtrain; % set to e.g. 5000 to compute MAP only on the top returned neighbours
    for j = 1:num_return_NN
        if new_label(y(j)) == 1
            x = x + 1;           % number of relevant items retrieved so far
            p = p + x/j;         % running sum of precision at each relevant position
        end
    end
    if p == 0
        apall(i) = 0;
    else
        apall(i) = p/x;
    end
end

ap = mean(apall);
--------------------------------------------------------------------------------
/compactbit.m:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function cb = compactbit(b)
%
% Written by Rob Fergus
% b  = bits array (one row per sample, one column per bit)
% cb = compacted string of bits, packed into uint8 words (8 bits per word)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[nSamples, nbits] = size(b);
nwords = ceil(nbits/8);
cb = zeros([nSamples nwords], 'uint8');

for j = 1:nbits
    w = ceil(j/8);
    cb(:,w) = bitset(cb(:,w), mod(j-1,8)+1, b(:,j));
end
--------------------------------------------------------------------------------
/demo_SDH.m:
--------------------------------------------------------------------------------
clear; close all;

% addpath('liblinear-1.91/windows'); % required for the hinge loss option

dataset = 'cifar_10_gist';

% prepare_dataset(dataset);

load(['./testbed/', dataset]);
traindata = double(traindata);
testdata = double(testdata);

% make labels 1-based
if sum(traingnd == 0)
    traingnd = traingnd + 1;
    testgnd = testgnd + 1;
end

Ntrain = size(traindata,1);
% Use all the training data
X = traindata;
label = double(traingnd);

% get anchors
n_anchors = 1000;
% rand('seed',1);
anchor = X(randsample(Ntrain, n_anchors), :);

% % determine the RBF width sigma from the data
% Dis = EuDist2(X,anchor,0);
% % sigma = mean(mean(Dis)).^0.5;
% sigma = mean(min(Dis,[],2).^0.5);
% clear Dis
sigma = 0.4; % for normalized data

% RBF anchor features, with an appended bias term
PhiX = exp(-sqdist(X,anchor)/(2*sigma*sigma));
PhiX = [PhiX, ones(Ntrain,1)];

Phi_testdata = exp(-sqdist(testdata,anchor)/(2*sigma*sigma)); clear testdata
Phi_testdata = [Phi_testdata, ones(size(Phi_testdata,1),1)];
Phi_traindata = exp(-sqdist(traindata,anchor)/(2*sigma*sigma)); clear traindata;
Phi_traindata = [Phi_traindata, ones(size(Phi_traindata,1),1)];

% learn G and F
maxItr = 5;
gmap.lambda = 1; gmap.loss = 'L2';
Fmap.type = 'RBF';
Fmap.nu = 1e-5; % penalty parameter for the F term
Fmap.lambda = 1e-2;

%% run algo
nbits = 32;

% Init Z
randn('seed', 3);
Zinit = sign(randn(Ntrain, nbits));

debug = 0;
[~, F, H] = SDH(PhiX, label, Zinit, gmap, Fmap, [], maxItr, debug);

%% evaluation
disp('Evaluation...');

AsymDist = 0; % Use asymmetric hashing or not

if AsymDist
    H = H > 0; % directly use the learned bits for training data
else
    H = Phi_traindata*F.W > 0;
end

tH = Phi_testdata*F.W > 0;

hammRadius = 2;

B = compactbit(H);
tB = compactbit(tH);

hammTrainTest = hammingDist(tB, B)';

% hash lookup: precision and recall within the Hamming radius
Ret = (hammTrainTest <= hammRadius + 0.00001);
[Pre, Rec] = evaluate_macro(cateTrainTest, Ret)

% hamming ranking: MAP
[~, HammingRank] = sort(hammTrainTest, 1);
MAP = cat_apcal(traingnd, testgnd, HammingRank)
--------------------------------------------------------------------------------
/evaluate_macro.m:
--------------------------------------------------------------------------------
function [p, r] = evaluate_macro(Rel, Ret)
% evaluate macro-averaged performance
% Input:
%   Rel = relevant train documents for each test document
%   Ret = retrieved train documents for each test document
% Output:
%   p = macro-averaged precision
%   r = macro-averaged recall

numTest = size(Rel,2);
precisions = zeros(1, numTest);
recalls = zeros(1, numTest);

retrieved_relevant_pairs = (Rel & Ret);

for j = 1:numTest
    retrieved_relevant_num = nnz(retrieved_relevant_pairs(:,j));
    retrieved_num = nnz(Ret(:,j));
    relevant_num = nnz(Rel(:,j));
    if retrieved_num
        precisions(j) = retrieved_relevant_num / retrieved_num;
    else
        precisions(j) = 0;
    end
    if relevant_num
        recalls(j) = retrieved_relevant_num / relevant_num;
    else
        recalls(j) = 0;
    end
end

p = mean(precisions);
r = mean(recalls);

end
--------------------------------------------------------------------------------
/hammingDist.m:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Dh = hammingDist(B1, B2)
%
% Written by Rob Fergus
% Compute hamming distance between two sets of samples (B1, B2)
%
% Dh = hammingDist(B1, B2);
%
% Input
%    B1, B2: compact bit vectors. Each datapoint is one row.
%    size(B1) = [ndatapoints1, nwords]
%    size(B2) = [ndatapoints2, nwords]
%    It is faster if ndatapoints1 < ndatapoints2
%
% Output
%    Dh = hamming distance.
%    size(Dh) = [ndatapoints1, ndatapoints2]
%
% example query
%    Dhamm = hammingDist(B2, B1);
% this will give the same result as:
%    Dhamm = distMat(U2>0, U1>0).^2;
% the size of the distance matrix is:
%    size(Dhamm) = [Ntest x Ntraining]
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% look-up table: number of set bits in each byte value 0..255
bit_in_char = uint16([...
    0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 1 2 2 3 2 3 ...
    3 4 2 3 3 4 3 4 4 5 1 2 2 3 2 3 3 4 2 3 3 4 ...
    3 4 4 5 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 1 2 ...
    2 3 2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 ...
    3 4 4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 ...
    5 6 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 1 2 2 3 ...
    2 3 3 4 2 3 3 4 3 4 4 5 2 3 3 4 3 4 4 5 3 4 ...
    4 5 4 5 5 6 2 3 3 4 3 4 4 5 3 4 4 5 4 5 5 6 ...
    3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 2 3 3 4 3 4 ...
    4 5 3 4 4 5 4 5 5 6 3 4 4 5 4 5 5 6 4 5 5 6 ...
    5 6 6 7 3 4 4 5 4 5 5 6 4 5 5 6 5 6 6 7 4 5 ...
    5 6 5 6 6 7 5 6 6 7 6 7 7 8]);

n1 = size(B1,1);
[n2, nwords] = size(B2);

Dh = zeros([n1 n2], 'uint16');
for j = 1:n1
    for n = 1:nwords
        % cast to double so that y+1 does not saturate at 255 for uint8 inputs
        y = double(bitxor(B1(j,n), B2(:,n)));
        Dh(j,:) = Dh(j,:) + bit_in_char(y+1);
    end
end
--------------------------------------------------------------------------------
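A quick self-check of the compactbit / hammingDist pair against a direct XOR count (illustrative only; the variable names below are not from the repository):

```matlab
% Illustrative only: packed Hamming distances should match a direct bit count.
rng(0);
H1 = double(rand(5, 32) > 0.5);              % 5 binary codes of 32 bits
H2 = double(rand(7, 32) > 0.5);              % 7 binary codes of 32 bits
D  = hammingDist(compactbit(H1), compactbit(H2));
Dref = zeros(5, 7);
for a = 1:5
    for b = 1:7
        Dref(a,b) = sum(xor(H1(a,:), H2(b,:)));
    end
end
assert(isequal(double(D), Dref));            % both give the same 5 x 7 distance matrix
```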
/normalize.m:
--------------------------------------------------------------------------------
function Xn = normalize(X)
% Normalize all feature vectors (rows of X) to unit L2 length
X = double(X);
n = size(X,1);                 % the number of samples
Xt = X';
l = sqrt(sum(Xt.^2));          % L2 norm of each sample
Ni = sparse(1:n, 1:n, l);
Ni(Ni>0) = 1./Ni(Ni>0);        % invert the non-zero norms
Xn = (Xt*Ni)';

end
--------------------------------------------------------------------------------
/prepare_dataset.m:
--------------------------------------------------------------------------------
function prepare_dataset(dataset)
% The dataset is stored as row-wise matrices (one sample per row).
%%
load(['./datasets/', dataset]);

% Normalize all feature vectors to unit length
traindata = normalize(double(traindata));
testdata = normalize(double(testdata));

% ground-truth relevance matrix; traingnd and testgnd are the labels
cateTrainTest = bsxfun(@eq, traingnd, testgnd');

save(['testbed/', dataset], 'traindata', 'testdata', 'traingnd', 'testgnd', 'cateTrainTest', '-v7.3');

clear;
--------------------------------------------------------------------------------
/sqdist.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/sqdist.m
--------------------------------------------------------------------------------
/testbed/cifar_10_gist.part1.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/testbed/cifar_10_gist.part1.rar
--------------------------------------------------------------------------------
/testbed/cifar_10_gist.part2.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bd622/DiscretHashing/bfaafc13dddf9440c2b0a8637f0ba06a270e9aae/testbed/cifar_10_gist.part2.rar
--------------------------------------------------------------------------------
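sqdist.m is only linked above, not inlined. Based on how demo_SDH.m uses it (sqdist(X, anchor) inside an RBF kernel), it is expected to return pairwise squared Euclidean distances between rows; a hypothetical stand-in, not the repository's implementation, could look like:

```matlab
function D = sqdist_sketch(A, B)
% Hypothetical stand-in for sqdist.m (illustrative only).
% Pairwise squared Euclidean distances between the rows of A (n x d) and B (m x d).
D = bsxfun(@plus, sum(A.^2, 2), sum(B.^2, 2)') - 2*(A*B');
D = max(D, 0);   % clamp tiny negative values caused by round-off
end
```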