├── README.md
├── banana3.m
├── check_dl_files.m
├── compute_map.m
├── dfs.m
├── findtrunc.m
├── gmpweights.m
├── imgfeatids.m
├── knn_wrap.m
├── knngraph.m
├── load_approx_knn.m
├── load_vectors.m
├── run_test.m
├── transition_matrix.m
├── trunclap.m
└── ymake.m


/README.md:
--------------------------------------------------------------------------------
 1 | # Diffusion for image retrieval
 2 | 
 3 | This is a Matlab script for our paper:
 4 | 
 5 | > A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. "Efficient Diffusion on Region Manifolds: Recovering Small Objects with Compact CNN Representations", CVPR 2017
 6 | 
 7 | ## Execution
 8 | Run the following script:
 9 | ```
10 | >> run_test
11 | ```
12 | 
13 | We provide the descriptors used in our paper and also the necessary ground-truth files for mAP evaluation.
14 | In addition to that, we also make available the approximate kNN graph computed off-line with Dong et al. [10] for large-scale datasets. All the required files are stored in the link below and will be downloaded automatically when the script run_test.m is run. 
15 | 
16 | ftp://ftp.irisa.fr/local/texmex/corpus/diffusion/
17 | 
18 | Optional but recommended prerequisite:
19 | Yael Library
20 | http://yael.gforge.inria.fr/
21 | 
22 | All kNN computation will be done with Yael Library if it is found in the path. Even though this is not strictly required, we advise that Yael is installed for efficiency purposes.
23 | 
24 | Any questions or comments should be addressed to ahmet.iscen@inria.fr
25 | 
26 | 
27 | ## License
28 | 
29 | This package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
30 | 
31 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
32 | 
33 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
34 | 


--------------------------------------------------------------------------------
/banana3.m:
--------------------------------------------------------------------------------
 1 | % function generating toy dataset of 3 manifolds in the 2D space
 2 | % X = banana3(n);
 3 | % n: number of points per manifold
 4 | % X: [3n x 2] matrix of data points (n points for each of the 3 manifolds)
 5 | %
 6 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 7 | % Use rng(155); n = 100; to identically fit our Figure 1 in the CVPR 2017 paper.
 8 | function X = banana3(n)
 9 | 	% Three noisy arcs: two upright ones side by side and a flipped one between them.
10 | 	if nargin < 1
11 | 		n = 100;
12 | 	end
13 | 	gap  = .25;                  % offset that separates the arcs
14 | 	flip = [1 0; 0 -1];          % reflection about the x axis
15 | 	% banana() draws from randn: the call order left, right, middle is kept
16 | 	left  = banana(n + 1) + repmat([1-gap, 0], [n+1 1]);
17 | 	right = banana(n + 1) + repmat([3+gap, 0], [n+1 1]);
18 | 	mid   = banana(n) * flip + repmat([2, gap], [n 1]);
19 | 	X = [left(2:n+1, :); right(1:n, :); mid];   % one end point of each outer arc is dropped
20 | end
21 | 
22 | function X = banana(n, a, b)
23 | 	% n points along the upper unit half-circle, perturbed radially by Gaussian noise.
24 | 	% a scales the noise (default .12); b narrows the range fed to sin() (default .1).
25 | 	if nargin < 3, b = .1; end
26 | 	if nargin < 2, a = .12; end
27 | 	theta  = linspace(0, pi, n)';
28 | 	phase  = (1 - 2 * b) * theta + b * pi;     % maps [0, pi] onto [b*pi, (1-b)*pi]
29 | 	radius = a * randn(n, 1) .* sin(phase) + 1;
30 | 	X = [radius .* cos(theta), radius .* sin(theta)];
31 | 
32 | end
33 | 


--------------------------------------------------------------------------------
/check_dl_files.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % Check if the required files exist, download them otherwise
 3 | function [data_file, gnd_file, graph_file] = check_dl_files(data_dir,test_set,cnn_model,feature_mode)
 4 | % data_dir:     directory where the files are stored (created when missing)
 5 | % test_set:     dataset name, part of every file name
 6 | % cnn_model:    network name, part of the descriptor and graph file names
 7 | % feature_mode: the graph file is only fetched when this is 'regional'
 8 | % Returns the local paths of the descriptor, ground-truth and kNN graph files.
 9 | % The graph path is returned even when that file is neither present nor fetched.
10 | 
11 |     remote = 'ftp://ftp.irisa.fr/local/texmex/corpus/diffusion';
12 | 
13 |     data_file = sprintf('%s/%s_%s.mat',data_dir,test_set,cnn_model);
14 |     gnd_file = sprintf('%s/gnd_%s.mat',data_dir,test_set);
15 |     graph_file = sprintf('%s/%s_knn_kgraph_%s.mat',data_dir,test_set,cnn_model);
16 | 
17 |     if ~exist(data_file,'file')
18 |         warning('Warning: Downloading descriptors...')
19 |         fetch_file(sprintf('%s/data/%s_%s.mat',remote,test_set,cnn_model), data_dir, data_file);
20 |     end
21 | 
22 |     if ~exist(gnd_file,'file')
23 |         warning('Warning: Downloading the ground-truth...')
24 |         fetch_file(sprintf('%s/gnd/gnd_%s.mat',remote,test_set), data_dir, gnd_file);
25 |     end
26 | 
27 |     % Download the precomputed approximate knn graph for regional large-scale
28 |     large_scale = strcmp(test_set,'oxford105k') || strcmp(test_set,'paris106k');
29 |     if ~exist(graph_file,'file') && large_scale && strcmp(feature_mode,'regional')
30 |         warning('Warning: Downloading the graph file for large-scale...')
31 |         fetch_file(sprintf('%s/data/%s_knn_kgraph_%s.mat',remote,test_set,cnn_model), data_dir, graph_file);
32 |     end
33 | 
34 | end
35 | 
36 | % Download url to dest with wget; on failure remove the partial file and raise an error
37 | function fetch_file(url, data_dir, dest)
38 | 
39 |     if ~exist(data_dir,'dir')
40 |         mkdir(data_dir)
41 |     end
42 |     % both arguments are quoted so that paths containing spaces survive the shell
43 |     status = system(sprintf('wget "%s" -O "%s"', url, dest));
44 |     if status ~= 0
45 |         % wget -O creates dest before the transfer: an unusable file left behind
46 |         % would be taken for a finished download on the next run
47 |         if exist(dest,'file'), delete(dest); end
48 |         error('check_dl_files:downloadFailed', 'Could not download %s (wget exit status %d).', url, status);
49 |     end
50 | 
51 | end
52 | 


--------------------------------------------------------------------------------
/compute_map.m:
--------------------------------------------------------------------------------
  1 | % This function computes the mAP for a given set of returned results.
  2 | %
  3 | % Usage: map = compute_map (ranks, gnd);
  4 | %
  5 | % Notes:
  6 | % 1) ranks starts from 1, size(ranks) = db_size X #queries
  7 | % 2) The junk results (e.g., the query itself) should be declared in the gnd struct array
  8 | function [map, aps] = compute_map (ranks, gnd, isJunkOk, verbose)
  9 | % Mean average precision of a set of ranked lists.
 10 | %
 11 | % ranks:    db_size x #queries, column i holds the ranked database ids of query i
 12 | % gnd:      struct array with field ok and, optionally, field junk (lists of ids)
 13 | % isJunkOk: when true, junk images count as positives (default: false)
 14 | % verbose:  when true, print the positives and the AP of every query (default: false)
 15 | % map:      mean of the per-query average precisions
 16 | % aps:      #queries x 1 vector of average precisions
 17 | 
 18 | if nargin < 3
 19 |     isJunkOk = false;
 20 | end
 21 | if nargin < 4
 22 |     verbose = false;
 23 | end
 24 | 
 25 | map = 0;
 26 | nq = numel (gnd);   % number of queries
 27 | aps = zeros (nq, 1);
 28 | hasjunk = isfield (gnd, 'junk');   % the junk field is optional
 29 | 
 30 | for i = 1:nq
 31 |     % ground-truth lists as rows, whatever orientation they are stored in
 32 |     qgnd = gnd(i).ok(:)';
 33 |     if hasjunk
 34 |         qgndj = gnd(i).junk(:)';
 35 |     else
 36 |         qgndj = [];
 37 |     end
 38 | 
 39 |     if isJunkOk
 40 |         % junk images count as positives, so nothing is left to skip
 41 |         qgnd = [qgnd qgndj];
 42 |         qgndj = [];
 43 |     end
 44 | 
 45 |     % positions of positive and junk images in the ranked list
 46 |     [~, pos] = intersect (ranks (:,i), qgnd);
 47 |     [~, junk] = intersect (ranks (:,i), qgndj);
 48 |     pos = sort (pos);
 49 |     junk = sort (junk);
 50 | 
 51 |     % junk images are ignored: move each positive up by the number of
 52 |     % junk images ranked before it
 53 |     k = 0;
 54 |     ij = 1;
 55 |     for ip = 1:numel (pos)
 56 |         while ij <= numel (junk) && pos (ip) > junk (ij)
 57 |             k = k + 1;
 58 |             ij = ij + 1;
 59 |         end
 60 |         pos (ip) = pos (ip) - k;
 61 |     end
 62 | 
 63 |     ap = score_ap_from_ranks1 (pos, length (qgnd));
 64 | 
 65 |     if verbose
 66 |         fprintf ('query no %d -> gnd = ', i);
 67 |         fprintf ('%d ', qgnd);
 68 |         fprintf ('\n              tp ranks = ');
 69 |         fprintf ('%d ', pos);
 70 |         fprintf (' -> ap=%.3f\n', ap);
 71 |     end
 72 |     map = map + ap;
 73 |     aps (i) = ap;
 74 | end
 75 | 
 76 | map = map / nq;
 77 | 
 78 | end
 79 | 
 80 | 
 81 | % Average precision of one query, from the ranks of its positive images
 82 | function ap = score_ap_from_ranks1 (ranks, nres)
 83 | % ranks: 1-based ranks of the retrieved positives (compute_map passes them sorted)
 84 | % nres:  number of positives of the query, each one is worth 1/nres of recall
 85 | % ap:    sum of the trapezoids under the precision-recall plot
 86 | 
 87 | zranks = ranks - 1;          % zero-based ranks
 88 | dr = 1 / nres;               % recall gained per retrieved positive
 89 | 
 90 | ap = 0;
 91 | j = 0;
 92 | while j < length (ranks)
 93 |     j = j + 1;
 94 |     r = zranks(j);
 95 | 
 96 |     % precision right before and right after the j-th positive
 97 |     p_after = j / (r + 1);
 98 |     p_before = 1.0;
 99 |     if r ~= 0
100 |         p_before = (j - 1) / r;
101 |     end
102 | 
103 |     ap = ap + (p_before + p_after) * dr / 2;
104 | end
105 | 
106 | end
107 | 


--------------------------------------------------------------------------------
/dfs.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
2 | % function to perform diffusion
3 | % solving system A*f = y
4 | function f = dfs(A, y, tol, it)
5 | 	% defaults: tolerance 1e-10 and at most 20 iterations of pcg
6 | 	if nargin < 3, tol = 1e-10; end
7 | 	if nargin < 4, it = 20; end
8 | 	% a second output is requested so that pcg stays silent about convergence
9 | 	[f, ~] = pcg(A, y, tol, it);


--------------------------------------------------------------------------------
/findtrunc.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
2 | % find index for the truncated affinity
3 | function f = findtrunc(v, qv, k, imids)
4 |    % f: positions in imids whose value is one of the k nearest
5 |    %    neighbors of qv in v (knn_wrap is called with batch size 100)
6 |    nn = knn_wrap(v, qv, k, 100);
7 |    f = find(ismember(imids, nn));


--------------------------------------------------------------------------------
/gmpweights.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % function to compute generalized max pooling weights
 3 | % v: dxM set of regional vectors for the dataset
 4 | % imids: MX1 vector of image ids per regional vector
 5 | % coeff: GMP weights, one per regional vector (same size as imids)
 6 | function coeff = gmpweights(v, imids, lambda)
 7 | % lambda: optional regularizer added to the diagonal of the Gram matrix (default 1)
 8 | % imids is expected to hold positive integer image ids.
 9 | 
10 |     if nargin < 3, lambda = 1; end
11 | 
12 |     coeff = zeros(size(imids));
13 |     if isempty(imids), return; end
14 | 
15 |     % positions of the vectors of every image, gathered in a single pass instead of
16 |     % rescanning imids for each image; sorted to keep the order they have in imids
17 |     members = accumarray(imids(:), (1:numel(imids))', [max(imids) 1], @(p) {sort(p)});
18 | 
19 |     for i = 1:numel(members)
20 |         idx = members{i};
21 |         if isempty(idx), continue; end   % image id without any vector
22 |         vi = v(:, idx);
23 |         % weights for generalized max pooling: solve (vi'*vi + lambda*I) * b = 1
24 |         b = double(vi' * vi + lambda .* eye(numel(idx))) \ ones(numel(idx), 1);
25 |         assert( ~any(isnan(b)),'NaN Value')
26 |         coeff(idx) = b;
27 |     end
28 | 


--------------------------------------------------------------------------------
/imgfeatids.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % create image and feature ids for features of an image collection
 3 | % [image_ids, feature_ids] = imgfeatids (nof)
 4 | % nof: Nx1 vector with number of features per image
 5 | % image_ids: Mx1 image id per feature. M = sum(nof)
 6 | % feature_ids: feature id of each feature within its image (restarts at 1 for every image)
 7 | function [image_ids, feature_ids] = imgfeatids (nof)
 8 | % nof may be a row or a column vector; both outputs are row vectors.
 9 | 
10 | nof = nof(:)';                       % the edge list below needs a row
11 | if isempty (nof)
12 |     image_ids = zeros (1, 0);
13 |     feature_ids = zeros (1, 0);
14 |     return;
15 | end
16 | 
17 | % image ids for each database descriptor
18 | cs = cumsum (double (nof));          % position of the last feature of every image
19 | [~, image_ids] = histc (1: cs (end), [1 cs+1]); % bin k spans the features of image k
20 | 
21 | % to create feature ids
22 | feature_ids = 1:sum (nof);
23 | later = cs (1) + 1: length (feature_ids); % features of the first image are left unchanged
24 | % subtract the number of features of all previous images, so that the ids restart at 1 in each image
25 | feature_ids (later) = feature_ids (later) - cs (image_ids (later) - 1);


--------------------------------------------------------------------------------
/knn_wrap.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % compute k-nearest neighbors of a query set to a database set
 3 | % wrapper that uses built-in or yael_nn 
 4 | function [knn, s] = knn_wrap(v, vq, k, batch_size)
 5 | 	% batch_size (default 1000) only matters for the fallback used when yael_nn is not found
 6 | 	if nargin < 4 || isempty(batch_size), batch_size = 1000; end
 7 | 	if ~exist('yael_nn')
 8 | 		[knn, s] = knn_batch(v, vq, k, batch_size);
 9 | 	else
10 | 		[knn, s] = yael_nn(v, -vq, k, 16);   % the queries are negated going in ...
11 | 		s = -s;                               % ... and the sign of the scores is restored
12 | 	end
13 | 
14 | % exact k-nearest neighbors by inner product, processing the queries in batches
15 | function [knn, s] = knn_batch(v, vq, k, batch_size)
16 | 	% v:  dxN database vectors, vq: dxNq query vectors
17 | 	% knn, s: kxNq indices into v and the corresponding inner products,
18 | 	%         each column sorted by decreasing inner product
19 | 
20 | 	Nq = size(vq, 2);
21 | 	N = size(v, 2);
22 | 	if k > N
23 | 		error('knn_batch:tooFewVectors', 'Cannot return %d neighbors from a database of %d vectors.', k, N);
24 | 	end
25 | 
26 | 	knn = zeros(k, Nq);
27 | 	s = zeros(k, Nq);
28 | 	if Nq == 0, return; end              % nothing to search for
29 | 	batch_size = min(batch_size, Nq);
30 | 
31 | 	for i = 1:ceil(Nq/batch_size)
32 | 			cols = (i-1) * batch_size + [1:batch_size];
33 | 			cols(cols>Nq) = [];
34 | 
35 | 			x = v'*vq(:, cols);
36 | 			% the dimension is given explicitly: with a single database vector x is a
37 | 			% row, and sort would otherwise run across the queries
38 | 			[sx, ix] = sort(x, 1, 'descend');
39 | 			knn(:, cols) = ix(1:k, :);
40 | 			s(:, cols) = sx(1:k, :);
41 | 	end
42 | 


--------------------------------------------------------------------------------
/knngraph.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % A = knngraph(knn, sim)
 3 | % create the affinity matrix for the mutual kNN graph based on the knn lists
 4 | % knn: kxN list of knn per vector
 5 | % sim: kxN list of corresponding similarities for knn
 6 | % A: sparse affinity matrix NxN
 7 | function A = knngraph(knn, sim)
 8 | 
 9 | 	N = size(knn, 2);
10 | 	sim(sim<0) = 0; % similarity should be non-negative
11 | 
12 | 	% the edges of every node are collected in cells and concatenated once,
13 | 	% instead of growing the three lists inside the loop
14 | 	rows = cell(N, 1);
15 | 	cols = cell(N, 1);
16 | 	vals = cell(N, 1);
17 | 	for i = 1:N
18 | 		% neighbor j of i is kept only if i appears in the knn list of j as well;
19 | 		% a logical mask keeps the three lists the same length even when a list
20 | 		% contains an id more than once
21 | 		mutual = any(ismember(knn(:, knn(:, i)), i), 1)';
22 | 		rows{i} = i * ones(nnz(mutual), 1);
23 | 		cols{i} = knn(mutual, i);
24 | 		vals{i} = double(sim(mutual, i));
25 | 	end
26 | 	I = vertcat(rows{:});
27 | 	J = vertcat(cols{:});
28 | 	W = vertcat(vals{:});
29 | 
30 | 	A = sparse(I, J, W, N, N, numel(W));
31 | 	A(1:N+1:end) = 0;  % diagonal to 0


--------------------------------------------------------------------------------
/load_approx_knn.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | 
 3 | function [knn_, s_] = load_approx_knn( graph_file )
 4 | % Load the precomputed approximate kNN graph stored in graph_file.
 5 | % The file must hold the variables knn_ and s_; s_ is returned with its sign flipped.
 6 | 
 7 | if ~exist(graph_file,'file')
 8 |     error('load_approx_knn:fileNotFound', ...
 9 |         ['Precomputed approximate knn file not found: %s\n', ...
10 |         'You are strongly advised to download the precomputed knn file for large-scale.\n',...
11 |         'Otherwise compute the kNN graph yourself, e.g. with knn_wrap as run_test.m does for the other datasets. ',...
12 |         'Beware that it may take a while to compute it\n'], graph_file);
13 | end
14 | 
15 | fprintf('Warning! Loading the precomputed approximate nearest neighbors for large-scale \n')
16 | % loaded into a struct, so that no variable appears implicitly in this workspace
17 | % and a file without knn_ or s_ fails with a message naming the missing field
18 | stored = load(graph_file);
19 | knn_ = stored.knn_;
20 | s_ = -stored.s_;
21 | 
22 | end
23 | 


--------------------------------------------------------------------------------
/load_vectors.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % script to load saved vectors 
 3 | 
 4 | % Fail early on an unknown mode, before anything is downloaded: the switch
 5 | % below would otherwise leave V and qV undefined without any message
 6 | if ~any(strcmp(feature_type, {'regional', 'global'}))
 7 |     error('Wrong feature mode.');
 8 | end
 9 | 
10 | % Check if feature and ground-truth files exist,download them otherwise
11 | [data_file, gnd_file, graph_file] = check_dl_files(data_dir,test_set,cnn_model,feature_type);
12 | 
13 | % Load features and ground-truth files into the workspace of the calling script;
14 | % the code below reads the variables reg and glob, which the files are expected to provide
15 | load(data_file);
16 | load(gnd_file);
17 | 
18 | switch feature_type
19 |     case 'regional'
20 |         V = reg.V;
21 |         qV = reg.Q;
22 |         
23 |         % Keep the global vectors for truncation in large-scale 
24 |         if strcmp(test_set,'oxford105k') || strcmp(test_set,'paris106k')
25 |             Vextra = cell2mat(glob.V);
26 |             qVextra = cell2mat(glob.Q);  
27 |         end
28 |        
29 |     case 'global'
30 |         V = glob.V;
31 |         qV = glob.Q;
32 | end
33 | 
34 | % the structs are no longer needed once V and qV are extracted
35 | clear reg glob;


--------------------------------------------------------------------------------
/run_test.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | 
 3 | alpha               = 0.99;                         % alpha for diffusion
 4 | it                  = 20;                           % iterations for CG
 5 | tol                 = 1e-6;                         % tolerance for CG
 6 | gamma               = 3;                            % similarity exponent
 7 | 
 8 | test_set            = 'oxford5k';                   % oxford5k, paris6k, instre, oxford105k, paris106k
 9 | cnn_model           = 'siamac';                     % siamac or resnet
10 | feature_type        = 'regional';                   % regional or global    
11 | 
12 | data_dir            = 'data/';                      % this is where descriptors should be stored
13 | 
14 | % reject a wrong mode before anything is downloaded or loaded
15 | if ~any(strcmp(feature_type, {'global', 'regional'})), error('Wrong feature mode.'); end
16 | load_vectors;
17 | 
18 | % flags are set after load_vectors, so that nothing loaded there can overwrite them
19 | regional = strcmp(feature_type, 'regional');
20 | % large-scale regional search: precomputed graph and per-query truncation
21 | dotrunc = regional && any(strcmp(test_set, {'oxford105k', 'paris106k'}));
22 | 
23 | ndes = cellfun(@(x) size(x, 2), V);                    % number of vectors per image
24 | [imids, ~] = imgfeatids (ndes);                        % image id of every database vector
25 | lvecs = cell2mat(V);                                   % set of all database vectors
26 | Nf = numel(imids);                                     % number of database vectors
27 | N = max(imids);                                        % number of images
28 | 
29 | fprintf('**** %s - Diffuse %s **** \n',test_set,feature_type);
30 | 
31 | % Off-line regional pooling weights; only the regional pooling step reads them
32 | if regional
33 |     fprintf('Computing the pooling weights... \n') 
34 |     coeff = gmpweights(lvecs, imids);
35 | end
36 | 
37 | % Diffusion parameters
38 | if regional, k = 200; kq = k; else k = 50; kq = 10; end
39 | if dotrunc, topn = 10000; end                          % images kept per query when truncating
40 | 
41 | % Create the graph
42 | if dotrunc
43 |     fprintf('Loading pre-computed kNN graph for large scale regional\n')  
44 |     [knn_, s_] = load_approx_knn( graph_file );
45 | else
46 |     fprintf('Computing kNN graph\n')  
47 |     [knn_, s_] = knn_wrap(lvecs, lvecs, k, 100);
48 | end
49 | A_ = knngraph(knn_(1:k, :), s_(1:k, :) .^ gamma);
50 | 
51 | % in case of truncation the Laplacian is computed per query
52 | if ~dotrunc 
53 |     S = transition_matrix(A_);
54 |     A = speye(size(S)) - alpha * S;
55 |     clear A_ S; 
56 | end
57 | 
58 | % Query
59 | scores = cell(1, numel(gnd));                              % one row of image scores per query
60 | for q = 1:numel(gnd) % number of queries
61 | 
62 |     if ~dotrunc
63 |         y = ymake(lvecs, qV{q}, kq, gamma);                 % construction of y vector
64 |         f = dfs(A,y,tol,it)';                               % diffusion
65 |     else
66 |         sub = findtrunc(Vextra, qVextra(:,q), topn, imids);  % sub-index for truncation
67 |         L = trunclap(A_, sub, alpha);                       % truncated Laplacian
68 |         y = ymake(lvecs(:, sub), qV{q}, kq, gamma);         % construction of y vector
69 |         f = zeros(1,numel(imids)); 
70 |         f(sub) = dfs(L,y,tol,it);                           % diffusion 
71 |     end
72 | 
73 |     % pooling step for regional case
74 |     if regional
75 |        f = accumarray(imids', f .*coeff, [N 1])';  
76 |     end
77 |     scores{q} = f;
78 | end
79 | 
80 | % sort images and evaluate
81 | [~, ranks] = sort (cell2mat(scores')', 'descend');
82 | map = compute_map (ranks, gnd);
83 | fprintf('k %d, map %.4f\n', k, map);
84 | 


--------------------------------------------------------------------------------
/transition_matrix.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
2 | % construct transition matrix S from affinity matrix W
3 | function [S,D] = transition_matrix(W)
4 | 	% S = D*W*D, D = diag(deg.^-0.5) with deg the row sums of W
5 | 	deg = full(sum(W, 2));
6 | 	dinv = zeros(size(deg));             % nodes without edges get 0 instead of Inf
7 | 	dinv(deg ~= 0) = deg(deg ~= 0) .^ -0.5;
8 | 	D = spdiags (dinv, 0, size (W, 1), size (W, 1));
9 | 	S = D * W * D;


--------------------------------------------------------------------------------
/trunclap.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
2 | % create truncated Laplacian 
3 | function L = trunclap(A, sub, alpha)
4 |     % L = I - alpha * S, where S is the transition matrix of A restricted to sub
5 |     Asub = A(sub, sub);
6 |     S = transition_matrix(Asub);
7 |     L = speye(size(S)) - alpha * S;


--------------------------------------------------------------------------------
/ymake.m:
--------------------------------------------------------------------------------
 1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017. 
 2 | % construction of vector y for the query vector
 3 | % y = ymake(V, qv, k, gamma)
 4 | % v: dataset vectors
 5 | % qv: query vector
 6 | % k: number of nearest neighbors to keep
 7 | % gamma: similarity exponent  
 8 | function y = ymake(v, qv, k, gamma)
 9 | % y: Nx1 vector (N = number of columns of v), zero except for the k best matching vectors
10 | 
11 |     N = size(v, 2);
12 |     y = zeros(N,1);
13 |     k = min(k, N);     % a small (e.g. truncated) dataset may hold fewer than k vectors
14 |     if k < 1, return; end
15 | 
16 |     [knn, s] = knn_wrap(v, qv, k, 100);  
17 |     % one score per dataset vector: its similarities summed over the query
18 |     % vectors that have it among their neighbors
19 |     sc = accumarray(knn(:), s(:), [N 1]);
20 |     [s,knn] = sort(sc,'descend');
21 |   
22 |     % the k best scores are raised to gamma, negative results are clipped to 0
23 |     y(knn(1:k)) = max(s(1:k) .^ gamma , 0);


--------------------------------------------------------------------------------