├── README.md ├── banana3.m ├── check_dl_files.m ├── compute_map.m ├── dfs.m ├── findtrunc.m ├── gmpweights.m ├── imgfeatids.m ├── knn_wrap.m ├── knngraph.m ├── load_approx_knn.m ├── load_vectors.m ├── run_test.m ├── transition_matrix.m ├── trunclap.m └── ymake.m /README.md: -------------------------------------------------------------------------------- 1 | # Diffusion for image retrieval 2 | 3 | This is a Matlab script for our paper: 4 | 5 | > A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. "Efficient Diffusion on Region Manifolds: Recovering Small Objects with Compact CNN Representations", CVPR 2017 6 | 7 | ## Execution 8 | Run the following script: 9 | ``` 10 | >> run_test 11 | ``` 12 | 13 | We provide the descriptors used in our paper and also the necessary ground-truth files for mAP evaluation. 14 | In addition to that, we also make available the approximate kNN graph computed off-line with Dong et al. [10] for large-scale datasets. All the required files are stored at the link below and will be downloaded automatically when the script run_test.m is run. 15 | 16 | ftp://ftp.irisa.fr/local/texmex/corpus/diffusion/ 17 | 18 | Optional but recommended prerequisite: 19 | Yael Library 20 | http://yael.gforge.inria.fr/ 21 | 22 | All kNN computation will be done with Yael Library if it is found in the path. Even though this is not strictly required, we advise that Yael is installed for efficiency purposes. 23 | 24 | Any questions or comments should be addressed to ahmet.iscen@inria.fr 25 | 26 | 27 | ## License 28 | 29 | This package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 30 | 31 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details.
32 |
33 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
34 |
--------------------------------------------------------------------------------
/banana3.m:
--------------------------------------------------------------------------------
1 | % Generate a toy dataset made of 3 manifolds in the 2D space
2 | % X = banana3(n);
3 | % n: number of points per manifold (defaults to 100)
4 | % X: [3n x 2] matrix of data points, stacked manifold after manifold
5 | %
6 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
7 | % Use rng(155); n = 100; to identically fit our Figure 1 in the CVPR 2017 paper.
8 | function X = banana3(n)
9 |
10 | if nargin < 1
11 |     n = 100;
12 | end
13 |
14 | offset = .25;           % shift applied between the arcs
15 | npts = n + 1;           % the first two arcs are sampled with one extra point
16 | mirror = [1 0; 0 -1];   % reflection about the x-axis
17 |
18 | % The arcs are drawn in this fixed order because every call to banana
19 | % consumes random numbers; reordering would change the output for a given seed.
20 | first = banana(npts) + repmat([1-offset, 0], [npts 1]);
21 | second = banana(npts) + repmat([3+offset, 0], [npts 1]);
22 | third = banana(n) * mirror + repmat([2, offset], [n 1]);
23 |
24 | % rows 2..n+1 of the first arc, rows 1..n of the second, all n rows of the third
25 | X = [first(2:npts, :); second(1:n, :); third];
26 | end
27 |
28 | % Sample n points along a half circle (angles 0..pi) of unit radius with
29 | % radial Gaussian noise.
30 | % a: scale of the radial noise (default .12)
31 | % b: fraction of pi by which the sine noise envelope is offset at both ends (default .1)
32 | function X = banana(n, a, b)
33 |
34 | if nargin < 2
35 |     a = .12;
36 | end
37 | if nargin < 3
38 |     b = .1;
39 | end
40 |
41 | theta = linspace(0, pi, n)';
42 | phase = (1 - 2 * b) * theta + b * pi;      % runs from b*pi to (1-b)*pi
43 | radius = a * randn(n, 1) .* sin(phase) + 1;
44 | X = [radius .* cos(theta), radius .* sin(theta)];
45 |
46 | end
47 |
--------------------------------------------------------------------------------
/check_dl_files.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % Check if the required files exist, download them otherwise
3 | %   [data_file, gnd_file, graph_file] = check_dl_files(data_dir, test_set, cnn_model, feature_mode)
4 | %   data_dir:     directory where the files are stored (created if missing)
5 | %   test_set:     dataset name, used to build the file names
6 | %   cnn_model:    network name, used to build the file names
7 | %   feature_mode: the kNN graph file is only fetched when this is 'regional'
8 | %                 and test_set is 'oxford105k' or 'paris106k'
9 | %   Returns the local paths of the descriptor, ground-truth and kNN graph files.
10 | %   Downloads are done with the external wget command; an error is raised
11 | %   when a download fails.
12 | function [data_file, gnd_file, graph_file] = check_dl_files(data_dir,test_set,cnn_model,feature_mode)
13 |
14 | base_url = 'ftp://ftp.irisa.fr/local/texmex/corpus/diffusion';
15 |
16 | data_file = sprintf('%s/%s_%s.mat',data_dir,test_set,cnn_model);
17 | gnd_file = sprintf('%s/gnd_%s.mat',data_dir,test_set);
18 | graph_file = sprintf('%s/%s_knn_kgraph_%s.mat',data_dir,test_set,cnn_model);
19 |
20 | if ~exist(data_file,'file')
21 |     warning('Warning: Downloading descriptors...')
22 |     fetch_file(sprintf('%s/data/%s_%s.mat',base_url,test_set,cnn_model), data_file, data_dir);
23 | end
24 |
25 | if ~exist(gnd_file,'file')
26 |     warning('Warning: Downloading the ground-truth...')
27 |     fetch_file(sprintf('%s/gnd/gnd_%s.mat',base_url,test_set), gnd_file, data_dir);
28 | end
29 |
30 | % Download the precomputed approximate knn graph for regional large-scale
31 | is_large = strcmp(test_set,'oxford105k') || strcmp(test_set,'paris106k');
32 | if ~exist(graph_file,'file') && is_large && strcmp(feature_mode,'regional')
33 |     warning('Warning: Downloading the graph file for large-scale...')
34 |     fetch_file(sprintf('%s/data/%s_knn_kgraph_%s.mat',base_url,test_set,cnn_model), graph_file, data_dir);
35 | end
36 |
37 | end
38 |
39 | % Download url to target with wget, creating data_dir first if needed.
40 | function fetch_file(url, target, data_dir)
41 |
42 | if ~exist(data_dir,'dir')
43 |     mkdir(data_dir)
44 | end
45 |
46 | % quote both arguments so that paths containing spaces survive the shell
47 | status = system(sprintf('wget "%s" -O "%s"', url, target));
48 | if status ~= 0
49 |     % wget -O creates the output file even when the transfer fails; remove
50 |     % it so that the next run does not mistake it for a valid download
51 |     if exist(target,'file')
52 |         delete(target);
53 |     end
54 |     error('check_dl_files:downloadFailed', 'Could not download %s (wget exit status %d).', url, status);
55 | end
56 |
57 | end
58 |
59 |
--------------------------------------------------------------------------------
/compute_map.m:
--------------------------------------------------------------------------------
1 | % This function computes the mAP for a given set of returned results.
2 | %
3 | % Usage: [map, aps] = compute_map (ranks, gnd, isJunkOk, verbose);
4 | %
5 | %   ranks:    db_size X #queries matrix, column i is the ranked list of query i
6 | %   gnd:      struct array with one element per query; field 'ok' lists the
7 | %             positive images, optional field 'junk' lists the junk images
8 | %   isJunkOk: if true, junk images are counted as positives (default false)
9 | %   verbose:  if true, print the per-query positive ranks and AP (default false)
10 | %   map:      mean of the per-query average precisions
11 | %   aps:      #queries X 1 vector of average precisions
12 | %
13 | % Notes:
14 | % 1) ranks starts from 1, size(ranks) = db_size X #queries
15 | % 2) The junk results (e.g., the query itself) should be declared in the gnd struct array
16 | function [map, aps] = compute_map (ranks, gnd, isJunkOk, verbose)
17 |
18 | if nargin < 3
19 |     isJunkOk = false;
20 | end
21 |
22 | if nargin < 4
23 |     verbose = false;
24 | end
25 |
26 | map = 0;
27 | nq = numel (gnd); % number of queries
28 | aps = zeros (nq, 1);
29 |
30 | for i = 1:nq
31 |     qgnd = gnd(i).ok;
32 |
33 |     if isJunkOk
34 |         qgndj = [];
35 |         qgnd = [qgnd gnd(i).junk];
36 |     elseif isfield (gnd(i), 'junk')
37 |         qgndj = gnd(i).junk;
38 |     else
39 |         qgndj = [];
40 |     end
41 |
42 |     % positions of positive and junk images
43 |     [~, pos] = intersect (ranks (:,i), qgnd);
44 |     [~, junk] = intersect (ranks (:,i), qgndj);
45 |
46 |     pos = sort(pos);
47 |     junk = sort(junk);
48 |
49 |     k = 0;   % number of junk images seen so far
50 |     ij = 1;  % next junk position to examine
51 |
52 |     if ~isempty (junk)
53 |         % decrease positions of positives based on the number of junk images appearing before them
54 |         for ip = 1:numel (pos)
55 |             % && guarantees junk (ij) is only read while ij is in range
56 |             while ij <= numel (junk) && pos (ip) > junk (ij)
57 |                 k = k + 1;
58 |                 ij = ij + 1;
59 |             end
60 |             pos (ip) = pos (ip) - k;
61 |         end
62 |     end
63 |
64 |     ap = score_ap_from_ranks1 (pos, length (qgnd));
65 |
66 |     if verbose
67 |         fprintf ('query no %d -> gnd = ', i);
68 |         fprintf ('%d ', qgnd);
69 |         fprintf ('\n tp ranks = ');
70 |         fprintf ('%d ', pos);
71 |         fprintf (' -> ap=%.3f\n', ap);
72 |     end
73 |     map = map + ap;
74 |     aps (i) = ap;
75 |
76 | end
77 | map = map / nq;
78 |
79 | end
80 |
81 |
82 | % This function computes the AP for a query
83 | %   ranks: sorted 1-based positions of the positives in the ranked list
84 | %   nres:  total number of positives for the query
85 | function ap = score_ap_from_ranks1 (ranks, nres)
86 |
87 | % number of images ranked by the system
88 | nimgranks = length (ranks);
89 | ranks = ranks - 1; % switch to 0-based positions
90 |
91 | % accumulate trapezoids in PR-plot
92 | ap = 0;
93 |
94 | recall_step = 1 / nres;
95 |
96 | for j = 1:nimgranks
97 |     r = ranks(j);
98 |
99 |     % precision just before and just after the j-th positive is retrieved
100 |     if r == 0
101 |         precision_0 = 1.0;
102 |     else
103 |         precision_0 = (j - 1) / r;
104 |     end
105 |
106 |     precision_1 = j / (r + 1);
107 |     ap = ap + (precision_0 + precision_1) * recall_step / 2;
108 | end
109 |
110 | end
111 |
--------------------------------------------------------------------------------
/dfs.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % function to perform diffusion
3 | % solving system A*f = y with the conjugate gradient solver pcg
4 | %   tol: tolerance passed to pcg (default 1e-10)
5 | %   it:  maximum number of iterations passed to pcg (default 20)
6 | function f = dfs(A, y, tol, it)
7 | if nargin < 4, it = 20; end
8 | if nargin < 3, tol = 1e-10; end
9 |
10 | % the extra outputs of pcg are requested but ignored, so the solution is
11 | % returned whether or not pcg reports convergence
12 | [f,~,~,~] = pcg(A,y,tol,it);
--------------------------------------------------------------------------------
/findtrunc.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % find index for the truncated affinity
3 | %   v:     database vectors searched with knn_wrap
4 | %   qv:    query vector(s)
5 | %   k:     number of nearest neighbors to retrieve
6 | %   imids: image id per feature
7 | %   f:     positions in imids whose image id is among the retrieved neighbors
8 | function f = findtrunc(v, qv, k, imids)
9 |
10 | [knn, ~] = knn_wrap(v, qv, k,100);
11 | f = find(ismember(imids,knn));
12 |
--------------------------------------------------------------------------------
/gmpweights.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % function to compute generalized max pooling weights
3 | % v: dxM set of regional vectors for the dataset
4 | % imids: vector with M elements holding the image id of each regional vector
5 | % coeff: GMP weights, same size as imids
6 | function coeff = gmpweights(v, imids)
7 |
8 | lambda = 1; % regularization added to the diagonal of the Gram matrix
9 |
10 | coeff = zeros(size(imids));
11 | for i = 1:max(imids)
12 |     % select the vectors of image i once instead of rebuilding the mask
13 |     sel = (imids == i);
14 |     ni = nnz(sel);
15 |     vi = v(:, sel);
16 |
17 |     % weights for generalized max pooling: solve (vi'*vi + lambda*I) * b = 1
18 |     b = double(vi' * vi + lambda .* eye(ni)) \ ones(ni, 1);
19 |     assert( ~any(isnan(b)),'NaN Value')
20 |     coeff(sel) = b;
21 | end
22 |
--------------------------------------------------------------------------------
/imgfeatids.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % create image and feature ids for features of an image collection
3 | % [image_ids, feature_ids] = imgfeatids (nof)
4 | % nof: vector with N elements, number of features per image (row or column)
5 | % image_ids: 1xM image id per feature. M = sum(nof)
6 | % feature_ids: 1xM feature id, restarting from 1 within each image
7 | function [image_ids, feature_ids] = imgfeatids (nof)
8 |
9 | % image ids for each database descriptor
10 | % (nof is flattened to a row so that the bin edges below can be concatenated)
11 | cs = cumsum(double (nof(:)'));
12 | [~, image_ids] = histc (1: cs (end), [1 cs+1]); %image ids here
13 |
14 | % to create feature ids
15 | feature_ids = 1:sum (nof);
16 | tail = cs (1) + 1: length (feature_ids); %range such that values of first image are left unchanged
17 | feature_ids (tail) = feature_ids (tail) - cs ( image_ids (tail) - 1); % subtract number of features of previous images such that fids becomes number of feature id per image
--------------------------------------------------------------------------------
/knn_wrap.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % compute k-nearest neighbors of a query set to a database set
3 | % wrapper that uses built-in or yael_nn
4 | %   v:          dxN database vectors, one per column
5 | %   vq:         dxNq query vectors, one per column
6 | %   k:          number of neighbors to return
7 | %   batch_size: queries processed at once by the built-in search (default 1000)
8 | %   knn:        kxNq indices of the neighbors, most similar first
9 | %   s:          kxNq corresponding inner products
10 | function [knn, s] = knn_wrap(v, vq, k, batch_size)
11 |
12 | if nargin < 4, batch_size = 1000; end
13 |
14 | if ~exist('yael_nn')
15 |     [knn, s] = knn_batch(v, vq, k, batch_size);
16 | else
17 |     % NOTE(review): the queries are negated before the call and the returned
18 |     % values are negated back; presumably distance type 16 ranks by
19 |     % increasing inner product -- confirm against the Yael documentation
20 |     [knn, s] = yael_nn(v, -vq, k, 16);
21 |     s = -s;
22 | end
23 |
24 | % compute k-nearest neighbors of a query set to a database set
25 | % exhaustive inner-product search, processing batch_size queries at a time
26 | function [knn, s] = knn_batch(v, vq, k, batch_size)
27 |
28 | Nq = size(vq, 2);
29 | knn = zeros(k, Nq);
30 | s = zeros(k, Nq);
31 |
32 | % nothing to search for: avoids an undefined 0/0 batch count
33 | if Nq == 0, return; end
34 |
35 | batch_size = min(batch_size, Nq);
36 |
37 | for first = 1:batch_size:Nq
38 |     cols = first:min(first + batch_size - 1, Nq);
39 |
40 |     x = v'*vq(:, cols);
41 |     % sort along the database dimension explicitly; without the dimension
42 |     % argument a single database vector (x is a row) is sorted across queries
43 |     [sx, ix] = sort(x, 1, 'descend');
44 |     knn(:, cols) = ix(1:k, :);
45 |     s(:, cols) = sx(1:k, :);
46 | end
47 |
--------------------------------------------------------------------------------
/knngraph.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % A = knngraph(knn, sim)
3 | % create the affinity matrix for the mutual kNN graph based on the knn lists
4 | % knn: kxN list of knn per vector
5 | % sim: kxN list of corresponding similarities for knn
6 | % A: sparse affinity matrix NxN with zero diagonal; A(i,j) is non-zero only
7 | %    when j is in the knn list of i and i is in the knn list of j
8 | function A = knngraph(knn, sim)
9 |
10 | N = size(knn, 2);
11 | sim(sim<0) = 0; % similarity should be non-negative
12 |
13 | % collect the edges of every node in cells and concatenate once at the end,
14 | % instead of growing three arrays inside the loop
15 | rows = cell(N, 1);
16 | cols = cell(N, 1);
17 | vals = cell(N, 1);
18 | for i = 1:N
19 |     nbrs = knn(:, i);
20 |     % mutual(j) is true when i appears in the knn list of its j-th neighbor;
21 |     % a logical mask keeps the edge count consistent even if a list
22 |     % contains repeated entries
23 |     mutual = any(knn(:, nbrs) == i, 1)';
24 |     rows{i} = repmat(i, nnz(mutual), 1);
25 |     cols{i} = double(nbrs(mutual));
26 |     vals{i} = double(sim(mutual, i));
27 | end
28 | I = vertcat(rows{:});
29 | J = vertcat(cols{:});
30 | W = vertcat(vals{:});
31 |
32 | % diagonal to 0: drop self-edges before building the sparse matrix
33 | keep = (I ~= J);
34 | A = sparse(I(keep), J(keep), W(keep), N, N);
--------------------------------------------------------------------------------
/load_approx_knn.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % load the precomputed approximate kNN lists stored in graph_file
3 | %   graph_file: .mat file holding the variables knn_ and s_
4 | %   knn_: neighbor lists as stored in the file
5 | %   s_:   stored values with their sign flipped
6 | %         NOTE(review): presumably the file stores negated similarities -- confirm
7 | function [knn_, s_] = load_approx_knn( graph_file )
8 |
9 |
10 | if ~exist(graph_file,'file')
11 |     error(sprintf(['Precomputed approximate knn file not found.\n', ...
12 |         'You are strongly advised to download the precomputed knn file for large-scale.\n',...
13 |         'Otherwise run the following line in run_test.m. Beware that it may take a while to compute it\n']))
14 | end
15 |
16 | fprintf('Warning! Loading the precomputed approximate nearest neighbors for large-scale \n')
17 | % load into a struct so that only the two expected variables enter this workspace
18 | graph = load(graph_file, 'knn_', 's_');
19 | knn_ = graph.knn_;
20 | s_ = -graph.s_;
21 |
22 | end
23 |
24 |
--------------------------------------------------------------------------------
/load_vectors.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % script to load saved vectors
3 | % expects data_dir, test_set, cnn_model and feature_type in the workspace;
4 | % defines V, qV and gnd (plus Vextra, qVextra for large-scale regional features)
5 |
6 | % Check if feature and ground-truth files exist,download them otherwise
7 | [data_file, gnd_file, graph_file] = check_dl_files(data_dir,test_set,cnn_model,feature_type);
8 |
9 | % Load features and ground-truth files
10 | % (the structs reg and glob used below, and gnd used by run_test, come from these files)
11 | load(data_file);
12 | load(gnd_file);
13 |
14 | switch feature_type
15 |     case 'regional'
16 |         V = reg.V;
17 |         qV = reg.Q;
18 |
19 |         % Keep the global vectors for truncation in large-scale
20 |         if strcmp(test_set,'oxford105k') || strcmp(test_set,'paris106k')
21 |             Vextra = cell2mat(glob.V);
22 |             qVextra = cell2mat(glob.Q);
23 |         end
24 |
25 |     case 'global'
26 |         V = glob.V;
27 |         qV = glob.Q;
28 |
29 |     otherwise
30 |         % fail here rather than later on an undefined V
31 |         error('Wrong feature mode.');
32 | end
33 |
34 | clear reg;
35 | clear glob;
--------------------------------------------------------------------------------
/run_test.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % script running the full retrieval experiment: load the descriptors, build
3 | % the kNN graph, diffuse every query and report the mAP
4 |
5 | alpha = 0.99; % alpha for diffusion
6 | it = 20; % iterations for CG
7 | tol = 1e-6; % tolerance for CG
8 | gamma = 3; % similarity exponent
9 |
10 | test_set = 'oxford5k'; % oxford5k, paris6k, instre, oxford105k, paris106k
11 | cnn_model = 'siamac'; % siamac or resnet
12 | feature_type = 'regional'; % regional or global
13 |
14 | data_dir = 'data/'; % this is where descriptors should be stored
15 | load_vectors;
16 |
17 | is_regional = strcmp(feature_type,'regional');
18 | is_large = strcmp('oxford105k',test_set) || strcmp('paris106k',test_set);
19 |
20 | ndes = cell2mat(cellfun(@(x)size(x, 2), V, 'un', 0)); % number of vectors per image
21 | [imids, ~] = imgfeatids (ndes); % image and region ids here
22 | lvecs = cell2mat(V); % set of all database vectors
23 | Nf = numel(imids); % number of database vectors
24 | N = max(imids); % number of images
25 |
26 | fprintf('**** %s - Diffuse %s **** \n',test_set,feature_type);
27 |
28 | % Off-line regional pooling weights (only used by the regional pooling step)
29 | if is_regional
30 |     fprintf('Computing the pooling weights... \n')
31 |     coeff = gmpweights(lvecs, imids);
32 | end
33 |
34 | % Diffusion parameters
35 | if strcmp(feature_type,'global'), k = 50; kq = 10;
36 | elseif is_regional, k = 200; kq = k;
37 | else error('Wrong feature mode.'); end
38 |
39 | % Enable truncation for large-scale
40 | dotrunc = is_large && is_regional;
41 | if dotrunc, topn = 10000; end
42 |
43 | % Create the graph
44 | if dotrunc
45 |     fprintf('Loading pre-computed kNN graph for large scale regional\n')
46 |     [knn_, s_] = load_approx_knn( graph_file );
47 | else
48 |     fprintf('Computing kNN graph\n')
49 |     [knn_, s_] = knn_wrap(lvecs, lvecs, k, 100);
50 | end
51 | A_ = knngraph(knn_(1:k, :), s_(1:k, :) .^ gamma);
52 |
53 | % in case of truncation the Laplacian is computed per query
54 | if ~dotrunc
55 |     S = transition_matrix(A_);
56 |     A = speye(size(S)) - alpha * S;
57 |     clear A_ S;
58 | end
59 |
60 | % Query
61 | nq = numel(gnd); % number of queries
62 | scores = cell(1, nq);
63 | for q = 1:nq
64 |
65 |     if ~dotrunc
66 |         y = ymake(lvecs, qV{q}, kq, gamma); % construction of y vector
67 |         f = dfs(A,y,tol,it)'; % diffusion
68 |     else
69 |         sub = findtrunc(Vextra, qVextra(:,q), topn, imids); % sub-index for truncation
70 |         L = trunclap(A_, sub, alpha); % truncated Laplacian
71 |         y = ymake(lvecs(:, sub), qV{q}, kq, gamma); % construction of y vector
72 |         f = zeros(1,numel(imids));
73 |         f(sub) = dfs(L,y,tol,it); % diffusion
74 |     end
75 |
76 |     % pooling step for regional case
77 |     if is_regional
78 |         f = accumarray(imids', f .*coeff, [N 1])';
79 |     end
80 |     scores{q} = f;
81 | end
82 |
83 | % sort images and evaluate (one column of ranks per query)
84 | [~, ranks] = sort (cell2mat(scores')', 1, 'descend');
85 | map = compute_map (ranks, gnd);
86 | fprintf('k %d, map %.4f\n', k, map);
87 |
--------------------------------------------------------------------------------
/transition_matrix.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % construct transition matrix S from affinity matrix W
3 | %   W: affinity matrix
4 | %   D: sparse diagonal matrix holding the row sums of W raised to -1/2
5 | %      (0 for rows whose sum is zero)
6 | %   S: D * W * D
7 | function [S,D] = transition_matrix(W)
8 |
9 | np = size (W, 1);
10 | d = full(sum(W,2)).^-0.5;
11 | % nodes without any edge have zero degree, which yields Inf above; use 0
12 | % instead so that they contribute empty rows/columns rather than Inf or NaN
13 | d(~isfinite(d)) = 0;
14 | D = spdiags (d, 0, np, np);
15 | S = D * W * D;
16 |
--------------------------------------------------------------------------------
/trunclap.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % create truncated Laplacian
3 | %   A:     affinity matrix
4 | %   sub:   indices of the nodes to keep
5 | %   alpha: diffusion parameter
6 | %   L:     I - alpha * S, with S the transition matrix of A restricted to sub
7 | function L = trunclap(A, sub, alpha)
8 |
9 | S = transition_matrix(A(sub, sub));
10 | L = speye(size(S)) - alpha * S;
11 |
--------------------------------------------------------------------------------
/ymake.m:
--------------------------------------------------------------------------------
1 | % Authors: A. Iscen, G. Tolias, Y. Avrithis, T. Furon, O. Chum. 2017.
2 | % construction of vector y for the query vector
3 | % y = ymake(V, qv, k, gamma)
4 | % v: dataset vectors, one per column
5 | % qv: query vector(s), one per column
6 | % k: number of nearest neighbors to keep
7 | % gamma: similarity exponent
8 | % y: Nx1 vector, non-zero on at most k dataset vectors (N = number of dataset vectors)
9 | function y = ymake(v, qv, k, gamma)
10 |
11 | N = size(v, 2);
12 | [nn_ids, nn_sim] = knn_wrap(v, qv, k, 100);
13 |
14 | % sum the similarities each dataset vector receives over all query vectors
15 | total = accumarray(nn_ids(:), nn_sim(:), [N 1]);
16 | [best_sim, best_ids] = sort(total,'descend');
17 |
18 | % keep the k largest totals, raised to gamma and clipped at zero
19 | y = zeros(N,1);
20 | y(best_ids(1:k)) = max(best_sim(1:k) .^ gamma , 0);
--------------------------------------------------------------------------------