├── README.md
├── params.txt
├── results_OPE
│   ├── UAV123
│   │   ├── error_OPE.png
│   │   └── overlap_OPE.png
│   └── UAVDT
│       ├── error_OPE.png
│       └── overlap_OPE.png
├── run_AMCF.m
├── tracker.m
├── tracker
│   ├── computeHistogram.m
│   ├── cropFilterResponse.m
│   ├── fcn.m
│   ├── fhog.m
│   ├── freezeColors.m
│   ├── gaussianResponse.m
│   ├── getCenterLikelihood.m
│   ├── getColourMap.m
│   ├── getFeatureMap.m
│   ├── getP.m
│   ├── getScaleSubwindow.m
│   ├── getSubwindow.m
│   ├── readParams.m
│   └── updateHistModel.m
└── utility
    ├── dct.m
    ├── dct2.m
    ├── get_axis_aligned_BB.m
    ├── gradientMex.mexw64
    ├── im2c.m
    ├── imResample.m
    ├── imResampleMex.cpp
    ├── imResampleMex.mexa64
    ├── imResampleMex.mexw64
    ├── initializeAllAreas.m
    ├── mergeResponses.m
    ├── mexResize.mexw64
    ├── opencv_core242.dll
    ├── opencv_imgproc242.dll
    ├── unfreezeColors.m
    └── w2crs.mat

/README.md:
--------------------------------------------------------------------------------
# Augmented Memory for Correlation Filters in Real-Time UAV Tracking

MATLAB implementation of our Augmented Memory Correlation Filters (AMCF) tracker.

# Abstract

The outstanding computational efficiency of the discriminative correlation filter (DCF) fades away as various complicated improvements are stacked on top of it. Previous appearances are also gradually forgotten, because the traditional appearance-updating scheme of the DCF framework decays historical views exponentially, reducing the model's robustness. In this work, a novel DCF-based tracker is proposed to augment the memory of previously appearing views while running at real-time speed. Several historical views and the current view are introduced into training simultaneously, allowing the tracker to adapt to new appearances as well as memorize previous ones. A novel rapid compressed context learning is proposed to efficiently increase the discriminative ability of the filter. Substantial experiments on the UAVDT and UAV123 datasets have validated that the proposed tracker performs competitively against 26 other top DCF-based and deep-learning-based trackers, running at over 40 FPS on a CPU.

# Publication

The AMCF tracker is proposed in our paper accepted at IROS 2020. A detailed explanation of our method can be found in the paper:

Yiming Li, Changhong Fu, Fangqiang Ding, Ziyuan Huang, and Jia Pan.

Augmented Memory for Correlation Filters in Real-Time UAV Tracking.

You can find this paper at: https://arxiv.org/abs/1909.10989.

If you want to refer to this paper, please cite it as follows:

```
@article{Li2019Augment,
  author  = {Yiming Li and Changhong Fu and Fangqiang Ding and Ziyuan Huang and Jia Pan},
  title   = {Augmented Memory for Correlation Filters in Real-Time UAV Tracking},
  year    = {2019},
  journal = {arXiv:1909.10989}
}
```

# Contact

Yiming Li

Email: yimingli9702@gmail.com

Changhong Fu

Email: [changhong.fu@tongji.edu.cn](mailto:changhong.fu@tongji.edu.cn)

# Demonstration running instructions

This code is compatible with the UAV123 and UAVDT benchmarks. To run it in a benchmark, put the AMCF folder in the benchmark's trackers directory, then configure the sequences and trackers according to the instructions of UAV123 and UAVDT.
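To run the tracker standalone instead, here is a minimal sketch of the expected input (the `seq` fields below are exactly the ones `run_AMCF.m` reads; the sequence folder and initial box are hypothetical):

```matlab
% Minimal standalone driver (hypothetical sequence folder).
% run_AMCF.m only reads seq.s_frames (full image paths) and
% seq.init_rect ([x y w h] of the target in the first frame).
img_dir = 'sequences/bike1/';
files = dir(fullfile(img_dir, '*.jpg'));
seq.s_frames = cellfun(@(f) fullfile(img_dir, f), {files.name}, ...
    'UniformOutput', false);
seq.init_rect = [100, 80, 40, 60];
results = run_AMCF(seq);
```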
# Results on UAV datasets

### UAV123@30fps

![](results_OPE/UAV123/error_OPE.png)

![](results_OPE/UAV123/overlap_OPE.png)

### UAVDT

![](results_OPE/UAVDT/error_OPE.png)

![](results_OPE/UAVDT/overlap_OPE.png)

# Acknowledgements

We thank Bertinetto, Ning Wang, and Lukezic for their previous work on Staple, MCCT, and CSR-DCF. The feature extraction modules and some of the parameters are borrowed from the MCCT tracker (https://github.com/594422814/MCCT) and the Staple tracker (https://github.com/bertinetto/staple). The channel weight module is borrowed from the CSR-DCF tracker (https://github.com/alanlukezic/csr-dcf).
--------------------------------------------------------------------------------
/params.txt:
--------------------------------------------------------------------------------
% configurable params
params.hog_cell_size = 4;
params.fixed_area = 150^2;         % standard area to which we resize the target
params.n_bins = 2^5;               % number of bins for the color histograms (bg and fg models)
params.lambda1 = 1e-3;
params.lambda_scale = 1e-3;        % regularization weight
params.lambda3 = 0.2;              % context weight
params.lambda2 = 0.05;             % memory weight
params.max_num_view = 5;
params.inner_padding = 0.2;        % defines inner area used to sample colors from the foreground
params.context_factor = 3;         % enlarge the bg_area to ct_area
params.merge_factor = 0.3;         % fixed interpolation factor - how to linearly combine the two responses

% Parameters for learning
params.learning_rate_pwp = 0.023;  % 0.023\0.0225 on UAVDT\UAV123 % bg and fg color models learning rate
params.learning_rate_cf = 0.023;   % 0.023\0.00801 on UAVDT\UAV123 % hand-crafted model learning rate
params.weight_lr = 0.026;          % 0.026\0.009 on UAVDT\UAV123 % channel weight model learning rate


% Parameters for scale filters
params.learning_rate_scale = 0.02512;
params.scale_sigma_factor = 0.65;
params.num_scales = 51;
params.scale_step = 1.0218;

% Parameters for desired translation filter output
params.output_sigma_factor = 0.035;
params.gaussian_var_view = 1.16;
params.gaussian_var_first = 1.28;
params.gaussian_peak_view = 0.75;
params.gaussian_peak_first = 0.75;


% fixed setup
params.hog_scale_cell_size = 4;
params.scale_model_factor = 1.0;
params.scale_model_max_area = 32*16;
params.feature_type = 'fhog&cn';   % 'fhog' 'fhog&cn' 'gray'
params.grayscale_sequence = false; % suppose that sequence is colour
params.merge_method = 'const_factor';
params.den_per_channel = false;

%% debugging stuff
params.use_weight_channel = true;
params.visualization = false;      % show output bbox on frame
--------------------------------------------------------------------------------
/results_OPE/UAV123/error_OPE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vision4robotics/AMCF-tracker/fc706921a33c918b0cba08d44d961547a5a2382e/results_OPE/UAV123/error_OPE.png
--------------------------------------------------------------------------------
/results_OPE/UAV123/overlap_OPE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vision4robotics/AMCF-tracker/fc706921a33c918b0cba08d44d961547a5a2382e/results_OPE/UAV123/overlap_OPE.png
--------------------------------------------------------------------------------
/results_OPE/UAVDT/error_OPE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vision4robotics/AMCF-tracker/fc706921a33c918b0cba08d44d961547a5a2382e/results_OPE/UAVDT/error_OPE.png
--------------------------------------------------------------------------------
/results_OPE/UAVDT/overlap_OPE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vision4robotics/AMCF-tracker/fc706921a33c918b0cba08d44d961547a5a2382e/results_OPE/UAVDT/overlap_OPE.png
--------------------------------------------------------------------------------
/run_AMCF.m:
--------------------------------------------------------------------------------
function results = run_AMCF(seq)
% AMCF: Augmented Memory for Correlation Filters in Real-Time UAV Tracking

%% Add path
addpath('./tracker');
addpath('./utility');

%% Read params.txt
params = readParams('params.txt');
params.img_files = seq.s_frames;
params.img_path = '';

% Read the first video frame
im = imread(params.img_files{1});
% grayscale sequence? --> use 1D instead of 3D histograms
if(size(im,3)==1)
    params.grayscale_sequence = true;
end

region = seq.init_rect;

if(numel(region)==8)
    % polygon format (VOT14, VOT15)
    [cx, cy, w, h] = get_axis_aligned_BB(region);
else % rectangle format (OTB, Wu et al. CVPR13)
    x = region(1);
    y = region(2);
    w = region(3);
    h = region(4);
    cx = x+w/2;
    cy = y+h/2;
end

% init_pos is the centre of the initial bounding box
params.init_pos = [cy cx];
params.target_sz = round([h w]);

[params, ct_area, bg_area, fg_area, area_resize_factor] = initializeAllAreas(im, params);

% do not write any debug output during tracking (fout = -1)
params.fout = -1;
% set the hashing filter size
params.filterSize = 25;
% start the actual tracking
results = tracker(params, im, ct_area, bg_area, fg_area, area_resize_factor);
fclose('all');
end
--------------------------------------------------------------------------------
/tracker.m:
--------------------------------------------------------------------------------
function [results] = tracker(p, im, ct_area, bg_area, fg_area, area_resize_factor)
% AMCF: Augmented Memory for Correlation Filters in Real-Time UAV Tracking

%% INITIALIZATION
temp = load('w2crs');
w2c = temp.w2crs;
num_frames = numel(p.img_files);
res_positions = zeros(num_frames, 4);
pos = p.init_pos;
target_sz = p.target_sz;
% Patch of the target + padding
patch_padded = getSubwindow(im, pos, p.norm_bg_area, bg_area);
% Initialize hist model
new_pwp_model = true;
[bg_hist, fg_hist] = updateHistModel(new_pwp_model, patch_padded, bg_area, fg_area, target_sz, p.norm_bg_area, p.n_bins, p.grayscale_sequence);
bg_hist = single(bg_hist);
fg_hist = single(fg_hist);
new_pwp_model = false;
K = p.max_num_view;
num_view = 0;
weight_lr = p.weight_lr;
% Hann (cosine) window
hann_window = single(myHann(p.cf_response_size(1)) * myHann(p.cf_response_size(2))');
% Context suppression window
context_width = ct_area(2);
context_height = ct_area(1);
Q = -context_width/2:context_width/2;
P = -context_height/2:context_height/2;
[P, Q] = ndgrid(P,Q);
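% The next two lines build the quadratic context suppression window
% w(P,Q) = 2*(P/H)^2 + 2*(Q/W)^2 over the context area: w is ~0 at the
% patch centre and rises to ~1 at the context corners, so when it is
% multiplied into the context features (see the TRAINING step below)
% the target region contributes little and the filter mainly learns to
% suppress the surrounding background.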
w = single(2*(P/context_height).^2 + 2*(Q/context_width).^2);
w = mexResize(w, [p.cf_response_size(1), p.cf_response_size(2)], 'auto');

% Gaussian-shaped desired responses initialization, centred at (1,1)
% Bandwidth proportional to target size
output_sigma = sqrt(prod(p.norm_target_sz)) * p.output_sigma_factor / p.hog_cell_size;
% Responses for selected views
Y = single(zeros(size(hann_window,1), size(hann_window,2), K));
for i = 1:K
    Y(:,:,i) = single(gaussianResponse(p.cf_response_size, power(p.gaussian_var_view, K-i+1)*output_sigma, power(p.gaussian_peak_view, K-i+1)));
end
Yf = fft2(Y);
% Response for the first frame
y1 = single(gaussianResponse(p.cf_response_size, p.gaussian_var_first*output_sigma, p.gaussian_peak_first));
yf1 = fft2(y1);
% Response for the current frame
yc = single(gaussianResponse(p.cf_response_size, output_sigma, 1));
yfc = fft2(yc);


%% SCALE ADAPTATION INITIALIZATION
% Code from DSST
scale_factor = 1;
base_target_sz = target_sz;
scale_sigma = sqrt(p.num_scales) * p.scale_sigma_factor;
ss = (1:p.num_scales) - ceil(p.num_scales/2);
ys = exp(-0.5 * (ss.^2) / scale_sigma^2);
ysf = fft(ys);
if mod(p.num_scales,2) == 0
    scale_window = single(hann(p.num_scales+1));
    scale_window = scale_window(2:end);
else
    scale_window = single(hann(p.num_scales));
end
ss = 1:p.num_scales;
scale_factors = p.scale_step.^(ceil(p.num_scales/2) - ss);
if p.scale_model_factor^2 * prod(p.norm_target_sz) > p.scale_model_max_area
    p.scale_model_factor = sqrt(p.scale_model_max_area/prod(p.norm_target_sz));
end
scale_model_sz = floor(p.norm_target_sz * p.scale_model_factor);
% find maximum and minimum scales
min_scale_factor = p.scale_step ^ ceil(log(max(5 ./ bg_area)) / log(p.scale_step));
max_scale_factor = p.scale_step ^ floor(log(min([size(im,1) size(im,2)] ./ target_sz)) / log(p.scale_step));


%% MAIN LOOP
tic;
t_imread = 0;
for frame = 1:num_frames
    if frame > 1
        tic_imread = tic;
        im = imread([p.img_path p.img_files{frame}]);
        t_imread = t_imread + toc(tic_imread);
        %% TESTING step
        % extract patch of size bg_area and resize to norm_bg_area
        im_patch_cf = getSubwindow(im, pos, p.norm_bg_area, bg_area);
        pwp_search_area = round(p.norm_pwp_search_area / area_resize_factor);
        % extract patch of size pwp_search_area and resize to norm_pwp_search_area
        im_patch_pwp = getSubwindow(im, pos, p.norm_pwp_search_area, pwp_search_area);
        % compute feature map
        xt = getFeatureMap(im_patch_cf, p.feature_type, p.cf_response_size, p.hog_cell_size, w2c);
        % apply Hann window
        xt_windowed = bsxfun(@times, hann_window, xt);
        % compute FFT
        xtf = fft2(xt_windowed);
        % Correlation between filter and test patch gives the response
        % Solve diagonal system per pixel.
        if p.den_per_channel
            hf = hf_num ./ (hf_den + p.lambda1);
        else
            hf = bsxfun(@rdivide, hf_num, sum(hf_den, 3)+p.lambda1);
        end
        % Calculate the correlation filter response with channel reliability
        if p.use_weight_channel
            response_cf_chann = single(ensure_real(ifft2(conj(hf) .* xtf)));
            response_cf = sum(bsxfun(@times, response_cf_chann, ...
                reshape(model_chann_w, 1, 1, size(response_cf_chann,3))), 3);
        else
            response_cf = real(ifft2(sum(conj(hf) .* xtf, 3)));
        end

        % Crop square search region (in feature pixels).
        response_cf = cropFilterResponse(response_cf, ...
            floor_odd(p.norm_delta_area / p.hog_cell_size));
        if p.hog_cell_size > 1
            % Scale up to match center likelihood resolution.
            response_cf = mexResize(response_cf, p.norm_delta_area, 'auto');
            if p.use_weight_channel
                response_cf = single(response_cf)*size(model_chann_w,2);
            end
        end

        [likelihood_map] = getColourMap(im_patch_pwp, bg_hist, fg_hist, p.n_bins, p.grayscale_sequence);
        % (TODO) in theory it should be at 0.5 (unseen colors should have max entropy)
        likelihood_map(isnan(likelihood_map)) = 0;
        % each pixel of response_pwp loosely represents the likelihood that
        % the target (of size norm_target_sz) is centred on it
        response_pwp = getCenterLikelihood(likelihood_map, p.norm_target_sz);

        %% ESTIMATION
        response = mergeResponses(response_cf, response_pwp, p.merge_factor, p.merge_method);
        [row, col] = find(response == max(response(:)), 1);
        center = (1+p.norm_delta_area) / 2;
        pos = gather(pos + ([row, col] - center) / area_resize_factor);
        rect_position = [pos([2,1]) - target_sz([2,1])/2, target_sz([2,1])];

        %% SCALE SPACE SEARCH
        im_patch_scale = getScaleSubwindow(im, pos, base_target_sz, scale_factor * scale_factors, scale_window, scale_model_sz, p.hog_scale_cell_size);
        xsf = fft(im_patch_scale, [], 2);
        scale_response = real(ifft(sum(sf_num .* xsf, 1) ./ (sf_den + p.lambda_scale)));
        recovered_scale = ind2sub(size(scale_response), find(scale_response == max(scale_response(:)), 1));
        % set the scale
        scale_factor = scale_factor * scale_factors(recovered_scale);

        if scale_factor < min_scale_factor
            scale_factor = min_scale_factor;
        elseif scale_factor > max_scale_factor
            scale_factor = max_scale_factor;
        end
        % use new scale to update bboxes for target, filter, bg and fg models
        target_sz = round(base_target_sz * scale_factor);
        avg_dim = sum(target_sz)/2;
        bg_area = round(target_sz + avg_dim);
        if(bg_area(2)>size(im,2)), bg_area(2) = size(im,2)-1; end
        if(bg_area(1)>size(im,1)), bg_area(1) = size(im,1)-1; end
        bg_area = bg_area - mod(bg_area - target_sz, 2);
        fg_area = round(target_sz - avg_dim * p.inner_padding);
        fg_area = fg_area + mod(bg_area - fg_area, 2);
        % Compute the rectangle with (or close to) params.fixed_area and
        % the same aspect ratio as the target bbox
        area_resize_factor = sqrt(p.fixed_area/prod(bg_area));
    end

    %% TRAINING
    % extract patch of size bg_area and resize to norm_bg_area
    im_patch_bg = getSubwindow(im, pos, p.norm_bg_area, bg_area);
    % extract patch of size ct_area and resize to norm_ct_area
    im_patch_ct = getSubwindow(im, pos, p.norm_ct_area, ct_area);
    % compute feature maps of cf_response_size
    bt = getFeatureMap(im_patch_ct, p.feature_type, p.cf_response_size, p.hog_cell_size, w2c);
    xt = getFeatureMap(im_patch_bg, p.feature_type, p.cf_response_size, p.hog_cell_size, w2c);
    % apply Hann window
    xt = bsxfun(@times, hann_window, single(xt));
    % apply Context Suppression window
    bt = bsxfun(@times, w, single(bt));
    % compute FFT
    btf = fft2(bt);
    xtf = fft2(xt);

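    % Sketch of the objective the accumulators below implement (cf. the
    % AMCF paper): per Fourier pixel, the filter h minimises
    %   ||h*x_c - y_c||^2                    (current frame)
    %   + lambda2*||h*x_1 - y_1||^2          (first frame, never forgotten)
    %   + lambda2*sum_k ||h*x_k - y_k||^2    (up to K stored views)
    %   + lambda3*||h*b||^2                  (compressed context suppression)
    %   + lambda1*||h||^2                    (regularisation),
    % whose closed-form numerator/denominator terms are accumulated into
    % hf_num / hf_den by the MEMORY blocks that follow.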

    %% MEMORY INITIALIZATION
    if frame == 1
        rect_position = [pos([2,1]) - target_sz([2,1])/2, target_sz([2,1])];
        % save the feature map of the first video frame
        xtf1 = xtf;
        ktf1 = conj(xtf1).*xtf1;
        % get the fixed terms of the numerator and denominator (unrelated to views)
        hf_num_first = p.lambda2*bsxfun(@times, conj(yf1), xtf1);
        hf_den_first = p.lambda2*ktf1;

        % use the first-frame patch as the patch of the last selected view
        last_view_patch = im_patch_bg;
        [lastMatrix, lastSize] = hashing(last_view_patch, p.filterSize);
        % initialize the number of views
        num_view = 1;
        % initialize the memory space for views
        xtfn = single(zeros([size(xtf) K]));
        ktfn = single(zeros([size(xtf) K]));
        % add the first frame at the head position of the memory space
        xtfn(:,:,:,1) = xtf;
        % get the view-related terms of the numerator and denominator
        for i = 1:num_view
            hf_num_view = p.lambda2*bsxfun(@times, conj(Yf(:,:,K-num_view+i)), xtfn(:,:,:,i));
        end
        for i = 1:num_view
            ktfn(:,:,:,i) = conj(xtfn(:,:,:,i)).*xtfn(:,:,:,i);
        end
        hf_den_view = p.lambda2*sum(ktfn,4);
    end
    %% MEMORY UPDATE
    % for the visualization of view updating and term update
    update_view = 0;
    % calculate the difference score between the last selected view and the current frame,
    % for judging whether to update the views
    dif_score = PHA(im_patch_bg, lastMatrix, lastSize, p.filterSize);
    if dif_score > 1/2
        % for the visualization of view updating and term update
        update_view = 1;
        % update the patch of the last selected view
        last_view_patch = im_patch_bg;
        [lastMatrix, lastSize] = hashing(last_view_patch, p.filterSize);
        % if the memory space is not full,
        % the current patch enters the first empty position of the memory space
        if num_view<K

[... content missing from this dump: the remainder of the main loop and model/filter updates, plus the helper subfunctions called above (PHA, ensure_real, myHann, floor_odd) up to the body of the hashing subfunction, which resumes below ...]

if(size(image,3)>2)
    image = rgb2gray(image); % convert the image to grayscale if RGB
end
% Required variables: image and filter size for hashing
imDCT = dct2(image);
[~, imcolumn] = size(imDCT); % size of the DCT matrix
hashSize = uint32((imcolumn)*(filterSize/100));
imfiltered = imDCT(1:hashSize, 1:hashSize, 1); % resize the DCT matrix according to the filter size
imMedian = median(imfiltered(:)); % find the median of the filtered matrix
hashMatrix = imfiltered>=imMedian; % hash matrix formed
end

--------------------------------------------------------------------------------
/tracker/computeHistogram.m:
--------------------------------------------------------------------------------
function histogram = computeHistogram(patch, mask, n_bins, grayscale_sequence)
%COMPUTEHISTOGRAM creates a colour (or grayscale) histogram of an image patch
% MASK has the same size as the image patch and selects what should
% be used when computing the histogram (i.e. out-of-frame regions are ignored)

[h, w, d] = size(patch);

assert(all([h w]==size(mask)) == 1, 'mask and image are not the same size');

bin_width = 256/n_bins;

% convert the image to a 1D array with the same number of channels as the patch
patch_array = reshape(double(patch), w*h, d);
% compute which bin each pixel (for all 3 channels) belongs to
bin_indices = floor(patch_array/bin_width) + 1;

if grayscale_sequence
    histogram = accumarray(bin_indices, mask(:), [n_bins 1])/sum(mask(:));
else
    % the histogram is a cube of side n_bins
    histogram = accumarray(bin_indices, mask(:), [n_bins n_bins n_bins])/sum(mask(:));
end

end
--------------------------------------------------------------------------------
/tracker/cropFilterResponse.m:
--------------------------------------------------------------------------------
function new_response = cropFilterResponse(response_cf, response_size)
%CROPFILTERRESPONSE makes RESPONSE_CF of size RESPONSE_SIZE (i.e. the same size as the colour response)

[h, w] = size(response_cf);
b = response_size(1);
a = response_size(2);

% a and b must be odd, as we want an exact center
if ~all_odd([a, b])
    error('dimensions must be odd');
end
half_width = floor(a/2);
half_height = floor(b/2);

new_response = response_cf(...
    mod_one(-half_height:half_height, h), ...
    mod_one(-half_width:half_width, w));
end

function y = mod_one(a, b)
y = mod(a-1, b)+1;
end

function y = all_odd(x)
y = all(mod(x, 2) == 1);
end
--------------------------------------------------------------------------------
/tracker/fcn.m:
--------------------------------------------------------------------------------
function out = get_feature_map(im_patch, features, w2c)

% out = get_feature_map(im_patch, features, w2c)
%
% Extracts the given features from the image patch. w2c is the
% Color Names matrix, if used.
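%
% Example (hypothetical inputs; note this file is named fcn.m, so MATLAB
% dispatches it as fcn(...); w2crs.mat is loaded as elsewhere in this repo):
%   temp = load('w2crs'); w2c = temp.w2crs;
%   patch = imread('peppers.png');
%   out = fcn(patch, {'gray', 'cn'}, w2c); % h x w x 11 feature map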

if nargin < 3
    w2c = [];
end

% the names of the features that can be used
valid_features = {'gray', 'cn'};

% the dimension of the valid features
feature_levels = [1 10]';

num_valid_features = length(valid_features);
used_features = false(num_valid_features, 1);

% get the used features
for i = 1:num_valid_features
    used_features(i) = any(strcmpi(valid_features{i}, features));
end

% total number of used feature levels
num_feature_levels = sum(feature_levels .* used_features);

level = 0;

% If grayscale image
if size(im_patch, 3) == 1
    % Features that are available for grayscale sequences

    % Grayscale values (image intensity)
    out = single(im_patch)/255 - 0.5;
else
    % Features that are available for color sequences

    % allocate space (for speed)
    out = zeros(size(im_patch, 1), size(im_patch, 2), num_feature_levels, 'single');

    % Grayscale values (image intensity)
    if used_features(1)
        out(:,:,level+1) = single(rgb2gray(im_patch))/255 - 0.5;
        level = level + feature_levels(1);
    end

    % Color Names
    if used_features(2)
        if isempty(w2c)
            % load the RGB to color name matrix if not in input
            temp = load('w2crs');
            w2c = temp.w2crs;
        end

        % extract color descriptor
        out(:,:,level+(1:10)) = im2c(single(im_patch), w2c, -2);
        level = level + feature_levels(2);
    end
end
--------------------------------------------------------------------------------
/tracker/fhog.m:
--------------------------------------------------------------------------------
function H = fhog( I, binSize, nOrients, clip, crop )
% Efficiently compute Felzenszwalb's HOG (FHOG) features.
%
% A fast implementation of the HOG variant used by Felzenszwalb et al.
% in their work on discriminatively trained deformable part models.
% http://www.cs.berkeley.edu/~rbg/latent/index.html
% Gives nearly identical results to features.cc in code release version 5
% but runs 4x faster (over 125 fps on VGA color images).
%
% The computed HOG features are 3*nOrients+5 dimensional. There are
% 2*nOrients contrast sensitive orientation channels, nOrients contrast
% insensitive orientation channels, 4 texture channels and 1 all zeros
% channel (used as a 'truncation' feature). Using the standard value of
% nOrients=9 gives a 32 dimensional feature vector at each cell. This
% variant of HOG, referred to as FHOG, has been shown to achieve superior
% performance to the original HOG features. For details please refer to
% work by Felzenszwalb et al. (see link above).
%
% This function is essentially a wrapper for calls to gradientMag()
% and gradientHist(). Specifically, it is equivalent to the following:
% [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2;
% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip);
% See gradientHist() for more general usage.
%
% This code requires SSE2 to compile and run (most modern Intel and AMD
% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2.
%
% USAGE
%  H = fhog( I, [binSize], [nOrients], [clip], [crop] )
%
% INPUTS
%  I        - [hxw] color or grayscale input image (must have type single)
%  binSize  - [8] spatial bin size
%  nOrients - [9] number of orientation bins
%  clip     - [.2] value at which to clip histogram bins
%  crop     - [0] if true crop boundaries
%
% OUTPUTS
%  H        - [h/binSize w/binSize nOrients*3+5] computed hog features
%
% EXAMPLE
%  I=imResample(single(imread('peppers.png'))/255,[480 640]);
%  tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps
%  figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V)
%
% EXAMPLE
%  % comparison to features.cc (requires DPM code release version 5)
%  I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I);
%  tic, for i=1:100, H1=features(Id,8); end; disp(100/toc)
%  tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc)
%  figure(1); montage2(H1); figure(2); montage2(H2);
%  D=abs(H1-H2); mean(D(:))
%
% See also hog, hogDraw, gradientHist
%
% Piotr's Image&Video Toolbox      Version 3.23
% Copyright 2013 Piotr Dollar.  [pdollar-at-caltech.edu]
% Please email me if you find bugs, or have suggestions or questions!
% Licensed under the Simplified BSD License [see external/bsd.txt]

% Note: modified to be more self-contained

if( nargin<2 ), binSize=8; end
if( nargin<3 ), nOrients=9; end
if( nargin<4 ), clip=.2; end
if( nargin<5 ), crop=0; end

softBin = -1; useHog = 2; b = binSize;

[M,O] = gradientMex('gradientMag', I, 0, 1);

H = gradientMex('gradientHist', M, O, binSize, nOrients, softBin, useHog, clip);

if( crop ), e=mod(size(I),b)<b/2; H=H(2:end-1+e(1),2:end-1+e(2),:); end