├── .gitignore ├── pairwiseModel ├── energyMinimization │ ├── trwsMex │ │ ├── src │ │ │ ├── LICENSE.TXT │ │ │ ├── CHANGES.TXT │ │ │ ├── instances.inc │ │ │ ├── instances.h │ │ │ ├── README.TXT │ │ │ ├── treeProbabilities.cpp │ │ │ ├── example.cpp │ │ │ └── ordering.cpp │ │ ├── build_trwsMex.m │ │ ├── example_trwsMex.m │ │ └── trwsMex.m │ ├── qpboMex │ │ ├── QPBO-v1.32.src │ │ │ ├── QPBO.h │ │ │ ├── instances.inc │ │ │ ├── CHANGES.TXT │ │ │ └── QPBO_postprocessing.cpp │ │ ├── build_qpboMex.m │ │ ├── example_qpboMex.m │ │ ├── LICENSE │ │ ├── qpboMex.m │ │ ├── README.TXT │ │ └── qpboMex.cpp │ ├── bruteForceBinaryPairwiseMex │ │ ├── build_bruteForceBinaryPairwiseMex.m │ │ └── example_bruteForceBinaryPairwiseMex.m │ ├── computeEnergyBinaryPairwise.m │ ├── reparameterizeEnergy.m │ ├── projectEnergyBinaryPairwise.m │ ├── computeMinMarginalsPairwiseBinary.m │ └── minimizeEnergyPairwiseBinary.m ├── vl_svmStructLoss_pairwiseCompactModel_test_derivative.mat ├── computeMinMarginalsBinaryMex │ ├── build_computeMinMarginalsBinaryMex.m │ └── example_computeMinMarginalsBinaryMex.m ├── assignPointsToClusters.m ├── cnn_computeScores_pairwiseModel.m ├── pruneCandidatesNms.m ├── vl_svmStructLoss_pairwiseCompactModel_test_derivative.m ├── vl_logisticScoreLoss_pairwiseCompactModel_test_derivative.m ├── run_training_pairwiseModel.m ├── vl_simplenn_pairwiseModel_forwardPass.m ├── vl_structuredNetwork_pairwiseModel_test_derivative.m ├── cnn_pairwiseModel.m ├── vl_simplenn_pairwiseModel_backwardPass.m └── run_computeScores_pairwiseModel_Casablanca.m ├── utils ├── HollywoodHeads │ ├── VOChash_lookup_HH.m │ ├── VOChash_init_HH.m │ ├── evaluate_detection_HH.m │ ├── VOCinit_HH.m │ └── VOCevaldet_HH.m ├── Casablanca │ ├── VOChash_lookup_Casablanca.m │ ├── VOChash_init_Casablanca.m │ ├── evaluate_detection_Casablanca.m │ ├── VOCinit_Casablanca.m │ └── VOCevaldet_Casablanca.m ├── VOCcode │ ├── VOCreadxml.m │ ├── VOCap.m │ ├── VOCxml2struct.m │ └── VOCreadrecxml.m ├── bbIntersection.m ├── 
convertBb_X1Y1WH_to_X1Y1X2Y2.m ├── convertBb_X1Y1X2Y2_to_X1Y1WH.m ├── convertBb_X1Y1WH_to_Y1X1Y2X2.m ├── convertBb_Y1X1Y2X2_to_X1Y1WH.m ├── writeLines.m ├── readLines.m ├── cropRectanglesMex │ ├── example_cropRectanglesMex.m │ ├── LICENSE │ ├── build_cropRectanglesMex.m │ ├── cropRectanglesMex.m │ └── README.txt ├── load_BB.m ├── bbIntersectionOverArea.m ├── bbIntersectionOverUnion.m ├── bbIntersectionArea.m ├── do_regression.m ├── showFirstLayerFilters.m ├── selectBoundingBoxesNonMaxSup.m ├── selective_search_boxes.m ├── load_det_local_pairwise_global.m ├── showBoundingBoxes.m ├── vl_argparse.m ├── hex2rgb.m ├── combine_global.m ├── load_det.m └── cropImagePatches.m ├── globalModel ├── vl_nnreshape.m ├── cnn_computeScores_globalModel.m ├── run_training_globalModel.m ├── cnn_globalModel.m ├── run_computeScores_globalModel_Casablanca.m ├── run_computeScores_globalModel.m └── cnn_initNet_globalModel.m ├── compile_mex.m ├── LICENSE ├── setup.m ├── localModel ├── run_training_localModel.m ├── cnn_computeScores_localModel.m ├── cnn_localModel.m ├── cnn_prepareData_localModel.m ├── cnn_initNet_localModel.m ├── run_computeScores_localModel_Casablanca.m └── run_computeScores_localModel.m └── demo_new_images.m /.gitignore: -------------------------------------------------------------------------------- 1 | *.m~ 2 | *.o 3 | *.mexa64 4 | data 5 | models 6 | results 7 | *.zip 8 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/trwsMex/src/LICENSE.TXT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aosokin/cnn_head_detection/HEAD/pairwiseModel/energyMinimization/trwsMex/src/LICENSE.TXT -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/qpboMex/QPBO-v1.32.src/QPBO.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aosokin/cnn_head_detection/HEAD/pairwiseModel/energyMinimization/qpboMex/QPBO-v1.32.src/QPBO.h
--------------------------------------------------------------------------------
/pairwiseModel/vl_svmStructLoss_pairwiseCompactModel_test_derivative.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aosokin/cnn_head_detection/HEAD/pairwiseModel/vl_svmStructLoss_pairwiseCompactModel_test_derivative.mat
--------------------------------------------------------------------------------
/utils/HollywoodHeads/VOChash_lookup_HH.m:
--------------------------------------------------------------------------------
1 | function ind = VOChash_lookup_HH(hash,s)
2 | %VOChash_lookup_HH returns the values stored for string s in a hash built by VOChash_init_HH
3 | %
4 | % Input:
5 | %   hash - struct with cell-array fields key and val (one bucket per cell)
6 | %   s - string to look up; characters [5:7 9:end] must form a number
7 | %
8 | % Output:
9 | %   ind - entries of the bucket's val whose key equals s (empty if there is none)
10 |
11 | hsize=numel(hash.key);
12 | % the bucket is selected by the numeric part of the string, exactly as in VOChash_init_HH
13 | h=mod(str2double(s([5:7 9:end])),hsize)+1;
14 | % strcmp replaces the deprecated strmatch(...,'exact'); the logical mask picks the matching entries
15 | ind=hash.val{h}(strcmp(s,hash.key{h}));
16 |
--------------------------------------------------------------------------------
/utils/Casablanca/VOChash_lookup_Casablanca.m:
--------------------------------------------------------------------------------
1 | function ind = VOChash_lookup_Casablanca(hash,s)
2 | %VOChash_lookup_Casablanca returns the values stored for string s in a hash built by VOChash_init_Casablanca
3 | %
4 | % Input:
5 | %   hash - struct with cell-array fields key and val (one bucket per cell)
6 | %   s - string to look up; characters 12:end must form a number
7 | %
8 | % Output:
9 | %   ind - entries of the bucket's val whose key equals s (empty if there is none)
10 |
11 | hsize=numel(hash.key);
12 | % the bucket is selected by the numeric part of the string, exactly as in VOChash_init_Casablanca
13 | h=mod(str2double(s([12:end])),hsize)+1;
14 | % strcmp replaces the deprecated strmatch(...,'exact'); the logical mask picks the matching entries
15 | ind=hash.val{h}(strcmp(s,hash.key{h}));
16 |
--------------------------------------------------------------------------------
/utils/VOCcode/VOCreadxml.m:
--------------------------------------------------------------------------------
1 | function rec = VOCreadxml(path)
2 | %VOCreadxml reads an XML document and converts it with VOCxml2struct
3 | %
4 | % Input:
5 | %   path - name of a local file, or a URL starting with 'http:'
6 | %
7 | % Output:
8 | %   rec - result of VOCxml2struct applied to the text of the document
9 |
10 | if length(path)>5&&strcmp(path(1:5),'http:')
11 |     xml=urlread(path)';
12 | else
13 |     f=fopen(path,'r');
14 |     if f == -1
15 |         % report the offending file instead of failing inside fread with an invalid identifier
16 |         error(['File ', path, ' can not be opened!']);
17 |     end
18 |     xml=fread(f,'*char')';
19 |     fclose(f);
20 | end
21 | rec=VOCxml2struct(xml);
22 |
--------------------------------------------------------------------------------
/utils/VOCcode/VOCap.m:
--------------------------------------------------------------------------------
1 | function ap = VOCap(rec,prec)
2 | %VOCap computes the area under a precision/recall curve
3 | %
4 | % Input:
5 | %   rec - recall values, column vector
6 | %   prec - precision values, column vector of the same length
7 | %
8 | % Output:
9 | %   ap - sum over the points where recall changes of (recall step) * (precision envelope)
10 |
11 | % pad the curve: recall runs from 0 to 1, precision is 0 at both ends
12 | mrec=[0 ; rec ; 1];
13 | mpre=[0 ; prec ; 0];
14 | % replace each precision by the maximum of itself and all values to its right
15 | for i=numel(mpre)-1:-1:1
16 |     mpre(i)=max(mpre(i),mpre(i+1));
17 | end
18 | % indices at which the recall value differs from the previous one
19 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
20 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
21 |
22 |
--------------------------------------------------------------------------------
/utils/bbIntersection.m:
--------------------------------------------------------------------------------
1 | function res = bbIntersection(rect1, rect2)
2 | %bbIntersection intersects two rectangles given as [x, y, width, height]
3 | %
4 | % Output:
5 | %   res - [x, y, width, height] of the intersection;
6 | %         width and/or height are not positive when the rectangles do not overlap
7 | res = zeros(1, 4);
8 | res(1) = max(rect1(1), rect2(1));
9 | res(2) = max(rect1(2), rect2(2));
10 |
11 | res(3) = min(rect1(1) + rect1(3), rect2(1) + rect2(3)) - res(1);
12 | res(4) = min(rect1(2) + rect1(4), rect2(2) + rect2(4)) - res(2);
13 |
14 | end
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/build_trwsMex.m:
--------------------------------------------------------------------------------
1 | function build_trwsMex
2 | % build_trwsMex builds package trwsMex
3 | %
4 | % Anton Osokin (firstname.lastname@gmail.com), 24.09.2014
5 |
6 | mex trwsMex.cpp src/ordering.cpp src/MRFEnergy.cpp src/treeProbabilities.cpp src/minimize.cpp -output trwsMex -largeArrayDims
7 |
--------------------------------------------------------------------------------
/utils/HollywoodHeads/VOChash_init_HH.m:
--------------------------------------------------------------------------------
1 | function hash = VOChash_init_HH(strs)
2 | %VOChash_init_HH builds a hash (4999 buckets) mapping each string of strs to its position in strs
3 | %
4 | % Input:
5 | %   strs - cell array of strings; characters [5:7 9:end] of each string must form a number
6 | %
7 | % Output:
8 | %   hash - struct with fields key (strings per bucket) and val (their indices in strs)
9 |
10 | hsize=4999;
11 | hash.key=cell(hsize,1);
12 | hash.val=cell(hsize,1);
13 |
14 | for i=1:numel(strs)
15 |     s=strs{i};
16 |     h=mod(str2double(s([5:7 9:end])),hsize)+1;
17 |     % append the string and its index to bucket h
18 |     j=numel(hash.key{h})+1;
19 |     hash.key{h}{j}=strs{i};
20 |     hash.val{h}(j)=i;
21 | end
22 |
23 |
--------------------------------------------------------------------------------
/utils/Casablanca/VOChash_init_Casablanca.m:
--------------------------------------------------------------------------------
1 | function hash = VOChash_init_Casablanca(strs)
2 | %VOChash_init_Casablanca builds a hash (4999 buckets) mapping each string of strs to its position in strs
3 | %
4 | % Input:
5 | %   strs - cell array of strings; characters 12:end of each string must form a number
6 | %
7 | % Output:
8 | %   hash - struct with fields key (strings per bucket) and val (their indices in strs)
9 |
10 | hsize=4999;
11 | hash.key=cell(hsize,1);
12 | hash.val=cell(hsize,1);
13 |
14 | for i=1:numel(strs)
15 |     s=strs{i};
16 |     h=mod(str2double(s([12:end])),hsize)+1;
17 |     % append the string and its index to bucket h
18 |     j=numel(hash.key{h})+1;
19 |     hash.key{h}{j}=strs{i};
20 |     hash.val{h}(j)=i;
21 | end
22 |
23 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/src/CHANGES.TXT:
--------------------------------------------------------------------------------
1 | Changes from version 1.2:
2 |
3 | - Fixed bug in typeBinaryFast
4 | - Added the option of reading min_marginals
5 | - Added function AddRandomMessages()
6 |
7 | Changes from version 1.1:
8 |
9 | - Fixed bug in memory allocation
10 |
11 | Changes from version 1.0:
12 |
13 | - Modified syntax of 'friend' declarations to make it compile under unix
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/src/instances.inc:
--------------------------------------------------------------------------------
1 | // Explicit instantiations of MRFEnergy, one per energy type.
2 | // NOTE(review): the template arguments were missing from this listing (angle brackets lost);
3 | // they are restored here from the eight type*.h headers included by instances.h -
4 | // confirm the class names against the upstream TRW-S sources.
5 | template class MRFEnergy<TypeBinary>;
6 | template class MRFEnergy<TypeBinaryFast>;
7 | template class MRFEnergy<TypePotts>;
8 | template class MRFEnergy<TypeGeneral>;
9 | template class MRFEnergy<TypeTruncatedLinear>;
10 | template class MRFEnergy<TypeTruncatedQuadratic>;
11 | template class MRFEnergy<TypeTruncatedLinear2D>;
12 | template class MRFEnergy<TypeTruncatedQuadratic2D>;
13 |
--------------------------------------------------------------------------------
/utils/convertBb_X1Y1WH_to_X1Y1X2Y2.m:
--------------------------------------------------------------------------------
1 | function newBb = convertBb_X1Y1WH_to_X1Y1X2Y2( bb )
2 | %convertBb_X1Y1WH_to_X1Y1X2Y2 converts the bounding boxes from [X1,Y1,W,H] format to [X1,Y1,X2,Y2] format
3 |
4 | numBb = size(bb, 1);
5 |
6 | newBb = zeros(numBb, 4);
7 |
8 | newBb(:, 1) = bb(:, 1);
9 | newBb(:, 2) = bb(:, 2);
10 | newBb(:, 3) = bb(:, 1) + bb(:, 3) - 1;
11 | newBb(:, 4) = bb(:, 2) + bb(:, 4) - 1;
12 |
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/utils/convertBb_X1Y1X2Y2_to_X1Y1WH.m:
--------------------------------------------------------------------------------
1 | function newBb = convertBb_X1Y1X2Y2_to_X1Y1WH( bb )
2 | %convertBb_X1Y1X2Y2_to_X1Y1WH converts the bounding boxes from [X1,Y1,X2,Y2] format to [X1,Y1,W,H] format
3 |
4 | numBb = size(bb, 1);
5 |
6 | newBb = zeros(numBb, 4);
7 |
8 | newBb(:, 1) = bb(:, 1);
9 | newBb(:, 2) = bb(:, 2);
10 | newBb(:, 3) = bb(:, 3) - bb(:, 1) + 1;
11 | newBb(:, 4) = bb(:, 4) - bb(:, 2) + 1;
12 |
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/utils/convertBb_X1Y1WH_to_Y1X1Y2X2.m:
--------------------------------------------------------------------------------
1 | function newBb = convertBb_X1Y1WH_to_Y1X1Y2X2( bb )
2 | %convertBb_X1Y1WH_to_Y1X1Y2X2 converts the bounding boxes from [X1,Y1,W,H] format to [Y1,X1,Y2,X2] format
3 |
4 | numBb = size(bb, 1);
5 |
6 | newBb = zeros(numBb, 4);
7 |
8 | newBb(:, 1) = bb(:, 2);
9 | newBb(:, 2) = bb(:, 1);
10 | newBb(:, 3) = bb(:, 2) + bb(:, 4) - 1; % in [y1 x1 y2 x2] format border pixels are included
11 | newBb(:, 4) = bb(:, 1) + bb(:, 3) - 1;
12 |
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/utils/convertBb_Y1X1Y2X2_to_X1Y1WH.m:
--------------------------------------------------------------------------------
1 | function newBb = convertBb_Y1X1Y2X2_to_X1Y1WH( bb )
2 | %convertBb_Y1X1Y2X2_to_X1Y1WH converts the bounding boxes from [Y1,X1,Y2,X2] format to [X1,Y1,W,H] format
3 |
4 | numBb = size(bb, 1);
5 |
6 | newBb = zeros(numBb, 4);
7 |
8 | newBb(:, 1) = bb(:, 2);
9 | newBb(:, 2) = bb(:, 1);
10 | newBb(:, 3) = bb(:, 4) - bb(:, 2) + 1; % in [y1 x1 y2 x2] format border pixels are included
11 | newBb(:, 4) = bb(:, 3) - bb(:, 1) + 1;
12 |
13 | end
14 |
15 |
--------------------------------------------------------------------------------
/utils/writeLines.m:
--------------------------------------------------------------------------------
1 | function writeLines( fileName, lines )
2 | %writeLines writes the cell array of strings to a text file
3 | %
4 | % writeLines( fileName, lines );
5 | %
6 | % Input:
7 | %   fileName - string containing the full file name
8 | %   lines - cell array of lines to write, one per line of the file;
9 | %           an empty cell array produces an empty file
10 |
11 | fileID = fopen(fileName, 'w');
12 | if fileID == -1
13 |     error(['File ', fileName, ' can not be opened!']);
14 | end
15 | % close the file when the function exits, also if fprintf fails
16 | closeFile = onCleanup( @() fclose(fileID) );
17 |
18 | % without data arguments fprintf still prints the literal part of the format,
19 | % which would put a stray newline into the file
20 | if ~isempty(lines)
21 |     fprintf(fileID,'%s\n', lines{:});
22 | end
23 |
24 | end
25 |
26 |
--------------------------------------------------------------------------------
/utils/readLines.m:
--------------------------------------------------------------------------------
1 | function lines = readLines( fileName )
2 | %readLines reads the file line by line
3 | %
4 | % lines = readLines( fileName );
5 | %
6 | % Input:
7 | %   fileName - string containing the full file name
8 | %
9 | % Output:
10 | %   lines - cell array of lines contained in the file
11 |
12 | fileID = fopen(fileName, 'r');
13 | if fileID == -1
14 |     error(['File ', fileName, ' can not be opened!']);
15 | end
16 | % close the file when the function exits, also if textscan fails
17 | closeFile = onCleanup( @() fclose(fileID) );
18 |
19 | % '%s' on its own splits at every whitespace character; with the newline as the only
20 | % delimiter a line that contains spaces is returned as a single entry
21 | lines = textscan(fileID, '%s', 'Delimiter', '\n');
22 |
23 | lines = lines{1};
24 |
25 | end
26 |
27 |
--------------------------------------------------------------------------------
/pairwiseModel/computeMinMarginalsBinaryMex/build_computeMinMarginalsBinaryMex.m:
--------------------------------------------------------------------------------
1 | function build_computeMinMarginalsBinaryMex
2 | % build_computeMinMarginalsBinaryMex builds package computeMinMarginalsBinaryMex
3 | %
4 | % Anton Osokin, 12.04.2015
5 |
6 | srcFiles = { 'computeMinMarginalsBinaryMex.cpp' };
7 |
8 | % call mex in function form: same arguments as before, no command string passed to eval
9 | mex( srcFiles{:}, '-output', 'computeMinMarginalsBinaryMex', '-largeArrayDims' );
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/bruteForceBinaryPairwiseMex/build_bruteForceBinaryPairwiseMex.m:
--------------------------------------------------------------------------------
1 | function build_bruteForceBinaryPairwiseMex
2 | % build_bruteForceBinaryPairwiseMex builds package bruteForceBinaryPairwiseMex
3 | %
4 | % Anton Osokin, 03.04.2015
5 |
6 | srcFiles = { 'bruteForceBinaryPairwiseMex.cpp' };
7 |
8 | % call mex in function form: same arguments as before, no command string passed to eval
9 | mex( srcFiles{:}, '-output', 'bruteForceBinaryPairwiseMex', '-largeArrayDims' );
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/src/instances.h:
--------------------------------------------------------------------------------
1 | #ifndef __INSTANCES_H__
2 | #define __INSTANCES_H__
3 |
4 |
5 | #if defined(_MSC_VER)
6 |
7 | // C4661: '...' : no suitable definition provided for explicit template instantiation request
8 | #pragma warning(disable: 4661)
9 |
10 | #endif
11 |
12 | #include "typeBinary.h"
13 | #include "typeBinaryFast.h"
14 | #include "typePotts.h"
15 | #include "typeGeneral.h"
16 | #include "typeTruncatedLinear.h"
17 | #include "typeTruncatedQuadratic.h"
18 | #include "typeTruncatedLinear2D.h"
19 | #include "typeTruncatedQuadratic2D.h"
20 |
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/utils/HollywoodHeads/evaluate_detection_HH.m:
--------------------------------------------------------------------------------
1 | function [rec, prec, ap] = evaluate_detection_HH(det, id, VOCopts, classname)
2 | %evaluate_detection_HH writes the detections to the VOC results file and runs VOCevaldet_HH
3 | %
4 | % Input:
5 | %   det - struct array with fields id (written as the first column) and
6 | %         bb (rows [x, y, w, h, score])
7 | %   id, classname - inserted into the pattern VOCopts.detrespath to get the file name
8 | %   VOCopts - options struct, passed on to VOCevaldet_HH
9 | %
10 | % Output:
11 | %   rec, prec, ap - outputs of VOCevaldet_HH
12 | fprintf('VOC evaluation...');
13 | opath = sprintf(VOCopts.detrespath, id, classname);
14 | fid = fopen(opath, 'w');
15 | if fid == -1
16 |     % report the offending file instead of failing inside fprintf with an invalid identifier
17 |     error(['File ', opath, ' can not be opened!']);
18 | end
19 | for i=1:length(det)
20 |     f_name = det(i).id;
21 |     for j=1:size(det(i).bb,1)
22 |         % one line per box: name, score, x1, y1, x2, y2
23 |         fprintf(fid, '%s %f %f %f %f %f\n', f_name, det(i).bb(j, 5),...
24 |             det(i).bb(j, 1), det(i).bb(j, 2),...
25 |             det(i).bb(j, 1)+det(i).bb(j, 3)-1, det(i).bb(j, 2)+det(i).bb(j, 4)-1);
26 |     end
27 | end
28 | fclose(fid);
29 |
30 | [rec, prec, ap]=VOCevaldet_HH(VOCopts, id, classname, false);
31 |
32 | end
--------------------------------------------------------------------------------
/utils/cropRectanglesMex/example_cropRectanglesMex.m:
--------------------------------------------------------------------------------
1 |
2 | initialImage = im2single( imread('peppers.png') );
3 |
4 | width = size(initialImage, 2);
5 | height = size(initialImage, 1);
6 |
7 | cropPositions = [ 1, 1, height, width; ... % full image
8 |     1, 1, height / 2, width / 3; ... % sub patch
9 |     -height / 2, -width / 2, height / 2, width / 3 ]; ... % sub patch not inside the image
10 | crops = cropRectanglesMex( initialImage, cropPositions, [height, width] );
11 |
12 | figure(1), imshow( initialImage );
13 | figure(2), imshow( crops(:,:,:,1) );
14 | figure(3), imshow( crops(:,:,:,2) );
15 | figure(4), imshow( crops(:,:,:,3) );
16 |
17 |
--------------------------------------------------------------------------------
/utils/Casablanca/evaluate_detection_Casablanca.m:
--------------------------------------------------------------------------------
1 | function [rec, prec, ap] = evaluate_detection_Casablanca(det, id, VOCopts, classname)
2 | %evaluate_detection_Casablanca writes the detections to the VOC results file and runs VOCevaldet_Casablanca
3 | %
4 | % Input:
5 | %   det - struct array with fields id (written as the first column) and
6 | %         bb (rows [x, y, w, h, score])
7 | %   id, classname - inserted into the pattern VOCopts.detrespath to get the file name
8 | %   VOCopts - options struct, passed on to VOCevaldet_Casablanca
9 | %
10 | % Output:
11 | %   rec, prec, ap - outputs of VOCevaldet_Casablanca
12 | fprintf('VOC evaluation...');
13 | opath = sprintf(VOCopts.detrespath, id, classname);
14 | fid = fopen(opath, 'w');
15 | if fid == -1
16 |     % report the offending file instead of failing inside fprintf with an invalid identifier
17 |     error(['File ', opath, ' can not be opened!']);
18 | end
19 | for i=1:length(det)
20 |     f_name = det(i).id;
21 |     for j=1:size(det(i).bb,1)
22 |         % one line per box: name, score, x1, y1, x2, y2
23 |         fprintf(fid, '%s %f %f %f %f %f\n', f_name, det(i).bb(j, 5),...
24 |             det(i).bb(j, 1), det(i).bb(j, 2),...
25 |             det(i).bb(j, 1)+det(i).bb(j, 3)-1, det(i).bb(j, 2)+det(i).bb(j, 4)-1);
26 |     end
27 | end
28 | fclose(fid);
29 |
30 | [rec, prec, ap]=VOCevaldet_Casablanca(VOCopts, id, classname, false);
31 |
32 | end
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/qpboMex/build_qpboMex.m:
--------------------------------------------------------------------------------
1 | function build_qpboMex
2 | % build_qpboMex builds package qpboMex
3 | %
4 | % Anton Osokin (firstname.lastname@gmail.com), 24.09.2014
5 |
6 | codePath = 'QPBO-v1.32.src';
7 |
8 | srcFiles = { 'qpboMex.cpp', ...
9 |     fullfile(codePath, 'QPBO.cpp'), ...
10 |     fullfile(codePath, 'QPBO_maxflow.cpp'), ...
11 |     fullfile(codePath, 'QPBO_postprocessing.cpp'), ...
12 |     fullfile(codePath, 'QPBO_extra.cpp') };
13 |
14 | % call mex in function form: same arguments as before, no command string passed to eval
15 | mex( srcFiles{:}, '-output', 'qpboMex', '-largeArrayDims', ['-I', codePath] );
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/globalModel/vl_nnreshape.m:
--------------------------------------------------------------------------------
1 | function Y = vl_nnreshape(X,dest_size,dzdy)
2 | % VL_NNRESHAPE CNN reshapes input to DEST_SIZE
3 | %
4 | % Y = VL_NNRESHAPE(X, DEST_SIZE) keeps the first two dimensions of X, sets the
5 | % third dimension to DEST_SIZE(2) and puts the remaining elements into the fourth.
6 | % Only DEST_SIZE(2) is used; it must divide size(X,3)*size(X,4).
7 | %
8 | % DZDX = VL_NNRESHAPE(X, DEST_SIZE, DZDY) computes the derivative DZDX
9 | % of the CNN with respect to the input X given the derivative DZDY
10 | % with respect to the block output Y. DZDX has the same dimension
11 | % as X.
12 |
13 | % Copyright (C) 2015 Tuan-Hung VU.
14 | % All rights reserved.
15 | %
16 | % This file is made available under the terms of the BSD license (see the COPYING file).
17 |
18 | if nargin <= 2
19 |     % forward pass
20 |     Y = reshape(X, [size(X,1) size(X,2) dest_size(2) size(X,3)*size(X,4)/dest_size(2)]);
21 | else
22 |     % backward pass: the derivative only has to be brought back to the shape of X
23 |     Y = reshape(dzdy, size(X));
24 | end
25 |
--------------------------------------------------------------------------------
/pairwiseModel/assignPointsToClusters.m:
--------------------------------------------------------------------------------
1 | function clusterIds = assignPointsToClusters( clusterCenters, features )
2 | %assignPointsToClusters assigns points to clusters using L2-distance to the cluster centers
3 | %
4 | % clusterIds = assignPointsToClusters( clusterCenters, features )
5 | %
6 | % Input:
7 | %   clusterCenters - centers, double[ numClusters x numFeatures ]
8 | %   features - points, double[ numPoints x numFeatures ]
9 | %
10 | % Output:
11 | %   clusterIds - each point is assigned to a cluster, 1-based indexing, double[numPoints x 1]
12 |
13 | % squared distance ||f - c||^2 = ||f||^2 - 2 * f * c' + ||c||^2, computed for all pairs at once
14 | dist = -2 * features * clusterCenters';
15 |
16 | cL2 = sum(clusterCenters .^ 2, 2);
17 | fL2 = sum(features .^ 2, 2);
18 |
19 | dist = bsxfun(@plus, dist, fL2);
20 | dist = bsxfun(@plus, dist, cL2');
21 |
22 | [~, clusterIds] = min( dist, [], 2 );
23 |
24 | end
25 |
26 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/qpboMex/QPBO-v1.32.src/instances.inc:
--------------------------------------------------------------------------------
1 | #include "QPBO.h"
2 |
3 | #ifdef _MSC_VER
4 | #pragma warning(disable: 4661)
5 | #endif
6 |
7 | // Instantiations
8 | // NOTE(review): the template arguments were missing from this listing (angle brackets lost);
9 | // they are restored from the type_name strings set by each specialization below -
10 | // confirm against the upstream QPBO sources.
11 |
12 | template class QPBO<int>;
13 | template class QPBO<float>;
14 | template class QPBO<double>;
15 |
16 | template <>
17 | inline void QPBO<int>::get_type_information(const char*& type_name, const char*& type_format)
18 | {
19 |     type_name = "int";
20 |     type_format = "d";
21 | }
22 |
23 | template <>
24 | inline void QPBO<float>::get_type_information(const char*& type_name, const char*& type_format)
25 | {
26 |     type_name = "float";
27 |     type_format = "f";
28 | }
29 |
30 | template <>
31 | inline void QPBO<double>::get_type_information(const char*& type_name, const char*& type_format)
32 | {
33 |     type_name = "double";
34 |     type_format = "lf";
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/example_trwsMex.m:
--------------------------------------------------------------------------------
1 | % example of usage of package trwsMex
2 | %
3 | % Anton Osokin (firstname.lastname@gmail.com), 24.09.2014
4 |
5 | % this example runs trwsMex on a simple binary energy of 5 variables
6 | % y1 - y2 + y1 * y5 - 10 * y1 * y3 - y3 * y4 + y3 * y5
7 |
8 | dataCost = [0 0 0 0 0; 1 -1 0 0 0];
9 |
10 | neighbors = sparse([1; 1; 3; 3], [5; 3; 4; 5], [1; -10; -1; 1], 5, 5);
11 |
12 | metric = [0 0; 0 1];
13 |
14 | options.maxIter = 100;
15 | options.verbosity = 1;
16 | [labels, energy, LB] = trwsMex(dataCost, neighbors, metric, options);
17 |
18 | % % correct answer:
19 | % energy = -11;
20 | % labels = [2; 2; 2; 2; 1];
21 |
22 | if ~isequal(energy, -11)
23 |     warning('Wrong value of energy!')
24 | end
25 | if ~isequal(labels, [2; 2; 2; 2; 1])
26 |     warning('Wrong value of labels!')
27 | end
28 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/qpboMex/example_qpboMex.m:
--------------------------------------------------------------------------------
1 | % example of usage of package qpboMex
2 | %
3 | % Anton Osokin (firstname.lastname@gmail.com), 24.09.2014
4 |
5 | nNodes=4;
6 |
7 | % [Dp(0), Dp(1)] - unary terms
8 | terminalWeights=[
9 |     0,16;
10 |     0,13;
11 |     20,0;
12 |     4,0
13 |     ];
14 |
15 | % [p, q, Vpq(0, 0), Vpq(0, 1), Vpq(1,0), Vpq(1, 1)] - pairwise terms
16 | edgeWeights=[
17 |     1,2,0,10,4,0;
18 |     1,3,0,12,-1,0;
19 |     2,3,0,-1,9,0;
20 |     2,4,0,14,0,0;
21 |     3,4,0,0,7,0
22 |     ];
23 |
24 | [lowerBound, labels] = qpboMex(terminalWeights, edgeWeights);
25 |
26 | % % correct answer:
27 | % lowerBound = 22;
28 | % labels = [0; 0; 1; 0];
29 |
30 | if ~isequal(lowerBound, 22)
31 |     warning('Wrong value of lowerBound!')
32 | end
33 | if ~isequal(labels, [0; 0; 1; 0])
34 |     warning('Wrong value of labels!')
35 | end
36 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/bruteForceBinaryPairwiseMex/example_bruteForceBinaryPairwiseMex.m:
--------------------------------------------------------------------------------
1 | % example of usage of package bruteForceBinaryPairwiseMex
2 | %
3 | % Anton Osokin, 03.04.2015
4 |
5 | numNodes = 4;
6 | numEdges = 5;
7 |
8 | % [Dp(1), Dp(2)] - unary terms
9 | unaryPotentials=[
10 |     0,16;
11 |     0,13;
12 |     20,0;
13 |     4,0
14 |     ];
15 |
16 | % [p, q, Vpq(1, 1), Vpq(1, 2), Vpq(2, 1), Vpq(2, 2)] - pairwise terms
17 | pairwisePotentials=[
18 |     1,2,0,10,4,0;
19 |     1,3,0,12,-1,0;
20 |     3,2,0,9,-1,0;
21 |     2,4,0,14,0,0;
22 |     3,4,0,0,7,0
23 |     ];
24 |
25 | [energy, labels] = bruteForceBinaryPairwiseMex(unaryPotentials, pairwisePotentials);
26 |
27 | % % correct answer:
28 | % energy = 22;
29 | % labels = [1; 1; 2; 1];
30 |
31 | if ~isequal(energy, 22)
32 |     warning('Wrong value of the energy!')
33 | end
34 | if ~isequal(labels, [1; 1; 2; 1])
35 |     warning('Wrong values of the labels!')
36 | end
37 |
--------------------------------------------------------------------------------
/utils/load_BB.m:
--------------------------------------------------------------------------------
1 | function bb = load_BB(detector_type, res_path)
2 | %load bbox+score for each type of detector
3 | % Input:
4 | % - detector_type:
5 | % 'local': willow head local model
6 | % 'rcnn_svm': rcnn model + svm on top
7 | % 'pairwise': unary + binary
8 | % 'dpm': DPM face detector
9 | % ('rcnn' is mentioned in older notes but is not handled here)
10 | % - res_path: path to load bb
11 | % Output:
12 | % - BB = [x y w h score] for 'rcnn_svm' and 'dpm';
13 | % for 'pairwise' and 'local' the stored variable BB is returned unchanged
14 | % (presumably in the same layout - verify against the code that saves it);
15 | % empty, with a warning, for any other detector type
16 | bb = [];
17 |
18 | res = load(res_path, '-mat');
19 |
20 | switch detector_type
21 |     case 'rcnn_svm'
22 |         bb = [bb; res.scores(:,1:5)];
23 |         bb(:, 1:4) = convertBb_X1Y1X2Y2_to_X1Y1WH(bb(:, 1:4));
24 |     case {'pairwise', 'local'}
25 |         bb = res.BB;
26 |     case 'dpm'
27 |         % column 5 of ds is skipped, the score is taken from column 6
28 |         bb = res.ds(:,[1 2 3 4 6]);
29 |         bb(:, 1:4) = convertBb_X1Y1X2Y2_to_X1Y1WH(bb(:, 1:4));
30 |     otherwise
31 |         % the empty result is kept for compatibility, but no longer returned silently
32 |         warning('load_BB:unknownDetectorType', 'Unknown detector type ''%s'', returning no boxes', detector_type);
33 | end
--------------------------------------------------------------------------------
/compile_mex.m:
--------------------------------------------------------------------------------
1 | function compile_mex( cudaRoot )
2 | %compile_mex compiles all the MEX-functions included in this package
3 | %
4 | % compile_mex;
5 | % compile_mex( cudaRoot );
6 | %
7 | % Input:
8 | %   cudaRoot - optional, passed on to build_cropRectanglesMex
9 | %
10 | % The current directory is the root directory of the package when the function
11 | % exits, also when one of the builds fails.
12 |
13 | rootDir = fileparts( mfilename( 'fullpath' ) );
14 |
15 | % every build runs inside its own directory; make sure a failed build
16 | % does not leave the caller stranded there
17 | returnToRoot = onCleanup( @() cd( rootDir ) );
18 |
19 | % image cropping on a GPU
20 | cd( fullfile(rootDir, 'utils', 'cropRectanglesMex') );
21 | if exist('cudaRoot', 'var')
22 |     build_cropRectanglesMex( cudaRoot );
23 | else
24 |     build_cropRectanglesMex;
25 | end
26 | cd(rootDir);
27 |
28 | % computation of the min-marginals
29 | cd( fullfile(rootDir, 'pairwiseModel', 'computeMinMarginalsBinaryMex') );
30 | build_computeMinMarginalsBinaryMex;
31 | cd(rootDir);
32 |
33 | % brute force energy minimization
34 | cd( fullfile(rootDir, 'pairwiseModel', 'energyMinimization', 'bruteForceBinaryPairwiseMex') );
35 | build_bruteForceBinaryPairwiseMex;
36 | cd( rootDir );
37 |
38 | % QPBO
39 | cd( fullfile(rootDir, 'pairwiseModel', 'energyMinimization', 'qpboMex') );
40 | build_qpboMex;
41 | cd( rootDir );
42 |
43 | % TRW-S
44 | cd( fullfile(rootDir, 'pairwiseModel', 'energyMinimization', 'trwsMex') );
45 | build_trwsMex;
46 | cd( rootDir );
47 |
48 |
49 | end
50 |
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/src/README.TXT:
--------------------------------------------------------------------------------
1 | This software implements two algorithms for minimizing energy functions of the form
2 |
3 | E(x) = \sum_i D_i(x_i) + \sum_ij V_ij(x_i,x_j)
4 | where x_i are discrete variables.
5 | 6 | The two algorithms are max-product belief propagation (BP, Pearl'88) and 7 | sequential tree-reweighted max-product message passing (TRW-S, Kolmogorov'05). 8 | 9 | For example usage look at one of the type*.h files 10 | (typeBinary.h, typeBinaryFast.h, typePotts.h, typeGeneral.h, 11 | typeTruncatedLinear.h, typeTruncatedQuadratic.h, typeTruncatedLinear2D.h, typeTruncatedQuadratic2D.h). 12 | If your energy function does not belong to the classes defined 13 | in these files but terms V_ij allow fast distance transforms, then 14 | it should be possible to extend the algorithms to your functions 15 | using files type*.h as examples. 16 | 17 | Written by Vladimir Kolmogorov (vnk@microsoft.com), 2005. 18 | Tested under Microsoft Visual Studio .NET (Windows) 19 | and GNU c++ compiler version 2.96 (Red Hat Linux 7.1). 20 | 21 | (c) Microsoft Corporation. All rights reserved. 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Anton Osokin and Tuan-Hung Vu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils/cropRectanglesMex/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Anton Osokin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/qpboMex/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Anton Osokin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
% Usage example for the package computeMinMarginalsBinaryMex.
% Builds a small binary pairwise energy (4 nodes, 5 edges), computes its
% min-marginals and compares the result against the known answer.
%
% Anton Osokin, 12.04.2015

numNodes = 4;
numEdges = 5;

% unary terms, one row per node: [Dp(1), Dp(2)]
unaryPotentials = [ 0 16; 0 13; 20 0; 4 0 ];

% pairwise terms, one row per edge: [p, q, Vpq(1, 1), Vpq(1, 2), Vpq(2, 1), Vpq(2, 2)]
pairwisePotentials = [ ...
    1, 2, 0, 10,  4, 0; ...
    1, 3, 0, 12, -1, 0; ...
    3, 2, 0,  9, -1, 0; ...
    2, 4, 0, 14,  0, 0; ...
    3, 4, 0,  0,  7, 0 ];

[minMarginals, minMarginals_args] = computeMinMarginalsBinaryMex(unaryPotentials, pairwisePotentials);

% The correct answer is:
%   minMarginals = [22 29; 22 29; 24 22; 22 25 ];
%   minMarginals_args = cat(3, [0 1; 0 1; 0 0; 0 0], [0 1; 0 1; 0 0; 0 0], [1 1; 1 1; 0 1; 1 1], [0 1; 0 1; 0 0; 0 1]);

% check the values of the min-marginals
if not( isequal(minMarginals, [22 29; 22 29; 24 22; 22 25 ]) )
    warning('Wrong value of the min-marginals!')
end

% check the labelings on which the min-marginals are attained
if not( isequal(minMarginals_args, cat(3, [0 1; 0 1; 0 0; 0 0], [0 1; 0 1; 0 0; 0 0], [1 1; 1 1; 0 1; 1 1], [0 1; 0 1; 0 0; 0 1]) ) )
    warning('Wrong values of the args of the min-marginals!')
end
function IoA = bbIntersectionOverArea( candidates, reference )
%bbIntersectionOverArea computes the IoA (intersection-over-area) score between bounding boxes.
%   For a candidate box and the reference box the IoA score is the area of
%   their intersection divided by the area of the reference box.
%
% IoA = bbIntersectionOverArea( candidates, reference );
%
% Input:
%   candidates - the candidate bounding boxes, double[ numBoundingBoxes x 4]
%   reference - the reference bounding box, double[ 1 x 4]
%
% Format for the bounding box representation: [x y w h]
%   (x, y) - position of the upper left corner
%   origin (0, 0) is the upper left corner of the image
%
% Output:
%   IoA - vector of the scores, double[ numBoundingBoxes, 1 ]

% validate the inputs
isValidCandidates = isnumeric( candidates ) && size( candidates, 2 ) == 4;
if ~isValidCandidates
    error('bbIntersectionOverArea:badInputCandidates', 'Input is of incorrect format');
end
isValidReference = isnumeric( reference ) && size( reference, 2 ) == 4 && size( reference, 1 ) == 1;
if ~isValidReference
    error('bbIntersectionOverArea:badInputReference', 'Input is of incorrect format');
end

% normalize the intersection areas by the area (w * h) of the reference box
referenceArea = reference(3) * reference(4);
intersectionArea = bbIntersectionArea( candidates, reference );
IoA = intersectionArea ./ referenceArea;

end
function IoU = bbIntersectionOverUnion( candidates, reference )
%bbIntersectionOverUnion computes the IoU (intersection-over-union) score between bounding boxes.
%   For a candidate box and the reference box the IoU score is the area of
%   their intersection divided by the area of their union.
%
% IoU = bbIntersectionOverUnion( candidates, reference );
%
% Input:
%   candidates - the candidate bounding boxes, double[ numBoundingBoxes x 4]
%   reference - the reference bounding box, double[ 1 x 4]
%
% Format for the bounding box representation: [x y w h]
%   (x, y) - position of the upper left corner
%   origin (0, 0) is the upper left corner of the image
%
% Output:
%   IoU - vector of the scores, double[ numBoundingBoxes, 1 ]

% validate the inputs
isValidCandidates = isnumeric( candidates ) && size( candidates, 2 ) == 4;
if ~isValidCandidates
    error('bbIntersectionOverUnion:badInputCandidates', 'Input is of incorrect format');
end
isValidReference = isnumeric( reference ) && size( reference, 2 ) == 4 && size( reference, 1 ) == 1;
if ~isValidReference
    error('bbIntersectionOverUnion:badInputReference', 'Input is of incorrect format');
end

% area of the union = sum of the two box areas minus the shared part
area = bbIntersectionArea( candidates, reference );
unionArea = candidates(:,3) .* candidates(:,4) + reference(3) * reference(4) - area;
IoU = area ./ unionArea;

end
function energy = computeEnergyBinaryPairwise( unaryTerms, pairwiseTerms, labels )
%computeEnergyBinaryPairwise computes the value of the energy with unary and pairwise potentials
%
% energy = computeEnergyBinaryPairwise( unaryTerms, pairwiseTerms, labels );
%
% The energy is
%   E(x) = \sum_p D_p(x_p) + \sum_{pq} V_pq(x_p, x_q),  x_p \in {1, 2}
%
% Input:
%   unaryTerms - numeric[ numNodes x 2 ], row p is [D_p(1), D_p(2)]
%   pairwiseTerms - numeric[ numEdges x 6 ], each row is
%       [p, q, V_pq(1,1), V_pq(1,2), V_pq(2,1), V_pq(2,2)], p and q are 1-based node indices
%   labels - numeric vector of length numNodes, integer values from {1, 2}
%
% Output:
%   energy - the value of the energy at the labeling
%
% An error is raised when any of the inputs has a wrong format.

%% check input
numLabels = 2;

if ~isnumeric(unaryTerms) || ~ismatrix(unaryTerms) || size(unaryTerms, 2) ~= 2
    error('Incorrect format for unaryTerms, has to be numNodes x 2')
end
numNodes = size(unaryTerms, 1);

if ~isnumeric(pairwiseTerms) || ~ismatrix(pairwiseTerms) || size(pairwiseTerms, 2) ~= 6
    error('Incorrect format for pairwiseTerms, has to be numEdges x 6')
end
numEdges = size(pairwiseTerms, 1);

% the endpoints of the edges are used as indices into labels, so they have to be valid node ids
edgeNodes = pairwiseTerms(:, 1 : 2);
edgeNodes = edgeNodes(:);
if any(edgeNodes ~= round(edgeNodes)) || any(edgeNodes < 1) || any(edgeNodes > numNodes)
    error('Incorrect node indices in pairwiseTerms, have to be integers from 1 to numNodes')
end

if ~isnumeric(labels) || ~isvector(labels) || length(labels) ~= numNodes
    error('Incorrect format for labels, has to be numNodes x 1')
end
labels = labels(:);
% the comparison with round() also rejects NaN, which passes the range checks silently
if any(labels ~= round(labels)) || any(labels > numLabels) || any(labels < 1)
    error('Incorrect values for labels, has to be an integer from 1 to numLabels')
end

%% computation
% linear index of the element (p, labels(p)) of unaryTerms
energy = sum( unaryTerms((1 : numNodes)' + numNodes * (labels - 1) ) );

label1 = labels(pairwiseTerms(:, 1));
label2 = labels(pairwiseTerms(:, 2));
% labelMap(a, b) is the column of pairwiseTerms storing V_pq(a, b)
labelMap = [ 3, 4; 5, 6];
jointLabelMap = labelMap( label1 + 2 * (label2 - 1) );

% linear index of the element (e, jointLabelMap(e)) of pairwiseTerms
energy = energy + sum( pairwiseTerms( (1 : numEdges)' + numEdges * (jointLabelMap - 1) ) );

end
2 | 3 | Changes from version 1.31: 4 | - made it compile without warnings (on g++ 4.6.3) 5 | - fixed an issue in Save() and Load() 6 | 7 | Changes from version 1.3: 8 | - fixed a bug in Improve(): the value INFTY used for 'fixing' nodes could have 9 | been underestimated. 10 | Thanks to Yu Miao for pointing this out. 11 | 12 | Changes from version 1.2: 13 | - fixed a bug: MergeParallelEdges() followed by Probe() may have worked incorrectly. 14 | Details: edges freed by MergeParallelEdges() are added to a list of "free arcs". 15 | These free arcs may then be used when Probe() needs to add a new pairwise term. 16 | However, there was an inconsistency between how MergeParallelEdges() marks free arcs, 17 | and how free arcs are treated in AddPairwiseTerm(). The result may have been a segmentation fault. 18 | Thanks to Lena Gorelick for pointing this out. 19 | 20 | Changes from version 1.1: 21 | - updated to make it compile under gcc 4.1.2. 22 | 23 | Changes from version 1.0: 24 | - fixed a bug in Probe(). (Thanks to Tian Taipeng for noticing that there is a bug). 25 | 26 | Details: In version 1.1 the transformed energy after calling Probe() was incorrect. 27 | As a result, the option ProbeOptions::weak_persistencies=1 was not working correctly, 28 | since it called the main probing function iteratively. 29 | 30 | - Added new function Improve() (without arguments), which generates a random permutation itself. 
function [ unary, pairwise, constant ] = reparameterizeEnergy( unaryTerms, pairwiseTerms )
%reparameterizeEnergy rewrites a binary pairwise energy in the form
%   constant + \sum_i unary(i) * x_i + \sum_e pairwise(e, 3) * x_p * x_q,  x_i \in \{0, 1\}
%   where p = pairwise(e, 1) and q = pairwise(e, 2) are the endpoints of edge e
%
% Input:
%   unaryTerms - [ numNodes x 2 ], costs of assigning labels 0 and 1 to each node
%   pairwiseTerms - [ numEdges x 6 ], each row is [p, q, V(0,0), V(0,1), V(1,0), V(1,1)]
%
% Output:
%   unary - [ numNodes x 1 ], coefficients of x_i
%   pairwise - [ numEdges x 3 ], rows [p, q, coefficient of x_p * x_q]
%   constant - the constant term of the energy

if size(unaryTerms, 2) ~= 2
    error( 'This function is implemented for binary variables only' );
end
numNodes = size( unaryTerms, 1 );
numEdges = size( pairwiseTerms, 1 );

% entries of the pairwise tables
nodeFrom = pairwiseTerms(:, 1);
nodeTo = pairwiseTerms(:, 2);
v00 = pairwiseTerms(:, 3);
v01 = pairwiseTerms(:, 4);
v10 = pairwiseTerms(:, 5);
v11 = pairwiseTerms(:, 6);

% coefficient of x_i * x_j
pairwise = zeros(numEdges, 3);
pairwise(:, 1 : 2) = [nodeFrom, nodeTo];
pairwise(:, 3) = v00 + v11 - v01 - v10;

% linear parts of the pairwise tables; accumarray sums the contributions of
% all the edges incident to a node (plain indexing would lose repeated indices)
unary = zeros(numNodes, 1);
% compensate for x_i
unary = unary + accumarray(nodeFrom, v10 - v00, [numNodes, 1], @sum);
% compensate for x_j
unary = unary + accumarray(nodeTo, v01 - v00, [numNodes, 1], @sum);

% add initial unaries
unary = unary + unaryTerms(:, 2) - unaryTerms(:, 1);

% everything that does not depend on the labeling
constant = sum( v00 ) + sum( unaryTerms(:, 1) );
end
function build_cropRectanglesMex( cudaRoot )
%build_cropRectanglesMex builds package cropRectanglesMex
%
% build_cropRectanglesMex( cudaRoot );
%
% INPUT:
%   cudaRoot - path to the CUDA installation (default: '/usr/cuda-7.0')
%
% cropRectanglesMex.cu (located next to this file) is compiled with nvcc into an object file,
% which is then linked with mex. The mex-file is written to the directory of this file.
% An error is raised if nvcc is not found or if the compilation fails.

% Anton Osokin, firstname.lastname@gmail.com, March 2015

if ~exist('cudaRoot', 'var')
    cudaRoot = '/usr/cuda-7.0' ;
end
nvccPath = fullfile(cudaRoot, 'bin', 'nvcc');
if ~exist(nvccPath, 'file')
    error('NVCC compiler was not found!');
end

root = fileparts( mfilename('fullpath') );
% absolute paths, so that the build does not depend on the current directory
sourceFile = fullfile(root, 'cropRectanglesMex.cu');
objectFile = fullfile(root, 'cropRectanglesMex.o');

% compiling
compileCmd = [ '"', nvccPath, '"', ...
    ' -c "', sourceFile, '"', ...
    ' -DNDEBUG -DENABLE_GPU', ...
    ' -I"', fullfile( matlabroot, 'extern', 'include'), '"', ...
    ' -I"', fullfile( matlabroot, 'toolbox', 'distcomp', 'gpu', 'extern', 'include'), '"', ...
    ' -I"', fullfile( cudaRoot, 'include'), '"', ...
    ' -I"', fullfile( cudaRoot, 'samples', '7_CUDALibraries', 'common', 'UtilNPP'), '"', ...
    ' -I"', fullfile( cudaRoot, 'samples', 'common', 'inc'), '"', ...
    ' -Xcompiler', ' -fPIC', ...
    ' -o "', objectFile, '"'];
status = system( compileCmd );
if status ~= 0
    % without this check the failure would only show up as a confusing linker error
    error('build_cropRectanglesMex:nvccFailed', 'NVCC failed to compile %s (exit status %d)', sourceFile, status);
end

% the intermediate object file is removed when the function exits, even if the linking fails
cleanupObjectFile = onCleanup( @() delete( objectFile ) );

% linking
mopts = {'-outdir', root, ...
    '-output', 'cropRectanglesMex', ...
    ['-L', fullfile(cudaRoot, 'lib64')], ...
    '-lcudart', '-lnppi', '-lnppc', '-lmwgpu', ...
    objectFile };
mex(mopts{:}) ;
/utils/cropRectanglesMex/cropRectanglesMex.m: -------------------------------------------------------------------------------- 1 | %cropRectanglesMex crops multiple bounding boxes from the initial image and resizes them to the standard output size. 2 | % The operation is performed on a GPU using NVIDIA Performance Primitives (NPP) library 3 | % cropRectanglesMex was created to prepare batches for training CNNs using MatConvNet (http://www.vlfeat.org/matconvnet/). 4 | % 5 | % Usage: 6 | % crops = cropRectanglesMex( im, boundingBoxes, outputSize); 7 | % 8 | % Inputs: 9 | % im - the image to crop from, should be a 3 channel image (dimension order: height, width, channels) of type single. 10 | % Normalization (e.g. [0,1] or [0, 255]) is not important. The image should be stored in RAM (not GPU). 11 | % boundingBoxes - bounding boxes to crop, double[ numBoundingBoxes x 4 ], each line corresponds to one bounding box. 12 | % The bounding box format is y1, x1, y2, x2, where the origin is in the top-left corner. 13 | % Pixels are indexed starting from 1 (e.g. [1 1 2 2] corresponds to the box containing the 4 top-left pixels of the image). 14 | % Bounding boxes can be partially outside of the image. The default value for filling such areas is 0 in all the channels. 15 | % outputSize - the target size of the resized crops, double[2 x 1]. outputSize(1) - the height, outputSize(2) - the width. 16 | % 17 | % Outputs: 18 | % crops - the cropped and resized patches, gpuArray, single[ outputSize(1), outputSize(2), numChannels = 3, numBoundingBoxes ] 19 | % 20 | % The function can be compiled using build_cropRectanglesMex.m. 
function setup( matconvnetPath )
%setup adds all the paths required by this package
%
% setup( matconvnetPath )
%
% Input:
%   matconvnetPath - path to the root of MatConvNet;
%       used only when vl_setupnn.m is not found on the MATLAB path

rootDir = fileparts( mfilename( 'fullpath' ) );

% setup MatConvNet: prefer the installation that is already on the path
if exist('vl_setupnn.m', 'file')
    vl_setupnn;
elseif exist( 'matconvnetPath', 'var' )
    run( fullfile(matconvnetPath, 'matlab', 'vl_setupnn.m') );
else
    warning('MatConvNet path is not provided. Not all functionality is available. Run setup( matconvnetPath ) where matconvnetPath is the path to the MatConvNet installation.')
end

% folders of the package relative to its root, listed in the order they are added to the path
packageDirs = { ...
    {'utils'}, ...                                                              % helper functions
    {'utils', 'cropRectanglesMex'}, ...
    {'utils', 'VOCcode'}, ...
    {'utils', 'HollywoodHeads'}, ...
    {'utils', 'Casablanca'}, ...
    {'localModel'}, ...                                                         % code for the local model
    {'pairwiseModel'}, ...                                                      % code for the pairwise model
    {'pairwiseModel', 'computeMinMarginalsBinaryMex'}, ...
    {'pairwiseModel', 'energyMinimization'}, ...
    {'pairwiseModel', 'energyMinimization', 'bruteForceBinaryPairwiseMex'}, ...
    {'pairwiseModel', 'energyMinimization', 'qpboMex'}, ...
    {'pairwiseModel', 'energyMinimization', 'trwsMex'}, ...
    {'globalModel'} };                                                          % code for the global model

for iDir = 1 : numel( packageDirs )
    addpath( fullfile(rootDir, packageDirs{iDir}{:}) );
end
end
% Creates the VOCopts variable of the VOC evaluation toolkit for the HollywoodHeads dataset
%
% This code is based on VOCinit.m from VOC2012 devkit.
% Copyright Tuan-Hung VU - tuanhungvu@gmail.com

clear VOCopts

VOCopts.dataset = 'HollywoodHeads';

% all the paths are built relative to the current directory;
% change curPath if the data and the results are stored elsewhere
curPath = pwd;

% directory that contains the folder of the HollywoodHeads dataset
VOCopts.datadir = fullfile(curPath, 'data');

% writable directory for the results, created if it does not exist
VOCopts.resdir = fullfile(curPath, 'results', VOCopts.dataset, 'VOCEval');
if exist(VOCopts.resdir, 'dir') == 0
    mkdir(VOCopts.resdir);
end

% writable local directory for the annotation cache, created if it does not exist
VOCopts.localdir = fullfile(VOCopts.resdir, 'anns');
if exist(VOCopts.localdir, 'dir') == 0
    mkdir(VOCopts.localdir);
end

% name of the training set
VOCopts.trainset = 'train';

% name of the test set
VOCopts.testset = 'test';

% path templates of the annotations, the images and the lists of the image sets
VOCopts.annopath = fullfile(VOCopts.datadir, VOCopts.dataset, 'Annotations', '%s.xml');
VOCopts.imgpath = fullfile(VOCopts.datadir, VOCopts.dataset, 'JPEGImages', '%s.jpeg');
VOCopts.imgsetpath = fullfile(VOCopts.datadir, VOCopts.dataset, 'Splits', '%s.txt');

% path template of the files with the detection results
VOCopts.detrespath = fullfile(VOCopts.resdir, horzcat('%s_det_', VOCopts.testset, '_%s.txt'));

% object classes
VOCopts.classes = {'head'};
VOCopts.nclasses = length(VOCopts.classes);

% overlap threshold
VOCopts.minoverlap = 0.5;

% annotation cache for evaluation
VOCopts.annocachepath = fullfile(VOCopts.localdir, '%s_anno.mat');
% Creates the VOCopts variable of the VOC evaluation toolkit for the Casablanca dataset
%
% This code is based on VOCinit.m from VOC2012 devkit.
% Copyright Tuan-Hung VU - tuanhungvu@gmail.com

clear VOCopts

VOCopts.dataset = 'Casablanca';

% all the paths are built relative to the current directory;
% change curPath if the data and the results are stored elsewhere
curPath = pwd;

% directory of the Casablanca dataset; the current directory is used when it does not exist
VOCopts.datadir = fullfile(curPath, 'data', VOCopts.dataset);
if exist(VOCopts.datadir, 'dir') == 0
    VOCopts.datadir = curPath;
end

% writable directory for the results, created if it does not exist
VOCopts.resdir = fullfile(curPath, 'results', VOCopts.dataset, 'VOCEval');
if exist(VOCopts.resdir, 'dir') == 0
    mkdir(VOCopts.resdir);
end

% writable local directory for the annotation cache, created if it does not exist
VOCopts.localdir = fullfile(VOCopts.resdir, 'anns');
if exist(VOCopts.localdir, 'dir') == 0
    mkdir(VOCopts.localdir);
end

% name of the training set
VOCopts.trainset = 'train';

% name of the test set
VOCopts.testset = 'test';

% path templates of the annotations, the images and the lists of the image sets
VOCopts.annopath = fullfile(VOCopts.datadir, 'Annotations', '%s.xml');
VOCopts.imgpath = fullfile(VOCopts.datadir, 'JPEGImages', '%s.jpeg');
VOCopts.imgsetpath = fullfile(VOCopts.datadir, 'Splits', '%s.txt');

% path template of the files with the detection results
VOCopts.detrespath = fullfile(VOCopts.resdir, horzcat('%s_det_', VOCopts.testset, '_%s.txt'));

% object classes
VOCopts.classes = {'head'};
VOCopts.nclasses = length(VOCopts.classes);

% overlap threshold
VOCopts.minoverlap = 0.5;

% annotation cache for evaluation
VOCopts.annocachepath = fullfile(VOCopts.localdir, '%s_anno.mat');
-------------------------------------------------------------------------------- /utils/showFirstLayerFilters.m: -------------------------------------------------------------------------------- 1 | function showFirstLayerFilters( net, varargin ) 2 | %showFirstLayerFilters visualizes the filters of the first convolutional layer. 3 | % Input: 4 | % net - the network in MatConvNet format. Layer for visualization - net.layers{i} with minimum possible i such that isequal( net.layers{iLayer}.type, 'conv') 5 | % (parName, 6 | 7 | if ~exist('varargin', 'var') 8 | varargin = {}; 9 | end 10 | %% parameters 11 | opts = struct; 12 | opts.filterShowSize = [50 50]; 13 | opts.filterRowNumber = 10; 14 | opts = vl_argparse(opts, varargin); 15 | 16 | %% 17 | iLayer = 1; 18 | while ~isequal( net.layers{iLayer}.type, 'conv') && iLayer < length(net.layers) 19 | iLayer = iLayer + 1; 20 | end 21 | if ~isequal( net.layers{iLayer}.type, 'conv') 22 | error('showFirstLayerFilters:noConv', 'Convolution layer was not found'); 23 | end 24 | 25 | filters = gather( net.layers{iLayer}.weights{1} ); 26 | numFilters = size(filters, 4); 27 | if size(filters, 3) ~= 3 28 | error('showFirstLayerFilters:badSize', 'Filters have wrong number of channels, only 3 is supported'); 29 | end 30 | 31 | numRows = opts.filterRowNumber; 32 | numCols = ceil( numFilters / numRows ); 33 | 34 | filterImage = zeros( numRows * opts.filterShowSize(1), numCols * opts.filterShowSize(2), 3, 'single' ); 35 | 36 | for iFilter = 1 : numFilters 37 | iCol = ceil(iFilter / numRows); 38 | iRow = iFilter - (iCol - 1) * numRows; 39 | 40 | 41 | curX = (1 : opts.filterShowSize(2)) + (iCol - 1) * opts.filterShowSize(2); 42 | curY = (1 : opts.filterShowSize(1)) + (iRow - 1) * opts.filterShowSize(1); 43 | 44 | resizedFilter = imresize(filters(:,:,:,iFilter), opts.filterShowSize, 'nearest'); 45 | filterImage(curY, curX, :) = resizedFilter; 46 | end 47 | 48 | maxValue = max(filters(:)); 49 | minValue = min(filters(:)); 50 | 51 | filterImage 
function idsNms = selectBoundingBoxesNonMaxSup( boundingBoxes, scores, varargin )
%selectBoundingBoxesNonMaxSup performs the NMS on the candidate bounding boxes
%   boxes should be in [X1,Y1,W,H] format
%
% idsNms = selectBoundingBoxesNonMaxSup( boundingBoxes, scores )
%
% The boxes are visited in the order of decreasing scores. A box is selected if its IoA score
% (see bbIntersectionOverArea) with all the previously selected boxes does not exceed the threshold.
%
% Input:
%   boundingBoxes - double[ numBoxes x 4], each line corresponds to the bounding box in [X1,Y1,W,H] format
%   scores - double[numBoxes x 1], scores of the bounding boxes, will be sorted in the decreasing order
%
% Extra parameters:
%   nmsIntersectionOverAreaThreshold - IoA threshold used to select boxes (default: 0.3)
%   numBoundingBoxMax - maximum number of boxes selected by NMS (default: inf)
%
% Output:
%   idsNms - indices of the bounding boxes selected by NMS, column vector;
%       empty (0 x 1) if there are no boxes or numBoundingBoxMax < 1

if ~exist('varargin', 'var')
    varargin = {};
end
%% parameters
opts = struct;
opts.numBoundingBoxMax = inf;
opts.nmsIntersectionOverAreaThreshold = 0.3;
opts = vl_argparse(opts, varargin);

% the threshold has always been capped at 100; the cap is kept to preserve the behavior
opts.nmsIntersectionOverAreaThreshold = min( opts.nmsIntersectionOverAreaThreshold, 100 );

%% do the job
numBbs = length(scores);
if numBbs == 0 || opts.numBoundingBoxMax < 1
    % nothing to select: avoid indexing into the empty list of boxes
    idsNms = zeros(0, 1);
    return;
end

[~, ids] = sort(scores, 'descend');

% the box with the highest score is always selected
idsNms = nan( min(opts.numBoundingBoxMax, numBbs), 1 );
idsNms(1) = ids(1);
numBbNms = 1;

for iBb = 2 : numBbs
    if numBbNms >= opts.numBoundingBoxMax
        break;
    end

    % IoA of all the selected boxes w.r.t. the current candidate
    curIoa = bbIntersectionOverArea( boundingBoxes( idsNms(1 : numBbNms), : ), boundingBoxes( ids(iBb), : ) );

    if max( curIoa(:) ) <= opts.nmsIntersectionOverAreaThreshold
        numBbNms = numBbNms + 1;
        idsNms( numBbNms ) = ids( iBb );
    end
end

idsNms = idsNms(1 : numBbNms);

end
23 | sum( labeledUnary( (1 : numNodes - numUnlabeled)' + (goodLabels - 1) * (numNodes - numUnlabeled) ) ); 24 | 25 | labelMap = [ 3, 4; 5, 6]; 26 | 27 | for iEdge = 1 : numEdges 28 | node1 = pairwiseTerms(iEdge, 1); 29 | node2 = pairwiseTerms(iEdge, 2); 30 | label1 = partialLabels(node1); 31 | label2 = partialLabels(node2); 32 | if ~maskUnlabeled(node1) && ~maskUnlabeled(node2) 33 | energyConstant = energyConstant + pairwiseTerms( iEdge, labelMap(label1, label2) ); 34 | elseif maskUnlabeled(node1) && ~maskUnlabeled(node2) 35 | unaryTermsNew( newIds(node1), : ) = unaryTermsNew( newIds(node1), : ) + ... 36 | reshape( pairwiseTerms(iEdge, labelMap(:, label2)), 1, numLabels); 37 | elseif ~maskUnlabeled(node1) && maskUnlabeled(node2) 38 | unaryTermsNew( newIds(node2), : ) = unaryTermsNew( newIds(node2), : ) + ... 39 | reshape( pairwiseTerms(iEdge, labelMap(label1, :)), 1, numLabels); 40 | end 41 | end 42 | 43 | 44 | end 45 | 46 | -------------------------------------------------------------------------------- /globalModel/cnn_computeScores_globalModel.m: -------------------------------------------------------------------------------- 1 | function scores = cnn_computeScores_globalModel( net, imdb, getBatch, varargin ) 2 | %cnn_computeScores_localModel computes the scores for the images by applying the CN to all the candidate patches 3 | 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | 8 | %% compute results w.r.t. 
patches 9 | opts = struct; 10 | opts.batchSize = 32; 11 | opts.conserveMemory = false; 12 | opts.sync = true; 13 | opts.useGpu = true; 14 | opts.imageSet = 1 : size( imdb.imageFiles, 4 ); 15 | opts.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' o 'scoreDifference' 16 | %opts.detSavePathFormat = ''; 17 | 18 | % parse input 19 | opts = vl_argparse(opts, varargin); 20 | 21 | %% do the job 22 | numImages = length( opts.imageSet ); 23 | scores = cell( max(opts.imageSet), 1); 24 | 25 | res = [] ; 26 | 27 | nBatch = ceil(numImages/opts.batchSize); 28 | 29 | for iBatch = 1 : nBatch 30 | batchid = (iBatch-1)*opts.batchSize+1:min(iBatch*opts.batchSize, numImages); 31 | batch = opts.imageSet(batchid); 32 | fprintf('Working with batch %d of %d: ', iBatch, nBatch); 33 | 34 | [im, labels] = getBatch( imdb, batch ); 35 | 36 | tBatchStart = tic; 37 | im = gpuArray(im); 38 | 39 | net.layers{end}.class = labels ; 40 | res = vl_simplenn_globalModel(net, im, [], res, ... 41 | 'disableDropout', true, ... 42 | 'conserveMemory', opts.conserveMemory, ... 43 | 'sync', opts.sync ) ; 44 | curScores = gather( res(end - 2).x ); 45 | for j=1:length(batchid) 46 | score = curScores(:,:,:,j); 47 | sz = size(score); 48 | score = squeeze(reshape(score, [sz(1) sz(2) 2 sz(3)/2])); 49 | scores{opts.imageSet(batchid(j))} = score; 50 | 51 | %iImage = opts.imageSet( batchid(j) ); 52 | %[~,f_name,~] = fileparts( imdb.imageFiles{iImage} ); 53 | %save_path = sprintf(opts.detSavePathFormat, f_name); 54 | %save(save_path, 'score'); 55 | end 56 | fprintf('time: %fs\n', toc(tBatchStart) ); 57 | end 58 | 59 | 60 | end 61 | 62 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/trwsMex/trwsMex.m: -------------------------------------------------------------------------------- 1 | %trwsMex optimizes MRF energy using TRW-S or BP algorithm (wrapper to Vladimir Kolmogorov's code). 
2 | % http://pub.ist.ac.at/~vnk/papers/TRW-S.html 3 | % 4 | % This version assumes that pairwise potentials can be "decomposed": V_{ij}(k,l) = P(i, j) * M(k, l). 5 | % Here P depends only on variable indices, M depends only on labels. 6 | % 7 | % Input examples: 8 | % trwsMex(U, P) 9 | % trwsMex(U, P, M) 10 | % trwsMex(U, P, M, options) 11 | % Output examples: 12 | % S = trwsMex(U, P, M, options) 13 | % [S, E] = trwsMex(U, P, M, options) 14 | % [S, E, LB] = trwsMex(U, P, M, options) 15 | % [S, E, LB, lbPlot, energyPlot, timePlot] = trwsMex(U, P, M, options) 16 | % 17 | % INPUT: 18 | % U - unary terms (double[numLabels, numNodes]) 19 | % P - matrix of edge coefficients (sparse double[numNodes, numNodes]); only upper triangle is used 20 | % M - matrix of label dependencies (double[numLabels, numLabels]); if M is not specified, Potts is assumed 21 | % if you want to set options without M call: trwsMex(U, P, [], options) 22 | % options - Structure that determines the method to be used. 23 | % Fields: 24 | % method : method to use (string: 'trw-s' or 'bp') default: 'trw-s' 25 | % maxIter : maximum number of iterations (double) default: 100 26 | % funcEps : If functional change is less than funcEps then stop, TRW-S only (double) default: 1e-2 27 | % verbosity : verbosity level: 0 - no output; 1 - final output; 2 - full output (double) default: 0 28 | % printMinIter: After printMinIter iterations start printing the lower bound (double) default: 10 29 | % printIter : and print every printIter iterations (double) default: 5 30 | % 31 | % OUTPUT: 32 | % S - labeling that has energy E, vector numNodes * 1 of type double (indices are in [1,...,numLabels]) 33 | % E - energy of labeling S 34 | % LB - maximum value of lower bound of type double (only for TRW-S method) 35 | % lbPlot, energyPlot, timePlot - measurements per iteration 36 | % 37 | % Anton Osokin (firstname.lastname@gmail.com), 24.09.2014 38 | -------------------------------------------------------------------------------- 
/globalModel/run_training_globalModel.m: -------------------------------------------------------------------------------- 1 | %run_training_globalModel is the launching script fot the experiments with the global model 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | pretrainedNetworkPath = 'models'; 5 | dataPath = 'data/HollywoodHeads'; 6 | resultPath = 'results/HollywoodHeads'; 7 | 8 | if ~exist(resultPath, 'dir') 9 | mkdir(resultPath); 10 | end 11 | 12 | %% add all the required paths 13 | filePath = fileparts( mfilename('fullpath') ); 14 | run( fullfile( fileparts( filePath ), 'setup.m' ) ); 15 | 16 | %% set data files 17 | % network initialization 18 | pretrainedNetwork = fullfile( pretrainedNetworkPath, 'imagenet-torch-oquab.mat'); networkInputSize = [224, 224]; initNetworkName = 'torch-oquab'; addDropout = false; 19 | 20 | % get the mean image of the training set 21 | meanVector = [57, 52, 47]; 22 | 23 | % parameters 24 | opts_cnn = struct; 25 | opts_cnn.dataPath = dataPath; 26 | opts_cnn.dataset.trainingSetFile = fullfile('Splits', 'train.txt'); 27 | opts_cnn.dataset.validationSetFile = fullfile('Splits', 'val.txt'); 28 | opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt'); 29 | opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations'; 30 | opts_cnn.dataset.imageLocalPrefix = 'JPEGImages'; 31 | opts_cnn.dataset.candidateLocalPrefix = 'Candidates'; 32 | 33 | opts_cnn.expDir = resultPath; 34 | 35 | opts_cnn.train.numValidationPerEpoch = 2; 36 | opts_cnn.train.numEpochs = 6; 37 | opts_cnn.train.learningRate = [0.0001 0.0001 0.00001 0.00001 0.000001 0.000001]; 38 | opts_cnn.train.continue = true; 39 | opts_cnn.train.batchSize = 32; 40 | opts_cnn.train.backPropDepth = +inf; 41 | opts_cnn.train.weightDecay = 0.0005; 42 | 43 | opts_cnn.train.expDir = fullfile( opts_cnn.expDir, 'global', 'models'); 44 | 45 | % get the mean image for normalization 46 | meanImage = single( repmat( reshape( meanVector, [1 1 3] ), networkInputSize ) ); 47 | 48 | % network 
initialization 49 | extraLayers = []; 50 | opts_cnn.networkInitialization = @() cnn_initNet_globalModel( pretrainedNetwork, 284, 2, meanImage, extraLayers, addDropout ); 51 | 52 | %% run training 53 | cnn_globalModel(opts_cnn ); 54 | -------------------------------------------------------------------------------- /pairwiseModel/cnn_computeScores_pairwiseModel.m: -------------------------------------------------------------------------------- 1 | function [scores, candidateIds] = cnn_computeScores_pairwiseModel( net, imdb, getBatch, varargin ) 2 | %cnn_computeScores_pairwiseModel computes the scores using the structure network 3 | 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | 8 | %% compute results w.r.t. patches 9 | opts = struct; 10 | opts.conserveMemory = true; 11 | opts.sync = true; 12 | opts.imageSet = 1 : size( imdb.imageFiles, 4 ); 13 | opts.scoreMode = 'maxMarginals'; 14 | % parse input 15 | opts = vl_argparse(opts, varargin); 16 | 17 | %% do the job 18 | numImages = length( opts.imageSet ); 19 | scores = cell( max(opts.imageSet), 1); 20 | candidateIds = cell( max(opts.imageSet), 1); 21 | 22 | for iImageId = 1 : numImages 23 | iImage = opts.imageSet( iImageId ); 24 | fprintf('Image %d/%d: ', iImageId, numImages); 25 | tImageStart = tic; 26 | 27 | [patchData, patchLabels, patchInfo] = getBatch( imdb, iImage ); 28 | curNumCandidates = size( patchData, 4 ); 29 | fprintf('patches=%d, ', curNumCandidates ); 30 | 31 | [~, ~, predictions] = vl_structuredNetwork_pairwiseModel(net, patchData, [], patchLabels, [], ... 32 | 'computeMaxMarginals', true, ... 33 | 'disableDropout', true, ... 34 | 'conserveMemory', opts.conserveMemory, ... 
35 | 'sync', opts.sync); 36 | maxMarginals = predictions{1}.maxMarginals; 37 | 38 | candidateIds{ iImage } = int32( patchInfo.candidateIds{1}(:) ); 39 | 40 | 41 | switch opts.scoreMode 42 | case 'maxMarginals' 43 | curScores = maxMarginals(:, 1) - maxMarginals(:, 2); 44 | otherwise 45 | error('cnn_computeScores_pairwiseModel:unknownScoreMode', 'options scoreMode is of incorrect value'); 46 | end 47 | 48 | curScores = curScores(:); 49 | if numel(curScores) ~= numel( candidateIds{iImage} ) 50 | error('cnn_computeScores_pairwiseModel:inconsistentCandidates', 'something went wrong') 51 | end 52 | scores{ iImage } = single( curScores ); 53 | 54 | fprintf('time: %fs\n', toc(tImageStart) ); 55 | end 56 | 57 | 58 | end 59 | 60 | -------------------------------------------------------------------------------- /utils/VOCcode/VOCxml2struct.m: -------------------------------------------------------------------------------- 1 | function res = VOCxml2struct(xml) 2 | 3 | xml(xml==9|xml==10|xml==13)=[]; 4 | 5 | [res,xml]=parse(xml,1,[]); 6 | 7 | function [res,ind]=parse(xml,ind,parent) 8 | 9 | res=[]; 10 | if ~isempty(parent)&&xml(ind)~='<' 11 | i=findchar(xml,ind,'<'); 12 | res=trim(xml(ind:i-1)); 13 | ind=i; 14 | [tag,ind]=gettag(xml,i); 15 | if ~strcmp(tag,['/' parent]) 16 | error('<%s> closed with <%s>',parent,tag); 17 | end 18 | else 19 | while ind<=length(xml) 20 | [tag,ind]=gettag(xml,ind); 21 | if strcmp(tag,['/' parent]) 22 | return 23 | else 24 | [sub,ind]=parse(xml,ind,tag); 25 | if isstruct(sub) 26 | if isfield(res,tag) 27 | n=length(res.(tag)); 28 | fn=fieldnames(sub); 29 | for f=1:length(fn) 30 | res.(tag)(n+1).(fn{f})=sub.(fn{f}); 31 | end 32 | else 33 | res.(tag)=sub; 34 | end 35 | else 36 | if isfield(res,tag) 37 | if ~iscell(res.(tag)) 38 | res.(tag)={res.(tag)}; 39 | end 40 | res.(tag){end+1}=sub; 41 | else 42 | res.(tag)=sub; 43 | end 44 | end 45 | end 46 | end 47 | end 48 | 49 | function i = findchar(str,ind,chr) 50 | 51 | i=[]; 52 | while ind<=length(str) 53 
| if str(ind)==chr 54 | i=ind; 55 | break 56 | else 57 | ind=ind+1; 58 | end 59 | end 60 | 61 | function [tag,ind]=gettag(xml,ind) 62 | 63 | if ind>length(xml) 64 | tag=[]; 65 | elseif xml(ind)=='<' 66 | i=findchar(xml,ind,'>'); 67 | if isempty(i) 68 | error('incomplete tag'); 69 | end 70 | tag=xml(ind+1:i-1); 71 | ind=i+1; 72 | else 73 | error('expected tag'); 74 | end 75 | 76 | function s = trim(s) 77 | 78 | for i=1:numel(s) 79 | if ~isspace(s(i)) 80 | s=s(i:end); 81 | break 82 | end 83 | end 84 | for i=numel(s):-1:1 85 | if ~isspace(s(i)) 86 | s=s(1:i); 87 | break 88 | end 89 | end 90 | 91 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/trwsMex/src/example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "MRFEnergy.h" 3 | 4 | // Example: minimizing an energy function with Potts terms. 5 | // See type*.h files for other types of terms. 6 | 7 | void testPotts() 8 | { 9 | MRFEnergy* mrf; 10 | MRFEnergy::NodeId* nodes; 11 | MRFEnergy::Options options; 12 | TypePotts::REAL energy, lowerBound; 13 | 14 | const int nodeNum = 2; // number of nodes 15 | const int K = 3; // number of labels 16 | TypePotts::REAL D[K]; 17 | int x, y; 18 | 19 | mrf = new MRFEnergy(TypePotts::GlobalSize(K)); 20 | nodes = new MRFEnergy::NodeId[nodeNum]; 21 | 22 | // construct energy 23 | D[0] = 0; D[1] = 1; D[2] = 2; 24 | nodes[0] = mrf->AddNode(TypePotts::LocalSize(), TypePotts::NodeData(D)); 25 | D[0] = 3; D[1] = 4; D[2] = 5; 26 | nodes[1] = mrf->AddNode(TypePotts::LocalSize(), TypePotts::NodeData(D)); 27 | mrf->AddEdge(nodes[0], nodes[1], TypePotts::EdgeData(6)); 28 | 29 | // Function below is optional - it may help if, for example, nodes are added in a random order 30 | // mrf->SetAutomaticOrdering(); 31 | 32 | /////////////////////// TRW-S algorithm ////////////////////// 33 | options.m_iterMax = 30; // maximum number of iterations 34 | 
mrf->Minimize_TRW_S(options, lowerBound, energy); 35 | 36 | // read solution 37 | x = mrf->GetSolution(nodes[0]); 38 | y = mrf->GetSolution(nodes[1]); 39 | 40 | printf("Solution: %d %d\n", x, y); 41 | 42 | //////////////////////// BP algorithm //////////////////////// 43 | mrf->ZeroMessages(); // in general not necessary - it may be faster to start 44 | // with messages computed in previous iterations. 45 | // NOTE: in most cases, immediately after creating the energy 46 | // all messages are zero. EXCEPTION: typeBinary and typeBinaryFast. 47 | // So calling ZeroMessages for these types will NOT transform 48 | // the energy to the original state. 49 | 50 | options.m_iterMax = 30; // maximum number of iterations 51 | mrf->Minimize_BP(options, energy); 52 | 53 | // read solution 54 | x = mrf->GetSolution(nodes[0]); 55 | y = mrf->GetSolution(nodes[1]); 56 | 57 | printf("Solution: %d %d\n", x, y); 58 | 59 | // done 60 | delete nodes; 61 | delete mrf; 62 | } 63 | 64 | int main() 65 | { 66 | testPotts(); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/computeMinMarginalsPairwiseBinary.m: -------------------------------------------------------------------------------- 1 | function [minMarginals, bestLabeling, minMarginals_args] = computeMinMarginalsPairwiseBinary( unaryTerms, pairwiseTerms, varargin ) 2 | %computeMinMarginalsPairwiseBinary computes the min-marginals for the energy of unary and pairwise potentials and binary variables 3 | % if number of nodes is <= than 20 the computation is exact (see minimizeEnergyPairwiseBinary.m) otherwise approximations are used 4 | 5 | if ~exist('varargin', 'var') 6 | varargin = {}; 7 | end 8 | 9 | %% parameters 10 | opts = struct; 11 | opts.bigValue = 1e+4; 12 | % parse input 13 | opts = vl_argparse(opts, varargin); 14 | 15 | 16 | %% check input 17 | numLabels = 2; 18 | if ~isnumeric(unaryTerms) || ~ismatrix(unaryTerms) || size(unaryTerms, 
2) ~= 2 19 | error('Incorrect format for unaryTerms, has to be numNodes x 2') 20 | end 21 | numNodes = size(unaryTerms, 1); 22 | 23 | if ~isnumeric(pairwiseTerms) || ~ismatrix(pairwiseTerms) || size(pairwiseTerms, 2) ~= 6 24 | error('Incorrect format for pairwiseTerms, has to be numEdges x 6') 25 | end 26 | numEdges = size(pairwiseTerms, 1); 27 | 28 | %% compute the min-marginals 29 | % run energy minimization without a loss 30 | [bestLabeling, energy] = minimizeEnergyPairwiseBinary( unaryTerms, pairwiseTerms ); 31 | 32 | % compute min marginals 33 | minMarginals = nan(numNodes, numLabels); 34 | minMarginals( (1 : numNodes)' + numNodes * (bestLabeling - 1) ) = energy; 35 | 36 | minMarginals_args = nan( numNodes, 2, numNodes ); 37 | 38 | for iNode = 1 : numNodes 39 | for iLabel = 1 : numLabels 40 | if iLabel ~= bestLabeling(iNode) 41 | curUnary = unaryTerms; 42 | curUnary(iNode, setdiff( 1 : numLabels, iLabel ) ) = opts.bigValue; 43 | [curLabels, curEnergy] = minimizeEnergyPairwiseBinary( curUnary, pairwiseTerms ); 44 | 45 | if curLabels(iNode) ~= iLabel 46 | error('Min-marginal computation did not work!'); 47 | end 48 | 49 | minMarginals(iNode, iLabel) = curEnergy; 50 | minMarginals_args( iNode, iLabel, : ) = reshape( curLabels, [1 1 numNodes] ); 51 | else 52 | minMarginals_args( iNode, iLabel, : ) = reshape( bestLabeling, [1 1 numNodes] ); 53 | end 54 | end 55 | end 56 | 57 | minMarginals_args = minMarginals_args - 1; % to be compatible with computeMinMarginalsBinaryMex.m 58 | 59 | end 60 | 61 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/qpboMex/README.TXT: -------------------------------------------------------------------------------- 1 | This software implements the MATLAB mex-wrapper for QPBO energy minimization algorithm by V.Kolmogorov: 2 | http://pub.ist.ac.at/~vnk/software/QPBO-v1.32.src.zip 3 | 4 | Anton Osokin, (firstname.lastname@gmail.com) 5 | 24.09.2014 6 | 
https://github.com/aosokin/qpboMex 7 | 8 | Please cite the following paper in any resulting publication: 9 | 10 | Vladimir Kolmogorov and Carsten Rother. 11 | Minimizing non-submodular functions with graph cuts - a review. 12 | In IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 29(7):1274-1279, July 2007 13 | 14 | 15 | PACKAGE 16 | ----------------------------- 17 | 18 | ./qpboMex.cpp - the C++ code of the wrapper 19 | 20 | ./build_qpboMex.m - function to build the wrapper 21 | 22 | ./qpboMex.m - the description of the implemented function 23 | 24 | ./example_qpboMex.m - a usage example 25 | 26 | ./QPBO-v1.32.src - C++ code by Vladimir Kolmogorov (the code is used as is) 27 | http://pub.ist.ac.at/~vnk/software/QPBO-v1.32.src.zip 28 | 29 | ./qpboMex.mexw64 - Win x64 binary file for the mex-function compiled using MATLAB R2014a + MSVC 2012 30 | 31 | ./qpboMex.mexa64 - Linux x64 binary file for the mex-function compiled using MATLAB R2012a + gcc-4.4 32 | 33 | USING THE CODE 34 | ----------------------------- 35 | 36 | 0) Install MATLAB and one of the supported compilers 37 | 38 | 1) Run build_qpboMex.m 39 | 40 | 2) Run example_qpboMex.m to check if the code works 41 | 42 | The code was tested under 43 | - Win7-x64 using MATLAB R2014a and MSVC 2012; 44 | - ubuntu-12.04-x64 using MATLAB R2012a and gcc-4.4 45 | 46 | OTHER PACKAGES 47 | ----------------------------- 48 | 49 | * BK max-flow/min-cut algorithm: 50 | https://github.com/aosokin/graphCutMex_BoykovKolmogorov 51 | 52 | The BK algorithm is the standard choice for solving graph cut problems. 53 | 54 | * BK max-flow/min-cut algorithm with interface supporting dynamic graph cuts: 55 | https://github.com/aosokin/graphCutDynamicMex_BoykovKolmogorov 56 | 57 | If you need to solve many similar graph cut problems in a row, consider using dynamic graph cuts. 
58 | 59 | * IBFS max-flow/min-cut algorithm: https://github.com/aosokin/graphCutMex_IBFS 60 | 61 | The IBFS algorithm has polynomial time runtime guarantees. The BK does not. 62 | In my experience BK works faster for graphs built for standard 4(8)-connected grid MRFs. 63 | If the graph becomes more complicated (especially hierarchical) consider trying IBFS instead. 64 | -------------------------------------------------------------------------------- /pairwiseModel/pruneCandidatesNms.m: -------------------------------------------------------------------------------- 1 | function [ newScores, newCandidateIds ] = pruneCandidatesNms( imdb, scoreFile, varargin ) 2 | %pruneCandidatesNms is a part of cnn_prepareData_pairwiseModel.m which preprocesses for the training of the pairwise model 3 | 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | %% parse parameters 8 | opts = struct; 9 | opts.maxNumPatchesPerImage = 16; 10 | opts.nmsIntersectionOverAreaThreshold = 0.3; 11 | opts.numThreads = 4; 12 | opts.dataPath = ''; 13 | % parse input 14 | opts = vl_argparse(opts, varargin); 15 | 16 | if ~exist('scoreFile', 'var') || isempty(scoreFile) || ~exist( fullfile(scoreFile), 'file') 17 | if ~isempty(opts.dataPath) 18 | scoreFile = fullfile(opts.dataPath, scoreFile); 19 | if ~exist( fullfile(scoreFile), 'file') 20 | error('pruneCandidatesNms:noScoreFile', 'Cannot get the file with the precomputed scores'); 21 | end 22 | end 23 | end 24 | 25 | %% do the job 26 | fprintf('Reading precomputed scores ... '); 27 | tStart = tic; 28 | load( scoreFile, 'scores', 'candidateIds' ); 29 | fprintf( '%fs\n', toc(tStart) ); 30 | 31 | fprintf('Pruning candidates with NMS ... 
'); 32 | tStart = tic; 33 | numImages = length( imdb.imageFiles ); 34 | newScores = cell(numImages, 1); 35 | newCandidateIds = cell(numImages, 1); 36 | candidateFiles = imdb.candidateFiles; 37 | curPrefix = opts.dataPath; 38 | parfor (iImage = 1 : numImages, opts.numThreads) 39 | % for iImage = 1 : numImages 40 | if isempty(scores{iImage}) || isempty(candidateIds{iImage}) 41 | continue; 42 | end 43 | if mod(iImage, 1000) == 0 44 | fprintf('Image %d\n', iImage); 45 | end 46 | 47 | curCandidates = load( fullfile(curPrefix, candidateFiles{ iImage } ) ); 48 | 49 | curCandidates = curCandidates.boxes( candidateIds{ iImage }, :); 50 | curScores = scores{ iImage }; 51 | 52 | % fix the Bb format: SelectiveSearch format [y1 x1 y2 x2] to format [x y w h] 53 | curBb = convertBb_Y1X1Y2X2_to_X1Y1WH( curCandidates(:, 1 : 4) ); 54 | 55 | %select only BBs with high scores and non-max-sup 56 | idsNms = selectBoundingBoxesNonMaxSup( curBb, curScores, ... 57 | 'numBoundingBoxMax', opts.maxNumPatchesPerImage, ... 58 | 'nmsIntersectionOverAreaThreshold', opts.nmsIntersectionOverAreaThreshold); 59 | 60 | newCandidateIds{iImage} = candidateIds{ iImage }(idsNms); 61 | newScores{iImage} = curScores(idsNms); 62 | end 63 | fprintf( '%fs\n', toc(tStart) ); 64 | 65 | end 66 | 67 | -------------------------------------------------------------------------------- /utils/selective_search_boxes.m: -------------------------------------------------------------------------------- 1 | function boxes = selective_search_boxes(im, fast_mode, im_width) 2 | % The function is taken from the R-CNN code by Ross Girshick: 3 | % https://github.com/rbgirshick/rcnn/blob/master/selective_search/selective_search_boxes.m 4 | % 5 | % Based on the demo.m file included in the Selective Search 6 | % IJCV code. 
7 | 8 | if ~exist('fast_mode', 'var') || isempty(fast_mode) 9 | fast_mode = true; 10 | end 11 | 12 | if ~exist('im_width', 'var') || isempty(im_width) 13 | im_width = []; 14 | scale = 1; 15 | else 16 | scale = size(im, 2) / im_width; 17 | end 18 | 19 | if scale ~= 1 20 | im = imresize(im, [NaN im_width]); 21 | end 22 | 23 | % Parameters. Note that this controls the number of hierarchical 24 | % segmentations which are combined. 25 | colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'}; 26 | 27 | % Here you specify which similarity functions to use in merging 28 | simFunctionHandles = {@SSSimColourTextureSizeFillOrig, ... 29 | @SSSimTextureSizeFill, ... 30 | @SSSimBoxFillOrig, ... 31 | @SSSimSize}; 32 | 33 | % Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm. 34 | % Note that by default, we set minSize = k, and sigma = 0.8. 35 | % controls size of segments of initial segmentation. 36 | ks = [50 100 150 300]; 37 | sigma = 0.8; 38 | 39 | % After segmentation, filter out boxes which have a width/height smaller 40 | % than minBoxWidth (default = 20 pixels). 41 | minBoxWidth = 20; 42 | 43 | % Comment the following three lines for the 'quality' version 44 | if fast_mode 45 | colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab 46 | simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies 47 | ks = ks(1:2); 48 | end 49 | 50 | idx = 1; 51 | for j = 1:length(ks) 52 | k = ks(j); % Segmentation threshold k 53 | minSize = k; % We set minSize = k 54 | for n = 1:length(colorTypes) 55 | colorType = colorTypes{n}; 56 | [boxesT{idx} blobIndIm blobBoxes hierarchy priorityT{idx}] = ... 
57 | Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles); 58 | idx = idx + 1; 59 | end 60 | end 61 | boxes = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies 62 | priority = cat(1, priorityT{:}); % Concatenate priorities 63 | 64 | % Do pseudo random sorting as in paper 65 | priority = priority .* rand(size(priority)); 66 | [priority sortIds] = sort(priority, 'ascend'); 67 | boxes = boxes(sortIds,:); 68 | 69 | boxes = FilterBoxesWidth(boxes, minBoxWidth); 70 | boxes = BoxRemoveDuplicates(boxes); 71 | 72 | if scale ~= 1 73 | boxes = (boxes - 1) * scale + 1; 74 | end -------------------------------------------------------------------------------- /pairwiseModel/vl_svmStructLoss_pairwiseCompactModel_test_derivative.m: -------------------------------------------------------------------------------- 1 | function vl_svmStructLoss_pairwiseCompactModel_test_derivative 2 | %vl_svmStructLoss_pairwiseCompactModel_test_derivative tests vl_svmStructLoss_pairwiseCompactModel 3 | 4 | load vl_svmStructLoss_pairwiseCompactModel_test_derivative.mat unaryPotentials pairwisePotentials labels dzdy 5 | 6 | [lossValue, unaryDerivative, pairwiseDerivative] = vl_svmStructLoss_pairwiseCompactModel( unaryPotentials, pairwisePotentials, labels, dzdy, [], 100 ); 7 | 8 | testEps = 1e-2; 9 | numTrials = 100; 10 | rng(1); 11 | 12 | % test unary derivatives 13 | empiricalUnaryDerivative = zeros(size(unaryPotentials), 'like', unaryPotentials); 14 | fprintf('Number of unary derivatives: %d, testing %d\n', numel(empiricalUnaryDerivative), min(numTrials, numel(empiricalUnaryDerivative))); 15 | 16 | randOrder = randperm( numel(empiricalUnaryDerivative), min(numTrials, numel(empiricalUnaryDerivative)) ); 17 | for iValueId = 1 : numel(randOrder) 18 | iValue = randOrder(iValueId); 19 | 20 | unaryNew = unaryPotentials; 21 | unaryNew(iValue) = unaryNew(iValue) + testEps; 22 | 23 | lossValue_test = vl_svmStructLoss_pairwiseCompactModel( unaryNew, pairwisePotentials, 
labels, [], [], 100); 24 | 25 | empiricalUnaryDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 26 | end 27 | 28 | empDer = empiricalUnaryDerivative(randOrder); 29 | trueDer = unaryDerivative(randOrder); 30 | unaryDerivativeError = gather( norm(empDer(:) - trueDer(:)) / norm(trueDer(:)) ); 31 | fprintf('Relative error of unary derivative: %f\n', unaryDerivativeError ); 32 | 33 | % test pairwise derivatives 34 | empiricalPairwiseDerivative = zeros(size(pairwisePotentials), 'like', pairwisePotentials); 35 | fprintf('Number of pairwise derivatives: %d, testing %d\n', numel(empiricalPairwiseDerivative), min(numTrials, numel(empiricalPairwiseDerivative))); 36 | 37 | randOrder = randperm( numel(empiricalPairwiseDerivative), min(numTrials, numel(empiricalPairwiseDerivative)) ); 38 | for iValueId = 1 : numel(randOrder) 39 | iValue = randOrder(iValueId); 40 | 41 | pairwiseNew = pairwisePotentials; 42 | pairwiseNew(iValue) = pairwiseNew(iValue) + testEps; 43 | 44 | lossValue_test = vl_svmStructLoss_pairwiseCompactModel( unaryPotentials, pairwiseNew, labels, [], [], 100); 45 | 46 | empiricalPairwiseDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 47 | end 48 | 49 | empDer = empiricalPairwiseDerivative(randOrder); 50 | trueDer = pairwiseDerivative(randOrder); 51 | pairwiseDerivativeError = gather( norm(empDer(:) - trueDer(:)) / norm(trueDer(:)) ); 52 | fprintf('Relative error of pairwise derivative: %f\n', pairwiseDerivativeError ); 53 | 54 | end 55 | 56 | -------------------------------------------------------------------------------- /pairwiseModel/vl_logisticScoreLoss_pairwiseCompactModel_test_derivative.m: -------------------------------------------------------------------------------- 1 | function vl_logisticScoreLoss_pairwiseCompactModel_test_derivative 2 | %vl_logisticScoreLoss_pairwiseCompactModel_test_derivative tests vl_logisticScoreLoss_pairwiseCompactModel 3 | 4 | % the .mat file is the same as for 
vl_svmStructLoss_pairwiseCompactModel_test_derivative.m 5 | load vl_svmStructLoss_pairwiseCompactModel_test_derivative.mat unaryPotentials pairwisePotentials labels dzdy 6 | 7 | [lossValue, unaryDerivative, pairwiseDerivative] = vl_logisticScoreLoss_pairwiseCompactModel( unaryPotentials, pairwisePotentials, labels, dzdy ); 8 | 9 | testEps = 1e-3; 10 | numTrials = 100; 11 | rng(1); 12 | 13 | % test unary derivatives 14 | empiricalUnaryDerivative = zeros(size(unaryPotentials), 'like', unaryPotentials); 15 | fprintf('Number of unary derivatives: %d, testing %d\n', numel(empiricalUnaryDerivative), min(numTrials, numel(empiricalUnaryDerivative))); 16 | 17 | randOrder = randperm( numel(empiricalUnaryDerivative), min(numTrials, numel(empiricalUnaryDerivative)) ); 18 | for iValueId = 1 : numel(randOrder) 19 | iValue = randOrder(iValueId); 20 | 21 | unaryNew = unaryPotentials; 22 | unaryNew(iValue) = unaryNew(iValue) + testEps; 23 | 24 | lossValue_test = vl_logisticScoreLoss_pairwiseCompactModel( unaryNew, pairwisePotentials, labels, []); 25 | 26 | empiricalUnaryDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 27 | end 28 | 29 | empDer = empiricalUnaryDerivative(randOrder); 30 | trueDer = unaryDerivative(randOrder); 31 | unaryDerivativeError = gather( norm(empDer(:) - trueDer(:)) / norm(trueDer(:)) ); 32 | fprintf('Relative error of unary derivative: %f\n', unaryDerivativeError ); 33 | 34 | % test pairwise derivatives 35 | empiricalPairwiseDerivative = zeros(size(pairwisePotentials), 'like', pairwisePotentials); 36 | fprintf('Number of pairwise derivatives: %d, testing %d\n', numel(empiricalPairwiseDerivative), min(numTrials, numel(empiricalPairwiseDerivative))); 37 | 38 | randOrder = randperm( numel(empiricalPairwiseDerivative), min(numTrials, numel(empiricalPairwiseDerivative)) ); 39 | for iValueId = 1 : numel(randOrder) 40 | iValue = randOrder(iValueId); 41 | 42 | pairwiseNew = pairwisePotentials; 43 | pairwiseNew(iValue) = pairwiseNew(iValue) + testEps; 
44 | 45 | lossValue_test = vl_logisticScoreLoss_pairwiseCompactModel( unaryPotentials, pairwiseNew, labels, []); 46 | 47 | empiricalPairwiseDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 48 | end 49 | 50 | empDer = empiricalPairwiseDerivative(randOrder); 51 | trueDer = pairwiseDerivative(randOrder); 52 | pairwiseDerivativeError = gather( norm(empDer(:) - trueDer(:)) / norm(trueDer(:)) ); 53 | fprintf('Relative error of pairwise derivative: %f\n', pairwiseDerivativeError ); 54 | 55 | end 56 | 57 | -------------------------------------------------------------------------------- /utils/VOCcode/VOCreadrecxml.m: -------------------------------------------------------------------------------- 1 | function rec = VOCreadrecxml(path) 2 | 3 | x=VOCreadxml(path); 4 | x=x.annotation; 5 | 6 | rec.folder=x.folder; 7 | rec.filename=x.filename; 8 | rec.source.database=x.source.database; 9 | rec.source.annotation=x.source.annotation; 10 | rec.source.image=x.source.image; 11 | 12 | rec.size.width=str2double(x.size.width); 13 | rec.size.height=str2double(x.size.height); 14 | rec.size.depth=str2double(x.size.depth); 15 | 16 | rec.segmented=strcmp(x.segmented,'1'); 17 | 18 | rec.imgname=[x.folder '/JPEGImages/' x.filename]; 19 | rec.imgsize=str2double({x.size.width x.size.height x.size.depth}); 20 | rec.database=rec.source.database; 21 | 22 | for i=1:length(x.object) 23 | rec.objects(i)=xmlobjtopas(x.object(i)); 24 | end 25 | 26 | function p = xmlobjtopas(o) 27 | 28 | p.class=o.name; 29 | 30 | if isfield(o,'pose') 31 | if strcmp(o.pose,'Unspecified') 32 | p.view=''; 33 | else 34 | p.view=o.pose; 35 | end 36 | else 37 | p.view=''; 38 | end 39 | 40 | if isfield(o,'truncated') 41 | p.truncated=strcmp(o.truncated,'1'); 42 | else 43 | p.truncated=false; 44 | end 45 | 46 | if isfield(o,'occluded') 47 | p.occluded=strcmp(o.occluded,'1'); 48 | else 49 | p.occluded=false; 50 | end 51 | 52 | if isfield(o,'difficult') 53 | p.difficult=strcmp(o.difficult,'1'); 54 | else 55 | 
p.difficult=false; 56 | end 57 | 58 | p.label=['PAS' p.class p.view]; 59 | if p.truncated 60 | p.label=[p.label 'Trunc']; 61 | end 62 | if p.occluded 63 | p.label=[p.label 'Occ']; 64 | end 65 | if p.difficult 66 | p.label=[p.label 'Diff']; 67 | end 68 | 69 | p.orglabel=p.label; 70 | 71 | p.bbox=str2double({o.bndbox.xmin o.bndbox.ymin o.bndbox.xmax o.bndbox.ymax}); 72 | 73 | p.bndbox.xmin=str2double(o.bndbox.xmin); 74 | p.bndbox.ymin=str2double(o.bndbox.ymin); 75 | p.bndbox.xmax=str2double(o.bndbox.xmax); 76 | p.bndbox.ymax=str2double(o.bndbox.ymax); 77 | 78 | if isfield(o,'polygon') 79 | warning('polygon unimplemented'); 80 | p.polygon=[]; 81 | else 82 | p.polygon=[]; 83 | end 84 | 85 | if isfield(o,'mask') 86 | warning('mask unimplemented'); 87 | p.mask=[]; 88 | else 89 | p.mask=[]; 90 | end 91 | 92 | if isfield(o,'part')&&~isempty(o.part) 93 | p.hasparts=true; 94 | for i=1:length(o.part) 95 | p.part(i)=xmlobjtopas(o.part(i)); 96 | end 97 | else 98 | p.hasparts=false; 99 | p.part=[]; 100 | end 101 | 102 | if isfield(o,'point') 103 | p.haspoint=true; 104 | p.point.x=str2double(o.point.x); 105 | p.point.y=str2double(o.point.y); 106 | else 107 | p.point=[]; 108 | end 109 | 110 | if isfield(o,'actions') 111 | p.hasactions=true; 112 | fn=fieldnames(o.actions); 113 | for i=1:numel(fn) 114 | p.actions.(fn{i})=strcmp(o.actions.(fn{i}),'1'); 115 | end 116 | else 117 | p.hasactions=false; 118 | p.actions=[]; 119 | end 120 | -------------------------------------------------------------------------------- /localModel/run_training_localModel.m: -------------------------------------------------------------------------------- 1 | %run_training_localModel is the launching script for the training of the local model 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | dataPath = 'data/HollywoodHeads'; 5 | resultPath = 'results/HollywoodHeads'; 6 | pretrainedNetworkPath = 'models'; 7 | 8 | %% add all the required paths 9 | filePath = fileparts( mfilename('fullpath') ); 10 | run( 
fullfile( fileparts( filePath ), 'setup.m' ) ); 11 | 12 | %% set data files 13 | % network initialization 14 | pretrainedNetwork = fullfile( pretrainedNetworkPath, 'imagenet-torch-oquab.mat'); networkInputSize = [224, 224]; initNetworkName = 'oquabTorch'; addDropout = false; 15 | %CAUTION: the MatConvNet pretrained networks do not have dropout layers! be careful when turning this option on! 16 | %pretrainedNetwork = fullfile( pretrainedNetworkPath, 'imagenet-caffe-alex.mat'); networkInputSize = [227, 227]; initNetworkName = 'alexCaffe'; addDropout = true; 17 | %pretrainedNetwork = fullfile( pretrainedNetworkPath, 'imagenet-vgg-s.mat'); networkInputSize = [227, 227]; initNetworkName = 'vggS'; addDropout = true; 18 | %pretrainedNetwork = fullfile( pretrainedNetworkPath, 'imagenet-vgg-verydeep-16.mat'); networkInputSize = [227, 227]; initNetworkName = 'vggVeryDeep16'; addDropout = true; 19 | 20 | % mean RGB vector used for normalization (hard-coded here; this script does not compute it) 21 | meanVector = [57, 52, 47]; 22 | 23 | %% preparation 24 | % parameters 25 | opts_cnn = struct; 26 | opts_cnn.dataPath = dataPath; 27 | opts_cnn.dataset.trainingSetFile = fullfile('Splits', 'train.txt'); 28 | opts_cnn.dataset.validationSetFile = fullfile('Splits', 'val.txt'); 29 | opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt'); 30 | opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations'; 31 | opts_cnn.dataset.imageLocalPrefix = 'JPEGImages'; 32 | opts_cnn.dataset.candidateLocalPrefix = 'Candidates'; 33 | 34 | opts_cnn.expDir = resultPath; 35 | opts_cnn.maxGpuImagesEvaluation = 3000; 36 | opts_cnn.scoreMode = 'afterSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference'; 37 | 38 | opts_cnn.train.numValidationPerEpoch = 8; 39 | opts_cnn.train.numEpochs = 3; 40 | opts_cnn.train.learningRate = [ 0.001 0.0001 0.00001 ]; % three values, matching numEpochs = 3 41 | opts_cnn.train.batchSize = 1; 42 | opts_cnn.train.backPropDepth = +inf; 43 | opts_cnn.train.weightDecay = 0.0005; 44 | 45 | opts_cnn.getBatch.numPatchesPerImage = 64; 46 |
opts_cnn.getBatch.maxPositives = 32; 47 | opts_cnn.getBatch.iouPositiveThreshold = 0.6; 48 | opts_cnn.getBatch.iouNegativeThreshold = 0.5; 49 | opts_cnn.getBatch.jitterStd = 1; 50 | opts_cnn.getBatch.cropPad = [18, 18, 18, 18]; 51 | 52 | opts_cnn.evaluation.iouThreshold = 0.5; 53 | opts_cnn.evaluation.useDifficultImages = false; 54 | 55 | opts_cnn.train.expDir = fullfile( opts_cnn.expDir, 'local', 'models'); 56 | 57 | % tile the per-channel mean into a networkInputSize(1) x networkInputSize(2) x 3 single image used for normalization 58 | meanImage = single( repmat( reshape( meanVector, [1 1 3] ), networkInputSize ) ); 59 | 60 | % network initialization (deferred: stored as a function handle and invoked by the training code) 61 | extraLayers = []; 62 | opts_cnn.networkInitialization = @() cnn_initNet_localModel( pretrainedNetwork, 2, meanImage, extraLayers, addDropout ); 63 | 64 | %% run training 65 | cnn_localModel( opts_cnn ); 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /utils/load_det_local_pairwise_global.m: -------------------------------------------------------------------------------- 1 | function det = load_det_local_pairwise_global(varargin) 2 | % Combine detection results of local, pairwise and global models 3 | % Options are given as parameter-value pairs (parsed by vl_argparse below); the defaults follow. 4 | opts = struct; 5 | opts.alpha_pairwise_range = 0.9:-0.1:0.1; 6 | opts.bias_range = 10:-1:-10; 7 | opts.local_res_path = ''; 8 | opts.pairwise_res_path = ''; 9 | 10 | opts.regression.param = [0 0 1 1]; 11 | opts.regression.fix_ann = struct; % fix annotation if needed 12 | opts.regression.fix_ann.x_off = 0; 13 | opts.regression.fix_ann.y_off = 0; 14 | opts.regression.fix_ann.w = inf; 15 | opts.regression.fix_ann.h = inf; 16 | opts.regression.warp = 'none'; 17 | 18 | opts.global = struct; % options of global model 19 | opts.global.alpha = 0; 20 | opts.global.scale_range = [1 2 4 8]; 21 | opts.global.stride_proportion = 2; 22 | opts.global.hm_size = {}; % indexed by the scale value itself (1, 2, 4, 8), so cells 3, 5, 6 and 7 stay empty 23 | for scl=opts.global.scale_range 24 | tile_size = 224/scl; 25 | stride = tile_size/opts.global.stride_proportion; 26 | opts.global.hm_size{scl} = floor((224-stride)/stride); 27 |
end 28 | opts.global.path_format = ''; 29 | opts.global.platform = 'matconvnet'; 30 | 31 | opts.ialpha = 1; 32 | opts.ibias = 1; 33 | 34 | opts.im_path_format = ''; 35 | opts.im_set = []; 36 | 37 | opts.verbose = true; 38 | opts.progress_part = 5; 39 | 40 | opts = vl_argparse(opts, varargin); 41 | 42 | %% Load local detections 43 | load(opts.local_res_path , 'det'); 44 | det_lcl = det; 45 | %% load pairwise detections 46 | load(opts.pairwise_res_path , 'det'); 47 | det_pw = det; 48 | %% Combined detections 49 | alpha_pairwise = opts.alpha_pairwise_range(opts.ialpha); 50 | bias = opts.bias_range(opts.ibias); 51 | 52 | DET = []; 53 | N_img = length(det_lcl); 54 | 55 | fprintf('Combining detections\n'); 56 | progress_part_num = ceil(N_img/opts.progress_part); 57 | 58 | for i=1:N_img 59 | if (opts.verbose) 60 | if (i==N_img) 61 | fprintf('...100%%\n'); 62 | else 63 | if ~mod(i,progress_part_num) 64 | fprintf('...%d%%', round(i*100/(progress_part_num*opts.progress_part))); % round: %d needs an integer, the ratio is fractional for some progress_part values 65 | end 66 | end 67 | end 68 | d1 = det_lcl(i).bb(:,1:5); 69 | d2 = det_pw(i).bb(:,1:5); 70 | % for every pairwise box find the nearest local box (distance between the 4 box coordinates) 71 | dist = pdist2(d1(:,1:4), d2(:,1:4)); 72 | [~,m_i] = min(dist); 73 | % matched rows: local box with a blended score alpha*pairwise + (1-alpha)*local + bias 74 | d_c = zeros(size(d1)); 75 | d_c(1:size(d2,1), 1:4) = d1(m_i, 1:4); 76 | d_c(1:size(d2,1), 5) = d1(m_i, 5)*(1-alpha_pairwise) + d2(:,5)*alpha_pairwise + bias; 77 | % remaining rows: unmatched local boxes, unchanged. NOTE(review): if m_i contains duplicates the two sides differ in size - confirm the matching is expected to be one-to-one 78 | d_c(size(d2,1)+1:size(d1,1),:) = d1(setdiff(1:size(d1,1), m_i), :); 79 | 80 | det(i) = det_lcl(i); 81 | det(i).bb = d_c; 82 | 83 | if (~iscell(opts.im_set)) 84 | % the original code formatted the paths with 'idname', which is never defined in this function, so this branch could only stop with an undefined-variable error; report the actual cause instead 85 | error('load_det_local_pairwise_global:imSetRequired', 'opts.im_set must be a cell array with one image id per detection entry'); 86 | else 87 | global_det_path = sprintf(opts.global.path_format, opts.im_set{i}); 88 | im_path = sprintf(opts.im_path_format , opts.im_set{i}); 89 | end 90 | im = imread(im_path); 91 | [img_h, img_w, ~] = size(im); 92 | % mix in the global-model score, then apply the box regression 93 | det(i).bb(:,1:5) = combine_global(det(i).bb(:,1:5), global_det_path, img_w, img_h, opts.global); 94 | det(i).bb(:,1:5) = do_regression(det(i).bb(:,1:5), img_w, img_h,
opts.regression); 95 | end 96 | 97 | 98 | end -------------------------------------------------------------------------------- /localModel/cnn_computeScores_localModel.m: -------------------------------------------------------------------------------- 1 | function [scores, candidateIds] = cnn_computeScores_localModel( net, imdb, getBatch, varargin ) 2 | %cnn_computeScores_localModel computes the scores for the images by applying the CNN to all the candidate patches 3 | % scores and candidateIds are cell arrays indexed by image id; getBatch(imdb, iImage) must return the patches, their labels and patch info for one image 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | 8 | %% compute results w.r.t. patches 9 | opts = struct; 10 | opts.gpuBatchSize = 256; 11 | opts.conserveMemory = true; 12 | opts.sync = true; 13 | opts.useGpu = true; 14 | opts.imageSet = 1 : size( imdb.imageFiles, 4 ); 15 | opts.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference' 16 | 17 | % parse input 18 | opts = vl_argparse(opts, varargin); 19 | 20 | %% do the job 21 | numImages = length( opts.imageSet ); 22 | scores = cell( max(opts.imageSet), 1); 23 | candidateIds = cell( max(opts.imageSet), 1); 24 | res = [] ; 25 | 26 | for iImageId = 1 : numImages 27 | iImage = opts.imageSet( iImageId ); 28 | fprintf('Image %d/%d: ', iImageId, numImages); 29 | tImageStart = tic; 30 | 31 | [patchData, patchLabels, patchInfo] = getBatch( imdb, iImage ); 32 | curNumCandidates = size( patchData, 4 ); 33 | fprintf('patches=%d, ', curNumCandidates ); 34 | 35 | scores{ iImage } = nan( [curNumCandidates, 1], 'single' ); 36 | candidateIds{ iImage } = int32(patchInfo.candidateIds{1}); 37 | % evaluate the patches in chunks of at most gpuBatchSize 38 | numBatches = ceil( curNumCandidates / opts.gpuBatchSize ); 39 | for iBatch = 1 : numBatches 40 | curIds = (iBatch - 1) * opts.gpuBatchSize + 1 : min( iBatch * opts.gpuBatchSize, curNumCandidates); 41 | 42 | im = patchData(:,:,:,curIds); 43 | labels = ones( size( patchLabels(curIds) ) ); 44 | % the last layer needs some labels to run; all-ones placeholders are supplied 45 | net.layers{end}.class = labels ; 46 | 47 | if opts.useGpu, im = gpuArray(im); end % honor the useGpu option (it was declared but the transfer used to be unconditional) 48 | res = vl_simplenn_localModel(net, im, [], res, ...
49 | 'disableDropout', true, ... 50 | 'conserveMemory', opts.conserveMemory, ... 51 | 'sync', opts.sync ) ; 52 | % res(end - 1).x is expected to be 1 x 1 x 2 x numel(curIds); anything else is rejected below 53 | curScores = gather( res(end - 1).x ); 54 | if ~( size(curScores, 1) == 1 && size(curScores, 2) == 1 && size(curScores, 3) == 2 && size(curScores, 4) == length(curIds) ) 55 | error('Scores produced by the network are of strange format'); 56 | end 57 | curScores = squeeze(curScores); 58 | 59 | switch opts.scoreMode 60 | case 'beforeSoftMax' 61 | curScores = curScores(1, :); 62 | case 'afterSoftMax' % log-softmax of row 1, with the column maximum subtracted first for numerical stability 63 | maxVal = max(curScores, [], 1); 64 | normalizedScores = bsxfun(@minus, curScores, maxVal); 65 | curScores = normalizedScores(1, :) - log( sum( exp(normalizedScores), 1) ); 66 | case 'scoreDifference' 67 | curScores = curScores(1, :) - curScores(2, :); 68 | otherwise 69 | error('cnn_computeScores_localModel:unknownScoreMode', 'options scoreMode is of incorrect value'); 70 | end 71 | 72 | curScores = curScores(:); 73 | assert( length(curScores) == length(curIds) ); 74 | scores{ iImage }(curIds) = single( curScores ); 75 | end 76 | fprintf('time: %fs\n', toc(tImageStart) ); 77 | end 78 | 79 | 80 | end 81 | 82 | -------------------------------------------------------------------------------- /utils/showBoundingBoxes.m: -------------------------------------------------------------------------------- 1 | function finalImage = showBoundingBoxes(curImage, boundingBoxes, colors) 2 | %showBoundingBoxes paints the candidate bounding boxes on top of the image 3 | % 4 | % finalImage = showBoundingBoxes(curImage, boundingBoxes, colors) 5 | % 6 | % Input: 7 | % curImage - image to show 8 | % boundingBoxes - double[ numBoxes x 4], each row corresponds to a bounding box in [x y w h] format 9 | % colors - cell array containing the colors of the boxes; each cell can contain a char symbol (ymcrgbwk) or a vector of length 3.
10 | % 11 | % Output: 12 | % finalImage - the image with bounding boxes on top 13 | % values above 1 are taken to mean a 0-255 image, which is rescaled to single in [0, 1] 14 | if max(curImage(:)) > 1 15 | curImage = single(curImage) / 255; 16 | end 17 | 18 | numBoxes = size(boundingBoxes, 1); 19 | % default color is magenta; a single non-cell color is replicated for every box; char codes are converted to RGB triples 20 | if ~exist('colors', 'var') || isempty(colors) 21 | colors = cell(numBoxes, 1); 22 | colors(:) = {'m'}; 23 | elseif ~iscell(colors) 24 | colorsOld = colors; 25 | colors = cell(numBoxes, 1); 26 | colors(:) = {colorsOld}; 27 | end 28 | for iColor = 1 : numel(colors) 29 | if ischar( colors{iColor} ) 30 | colors{iColor} = getColorFromString( colors{iColor} ); 31 | end 32 | end 33 | 34 | finalImage = curImage; 35 | 36 | bBWidth = 2; % width of the bounding box in pixels 37 | % boxes are painted from last to first, so lower-index boxes end up on top where they overlap 38 | boundingBoxes = double( boundingBoxes ); 39 | for iBox = numBoxes : -1 : 1 40 | curColor = colors{iBox}; 41 | 42 | % draw the line segments 43 | % top 44 | finalImage( boundingBoxes(iBox, 2) : boundingBoxes(iBox, 2) + bBWidth - 1, ... 45 | boundingBoxes(iBox, 1) : boundingBoxes(iBox, 1) + boundingBoxes(iBox, 3) - 1, : ) = ... 46 | repmat( reshape( curColor, [1 1 3] ), bBWidth, boundingBoxes(iBox, 3) ); 47 | 48 | % bottom 49 | finalImage( boundingBoxes(iBox, 2) + boundingBoxes(iBox, 4) - bBWidth : boundingBoxes(iBox, 2) + boundingBoxes(iBox, 4) - 1, ... 50 | boundingBoxes(iBox, 1) : boundingBoxes(iBox, 1) + boundingBoxes(iBox, 3) - 1, : ) = ... 51 | repmat( reshape( curColor, [1 1 3] ), bBWidth, boundingBoxes(iBox, 3) ); 52 | 53 | % left 54 | finalImage( boundingBoxes(iBox, 2) : boundingBoxes(iBox, 2) + boundingBoxes(iBox, 4) - 1, ... 55 | boundingBoxes(iBox, 1) : boundingBoxes(iBox, 1) + bBWidth - 1, : ) = ... 56 | repmat( reshape( curColor, [1 1 3] ), boundingBoxes(iBox, 4), bBWidth ); 57 | 58 | % right 59 | finalImage( boundingBoxes(iBox, 2) : boundingBoxes(iBox, 2) + boundingBoxes(iBox, 4) - 1, ... 60 | boundingBoxes(iBox, 1) + boundingBoxes(iBox, 3) - bBWidth : boundingBoxes(iBox, 1) + boundingBoxes(iBox, 3) - 1, : ) = ...
61 | repmat( reshape( curColor, [1 1 3] ), boundingBoxes(iBox, 4), bBWidth ); 62 | 63 | end 64 | 65 | end 66 | 67 | function color = getColorFromString( name ) % maps a one-character color code (y m c r g b w k) to an RGB triple with components 0 or 1; any other name raises getColorFromString:unknownColorName 68 | switch name 69 | case 'y' 70 | color = [1, 1, 0]; 71 | case 'm' 72 | color = [1, 0, 1]; 73 | case 'c' 74 | color = [0, 1, 1]; 75 | case 'r' 76 | color = [1, 0, 0]; 77 | case 'g' 78 | color = [0, 1, 0]; 79 | case 'b' 80 | color = [0, 0, 1]; 81 | case 'w' 82 | color = [1, 1, 1]; 83 | case 'k' 84 | color = [0, 0, 0]; 85 | otherwise 86 | error('getColorFromString:unknownColorName', ['Color name ', name, ' is not recognized'] ); 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /utils/vl_argparse.m: -------------------------------------------------------------------------------- 1 | function [opts, args] = vl_argparse(opts, args) 2 | % VL_ARGPARSE Parse list of parameter-value pairs 3 | % OPTS = VL_ARGPARSE(OPTS, ARGS) updates the structure OPTS based on 4 | % the specified parameter-value pairs ARGS={PAR1, VAL1, ... PARN, 5 | % VALN}. The function produces an error if an unknown parameter name 6 | % is passed on. Values that are structures are copied recursively. 7 | % 8 | % Any of the PAR, VAL pairs can be replaced by a structure; in this 9 | % case, the fields of the structure are used as parameters and the 10 | % field values as values. 11 | % 12 | % [OPTS, ARGS] = VL_ARGPARSE(OPTS, ARGS) copies any parameter in 13 | % ARGS that does not match OPTS back to ARGS instead of producing an 14 | % error. Options specified as structures are expanded back to PAR, 15 | % VAL pairs.
16 | % 17 | % Example:: 18 | % The function can be used to parse a list of arguments 19 | % passed to a MATLAB functions: 20 | % 21 | % function myFunction(x,y,z,varargin) 22 | % opts.parameterName = defaultValue ; 23 | % opts = vl_argparse(opts, varargin) 24 | % 25 | % If only a subset of the options should be parsed, for example 26 | % because the other options are interpreted by a subroutine, then 27 | % use the form 28 | % 29 | % [opts, varargin] = vl_argparse(opts, varargin) 30 | % 31 | % that copies back to VARARGIN any unknown parameter. 32 | % 33 | % See also: VL_HELP(). 34 | 35 | % Authors: Andrea Vedaldi 36 | 37 | % Copyright (C) 2015 Andrea Vedaldi. 38 | % Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson. 39 | % All rights reserved. 40 | % 41 | % This file is part of the VLFeat library and is made available under 42 | % the terms of the BSD license (see the COPYING file). 43 | 44 | if ~isstruct(opts), error('OPTS must be a structure') ; end 45 | if ~iscell(args), args = {args} ; end 46 | 47 | % convert ARGS into a structure 48 | ai = 1 ; 49 | params = {} ; 50 | values = {} ; 51 | while ai <= length(args) 52 | if ischar(args{ai}) 53 | params{end+1} = args{ai} ; ai = ai + 1 ; 54 | if ai > length(args), error('Parameter ''%s'' is missing its value', params{end}) ; end, values{end+1} = args{ai} ; ai = ai + 1 ; % a trailing name without a value now gets a descriptive error instead of an index-out-of-range one 55 | elseif isstruct(args{ai}) ; 56 | params = horzcat(params, fieldnames(args{ai})') ; 57 | values = horzcat(values, struct2cell(args{ai})') ; 58 | ai = ai + 1 ; 59 | else 60 | error('Expected either a param-value pair or a structure') ; 61 | end 62 | end 63 | args = {} ; 64 | 65 | % copy parameters in the opts structure, recursively 66 | for i = 1:numel(params) 67 | if isfield(opts, params{i}) 68 | if isstruct(values{i}) 69 | if ~isstruct(opts.(params{i})) 70 | error('The value of parameter %s is a structure in the arguments but not a structure in OPT.',params{i}) ; 71 | end 72 | if nargout > 1 73 | [opts.(params{i}), rest] = vl_argparse(opts.(params{i}), values{i}) ; 74 | args = horzcat(args, {params{i},
cell2struct(rest(2:2:end), rest(1:2:end), 2)}) ; 75 | else 76 | opts.(params{i}) = vl_argparse(opts.(params{i}), values{i}) ; 77 | end 78 | else 79 | opts.(params{i}) = values{i} ; 80 | end 81 | else 82 | if nargout <= 1 83 | error('Uknown parameter ''%s''', params{i}) ; 84 | else 85 | args = horzcat(args, {params{i}, values{i}}) ; 86 | end 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /pairwiseModel/run_training_pairwiseModel.m: -------------------------------------------------------------------------------- 1 | %run_training_pairwiseModel is the launching script for the training of the pairwise model 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | pretrainedNetworkPath = 'data/matconvnet'; % NOTE(review): not referenced again in this script 5 | dataPath = 'data/HollywoodHeads'; 6 | resultPath = 'results/HollywoodHeads'; 7 | 8 | %% add all the required paths 9 | filePath = fileparts( mfilename('fullpath') ); 10 | run( fullfile( fileparts( filePath ), 'setup.m' ) ); 11 | 12 | % network initialization 13 | initNetworkFile = fullfile( 'models', 'local.mat' ); 14 | initNetworkName = 'oquab'; % NOTE(review): not referenced again in this script 15 | 16 | % scores to preselect candidates 17 | candidateSelectionScoresFile = fullfile( resultPath, 'local', 'localModel-scores-trainValTest.mat' ); 18 | 19 | % pairwise clusters 20 | if exist( fullfile(resultPath, 'pairwise', 'imdb_pairwise_precomputedClusters.mat'), 'file' ) 21 | % use the precomputed clusters 22 | clusters = load( fullfile(resultPath, 'pairwise', 'imdb_pairwise_precomputedClusters.mat'), 'clusterInfo', 'clusterFunction' ); 23 | else 24 | % compute the clustering on the fly: pass empty placeholders with the same field names 25 | clusters = struct; 26 | clusters.clusterInfo = struct('type', [], 'mean', [], 'std', [], 'numClusters', [], 'clusterCenters', [] ); 27 | clusters.clusterFunction = []; 28 | end 29 | 30 | %% preparation 31 | % parameters 32 | opts_cnn = struct; 33 | opts_cnn.dataPath = dataPath; 34 | opts_cnn.dataset.trainingSetFile = fullfile('Splits', 'train.txt'); 35 |
opts_cnn.dataset.validationSetFile = fullfile('Splits', 'val.txt'); 36 | opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt'); 37 | opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations'; 38 | opts_cnn.dataset.imageLocalPrefix = 'JPEGImages'; 39 | opts_cnn.dataset.candidateLocalPrefix = 'Candidates'; 40 | opts_cnn.dataset.scoreFile = candidateSelectionScoresFile; 41 | opts_cnn.dataset.maxNumPatchesPerImage = 16; 42 | opts_cnn.dataset.nmsIntersectionOverAreaThreshold = 0.3; 43 | opts_cnn.dataset.numPairwiseClusters = 20; 44 | opts_cnn.dataset.clusterInfo = clusters.clusterInfo; 45 | opts_cnn.dataset.clusterFunction = clusters.clusterFunction; 46 | 47 | opts_cnn.loss = 'logisticScoresCompact'; 48 | opts_cnn.expDir = resultPath; 49 | opts_cnn.scoreMode = 'maxMarginals'; 50 | 51 | opts_cnn.train.batchSize = 4; 52 | opts_cnn.train.numEpochs = 8; 53 | opts_cnn.train.learningRate = [ 0.00001*ones(1, 4), 0.000001*ones(1, 4) ]; % 8 values (4 x 1e-5, then 4 x 1e-6), matching numEpochs = 8 54 | opts_cnn.train.weightDecay = 0.0005 / 100; % = 5e-6 55 | opts_cnn.train.numValidationPerEpoch = 1; 56 | opts_cnn.train.backPropagateType = 'unaryAndPairwise'; % 'all', 'unaryAndPairwise', 'onlyUnary', 'onlyPairwise' 57 | opts_cnn.train.disableDropoutFeatureExtractor = false; 58 | 59 | opts_cnn.getBatch.cropMode = 'warp'; 60 | opts_cnn.getBatch.jitterStd = 1; 61 | opts_cnn.getBatch.iouPositiveNegativeThreshold = 0.5; 62 | opts_cnn.getBatch.randomizeCandidates = false; 63 | opts_cnn.getBatch.nmsIntersectionOverAreaThreshold = 0.3; 64 | opts_cnn.getBatch.cropPad = [18, 18, 18, 18]; 65 | 66 | opts_cnn.evaluation.iouThreshold = 0.5; 67 | opts_cnn.evaluation.useDifficultImages = false; 68 | % deferred initialization: the handle captures the loss name and the number of pairwise clusters set above 69 | opts_cnn.networkInitialization = @() cnn_initNet_pairwiseModel( initNetworkFile, opts_cnn.loss, opts_cnn.dataset.numPairwiseClusters ); 70 | 71 | opts_cnn.train.expDir = fullfile( opts_cnn.expDir, 'pairwise', 'models'); 72 | 73 | %% run training 74 | cnn_pairwiseModel( opts_cnn ); 75 |
-------------------------------------------------------------------------------- /utils/hex2rgb.m: -------------------------------------------------------------------------------- 1 | function [ rgb ] = hex2rgb(hex,range) 2 | % hex2rgb converts hex color values to rgb arrays on the range 0 to 1. 3 | % 4 | % 5 | % * * * * * * * * * * * * * * * * * * * * 6 | % SYNTAX: 7 | % rgb = hex2rgb(hex) returns rgb color values in an n x 3 array. Values are 8 | % scaled from 0 to 1 by default. 9 | % 10 | % rgb = hex2rgb(hex,256) returns RGB values scaled from 0 to 255. 11 | % 12 | % 13 | % * * * * * * * * * * * * * * * * * * * * 14 | % EXAMPLES: 15 | % 16 | % myrgbvalue = hex2rgb('#334D66') 17 | % = 0.2000 0.3020 0.4000 18 | % 19 | % 20 | % myrgbvalue = hex2rgb('334D66') % <-the # sign is optional 21 | % = 0.2000 0.3020 0.4000 22 | % 23 | % 24 | % myRGBvalue = hex2rgb('#334D66',256) 25 | % = 51 77 102 26 | % 27 | % 28 | % myhexvalues = ['#334D66';'#8099B3';'#CC9933';'#3333E6']; 29 | % myrgbvalues = hex2rgb(myhexvalues) 30 | % = 0.2000 0.3020 0.4000 31 | % 0.5020 0.6000 0.7020 32 | % 0.8000 0.6000 0.2000 33 | % 0.2000 0.2000 0.9020 34 | % 35 | % 36 | % myhexvalues = ['#334D66';'#8099B3';'#CC9933';'#3333E6']; 37 | % myRGBvalues = hex2rgb(myhexvalues,256) 38 | % = 51 77 102 39 | % 128 153 179 40 | % 204 153 51 41 | % 51 51 230 42 | % 43 | % HexValsAsACharacterArray = {'#334D66';'#8099B3';'#CC9933';'#3333E6'}; 44 | % rgbvals = hex2rgb(HexValsAsACharacterArray) 45 | % 46 | % * * * * * * * * * * * * * * * * * * * * 47 | % Chad A. Greene, April 2014 48 | % 49 | % Updated August 2014: Functionality remains exactly the same, but it's a 50 | % little more efficient and more robust. Thanks to Stephen Cobeldick for 51 | % the improvement tips. In this update, the documentation now shows that 52 | % the range may be set to 256. This is more intuitive than the previous 53 | % style, which scaled values from 0 to 255 with range set to 255. 
Now you 54 | % can enter 256 or 255 for the range, and the answer will be the same--rgb 55 | % values scaled from 0 to 255. Function now also accepts character arrays 56 | % as input. 57 | % 58 | % * * * * * * * * * * * * * * * * * * * * 59 | % See also rgb2hex, dec2hex, hex2num, and ColorSpec. 60 | % 61 | 62 | %% Input checks: 63 | 64 | assert(nargin>0&nargin<3,'hex2rgb function must have one or two inputs.') 65 | 66 | if nargin==2 67 | assert(isscalar(range)==1,'Range must be a scalar, either "1" to scale from 0 to 1 or "256" to scale from 0 to 255.') 68 | end 69 | 70 | %% Tweak inputs if necessary: 71 | 72 | if iscell(hex) 73 | assert(isvector(hex)==1,'Unexpected dimensions of input hex values.') 74 | 75 | % In case cell array elements are separated by a comma instead of a 76 | % semicolon, reshape hex: 77 | if isrow(hex) 78 | hex = hex'; 79 | end 80 | 81 | % If input is cell, convert to matrix: 82 | hex = cell2mat(hex); 83 | end 84 | % only the first row is inspected; if it starts with '#', the first column is dropped from every row 85 | if strcmpi(hex(1,1),'#') 86 | hex(:,1) = []; 87 | end 88 | 89 | if nargin == 1 90 | range = 1; 91 | end 92 | 93 | %% Convert from hex to rgb: 94 | % the characters are read row by row as consecutive 2-digit hex numbers and regrouped three per row 95 | switch range 96 | case 1 97 | rgb = reshape(sscanf(hex.','%2x'),3,[]).'/255; 98 | 99 | case {255,256} 100 | rgb = reshape(sscanf(hex.','%2x'),3,[]).'; 101 | 102 | otherwise 103 | error('Range must be either "1" to scale from 0 to 1 or "256" to scale from 0 to 255.') 104 | end 105 | 106 | end 107 | -------------------------------------------------------------------------------- /utils/combine_global.m: -------------------------------------------------------------------------------- 1 | function BB = combine_global(BB, global_det_path, img_w, img_h, varargin) 2 | % combine_global blends column 5 of BB (rows are boxes) with the global-model heat-map value of the best-overlapping tile; BB is returned unchanged when opts.alpha is not positive 3 | opts = struct; % options of global model 4 | opts.alpha = 0; 5 | opts.scale_range = [1 2 4 8]; 6 | opts.stride_proportion = 2; 7 | opts.hm_size = {}; 8 | for scl=opts.scale_range 9 | tile_size = 224/scl; 10 | stride = tile_size/opts.stride_proportion; 11 | opts.hm_size{scl} = floor((224-stride)/stride); 12 | end 13 |
opts.path_format = ''; 14 | opts.platform = 'matconvnet'; 15 | 16 | opts = vl_argparse(opts, varargin); 17 | 18 | %global term 19 | if (opts.alpha > 0) 20 | output = load(global_det_path, '-mat'); 21 | switch opts.platform 22 | case 'matconvnet' 23 | output = output.score(1,:)'; 24 | case 'torch' 25 | output = output.x(2,:)'; 26 | end 27 | % split the flat score vector into one square heat map per scale (stored at index scl) 28 | cnt = 0; 29 | for scl=opts.scale_range 30 | hm{scl} = reshape(output(cnt+1:cnt+opts.hm_size{scl}^2), ... 31 | opts.hm_size{scl}, opts.hm_size{scl}); 32 | cnt = cnt+opts.hm_size{scl}^2; 33 | end 34 | 35 | % update score 36 | %combine with global term: boxes are shifted down by the vertical padding and scaled so that the image width maps to 224 37 | scale_factor = 224/img_w; 38 | pad_size = floor((img_w-img_h)/2); 39 | 40 | BB_pad = BB; 41 | BB_pad(:, 2) = BB_pad(:, 2) + pad_size; 42 | BB_pad = BB_pad*scale_factor; 43 | BB_pad = convertBb_X1Y1WH_to_X1Y1X2Y2(BB_pad); 44 | 45 | for BB_pad_cnt= 1:size(BB_pad, 1) 46 | max_ov_allscale = -inf; 47 | max_c_allscale = 0; 48 | max_d_allscale = 0; 49 | max_scl = 0; 50 | 51 | for scl=opts.scale_range 52 | tile_size = 224/scl; 53 | stride = tile_size/opts.stride_proportion; 54 | %determine which cell the top-left belonging to 55 | cell_c_tl = floor((BB_pad(BB_pad_cnt, 1)-1)/stride); 56 | cell_d_tl = floor((BB_pad(BB_pad_cnt, 2)-1)/stride); 57 | %determine which cell the bot_right belonging to 58 | cell_c_br = floor((BB_pad(BB_pad_cnt, 3)-1)/stride); 59 | cell_d_br = floor((BB_pad(BB_pad_cnt, 4)-1)/stride); 60 | 61 | if (cell_c_tl <= 0) 62 | cell_c_tl = 1; 63 | end 64 | if (cell_d_tl <= 0) 65 | cell_d_tl = 1; 66 | end 67 | if (cell_c_br <= 0) 68 | cell_c_br = 1; 69 | end 70 | if (cell_d_br <= 0) 71 | cell_d_br = 1; 72 | end 73 | n_cells = opts.hm_size{scl}; cell_c_tl = min(cell_c_tl, n_cells); cell_d_tl = min(cell_d_tl, n_cells); cell_c_br = min(cell_c_br, n_cells); cell_d_br = min(cell_d_br, n_cells); % clamp from above as well: a box reaching past the 224-pixel frame would otherwise select cells outside the heat map, or leave the loops below empty so that no scale is ever chosen 74 | max_ov = -inf; 75 | max_c = 0; 76 | max_d = 0; 77 | for c = cell_c_tl:cell_c_br 78 | for d = cell_d_tl:cell_d_br 79 | ov = bbIntersectionOverUnion([(c-1)*stride+1 (d-1)*stride+1 tile_size tile_size], convertBb_X1Y1X2Y2_to_X1Y1WH(BB_pad(BB_pad_cnt, 1:4))); 80 | 81 | if (ov > max_ov) 82 | max_ov = ov; 83 | max_c = c; 84 | max_d = d; 85 | 86 | end 87 | end
88 | end 89 | % keep the best tile over all scales 90 | if (max_ov > max_ov_allscale) 91 | max_ov_allscale = max_ov; 92 | max_scl = scl; 93 | max_c_allscale = max_c; 94 | max_d_allscale = max_d; 95 | end 96 | end 97 | BB(BB_pad_cnt,5) = BB(BB_pad_cnt,5)*(1-opts.alpha) + opts.alpha*hm{max_scl}(max_d_allscale, max_c_allscale); % blend: (1-alpha) * detection score + alpha * heat-map value of the best-overlapping tile 98 | end 99 | end -------------------------------------------------------------------------------- /globalModel/cnn_globalModel.m: -------------------------------------------------------------------------------- 1 | function cnn_globalModel(varargin ) 2 | %cnn_globalModel runs the full instance of the CNN training procedure for the global model 3 | 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | 8 | %% parse parameters 9 | opts = struct; 10 | opts.networkInitialization = []; % handle to the function initializing the network 11 | opts.expDir = ''; 12 | opts.randSeed = 1; 13 | opts.dataPath = ''; 14 | opts.imdbPath = ''; % path to the file with the dataset information created by cnn_prepareData_localModel.m; default: fullfile(opts.expDir, 'imdb.mat') 15 | 16 | % dataset info 17 | opts.dataset = struct; 18 | opts.dataset.trainingSetFile = ''; 19 | opts.dataset.validationSetFile = ''; 20 | opts.dataset.testSetFile = ''; 21 | opts.dataset.groundTruthLocalPrefix = ''; 22 | opts.dataset.imageLocalPrefix = ''; 23 | opts.dataset.candidateLocalPrefix = ''; 24 | 25 | % CNN training 26 | opts.train.batchSize = 32; % number of images to form a batch 27 | opts.train.numEpochs = 3; 28 | opts.train.learningRate = [0.0001 0.00001 0.000001]; 29 | opts.train.weightDecay = 0.0005; 30 | opts.train.backPropDepth = +inf; 31 | opts.train.expDir = ''; 32 | opts.train.numValidationPerEpoch = 2; 33 | opts.train.restartEpoch = nan; 34 | opts.train.conserveMemory = true; 35 | opts.train.continue = false; 36 | 37 | % training batch generation 38 | %opts.getBatch.randSeed = 1; 39 | opts.getBatch.jitterStd = 0; 40 | opts.getBatch.grid_size = [1 2 4 8]; 41 | 42 | % parse input 43 | opts =
vl_argparse(opts, varargin); 44 | 45 | if isempty( opts.imdbPath ) 46 | opts.imdbPath = fullfile(opts.expDir, 'imdb.mat'); 47 | end 48 | 49 | %% random seed 50 | % the CPU random seed 51 | cpu_rs = RandStream('mt19937ar','Seed',opts.randSeed); 52 | RandStream.setGlobalStream(cpu_rs); 53 | % the GPU random seed 54 | gpu_rs = parallel.gpu.RandStream('CombRecursive','Seed',opts.randSeed); 55 | parallel.gpu.RandStream.setGlobalStream(gpu_rs); 56 | 57 | %% Prepare data 58 | generateDataFlag = true; % regenerate unless a cached imdb whose stored opts equal opts.dataset is found below 59 | if exist(opts.imdbPath, 'file') 60 | fprintf('Reading imdb file %s\n', opts.imdbPath); 61 | imdb = load(opts.imdbPath); 62 | if isfield(imdb, 'opts') && isequal( imdb.opts, opts.dataset ) 63 | generateDataFlag = false; 64 | else 65 | generateDataFlag = true; 66 | warning('imdb file is not consistent with the parameters, need to generate it again'); 67 | end 68 | end 69 | if generateDataFlag 70 | fprintf('Generating imdb file %s\n', opts.imdbPath); 71 | imdb = cnn_prepareData_localModel( opts.dataset, 'dataPath', opts.dataPath ); 72 | imdb.opts = opts.dataset; 73 | mkdir(opts.expDir); 74 | save(opts.imdbPath, '-struct', 'imdb', '-v7.3') ; 75 | end 76 | 77 | %% random seed 78 | % fix random seed here as well to exclude the effect of changing the seed while generating data 79 | % the CPU random seed 80 | cpu_rs = RandStream('mt19937ar','Seed',opts.randSeed); 81 | RandStream.setGlobalStream(cpu_rs); 82 | % the GPU random seed 83 | gpu_rs = parallel.gpu.RandStream('CombRecursive','Seed',opts.randSeed); 84 | parallel.gpu.RandStream.setGlobalStream(gpu_rs); 85 | 86 | 87 | %% Initialize network 88 | if ~isempty(opts.networkInitialization) 89 | net = opts.networkInitialization(); 90 | else 91 | error('cnn_globalModel:noInitNetwork', 'Initialization not provided'); 92 | end 93 | % the batch generator receives the network's average image and the data path through opts.getBatch 94 | opts.getBatch.meanImage = net.normalization.averageImage; 95 | %opts.getBatch.randStream = RandStream('mt19937ar','Seed',opts.getBatch.randSeed); 96 | opts.getBatch.dataPath = opts.dataPath; 97 | 98 |
%% batch generator 99 | batchWrapper = @(imdb, batch) cnn_getBatch_globalModel(imdb, batch, opts.getBatch) ; 100 | 101 | %% training: images with set == 1 are used for training, set == 2 for validation 102 | cnn_train_globalModel(net, imdb, batchWrapper, ... 103 | opts.train, ... 104 | 'train', find(imdb.images.set == 1), ... 105 | 'val', find(imdb.images.set == 2) ); 106 | end 107 | -------------------------------------------------------------------------------- /pairwiseModel/vl_simplenn_pairwiseModel_forwardPass.m: -------------------------------------------------------------------------------- 1 | function res = vl_simplenn_pairwiseModel_forwardPass(net, x, trainableLayers, res, varargin) 2 | %vl_simplenn_pairwiseModel_forwardPass performs the forward pass using the provided CNN 3 | % res(1).x is the input x and res(i+1).x the output of net.layers{i}; 'convPtr' layers take their weights, pad and stride from trainableLayers{l.index} 4 | opts = struct; 5 | opts.res = [] ; % NOTE(review): not read anywhere in this function 6 | opts.conserveMemory = false ; 7 | opts.sync = false ; 8 | opts.disableDropout = false ; 9 | opts.freezeDropout = false ; 10 | opts.saveDataForBackwardPass = true; 11 | opts = vl_argparse(opts, varargin); 12 | 13 | n = numel(net.layers) ; 14 | 15 | doder = opts.saveDataForBackwardPass; 16 | 17 | gpuMode = isa(x, 'gpuArray') ; 18 | % allocate the result structure unless one was passed in for reuse 19 | if nargin <= 3 || isempty(res) 20 | res = struct(... 21 | 'x', cell(1,n+1), ... 22 | 'dzdx', cell(1,n+1), ... 23 | 'dzdw', cell(1,n+1), ... 24 | 'aux', cell(1,n+1), ... 25 | 'time', num2cell(zeros(1,n+1)), ... 26 | 'backwardTime', num2cell(zeros(1,n+1))) ; 27 | end 28 | res(1).x = x ; 29 | 30 | for i=1:n 31 | l = net.layers{i} ; 32 | res(i).time = tic ; 33 | switch l.type 34 | case 'conv' 35 | res(i+1).x = vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, 'pad', l.pad, 'stride', l.stride) ; 36 | case 'convPtr' 37 | id = l.index; 38 | res(i+1).x = vl_nnconv(res(i).x, trainableLayers{id}.weights{1}, trainableLayers{id}.weights{2}, ...
39 | 'pad', trainableLayers{id}.pad, 'stride', trainableLayers{id}.stride) ; 40 | 41 | case 'pool' 42 | res(i+1).x = vl_nnpool(res(i).x, l.pool, 'pad', l.pad, 'stride', l.stride, 'method', l.method) ; 43 | case 'normalize' 44 | res(i+1).x = vl_nnnormalize(res(i).x, l.param) ; 45 | case 'softmax' 46 | res(i+1).x = vl_nnsoftmax(res(i).x) ; 47 | case 'loss' 48 | res(i+1).x = vl_nnloss(res(i).x, l.class) ; 49 | case 'softmaxloss' 50 | res(i+1).x = vl_nnsoftmaxloss(res(i).x, l.class) ; 51 | case 'svmloss_multiclass' 52 | res(i+1).x = vl_nnsvmloss(res(i).x, l.class) ; 53 | case 'relu' 54 | res(i+1).x = vl_nnrelu(res(i).x) ; 55 | case 'sigmoid' 56 | res(i+1).x = vl_nnsigmoid(res(i).x) ; 57 | case 'noffset' 58 | res(i+1).x = vl_nnnoffset(res(i).x, l.param) ; 59 | case 'spnorm' 60 | res(i+1).x = vl_nnspnorm(res(i).x, l.param) ; 61 | case 'dropout' 62 | if opts.disableDropout 63 | res(i+1).x = res(i).x ; 64 | elseif opts.freezeDropout 65 | [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate, 'mask', res(i+1).aux) ; 66 | else 67 | [res(i+1).x, res(i+1).aux] = vl_nndropout(res(i).x, 'rate', l.rate) ; 68 | end 69 | case 'bnorm' 70 | if isfield(l, 'weights') 71 | res(i+1).x = vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}) ; 72 | else 73 | res(i+1).x = vl_nnbnorm(res(i).x, l.filters, l.biases) ; 74 | end 75 | case 'pdist' 76 | res(i+1).x = vl_nnpdist(res(i).x, l.p, 'noRoot', l.noRoot, 'epsilon', l.epsilon) ; % store the output in .x like every other layer; assigning the raw array to res(i+1) is not a valid struct-array assignment 77 | case 'custom' 78 | res(i+1) = l.forward(l, res(i), res(i+1)) ; 79 | otherwise 80 | error('Unknown layer type %s', l.type) ; 81 | end 82 | % optionally forget intermediate results: only with conserveMemory, never for inputs of loss layers or layers flagged rememberOutput, and (when data is kept for the backward pass) only for inputs of relu layers 83 | forget = opts.conserveMemory ; 84 | forget = forget & (~doder || strcmp(l.type, 'relu')) ; 85 | forget = forget & ~(strcmp(l.type, 'loss') || strcmp(l.type, 'softmaxloss')) ; 86 | forget = forget & (~isfield(l, 'rememberOutput') || ~l.rememberOutput) ; 87 | if forget 88 | res(i).x = [] ; 89 | end 90 | if gpuMode && opts.sync 91 | % This should make things slower, but on MATLAB
2014a it is necessary 92 | % for any decent performance. 93 | wait(gpuDevice) ; 94 | end 95 | res(i).time = toc(res(i).time) ; 96 | end 97 | 98 | end 99 | -------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/qpboMex/QPBO-v1.32.src/QPBO_postprocessing.cpp: -------------------------------------------------------------------------------- 1 | /* QPBO_postprocessing.cpp */ 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | #include "QPBO.h" 8 | 9 | 10 | template 11 | void QPBO::ComputeWeakPersistencies() 12 | { 13 | if (stage == 0) return; 14 | 15 | Node* i; 16 | Node* j; 17 | Node* stack = NULL; 18 | int component; 19 | 20 | for (i=nodes[0]; ilabel>=-1 && i->label<=1); 23 | 24 | Node* i1 = GetMate0(i); 25 | 26 | if (i->label >= 0) 27 | { 28 | i->dfs_parent = i; 29 | i1->dfs_parent = i1; 30 | i->region = i1->region = 0; 31 | } 32 | else 33 | { 34 | i->dfs_parent = i1->dfs_parent = NULL; 35 | i->region = i1->region = -1; 36 | } 37 | } 38 | 39 | // first DFS 40 | for (i=nodes[0]; idfs_parent) continue; 44 | 45 | // DFS starting from i 46 | i->dfs_parent = i; 47 | i->dfs_current = i->first; 48 | while ( 1 ) 49 | { 50 | if (!i->dfs_current) 51 | { 52 | i->next = stack; 53 | stack = i; 54 | 55 | if (i->dfs_parent == i) break; 56 | i = i->dfs_parent; 57 | i->dfs_current = i->dfs_current->next; 58 | continue; 59 | } 60 | 61 | j = i->dfs_current->head; 62 | if (!(i->dfs_current->r_cap>0) || j->dfs_parent) 63 | { 64 | i->dfs_current = i->dfs_current->next; 65 | continue; 66 | } 67 | 68 | j->dfs_parent = i; 69 | i = j; 70 | i->dfs_current = i->first; 71 | } 72 | } 73 | 74 | // second DFS 75 | component = 0; 76 | while ( stack ) 77 | { 78 | i = stack; 79 | stack = i->next; 80 | if (i->region > 0) continue; 81 | 82 | i->region = ++ component; 83 | i->dfs_parent = i; 84 | i->dfs_current = i->first; 85 | while ( 1 ) 86 | { 87 | if (!i->dfs_current) 88 | { 89 | if (i->dfs_parent == i) break; 90 | i = i->dfs_parent; 91 | 
i->dfs_current = i->dfs_current->next; 92 | continue; 93 | } 94 | 95 | j = i->dfs_current->head; 96 | if (!(i->dfs_current->sister->r_cap>0) || j->region>=0) 97 | { 98 | i->dfs_current = i->dfs_current->next; 99 | continue; 100 | } 101 | 102 | j->dfs_parent = i; 103 | i = j; 104 | i->dfs_current = i->first; 105 | i->region = component; 106 | } 107 | } 108 | 109 | // assigning labels 110 | for (i=nodes[0]; ilabel < 0) 113 | { 114 | code_assert(i->region > 0); 115 | if (i->region > GetMate0(i)->region) { i->label = 0; i->region = 0; } 116 | else if (i->region < GetMate0(i)->region) { i->label = 1; i->region = 0; } 117 | } 118 | else code_assert(i->region == 0); 119 | } 120 | } 121 | 122 | template 123 | void QPBO::Stitch() 124 | { 125 | if (stage == 0) return; 126 | 127 | Node* i; 128 | Node* i_mate; 129 | Node* j; 130 | Arc* a; 131 | Arc* a_mate; 132 | 133 | for (a=arcs[0], a_mate=arcs[1]; asister) 135 | { 136 | a->r_cap = a_mate->r_cap = a->r_cap + a_mate->r_cap; 137 | 138 | i = a->sister->head; 139 | j = a->head; 140 | 141 | if (i->region==0 || i->region != j->region) continue; 142 | if (IsNode0(i)) 143 | { 144 | if (i->user_label != 0) continue; 145 | } 146 | else 147 | { 148 | if (GetMate1(i)->user_label != 1) continue; 149 | } 150 | if (IsNode0(j)) 151 | { 152 | if (j->user_label != 1) continue; 153 | } 154 | else 155 | { 156 | if (GetMate1(j)->user_label != 0) continue; 157 | } 158 | 159 | a->r_cap = a_mate->r_cap = 0; 160 | } 161 | 162 | for (i=nodes[0], i_mate=nodes[1]; itr_cap = i->tr_cap - i_mate->tr_cap; 165 | i_mate->tr_cap = -i->tr_cap; 166 | } 167 | 168 | ComputeWeakPersistencies(); 169 | } 170 | 171 | #include "instances.inc" 172 | -------------------------------------------------------------------------------- /utils/HollywoodHeads/VOCevaldet_HH.m: --------------------------------------------------------------------------------
function [rec,prec,ap] = VOCevaldet_HH(VOCopts,id,cls,draw)
%VOCevaldet_HH computes the precision/recall curve and the AP of a detection result file
%
% [rec,prec,ap] = VOCevaldet_HH(VOCopts,id,cls,draw) reads the detections from
% sprintf(VOCopts.detrespath,id,cls); every row of that file is
%   <image id> <confidence> <b1> <b2> <b3> <b4>
% The detections are visited in the order of decreasing confidence and each one is matched
% to the ground-truth box of class cls in the same image with the largest
% intersection-over-union. The overlap uses (b3-b1+1)*(b4-b2+1) as the box area, i.e. the
% boxes are given by their two corners.
%
% A detection counts as
%   - true positive:  overlap >= VOCopts.minoverlap with a not yet detected, non-difficult box;
%   - ignored:        overlap >= VOCopts.minoverlap with a box marked as difficult;
%   - false positive: otherwise (including repeated detections of the same box).
%
% Output: cumulative recall rec, cumulative precision prec, and ap = VOCap(rec,prec).
% If draw is true, the curve is plotted into the current axes.
%
% The ground truth is cached in the file sprintf(VOCopts.annocachepath,VOCopts.testset),
% which is created on the first call.

% the global tic/toc timer limits the progress messages to about one per second
tic;
cachePath = sprintf(VOCopts.annocachepath, VOCopts.testset);
if exist(cachePath, 'file')
    fprintf('%s: pr: loading ground truth\n', cls);
    load(cachePath, 'gtids', 'recs');
else
    [gtids, ~] = textread(sprintf(VOCopts.imgsetpath, VOCopts.testset), '%s %d');
    for iImage = 1 : length(gtids)
        if toc > 1
            fprintf('%s: pr: load: %d/%d\n', cls, iImage, length(gtids));
            drawnow;
            tic;
        end

        % read the annotation; images without objects get one dummy difficult object
        % so that all records have the same set of fields
        annotation = VOCreadrecxml(sprintf(VOCopts.annopath, gtids{iImage}));
        if ~isfield(annotation, 'objects')
            annotation.objects(1).class = 'dummy';
            annotation.objects(1).bbox = [0 0 0 0];
            annotation.objects(1).difficult = 1;
        end
        recs(iImage) = annotation;
    end
    save(cachePath, 'gtids', 'recs');
end

fprintf('%s: pr: evaluating detections\n', cls);

% hash image ids
hash = VOChash_init_HH(gtids);

% collect the ground-truth boxes of the requested class, one column per box
npos = 0;
gt(length(gtids)) = struct('BB', [], 'diff', [], 'det', []);
for iImage = 1 : length(gtids)
    objects = recs(iImage).objects;
    clsinds = strmatch(cls, {objects(:).class}, 'exact');
    gt(iImage).BB = cat(1, objects(clsinds).bbox)';
    gt(iImage).diff = [objects(clsinds).difficult];
    gt(iImage).det = false(length(clsinds), 1);
    npos = npos + sum(~gt(iImage).diff);
end

% load the detections
[ids, confidence, b1, b2, b3, b4] = textread(sprintf(VOCopts.detrespath, id, cls), '%s %f %f %f %f %f');
BB = [b1 b2 b3 b4]';

% order the detections by decreasing confidence
[~, order] = sort(-confidence);
ids = ids(order);
BB = BB(:, order);

% match the detections to the ground truth
nd = length(confidence);
tp = zeros(nd, 1);
fp = zeros(nd, 1);
tic;
for d = 1 : nd
    if toc > 1
        fprintf('%s: pr: compute: %d/%d\n', cls, d, nd);
        drawnow;
        tic;
    end

    % find the image of the detection
    iImage = VOChash_lookup_HH(hash, ids{d});
    if isempty(iImage)
        error('unrecognized image "%s"', ids{d});
    elseif length(iImage) > 1
        error('multiple image "%s"', ids{d});
    end

    % find the ground-truth box with the largest overlap (the first one wins in case of ties)
    bb = BB(:, d);
    detArea = (bb(3)-bb(1)+1)*(bb(4)-bb(2)+1);
    ovmax = -inf;
    for j = 1 : size(gt(iImage).BB, 2)
        bbgt = gt(iImage).BB(:, j);
        iw = min(bb(3), bbgt(3)) - max(bb(1), bbgt(1)) + 1;
        ih = min(bb(4), bbgt(4)) - max(bb(2), bbgt(2)) + 1;
        if ~(iw > 0 & ih > 0)
            continue; % the boxes do not intersect
        end
        % overlap = area of intersection / area of union
        interArea = iw*ih;
        ua = detArea + ...
            (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1) - ...
            interArea;
        ov = interArea/ua;
        if ov > ovmax
            ovmax = ov;
            jmax = j;
        end
    end

    % true positive / don't care / false positive
    if ovmax >= VOCopts.minoverlap
        if gt(iImage).diff(jmax)
            % matched a difficult object: the detection is ignored
        elseif gt(iImage).det(jmax)
            fp(d) = 1; % the object has already been detected
        else
            tp(d) = 1;
            gt(iImage).det(jmax) = true;
        end
    else
        fp(d) = 1;
    end
end

% precision/recall
fp = cumsum(fp);
tp = cumsum(tp);
rec = tp/npos;
prec = tp./(fp+tp);

ap = VOCap(rec, prec);

if draw
    % plot precision/recall
    plot(rec, prec, '-');
    grid;
    xlabel 'recall'
    ylabel 'precision'
    title(sprintf('class: %s, subset: %s, AP = %.3f', cls, VOCopts.testset, ap));
end
-------------------------------------------------------------------------------- /utils/Casablanca/VOCevaldet_Casablanca.m: -------------------------------------------------------------------------------- 1 | function [rec,prec,ap] = VOCevaldet_Casablanca(VOCopts,id,cls,draw) 2 | 3 | % load test set 4 | tic; 5 | cp=sprintf(VOCopts.annocachepath,VOCopts.testset); 6 | if exist(cp,'file') 7 | fprintf('%s: pr: loading ground truth\n',cls); 8 | load(cp,'gtids','recs'); 9 | else 10 | [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); 11 | for i=1:length(gtids) 12 | % display progress 13 | if toc>1 14 | fprintf('%s: pr: load: 
%d/%d\n',cls,i,length(gtids)); 15 | drawnow; 16 | tic; 17 | end 18 | 19 | % read annotation 20 | rec_t = VOCreadrecxml(sprintf(VOCopts.annopath,gtids{i})); 21 | if ~isfield(rec_t, 'objects') 22 | rec_t.objects(1).class = 'dummy'; 23 | rec_t.objects(1).bbox = [0 0 0 0]; 24 | rec_t.objects(1).difficult = 1; 25 | end 26 | recs(i) = rec_t; 27 | end 28 | save(cp,'gtids','recs'); 29 | end 30 | 31 | fprintf('%s: pr: evaluating detections\n',cls); 32 | 33 | % hash image ids 34 | hash=VOChash_init_Casablanca(gtids); 35 | 36 | % extract ground truth objects 37 | 38 | npos=0; 39 | gt(length(gtids))=struct('BB',[],'diff',[],'det',[]); 40 | for i=1:length(gtids) 41 | % extract objects of class 42 | clsinds=strmatch(cls,{recs(i).objects(:).class},'exact'); 43 | gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)'; 44 | gt(i).diff=[recs(i).objects(clsinds).difficult]; 45 | gt(i).det=false(length(clsinds),1); 46 | npos=npos+sum(~gt(i).diff); 47 | end 48 | 49 | % load results 50 | [ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f'); 51 | BB=[b1 b2 b3 b4]'; 52 | 53 | % sort detections by decreasing confidence 54 | [sc,si]=sort(-confidence); 55 | ids=ids(si); 56 | BB=BB(:,si); 57 | 58 | % assign detections to ground truth objects 59 | nd=length(confidence); 60 | tp=zeros(nd,1); 61 | fp=zeros(nd,1); 62 | tic; 63 | for d=1:nd 64 | % display progress 65 | if toc>1 66 | fprintf('%s: pr: compute: %d/%d\n',cls,d,nd); 67 | drawnow; 68 | tic; 69 | end 70 | 71 | % find ground truth image 72 | i=VOChash_lookup_Casablanca(hash,ids{d}); 73 | if isempty(i) 74 | error('unrecognized image "%s"',ids{d}); 75 | elseif length(i)>1 76 | error('multiple image "%s"',ids{d}); 77 | end 78 | 79 | % assign detection to ground truth object if any 80 | bb=BB(:,d); 81 | ovmax=-inf; 82 | for j=1:size(gt(i).BB,2) 83 | bbgt=gt(i).BB(:,j); 84 | bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; 85 | iw=bi(3)-bi(1)+1; 86 | ih=bi(4)-bi(2)+1; 87 | 
if iw>0 & ih>0 88 | % compute overlap as area of intersection / area of union 89 | ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... 90 | (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... 91 | iw*ih; 92 | ov=iw*ih/ua; 93 | if ov>ovmax 94 | ovmax=ov; 95 | jmax=j; 96 | end 97 | end 98 | end 99 | % assign detection as true positive/don't care/false positive 100 | if ovmax>=VOCopts.minoverlap 101 | if ~gt(i).diff(jmax) 102 | if ~gt(i).det(jmax) 103 | tp(d)=1; % true positive 104 | gt(i).det(jmax)=true; 105 | else 106 | fp(d)=1; % false positive (multiple detection) 107 | end 108 | end 109 | else 110 | fp(d)=1; % false positive 111 | end 112 | end 113 | 114 | % compute precision/recall 115 | fp=cumsum(fp); 116 | tp=cumsum(tp); 117 | rec=tp/npos; 118 | prec=tp./(fp+tp); 119 | 120 | ap=VOCap(rec,prec); 121 | 122 | if draw 123 | % plot precision/recall 124 | plot(rec,prec,'-'); 125 | grid; 126 | xlabel 'recall' 127 | ylabel 'precision' 128 | title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap)); 129 | end 130 | -------------------------------------------------------------------------------- /globalModel/run_computeScores_globalModel_Casablanca.m: --------------------------------------------------------------------------------
%run_computeScores_globalModel_Casablanca applies the global model to the whole input images
% of the Casablanca dataset to produce multi-scale confidence heatmaps
%
% The script stores all scores in resultFile and, additionally, one file per image
% (variable 'score') in the folder <resultPath>/global/dets.

% SETUP THESE PATHS TO RUN THE CODE
dataPath = 'data/Casablanca';
resultPath = 'results/Casablanca';

% network to evaluate
netFile = fullfile( 'models', 'global.mat' );

% file to store the scores
resultFile = fullfile( resultPath, 'global', 'globalModel-scores-test.mat' );

if ~exist(fullfile(resultPath, 'global'), 'dir')
    mkdir(fullfile(resultPath, 'global'));
end

% Casablanca dataset contains only the test set, so scoreSubset has to be equal to 3
scoreSubset = 3;

%% setup paths
filePath = fileparts( mfilename('fullpath') );
run( fullfile( fileparts( filePath ), 'setup.m' ) );

%% parameters
opts_cnn = struct;

opts_cnn.dataPath = dataPath;
opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt');
opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations';
opts_cnn.dataset.imageLocalPrefix = 'JPEGImages';
opts_cnn.dataset.candidateLocalPrefix = 'Candidates';

opts_cnn.expDir = resultPath;
opts_cnn.batchSize = 32;
opts_cnn.imdbName = 'globalModel';
opts_cnn.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';

%% load dataset
opts_cnn.imdbPath = fullfile(opts_cnn.expDir, 'imdb.mat');
if exist(opts_cnn.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts_cnn.imdbPath);
    imdb = load(opts_cnn.imdbPath);
    % an existing imdb file is always used; a mismatch of the options is only reported
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts_cnn.dataset )
        fprintf('imdb.opts is compatible with the opt_cnn.dataset\n')
    else
        warning('opts_cnn.dataset parameters are not compatible with the provided imdb file. Be careful!');
    end
else
    warning('imdb file is not found. Making it will take some time.');
    fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
    imdb = cnn_prepareData_localModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
    imdb.opts = opts_cnn.dataset;
    % the folder normally exists already (it is the parent of the one created above);
    % the check avoids the "Directory already exists" warning of mkdir
    if ~exist(opts_cnn.expDir, 'dir')
        mkdir(opts_cnn.expDir);
    end
    save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% load network
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    % the file stores the network inside the variable 'net'
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');


%% select the test set to run evaluation
% indices of the images that belong to any of the subsets listed in scoreSubset
imageSetToDoPr = find( ismember( imdb.images.set(:), scoreSubset ) );

%% start the evaluation
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.grid_size = [1 2 4 8];
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.dataPath = opts_cnn.dataPath;
opts_cnn.getBatchEvaluation.jitterStd = 0;

batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_globalModel(imdb, batch, ...
    opts_cnn.getBatchEvaluation) ;

scores = cnn_computeScores_globalModel( net, imdb, batchWrapperEvaluation, ...
    'imageSet', imageSetToDoPr, ...
    'batchSize', opts_cnn.batchSize, ...
    'scoreMode', opts_cnn.scoreMode);

if ~exist(fileparts(resultFile), 'dir')
    mkdir(fileparts(resultFile));
end
save( resultFile, 'scores', '-v7.3' );

%% save detections for files
detSavePath = fullfile( resultPath, 'global', 'dets');
if ~exist(detSavePath, 'dir')
    mkdir(detSavePath)
end
detSaveFormat = fullfile(detSavePath, '%s.mat');
disp('Saving detection files');
for i=1:length(imageSetToDoPr)
    imgIdx = imageSetToDoPr(i);
    score = scores{imgIdx};
    % the output file is named after the image file (without folder and extension)
    [~, im_name, ~] = fileparts(imdb.imageFiles{imgIdx});
    savePath = sprintf(detSaveFormat, im_name);
    save(savePath, 'score');
end
-------------------------------------------------------------------------------- /pairwiseModel/vl_structuredNetwork_pairwiseModel_test_derivative.m: -------------------------------------------------------------------------------- 1 | function vl_structuredNetwork_pairwiseModel_test_derivative 2 | %vl_structuredNetwork_pairwiseModel_test_derivative tests vl_structuredNetwork_pairwiseModel 3 | 4 | % create the following file from the variables of cnn_train_pairwiseModel.m right before the call of vl_structuredNetwork_pairwiseModel.m 5 | load( 'vl_structuredNetwork_pairwiseModel_test_derivative.mat', 'net', 'im', 'gradients', 'labels', 'one' ); 6 | testEps = 1e-3; 7 | 8 | fprintf('Computing the gradient ... '); 9 | tStart = tic; 10 | [lossValue, gradients, predictions] = vl_structuredNetwork_pairwiseModel(net, im, gradients, labels, one, ... 11 | 'conserveMemory', true, ... 12 | 'sync', true, ... 
13 | 'disableDropout', true ) ; 14 | fprintf( '%f\n', toc(tStart) ); 15 | 16 | maxGroupTests = 100; 17 | rng(1); 18 | 19 | % test derivatives 20 | for iLayer = length( net.layers ) : -1 : 1 21 | if ~isequal( net.layers{iLayer}.type, 'conv' ) 22 | continue; 23 | end 24 | fprintf('Layer %d: %s\n', iLayer, net.layers{iLayer}.name); 25 | 26 | empiricalDerivative = zeros( numel(gradients{ iLayer }.dzdw{2}), 1, 'like', gradients{ iLayer }.dzdw{2}); 27 | fprintf('Number of bias derivatives: %d\n', numel(empiricalDerivative)); 28 | 29 | randOrder = randperm(numel(empiricalDerivative)); 30 | numTests = min( numel(empiricalDerivative), maxGroupTests); 31 | for iValueIndex = 1 : numTests 32 | if mod(iValueIndex, 1000) == 0 33 | fprintf('Derivative #%d\n', iValueIndex); 34 | end 35 | iValue = randOrder(iValueIndex); 36 | 37 | initValue = net.layers{iLayer}.weights{2}(iValue); 38 | net.layers{iLayer}.weights{2}(iValue) = initValue + testEps; 39 | 40 | [lossValue_test, ~, predictions_test] = vl_structuredNetwork_pairwiseModel(net, im, gradients, labels, [], ... 41 | 'conserveMemory', true, ... 42 | 'sync', true, ... 
43 | 'disableDropout', true) ; 44 | 45 | empiricalDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 46 | 47 | net.layers{iLayer}.weights{2}(iValue) = initValue; 48 | end 49 | 50 | testIndices = randOrder(1 : numTests); 51 | emphiricalGradient = empiricalDerivative(testIndices); 52 | computedGradient = gradients{ iLayer }.dzdw{2}(testIndices); 53 | derivativeError = gather( norm(emphiricalGradient(:) - computedGradient(:)) / norm(computedGradient(:)) ); 54 | 55 | fprintf('Relative error of bias derivatives: %f\n', derivativeError ); 56 | fprintf('Norm of tested derivatives: %f\n', norm(computedGradient(:)) ); 57 | 58 | 59 | 60 | empiricalDerivative = zeros( numel(gradients{ iLayer }.dzdw{1}), 1, 'like', gradients{ iLayer }.dzdw{1}); 61 | fprintf('Number of filter derivatives: %d\n', numel(empiricalDerivative)); 62 | 63 | randOrder = randperm(numel(empiricalDerivative)); 64 | numTests = min( numel(empiricalDerivative), maxGroupTests); 65 | for iValueIndex = 1 : numTests 66 | if mod(iValueIndex, 1000) == 0 67 | fprintf('Derivative #%d\n', iValueIndex); 68 | end 69 | 70 | iValue = randOrder(iValueIndex); 71 | 72 | initValue = net.layers{iLayer}.weights{1}(iValue); 73 | net.layers{iLayer}.weights{1}(iValue) = initValue + testEps; 74 | 75 | [lossValue_test, ~, predictions_test] = vl_structuredNetwork_pairwiseModel(net, im, gradients, labels, [], ... 76 | 'conserveMemory', true, ... 77 | 'sync', true, ... 
78 | 'disableDropout', true) ; 79 | 80 | empiricalDerivative(iValue) = sum(lossValue_test - lossValue) / testEps; 81 | 82 | net.layers{iLayer}.weights{1}(iValue) = initValue; 83 | end 84 | 85 | testIndices = randOrder(1 : numTests); 86 | emphiricalGradient = empiricalDerivative(testIndices); 87 | computedGradient = gradients{ iLayer }.dzdw{1}(testIndices); 88 | derivativeError = gather( norm(emphiricalGradient(:) - computedGradient(:)) / norm(computedGradient(:)) ); 89 | 90 | fprintf('Relative error of filter derivatives: %f\n', derivativeError ); 91 | fprintf('Norm of tested derivatives: %f\n', norm(computedGradient(:)) ); 92 | 93 | end 94 | 95 | 96 | end 97 | 98 | -------------------------------------------------------------------------------- /utils/load_det.m: --------------------------------------------------------------------------------
function det = load_det(varargin)
%load_det loads detection results of different local/pairwise models and post-processes them
%
% det = load_det('option', value, ...) goes over the images listed in opts.im_set. For each
% image the boxes are read with load_BB, the boxes whose score (column 5) is not larger than
% opts.det.thres are dropped, the boxes with extreme aspect ratios are optionally removed
% (columns 3 and 4 are treated as width and height), and non-maximum suppression
% (selectBoundingBoxesNonMaxSup) and regression (do_regression) are applied; their order is
% controlled by opts.regression_before_nms.
%
% Output: struct array det with one element per image and fields
%   path   - file with the detections, sprintf(opts.det.path_format, <image id>)
%   impath - image file, sprintf(opts.im_path_format, <image id>)
%   id     - element of opts.im_set
%   bb     - remaining boxes, one per row, the score is in column 5
%
% The options and their default values are listed in the code below.

%% parse parameters
opts = struct;
opts.fix_ann.x_off = 0;
opts.fix_ann.y_off = 0;
opts.fix_ann.w = inf;
opts.fix_ann.h = inf;

opts.regression = struct; % detection regression parameters (passed to do_regression)
opts.regression.param = [0 0 1 1];
opts.regression.fix_ann = struct; % fix annotation if needed
opts.regression.fix_ann.x_off = 0;
opts.regression.fix_ann.y_off = 0;
opts.regression.fix_ann.w = inf;
opts.regression.fix_ann.h = inf;
opts.regression.warp = 'none';

opts.nms = struct; % passed to selectBoundingBoxesNonMaxSup
opts.nms.nmsIntersectionOverAreaThreshold = 0.3;
opts.nms.numBoundingBoxMax = inf;

opts.regression_before_nms = false; % do regression before nms

opts.det = struct;
opts.det.modeltype = 'local'; % model type (passed to load_BB)
opts.det.thres = -inf;
opts.det.path_format = '';
opts.det.scoretype = 'raw';
opts.det.as_filter = false; % remove *weird* aspect ratio
opts.det.as_ratio = 1.5;

opts.im_path_format = '';
opts.im_set = [];

opts.viz = struct; % visualization
opts.viz.doviz = false;
opts.viz.max_det = 5;

opts.verbose = true;
opts.progress_part = 5; % number of progress messages

opts = vl_argparse(opts, varargin);

det = struct;
numimage = length(opts.im_set);

if (opts.verbose)
    fprintf('Loading detections');
end
progress_part_num = ceil(numimage/opts.progress_part);

for i = 1:numimage
    if (opts.verbose)
        if (i==numimage)
            fprintf('...100%%\n');
        elseif ~mod(i,progress_part_num)
            % round: %d is given an integer for any value of progress_part
            fprintf('...%d%%', round(i*100/(progress_part_num*opts.progress_part)));
        end
    end

    % image identifiers can be stored in a numeric array or in a cell array
    if (~iscell(opts.im_set))
        idname = opts.im_set(i);
    else
        idname = opts.im_set{i};
    end
    det_path = sprintf(opts.det.path_format, idname);
    im_path = sprintf(opts.im_path_format , idname);
    det(i).path = det_path;
    det(i).impath = im_path;
    det(i).id = idname;

    im = imread(im_path);
    [img_h, img_w, ~] = size(im);

    BB = load_BB(opts.det.modeltype, det_path);
    if (isempty(BB))
        det(i).bb = BB;
        continue;
    end

    % if (strcmp(detector,'globalmasked')~=0)
    %     scale_factor = 224/size(im,2);
    %     pad_size = floor((size(im,2)-size(im,1))/2);
    %     BB_unpad = BB/scale_factor;
    %     BB_unpad(:, 2) = BB_unpad(:, 2) - pad_size;
    %     BB = BB_unpad;
    % end

    % thresholding low-score detections
    BB = BB(BB(:,5)>opts.det.thres, :);
    if (isempty(BB)) det(i).bb = BB; continue; end

    % remove *weird* aspect ratio if needed
    if (opts.det.as_filter)
        w = BB(:,3);
        h = BB(:,4);
        as = w./h;
        % element-wise '&' is required: 'as' has one entry per box,
        % while '&&' accepts only scalar operands
        BB = BB(as < opts.det.as_ratio & as > 1/opts.det.as_ratio, :);
    end
    if (isempty(BB)) det(i).bb = BB; continue; end

    if (~opts.regression_before_nms)
        top = selectBoundingBoxesNonMaxSup(BB(:,1:4), BB(:,5), opts.nms);
        BB = BB(top, :);
        BB = do_regression(BB, img_w, img_h, opts.regression);
    else
        BB = do_regression(BB, img_w, img_h, opts.regression);
        top = selectBoundingBoxesNonMaxSup(BB(:,1:4), BB(:,5), opts.nms);
        BB = BB(top, :);
    end

    if (opts.viz.doviz)
        % show the first opts.viz.max_det boxes with their scores and wait for a key press
        fig = figure;
        imshow(im); hold on;
        for z=1:min(opts.viz.max_det, size(BB,1))
            rectangle('position', BB(z, 1:4), 'edgecolor', 'r');
            text(double(BB(z, 1))+5, double(BB(z, 2))+5, num2str(BB(z, 5)), 'color', 'yellow');
        end
        disp('Press any key to continue...');
        pause;
        close(fig);
    end

    det(i).bb = BB;
end
end
-------------------------------------------------------------------------------- /pairwiseModel/energyMinimization/minimizeEnergyPairwiseBinary.m: -------------------------------------------------------------------------------- 1 | function [labels, energy, isOptimal] = minimizeEnergyPairwiseBinary( unaryTerms, pairwiseTerms, varargin ) 2 | %minimizeEnergyPairwiseBinary minimizes (approximately) the energy consisting of the unary and the pairwise potentials. 3 | % Several methods are applied: first - QPBO; if the number of unlabelled nodes is small, the exhaustive search, and TRW-S otherwise. 
4 | 5 | if ~exist('varargin', 'var') 6 | varargin = {}; 7 | end 8 | 9 | %% parameters 10 | opts = struct; 11 | opts.energyComputationTolerance = 1e-5; 12 | opts.labelingToCheck = []; 13 | opts.maxBruteForceVars = 20; 14 | % parse input 15 | opts = vl_argparse(opts, varargin); 16 | 17 | 18 | %% check input 19 | numLabels = 2; 20 | if ~isnumeric(unaryTerms) || ~ismatrix(unaryTerms) || size(unaryTerms, 2) ~= 2 21 | error('Incorrect format for unaryTerms, has to be numNodes x 2') 22 | end 23 | numNodes = size(unaryTerms, 1); 24 | 25 | if ~isnumeric(pairwiseTerms) || ~ismatrix(pairwiseTerms) || size(pairwiseTerms, 2) ~= 6 26 | error('Incorrect format for pairwiseTerms, has to be numEdges x 6') 27 | end 28 | numEdges = size(pairwiseTerms, 1); 29 | 30 | %% Run QPBO to get partially optimal labelling 31 | [lowerBoundQpbo, labels_qpbo] = qpboMex(double(unaryTerms), double(pairwiseTerms)); 32 | maskUnlabeled = labels_qpbo < 0; 33 | numUnlabeled = sum(maskUnlabeled); 34 | 35 | labels_qpbo = labels_qpbo + 1; 36 | if numUnlabeled == 0 37 | % everything is labeled 38 | labels = labels_qpbo; 39 | energy = lowerBoundQpbo; 40 | isOptimal = true; 41 | return; 42 | end 43 | isOptimal = false; 44 | 45 | %% Reduce energy to only unlabeled nodes 46 | if numUnlabeled < numNodes 47 | [ unaryTermsNew, pairwiseTermsNew, energyConstant ] = projectEnergyBinaryPairwise( unaryTerms, pairwiseTerms, labels_qpbo ); 48 | else 49 | unaryTermsNew = unaryTerms; 50 | pairwiseTermsNew = pairwiseTerms; 51 | energyConstant = 0; 52 | end 53 | 54 | if numUnlabeled < opts.maxBruteForceVars 55 | %% run brute force 56 | [ energy, labelsPartial ] = bruteForceBinaryPairwiseMex(double(unaryTermsNew), double(pairwiseTermsNew)); 57 | else 58 | 59 | %% Run TRW-S 60 | [ unaryTerms_reparam, pairwiseTerms_reparam, reparametrizationConstant ] = reparameterizeEnergy( unaryTermsNew, pairwiseTermsNew ); 61 | 62 | numNodesNew = length( unaryTerms_reparam ); 63 | unaryTrws = [zeros(1, numNodesNew); unaryTerms_reparam']; 64 | 
65 | maskChange = pairwiseTerms_reparam(:, 1) > pairwiseTerms_reparam(:, 2); 66 | swapVariable = pairwiseTerms_reparam(maskChange, 1); 67 | pairwiseTerms_reparam(maskChange, 1) = pairwiseTerms_reparam(maskChange, 2); 68 | pairwiseTerms_reparam(maskChange, 2) = swapVariable; 69 | 70 | pairwiseTrws = sparse( pairwiseTerms_reparam(:, 1), pairwiseTerms_reparam(:, 2), pairwiseTerms_reparam(:, 3), numNodesNew, numNodesNew ); 71 | metricTrws = [0 0 ; 0 1]; 72 | 73 | optionsTrws = struct; 74 | optionsTrws.verbosity = 0; 75 | optionsTrws.funcEps = 1e-6; 76 | optionsTrws.maxIter = 1000; 77 | [labelsPartial, energy_trws] = trwsMex_time(double(unaryTrws), double(pairwiseTrws), double(metricTrws), optionsTrws); 78 | energy = energy_trws + reparametrizationConstant; 79 | 80 | % energyCheck = computeEnergyBinaryPairwise( unaryTermsNew, pairwiseTermsNew, labels_trws ); 81 | % if abs( energyCheck - energy ) > 1e-5 82 | % error('TRW-S energy is computed wrong') 83 | % end 84 | end 85 | 86 | 87 | %% check if provided labeling is better 88 | if ~isempty(opts.labelingToCheck) 89 | labelsOfInterest = opts.labelingToCheck(maskUnlabeled); 90 | energyCheck = computeEnergyBinaryPairwise( unaryTermsNew, pairwiseTermsNew, labelsOfInterest ); 91 | 92 | if energyCheck < energy 93 | energy = energyCheck; 94 | labelsPartial = labelsOfInterest; 95 | end 96 | end 97 | 98 | %% produce the result 99 | labels = labels_qpbo; 100 | labels(maskUnlabeled) = labelsPartial; 101 | energy = energy + energyConstant; 102 | 103 | %% check energy computation 104 | energy_check = computeEnergyBinaryPairwise( unaryTerms, pairwiseTerms, labels ); 105 | if abs(energy - energy_check ) > opts.energyComputationTolerance 106 | warning(['Energy is computed incorrectly, error: ', num2str(abs(energy - energy_check ))]); 107 | end 108 | 109 | 110 | end 111 | 112 | -------------------------------------------------------------------------------- /localModel/cnn_localModel.m: 
--------------------------------------------------------------------------------
function cnn_localModel( varargin )
%cnn_localModel runs the full instance of the CNN training procedure for the local model
%
% cnn_localModel('option', value, ...) performs the following steps:
%   1) loads the dataset description (imdb) from opts.imdbPath if the file exists and its
%      field 'opts' equals opts.dataset; otherwise generates it with
%      cnn_prepareData_localModel and saves it to opts.imdbPath;
%   2) creates the network by calling the function handle opts.networkInitialization
%      (an error is raised if the handle is not provided);
%   3) runs cnn_train_localModel with the options opts.train, training on the images with
%      imdb.images.set == 1 and validating on the images with imdb.images.set == 2.
%
% The options and their default values are listed in the code below.

if ~exist('varargin', 'var')
    varargin = {};
end

%% parse parameters
opts = struct;
opts.networkInitialization = []; % handle to the function initializing the network
opts.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';
opts.expDir = ''; % folder to store the results of network training
opts.randSeed = 1;
opts.maxGpuImagesEvaluation = 256; % maximal number of patches to crop on a GPU at the same time
opts.dataPath = '';
opts.imdbPath = ''; % path to the file with the dataset information created by cnn_prepareData_localModel.m; default: fullfile(opts.expDir, 'imdb.mat')

% dataset info
opts.dataset = struct;
opts.dataset.trainingSetFile = '';
opts.dataset.validationSetFile = '';
opts.dataset.testSetFile = '';
opts.dataset.groundTruthLocalPrefix = '';
opts.dataset.imageLocalPrefix = '';
opts.dataset.candidateLocalPrefix = '';

% CNN training
opts.train.batchSize = 1; % number of images to form a batch
opts.train.numEpochs = 3;
opts.train.learningRate = [ 0.001 0.0001 0.00001 ];
opts.train.weightDecay = 0.0005;
opts.train.backPropDepth = +inf;
opts.train.expDir = '';
opts.train.numValidationPerEpoch = 8;
opts.train.restartEpoch = nan;
opts.train.conserveMemory = true;

% training batch generation
opts.getBatch.numPatchesPerImage = 64;
opts.getBatch.maxPositives = 32;
opts.getBatch.randSeed = 1;
opts.getBatch.iouPositiveThreshold = 0.5;
opts.getBatch.iouNegativeThreshold = 0.6;
opts.getBatch.cropMode = 'warp';
opts.getBatch.jitterStd = 0;
opts.getBatch.cropPad = [18 18 18 18];

% evaluation parameters
opts.evaluation.iouThreshold = 0.5;
opts.evaluation.useDifficultImages = false;

% parse input
opts = vl_argparse(opts, varargin);

if isempty( opts.imdbPath )
    opts.imdbPath = fullfile(opts.expDir, 'imdb.mat');
end

%% random seed
resetRandomStreams( opts.randSeed );

%% Prepare data
generateDataFlag = true;
if exist(opts.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts.imdbPath);
    imdb = load(opts.imdbPath);
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts.dataset )
        generateDataFlag = false;
    else
        generateDataFlag = true;
        warning('imdb file is not consistent with the parameters, need to generate it again');
    end
end
if generateDataFlag
    fprintf('Generating imdb file %s\n', opts.imdbPath);
    imdb = cnn_prepareData_localModel( opts.dataset, 'dataPath', opts.dataPath );
    imdb.opts = opts.dataset;
    % opts.imdbPath can point outside of opts.expDir, so both folders are created;
    % otherwise save would fail after the dataset has already been generated
    ensureFolderExists( opts.expDir );
    ensureFolderExists( fileparts( opts.imdbPath ) );
    save(opts.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% random seed
% fix the random seeds here as well to exclude the effect of changing the seed while generating data
resetRandomStreams( opts.randSeed );


%% Initialize network
if ~isempty(opts.networkInitialization)
    net = opts.networkInitialization();
else
    error('cnn_localModel:noInitNetwork', 'Initialization not provided');
end

opts.getBatch.meanImage = net.normalization.averageImage;
opts.getBatch.randStream = RandStream('mt19937ar','Seed',opts.getBatch.randSeed);
opts.getBatch.dataPath = opts.dataPath;


%% batch generator
batchWrapper = @(imdb, batch) cnn_getBatch_localModel(imdb, batch, opts.getBatch ) ;

%% training
cnn_train_localModel(net, imdb, batchWrapper, ...
    opts.train, ...
    'train', find(imdb.images.set == 1), ...
    'val', find(imdb.images.set == 2) );

end

function resetRandomStreams( seed )
%resetRandomStreams sets the global CPU and GPU random streams to new streams created from seed
% the CPU random seed
cpu_rs = RandStream('mt19937ar','Seed',seed);
RandStream.setGlobalStream(cpu_rs);
% the GPU random seed
gpu_rs = parallel.gpu.RandStream('CombRecursive','Seed',seed);
parallel.gpu.RandStream.setGlobalStream(gpu_rs);
end

function ensureFolderExists( folder )
%ensureFolderExists creates folder if its name is not empty and it does not exist yet
if ~isempty( folder ) && ~exist( folder, 'dir' )
    mkdir( folder );
end
end
-------------------------------------------------------------------------------- /localModel/cnn_prepareData_localModel.m: -------------------------------------------------------------------------------- 1 | function imdb = cnn_prepareData_localModel( varargin ) 2 | %cnn_prepareData_localModel prepares the dataset for training the local model 3 | 4 | if ~exist('varargin', 'var') 5 | varargin = {}; 6 | end 7 | 8 | %% parse parameters 9 | opts = struct; 10 | opts.dataPath = ''; 11 | opts.trainingSetFile = ''; 12 | opts.validationSetFile = ''; 13 | opts.testSetFile = ''; 14 | opts.groundTruthLocalPrefix = ''; 15 | opts.imageLocalPrefix = ''; 16 | opts.candidateLocalPrefix = ''; 17 | 18 | opts.numThreads = 4; 19 | 20 | % parse input 21 | opts = vl_argparse(opts, varargin); 22 | 23 | %% get images and candidates 24 | fileLists = { opts.trainingSetFile; opts.validationSetFile; opts.testSetFile }; 25 | fileNames = cell( numel( fileLists ), 1 ); 26 | numImages = 0; 27 | for iList = 1 : numel( fileLists ) 28 | if ~isempty( fileLists{iList} ) 29 | fileNames{iList} = readLines( fullfile( opts.dataPath, fileLists{iList} ) ); 30 | numImages = numImages + numel( fileNames{iList} ); 31 | end 32 | end 33 | imageFiles = cell( numImages, 1 ); 34 | candidateFiles = cell( numImages, 1 ); 35 | groundTruthFiles = cell( numImages, 1 ); 36 | globalImageCount = 0; 37 | for iList = 1 : numel( fileLists ) 38 | if ~isempty( fileLists{iList} ) 39 | for iImage = 1 : numel( fileNames{iList} ) 40 | globalImageCount = globalImageCount + 
1; 41 | imageFiles{globalImageCount} = fullfile( opts.imageLocalPrefix, [fileNames{iList}{iImage}, '.jpeg'] ); 42 | if ~exist(fullfile(opts.dataPath, imageFiles{globalImageCount}), 'file') 43 | % look for suitable files 44 | suitable_files = dir(fullfile( opts.dataPath, opts.imageLocalPrefix, [fileNames{iList}{iImage}, '.*'])); 45 | if length(suitable_files) ~= 1 46 | error(['Suitable image file for ', fileNames{iList}{iImage}, ' was not identified']) 47 | end 48 | imageFiles{globalImageCount} = fullfile( opts.imageLocalPrefix, suitable_files(1).name ); 49 | end 50 | candidateFiles{globalImageCount} = fullfile( opts.candidateLocalPrefix, [fileNames{iList}{iImage}, '.mat'] ); 51 | groundTruthFiles{globalImageCount} = fullfile( opts.groundTruthLocalPrefix, [fileNames{iList}{iImage}, '.xml'] ); 52 | end 53 | end 54 | end 55 | 56 | %% setup the imdb structure 57 | imdb = struct; 58 | imdb.imageFiles = imageFiles; 59 | imdb.candidateFiles = candidateFiles; 60 | imdb.groundTruthFiles = groundTruthFiles; 61 | imdb.images.set = zeros(numImages, 1); 62 | imdb.images.set( 1 : numel(fileNames{1}) ) = 1; % training set 63 | imdb.images.set( numel(fileNames{1}) + 1 : numel(fileNames{1}) + numel(fileNames{2}) ) = 2; % validation set 64 | imdb.images.set( numel(fileNames{1}) + numel(fileNames{2}) + 1 : numel(fileNames{1}) + numel(fileNames{2}) + numel(fileNames{3}) ) = 3; % test set 65 | 66 | %% get GT 67 | groundTruth = cell(numImages, 1); 68 | isDifficultGroundTruth = cell(numImages, 1); 69 | groundTruthFiles = imdb.groundTruthFiles; 70 | 71 | fprintf('Reading annotation for %d images\n', numImages); 72 | parfor (iImage = 1 : numImages, opts.numThreads) 73 | if mod( iImage, 1000 ) == 0 74 | fprintf( 'Image %d\n', iImage ) 75 | end 76 | 77 | annotation_file = fullfile( opts.dataPath, groundTruthFiles{iImage} ); 78 | have_annotation = false; 79 | if exist( annotation_file, 'file') 80 | annotation = VOCreadrecxml( annotation_file ); 81 | if isfield(annotation, 'objects') 82 | 
have_annotation = true; 83 | groundTruth{iImage} = nan( numel( annotation.objects ), 4 ); 84 | isDifficultGroundTruth{iImage} = false( numel( annotation.objects ), 1 ); 85 | 86 | for iGt = 1 : numel( annotation.objects ) 87 | groundTruth{iImage}(iGt, :) = annotation.objects(iGt).bbox; % bbox is in X1 Y1 X2 Y2 format 88 | isDifficultGroundTruth{iImage}(iGt) = annotation.objects(iGt).difficult; 89 | end 90 | end 91 | end 92 | if ~have_annotation 93 | % there is no annotation for this image 94 | groundTruth{iImage} = nan( 0, 4 ); 95 | isDifficultGroundTruth{iImage} = false( 0, 1 ); 96 | end 97 | end 98 | imdb.groundTruth = groundTruth; 99 | imdb.isDifficultGroundTruth = isDifficultGroundTruth; 100 | 101 | end 102 | 103 | 104 | -------------------------------------------------------------------------------- /globalModel/run_computeScores_globalModel.m: -------------------------------------------------------------------------------- 1 | %run_computeScores_globalModel applies the global model to the whole input images to produce multi-scale confidence heatmaps 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | dataPath = 'data/HollywoodHeads'; 5 | resultPath = 'results/HollywoodHeads'; 6 | 7 | % network to evaluate 8 | netFile = fullfile( 'models', 'global.mat' ); 9 | 10 | % file to store the scores 11 | resultFile = fullfile( resultPath, 'global', 'globalModel-scores-test.mat' ); 12 | 13 | if ~exist(fullfile(resultPath, 'global'), 'dir') 14 | mkdir(fullfile(resultPath, 'global')); 15 | end 16 | 17 | % Choose subset of data to compute the scores. 18 | % To run the evaluation of the global model you need the test test (3). 
scoreSubset = 3; % 1 - train subset, 2 - validation, 3 - test; can do [1,2,3] to compute scores on all the subsets

%% setup paths
filePath = fileparts( mfilename('fullpath') );
run( fullfile( fileparts( filePath ), 'setup.m' ) );

%% parameters
opts_cnn = struct;

opts_cnn.dataPath = dataPath;
opts_cnn.dataset.trainingSetFile = fullfile('Splits', 'train.txt');
opts_cnn.dataset.validationSetFile = fullfile('Splits', 'val.txt');
opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt');
opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations';
opts_cnn.dataset.imageLocalPrefix = 'JPEGImages';
opts_cnn.dataset.candidateLocalPrefix = 'Candidates';

opts_cnn.expDir = resultPath;
opts_cnn.batchSize = 32;
opts_cnn.imdbName = 'globalModel';
opts_cnn.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';

%% load dataset
% an existing imdb file is always used (with a warning if it was created with
% different dataset options); the file is generated only when it is missing
opts_cnn.imdbPath = fullfile(opts_cnn.expDir, 'imdb.mat');
generateDataFlag = true;
if exist(opts_cnn.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts_cnn.imdbPath);
    imdb = load(opts_cnn.imdbPath);
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts_cnn.dataset )
        fprintf('imdb.opts is compatible with the opt_cnn.dataset\n')
    else
        warning('opts_cnn.dataset parameters are not compatible with the provided imdb file. Be careful!');
    end
else
    warning('imdb file is not found. Making it will take some time.');
    fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
    imdb = cnn_prepareData_localModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
    imdb.opts = opts_cnn.dataset;
    % the folder may already exist; calling mkdir on it would issue a warning
    if ~exist(opts_cnn.expDir, 'dir')
        mkdir(opts_cnn.expDir);
    end
    save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% load network
% the file can store either the network itself or a struct with the field 'net'
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');


%% select the test set to run evaluation
imageSetToDoPr = false( numel( imdb.images.set ), 1 );
for iSubset = 1 : numel( scoreSubset )
    imageSetToDoPr = imageSetToDoPr | ( imdb.images.set(:) == scoreSubset( iSubset ) );
end
imageSetToDoPr = find( imageSetToDoPr );

%% start the evaluation
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.grid_size = [1 2 4 8];
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.dataPath = opts_cnn.dataPath;
opts_cnn.getBatchEvaluation.jitterStd = 0;

batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_globalModel(imdb, batch, ...
    opts_cnn.getBatchEvaluation) ;

scores = cnn_computeScores_globalModel( net, imdb, batchWrapperEvaluation, ...
    'imageSet', imageSetToDoPr, ...
    'batchSize', opts_cnn.batchSize, ...
    'scoreMode', opts_cnn.scoreMode);

if ~exist(fileparts(resultFile), 'dir')
    mkdir(fileparts(resultFile));
end
save( resultFile, 'scores', '-v7.3' );

%% save detections for files
% one file per image, named after the image, with the variable 'score'
detSavePath = fullfile( resultPath, 'global', 'dets');
if ~exist(detSavePath, 'dir')
    mkdir(detSavePath)
end
detSaveFormat = fullfile(detSavePath, '%s.mat');
disp('Saving detection files');
for i=1:length(imageSetToDoPr)
    imgIdx = imageSetToDoPr(i);
    score = scores{imgIdx};
    [~, im_name, ~] = fileparts(imdb.imageFiles{imgIdx});
    savePath = sprintf(detSaveFormat, im_name);
    save(savePath, 'score');
end
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/qpboMex/qpboMex.cpp:
--------------------------------------------------------------------------------


#include "QPBO.h"
#include "mex.h"

#include <math.h>
#include <limits.h>

#define INFTY INT_MAX

double round(double a);
int isInteger(double a);

#define MATLAB_ASSERT(expr,msg) if (!(expr)) { mexErrMsgTxt(msg);}

#if !defined(MX_API_VER) || MX_API_VER < 0x07030000
typedef int mwSize;
typedef int mwIndex;
#endif

typedef QPBO<double> GraphType;

// MEX entry point: minimizes a binary pairwise energy with QPBO.
//
// Input (exactly 2 arguments):
//   prhs[0] - unary potentials, real double matrix of size #nodes x 2
//             (column 1 - cost of label 0, column 2 - cost of label 1)
//   prhs[1] - pairwise potentials, double matrix of size #edges x 6:
//             columns 1-2 are 1-based node indices, columns 3-6 are the four
//             values passed to QPBO::AddPairwiseTerm in this order
// Output (at most 2 arguments):
//   plhs[0] - lower bound (half of QPBO::ComputeTwiceLowerBound)
//   plhs[1] - #nodes x 1 double vector with the value of QPBO::GetLabel for each node
//
// Invalid input is reported with mexErrMsgTxt. When no output is requested
// only the input validation is performed.
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[])
{
	MATLAB_ASSERT( nrhs == 2, "qpboMex: Wrong number of input parameters: expected 2");
	MATLAB_ASSERT( nlhs <= 2, "qpboMex: Too many output arguments: expected 2 or less");

	//Fix input parameter order:
	const mxArray *uInPtr = (nrhs >= 1) ? prhs[0] : NULL; //unary
	const mxArray *pInPtr = (nrhs >= 2) ? prhs[1] : NULL; //pairwise

	//Fix output parameter order:
	mxArray **cOutPtr = (nlhs >= 1) ? &plhs[0] : NULL; //LB
	mxArray **lOutPtr = (nlhs >= 2) ? &plhs[1] : NULL; //labels

	//node number
	mwSize numNodes;

	// get unary potentials
	MATLAB_ASSERT(mxGetNumberOfDimensions(uInPtr) == 2, "qpboMex: The unary paramater is not 2-dimensional");
	MATLAB_ASSERT(mxGetClassID(uInPtr) == mxDOUBLE_CLASS, "qpboMex: Unary potentials are of wrong type");
	MATLAB_ASSERT(mxGetPi(uInPtr) == NULL, "qpboMex: Unary potentials should not be complex");

	numNodes = mxGetM(uInPtr);

	MATLAB_ASSERT(numNodes >= 1, "qpboMex: The number of nodes is not positive");
	// this check concerns the unary matrix, so the message must not refer to the edges
	MATLAB_ASSERT(mxGetN(uInPtr) == 2, "qpboMex: The unary parameter is not of size #nodes x 2");

	double* termW = (double*)mxGetData(uInPtr);

	//get pairwise potentials
	MATLAB_ASSERT(mxGetNumberOfDimensions(pInPtr) == 2, "qpboMex: The edge paramater is not 2-dimensional");

	mwSize numEdges = mxGetM(pInPtr);

	MATLAB_ASSERT( mxGetN(pInPtr) == 6, "qpboMex: The edge paramater is not of size #edges x 6");
	MATLAB_ASSERT(mxGetClassID(pInPtr) == mxDOUBLE_CLASS, "qpboMex: Pairwise potentials are of wrong type");

	// the matrix is stored column-wise: column k of edge i is edges[k * numEdges + i]
	double* edges = (double*)mxGetData(pInPtr);
	for(mwSize i = 0; i < numEdges; i++)
	{
		MATLAB_ASSERT(1 <= round(edges[i]) && round(edges[i]) <= numNodes, "qpboMex: error in pairwise terms array");
		MATLAB_ASSERT(isInteger(edges[i]), "qpboMex: error in pairwise terms array");
		MATLAB_ASSERT(1 <= round(edges[i + numEdges]) && round(edges[i + numEdges]) <= numNodes, "qpboMex: error in pairwise terms array");
		MATLAB_ASSERT(isInteger(edges[i + numEdges]), "qpboMex: error in pairwise terms array");
	}



	// start computing
	if (nlhs == 0){
		return;
	}

	//prepare graph
	GraphType *g = new GraphType(numNodes, numEdges);

	//add unary potentials
	g -> AddNode(numNodes);
	for(mwSize i = 0; i < numNodes; i++)
	{
		g -> AddUnaryTerm((GraphType::NodeId) i, termW[i], termW[numNodes + i]);
	}

	//add pairwise terms
	// after the validation above the only edges rejected here are the self-loops
	for(mwSize i = 0; i < numEdges; i++)
		if(edges[i] < 1 || edges[i] > numNodes || edges[numEdges + i] < 1 || edges[numEdges + i] > numNodes || edges[i] == edges[numEdges + i] || !isInteger(edges[i]) || !isInteger(edges[numEdges + i])){
			mexWarnMsgIdAndTxt("qpboMex:pairwisePotentials", "Some edge has invalid vertex numbers and therefore it is ignored");
		}
		else
		{
			g -> AddPairwiseTerm((GraphType::NodeId) (edges[i] - 1), (GraphType::NodeId) (edges[numEdges + i] - 1), edges[2 * numEdges + i], edges[3 * numEdges + i], edges[4 * numEdges + i], edges[5 * numEdges + i]);
		}

	//Merge edges
	g -> MergeParallelEdges();

	//Solve
	g -> Solve();
	g -> ComputeWeakPersistencies();

	//output lower bound value
	if (cOutPtr != NULL){
		*cOutPtr = mxCreateNumericMatrix(1, 1, mxDOUBLE_CLASS, mxREAL);
		*(double*)mxGetData(*cOutPtr) = 0.5 * (g -> ComputeTwiceLowerBound());
	}

	//output labeling
	if (lOutPtr != NULL){
		*lOutPtr = mxCreateNumericMatrix(numNodes, 1, mxDOUBLE_CLASS, mxREAL);
		double* segment = (double*)mxGetData(*lOutPtr);
		for(mwSize i = 0; i < numNodes; i++)
			segment[i] = g -> GetLabel(i);
	}

	delete g;
}


// Rounds to the nearest integer value (halves are rounded up).
// The result is kept in double: a cast to mwSize is not safe for negative
// input when mwSize is an unsigned type.
double round(double a)
{
	return floor(a + 0.5);
}

// Returns non-zero if a is within 1e-6 of an integer value.
// The absolute value is required: without it every number that is rounded up
// (e.g. 1.6) gives a negative difference and would be accepted.
int isInteger(double a)
{
	return (fabs(a - round(a)) < 1e-6);
}
--------------------------------------------------------------------------------
/pairwiseModel/cnn_pairwiseModel.m:
--------------------------------------------------------------------------------
function cnn_pairwiseModel( varargin)
%cnn_pairwiseModel runs the full instance of the CNN training procedure for the pairwise model
%
% cnn_pairwiseModel( 'optionName', optionValue, ... );
%
% The function
%   1) parses the options (defaults are listed below),
%   2) loads the dataset description (imdb) from opts.imdbPath or generates it
%      with cnn_prepareData_pairwiseModel when the file is missing or was
%      created with different dataset options,
%   3) creates the initial network by calling opts.networkInitialization(),
%   4) runs cnn_train_pairwiseModel on the images of the training (set == 1)
%      and validation (set == 2) subsets.
%
% An error is raised when opts.networkInitialization is empty.

if ~exist('varargin', 'var')
    varargin = {};
end

%% parse parameters
opts = struct;
opts.networkInitialization = [];
opts.scoreMode = 'maxMarginals'; % 'maxMarginals' - the only implemented option
opts.expDir = ''; % folder to store the results of network training
opts.randomSeed = 1;
opts.loss = 'logisticScoresCompact'; % 'logisticScoresCompact' or 'svmStructCompact'
opts.dataPath = '';
opts.imdbPath = ''; % path to the file with the dataset information created by cnn_prepareData_pairwiseModel; default: fullfile(opts.expDir, 'imdb_pairwise.mat')

% dataset info
opts.dataset = struct;
% same as in the local model:
opts.dataset.trainingSetFile = '';
opts.dataset.validationSetFile = '';
opts.dataset.testSetFile = '';
opts.dataset.groundTruthLocalPrefix = '';
opts.dataset.imageLocalPrefix = '';
opts.dataset.candidateLocalPrefix = '';
% specific to the pairwise model:
opts.dataset.scoreFile = [];
opts.dataset.maxNumPatchesPerImage = 16;
opts.dataset.nmsIntersectionOverAreaThreshold = 0.3;
opts.dataset.numPairwiseClusters = 20;
opts.dataset.clusterInfo = struct('type', [], 'mean', [], 'std', [], 'numClusters', [], 'clusterCenters', [] );
opts.dataset.clusterFunction = [];

% CNN training
opts.train.batchSize = 4; % number of images to form a batch
opts.train.numEpochs = 8;
opts.train.learningRate = [ 0.00001*ones(1, 4), 0.000001*ones(1, 4) ];
opts.train.weightDecay = 0.0005 / 100;
opts.train.backPropagateType = 'all'; % 'all', 'unaryAndPairwise', 'onlyUnary', 'onlyPairwise'
opts.train.expDir = ''; % default: fullfile( opts.expDir, 'pairwiseModel' ), resolved after parsing the input
opts.train.numValidationPerEpoch = 2;
opts.train.conserveMemory = true;
opts.train.disableDropoutFeatureExtractor = false;

% training batch generation
opts.getBatch.cropMode = 'warp';
opts.getBatch.jitterStd = 1;
opts.getBatch.iouPositiveNegativeThreshold = 0.5;
opts.getBatch.randomizeCandidates = false; % select the candidates not based on the precomputed scores but randomly
opts.getBatch.nmsIntersectionOverAreaThreshold = 0.3; % only active if opts.randomizeCandidates == true
opts.getBatch.cropPad = [18, 18, 18, 18];
opts.getBatch.randSeed = 1;

% evaluation parameters
opts.evaluation.iouThreshold = 0.5;
opts.evaluation.useDifficultImages = false;

% parse input
opts = vl_argparse(opts, varargin);

if isempty( opts.imdbPath )
    opts.imdbPath = fullfile(opts.expDir, 'imdb_pairwise.mat');
end
% the default training folder depends on opts.expDir, so it has to be computed
% after the user-provided value of opts.expDir is known
if isempty( opts.train.expDir )
    opts.train.expDir = fullfile( opts.expDir, 'pairwiseModel' );
end



%% Prepare data
generateDataFlag = true;
if exist(opts.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts.imdbPath);
    imdb = load(opts.imdbPath);
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts.dataset )
        generateDataFlag = false;
    else
        generateDataFlag = true;
        warning('imdb file is not consistent with the parameters, need to generate it again');
    end
end
if generateDataFlag
    fprintf('Generating imdb file %s\n', opts.imdbPath);
    imdb = cnn_prepareData_pairwiseModel( opts.dataset, 'dataPath', opts.dataPath );
    imdb.opts = opts.dataset;
    % create the experiment folder only when it is specified and missing:
    % mkdir('') is an error and mkdir on an existing folder issues a warning
    if ~isempty( opts.expDir ) && ~exist( opts.expDir, 'dir' )
        mkdir(opts.expDir);
    end
    % opts.imdbPath can point outside of opts.expDir, so make sure that its
    % folder exists before saving
    imdbDir = fileparts( opts.imdbPath );
    if ~isempty( imdbDir ) && ~exist( imdbDir, 'dir' )
        mkdir( imdbDir );
    end
    save(opts.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% random seed for the learning process
% the CPU random seed
cpu_rs = RandStream('mt19937ar','Seed',opts.randomSeed);
RandStream.setGlobalStream(cpu_rs);
% the GPU random seed
gpu_rs = parallel.gpu.RandStream('CombRecursive','Seed',opts.randomSeed);
parallel.gpu.RandStream.setGlobalStream(gpu_rs);

%% Initialize network
if ~isempty( opts.networkInitialization )
    net = opts.networkInitialization();
else
    error('cnn_pairwiseModel:noInitNetwork', 'Initialization not provided');
end

% options of the batch generator that depend on the network and on the data location
opts.getBatch.meanImage = net.normalization.averageImage;
opts.getBatch.randStream = RandStream('mt19937ar','Seed', opts.getBatch.randSeed);
opts.getBatch.dataPath = opts.dataPath;

%% batch generator
batchWrapper = @(imdb, batch) cnn_getBatch_pairwiseModel(imdb, batch, opts.getBatch) ;

%% training
cnn_train_pairwiseModel(net, imdb, batchWrapper, ...
    opts.train, ...
    'train', find(imdb.images.set == 1), ...
    'val', find(imdb.images.set == 2) );

end

--------------------------------------------------------------------------------
/localModel/cnn_initNet_localModel.m:
--------------------------------------------------------------------------------
function net = cnn_initNet_localModel( networkFile, numClasses, meanImage, numNodesExtraLayers, addDropout )
%cnn_initNet_localModel initializes CNN from a pretrained network in MatConvNet format
%
% net = cnn_initNet_localModel( networkFile, numClasses);
% net = cnn_initNet_localModel( networkFile, numClasses, meanImage );
% net = cnn_initNet_localModel( networkFile, numClasses, meanImage, numNodesExtraLayers );
% net = cnn_initNet_localModel( networkFile, numClasses, meanImage, numNodesExtraLayers, addDropout );
%
% Input:
%   networkFile - file with the pretrained network (the network itself or a struct with the field 'net')
%   numClasses - number of class for which to construct the network
%   meanImage - the new mean image of the network (default: [], keep the normalization of the pretrained network)
%   numNodesExtraLayers - number of nodes to add to the extra layers, weights are initialized randomly (default: [])
%   addDropout - true or false whether to add dropout layers after the fully-connected layers of the pretrained network (default: false)
%
% Output:
%   net - struct containing the network in the matconvnet format

if ~exist('numNodesExtraLayers', 'var') || isempty(numNodesExtraLayers)
    numNodesExtraLayers = [];
end
if ~exist('addDropout', 'var') || isempty(addDropout)
    addDropout = false;
end
if ~exist('meanImage', 'var') || isempty(meanImage)
    meanImage = [];
end


% the initialization parameters
scal = 1 ;
init_bias = 0.1;
dropout_rate = 0.5;

%% read the network file
net = load( networkFile );
% accept the files where the network is wrapped into the field 'net'
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('cnn_initNet_localModel:wrongNetwork', 'Cannot find the layers of the pretrained network');
end
net.layers(end - 1 : end) = []; % cut off the last two layers (the old output layer and the loss layer)

%% determine the number of output features
% find the last convolutional layer
iLayer = length(net.layers);
while ~isequal( net.layers{ iLayer }.type, 'conv' ) && iLayer > 1
    iLayer = iLayer - 1;
end
if isequal( net.layers{ iLayer }.type, 'conv' )
    numFeatures = size( net.layers{ iLayer }.weights{1}, 4 );
else
    error('cnn_initNet_localModel:wrongNetwork', 'Cannot determine the number of features from the pretrained network');
end

%% regularize the fully connected layers by dropout
if addDropout
    numDropout = 0;
    iLayer = 1;
    while iLayer <= length(net.layers)
        % the fully connected layers are the 'conv' layers with the name starting with 'fc';
        % strncmp returns false for the names shorter than 2 symbols instead of failing
        isFullyConnected = isequal( net.layers{iLayer}.type, 'conv' ) && ...
            isfield( net.layers{iLayer}, 'name' ) && ...
            strncmp( net.layers{iLayer}.name, 'fc', 2 );
        if isFullyConnected
            % if iLayer is the fully connected layer insert the dropout layer
            curNumLayers = length(net.layers);
            if iLayer < curNumLayers % if this is not the last layer
                % move all the remaining layers one layer forward
                net.layers(iLayer + 2 : curNumLayers + 1) = net.layers(iLayer + 1 : curNumLayers);
            end
            numDropout = numDropout + 1;
            net.layers{iLayer+1} = struct('type', 'dropout', ...
                'rate', dropout_rate, ...
                'name', ['dropout', num2str(numDropout)] );
        end
        iLayer = iLayer + 1;
    end
end

%% add new layers
% NOTE(review): the extra layers get a dropout layer regardless of addDropout;
% confirm that this is intended
numExtraLayers = length(numNodesExtraLayers);
numNodesExtraLayers = [numFeatures; numNodesExtraLayers(:)];
for iLayer = 1 : numExtraLayers
    net.layers{end+1} = struct('type', 'conv', ...
        'weights', {{ 0.01/scal * randn(1, 1, numNodesExtraLayers(iLayer), numNodesExtraLayers(iLayer+1),'single'),... % filters
        init_bias*ones(1,numNodesExtraLayers(iLayer+1),'single') }} , ... % biases
        'stride', 1, ...
        'pad', 0, ...
        'learningRate', [1, 2], ...
        'weightDecay', [1, 0], ...
        'name', ['fc_extra', num2str(iLayer)]) ;
    net.layers{end+1} = struct('type', 'dropout', ...
        'rate', dropout_rate, ...
        'name', ['dropout_extra', num2str(iLayer)] );
    net.layers{end+1} = struct('type', 'relu', ...
        'name', ['relu_extra', num2str(iLayer)]) ;
end
% the new output layer: one filter per class
net.layers{end+1} = struct('type', 'conv', ...
    'weights', {{ 0.01/scal * randn(1, 1, numNodesExtraLayers(end), numClasses,'single'),... % filters
    init_bias*ones(1,numClasses,'single') }}, ... % biases
    'stride', 1, ...
    'pad', 0, ...
    'learningRate', [1, 2], ...
    'weightDecay', [1, 0], ...
    'name', 'fc_classes') ;

% The loss
net.layers{end+1} = struct('type', 'softmaxloss') ;

% Extra info
net.classes = struct;
net.classes.name = {'head', 'background'};
net.classes.description = {'head', 'background'};

% set the new mean image
if ~isempty(meanImage)
    net.normalization = struct;
    net.normalization.averageImage = meanImage;
end

end

--------------------------------------------------------------------------------
/localModel/run_computeScores_localModel_Casablanca.m:
--------------------------------------------------------------------------------
%run_computeScores_localModel_Casablanca applies the local model to the bounding-box proposals of the Casablanca dataset to compute their scores

% SETUP THESE PATHS TO RUN THE CODE
dataPath = 'data/Casablanca';
resultPath = 'results/Casablanca';

% network to evaluate
netFile = fullfile( 'models', 'local.mat' );

% file to store the scores
resultFile = fullfile( resultPath, 'local', 'localModel-scores-test.mat' );

% Casablanca dataset contains only the test set, so scoreSubset has to be equal to 3
scoreSubset = 3;

%% setup paths
filePath = fileparts( mfilename('fullpath') );
run( fullfile( fileparts( filePath ), 'setup.m' ) );

%% parameters
opts_cnn = struct;
opts_cnn.dataPath = dataPath;
% only the test split is specified; the training and validation lists are left unset
opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt');
opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations';
opts_cnn.dataset.imageLocalPrefix = 'JPEGImages';
opts_cnn.dataset.candidateLocalPrefix = 'Candidates';

opts_cnn.expDir = resultPath;
opts_cnn.maxGpuImagesEvaluation = 3000;
opts_cnn.gpuBatchSize = 128;
opts_cnn.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';

opts_cnn.evaluation.iouThreshold = 0.5;
opts_cnn.evaluation.nmsMaxCandidateNumber = inf;
opts_cnn.evaluation.useDifficultImages = false;

%% load dataset
% an existing imdb file is always used (with a warning if it was created with
% different dataset options); the file is generated only when it is missing
opts_cnn.imdbPath = fullfile(opts_cnn.expDir, 'imdb.mat');
if exist(opts_cnn.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts_cnn.imdbPath);
    imdb = load(opts_cnn.imdbPath);
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts_cnn.dataset )
        fprintf('imdb.opts is compatible with the opt_cnn.dataset\n')
    else
        warning('opts_cnn.dataset parameters are not compatible with the provided imdb file. Be careful!');
    end
else
    warning('imdb file is not found. Making it will take some time.');
    fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
    imdb = cnn_prepareData_localModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
    imdb.opts = opts_cnn.dataset;
    mkdir(opts_cnn.expDir);
    save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% load network
% the file can store either the network itself or a struct with the field 'net'
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');

%% select the set of images to run evaluation
imageSetToDoPr = false( numel( imdb.images.set ), 1 );
for iSubset = 1 : numel( scoreSubset )
    imageSetToDoPr = imageSetToDoPr | ( imdb.images.set(:) == scoreSubset( iSubset ) );
end
imageSetToDoPr = find( imageSetToDoPr );

%% start the evaluation
% no limits on the number of patches and positives: numPatchesPerImage and maxPositives are inf
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.cropMode = 'warp';
opts_cnn.getBatchEvaluation.cropPad = [18, 18, 18, 18];
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.numPatchesPerImage = inf;
opts_cnn.getBatchEvaluation.maxPositives = inf;
opts_cnn.getBatchEvaluation.maxGpuImages = opts_cnn.maxGpuImagesEvaluation;
opts_cnn.getBatchEvaluation.iouPositiveThreshold = opts_cnn.evaluation.iouThreshold;
opts_cnn.getBatchEvaluation.iouNegativeThreshold = opts_cnn.evaluation.iouThreshold;
opts_cnn.getBatchEvaluation.dataPath = opts_cnn.dataPath;
opts_cnn.getBatchEvaluation.jitterStd = 0;

batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_localModel(imdb, batch, ...
    opts_cnn.getBatchEvaluation) ;

[scores, candidateIds] = cnn_computeScores_localModel( net, imdb, batchWrapperEvaluation, ...
    'imageSet', imageSetToDoPr, ...
    'gpuBatchSize', opts_cnn.gpuBatchSize, ...
    'scoreMode', opts_cnn.scoreMode );

if exist(resultFile, 'file')
    warning('The results file already exists. Overwriting!');
end
if ~exist(fileparts(resultFile), 'dir')
    mkdir(fileparts(resultFile));
end
save( resultFile, 'scores', 'candidateIds', '-v7.3' );

%% save detections to files
% one file per image, named after the image, with the variable 'BB'
detSavePath = fullfile( resultPath, 'local', 'dets');
if ~exist(detSavePath, 'dir')
    mkdir(detSavePath)
end
detSaveFormat = fullfile(detSavePath, '%s.mat');
disp('Saving detection files');
for i=1:length(imageSetToDoPr)
    imgIdx = imageSetToDoPr(i);
    cand_path = fullfile(dataPath, imdb.candidateFiles{imgIdx});
    % presumably defines the variable 'boxes' used below - verify against the candidate files
    load(cand_path);
    %load candidate
    % each row of BB: the selected boxes with the columns reordered as [2 1 4 3]
    % and passed through convertBb_X1Y1X2Y2_to_X1Y1WH, followed by the scores;
    % NOTE(review): the reordering suggests that 'boxes' is stored as Y1 X1 Y2 X2 - confirm
    BB = [convertBb_X1Y1X2Y2_to_X1Y1WH(boxes(candidateIds{imgIdx},[2 1 4 3])) scores{imgIdx}];
    [~, im_name, ~] = fileparts(imdb.imageFiles{imgIdx});
    savePath = sprintf(detSaveFormat, im_name);
    save(savePath, 'BB');
end
--------------------------------------------------------------------------------
/pairwiseModel/energyMinimization/trwsMex/src/ordering.cpp:
--------------------------------------------------------------------------------
// NOTE(review): the names of the headers in the four includes below and the
// template parameter list of the function appear to be missing - confirm
// against the original TRW-S sources
#include
#include
#include
#include
#include "MRFEnergy.h"

// Reorders the nodes of the graph and completes the graph construction.
//
// The nodes are moved one by one from the original list to the new list
// (m_nodeFirst ... m_nodeLast). Three sets are maintained:
//   'list'         - nodes that were not reached yet,
//   'listBoundary' - nodes that were not ordered yet but have a neighbor that was ordered,
//   m_nodeFirst    - nodes that already received their position.
// At each step the node with the smallest remaining degree is taken from
// 'listBoundary' (a new seed is taken from 'list' when the boundary is empty),
// appended to the new list, and its neighbors are moved from 'list' to 'listBoundary'.
// The field m_ordering encodes both the set of the node and its remaining
// degree (see the comments in the first loop) and finally stores the position
// of the node in the new order.
//
// m_errorFn is called if the graph construction was already completed.
template  void MRFEnergy::SetAutomaticOrdering()
{
	int dMin;
	Node* i;
	Node* iMin;
	Node* list;
	Node* listBoundary;
	MRFEdge* e;

	if (m_isEnergyConstructionCompleted)
	{
		m_errorFn("Error in SetAutomaticOrdering(): function cannot be called after graph construction is completed");
	}

	if ( verbosityLevel == 2 )
		printf("Setting automatic ordering... ");

	// detach the current node list; the new order is built in m_nodeFirst/m_nodeLast
	list = m_nodeFirst;
	listBoundary = NULL;
	m_nodeFirst = m_nodeLast = NULL;
	for (i=list; i; i=i->m_next)
	{
		i->m_ordering = 2*m_nodeNum; // will contain remaining degree mod m_nodeNum (i.e. number of edges connecting to nodes in 'listBoundary' and 'list')
		                             // if i->m_ordering \in [2*m_nodeNum; 3*m_nodeNum) - not assigned yet, belongs to 'list'
		                             // if i->m_ordering \in [m_nodeNum; 2*m_nodeNum) - not assigned yet, belongs to 'listBoundary'
		                             // if i->m_ordering \in [0; m_nodeNum ) - assigned, belongs to 'm_nodeFirst'
		for (e=i->m_firstForward; e; e=e->m_nextForward)
		{
			i->m_ordering ++;
		}
		for (e=i->m_firstBackward; e; e=e->m_nextBackward)
		{
			i->m_ordering ++;
		}
	}

	while (list)
	{
		// find node with the smallest remaining degree in list
		// NOTE(review): iMin is assigned only if some node has the remaining degree
		// smaller than m_nodeNum; presumably this always holds for the graphs
		// without parallel edges - confirm
		dMin = m_nodeNum;
		for (i=list; i; i=i->m_next)
		{
			assert(i->m_ordering >= 2*m_nodeNum);
			if (dMin > i->m_ordering - 2*m_nodeNum)
			{
				dMin = i->m_ordering - 2*m_nodeNum;
				iMin = i;
			}
		}
		i = iMin;

		// remove i from list
		if (i->m_prev) i->m_prev->m_next = i->m_next;
		else list = i->m_next;
		if (i->m_next) i->m_next->m_prev = i->m_prev;

		// add i to listBoundary
		// (listBoundary is empty at this point, so i becomes its only element)
		listBoundary = i;
		i->m_prev = NULL;
		i->m_next = NULL;
		i->m_ordering -= m_nodeNum;

		while (listBoundary)
		{
			// find node with the smallest remaining degree in listBoundary
			dMin = m_nodeNum;
			for (i=listBoundary; i; i=i->m_next)
			{
				assert(i->m_ordering >= m_nodeNum && i->m_ordering < 2*m_nodeNum);
				if (dMin > i->m_ordering - m_nodeNum)
				{
					dMin = i->m_ordering - m_nodeNum;
					iMin = i;
				}
			}
			i = iMin;

			// remove i from listBoundary
			if (i->m_prev) i->m_prev->m_next = i->m_next;
			else listBoundary = i->m_next;
			if (i->m_next) i->m_next->m_prev = i->m_prev;

			// add i to m_nodeFirst
			// (m_ordering now stores the position of the node in the new order)
			if (m_nodeLast)
			{
				m_nodeLast->m_next = i;
				i->m_ordering = m_nodeLast->m_ordering + 1;
			}
			else
			{
				m_nodeFirst = i;
				i->m_ordering = 0;
			}
			i->m_prev = m_nodeLast;
			m_nodeLast = i;
			i->m_next = NULL;

			// process neighbors of i=m_nodeLast: decrease their remaining degree,
			// put them into listBoundary (if they are in list)
			for (e=m_nodeLast->m_firstForward; e; e=e->m_nextForward)
			{
				assert(m_nodeLast == e->m_tail);
				i = e->m_head;
				if (i->m_ordering >= m_nodeNum)
				{
					i->m_ordering --; // decrease remaining degree of i
					if (i->m_ordering >= 2*m_nodeNum)
					{
						// remove i from list
						if (i->m_prev) i->m_prev->m_next = i->m_next;
						else list = i->m_next;
						if (i->m_next) i->m_next->m_prev = i->m_prev;

						// add i to listBoundary
						if (listBoundary) listBoundary->m_prev = i;
						i->m_prev = NULL;
						i->m_next = listBoundary;
						listBoundary = i;
						i->m_ordering -= m_nodeNum;
					}
				}
			}
			// the same for the edges where m_nodeLast is the head
			for (e=m_nodeLast->m_firstBackward; e; e=e->m_nextBackward)
			{
				assert(m_nodeLast == e->m_head);
				i = e->m_tail;
				if (i->m_ordering >= m_nodeNum)
				{
					i->m_ordering --; // decrease remaining degree of i
					if (i->m_ordering >= 2*m_nodeNum)
					{
						// remove i from list
						if (i->m_prev) i->m_prev->m_next = i->m_next;
						else list = i->m_next;
						if (i->m_next) i->m_next->m_prev = i->m_prev;

						// add i to listBoundary
						if (listBoundary) listBoundary->m_prev = i;
						i->m_prev = NULL;
						i->m_next = listBoundary;
						listBoundary = i;
						i->m_ordering -= m_nodeNum;
					}
				}
			}
		}
	}

	if( verbosityLevel == 2 )
		printf("done\n");

	CompleteGraphConstruction();
}

#include "instances.inc"


--------------------------------------------------------------------------------
/globalModel/cnn_initNet_globalModel.m:
--------------------------------------------------------------------------------
function net = cnn_initNet_globalModel( networkFile, numOutput, numClasses, meanImage,
numNodesExtraLayers, addDropout ) 2 | %cnn_initNet_localModel initializes CNN from a pretrained network in MatConvNet format 3 | % 4 | % net = cnn_initNet_globalModel( networkFile, numOutput, numClasses); 5 | % net = cnn_initNet_globalModel( networkFile, numOutput, numClasses, meanImage ); 6 | % net = cnn_initNet_globalModel( networkFile, numOutput, numClasses, meanImage, numNodesExtraLayers ); 7 | % net = cnn_initNet_globalModel( networkFile, numOutput, numClasses, meanImage, numNodesExtraLayers, addDropout ); 8 | % 9 | % Input: 10 | % networkFile - file with the pretrained network 11 | % numOutput - total number of heatmap cells across all scales 12 | % numClasses - number of class for which to construct the network 13 | % meanImage - the new mean image of the network 14 | % numNodesExtraLayers - number of nodes to add to the extra layers, weights are initialized randomly (default: []) 15 | % addDropout - true of false whether to add dropout layers after the fully-connected layers (default: false) 16 | % 17 | % Output: 18 | % net - struct containing the network in the matconvnet format 19 | 20 | if ~exist('numNodesExtraLayers', 'var') || isempty(numNodesExtraLayers) 21 | numNodesExtraLayers = []; 22 | end 23 | if ~exist('addDropout', 'var') || isempty(addDropout) 24 | addDropout = false; 25 | end 26 | if ~exist('meanImage', 'var') || isempty(meanImage) 27 | meanImage = []; 28 | end 29 | 30 | 31 | % the initialization parameters 32 | scal = 1 ; 33 | init_bias = 0.1; 34 | dropout_rate = 0.5; 35 | 36 | %% read the network file 37 | net = load( networkFile ); 38 | net.layers(end - 1 : end) = []; % cut off the loss layer 39 | 40 | %% determine the number of output features 41 | % find the last convolutional layer 42 | iLayer = length(net.layers); 43 | while ~isequal( net.layers{ iLayer }.type, 'conv' ) && iLayer > 1 44 | iLayer = iLayer - 1; 45 | end 46 | if isequal( net.layers{ iLayer }.type, 'conv' ) 47 | numFeatures = size( net.layers{ iLayer }.weights{1}, 4 ); 48 
| else 49 | error('cnn_initNet_localModel:wrongNetwork', 'Cannot determine the number of features from the pretrained network'); 50 | end 51 | 52 | %% regularize the fully connected layers by dropout 53 | if addDropout 54 | numDropout = 0; 55 | iLayer = 1; 56 | while iLayer <= length(net.layers) 57 | if isequal( net.layers{iLayer}.type, 'conv' ) && isequal( net.layers{iLayer}.name(1 : 2), 'fc' ) 58 | % if iLayer is the fully connected layer insert the dropout layer 59 | curNumLayers = length(net.layers); 60 | if iLayer < curNumLayers % if this is not the last layer 61 | % move all the remaining layers one layer forward 62 | net.layers(iLayer + 2 : curNumLayers + 1) = net.layers(iLayer + 1 : curNumLayers); 63 | end 64 | numDropout = numDropout + 1; 65 | net.layers{iLayer+1} = struct('type', 'dropout', ... 66 | 'rate', dropout_rate, ... 67 | 'name', ['dropout', num2str(numDropout)] ); 68 | end 69 | iLayer = iLayer + 1; 70 | end 71 | end 72 | 73 | %% add new layers 74 | numExtraLayers = length(numNodesExtraLayers); 75 | numNodesExtraLayers = [numFeatures; numNodesExtraLayers(:)]; 76 | for iLayer = 1 : numExtraLayers 77 | net.layers{end+1} = struct('type', 'conv', ... 78 | 'weights', {{ 0.01/scal * randn(1, 1, numNodesExtraLayers(iLayer), numNodesExtraLayers(iLayer+1),'single'),... % filters 79 | init_bias*ones(1,numNodesExtraLayers(iLayer+1),'single') }} , ... % biases 80 | 'stride', 1, ... 81 | 'pad', 0, ... 82 | 'learningRate', [1, 2], ... 83 | 'weightDecay', [1, 0], ... 84 | 'name', ['fc_extra', num2str(iLayer)]) ; 85 | net.layers{end+1} = struct('type', 'dropout', ... 86 | 'rate', dropout_rate, ... 87 | 'name', ['dropout_extra', num2str(iLayer)] ); 88 | net.layers{end+1} = struct('type', 'relu', ... 89 | 'name', ['relu_extra', num2str(iLayer)]) ; 90 | end 91 | net.layers{end+1} = struct('type', 'conv', ... 92 | 'weights', {{ 0.01/scal * randn(1, 1, numNodesExtraLayers(end), numOutput*numClasses,'single'),... 
% filters 93 | init_bias*ones(1,numOutput*numClasses,'single') }}, ... % biases 94 | 'stride', 1, ... 95 | 'pad', 0, ... 96 | 'learningRate', [1, 2], ... 97 | 'weightDecay', [1, 0], ... 98 | 'name', 'fc_classes') ; 99 | 100 | % reshape layer 101 | net.layers{end+1} = struct('type', 'reshape',... 102 | 'numOutput', numOutput,... 103 | 'numClasses', numClasses); 104 | % The loss 105 | net.layers{end+1} = struct('type', 'softmaxloss') ; 106 | 107 | net.classes = struct; 108 | net.classes.name = {'head', 'background'}; 109 | net.classes.description = {'head', 'background'}; 110 | 111 | % set the new mean image 112 | if ~isempty(meanImage) 113 | net.normalization = struct; 114 | net.normalization.averageImage = meanImage; 115 | end 116 | 117 | end 118 | -------------------------------------------------------------------------------- /localModel/run_computeScores_localModel.m: -------------------------------------------------------------------------------- 1 | %run_computeScores_localModel applies the local model to the bounding-box proposals to compute their scores 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | dataPath = 'data/HollywoodHeads'; 5 | resultPath = 'results/HollywoodHeads'; 6 | 7 | % network to evaluate 8 | netFile = fullfile( 'models', 'local.mat' ); 9 | 10 | % file to store the scores 11 | resultFile = fullfile( resultPath, 'local', 'localModel-scores-test.mat' ); 12 | 13 | % Choose subset of data to compute the scores. 14 | % To run the evaluation of the local model you need the test test (3). 15 | % To run the training of the pairwise model you need all the subsets ([1,2,3]). It might be a good idea to do this in batches on multiple GPUs. 
scoreSubset = 3; % 1 - train subset, 2 - validation, 3 - test; can do [1,2,3] to compute scores on all the subsets

%% setup paths
% setup.m is expected one folder above the folder containing this script
filePath = fileparts( mfilename('fullpath') );
run( fullfile( fileparts( filePath ), 'setup.m' ) );

%% parameters
opts_cnn = struct;
opts_cnn.dataPath = dataPath;
opts_cnn.dataset.trainingSetFile = fullfile('Splits', 'train.txt');
opts_cnn.dataset.validationSetFile = fullfile('Splits', 'val.txt');
opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt');
opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations';
opts_cnn.dataset.imageLocalPrefix = 'JPEGImages';
opts_cnn.dataset.candidateLocalPrefix = 'Candidates';

opts_cnn.expDir = resultPath;
opts_cnn.maxGpuImagesEvaluation = 3000;
opts_cnn.gpuBatchSize = 128;
opts_cnn.scoreMode = 'beforeSoftMax'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';

opts_cnn.evaluation.iouThreshold = 0.5;
opts_cnn.evaluation.nmsMaxCandidateNumber = inf;
opts_cnn.evaluation.useDifficultImages = false;

%% load dataset
% the imdb is cached in the experiment folder; it is regenerated only if the cache file is missing
opts_cnn.imdbPath = fullfile(opts_cnn.expDir, 'imdb.mat');
if exist(opts_cnn.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts_cnn.imdbPath);
    imdb = load(opts_cnn.imdbPath);
    % the cached imdb stores the dataset options it was built with; only warn on a mismatch
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts_cnn.dataset )
        fprintf('imdb.opts is compatible with the opt_cnn.dataset\n')
    else
        warning('opts_cnn.dataset parameters are not compatible with the provided imdb file. Be careful!');
    end
else
    warning('imdb file is not found. Making it will take some time.');
    fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
    imdb = cnn_prepareData_localModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
    imdb.opts = opts_cnn.dataset;
    % create the experiment folder only when needed: mkdir warns if the folder already exists
    if ~exist(opts_cnn.expDir, 'dir')
        mkdir(opts_cnn.expDir);
    end
    save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% load network
% the file may hold the network fields directly or the whole network in a variable 'net'
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');

%% select the set of images to run evaluation
% indices (column vector) of all the images whose subset id is listed in scoreSubset
imageSetToDoPr = find( ismember( imdb.images.set(:), scoreSubset ) );

%% start the evaluation
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.cropMode = 'warp';
opts_cnn.getBatchEvaluation.cropPad = [18, 18, 18, 18];
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.numPatchesPerImage = inf;
opts_cnn.getBatchEvaluation.maxPositives = inf;
opts_cnn.getBatchEvaluation.maxGpuImages = opts_cnn.maxGpuImagesEvaluation;
opts_cnn.getBatchEvaluation.iouPositiveThreshold = opts_cnn.evaluation.iouThreshold;
opts_cnn.getBatchEvaluation.iouNegativeThreshold = opts_cnn.evaluation.iouThreshold;
opts_cnn.getBatchEvaluation.dataPath = opts_cnn.dataPath;
opts_cnn.getBatchEvaluation.jitterStd = 0;

batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_localModel(imdb, batch, ...
    opts_cnn.getBatchEvaluation) ;

[scores, candidateIds] = cnn_computeScores_localModel( net, imdb, batchWrapperEvaluation, ...
    'imageSet', imageSetToDoPr, ...
    'gpuBatchSize', opts_cnn.gpuBatchSize, ...
96 | 'scoreMode', opts_cnn.scoreMode ); 97 | 98 | if exist(resultFile, 'file') 99 | warning('The results file already exists. Overwriting!'); 100 | end 101 | if ~exist(fileparts(resultFile), 'dir') 102 | mkdir(fileparts(resultFile)); 103 | end 104 | save( resultFile, 'scores', 'candidateIds', '-v7.3' ); 105 | 106 | %% save detections to files 107 | detSavePath = fullfile( resultPath, 'local', 'dets'); 108 | if ~exist(detSavePath, 'dir') 109 | mkdir(detSavePath) 110 | end 111 | detSaveFormat = fullfile(detSavePath, '%s.mat'); 112 | disp('Saving detection files'); 113 | for i=1:length(imageSetToDoPr) 114 | imgIdx = imageSetToDoPr(i); 115 | cand_path = fullfile(dataPath, imdb.candidateFiles{imgIdx}); 116 | load(cand_path); 117 | %load candidate 118 | BB = [convertBb_X1Y1X2Y2_to_X1Y1WH(boxes(candidateIds{imgIdx},[2 1 4 3])) scores{imgIdx}]; 119 | [~, im_name, ~] = fileparts(imdb.imageFiles{imgIdx}); 120 | savePath = sprintf(detSaveFormat, im_name); 121 | save(savePath, 'BB'); 122 | end 123 | -------------------------------------------------------------------------------- /pairwiseModel/vl_simplenn_pairwiseModel_backwardPass.m: --------------------------------------------------------------------------------
function [res, gradients] = vl_simplenn_pairwiseModel_backwardPass(net, x, trainableLayers, res, gradients, dzdy, varargin)
%vl_simplenn_pairwiseModel_backwardPass performs the backward pass using the provided CNN
%
% [res, gradients] = vl_simplenn_pairwiseModel_backwardPass(net, x, trainableLayers, res, gradients, dzdy, ...)
%
% Input:
%   net - network struct; net.layers is traversed from the last layer to the first one
%   x - input of the network; used here only to detect whether the data is a gpuArray
%   trainableLayers - cell array with shared parameters: a layer of type 'convPtr' with l.index == id
%                     takes its weights, pad and stride from trainableLayers{id}
%   res - struct array with the intermediate results (presumably produced by the matching forward pass):
%         res(i).x is read as the input of layer i, res(i+1).aux as the dropout mask of layer i
%   gradients - cell array; the filter and bias derivatives of 'convPtr' layers are ADDED to
%               gradients{id}.dzdw{1} and gradients{id}.dzdw{2}
%   dzdy - derivative of the objective w.r.t. the output of the network
%
% Options (name-value pairs):
%   accumulate - if true, the parameter derivatives of 'conv' and 'bnorm' layers are added to the
%                values already stored in res(i).dzdw instead of overwriting them (default: false)
%   conserveMemory - if true, res(i+1).dzdx is cleared as soon as layer i has been processed (default: false)
%   sync - if true and the input is on a GPU, wait for the GPU after each layer (default: false)
%   disableDropout - if true, dropout layers pass the derivative through unchanged (default: false)
%   backPropDepth - number of layers (counted from the end) to back-propagate through (default: +inf)
%   res, freezeDropout, doder - accepted but not used in this function
%
% Output:
%   res - the input struct array with the fields dzdx, dzdw and backwardTime filled in
%   gradients - the input cell array with the accumulated derivatives of the shared parameters

opts = struct;
opts.res = [] ;
opts.conserveMemory = false ;
opts.sync = false ;
opts.disableDropout = false ;
opts.freezeDropout = false ;
opts.doder = false;
% 'accumulate' is read by the 'conv' and 'bnorm' branches below, so it needs a default value:
% without it these branches fail with a reference to a non-existent field
opts.accumulate = false ;
opts.backPropDepth = +inf;
opts = vl_argparse(opts, varargin);

n = numel(net.layers) ;

gpuMode = isa(x, 'gpuArray') ;

res(n+1).dzdx = dzdy ;
for i=n:-1:max(1, n-opts.backPropDepth+1)
    l = net.layers{i} ;
    res(i).backwardTime = tic ;
    switch l.type
        case 'conv'
            if ~opts.accumulate
                [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                    vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, ...
                    res(i+1).dzdx, ...
                    'pad', l.pad, 'stride', l.stride) ;
            else
                dzdw = cell(1,2) ;
                [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                    vl_nnconv(res(i).x, l.weights{1}, l.weights{2}, ...
                    res(i+1).dzdx, ...
                    'pad', l.pad, 'stride', l.stride) ;
                for j=1:2
                    res(i).dzdw{j} = res(i).dzdw{j} + dzdw{j} ;
                end
                clear dzdw ;
            end

        case 'convPtr'
            % convolution with shared parameters: the derivatives are accumulated in 'gradients'
            % because several layers can point to the same entry of trainableLayers
            id = l.index;
            [res(i).dzdx, filterGradients, biasGradients] = ...
                vl_nnconv(res(i).x, trainableLayers{id}.weights{1}, trainableLayers{id}.weights{2}, ...
                res(i+1).dzdx, ...
                'pad', trainableLayers{id}.pad, 'stride', trainableLayers{id}.stride) ;
            gradients{id}.dzdw{1} = gradients{id}.dzdw{1} + filterGradients;
            gradients{id}.dzdw{2} = gradients{id}.dzdw{2} + biasGradients;

        case 'pool'
            res(i).dzdx = vl_nnpool(res(i).x, l.pool, res(i+1).dzdx, ...
                'pad', l.pad, 'stride', l.stride, 'method', l.method) ;
        case 'normalize'
            res(i).dzdx = vl_nnnormalize(res(i).x, l.param, res(i+1).dzdx) ;
        case 'softmax'
            res(i).dzdx = vl_nnsoftmax(res(i).x, res(i+1).dzdx) ;
        case 'loss'
            res(i).dzdx = vl_nnloss(res(i).x, l.class, res(i+1).dzdx) ;
        case 'softmaxloss'
            res(i).dzdx = vl_nnsoftmaxloss(res(i).x, l.class, res(i+1).dzdx) ;
        case 'svmloss_multiclass'
            res(i).dzdx = vl_nnsvmloss(res(i).x, l.class, res(i+1).dzdx) ;
        case 'relu'
            if ~isempty(res(i).x)
                res(i).dzdx = vl_nnrelu(res(i).x, res(i+1).dzdx) ;
            else
                % if res(i).x is empty, it has been optimized away, so we use this
                % hack (which works only for ReLU):
                res(i).dzdx = vl_nnrelu(res(i+1).x, res(i+1).dzdx) ;
            end
        case 'sigmoid'
            res(i).dzdx = vl_nnsigmoid(res(i).x, res(i+1).dzdx) ;
        case 'noffset'
            res(i).dzdx = vl_nnnoffset(res(i).x, l.param, res(i+1).dzdx) ;
        case 'spnorm'
            res(i).dzdx = vl_nnspnorm(res(i).x, l.param, res(i+1).dzdx) ;
        case 'dropout'
            if opts.disableDropout
                res(i).dzdx = res(i+1).dzdx ;
            else
                res(i).dzdx = vl_nndropout(res(i).x, res(i+1).dzdx, 'mask', res(i+1).aux) ;
            end
        case 'bnorm'
            % the parameters are stored either in l.weights or in the fields l.filters / l.biases
            if ~opts.accumulate
                if isfield(l, 'weights')
                    [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                        vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}, ...
                        res(i+1).dzdx) ;
                else
                    [res(i).dzdx, res(i).dzdw{1}, res(i).dzdw{2}] = ...
                        vl_nnbnorm(res(i).x, l.filters, l.biases, ...
                        res(i+1).dzdx) ;
                end
            else
                dzdw = cell(1,2) ;
                if isfield(l, 'weights')
                    [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                        vl_nnbnorm(res(i).x, l.weights{1}, l.weights{2}, ...
                        res(i+1).dzdx) ;
                else
                    [res(i).dzdx, dzdw{1}, dzdw{2}] = ...
                        vl_nnbnorm(res(i).x, l.filters, l.biases, ...
                        res(i+1).dzdx) ;
                end
                for j=1:2
                    res(i).dzdw{j} = res(i).dzdw{j} + dzdw{j} ;
                end
                clear dzdw ;
            end
        case 'pdist'
            res(i).dzdx = vl_nnpdist(res(i).x, l.p, res(i+1).dzdx, ...
                'noRoot', l.noRoot, 'epsilon', l.epsilon) ;
        case 'custom'
            res(i) = l.backward(l, res(i), res(i+1)) ;
    end
    if opts.conserveMemory
        res(i+1).dzdx = [] ;
    end
    % both operands are scalar logicals, so the short-circuit operator is the right one
    if gpuMode && opts.sync
        wait(gpuDevice) ;
    end
    res(i).backwardTime = toc(res(i).backwardTime) ;
end

end
-------------------------------------------------------------------------------- /demo_new_images.m: -------------------------------------------------------------------------------- 1 | % Demo code 2 | % Tuan-Hung Vu, Anton Osokin, Ivan Laptev, Context-aware CNNs for person head detection, ICCV 2015 3 | % This script shows how to try our local model on some new images.

% SETUP THESE PATHS TO RUN THE CODE
dataPath = 'data/new_data';
resultPath = 'results/new_data';

% Put the images into 'data/new_data/images'
% To get an example image run
% wget -P data/new_data/images http://tech.velmont.net/files/2009/04/lenna-lg.jpg

%% Setup
matconvnetPath = '~/local/software/matlab_toolboxes/matconvnet-1.0-beta18';
setup( matconvnetPath );

% cudaRoot = '/usr/cuda-7.0' ;
% compile_mex(cudaRoot);

% Assumes that Selective search is downloaded like this:
% wget http://huppelen.nl/publications/SelectiveSearchCodeIJCV.zip
% unzip SelectiveSearchCodeIJCV.zip
addpath('SelectiveSearchCodeIJCV', 'SelectiveSearchCodeIJCV/Dependencies');

%% Create the folders for the generated data
if ~exist(fullfile(dataPath, 'splits'), 'dir')
    mkdir(fullfile(dataPath, 'splits'));
end
if ~exist(fullfile(dataPath, 'candidates'), 'dir')
    mkdir(fullfile(dataPath, 'candidates'));
end

%% List the images and write the test split
% The same '*.jpg' listing is used for the split and for the Selective Search loop below, so every
% image of the split gets a candidate file. (Listing the whole folder would also pick up
% sub-folders and non-JPEG files, for which no candidates are ever computed.)
image_files = dir(fullfile(dataPath, 'images', '*.jpg'));
image_names = cell(0,0);
for i_file = 1 : length(image_files)
    % the split file stores the image names without the extension
    [~, cur_image_name, ~] = fileparts(image_files(i_file).name);
    image_names{end+1} = cur_image_name;
end
writeLines( fullfile(dataPath, 'splits', 'test.txt'), image_names );

% network to evaluate
netFile = fullfile( 'models', 'local.mat' );

% file to store the scores
resultFile = fullfile( resultPath, 'local', 'localModel-scores-test.mat' );

% Only test images are used
scoreSubset = 3;

%% Compute candidates by using Selective Search on all the images
fprintf('Running Selective Search on %d images\n', length(image_files));
for i_image = 1 : length(image_files)
    fprintf('Image %d of %d\n', i_image, length(image_files));
    curImage = imread(fullfile(dataPath, 'images', image_files(i_image).name));
    boxes = selective_search_boxes(curImage, true);
    % one candidate file per image, holding the variable 'boxes'
    [~,image_name,~] = fileparts(image_files(i_image).name);
    save( fullfile(dataPath, 'candidates', [image_name, '.mat']), 'boxes')
end

%% parameters
opts_cnn = struct;
opts_cnn.dataPath = dataPath;
opts_cnn.dataset.testSetFile = fullfile('splits', 'test.txt');
opts_cnn.dataset.imageLocalPrefix = 'images';
opts_cnn.dataset.candidateLocalPrefix = 'candidates';
opts_cnn.expDir = resultPath;
opts_cnn.scoreMode = 'scoreDifference'; % 'beforeSoftMax' or 'afterSoftMax' or 'scoreDifference';

%% load dataset
% the imdb is always regenerated here because the set of images may have changed
opts_cnn.imdbPath = fullfile(opts_cnn.expDir, 'imdb.mat');
fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
imdb = cnn_prepareData_localModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
imdb.opts = opts_cnn.dataset;
if ~exist(opts_cnn.expDir, 'dir')
    mkdir(opts_cnn.expDir);
end
save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;

%% load network
% the file may hold the network fields directly or the whole network in a variable 'net'
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');

%% select the set of images to run evaluation
imageSetToDoPr = false( numel( imdb.images.set ), 1 );
for iSubset = 1 : numel( scoreSubset )
    imageSetToDoPr = imageSetToDoPr | ( imdb.images.set(:) == scoreSubset( iSubset ) );
end
imageSetToDoPr = find( imageSetToDoPr );

%% start the evaluation
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.cropMode = 'warp';
opts_cnn.getBatchEvaluation.cropPad = [18, 18, 18, 18];
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.maxGpuImages = 128;
opts_cnn.getBatchEvaluation.dataPath = 
opts_cnn.dataPath; 106 | opts_cnn.getBatchEvaluation.jitterStd = 0; 107 | opts_cnn.getBatchEvaluation.numPatchesPerImage = inf; 108 | 109 | batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_localModel(imdb, batch, ... 110 | opts_cnn.getBatchEvaluation) ; 111 | 112 | [scores, candidateIds] = cnn_computeScores_localModel( net, imdb, batchWrapperEvaluation, ... 113 | 'imageSet', imageSetToDoPr, ... 114 | 'gpuBatchSize', opts_cnn.getBatchEvaluation.maxGpuImages, ... 115 | 'scoreMode', opts_cnn.scoreMode ); 116 | 117 | if exist(resultFile, 'file') 118 | warning('The results file already exists. Overwriting!'); 119 | end 120 | if ~exist(fileparts(resultFile), 'dir') 121 | mkdir(fileparts(resultFile)); 122 | end 123 | save( resultFile, 'scores', 'candidateIds', '-v7.3' ); 124 | 125 | %% vizualize the detections 126 | iImage = 1; 127 | curImage = imread( fullfile(opts_cnn.dataPath, imdb.imageFiles{iImage}) ); 128 | curCandidates = load( fullfile(opts_cnn.dataPath, imdb.candidateFiles{iImage}), 'boxes' ); 129 | curCandidates = convertBb_Y1X1Y2X2_to_X1Y1WH(curCandidates.boxes); 130 | 131 | idsNms = selectBoundingBoxesNonMaxSup( curCandidates(candidateIds{iImage},:), scores{iImage}); 132 | 133 | toPlotCandidates = curCandidates( candidateIds{iImage}(idsNms(1)), :); 134 | 135 | imageWithBoxes = showBoundingBoxes(curImage, toPlotCandidates, 'y'); 136 | imshow(imageWithBoxes); 137 | -------------------------------------------------------------------------------- /utils/cropImagePatches.m: --------------------------------------------------------------------------------
function [cropsResized, preparationTime, resizeTime] = cropImagePatches(curImage, boundingBoxes, cropPad, outputSize, maxGpuImages, cropMode, jitterStd)
%cropImagePatches crops patches from an image and resizes them to the standard size
%
% cropsResized = cropImagePatches(curImage, boundingBoxes, cropPad, outputSize);
% [cropsResized, preparationTime, resizeTime] = cropImagePatches(curImage, boundingBoxes, cropPad, outputSize, maxGpuImages, cropMode, jitterStd);
%
% Input:
%   curImage - image to crop from (converted to single if it is of another class)
%   boundingBoxes - bounding boxes in format [y1 x1 y2 x2]. x is col, y is row
%   cropPad - padding of the bounding boxes (padding is measured after crop and resize), format: left, top, right, bottom
%   outputSize - target size of the crops
%   maxGpuImages - maximal number of patches to crop on a GPU at the same time (default : 256)
%   cropMode - 'square' or 'warp' (mimicking the R-CNN cropping code) (default: 'warp')
%   jitterStd - amount of jittering to do: std of a gaussian distribution of a patch-border shift (default: 0)
%
% Output:
%   cropsResized - the cropped patches, size outputSize(1) x outputSize(2) x numChannels x numBoxes.
%                  If there are more than maxGpuImages boxes the crops are computed in chunks and
%                  gathered into a regular array; otherwise the output of cropRectanglesMex is
%                  returned as is (after a conversion to single)
%   preparationTime - time spent on the computation of the crop parameters
%   resizeTime - time spent on cropping and resizing
%
% This function depends on cropRectanglesMex

tStart = tic;

if ~exist( 'maxGpuImages', 'var') || isempty(maxGpuImages)
    maxGpuImages = 256;
end
if ~exist( 'cropMode', 'var') || isempty(cropMode)
    cropMode = 'warp';
end
if ~exist('jitterStd', 'var') || isempty(jitterStd)
    jitterStd = 0;
end
useSquare = isequal( cropMode, 'square');

numChannels = size(curImage, 3);
numBoxes = size( boundingBoxes, 1 );
% all the crops can be produced by a single call only if they fit into one chunk
singleChunk = numBoxes <= maxGpuImages;

% Corner coordinates in the output patch (homogeneous, one corner per column).
% They do not depend on the box, so they are computed once.
% corners of the un-padded box content inside the output patch
cornersInner = [ 1 + cropPad(2), 1 + cropPad(1), 1; ... % top left corner
    1 + cropPad(2), outputSize(2) - cropPad(3), 1; ... % top right corner
    outputSize(1) - cropPad(4), 1 + cropPad(1), 1; ... % bottom left corner
    outputSize(1) - cropPad(4), outputSize(2)- cropPad(3), 1; ... % bottom right corner
    ]';
% corners of the whole output patch
cornersOuter = [ 1, 1, 1; ... % top left corner
    1, outputSize(2), 1; ... % top right corner
    outputSize(1), 1, 1; ... % bottom left corner
    outputSize(1), outputSize(2), 1; ... % bottom right corner
    ]';

cropBoxes = nan(numBoxes, 4);
for iBox = 1 : numBoxes
    %% get crop position: take padding into account
    leftBorder = boundingBoxes(iBox, 2);
    rightBorder = boundingBoxes(iBox, 4);

    topBorder = boundingBoxes(iBox, 1);
    bottomBorder = boundingBoxes(iBox, 3);

    % the square mode: extend the shorter side of the box around its center
    if useSquare
        halfWidth = (rightBorder - leftBorder + 1 ) / 2;
        halfHeight = (bottomBorder - topBorder + 1 ) / 2;

        centerX = leftBorder + halfWidth;
        centerY = topBorder + halfHeight;

        halfSide = max( halfWidth, halfHeight );

        topBorder = centerY - halfSide;
        bottomBorder = centerY + halfSide;
        leftBorder = centerX - halfSide;
        rightBorder = centerX + halfSide;
    end

    % corners of the box in the image
    cornersImage = [ topBorder, leftBorder, 1; ... % top left corner
        topBorder, rightBorder, 1; ... % top right corner
        bottomBorder, leftBorder, 1; ... % bottom left corner
        bottomBorder, rightBorder, 1; ... % bottom right corner
        ]';

    % solve the linear system for the transformation that maps the inner corners of the
    % output patch onto the corners of the box in the image
    M = cornersImage / cornersInner;

    % image positions of the corners of the whole (padded) patch
    cropPos = (M * cornersOuter)';

    cropBoxes(iBox, 1) = (cropPos(1, 1) + cropPos(2, 1)) / 2; % top
    cropBoxes(iBox, 2) = (cropPos(1, 2) + cropPos(3, 2)) / 2; % left
    cropBoxes(iBox, 3) = (cropPos(3, 1) + cropPos(4, 1)) / 2; % bottom
    cropBoxes(iBox, 4) = (cropPos(2, 2) + cropPos(4, 2)) / 2; % right
end

%% apply jittering
borderNoise = randn( size( cropBoxes) ) * jitterStd;
cropBoxes = cropBoxes + borderNoise;
% check if the bounding boxes are still valid. If not (might happen if the size of the box is too small) then remove jittering.
% Each box is compared against its OWN borders (row-wise comparison).
badMask = cropBoxes(:, 1) > cropBoxes(:, 3) | cropBoxes(:, 2) > cropBoxes(:, 4);
cropBoxes( badMask, : ) = cropBoxes( badMask, : ) - borderNoise( badMask, : );

preparationTime = toc(tStart);

%% crop the image
if ~isequal( class( curImage ), 'single')
    curImage = single(curImage);
end
if singleChunk
    cropsResized = cropRectanglesMex( curImage, cropBoxes, outputSize );
    cropsResized = single(cropsResized);
else
    % too many boxes for one call: process them in chunks of at most maxGpuImages boxes
    cropsResized = zeros(outputSize(1), outputSize(2), numChannels, numBoxes, 'single');
    for iBatchStart = 1 : maxGpuImages : numBoxes
        curIds = iBatchStart : 1 : min( numBoxes, iBatchStart + maxGpuImages - 1);
        curBoxes = cropBoxes( curIds, : );
        curCrops = cropRectanglesMex( curImage, curBoxes, outputSize );
        cropsResized(:,:,:,curIds) = gather(curCrops);
    end
    cropsResized = single(cropsResized);
end

resizeTime = toc(tStart) - preparationTime;

end
-------------------------------------------------------------------------------- /pairwiseModel/run_computeScores_pairwiseModel_Casablanca.m: -------------------------------------------------------------------------------- 1 | %run_computeScores_pairwiseModel applies the pairwise model to the bounding-box proposals to compute their scores 2 | 3 | % SETUP THESE PATHS TO RUN THE CODE 4 | dataPath = 'data/Casablanca'; 5 | resultPath = 'results/Casablanca'; 6 | 7 | % network to evaluate 8 | netFile = fullfile( 'models', 'pairwise.mat' ); 9 | 10 | % file to store the scores 11 | resultFile = fullfile( resultPath, 'pairwise', 'pairwiseModel-scores-test.mat' ); 12 | 13 | % Casablanca dataset contains only the test set, so scoreSubset has to be equal to 3 14 | scoreSubset = 3; 15 | 16 | %% setup paths 17 | filePath = fileparts( mfilename('fullpath') ); 18 | run( fullfile( fileparts(
filePath ), 'setup.m' ) );

% scores to preselect candidates
% prefer the scores computed on all the subsets; fall back to the test-only scores when only the
% test subset is processed
candidateSelectionScoresFile = fullfile( resultPath, 'local', 'localModel-scores-trainValTest.mat' );
if ~exist( candidateSelectionScoresFile, 'file' ) && isequal( scoreSubset, 3 )
    candidateSelectionScoresFile = fullfile( resultPath, 'local', 'localModel-scores-test.mat' );
end

% pairwise clusters
precomputedClustersFile = fullfile(resultPath, 'pairwise', 'imdb_pairwise_precomputedClusters.mat');
if exist( precomputedClustersFile, 'file' )
    % use the precomputed clusters
    clusters = load( precomputedClustersFile, 'clusterInfo', 'clusterFunction' );
else
    % compute the clustering on the fly
    clusters = struct;
    clusters.clusterInfo = struct('type', [], 'mean', [], 'std', [], 'numClusters', [], 'clusterCenters', [] );
    clusters.clusterFunction = [];
    % CAUTION: you will need score of the local model on the training set to do this operation
    candidateSelectionScoresFile = fullfile( resultPath, 'local', 'localModel-scores-trainValTest.mat' );
end

%% parameters
opts_cnn = struct;
opts_cnn.dataPath = dataPath;
opts_cnn.dataset.testSetFile = fullfile('Splits', 'test.txt');
opts_cnn.dataset.groundTruthLocalPrefix = 'Annotations';
opts_cnn.dataset.imageLocalPrefix = 'JPEGImages';
opts_cnn.dataset.candidateLocalPrefix = 'Candidates';
opts_cnn.dataset.scoreFile = candidateSelectionScoresFile;
opts_cnn.dataset.maxNumPatchesPerImage = 16;
opts_cnn.dataset.nmsIntersectionOverAreaThreshold = 0.3;
opts_cnn.dataset.numPairwiseClusters = 20;
opts_cnn.dataset.clusterInfo = clusters.clusterInfo;
opts_cnn.dataset.clusterFunction = clusters.clusterFunction;

opts_cnn.expDir = resultPath;
opts_cnn.scoreMode = 'maxMarginals';

opts_cnn.evaluation.iouThreshold = 0.5;
opts_cnn.evaluation.useDifficultImages = false;

%% load dataset
% the imdb is cached in the experiment folder; it is regenerated only if the cache file is missing
opts_cnn.imdbPath = fullfile( opts_cnn.expDir, 'imdb_pairwise.mat' );
if exist(opts_cnn.imdbPath, 'file')
    fprintf('Reading imdb file %s\n', opts_cnn.imdbPath);
    imdb = load(opts_cnn.imdbPath);
    % the cached imdb stores the dataset options it was built with; only warn on a mismatch
    if isfield(imdb, 'opts') && isequal( imdb.opts, opts_cnn.dataset )
        fprintf('imdb.opts is compatible with the opt_cnn.dataset\n')
    else
        warning('opts_cnn.dataset parameters are not compatible with the provided imdb file. Be careful!');
    end
else
    warning('imdb file is not found. Making it will take some time.');
    fprintf('Generating imdb file %s\n', opts_cnn.imdbPath);
    imdb = cnn_prepareData_pairwiseModel( opts_cnn.dataset, 'dataPath', opts_cnn.dataPath );
    imdb.opts = opts_cnn.dataset;
    % create the experiment folder only when needed: mkdir warns if the folder already exists
    if ~exist(opts_cnn.expDir, 'dir')
        mkdir(opts_cnn.expDir);
    end
    save(opts_cnn.imdbPath, '-struct', 'imdb', '-v7.3') ;
end

%% prepare the network
% the file may hold the network fields directly or the whole network in a variable 'net'
net = load( netFile, '-mat');
if ~isfield(net, 'layers') && isfield(net, 'net')
    net = net.net;
end
if ~isfield(net, 'layers')
    error('Could not load the network!');
end
net = vl_simplenn_move(net, 'gpu');

%% select the set of images to run evaluation
imageSetToDoPr = false( numel( imdb.images.set ), 1 );
for iSubset = 1 : numel( scoreSubset )
    imageSetToDoPr = imageSetToDoPr | ( imdb.images.set(:) == scoreSubset( iSubset ) );
end
imageSetToDoPr = find( imageSetToDoPr );

%% start the evaluation
opts_cnn.getBatchEvaluation = struct;
opts_cnn.getBatchEvaluation.meanImage = net.normalization.averageImage;
opts_cnn.getBatchEvaluation.jitterStd = 0;
opts_cnn.getBatchEvaluation.iouPositiveNegativeThreshold = opts_cnn.evaluation.iouThreshold;
opts_cnn.getBatchEvaluation.dataPath = opts_cnn.dataPath;
opts_cnn.getBatchEvaluation.randomizeCandidates = false;
% NOTE(review): the two options below are stored in opts_cnn.getBatch, which is NOT the struct passed
% to cnn_getBatch_pairwiseModel (opts_cnn.getBatchEvaluation), so they have no effect here.
% Presumably they were meant for getBatchEvaluation - confirm that cnn_getBatch_pairwiseModel
% accepts 'cropMode' and 'cropPad' before moving them.
opts_cnn.getBatch.cropMode = 'warp';
opts_cnn.getBatch.cropPad = [18, 18, 18, 18];

batchWrapperEvaluation = @(imdb, batch) cnn_getBatch_pairwiseModel(imdb, batch, opts_cnn.getBatchEvaluation);

[scores, candidateIds] = cnn_computeScores_pairwiseModel( net, imdb, batchWrapperEvaluation, ...
    'imageSet', imageSetToDoPr, ...
    'scoreMode', opts_cnn.scoreMode );

if exist(resultFile, 'file')
    warning('The results file already exists. Overwriting!');
end
if ~exist(fileparts(resultFile), 'dir')
    mkdir(fileparts(resultFile));
end
save( resultFile, 'scores', 'candidateIds', '-v7.3' );

%% save detections to files
detSavePath = fullfile( resultPath, 'pairwise', 'dets');
if ~exist(detSavePath, 'dir')
    mkdir(detSavePath)
end
detSaveFormat = fullfile(detSavePath, '%s.mat');
disp('Saving detection files');
for i=1:length(imageSetToDoPr)
    imgIdx = imageSetToDoPr(i);
    cand_path = fullfile(dataPath, imdb.candidateFiles{imgIdx});
    % load only the variable 'boxes' and load it into a struct: a bare load(cand_path) would put
    % every variable of the file into the script workspace, where it could overwrite 'scores',
    % 'candidateIds', 'i', ...; a file without 'boxes' would silently reuse the previous boxes
    candidateData = load(cand_path, 'boxes');
    boxes = candidateData.boxes;
    % columns [2 1 4 3] swap the x and y coordinates of the candidates before the conversion;
    % the scores are appended as the last column
    BB = [convertBb_X1Y1X2Y2_to_X1Y1WH(boxes(candidateIds{imgIdx},[2 1 4 3])) scores{imgIdx}];
    [~, im_name, ~] = fileparts(imdb.imageFiles{imgIdx});
    savePath = sprintf(detSaveFormat, im_name);
    save(savePath, 'BB');
end
--------------------------------------------------------------------------------