├── charDetMixtures.m ├── charDetVideo.m ├── chardetexp.m ├── cluster_traindata.m ├── computeWordFeatures.m ├── configsgen.m ├── connect.m ├── connect_test.m ├── data ├── .svn │ ├── entries │ ├── prop-base │ │ ├── demo.jpg.svn-base │ │ ├── fern_synth.mat.svn-base │ │ ├── svm_icdar.mat.svn-base │ │ └── svm_svt.mat.svn-base │ └── text-base │ │ ├── demo.jpg.svn-base │ │ ├── fern_synth.mat.svn-base │ │ ├── svm_icdar.mat.svn-base │ │ └── svm_svt.mat.svn-base ├── demo.jpg ├── fern_synth.mat ├── models_real_nomixture.mat ├── related_matrix.mat ├── svm_icdar.mat ├── svm_svt.mat └── validfonts.mat ├── demo ├── demoIcdar.m ├── demoImg.m └── demoSvt.m ├── detection ├── charDet.m ├── charDetSVM.m ├── detect.m ├── featpyramid.m ├── get_filter_responses.m └── wordDet.m ├── evaluation ├── Fscore.m ├── evalReading.m ├── genPrCurves.m └── genPrCurvesEZ.m ├── experiments ├── chardet_exp.m ├── precompFullImage.m ├── precompSwtAbbyy.m ├── precompSwtPlex.m ├── runFullVidReal.m ├── run_chardet_exp.m └── yt_experiments.m ├── figure_production ├── createTable1.m ├── createTable2.m └── createTable2Abbyy.m ├── fscore2.m ├── genHistGraph.m ├── genLexIcdar.m ├── getHeatmapMixtures.m ├── get_filter_responses.m ├── globals.m ├── lexicon ├── loadLex.m └── procAbbyy.m ├── libraries ├── bin │ ├── .gitignore │ ├── bounded_dt.mexa64 │ ├── cascade.mexa64 │ ├── compute_overlap.mexa64 │ ├── dt.mexa64 │ ├── fconv.mexa64 │ ├── fconv_var_dim.mexa64 │ ├── features.mexa64 │ ├── fv_cache.mexa64 │ ├── get_detection_trees.mexa64 │ └── resize.mexa64 ├── gdetect │ ├── bounded_dt.cc │ ├── compute_overlap.cc │ ├── compute_overlaps.m │ ├── dt.cc │ ├── fconv_var_dim.cc │ ├── fconv_var_dim_MT.cc │ ├── fconvsse.cc │ ├── gdetect.m │ ├── gdetect_dp.m │ ├── gdetect_dp.m~ │ ├── gdetect_parse.m │ ├── gdetect_pos.m │ ├── gdetect_pos_prepare.m │ ├── gdetect_write.m │ ├── get_detection_trees.cc │ ├── imgdetect.m │ ├── loss_func.m │ ├── loss_pyramid.m │ ├── tree_mat_to_struct.m │ ├── validate_levels.m │ └── write_zero_fv.m └── vis │ ├── HOGpicture.m │ ├── draw_wire_frame.m │ ├── foldHOG.m │ ├── showboxes.m │ ├── vis_derived_filter.m │ ├── vis_grammar.m │ ├── visualizeHOG.m │ └── visualizemodel.m ├── loadBB.m ├── loadMixtureModels.m ├── loadModels.m ├── misc ├── checkValidGt.m ├── equivClass.m ├── filterValidGt.m ├── findRanks.m ├── hogOld.m ├── readAllImgs.m ├── readSwt.m ├── spellCheck.m └── writeAllImgs.m ├── mixture_models └── A_1.mat ├── models_real_nomixture.mat ├── plex_icdar_video.m ├── prep_script ├── prepC74k.m ├── prepIcdar.m ├── prepMsrc.m ├── prepSvt.m ├── prepSynthEasy.m └── prepSynthHard.m ├── readVideoFromFrames.m ├── readme.txt ├── results └── fscores.mat ├── runExp.m ├── run_chardetSVM_exp.m ├── sandbox ├── 0.jpg ├── 148.jpg ├── demoYoutube.m └── result_analysis.m ├── sw.m ├── synthesis ├── genChar.m └── validateFonts.m ├── third_party ├── .svn │ ├── entries │ └── text-base │ │ └── EditDist.m.svn-base └── EditDist.m ├── todo.txt ├── train ├── extractAndTrain.m ├── get_negative.m ├── mine_negative.m ├── mining_hard_negative.m ├── trainAll.m ├── trainChar.m ├── trainClassifier.m ├── trainMixtures.m ├── trainRoot.m ├── trainSVM.m ├── train_cluster.m └── train_temporal_weights.m ├── training ├── trainChClfs.m ├── trainRescore.m └── trainWdClfs.m ├── trainscript.m ├── visualization ├── charDetDraw.m ├── displayTopDet.m ├── visualizeHOG.m ├── visualizeModel.m ├── visualizeResults.m └── wordDetDraw.m ├── wordNms.m └── wordSpot.m /charDetMixtures.m: -------------------------------------------------------------------------------- 1 | function 
varargout=charDetMixtures(action,varargin) 2 | % Detect characters using multiple mixtures of a model 3 | % 4 | % INPUTS 5 | % frames: [4-D] array containing the frames in order 6 | % 7 | % OUTPUTS: 8 | % bbs_videos: matrix of bounding box output 9 | % Usage 10 | % hms = charDetVideo('gethm',videos,models) 11 | % bbs = charDetVideo('getbbs',hms,models) 12 | 13 | varargout = cell(1,max(1,nargout)); 14 | [varargout{:}] = feval(action,varargin{:}); 15 | end 16 | 17 | % This function returns the heatmap of the videos 18 | function [hms,scales]=gethm(I,models) 19 | % INPUTS: 20 | % I - image 21 | % models - two layers cell-array, models{charindex}{mixtureindex} 22 | % 23 | % OUTPUTS: 24 | % hms - one layer cell-array, hms{charindex} the heatmap response is 25 | % combined through taking the max of all the response. 26 | % scales - the scales used for detections 27 | configs = configsgen; 28 | [hms,scales] = getHeatmapMixtures(I,models,configs); 29 | end 30 | 31 | function bbs=getbbs(I,models,hmsPath) 32 | % Return the bbs from the previously collected hms 33 | % 34 | % INPUTS: 35 | % I - image 36 | % models - two layers cell-array, models{charindex,mixtureindex} 37 | % 38 | % OUTPUTS: 39 | % hms - one layer cell-array, hms{charindex} the heatmap response is 40 | % combined through taking the max of all the response. 41 | % scales - the scales used for detections 42 | configs = configsgen; 43 | % load the hms 44 | lstruct = load(hmsPath); 45 | hms = lstruct.hms; 46 | scales = lstruct.scales; 47 | bbs = getbbsHelper(models,hms,scales,configs); 48 | end 49 | 50 | function bbs=getbbsHelper(models,hms,scales,configs) 51 | initthres = configs.initThres; 52 | total_bbs = zeros(1e6,6); 53 | total = 0; 54 | nScales = length(scales); 55 | for iLevel=1:nScales 56 | hmsAtCurrentScale = hms{iLevel}; 57 | if isempty(hmsAtCurrentScale); continue; end; 58 | current_scale = scales(iLevel); 59 | for iModel = 1:size(models,1) 60 | currentModel = models{iModel,1}; 61 | charDims = currentModel.char_dims; 62 | hm = hmsAtCurrentScale{iModel} + currentModel.bias; 63 | 64 | ind = find(hm > initthres); % Get the locations of the response 65 | [y,x] = ind2sub(size(hm),ind); 66 | if (isempty(x)); continue; end; 67 | 68 | scores = hm(ind); 69 | if (size(x,2) > 1); x = x'; y = y'; scores = scores'; end 70 | %assert(length(x)==length(scores)); 71 | 72 | %Correct the position 73 | x = x * configs.bin_size/current_scale; 74 | y = y * configs.bin_size/current_scale; 75 | width = floor(charDims(2)/current_scale); 76 | height = floor(charDims(1)/current_scale); 77 | 78 | ind = strfind(configs.alphabets,currentModel.char_index); 79 | bbType = ones(length(x),1)*ind; 80 | bbs = [x,y,repmat(width,length(x),1),repmat(height,... 
81 | length(x),1),scores,bbType]; 82 | 83 | currentCount = size(bbs,1); 84 | if currentCount > 0 85 | total_bbs(total+1:total+currentCount,:) = bbs; 86 | total = total + currentCount; 87 | end 88 | end 89 | end 90 | 91 | bbs=total_bbs(1:total,:); 92 | end -------------------------------------------------------------------------------- /chardetexp.m: -------------------------------------------------------------------------------- 1 | % Given that you have mixtures of models, this script is to run and output 2 | % the performance on the ICDAR dataset 3 | configs=configsgen; 4 | dataset_path = fullfile(configs.icdar,'test'); 5 | imagesPath = fullfile(dataset_path,'images'); 6 | modelPath = 'mixture_models'; 7 | gtDir = fullfile(configs.icdar,'test','charAnn'); 8 | dtDir = fullfile(configs.icdar,'test','det_results_synth'); 9 | dtDirMix = fullfile(configs.icdar,'test','det_results_mix'); 10 | dtDirReal = fullfile(configs.icdar,'test','det_results_real'); 11 | 12 | %% load the models 13 | models = loadMixtureModels(modelPath); 14 | 15 | %% get the image list 16 | imgLst = dir(fullfile(imagesPath,'*.jpg')); 17 | nImg = length(imgLst); 18 | hmsPath = fullfile('hms'); 19 | if ~exist(hmsPath,'dir'); mkdir(hmsPath); end 20 | 21 | %% Get the heatmap 22 | savehms = @(sf,hms,scales) save(sf,'hms','scales'); 23 | parfor i=1:nImg 24 | i 25 | savePath = fullfile(hmsPath,sprintf('%s.mat',imgLst(i).name)) 26 | if exist(savePath,'file'); continue; end; 27 | imgPath = fullfile(imagesPath,imgLst(i).name); 28 | I = imread(imgPath); 29 | tic; [hms,scales] = charDetMixtures('gethm',I,models); toc; 30 | savehms(savePath,hms,scales); 31 | end 32 | 33 | %% Get the bbs 34 | savebbs = @(sf,bbs) save(sf,'bbs'); 35 | parfor i=1:nImg 36 | i 37 | imgPath = fullfile(imagesPath,imgLst(i).name); 38 | I = imread(imgPath); 39 | currentHmsPath = fullfile(hmsPath,sprintf('%s.mat',imgLst(i).name)); 40 | bbs = charDetMixtures('getbbs',I,models,currentHmsPath); 41 | bbs = bbNms(bbs,'type','max','overlap',.5,'separate',1); 42 | savePath = fullfile(dtDirMix,sprintf('%s.mat',imgLst(i).name)); 43 | savebbs(savePath,bbs); 44 | end 45 | 46 | %% Get the f-score 47 | fscores = zeros(length(configs.alphabets),2); 48 | %% 49 | beta = 2; 50 | for iChar = 1:length(configs.alphabets) 51 | try 52 | iChar 53 | currentChar = configs.alphabets(iChar); 54 | [gt0,~] = bbGt('loadAll',gtDir,[],{'lbls',currentChar}); 55 | 56 | fprintf('Load synth\n'); 57 | dtsynth = loadBB(dtDir,iChar); 58 | fprintf('Load real\n'); 59 | dtreal = loadBB(dtDirReal,iChar); 60 | 61 | % Computing score for synth 62 | [gts,dts] = bbGt( 'evalRes', gt0, dtsynth); 63 | [xss,yss,~]=bbGt('compRoc', gts, dts, 0); 64 | fs = fscore2(xss,yss,beta); 65 | fscores(iChar,1) = fs; 66 | 67 | % Computing score for real 68 | [gtr,dtr] = bbGt( 'evalRes', gt0, dtreal); 69 | [xsr,ysr,~]=bbGt('compRoc', gtr, dtr, 0); 70 | fs = fscore2(xsr,ysr,beta); 71 | fscores(iChar,2) = fs; 72 | fscores 73 | catch e 74 | e 75 | continue 76 | end 77 | end -------------------------------------------------------------------------------- /cluster_traindata.m: -------------------------------------------------------------------------------- 1 | function idx = cluster_traindata(features,K) 2 | [U,mu,~] = pca(features'); 3 | d=100; 4 | [~, Xhat, ~] = pcaApply(features', U, mu, K ); 5 | Xhat = Xhat'; 6 | rotated = Xhat(:,1:d); 7 | [ idx, ~, ~ ] = kmeans2( rotated, K); 8 | end -------------------------------------------------------------------------------- /computeWordFeatures.m: 
-------------------------------------------------------------------------------- 1 | function y=computeWordFeatures(word) 2 | % Compute word-level features for SVM 3 | % 4 | % USAGE 5 | % y = computeWordFeatures( word ) 6 | % 7 | % INPUTS 8 | % word - word object 9 | % 10 | % OUTPUTS 11 | % y - feature vector 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 18 | 19 | y=[]; 20 | %%% unary features %%% 21 | % - mean detection score 22 | % - stdv detection score 23 | y=[y,word.bb(5)]; % 1 24 | y=[y,median(word.bbs(:,5))]; % 2 25 | y=[y,mean(word.bbs(:,5))]; % 3 26 | y=[y,min(word.bbs(:,5))]; % 4 27 | y=[y,std(word.bbs(:,5))]; % 5 28 | 29 | %%% pairwise features %%% 30 | y=[y,pairwise(word.bbs)]; % 6,7,8 31 | 32 | %%% global features %%% 33 | 34 | % - horizontal std 35 | % - min(hspace)/max(hspace) 36 | % - vertical std 37 | % - min(vspace)/max(vspace) 38 | % - scale std 39 | % - min(height)/max(height) 40 | hgaps=abs((word.bbs(2:end,1)+word.bbs(2:end,3)/2)... 41 | - (word.bbs(1:end-1,1)+word.bbs(1:end-1,3)/2))... 42 | ./min([word.bbs(2:end,3),word.bbs(1:end-1,3)],[],2); 43 | vgaps=abs((word.bbs(2:end,2)+word.bbs(2:end,4)/2)... 44 | - (word.bbs(1:end-1,2)+word.bbs(1:end-1,4)/2))... 45 | ./min([word.bbs(2:end,4),word.bbs(1:end-1,4)],[],2); 46 | y=[y,std(hgaps)]; % 9 10 47 | if(min(hgaps)==0), y=[y,max(hgaps)/.01]; else y=[y,max(hgaps)/min(hgaps)]; end 48 | y=[y,std(vgaps)]; % 11 12 49 | if(min(vgaps)==0), y=[y,max(vgaps)/.01]; else y=[y,max(vgaps)/min(vgaps)]; end 50 | y=[y,std(word.bbs(:,3))]; % 13 51 | y=[y,max(word.bbs(:,3))/min(word.bbs(:,3))]; % 14 52 | 53 | y=[y,size(word.bbs,1)]; % 15 54 | y=[y,sum(word.bbs(:,5))]; % 16 55 | y=[y,min(word.bbs(:,3))/word.bb(:,3)]; % 17 56 | 57 | end 58 | 59 | function y=pairwise(bbs) 60 | 61 | y=[]; 62 | xdistSum=0; ydistSum=0; sdistSum=0; 63 | for i=1:size(bbs,1)-1; 64 | parbb=bbs(i,:); 65 | chibb=bbs(i+1,:); 66 | xcent=parbb(1,1)+parbb(1,3); 67 | ycent=parbb(1,2)+.5*parbb(1,4); 68 | xdist=abs(chibb(1,1)-xcent)/min(chibb(1,3),parbb(1,3)); 69 | if(chibb(1,1) .5 65 | votes(mi) = bbfr(mi,7); 66 | end 67 | end 68 | valid_votes = votes(votes>0); 69 | if isempty(valid_votes) 70 | id = curid; next_id = curid + 1; 71 | return 72 | end 73 | id = mode(valid_votes); 74 | end 75 | 76 | function r=overlap(a,b) 77 | u = bbApply('union',a,b); in = bbApply('intersect',a,b); 78 | r = bbApply('area',in)/bbApply('area',u); 79 | end 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /connect_test.m: -------------------------------------------------------------------------------- 1 | %% This script is to test the correctness of connect 2 | 3 | bbs = ... 4 | [1 1 10 10 1 1; 5 | 1 1 8 8 1 1; 6 | 5 5 20 20 1 1; 7 | 1 1 8 8 1 2; 8 | 2 2 11 11 2 3; 9 | 2 2 11 11 2 4; 10 | 2 2 11 11 2 5; 11 | 2 2 11 11 2 6]; 12 | 13 | x = connect(bbs); 14 | x(:,7) 15 | 16 | %% Need more tests 17 | 18 | bbs = ... 
19 | [1 1 10 10 1 1; 20 | 1 1 8 8 1 1; 21 | 5 5 20 20 1 1; 22 | 1 1 8 8 1 2; 23 | 2 2 11 11 1 3; 24 | 2 2 11 11 1 4; 25 | 2 2 11 11 1 5; 26 | 10 10 3 3 1 16 27 | 11 10 1 1 1 17; 28 | 11 10 8 1 1 18; 29 | 2 2 11 11 1 19]; 30 | 31 | x = connect(bbs); % [1 2 3 2 1 1 1 4 5 6 7] 32 | x(:,7) -------------------------------------------------------------------------------- /data/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 20894 5 | svn+ssh://kai@lumo.ucsd.edu/projects/p1/svnroot/kai/iccv11/alpha/data 6 | svn+ssh://kai@lumo.ucsd.edu/projects/p1/svnroot 7 | 8 | 9 | 10 | 2011-10-25T04:13:51.784002Z 11 | 20893 12 | kai 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 52fe0c90-79fe-0310-8a18-a0b98ad248f8 28 | 29 | fern_synth.mat 30 | file 31 | 20903 32 | 33 | 34 | 35 | 2011-10-27T07:03:02.000000Z 36 | 86049aeafc0f1c0753dc9bae5e1a7e66 37 | 2011-10-31T00:39:33.308233Z 38 | 20903 39 | kai 40 | has-props 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 15906572 62 | 63 | pretrained_word_svm_icdar.mat 64 | file 65 | 20903 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | deleted 86 | 87 | svm_svt.mat 88 | file 89 | 20903 90 | 91 | 92 | 93 | 2011-10-12T18:41:18.000000Z 94 | bb4102fbc42a75884b14949e31d14760 95 | 2011-10-31T00:39:33.308233Z 96 | 20903 97 | kai 98 | has-props 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 993280 120 | 121 | demo.jpg 122 | file 123 | 20903 124 | 125 | 126 | 127 | 2011-10-19T17:43:49.000000Z 128 | d4eee05f71398f4a93854ccfa01cc7d1 129 | 2011-10-31T00:39:33.308233Z 130 | 20903 131 | kai 132 | has-props 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 170477 154 | 155 | pretrained_char_fern.mat 156 | file 157 | 20903 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | deleted 178 | 179 | svm_icdar.mat 180 | file 181 | 20903 182 | 183 | 184 | 185 | 2011-10-12T19:26:10.000000Z 186 | c38786b819b135cf553b90d98aaadfe1 187 | 2011-10-31T00:39:33.308233Z 188 | 20903 189 | kai 190 | has-props 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 742691 212 | 213 | pretrained_word_svm_svt.mat 214 | file 215 | 20903 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | deleted 236 | 237 | -------------------------------------------------------------------------------- /data/.svn/prop-base/demo.jpg.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /data/.svn/prop-base/fern_synth.mat.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /data/.svn/prop-base/svm_icdar.mat.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | 
-------------------------------------------------------------------------------- /data/.svn/prop-base/svm_svt.mat.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /data/.svn/text-base/demo.jpg.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/.svn/text-base/demo.jpg.svn-base -------------------------------------------------------------------------------- /data/.svn/text-base/fern_synth.mat.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/.svn/text-base/fern_synth.mat.svn-base -------------------------------------------------------------------------------- /data/.svn/text-base/svm_icdar.mat.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/.svn/text-base/svm_icdar.mat.svn-base -------------------------------------------------------------------------------- /data/.svn/text-base/svm_svt.mat.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/.svn/text-base/svm_svt.mat.svn-base -------------------------------------------------------------------------------- /data/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/demo.jpg -------------------------------------------------------------------------------- /data/fern_synth.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/fern_synth.mat -------------------------------------------------------------------------------- /data/models_real_nomixture.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/models_real_nomixture.mat -------------------------------------------------------------------------------- /data/related_matrix.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/related_matrix.mat -------------------------------------------------------------------------------- /data/svm_icdar.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/svm_icdar.mat -------------------------------------------------------------------------------- /data/svm_svt.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/svm_svt.mat -------------------------------------------------------------------------------- /data/validfonts.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/data/validfonts.mat -------------------------------------------------------------------------------- /demo/demoIcdar.m: -------------------------------------------------------------------------------- 1 | function demoIcdar(idx) 2 | % Demo of PLEX running on an image from the ICDAR dataset. 3 | % 4 | % USAGE 5 | % demoIcdar( idx ) 6 | % 7 | % INPUTS 8 | % idx - filenumber to test: 1-249 9 | % 10 | % EXAMPLE 11 | % demoIcdar(23); litter,colchester,borough [rt=~20s] 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 18 | 19 | dPath=globals; 20 | 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | % Load image and request lexicon 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | I=imread(fullfile(dPath,'icdar','test','images',sprintf('I%05i.jpg',idx))); 25 | im(I); lexIn=input('Enter comma-separated strings for lexicon:','s'); 26 | lexS=textscan(lexIn,'%s','Delimiter',',')'; lexS=lexS{1}'; 27 | lexS=strtrim(lexS); 28 | if isempty(lexS), error('must enter lexicon\n'); end 29 | 30 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | % Load classifiers 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 | % word threshold 34 | wordThr=-.75; 35 | % character fern 36 | clfPath=fullfile('data','fern_synth.mat'); 37 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 38 | fModel=load(clfPath); 39 | % word svm 40 | svmPath=fullfile('data','svm_icdar.mat'); 41 | if(~exist(svmPath,'file')), error('SVM MODEL DOES NOT EXIST?!\n'); end 42 | sModel=load(svmPath); wordSvm=sModel.pNms1; wordSvm.thr=wordThr; 43 | 44 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 45 | % Run word recognition (PLEX) 46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 47 | tic; words=wordSpot(I,lexS,fModel,wordSvm); toc 48 | wordDetDraw( words, 0, 0, 1, [0 1 0] ); 49 | -------------------------------------------------------------------------------- /demo/demoImg.m: -------------------------------------------------------------------------------- 1 | function demoImg 2 | % Demo of PLEX running on an image in the data folder 3 | % 4 | % USAGE 5 | % demoImg 6 | % 7 | % CREDITS 8 | % Written and maintained by Kai Wang and Boris Babenko 9 | % Copyright notice: license.txt 10 | % Changelog: changelog.txt 11 | % Please email kaw006@cs.ucsd.edu if you have questions. 
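% Prerequisites (assumed, not checked by this script): the pretrained
% models shipped in data/ (fern_synth.mat for the character fern,
% svm_svt.mat for word rescoring) and the image/bounding-box utilities
% used throughout this repo (im, getPrmDflt, bbApply, bbGt) must be on
% the MATLAB path.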
12 | 13 | I=imread(fullfile('data','demo.jpg')); 14 | lexS={'michaels','world','market','fitness'}; 15 | 16 | im(I); drawnow; 17 | 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 19 | % Load classifiers 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | % word threshold 22 | wordThr=-1; 23 | % character fern 24 | clfPath=fullfile('data','fern_synth.mat'); 25 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 26 | fModel=load(clfPath); 27 | % word svm 28 | svmPath=fullfile('data','svm_svt.mat'); 29 | if(~exist(svmPath,'file')), error('SVM MODEL DOES NOT EXIST?!\n'); end 30 | model=load(svmPath); wordSvm=model.pNms1; wordSvm.thr=wordThr; 31 | 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 | % Run word recognition (PLEX) 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35 | tic; words=wordSpot(I,lexS,fModel,wordSvm,[],{'minH',.04}); toc 36 | wordDetDraw( words, 1, 1, 1, [0 1 0] ); 37 | 38 | -------------------------------------------------------------------------------- /demo/demoSvt.m: -------------------------------------------------------------------------------- 1 | function demoSvt(f) 2 | % Demo of PLEX running on an image from the SVT dataset. 3 | % 4 | % USAGE 5 | % demoSvt( idx ) 6 | % 7 | % INPUTS 8 | % idx - filenumber to test: 1-249 9 | % 10 | % EXAMPLE 11 | % demoSvt(18); paramount,avenue [rt=~20s] 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 18 | 19 | dPath=globals; 20 | 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | % Load image and request lexicon 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | I=imread(fullfile(dPath,'svt','test','images',sprintf('I%05i.jpg',f))); 25 | im(I); uLex=input('Enter comma-separated strings for lexicon:','s'); 26 | lexS=textscan(uLex,'%s','Delimiter',',')'; lexS=lexS{1}'; 27 | lexS=strtrim(lexS); 28 | if isempty(lexS), error('must enter lexicon\n'); end 29 | 30 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | % Load classifiers 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 | % word threshold 34 | wordThr=-1; 35 | % character fern 36 | clfPath=fullfile('data','fern_synth.mat'); 37 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 38 | fModel=load(clfPath); 39 | % word svm 40 | svmPath=fullfile('data','svm_svt.mat'); 41 | if(~exist(svmPath,'file')), error('SVM MODEL DOES NOT EXIST?!\n'); end 42 | model=load(svmPath); wordSvm=model.pNms1; wordSvm.thr=wordThr; 43 | 44 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 45 | % Run word spotting 46 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 47 | tic; words=wordSpot(I,lexS,fModel,wordSvm); toc 48 | wordDetDraw( words, 0, 1, 1, [0 1 0] ); 49 | -------------------------------------------------------------------------------- /detection/charDet.m: -------------------------------------------------------------------------------- 1 | function bbs=charDet(I,fModel,varargin) 2 | % Multi-scale sliding window character detection using Ferns 3 | % 4 | % USAGE 5 | % bbs = charDet( I, fModel, varargin) 6 | % 7 | % INPUTS 8 | % I - image 9 | % fModel - fern object 10 | % varargin - additional params 11 | % .ss - [2^(1/4)] scale step 12 | % .minH - [.04] min sliding window size ratio 13 | % .maxH - [1] max sliding window size ratio 14 | % .thr - [-75] character detection threshold 15 | % 16 | % OUTPUTS 17 | % bbs - matrix of bounding box output: location, scale, score 
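%          one row per detection: [x y w h score charClass]
%
% EXAMPLE (illustrative sketch only; the model file and image come from
% this repo's data/ folder, and the parameter values are the documented
% defaults, none of which are required by charDet itself)
%  fModel = load(fullfile('data','fern_synth.mat'));
%  I      = imread(fullfile('data','demo.jpg'));
%  bbs    = charDet(I,fModel,{'thr',-75,'minH',.04});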
18 | % 19 | % CREDITS 20 | % Written and maintained by Kai Wang and Boris Babenko 21 | % Copyright notice: license.txt 22 | % Changelog: changelog.txt 23 | % Please email kaw006@cs.ucsd.edu if you have questions. 24 | 25 | dfs={'ss',2^(1/4),'minH',.04,'maxH',1,'thr',-75}; 26 | [ss,minH,maxH,thr] = getPrmDflt(varargin,dfs,1); 27 | ferns=fModel.ferns; 28 | sBin=fModel.sBin; oBin=fModel.oBin; sz=[fModel.chH,fModel.chH]; 29 | hImg=size(I,1); wImg=size(I,2); k=0; 30 | 31 | bbs=zeros(1e6,6); 32 | minHP=minH*min(size(I,1),size(I,2)); maxHP=maxH*min(size(I,1),size(I,2)); 33 | sStart=ceil(max(log(sz(1)/maxHP),log(sz(2)/wImg))/log(ss)); 34 | sEnd=floor(log(sz(1)/minHP)/log(ss)); 35 | for s=sStart:sEnd, r=ss^s; 36 | if(s==0), I1=I; else I1=imResample(I,[round(hImg*r),round(wImg*r)]); end 37 | bbs1=detect1(I1,ferns,sz,sBin,oBin,thr); 38 | if(isempty(bbs1)), continue; end 39 | bbs1(:,1:4)=bbs1(:,1:4)/r; 40 | k1=size(bbs1,1); bbs(k+1:k+k1,:)=bbs1; k=k+k1; 41 | end 42 | bbs=bbs(1:k,:); 43 | end 44 | 45 | function bbs=detect1(I,ferns,sz,sBin,oBin,thr) 46 | x=5*hogOld(single(I),sBin,oBin); xs=size(x); 47 | %sz1=sz/sBin; % uncomment to use the updated HOG 48 | sz1=sz/sBin-2; 49 | x=fevalArrays(x,@im2col,sz1); if(ndims(x)==2),x=permute(x,[1 3 2]); end 50 | x=permute(x,[1 3 2]); 51 | x=reshape(x,[],size(x,3))'; 52 | xinds=fernsInds(double(x),ferns.fids,ferns.thrs); 53 | [~,ph]=fernsClfApply([],ferns,xinds); 54 | ph=reshape(ph,xs(1)-sz1(1)+1,xs(2)-sz1(2)+1,[]); 55 | bbs=zeros(numel(ph),6); 56 | if(size(ph,3)==63), ph=bsxfun(@minus,ph(:,:,1:62),ph(:,:,63)); end 57 | 58 | k=0; bbw=sz1(1)+2; bbh=sz1(2)+2; 59 | for j=1:size(ph,3), M=ph(:,:,j); 60 | ind=find(M>thr); sub=ind2sub2(size(M),ind); 61 | if(isempty(sub)); continue; end 62 | bbs1=[fliplr(sub) sub]; bbs1(:,5)=M(ind); 63 | bbs1(:,1)=bbs1(:,1)-floor(bbw/2); bbs1(:,3)=bbw; 64 | bbs1(:,2)=bbs1(:,2)-floor(bbh/2); bbs1(:,4)=bbh; 65 | 66 | k1=size(bbs1,1); bbs(k+1:k+k1,1:5)=bbs1; bbs(k+1:k+k1,6)=j; 67 | k=k+k1; 68 | end 69 | bbs=bbs(1:k,:); if(k<1), return; end 70 | bbs(:,1:2)=bbs(:,1:2)+2; 71 | bbs(:,1:4)=bbs(:,1:4)*sBin; bbs(:,1:2)=bbs(:,1:2)+0.5; 72 | end -------------------------------------------------------------------------------- /detection/charDetSVM.m: -------------------------------------------------------------------------------- 1 | function bbs=charDetSVM(I,models,varargin) 2 | % Multi-scale sliding window character detection using Ferns 3 | % 4 | % USAGE 5 | % bbs = charDet( I, fModel, varargin) 6 | % 7 | % INPUTS 8 | % I - image 9 | % fModel - fern object 10 | % varargin - additional params 11 | % .ss - [2^(1/4)] scale step 12 | % .minH - [.04] min sliding window size ratio 13 | % .maxH - [1] max sliding window size ratio 14 | % .thr - [-75] character detection threshold 15 | % 16 | % OUTPUTS 17 | % bbs - matrix of bounding box output: location, scale, score 18 | % 19 | % CREDITS 20 | % Written and maintained by Kai Wang and Boris Babenko 21 | % Copyright notice: license.txt 22 | % Changelog: changelog.txt 23 | % Please email kaw006@cs.ucsd.edu if you have questions. 
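% NOTE: the header above was carried over from charDet.m; this variant
% scores HOG windows with linear SVM character filters (applied through
% get_filter_responses/fconv) rather than with ferns, so 'models' is a
% cell array of per-character SVM models, not a fern object.
% Illustrative sketch (loadModels('models') mirrors runFullVidReal.m; the
% model directory name is an assumption):
%  models = loadModels('models');
%  bbs    = charDetSVM(imread(fullfile('data','demo.jpg')),models);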
24 | bbs=zeros(1e6,6); 25 | configs = configsgen; 26 | total_bbs = zeros(1e6,6); 27 | total = 0; 28 | [hms,scales]=get_filter_responses(I,models); 29 | initial_thresholds = -2; 30 | for level=1:length(scales) 31 | hms_scale = hms{level}; 32 | if isempty(hms_scale) 33 | continue; 34 | end 35 | current_scale = scales(level); 36 | for model_index = 1:length(models) 37 | char_dims = models{model_index}.char_dims; 38 | hm = hms_scale{model_index} + models{model_index}.bias; 39 | 40 | ind = find(hm > initial_thresholds); % Get the locations of the response 41 | [y,x] = ind2sub(size(hm),ind); 42 | if (isempty(x)); continue; end 43 | 44 | scores = hm(ind); 45 | 46 | if (size(x,2) > 1); x = x'; y = y'; scores = scores'; end 47 | 48 | %assert(length(x)==length(scores)); 49 | 50 | %Correct the position 51 | x = x * configs.bin_size/current_scale; 52 | y = y * configs.bin_size/current_scale; 53 | width = floor(char_dims(2)/current_scale); 54 | height = floor(char_dims(1)/current_scale); 55 | 56 | bbType = ones(length(x),1)*models{model_index}.char_index; 57 | bbs = [x,y,repmat(width,length(x),1),repmat(height,... 58 | length(x),1),scores,bbType]; 59 | 60 | current_count = size(bbs,1); 61 | if current_count > 0 62 | total_bbs(total+1:total+current_count,:) = bbs; 63 | total = total + current_count; 64 | end 65 | end 66 | end 67 | 68 | bbs=total_bbs(1:total,:); 69 | end -------------------------------------------------------------------------------- /detection/detect.m: -------------------------------------------------------------------------------- 1 | function bbs=detect(I,models,threshold) 2 | % Detect letters in image I 3 | % 4 | % INPUT 5 | % I - input image 6 | % models - cells of model 7 | % threshold - cutoff for detection 8 | % 9 | % OUTPUT 10 | % bbs - the bounding boxes results 11 | configs = configsgen; 12 | total_bbs = zeros(1e4,6); 13 | total = 0; 14 | [hms,scales]=get_filter_responses(I,models,configs); 15 | nScales = length(scales); 16 | for level=1:nScales 17 | hms_scale = hms{level}; 18 | if isempty(hms_scale); continue; end; 19 | current_scale = scales(level); 20 | for model_index = 1:length(models) 21 | char_dims = models{model_index}.char_dims; 22 | hm = hms_scale{model_index} + models{model_index}.bias; 23 | 24 | ind = find(hm > threshold); % Get the locations of the response 25 | [y,x] = ind2sub(size(hm),ind); 26 | if (isempty(x)); continue; end; 27 | 28 | scores = hm(ind); 29 | if (size(x,2) > 1); x = x'; y = y'; scores = scores'; end 30 | %assert(length(x)==length(scores)); 31 | 32 | %Correct the position 33 | x = x * configs.bin_size/current_scale; 34 | y = y * configs.bin_size/current_scale; 35 | width = floor(char_dims(2)/current_scale); 36 | height = floor(char_dims(1)/current_scale); 37 | 38 | if models{model_index}.char_index > 60 39 | ind = strfind(configs.alphabets,currentModel.char_index); 40 | else 41 | ind = models{model_index}.char_index; 42 | end 43 | bbType = ones(length(x),1)*ind; 44 | bbs = [x,y,repmat(width,length(x),1),repmat(height,... 
45 | length(x),1),scores,bbType]; 46 | 47 | current_count = size(bbs,1); 48 | if current_count > 0 49 | total_bbs(total+1:total+current_count,:) = bbs; 50 | total = total + current_count; 51 | end 52 | end 53 | end 54 | total_bbs = total_bbs(1:total,:); 55 | % if more than a 1000 take the top 1000; 56 | if total > 1000 57 | [~,sort_order] = sort(total_bbs(:,5),'descend'); 58 | total_bbs = total_bbs(sort_order(1:1000),:); 59 | end 60 | bbs = total_bbs; 61 | bbs = bbNms(total_bbs,'type','max','overlap',.5,'separate',1); 62 | end 63 | -------------------------------------------------------------------------------- /detection/featpyramid.m: -------------------------------------------------------------------------------- 1 | function pyramid=featpyramid(I,configs,varargin) 2 | dfs={'ss',2^(1/4),'minH',.04,'maxH',1,'thr',-75}; 3 | [ss,minH,maxH,~] = getPrmDflt(varargin,dfs,1); 4 | sz=[configs.canonical_scale(1),configs.canonical_scale(2)]; 5 | hImg=size(I,1); 6 | wImg=size(I,2); 7 | minHP=minH*min(hImg,wImg); 8 | maxHP=maxH*min(hImg,wImg); 9 | sStart=ceil(max(log(sz(1)/maxHP),log(sz(2)/wImg))/log(ss)); 10 | sEnd=floor(log(sz(1)/minHP)/log(ss)); 11 | pyramid.feat = cell(sEnd-sStart+1,1); 12 | pyramid.scales = ss.^(sStart:sEnd); 13 | level = 1; 14 | for s=sStart:sEnd, r=ss^s; 15 | if(s==0), I1=I; 16 | else I1=imResample(I,[round(hImg*r),round(wImg*r)]); 17 | end 18 | hogI = 5*hog(single(I1),configs.bin_size,configs.n_orients); 19 | pyramid.feat{level} = hogI; 20 | level = level + 1; 21 | end 22 | end -------------------------------------------------------------------------------- /detection/get_filter_responses.m: -------------------------------------------------------------------------------- 1 | function [hms,scales] = get_filter_responses(I,models) 2 | configs = configsgen; 3 | filters = cell(length(models),1); 4 | % contructing the filters 5 | for i=1:length(models) 6 | model = models{i}; 7 | char_dims = model.char_dims; 8 | r = floor(char_dims(1)/configs.bin_size); 9 | c = floor(char_dims(2)/configs.bin_size); 10 | filter = reshape(model.w,[r,c,configs.n_orients*4]); 11 | filters{i} = single(filter); 12 | end 13 | 14 | pyramid = featpyramid(I,configs); 15 | scales = pyramid.scales; 16 | hms = cell(length(pyramid.scales),1); 17 | for level=1:length(pyramid.scales) 18 | hogI = pyramid.feat{level}; 19 | try 20 | r=fconv(hogI,filters,1,length(filters)); 21 | catch 22 | r = {}; 23 | end 24 | 25 | hms{level} = r; 26 | end 27 | end -------------------------------------------------------------------------------- /evaluation/Fscore.m: -------------------------------------------------------------------------------- 1 | function [f,x,y,i]=Fscore(xs,ys) 2 | % Compute F-score 3 | % 4 | % USAGE 5 | % [f,x,y,i]=Fscore( xs, ys ) 6 | % 7 | % INPUTS 8 | % xs - precision 9 | % ys - recall 10 | % 11 | % OUTPUTS 12 | % f - fscore 13 | % x - precision at best fscore 14 | % y - recall at best fscore 15 | % i - index of best fscore 16 | % 17 | % CREDITS 18 | % Written and maintained by Kai Wang and Boris Babenko 19 | % Copyright notice: license.txt 20 | % Changelog: changelog.txt 21 | % Please email kaw006@cs.ucsd.edu if you have questions. 
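% The computation below is the balanced F-score written as a harmonic
% mean, F = 1/(0.5/p + 0.5/r) = 2*p*r/(p+r), evaluated element-wise along
% the precision/recall curve; the maximum over the curve and its location
% are returned.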
22 | 23 | fs=1./(.5./xs+.5./ys); 24 | [f,i]=max(fs); x=xs(i); y=ys(i); -------------------------------------------------------------------------------- /evaluation/evalReading.m: -------------------------------------------------------------------------------- 1 | function [gt,dt,gtw,dtw,files1] = evalReading(gtDir, dtDir, varargin) 2 | % Process output 3 | % 4 | % USAGE 5 | % [gt,dt,gtw,dtw,files1] = evalReading( gtDir, dtDir, kVals, varargin ) 6 | % 7 | % INPUTS 8 | % gtDir - directory for groundtruth annotations 9 | % dtDir - directory of precomputed outputs 10 | % kVals - vector of distractor values 11 | % varargin - additional parameters 12 | % .thr - [.5] overlap requirement for match 13 | % .mul - [0] can match multiple groundtruths 14 | % .resize - {} resize factor for detected bounding boxes 15 | % .f0 - [1] start offset 16 | % .f1 - [inf] end index 17 | % .imDir - [''] directory for images 18 | % .lexDir - [''] directory for lexicon files 19 | % .pNms - additional word-level NMS params 20 | % .type - ['none'] NMS type (currently just 'none' or 'maxg') 21 | % .thr - [-inf] word threshold 22 | % .ocr - [0] correct OCR output 23 | % 24 | % OUTPUTS 25 | % gt - ground truth bounding boxes and if found match 26 | % dt - detected bounding boxes if found match 27 | % gtw - ground truth word objects 28 | % dtw - detected word objects 29 | % files1 - paths of the image files 30 | % 31 | % CREDITS 32 | % Written and maintained by Kai Wang and Boris Babenko 33 | % Copyright notice: license.txt 34 | % Changelog: changelog.txt 35 | % Please email kaw006@cs.ucsd.edu if you have questions. 36 | 37 | dfs={'thr',.5,'mul',0,'resize',{},'f0',1,'f1',inf,'imDir','',... 38 | 'lexDir','','pNms',struct('type','none','thr',-inf),'ocr',0}; 39 | [thr,mul,rs,f0,f1,imDir,lexDir,pNms,ocr]=getPrmDflt(varargin,dfs,1); 40 | if(isempty(imDir)), imDir=gtDir; end 41 | 42 | % collect all strings in ground truth 43 | files=dir(fullfile(gtDir,'*.txt')); files={files.name}; 44 | files=files(f0:min(f1,end)); n=length(files); assert(n>0); 45 | 46 | % get files in ground truth directory 47 | gt=cell(1,n); dt=cell(1,n); dtw=dt; gtw=gt; 48 | ticId=ticStatus('evaluating'); 49 | 50 | % loop over images 51 | for i=1:n 52 | gtNm=fullfile(gtDir,files{i}); 53 | gt1=bbGt('bbLoad',gtNm); 54 | [gtWords,gtInds]=filterValidGt(gt1); gt1=gt1(gtInds); 55 | dtNm=fullfile(dtDir,[files{i}(1:end-8),'.mat']); 56 | if(~exist(dtNm,'file')), dta=[]; else res=load(dtNm); dta=res.words; end 57 | if(isempty(lexDir)), error('Lexicon directory is empty.'); end 58 | 59 | % load lexicon 60 | fid=fopen(fullfile(lexDir,files{i}),'r'); 61 | lexS=textscan(fid,'%s'); lexS=unique(lexS{1}'); 62 | fclose(fid); 63 | 64 | % filter/spell-check detections 65 | if(isempty(dta)), dt1=[]; else 66 | if(ocr), 67 | dt1=spellCheck(dta,lexS); 68 | for j=1:length(dt1) 69 | dt1(j).bb=bbApply('resize',dt1(j).bb,.75,.75); 70 | end 71 | else 72 | dt1=dta(ismember(upper({dta.word}),upper(lexS))); 73 | end 74 | end 75 | 76 | % flip signs of word scores 77 | for j=1:length(dt1), dt1(j).bb(:,5)=-dt1(j).bb(:,5); end 78 | 79 | % word nonmax suppr 80 | dt1=wordNms(dt1,pNms); 81 | files1{i}=fullfile(imDir,files{i}(1:end-4)); 82 | 83 | % evaluate detections 84 | [gt2,dt2,gtw2,dtw2] = evalReading1(gt1,dt1,thr,mul); 85 | gt{1,i}=gt2; dt{1,i}=dt2; dtw{1,i}=dtw2; gtw{1,i}=gtw2; 86 | 87 | tocStatus(ticId,i/n); 88 | end 89 | 90 | end 91 | 92 | function [gt, dt, gt0, dt0] = evalReading1( gt0, dt0, thr, mul ) 93 | if(nargin<3 || isempty(thr)), thr=.5; end 94 | if(nargin<4 || isempty(mul)), 
mul=0; end 95 | nd=length(dt0); ng=length(gt0); 96 | for g=1:ng, gt0(g).det=0; gt0(g).read=0; end 97 | if(ng==0), gt=zeros(0,5); else 98 | [~,ord]=sort([gt0.ign],'ascend'); gt0=gt0(ord,:); 99 | gt=reshape([gt0.bb],4,[])'; gt(:,5)=-[gt0.ign]; 100 | end 101 | if(nd==0), dt=zeros(0,6); else 102 | wbbs=reshape([dt0.bb],5,[])'; [~,ord]=sort(wbbs(:,5),'descend'); 103 | dt0=dt0(ord); 104 | dt=reshape([dt0.bb],5,[])'; dt(:,6)=0; 105 | end 106 | if(nd==0||ng==0), return; end 107 | 108 | % Attempt to match each (sorted) dt to each (sorted) gt 109 | for d=1:nd 110 | bstOa=thr; bstg=0; bstm=0; % info about best match so far 111 | for g=1:ng 112 | % if this gt already matched, continue to next gt 113 | m=gt(g,5); if( m==1 && ~mul ), continue; end 114 | % if dt already matched, and on ignore gt, nothing more to do 115 | if( bstm~=0 && m==-1 ), break; end 116 | % compute overlap area, continue to next gt unless better match made 117 | oa=bbGt('compOa',dt0(d).bb(1:4),gt0(g).bb(1:4),m==-1); 118 | if(oa=0 && ~strcmpi(gt0(g).lbl,dt0(d).word)), continue; end 121 | gt0(g).read=1; 122 | % match successful and best so far, store appropriately 123 | bstOa=oa; bstg=g; if(m==0), bstm=1; else bstm=-1; end 124 | end 125 | % store type of match for both dt and gt 126 | if(bstm~=0), gt(bstg,5)=bstm; dt(d,6)=bstm; end 127 | end 128 | end 129 | 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /evaluation/genPrCurves.m: -------------------------------------------------------------------------------- 1 | function genPrCurves 2 | % Generate precision/recall curves. Given properly formatted output, this 3 | % function will do the evaluation with nonmax suppression 4 | % 5 | % This code is to be run separately for ICDAR and SVT, and separately for 6 | % various lexicon sizes (for ICDAR). The 'paramSets' variable controls what 7 | % gets run and plotted. 8 | % 9 | % CREDITS 10 | % Written and maintained by Kai Wang and Boris Babenko 11 | % Copyright notice: license.txt 12 | % Changelog: changelog.txt 13 | % Please email kaw006@cs.ucsd.edu if you have questions. 14 | 15 | [dPath,ch,ch1,chC,chClfNm]=globals; 16 | RandStream.getDefaultStream.reset(); 17 | 18 | S=6; M=256; nTrn=Inf; 19 | trnT='charHard'; trnBg='msrcBt'; nBg=10000; 20 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,'nBg',nBg,'nTrn',nTrn}; 21 | cNm=chClfNm(clfPrms{:}); 22 | 23 | % testing conditions (ICDAR) 24 | % -- paramSet={result directory, with/without spell check (for OCR)} 25 | 26 | % -- other settings (SVT) 27 | tstD='svt'; tstSpl='test'; lexD='lex'; 28 | paramSets={{fullfile('res-synth',cNm),0},... 29 | {fullfile('res-synth-svm',cNm),0}}; 30 | 31 | % -- settings when reproducing results 32 | % tstD='icdar'; tstSpl='test'; lexD='lex20'; % lex5, lex50 33 | % paramSets={{fullfile('res-swtPad','res-synth-svm',cNm),0},... 34 | % {fullfile('res-synth-svm',cNm),0}}; 35 | 36 | % lexicon folder 37 | lexDir=fullfile(dPath,tstD,tstSpl,lexD); 38 | 39 | pNms=struct('thr',-inf,'ovrDnm','min','overlap',.5); pNms.type='max'; 40 | 41 | % figure setup 42 | hs=[]; lgs=cell(0); figure(1); clf; axis normal; 43 | if(~strcmp(tstD,'svt')), axis([0 .8 .5 1]); else axis([0 .5 0 1]); end 44 | set(gcf,'Position',[50 50 600 300]); hold on; 45 | 46 | xlabel('Recall','FontSize',16); ylabel('Precision','FontSize',16); 47 | 48 | % eval params 49 | iDir=fullfile(dPath,tstD,tstSpl,'images'); 50 | evalPrm={'thr',.5,'imDir',iDir,'f0',1,'f1',inf,'lexDir',lexDir,... 
51 | 'pNms',pNms}; 52 | 53 | gtDir=fullfile(dPath,tstD,tstSpl,'wordAnn'); 54 | 55 | % loop over each paramset and plot it on the same figure 56 | for p=1:length(paramSets) 57 | paramSet=paramSets{p}; 58 | resD=paramSet{1}; isOcr=paramSet{2}; 59 | dtDir=fullfile(dPath,tstD,tstSpl,resD,'images'); 60 | [gt,dt] = evalReading(gtDir,dtDir,'ocr',isOcr,evalPrm{:}); 61 | 62 | [xs,ys,sc]=bbGt('compRoc', gt, dt, 0); 63 | [fs,~,~,idx]=Fscore(xs,ys); 64 | hs(end+1)=plot(xs,ys,'Color',rand(3,1),'LineWidth',3); 65 | lgs{end+1}=sprintf('%i [%1.3f] thr=%1.3f',p,fs,sc(idx)); 66 | legend(hs,lgs,'Location','SouthWest','FontSize',14); 67 | end 68 | saveas(gcf,fullfile(dPath,tstD,sprintf('%s_%s_%s',cNm,lexD,tstSpl)),'fig'); 69 | savefig(fullfile(dPath,tstD,sprintf('%s_%s_%s',cNm,lexD,tstSpl)),... 70 | 'pdf','-fonts'); 71 | -------------------------------------------------------------------------------- /evaluation/genPrCurvesEZ.m: -------------------------------------------------------------------------------- 1 | function genPrCurvesEZ 2 | % Generate precision/recall curves. Given properly formatted output, this 3 | % function will do the evaluation with nonmax suppression 4 | % 5 | % This code is to be run separately for ICDAR and SVT, and separately for 6 | % various lexicon sizes (for ICDAR). The 'paramSets' variable controls what 7 | % gets run and plotted. 8 | % 9 | % CREDITS 10 | % Written and maintained by Kai Wang and Boris Babenko 11 | % Copyright notice: license.txt 12 | % Changelog: changelog.txt 13 | % Please email kaw006@cs.ucsd.edu if you have questions. 14 | 15 | dPath=globals; 16 | RandStream.getDefaultStream.reset(); 17 | 18 | % testing conditions (ICDAR) 19 | % -- paramSet={result directory, with/without spell check (for OCR)} 20 | 21 | % tstD='icdar'; tstSpl='test'; lexD='lex20'; % lex5, lex50 22 | % swtPlexD=fullfile(dPath,tstD,tstSpl,'EZ','swt+plex+r','images'); 23 | % plexD=fullfile(dPath,tstD,tstSpl,'EZ','plex+r','images'); 24 | % paramSets={{swtPlexD,0},... 25 | % {plexD,0}}; 26 | 27 | % -- other settings (SVT) 28 | tstD='svt'; tstSpl='test'; lexD='lex'; 29 | plexD=fullfile(dPath,tstD,tstSpl,'EZ','plex+r','images'); 30 | paramSets={{plexD,0}}; 31 | 32 | % lexicon folder 33 | lexDir=fullfile(dPath,tstD,tstSpl,lexD); 34 | 35 | pNms=struct('thr',-inf,'ovrDnm','min','overlap',.5); pNms.type='max'; 36 | 37 | % figure setup 38 | hs=[]; lgs=cell(0); figure(1); clf; axis normal; 39 | if(~strcmp(tstD,'svt')), axis([0 .8 .5 1]); else axis([0 .5 0 1]); end 40 | set(gcf,'Position',[50 50 600 300]); hold on; 41 | 42 | xlabel('Recall','FontSize',16); ylabel('Precision','FontSize',16); 43 | 44 | % eval params 45 | iDir=fullfile(dPath,tstD,tstSpl,'images'); 46 | evalPrm={'thr',.5,'imDir',iDir,'f0',1,'f1',inf,'lexDir',lexDir,... 
47 | 'pNms',pNms}; 48 | 49 | gtDir=fullfile(dPath,tstD,tstSpl,'wordAnn'); 50 | 51 | % loop over each paramset and plot it on the same figure 52 | for p=1:length(paramSets) 53 | paramSet=paramSets{p}; 54 | dtDir=paramSet{1}; isOcr=paramSet{2}; 55 | 56 | [gt,dt] = evalReading(gtDir,dtDir,'ocr',isOcr,evalPrm{:}); 57 | [xs,ys,sc]=bbGt('compRoc', gt, dt, 0); 58 | [fs,~,~,idx]=Fscore(xs,ys); 59 | hs(end+1)=plot(xs,ys,'Color',rand(3,1),'LineWidth',3); 60 | lgs{end+1}=sprintf('%i [%1.3f] thr=%1.3f',p,fs,sc(idx)); 61 | legend(hs,lgs,'Location','SouthWest','FontSize',14); 62 | end 63 | 64 | -------------------------------------------------------------------------------- /experiments/chardet_exp.m: -------------------------------------------------------------------------------- 1 | function chardet_exp(models,test_dataset,output_path) 2 | % This function aims to perform different experiments on the effectiveness 3 | % of various character detection pipeline 4 | % models: the trained character models 5 | % test_dataset: the path to the test_dataset 6 | 7 | image_paths = dir(fullfile(test_dataset,'*.jpg')); 8 | nImg = length(image_paths); 9 | ticId=ticStatus('Running PLEX on full images',1,30,1); 10 | for i=1:nImg 11 | current_image = image_paths(i).name; 12 | I = imread(fullfile(test_dataset,current_image)); 13 | bbs=charDet(I,models,{}); 14 | 15 | % save the bbs 16 | sF = fullfile(output_path,current_image); 17 | save(sF,'bbs'); 18 | tocStatus(ticId,i/nImg); 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /experiments/precompFullImage.m: -------------------------------------------------------------------------------- 1 | function precompFullImage 2 | % Run end-to-end PLEX pipeline on the ICDAR and SVT datasets 3 | % 4 | % One MAT file is created for each image to record the results. After all 5 | % the precomp*.m files are complete, run genPrCurves.m to display results. 6 | % 7 | % CREDITS 8 | % Written and maintained by Kai Wang and Boris Babenko 9 | % Copyright notice: license.txt 10 | % Changelog: changelog.txt 11 | % Please email kaw006@cs.ucsd.edu if you have questions. 12 | 13 | [dPath,~,~,~,chClfNm]=globals; 14 | 15 | % fern parameters 16 | S=6; M=256; nTrn=Inf; 17 | 18 | % paramSet={train dataset,with/without neighboring chars, 19 | % bg dataset,# bg images, test dataset, test split} 20 | paramSets={{'synth','charHard','msrcBt',10000,'svt','test'}%,... 21 | %{'icdar','charHard','icdarBt',10000,'svt','test'},... 22 | %{'synth','charHard','msrcBt',10000,'svt','train'},... 23 | %{'icdar','charHard','icdarBt',10000,'svt','train'} 24 | }; 25 | 26 | % paramSets={{'synth','charHard','msrcBt',10000,'icdar','test'},... 27 | % {'icdar','charHard','icdarBt',10000,'icdar','test'},... 28 | % {'synth','charHard','msrcBt',10000,'svt','test'},... 29 | % {'icdar','charHard','icdarBt',10000,'svt','test'},... 30 | % {'synth','charHard','msrcBt',10000,'icdar','train'},... 31 | % {'icdar','charHard','icdarBt',10000,'icdar','train'},... 32 | % {'synth','charHard','msrcBt',10000,'svt','train'},... 
33 | % {'icdar','charHard','icdarBt',10000,'svt','train'}}; 34 | 35 | for p=1:length(paramSets) 36 | RandStream.getDefaultStream.reset(); 37 | paramSet=paramSets{p}; 38 | trnD=paramSet{1}; trnT=paramSet{2}; trnBg=paramSet{3}; nBg=paramSet{4}; 39 | tstD=paramSet{5}; tstSpl=paramSet{6}; tstDir=fullfile(dPath,tstD,tstSpl); 40 | 41 | fprintf('Working on: '); 42 | disp(paramSet); 43 | 44 | allwords=loadLex(tstDir); 45 | % set up classifiers 46 | cDir=fullfile(dPath,trnD,'clfs'); 47 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,'nBg',... 48 | nBg,'nTrn',nTrn}; 49 | cNm='fern_S6M256sBin8oBin8trnSettraintrnTcharHardchH48bgDirmsrcBtnBg10000smartTrn0'; 50 | %chClfNm(clfPrms{:}); 51 | clfPath=fullfile(cDir,[cNm,'.mat']); 52 | 53 | % set up output locations 54 | d1=fullfile(tstDir,['res-' trnD],cNm,'images'); 55 | 56 | %if(~exist(d1,'dir')), mkdir(d1); end 57 | if(exist(d1,'dir')), rmdir(d1,'s'); end 58 | mkdir(d1); 59 | save(fullfile(d1,'workspace')); % save all variables up to now 60 | saveRes=@(f,words,t1,t2,t3)save(f,'words','t1','t2','t3'); 61 | 62 | % load clfs 63 | %if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 64 | clfPath=fullfile('data','fern_synth.mat'); 65 | fModel=load(clfPath); 66 | nImg=length(dir(fullfile(tstDir,'wordAnn','*.txt'))); 67 | 68 | ticId=ticStatus('Running PLEX on full images',1,30,1); 69 | for f=0:nImg-1 70 | sF=fullfile(d1,sprintf('I%05d.mat',f)); 71 | I=imread(fullfile(tstDir,sprintf('images/I%05i.jpg',f))); 72 | lexF=fullfile(tstDir,sprintf('lex/I%05i.jpg.txt',f)); 73 | if(exist(lexF,'file')) 74 | fid=fopen(lexF,'r'); 75 | lexS=textscan(fid,'%s'); lexS=lexS{1}'; 76 | fclose(fid); 77 | else 78 | lexS=allwords; 79 | end 80 | t3S=tic; [words,t1,t2]=wordSpot(I,lexS,fModel); t3=toc(t3S); 81 | saveRes(sF,words,t1,t2,t3); 82 | tocStatus(ticId,f/nImg); 83 | end 84 | end 85 | 86 | end 87 | -------------------------------------------------------------------------------- /experiments/precompSwtAbbyy.m: -------------------------------------------------------------------------------- 1 | function precompSwtAbbyy 2 | % Run ABBYY on regions returned by Stroke Width Transform on the ICDAR 3 | % dataset 4 | % 5 | % One MAT file is created for each image to record the results. After all 6 | % the precomp*.m files are complete, run genPrCurves.m to display results. 7 | % 8 | % CREDITS 9 | % Written and maintained by Kai Wang and Boris Babenko 10 | % Copyright notice: license.txt 11 | % Changelog: changelog.txt 12 | % Please email kaw006@cs.ucsd.edu if you have questions. 
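% Input convention (as implemented by parse_fname below): one ABBYY output
% text file per SWT-padded region, named <imId>_<x>_<y>_<w>_<h>.txt. The
% results are regrouped by image id and saved under res-swtPad/abbyy/images
% as a 'words' struct array with fields .word and .bb.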
13 | 14 | dPath=globals; 15 | 16 | tstDir=fullfile(dPath,'icdar','test'); 17 | abbyyDir=fullfile(tstDir,'abbyy','wordsSWTpad'); 18 | d1=fullfile(tstDir,'res-swtPad','abbyy','images'); 19 | if(~exist(d1,'dir')), mkdir(d1); end 20 | % read abby SWT results into common structure 21 | 22 | bbList=[]; 23 | dir1=dir(fullfile(abbyyDir,'*txt')); 24 | for i=1:length(dir1) 25 | fname=dir1(i).name; 26 | [imId,bb]=parse_fname(fname); 27 | wd=procAbbyy(fullfile(abbyyDir,fname)); 28 | bbList{end+1,1}=imId; bbList{end,2}=bb; bbList{end,3}=wd; 29 | end 30 | [B,I,J]=unique(bbList(:,1)); 31 | for i=1:length(B) 32 | idx=find(J==i); 33 | imId=B{i}; 34 | words=[]; 35 | for j=1:length(idx) 36 | bb1=bbList{idx(j),2}; 37 | word1=bbList{idx(j),3}; 38 | words(end+1).word=word1; 39 | words(end).bb=[bb1, 0]; 40 | end 41 | save(fullfile(d1,[imId,'.mat']),'words'); 42 | end 43 | 44 | end 45 | 46 | function [imId,bb]=parse_fname(str) 47 | uscore=find(str=='_'); dotind=find(str=='.'); 48 | imId=str(1:uscore(1)-1); 49 | xval=str2double(str(uscore(1)+1:uscore(2)-1)); 50 | yval=str2double(str(uscore(2)+1:uscore(3)-1)); 51 | wval=str2double(str(uscore(3)+1:uscore(4)-1)); 52 | hval=str2double(str(uscore(4)+1:dotind(1)-1)); 53 | bb=[xval,yval,wval,hval]; 54 | end 55 | -------------------------------------------------------------------------------- /experiments/precompSwtPlex.m: -------------------------------------------------------------------------------- 1 | function precompSwtPlex 2 | % Run PLEX on regions returned by Stroke Width Transform on the ICDAR 3 | % dataset 4 | % 5 | % One MAT file is created for each image to record the results. After all 6 | % the precomp*.m files are complete, run genPrCurves.m to display results. 7 | % 8 | % CREDITS 9 | % Written and maintained by Kai Wang and Boris Babenko 10 | % Copyright notice: license.txt 11 | % Changelog: changelog.txt 12 | % Please email kaw006@cs.ucsd.edu if you have questions. 13 | 14 | [dPath,ch,ch1,chC,chClfNm]=globals; 15 | 16 | type='swtPad'; 17 | % fern parameters 18 | S=6; M=256; nTrn=Inf; 19 | % only consider words that span at least half the image width 20 | widthThr=.5; 21 | frnPrms={'ss',2^(1/5),'minH',.6}; 22 | nmsPrms={'thr',-75,'separate',1,'type','maxg','resize',{3/4,1/2},... 23 | 'ovrDnm','union','overlap',.3,'maxn',inf}; 24 | 25 | % paramSet={train dataset,with/without neighboring chars, 26 | % bg dataset,# background images,test split} 27 | paramSets={{'synth','charHard','msrcBt',10000,'test'},... 28 | {'icdar','charHard','icdarBt',10000,'test'},... 29 | {'synth','charHard','msrcBt',10000,'train'},... 
30 | {'icdar','charHard','icdarBt',10000,'train'}}; 31 | 32 | for p=1:length(paramSets) 33 | RandStream.getDefaultStream.reset(); 34 | paramSet=paramSets{p}; 35 | trnD=paramSet{1}; trnT=paramSet{2}; trnBg=paramSet{3}; nBg=paramSet{4}; 36 | tstSpl=paramSet{5}; tstDir=fullfile(dPath,'icdar',tstSpl); 37 | 38 | lexS=loadLex(tstDir); 39 | % set up classifiers 40 | cDir=fullfile(dPath,trnD,'clfs'); 41 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,'nBg',nBg,'nTrn',nTrn}; 42 | cNm=chClfNm(clfPrms{:}); 43 | clfPath=fullfile(cDir,[cNm,'.mat']); 44 | 45 | % set up output locations 46 | d1=fullfile(tstDir,['res-', type],['res-' trnD],cNm,'images'); 47 | if(~exist(d1,'dir')), mkdir(d1); end 48 | save(fullfile(d1,'workspace')); % save all variables up to now 49 | saveRes=@(f,words,t1,t2,t3)save(f,'words','t1','t2','t3'); 50 | 51 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 52 | fModel=load(clfPath); 53 | 54 | imDir=fullfile(tstDir,'images'); 55 | filesJpg=dir(fullfile(imDir,'*jpg')); 56 | tot1=[]; tot2=[]; tot3=[]; 57 | 58 | for i=1:length(filesJpg) 59 | subSwtDir=fullfile(imDir,[filesJpg(i).name,'_',type]); 60 | if(~exist(subSwtDir,'dir')), continue; end 61 | filesSwtJpg=dir(fullfile(subSwtDir,'*jpg')); 62 | imId=filesJpg(i).name; didx=find(imId=='.'); imId=imId(1:didx(end)-1); 63 | sF=fullfile(d1,[imId,'.mat']); words=[]; 64 | for j=1:length(filesSwtJpg) 65 | I=imread(fullfile(subSwtDir,filesSwtJpg(j).name)); 66 | 67 | t3S=tic; 68 | [words1,t1,t2]=wordSpot(I,lexS,fModel,{},nmsPrms,frnPrms); 69 | t3=toc(t3S); 70 | 71 | tot1=[tot1,t1]; tot2=[tot2,t2]; tot3=[tot3,t3]; 72 | 73 | if(isempty(words1)), continue; end 74 | % width threshold 75 | wbb=reshape([words1.bb],5,[])'; inds=wbb(:,3)>(size(I,2)*widthThr); 76 | words1=words1(inds); if(isempty(words1)), continue; end 77 | 78 | % fix bb offset 79 | swtBb=parse_bb(filesSwtJpg(j).name); 80 | for k=1:length(words1) 81 | bbOffset=zeros(1,size(words1(k).bb,2)); 82 | bbOffset(1:2)=swtBb(1:2); 83 | words1(k).bb=words1(k).bb+bbOffset; 84 | bbsOffset=zeros(1,size(words1(k).bbs,2)); 85 | bbsOffset(1:2)=swtBb(1:2); 86 | words1(k).bbs=words1(k).bbs+repmat(bbsOffset,size(words1(k).bbs,1),1); 87 | words1(k).bid=j; 88 | end 89 | words=[words,words1]; 90 | end 91 | saveRes(sF,words,t1,t2,t3); 92 | end 93 | end 94 | 95 | end 96 | 97 | function bb=parse_bb(str) 98 | uscore=find(str=='_'); dotind=find(str=='.'); 99 | xval=str2double(str(1:uscore(1)-1)); 100 | yval=str2double(str(uscore(1)+1:uscore(2)-1)); 101 | wval=str2double(str(uscore(2)+1:uscore(3)-1)); 102 | hval=str2double(str(uscore(3)+1:dotind(1)-1)); 103 | bb=[xval,yval,wval,hval]; 104 | end -------------------------------------------------------------------------------- /experiments/runFullVidReal.m: -------------------------------------------------------------------------------- 1 | function runFullVidReal(indeces) 2 | % It is generating the bounding boxes for the frames 3 | configs=configsgen; 4 | models = loadModels('models'); 5 | vidPath = fullfile(configs.icdar_video,'test','videos','mp4'); 6 | vidps = dir(fullfile(vidPath,'*.mp4')); 7 | 8 | if ~exist('indeces','var'); indeces=1:length(vidps); end 9 | 10 | % Getting the hms first 11 | for iVid = indeces 12 | vpath = fullfile(vidPath,vidps(iVid).name); 13 | fprintf('Extracting hms from %s\n',vpath); 14 | done = false; 15 | while ~done; 16 | vidobject = VideoReader(vpath); 17 | if vidobject.NumberOfFrames > 0; done = true; end 18 | end 19 | charDetVideo('gethm',vidobject,models); 20 | end 21 | 22 | % Then get the bbs 23 | for iVid = 
indeces
24 |     vpath = fullfile(vidPath,vidps(iVid).name);
25 |     fprintf('Getting bbs on %s\n',vpath); done = false; % (added) reset so the retry loop runs for each video
26 |     while ~done;
27 |         vidobject = VideoReader(vpath);
28 |         if vidobject.NumberOfFrames > 0; done = true; end
29 |     end
30 |     charDetVideo('getbbs',vidobject,models);
31 | end
32 | end
33 | 
--------------------------------------------------------------------------------
/experiments/run_chardet_exp.m:
--------------------------------------------------------------------------------
1 | % This script runs all the character detection (chardet) experiments
2 | [dPath,ch,ch1,chC,chClfNm,dfNP]=globals;
3 | %% First experiment:
4 | % Run Fern character detectors trained on synth against the icdar test
5 | % dataset
6 | clfPath=fullfile('data','fern_synth.mat');
7 | fmodel=load(clfPath);
8 | test_dataset = fullfile(dPath,'icdar','test','images');
9 | output_path = fullfile(dPath,'icdar','test','det_results');
10 | chardet_exp(fmodel,test_dataset,output_path);
11 | 
12 | %%
13 | fprintf('The results are at %s\n',output_path);
14 | 
15 | %% Calculating the F-score for the characters
16 | gtDir = fullfile(dPath,'icdar','test','charAnn');
17 | fscores = zeros(length(ch),1);
18 | ticId=ticStatus('Collecting Fscore',1,30,1);
19 | for char_index = 1:length(ch)-1
20 |     [gt0,~] = bbGt('loadAll',gtDir,[],{'lbls',ch(char_index)});
21 |     dt0 = loadBB(output_path,char_index);
22 |     current_char = ch(char_index);
23 | 
24 |     % evaluate the detections against the ground truth
25 |     [gt,dt] = bbGt( 'evalRes', gt0, dt0);
26 |     [xs,ys,sc]=bbGt('compRoc', gt, dt, 0);
27 |     fs = Fscore(xs,ys);
28 |     fscores(char_index) = fs;
29 |     tocStatus(ticId,char_index/(length(ch)));
30 | end
31 | 
32 | %% Second experiment:
33 | % Run the currently trained SVM models against the icdar dataset
--------------------------------------------------------------------------------
/experiments/yt_experiments.m:
--------------------------------------------------------------------------------
1 | % Youtube experiments without lexicons
2 | % experiment parameters
3 | withLex = 0;
4 | % reading in the video
5 | configs=configsgen;
6 | video_folder = configs.YVT_path;
7 | temporal_data_folder = 'temp';
8 | 
9 | %% Read videos
10 | % Can't load them all at once: not enough memory
11 | %videos = readVideoFromFrames(fullfile(video_folder,'frames'));
12 | 
13 | %% play the videos
14 | %matlabpool
15 | 
16 | %%
17 | % Run the character detection on every video
18 | frames_path = fullfile(configs.YVT_path,'frames');
19 | matfiles_path = fullfile(configs.YVT_path,'matfiles');
20 | vidpaths = dir(frames_path);
21 | vidpaths = vidpaths(3:end);
22 | 
23 | % save function
24 | saveVideo = @(sF,vid) save(sF,'vid','-v7.3');
25 | for vidindex = 1:length(vidpaths)
26 |     vidpath = fullfile(frames_path,vidpaths(vidindex).name);
27 |     matpath = fullfile(matfiles_path,[vidpaths(vidindex).name '.mat']);
28 |     fprintf('Working on %s\n',vidpath);
29 |     if exist(matpath,'file') > 0
30 |         fprintf('%s already exists.\n',matpath);
31 |         continue
32 |     end
33 |     vid=readVideoFromFrames(vidpath);
34 |     saveVideo(matpath,vid);
35 |     clear vid
36 |     %bbs_videos = charDetVideo(curVid.frames,models);
37 |     %saveVideo(fullfile('bbs_videos',curVid.name),bbs_videos);
38 | end
39 | 
40 | %% Generating the bounding boxes for the frames
41 | lstruct = load('data/models_real_nomixture');
42 | models = lstruct.models;
43 | videos_path = fullfile(configs.YVT_path,'videos');
44 | vidpaths = dir(fullfile(videos_path,'*.mp4'));
45 | for vidindex = 1:length(vidpaths)
46 |     vpath = fullfile(videos_path,vidpaths(vidindex).name);
47 |     fprintf('Working on %s\n',vpath);
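    % Note (added): unlike the caching loop above, this loop keeps only the last
    % bbs_videos in the workspace; reuse the commented-out saveVideo pattern above
    % if the per-video detections need to be kept.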
48 | vidobject = VideoReader(vpath); 49 | bbs_videos=charDetVideo(vidobject,models); 50 | end 51 | 52 | %% Forming words from the character detections 53 | words_videos = cell(length(videos),1); 54 | for video_index = 1:length(videos) 55 | curVid = videos{video_index}; 56 | path_to_vid = fullfile('bbs_videos',curVid.name); 57 | lstruct = load(path_to_vid); detections = lstruct.bbs_videos; 58 | % load the bbs_videos from the previous sections 59 | if withLex 60 | words = wordSpotLex(detections,params); % not yet implemented 61 | else 62 | words = wordSpotNoLex(detections,params); % not yet implemented 63 | end 64 | 65 | words_videos{video_index} = words; 66 | end 67 | 68 | %% Visualization 69 | visVideo(video,detections); 70 | %% Performance evaluation -------------------------------------------------------------------------------- /figure_production/createTable1.m: -------------------------------------------------------------------------------- 1 | function createTable1 2 | % Generate the results for Table 1, from our paper, for character 3 | % classification. 4 | % 5 | % CREDITS 6 | % Written and maintained by Kai Wang and Boris Babenko 7 | % Copyright notice: license.txt 8 | % Changelog: changelog.txt 9 | % Please email kaw006@cs.ucsd.edu if you have questions. 10 | 11 | [dPath,ch,ch1,chC,chClfNm]=globals; 12 | 13 | % paramSet={training dataset,with/without neighboring chars} 14 | paramSets={{'icdar','charEasy'},... 15 | {'icdar','charHard'},... 16 | {'synth','charHard'}}; 17 | % tstSet={testing dataset, with/without neighboring chars} 18 | tstSets={{'icdar','charEasy'},{'icdar','charHard'},{'synth','charHard'}}; 19 | 20 | RandStream.getDefaultStream.reset(); 21 | sBin=8; oBin=8; chH=48; 22 | S=6; M=256; thrr=[0 1]; nTrn=Inf; 23 | cHogFtr=@(I)reshape((5*hogOld(single(imResample(I,[chH,chH])),sBin,oBin)),[],1); 24 | cFtr=cHogFtr; 25 | dbgFileNm=sprintf('table1_%i_%i_%i_%i_%i_%1.2f.txt',clock); 26 | fid=fopen(dbgFileNm,'w'); 27 | for p=1:length(paramSets) 28 | clear I y; 29 | paramSet=paramSets{p}; 30 | trnD=paramSet{1}; trnT=paramSet{2}; 31 | 32 | cDir=fullfile(dPath,trnD,'clfs'); 33 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir','none','nBg',0,'nTrn',nTrn}; 34 | cNm=chClfNm(clfPrms{:}); 35 | clfPath=fullfile(cDir,[cNm,'.mat']); 36 | 37 | % train fern if doesn't already exist 38 | if(~exist(clfPath,'file')) 39 | [I,y]=readAllImgs(fullfile(dPath,trnD,'train',trnT),chC,nTrn); 40 | % extract features 41 | x=fevalArrays(I,cFtr)'; 42 | % train ferns 43 | [ferns,~]=fernsClfTrain(double(x),y,struct('S',S,'M',M,'thrr',thrr,'bayes',1)); 44 | if(~exist(cDir,'dir')),mkdir(cDir); end 45 | save(clfPath,'ferns'); 46 | else 47 | load(clfPath); 48 | end 49 | fprintf(fid,'CLF:%s\n',clfPath); 50 | 51 | % run classifier on all the test sets 52 | for j=1:length(tstSets) 53 | tstSet=tstSets{j}; tstD=tstSet{1}; tstT=tstSet{2}; 54 | clear I y; 55 | [I,y]=readAllImgs(fullfile(dPath,tstD,'test',tstT),chC,Inf); 56 | % extract features 57 | x=fevalArrays(I,cFtr)'; 58 | % run ferns: yh are the class ids, and ph are the scores 59 | [yh,ph]=fernsClfApply(double(x),ferns); [~,yha]=sort(ph,2,'descend'); 60 | [y1,~]=equivClass(y,ch); yh1=equivClass(yh,ch); yha1=equivClass(yha,ch); 61 | m=findRanks(y,yha); m1=findRanks(y1,yha1); 62 | fprintf(fid,'TRAIN:%s-%s TEST:%s-%s: top1 error = %f, top3 error = %f\n',... 63 | trnD,trnT,tstD,tstT,mean(y~=yh), mean(m>3)); 64 | fprintf(fid,'EQ:TRAIN:%s-%s TEST:%s-%s: top1 error = %f, top3 error = %f\n',... 
65 | trnD,trnT,tstD,tstT,mean(y1~=yh1), mean(m1>3)); 66 | end 67 | end 68 | fclose(fid); 69 | end -------------------------------------------------------------------------------- /figure_production/createTable2.m: -------------------------------------------------------------------------------- 1 | function createTable2 2 | % Generate the PLEX results for Table 2 in 'End-to-End Scene Text 3 | % Recognition.' 4 | % 5 | % CREDITS 6 | % Written and maintained by Kai Wang and Boris Babenko 7 | % Copyright notice: license.txt 8 | % Changelog: changelog.txt 9 | % Please email kaw006@cs.ucsd.edu if you have questions. 10 | 11 | [dPath,ch,ch1,chC,chClfNm]=globals; 12 | 13 | S=6; M=256; nTrn=Inf; minH=.6; topK=100; 14 | frnPrms={'ss',2^(1/5),'minH',minH}; 15 | nmsPrms={'thr',-75,'separate',1,'type','maxg','resize',{1,1/2},... 16 | 'ovrDnm','union','overlap',.3,'maxn',inf}; 17 | % only consider words that span at least half the image width 18 | widthThr=.5; 19 | 20 | % paramSet={train dataset,with/without neighboring chars, bg dataset,# bg images} 21 | paramSets={{'synth','charHard','msrcBt',10000},... 22 | {'icdar','charHard','icdarBt',10000}}; 23 | % tstSet={test dataset, number of distractors} 24 | tstSets={{'svt',Inf},{'icdar',50},{'icdar',Inf}}; 25 | 26 | RandStream.getDefaultStream.reset(); 27 | dbgFileNm=sprintf('table2_%i_%i_%i_%i_%i_%1.2f.txt',clock); 28 | fid=fopen(dbgFileNm,'w'); fprintf('LOG:%s\n',dbgFileNm); 29 | labNm='wordCharAnnPad'; datNm='wordsPad'; 30 | for p=1:length(paramSets) 31 | clear I y; paramSet=paramSets{p}; 32 | trnD=paramSet{1}; trnT=paramSet{2}; trnBg=paramSet{3}; nBg=paramSet{4}; 33 | 34 | cDir=fullfile(dPath,trnD,'clfs'); 35 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,... 36 | 'nBg',nBg,'nTrn',nTrn}; 37 | cNm=chClfNm(clfPrms{:}); clfPath=fullfile(cDir,[cNm,'.mat']); 38 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 39 | fModel=load(clfPath); 40 | fprintf(fid,'CLF:%s\n',clfPath); 41 | 42 | % loop over test sets 43 | for i=1:length(tstSets) 44 | tstSet=tstSets{i}; tstD=tstSet{1}; kVal=tstSet{2}; 45 | tstDir=fullfile(dPath,tstD,'test'); 46 | fprintf(fid,'TEST DIR:%s\n',tstDir); fprintf(fid,'KVAL:%i\n',kVal); 47 | 48 | allGtStrs=[]; 49 | % collect all ground truth words 50 | for j=0:length(dir(fullfile(tstDir,labNm,'*.txt')))-1 51 | objs=bbGt('bbLoad',fullfile(tstDir,labNm,sprintf('I%05i.jpg.txt',j))); 52 | gt=upper([objs.lbl]); 53 | if(~checkValidGt(gt)), continue; end 54 | allGtStrs{end+1}=gt; 55 | end 56 | allGtStrs=unique(upper(allGtStrs)); 57 | 58 | strMatchPos=[]; tot1=[]; tot2=[]; tot3=[]; 59 | fclose(fid); 60 | 61 | % loop over images 62 | for f=0:length(dir(fullfile(tstDir,labNm,'*.txt')))-1 63 | fid1=fopen(dbgFileNm,'a'); fprintf(fid1,'%i,',f); fclose(fid1); 64 | objs=bbGt('bbLoad',fullfile(tstDir,sprintf('%s/I%05i.jpg.txt',labNm,f))); 65 | gt=upper([objs.lbl]); if(~checkValidGt(gt)), continue; end 66 | I=imread(fullfile(tstDir,sprintf('%s/I%05i.jpg',datNm,f))); 67 | 68 | if(~strcmp(tstSet,'svt')) 69 | if(isinf(kVal)), 70 | lexS=allGtStrs; 71 | else 72 | % add K random distractors 73 | lexS=unique(upper({gt})); numGt=length(lexS); 74 | while(length(lexS)<(kVal+numGt)) 75 | lexS=[lexS,allGtStrs(randSample(length(allGtStrs),... 
76 | kVal+numGt-length(lexS)))]; 77 | lexS=unique(lexS); 78 | end 79 | end 80 | else 81 | lfile=fullfile(tstDir,'wordLexPad',sprintf('I%05i.jpg.txt',f)); 82 | fid1=fopen(lfile); 83 | lexS=textscan(fid1,'%s'); lexS=lexS{1}'; 84 | fclose(fid1); 85 | end 86 | 87 | t3S=tic; 88 | [words,t1,t2]=wordSpot(I,lexS,fModel,{},nmsPrms,frnPrms); 89 | t3=toc(t3S); 90 | 91 | tot1=[tot1,t1]; tot2=[tot2,t2]; tot3=[tot3,t3]; 92 | 93 | words1=words(1:min(length(words),topK)); 94 | % 1=miss, >1=match ind + 1 95 | [strMatch,words1]=getWordMatchInd(objs,words1,... 96 | size(I,2)*widthThr); 97 | strMatchPos=[strMatchPos,strMatch]; 98 | end 99 | 100 | fid=fopen(dbgFileNm,'a'); 101 | fprintf(fid,'$%s\n',clfPath); 102 | fprintf(fid,'(total examples):%i\n',length(strMatchPos)); 103 | 104 | fprintf(fid,'(string match results):'); 105 | u1=unique(strMatchPos); 106 | for j=1:length(u1) 107 | fprintf(fid,'%i:%i,',u1(j)-1,sum(strMatchPos==u1(j))); 108 | end 109 | fprintf(fid,'\n'); 110 | fprintf(fid,'top1:%1.4f\n',sum(strMatchPos==2)/length(strMatchPos)); 111 | end 112 | end 113 | fclose(fid); 114 | end 115 | 116 | % check if word is recalled in the result list 117 | % ID of 1 is a miss; subtract 1 from all other positions (2=>1,3=>2...) 118 | function [strMatch,words1]=getWordMatchInd(gtObj,words,widthThr) 119 | if(nargin==2), widthThr=0; end 120 | if((widthThr>0) && ~isempty(words)) 121 | wbb=reshape([words.bb],5,[])'; 122 | inds=wbb(:,3)>widthThr; words1=words(inds); wbb=wbb(inds,:); 123 | else 124 | words1=words; 125 | end 126 | strMatch=1; 127 | % scan through list and track when we find a match 128 | for i=1:length(words1) 129 | if((strMatch==1) && strcmpi([gtObj.lbl],words1(i).word)) 130 | strMatch=i+1; 131 | end 132 | end 133 | end 134 | -------------------------------------------------------------------------------- /figure_production/createTable2Abbyy.m: -------------------------------------------------------------------------------- 1 | function createTable2Abbyy 2 | % Generate the ABBYY results for Table 2 in 'End-to-End Scene Text Recognition.' 3 | % 4 | % CREDITS 5 | % Written and maintained by Kai Wang and Boris Babenko 6 | % Copyright notice: license.txt 7 | % Changelog: changelog.txt 8 | % Please email kaw006@cs.ucsd.edu if you have questions. 
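% Lexicon settings used below (summary added for reference): kVal=-1 scores the
% raw ABBYY string with no spell check, kVal=Inf spell-checks against the full
% set of ground-truth words, and kVal=50 uses the ground truth plus 50 random
% distractor words, mirroring the PLEX setup in createTable2.m.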
9 | 10 | fprintf('Results for ABBYY on SVT\n'); 11 | evalSvt; 12 | fprintf('Results for ABBYY on ICDAR\n'); 13 | evalIcdar(-1); % no 'spell check' 14 | evalIcdar(Inf); 15 | evalIcdar(50); 16 | 17 | end 18 | 19 | function evalIcdar(kVal) 20 | % Evaluate ABBYY output on ICDAR data 21 | 22 | dPath=globals; 23 | RandStream.getDefaultStream.reset(); 24 | tstDir=fullfile(dPath,'icdar','test'); 25 | nTot=0; nCor=0; labNm='wordCharAnnPad'; 26 | datadir=fullfile(dPath,'icdar','test','abbyy','wordsPad'); 27 | allGtStrs=[]; 28 | for j=0:length(dir(fullfile(tstDir,labNm,'*.txt')))-1 29 | objs=bbGt('bbLoad',fullfile(tstDir,labNm,sprintf('I%05i.jpg.txt',j))); 30 | gt=upper([objs.lbl]); 31 | if(~checkValidGt(gt)), continue; end 32 | allGtStrs{end+1}=gt; 33 | end 34 | allGtStrs=unique(upper(allGtStrs)); 35 | 36 | ticId=ticStatus('evaluating'); 37 | n=length(dir(fullfile(tstDir,labNm,'*.txt'))); 38 | for f=0:n-1 39 | objs=bbGt('bbLoad',fullfile(tstDir,labNm,sprintf('I%05i.jpg.txt',f))); 40 | gt=upper([objs.lbl]); 41 | if(~checkValidGt(gt)), continue; end 42 | abbyyRes=procAbbyy(fullfile(datadir,sprintf('I%05i.txt',f))); 43 | tmpWord=[]; tmpWord.word=abbyyRes; tmpWord.bb=[]; 44 | 45 | if(isinf(kVal)), activeWords=allGtStrs; 46 | else 47 | % add K random distractors 48 | activeWords=unique(upper({gt})); numGt=length(activeWords); 49 | while(length(activeWords)<(kVal+numGt)) 50 | activeWords=[activeWords,... 51 | allGtStrs(randSample(length(allGtStrs),kVal+numGt-length(activeWords)))]; %#ok<*AGROW> 52 | activeWords=unique(activeWords); 53 | end 54 | end 55 | 56 | if(kVal==-1) 57 | if(~isempty(tmpWord) && strcmpi(tmpWord.word,gt)), nCor=nCor+1; end 58 | else tmpWord=spellCheck(tmpWord,activeWords); 59 | if(~isempty(tmpWord) && strcmpi(tmpWord.word,gt)), nCor=nCor+1; end 60 | end 61 | 62 | nTot=nTot+1; tocStatus(ticId,f/n); 63 | end 64 | fprintf('kVal=%f\n',kVal); 65 | fprintf('%i/%i, %f correct\n',nCor,nTot,nCor/nTot); 66 | end 67 | 68 | function evalSvt 69 | % Evaluate ABBYY output on SVT data 70 | 71 | dPath=globals; 72 | tstSet=fullfile('svt','test'); 73 | nDet=0; nTot=0; nCor=0; 74 | labNm='wordCharAnnPad'; lexNm='wordLexPad'; 75 | datadir=fullfile(dPath,'svt','test','abbyy','wordsPad'); 76 | n=length(dir(fullfile(dPath,tstSet,labNm,'*.txt'))); 77 | for f=0:n-1 78 | objs=bbGt('bbLoad',fullfile(dPath,tstSet,labNm,sprintf('I%05i.jpg.txt',f))); 79 | lfile=fullfile(dPath,tstSet,lexNm,sprintf('I%05i.jpg.txt',f)); 80 | 81 | fid=fopen(lfile,'r'); 82 | lexS=textscan(fid,'%s'); lexS=lexS{1}'; 83 | fclose(fid); 84 | 85 | gt=upper([objs.lbl]); 86 | if(~checkValidGt(gt)), continue; end 87 | abbyyRes=procAbbyy(fullfile(datadir,sprintf('I%05i.txt',f))); 88 | tmpWord=[]; tmpWord.word=abbyyRes; tmpWord.bb=[]; 89 | tmpWord1=spellCheck(tmpWord,lexS); 90 | if(~isempty(tmpWord1)), nDet=nDet+1; 91 | if(strcmpi(tmpWord1.word,gt)), nCor=nCor+1; end 92 | end 93 | nTot=nTot+1; 94 | end 95 | fprintf('%i/%i, %f correct\n',nCor,nTot,nCor/nTot); 96 | end 97 | 98 | -------------------------------------------------------------------------------- /fscore2.m: -------------------------------------------------------------------------------- 1 | function [f,x,y,i]=fscore2(xs,ys,beta) 2 | % Compute F-score using beta so the 3 | % 4 | % USAGE 5 | % [f,x,y,i]=Fscore( xs, ys ) 6 | % 7 | % INPUTS 8 | % xs - precision 9 | % ys - recall 10 | % beta - weight factor, 2 favors recall, 1 fair, .5 favors precision 11 | % 12 | % OUTPUTS 13 | % f - fscore 14 | % x - precision at best fscore 15 | % y - recall at best fscore 16 | % i - index of best fscore 
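% EXAMPLE (added for illustration; the values below are made up, not from any
% dataset; xs/ys would normally come from bbGt('compRoc',...)):
%  xs=[.9 .8 .6 .4]; ys=[.1 .4 .6 .8];
%  [f1,x1,y1,i1]=fscore2(xs,ys,1)   % balanced F-score along the curve
%  [f2,x2,y2,i2]=fscore2(xs,ys,2)   % beta=2 shifts the optimum along the curve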
17 | % 18 | % CREDITS 19 | % Written and maintained by Kai Wang and Boris Babenko 20 | % Copyright notice: license.txt 21 | % Changelog: changelog.txt 22 | % Please email kaw006@cs.ucsd.edu if you have questions. 23 | 24 | fs=(1+beta^2)*(xs.*ys)./(beta^2*ys+xs); 25 | [f,i]=max(fs); x=xs(i); y=ys(i); -------------------------------------------------------------------------------- /genHistGraph.m: -------------------------------------------------------------------------------- 1 | % Get fscores and draw 2 | configs=configsgen; 3 | gtDir = fullfile(configs.icdar,'test','charAnn'); 4 | dtDir = fullfile(configs.icdar,'test','det_results'); 5 | dtDirMix = fullfile(configs.icdar,'test','det_results_mix'); 6 | dtDirReal = fullfile(configs.icdar,'test','det_results_real'); 7 | fscores = zeros(length(configs.alphabets),2); 8 | beta = 2; 9 | for iChar = 1:length(configs.alphabets) 10 | try 11 | iChar 12 | currentChar = configs.alphabets(iChar); 13 | [gt0,~] = bbGt('loadAll',gtDir,[],{'lbls',currentChar}); 14 | 15 | fprintf('Load synth\n'); 16 | dtsynth = loadBB(dtDir,iChar); 17 | fprintf('Load real\n'); 18 | dtreal = loadBB(dtDirReal,iChar); 19 | 20 | % Computing score for synth 21 | [gts,dts] = bbGt( 'evalRes', gt0, dtsynth); 22 | [xss,yss,~]=bbGt('compRoc', gts, dts, 0); 23 | fs = fscore2(xss,yss,beta); 24 | fscores(iChar,1) = fs; 25 | 26 | % Computing score for real 27 | [gtr,dtr] = bbGt( 'evalRes', gt0, dtreal); 28 | [xsr,ysr,~]=bbGt('compRoc', gtr, dtr, 0); 29 | fs = fscore2(xsr,ysr,beta); 30 | fscores(iChar,2) = fs; 31 | catch e 32 | e 33 | continue 34 | end 35 | end 36 | 37 | %% 38 | bar(fscores) -------------------------------------------------------------------------------- /genLexIcdar.m: -------------------------------------------------------------------------------- 1 | function genLexIcdar 2 | % Create a synthetic lexicon for the icdar images 3 | % 4 | % CREDITS 5 | % Written and maintained by Kai Wang and Boris Babenko 6 | % Copyright notice: license.txt 7 | % Changelog: changelog.txt 8 | % Please email kaw006@cs.ucsd.edu if you have questions. 9 | 10 | dPath=globals; 11 | RandStream.setDefaultStream(RandStream('mrg32k3a', 'Seed', sum('iccv11'))); 12 | 13 | % paramSet={dataset, test split, k distractors} 14 | paramSets={{'icdar','test',5},... 15 | {'icdar','test',20},... 16 | {'icdar','test',50},... 17 | {'icdar','train',5},... 18 | {'icdar','train',20},... 
19 | {'icdar','train',50}}; 20 | 21 | for p=1:length(paramSets) 22 | paramSet=paramSets{p}; 23 | tstD=paramSet{1}; tstSpl=paramSet{2}; kVal=paramSet{3}; 24 | 25 | gtDir=fullfile(dPath,tstD,tstSpl,'wordAnn'); 26 | lexDir=fullfile(dPath,tstD,tstSpl,sprintf('lex%i',kVal)); 27 | if(~exist(lexDir,'dir')), mkdir(lexDir); end 28 | 29 | files=dir([gtDir '/*.txt']); files={files.name}; 30 | allGtS=[]; 31 | % collect all ground truth words 32 | for i=1:length(files) 33 | % load ground truth and prepare for evaluation 34 | gtNm=[gtDir '/' files{i}]; 35 | gt1=bbGt('bbLoad',gtNm); 36 | gt1=filterValidGt(gt1); 37 | for j=1:length(gt1), allGtS{end+1}=gt1(j).lbl; end 38 | end 39 | allGtS=unique(upper(allGtS)); numAll=length(allGtS); 40 | 41 | % create lexicons for each file 42 | for i=1:length(files) 43 | gtNm=[gtDir '/' files{i}]; 44 | gt1=bbGt('bbLoad',gtNm); 45 | gt1=filterValidGt(gt1); 46 | lexS=unique({gt1.lbl}); 47 | numGt=length(lexS); 48 | while(length(lexS)<(kVal+numGt)) 49 | lexS=[lexS,allGtS(randSample(numAll,kVal+numGt-length(lexS)))]; 50 | lexS=unique(lexS); 51 | end 52 | lexP=fullfile(lexDir,files{i}); 53 | fid=fopen(lexP,'w'); 54 | for j=1:length(lexS); fprintf(fid,'%s\n',lexS{j}); end 55 | fclose(fid); 56 | end 57 | end -------------------------------------------------------------------------------- /getHeatmapMixtures.m: -------------------------------------------------------------------------------- 1 | function [hms,scales]=getHeatmapMixtures(I,models,configs) 2 | % getHeatmapMixtures - Get the heatmap for the mixture case 3 | % 4 | % Synopsis 5 | % [] = () 6 | % 7 | % Description 8 | % 9 | % 10 | % Inputs ([]s are optional) 11 | % (matrix) I color image 12 | % (matrix) models [NxK] cell array of trained models - N class and K mixtures 13 | % (struct) configs configs generated by running configsgen 14 | % 15 | % Outputs ([]s are optional) 16 | % (matrix) hms - the return heatmap, size(hms, 1) = N; 17 | % (matrix) scales - the scales used when performing the filter 18 | % 19 | % Examples 20 | % 21 | % 22 | % See also 23 | % 24 | % 25 | % Requirements 26 | % None 27 | % 28 | % References 29 | % 30 | % ... 
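% Example (added because the Examples section above is empty; the
% loadMixtureModels call and its argument are assumptions about that helper's
% interface, not taken from this file):
%   configs = configsgen;
%   models  = loadMixtureModels('mixture_models');  % N-by-K cell: N classes, K mixtures
%   [hms,scales] = getHeatmapMixtures(imread('data/demo.jpg'),models,configs);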
31 | % 32 | % Authors 33 | % Phuc Xuan Nguyen nguyen.phuc.x@gmail.com 34 | % 35 | % License 36 | % 37 | % 38 | % Changes 39 | % 40 | % 41 | filters = cell(size(models,1)*configs.nMixtures,1); 42 | % contructing the filters 43 | curIndex = 1; 44 | for iModel=1:size(models,1) 45 | for iMix = 1:configs.nMixtures 46 | model = models{iModel,iMix}; 47 | charDims = model.char_dims; 48 | r = floor(charDims(1)/configs.bin_size); 49 | c = floor(charDims(2)/configs.bin_size); 50 | filter = reshape(model.w,[r,c,configs.n_orients*4]); 51 | filters{curIndex} = single(filter); 52 | curIndex = curIndex + 1; 53 | end 54 | end 55 | 56 | pyramid = featpyramid(I,configs); 57 | scales = pyramid.scales; 58 | hms = cell(length(pyramid.scales),1); 59 | for level=1:length(pyramid.scales) 60 | hogI = pyramid.feat{level}; 61 | try 62 | allRes = fconv(hogI,filters,1,length(filters)); 63 | res = cell(size(models,1),1); 64 | curInd = 1; 65 | for iModel=1:size(models,1) 66 | temp = zeros(size(allRes{1},1),size(allRes{1},2),configs.nMixtures); 67 | for iMix = 1:configs.nMixtures 68 | currentRes = allRes{curInd}; 69 | temp(:,:,iMix) = currentRes; 70 | curInd = curInd + 1; 71 | end 72 | maxtemp = max(temp, [], 3); % take the maximum response 73 | res{iModel} = maxtemp; 74 | end 75 | catch e 76 | res = {}; 77 | end 78 | 79 | hms{level} = res; 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /get_filter_responses.m: -------------------------------------------------------------------------------- 1 | function [hms,scales]=get_filter_responses(I,models,configs) 2 | filters = cell(length(models),1); 3 | % contructing the filters 4 | for i=1:length(models) 5 | model = models{i}; 6 | char_dims = model.char_dims; 7 | r = floor(char_dims(1)/configs.bin_size); 8 | c = floor(char_dims(2)/configs.bin_size); 9 | filter = reshape(model.w,[r,c,configs.n_orients*4]); 10 | filters{i} = single(filter); 11 | end 12 | 13 | pyramid = featpyramid(I,configs); 14 | scales = pyramid.scales; 15 | hms = cell(length(pyramid.scales),1); 16 | for level=1:length(pyramid.scales) 17 | hogI = pyramid.feat{level}; 18 | try r=fconv(hogI,filters,1,length(filters)); 19 | catch e; r = {}; end 20 | hms{level} = r; 21 | end 22 | end -------------------------------------------------------------------------------- /globals.m: -------------------------------------------------------------------------------- 1 | function [dPath,ch,ch1,chC,chClfNm,dfNP]=globals 2 | % Global variables 3 | % 4 | % USAGE 5 | % [dPath,ch,ch1,chC,chClfNm,dfNP]=globals 6 | % 7 | % OUTPUTS 8 | % dPath - base directory for data (modify this before running!) 9 | % ch - list of character classes 10 | % ch1 - list of collapsed character classes (upper == lower) 11 | % chC - character classes in a column cell 12 | % chClfNm - function handle to return a crazy classifier name from vars 13 | % dfNP - default character NMS params 14 | % 15 | % CREDITS 16 | % Written and maintained by Kai Wang and Boris Babenko 17 | % Copyright notice: license.txt 18 | % Changelog: changelog.txt 19 | % Please email kaw006@cs.ucsd.edu if you have questions. 
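% EXAMPLE (added for illustration)
%  [dPath,ch,ch1,chC,chClfNm,dfNP]=globals;
%  cNm=chClfNm('S',6,'M',256,'trnT','charHard');  % fern classifier name string
%  fprintf('%i character classes, data rooted at %s\n',length(ch),dPath);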
20 | 21 | %dPath = '/home/kai/datafresh/'; 22 | %dPath = '/users/u1/kai/sharedata/plex/'; 23 | 24 | [~,hostname] = system('hostname'); 25 | hostname = strtrim(hostname); 26 | switch hostname 27 | case 'phuc-ThinkPad-T420' 28 | dPath = '/home/phuc/Research/data2/'; 29 | case 'deepthought' 30 | dPath = '/home/nguyenpx/data2/'; 31 | end 32 | 33 | ch='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'; 34 | chC=mat2cell(ch',ones(length(ch),1)); 35 | chClfNm=@(varargin)chClfNm1(varargin{:}); 36 | % make equivalent lower case and capital letters 37 | [~,ch1]=equivClass(1:length(ch),ch); 38 | 39 | % default character-level NMS parameters 40 | dfNP={'thr',-75,'separate',1,'type','maxg','resize',{3/4,1/2},... 41 | 'ovrDnm','union','overlap',.2,'maxn',inf}; 42 | end 43 | 44 | function t=chClfNm1(varargin) 45 | dfs={'S',6,'M',256,'trnSet','train','trnT','charHard','bgDir','none',... 46 | 'nBg',5000,'nTrn',Inf}; 47 | [S,M,trnSet,trnT,bgDir,nBg,nTrn]=getPrmDflt(varargin,dfs,1); 48 | 49 | % a naming convention for the fern based on its parameters 50 | t=sprintf('fern_S%01iM%03itrnSet%strnT%sbgDir%snBg%inTrn%i',S,M,trnSet,... 51 | trnT,bgDir,nBg,nTrn); 52 | end 53 | 54 | -------------------------------------------------------------------------------- /lexicon/loadLex.m: -------------------------------------------------------------------------------- 1 | function [allwords,lex]=loadLex(tstDir) 2 | % Read in all ground truth words from a test directory 3 | % 4 | % USAGE 5 | % [allwords,lex] = loadLex( tstDir ) 6 | % 7 | % INPUTS 8 | % tstDir - directory path 9 | % 10 | % OUTPUTS 11 | % allwords - cell array of all words 12 | % lex - trie structure 13 | % 14 | % CREDITS 15 | % Written and maintained by Kai Wang and Boris Babenko 16 | % Copyright notice: license.txt 17 | % Changelog: changelog.txt 18 | % Please email kaw006@cs.ucsd.edu if you have questions. 19 | 20 | allwords=cell(0); 21 | for k=0:length(dir(fullfile(tstDir,'wordAnn','*.txt')))-1; 22 | gt=bbGt('bbLoad',fullfile(tstDir,'wordAnn',... 23 | sprintf('I%05i.jpg.txt',k))); 24 | for j=1:length(gt) 25 | if(~checkValidGt(gt(j).lbl)), continue; end 26 | allwords{end+1}=gt(j).lbl; 27 | end 28 | end 29 | if nargout==2, lex=wordDet('build',allwords); end 30 | end 31 | -------------------------------------------------------------------------------- /lexicon/procAbbyy.m: -------------------------------------------------------------------------------- 1 | function wd=procAbbyy(fpath) 2 | % Collect cleaned ABBYY output from the filename 3 | % 4 | % USAGE 5 | % wd = procAbbyy( fpath ) 6 | % 7 | % INPUTS 8 | % fpath - filename of abbyy output 9 | % 10 | % OUTPUTS 11 | % wd - cleaned string 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 
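% EXAMPLE (added; the exact file name is hypothetical, any ABBYY output .txt works)
%  dPath=globals;
%  wd=procAbbyy(fullfile(dPath,'icdar','test','abbyy','wordsPad','I00000.txt'));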
18 | 19 | wd=''; 20 | fid = fopen(fpath); 21 | tline = fgets(fid); 22 | while ischar(tline) 23 | tline1=strtrim(tline); 24 | tline1=tline1(isstrprop(tline1,'alphanum')); 25 | tline = fgets(fid); 26 | wd=[wd, tline1]; 27 | end 28 | fclose(fid); 29 | end 30 | -------------------------------------------------------------------------------- /libraries/bin/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/.gitignore -------------------------------------------------------------------------------- /libraries/bin/bounded_dt.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/bounded_dt.mexa64 -------------------------------------------------------------------------------- /libraries/bin/cascade.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/cascade.mexa64 -------------------------------------------------------------------------------- /libraries/bin/compute_overlap.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/compute_overlap.mexa64 -------------------------------------------------------------------------------- /libraries/bin/dt.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/dt.mexa64 -------------------------------------------------------------------------------- /libraries/bin/fconv.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/fconv.mexa64 -------------------------------------------------------------------------------- /libraries/bin/fconv_var_dim.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/fconv_var_dim.mexa64 -------------------------------------------------------------------------------- /libraries/bin/features.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/features.mexa64 -------------------------------------------------------------------------------- /libraries/bin/fv_cache.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/fv_cache.mexa64 -------------------------------------------------------------------------------- /libraries/bin/get_detection_trees.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/get_detection_trees.mexa64 -------------------------------------------------------------------------------- 
/libraries/bin/resize.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/libraries/bin/resize.mexa64 -------------------------------------------------------------------------------- /libraries/gdetect/bounded_dt.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mex.h" 4 | 5 | static inline int min(int a, int b) { return a <= b ? a : b; } 6 | static inline int max(int a, int b) { return a >= b ? a : b; } 7 | static inline int square(int x) { return x*x; } 8 | 9 | static void max_filter_1d(const double *vals, double *out_vals, int32_t *I, 10 | int s, int step, int n, double a, double b) { 11 | for (int i = 0; i < n; i++) { 12 | double max_val = -INFINITY; 13 | int argmax = 0; 14 | int first = max(0, i-s); 15 | int last = min(n-1, i+s); 16 | for (int j = first; j <= last; j++) { 17 | double val = *(vals + j*step) - a*square(i-j) - b*(i-j); 18 | if (val > max_val) { 19 | max_val = val; 20 | argmax = j; 21 | } 22 | } 23 | *(out_vals + i*step) = max_val; 24 | *(I + i*step) = argmax; 25 | } 26 | } 27 | 28 | // matlab entry point 29 | // [M, Ix, Iy] = bounded_dt(vals, ax, bx, ay, by, s) 30 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 31 | if (nrhs != 6) 32 | mexErrMsgTxt("Wrong number of inputs"); 33 | if (nlhs != 3) 34 | mexErrMsgTxt("Wrong number of outputs"); 35 | if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) 36 | mexErrMsgTxt("Invalid input type rhs[0] (expected double)"); 37 | 38 | const int *dims = mxGetDimensions(prhs[0]); 39 | double *vals = (double *)mxGetPr(prhs[0]); 40 | double ax = mxGetScalar(prhs[1]); 41 | double bx = mxGetScalar(prhs[2]); 42 | double ay = mxGetScalar(prhs[3]); 43 | double by = mxGetScalar(prhs[4]); 44 | int s = (int)mxGetScalar(prhs[5]); 45 | 46 | mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); 47 | mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); 48 | mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); 49 | double *M = mxGetPr(mxM); 50 | int32_t *Ix = (int32_t *)mxGetPr(mxIx); 51 | int32_t *Iy = (int32_t *)mxGetPr(mxIy); 52 | 53 | double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double)); 54 | int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); 55 | int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); 56 | 57 | for (int x = 0; x < dims[1]; x++) 58 | max_filter_1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], s, 1, dims[0], ay, by); 59 | 60 | for (int y = 0; y < dims[0]; y++) 61 | max_filter_1d(tmpM+y, M+y, tmpIx+y, s, dims[0], dims[1], ax, bx); 62 | 63 | // get argmins and adjust for matlab indexing from 1 64 | for (int x = 0; x < dims[1]; x++) { 65 | for (int y = 0; y < dims[0]; y++) { 66 | int p = x*dims[0]+y; 67 | Ix[p] = tmpIx[p]+1; 68 | Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1; 69 | } 70 | } 71 | 72 | mxFree(tmpM); 73 | mxFree(tmpIx); 74 | mxFree(tmpIy); 75 | plhs[0] = mxM; 76 | plhs[1] = mxIx; 77 | plhs[2] = mxIy; 78 | } 79 | -------------------------------------------------------------------------------- /libraries/gdetect/compute_overlap.cc: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #include 3 | using namespace std; 4 | 5 | // 0 1 2 3 4 5 6 | // function o = compute_overlap(bbox, fdimy, fdimx, dimy, dimx, scale, 7 | // 6 7 8 8 | // padx, pady, imsize) 
9 | // bbox bounding box image coordinates [x1 y1 x2 y2] 10 | // fdimy number of rows in filter 11 | // fdimx number of cols in filter 12 | // dimy number of rows in feature map 13 | // dimx number of cols in feature map 14 | // scale image scale the feature map was computed at 15 | // padx x padding added to feature map 16 | // pady y padding added to feature map 17 | // imsize size of the image [h w] 18 | 19 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 20 | // inputs 21 | const double *bbox = mxGetPr(prhs[0]); 22 | const double bbox_x1 = bbox[0] - 1; 23 | const double bbox_y1 = bbox[1] - 1; 24 | const double bbox_x2 = bbox[2] - 1; 25 | const double bbox_y2 = bbox[3] - 1; 26 | 27 | const double filter_dim_y = mxGetScalar(prhs[1]); 28 | const double filter_dim_x = mxGetScalar(prhs[2]); 29 | 30 | const int feat_dim_y = (int)mxGetScalar(prhs[3]); 31 | const int feat_dim_x = (int)mxGetScalar(prhs[4]); 32 | 33 | const double scale = mxGetScalar(prhs[5]); 34 | 35 | const double pad_x = mxGetScalar(prhs[6]); 36 | const double pad_y = mxGetScalar(prhs[7]); 37 | 38 | const double *im_size = mxGetPr(prhs[8]); 39 | const double im_size_x = im_size[1]; 40 | const double im_size_y = im_size[0]; 41 | 42 | const double im_area = im_size_x * im_size_y; 43 | const double bbox_area = (bbox_x2 - bbox_x1 + 1) * (bbox_y2 - bbox_y1 + 1); 44 | 45 | // clip detection window to image boundary only if 46 | // the bbox is less than 70% of the image area 47 | const bool im_clip = (double)bbox_area / (double)im_area < 0.7; 48 | 49 | // outputs 50 | const int dims[] = {feat_dim_y, feat_dim_x}; 51 | mxArray *mx_overlap = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); 52 | double *overlap = (double *)mxGetPr(mx_overlap); 53 | plhs[0] = mx_overlap; 54 | 55 | // compute overlap for each placement of the filter 56 | for (int x = 0; x < feat_dim_x; x++) { 57 | for (int y = 0; y < feat_dim_y; y++) { 58 | // pixel bounding box for filter 59 | double x1 = (x - pad_x) * scale; 60 | double y1 = (y - pad_y) * scale; 61 | double x2 = x1 + filter_dim_x*scale - 1; 62 | double y2 = y1 + filter_dim_y*scale - 1; 63 | 64 | if (im_clip) { 65 | x1 = min(max(x1, 0.0), im_size_x-1); 66 | y1 = min(max(y1, 0.0), im_size_y-1); 67 | x2 = min(max(x2, 0.0), im_size_x-1); 68 | y2 = min(max(y2, 0.0), im_size_y-1); 69 | } 70 | 71 | // intersect with bbox 72 | double xx1 = max(x1, bbox_x1); 73 | double yy1 = max(y1, bbox_y1); 74 | double xx2 = min(x2, bbox_x2); 75 | double yy2 = min(y2, bbox_y2); 76 | 77 | double int_w = xx2 - xx1 + 1; 78 | double int_h = yy2 - yy1 + 1; 79 | 80 | if (int_w > 0 && int_h > 0) { 81 | double filter_w = x2 - x1 + 1; 82 | double filter_h = y2 - y1 + 1; 83 | double filter_area = filter_w * filter_h; 84 | double int_area = int_w * int_h; 85 | double union_area = filter_area + bbox_area - int_area; 86 | 87 | *(overlap + feat_dim_y*x + y) = int_area / union_area; 88 | } 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /libraries/gdetect/compute_overlaps.m: -------------------------------------------------------------------------------- 1 | function overlaps = compute_overlaps(pyra, model, boxes) 2 | % Compute intersection over union overlap between each detection window 3 | % in the model and each bounding box in boxes at every detection window 4 | % position in the input feature pyramid. 
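% (Added note) The per-placement overlap itself is computed by the MEX routine
% compute_overlap.cc above; this wrapper only loops over components, boxes, and
% valid pyramid levels and collects the results.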
5 | % overlaps = compute_overlaps(pyra, model, boxes) 6 | % 7 | % Return value 8 | % overlaps Array struct storing overlap values (more details below) 9 | % 10 | % Arguments 11 | % pyra 12 | % model 13 | % boxes 14 | % 15 | % The overlaps structure: 16 | % The computed overlap values are organized into 17 | % overlaps(c).box(b).o{l}, 18 | % where c is a top-level rule (i.e. component) index, 19 | % b specifies the bounding box boxes(b,:), and 20 | % l is a feature pyramid level. 21 | % The value stored in overlaps(c).box(b).o{l} is a matrix 22 | % with the same size as pyra.feat{l}. Each matrix entry 23 | % is the intersection over union overlap between the detection 24 | % window for component c and the bounding box boxes(b,:). 25 | 26 | num_comps = length(model.rules{model.start}); 27 | num_boxes = size(boxes, 1); 28 | overlaps = []; 29 | 30 | for comp = 1:num_comps 31 | detwin = model.rules{model.start}(comp).detwindow; 32 | shift = model.rules{model.start}(comp).shiftwindow; 33 | for b = 1:num_boxes 34 | overlaps(comp).box(b).o = cell(pyra.num_levels, 1); 35 | end 36 | 37 | for level = 1:pyra.num_levels 38 | if pyra.valid_levels(level) 39 | scoresz = size(model.rules{model.start}(comp).score{level}); 40 | scale = model.sbin/pyra.scales(level); 41 | 42 | for b = 1:num_boxes 43 | overlaps(comp).box(b).o{level} ... 44 | = compute_overlap(boxes(b,:), detwin(1), detwin(2), ... 45 | scoresz(1), scoresz(2), scale, ... 46 | pyra.padx+shift(2), pyra.pady+shift(1), ... 47 | pyra.imsize); 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /libraries/gdetect/dt.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mex.h" 4 | 5 | /* 6 | * Generalized distance transforms. 7 | * We use a simple nlog(n) divide and conquer algorithm instead of the 8 | * theoretically faster linear method, for no particular reason except 9 | * that this is a bit simpler and I wanted to test it out. 10 | * 11 | * The code is a bit convoluted because dt1d can operate either along 12 | * a row or column of an array. 
13 | */ 14 | 15 | static inline int square(int x) { return x*x; } 16 | 17 | // dt helper function 18 | void dt_helper(double *src, double *dst, int *ptr, int step, 19 | int s1, int s2, int d1, int d2, double a, double b) { 20 | if (d2 >= d1) { 21 | int d = (d1+d2) >> 1; 22 | int s = s1; 23 | for (int p = s1+1; p <= s2; p++) 24 | if (src[s*step] - a*square(d-s) - b*(d-s) < 25 | src[p*step] - a*square(d-p) - b*(d-p)) 26 | s = p; 27 | dst[d*step] = src[s*step] - a*square(d-s) - b*(d-s); 28 | ptr[d*step] = s; 29 | dt_helper(src, dst, ptr, step, s1, s, d1, d-1, a, b); 30 | dt_helper(src, dst, ptr, step, s, s2, d+1, d2, a, b); 31 | } 32 | } 33 | 34 | // dt of 1d array 35 | void dt1d(double *src, double *dst, int *ptr, int step, int n, 36 | double a, double b) { 37 | dt_helper(src, dst, ptr, step, 0, n-1, 0, n-1, a, b); 38 | } 39 | 40 | // matlab entry point 41 | // [M, Ix, Iy] = dt(vals, ax, bx, ay, by) 42 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 43 | if (nrhs != 5) 44 | mexErrMsgTxt("Wrong number of inputs"); 45 | if (nlhs != 3) 46 | mexErrMsgTxt("Wrong number of outputs"); 47 | if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) 48 | mexErrMsgTxt("Invalid input"); 49 | 50 | const int *dims = mxGetDimensions(prhs[0]); 51 | double *vals = (double *)mxGetPr(prhs[0]); 52 | double ax = mxGetScalar(prhs[1]); 53 | double bx = mxGetScalar(prhs[2]); 54 | double ay = mxGetScalar(prhs[3]); 55 | double by = mxGetScalar(prhs[4]); 56 | 57 | mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); 58 | mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); 59 | mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); 60 | double *M = (double *)mxGetPr(mxM); 61 | int32_t *Ix = (int32_t *)mxGetPr(mxIx); 62 | int32_t *Iy = (int32_t *)mxGetPr(mxIy); 63 | 64 | double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double)); 65 | int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); 66 | int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); 67 | 68 | for (int x = 0; x < dims[1]; x++) 69 | dt1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], 1, dims[0], ay, by); 70 | 71 | for (int y = 0; y < dims[0]; y++) 72 | dt1d(tmpM+y, M+y, tmpIx+y, dims[0], dims[1], ax, bx); 73 | 74 | // get argmins and adjust for matlab indexing from 1 75 | for (int x = 0; x < dims[1]; x++) { 76 | for (int y = 0; y < dims[0]; y++) { 77 | int p = x*dims[0]+y; 78 | Ix[p] = tmpIx[p]+1; 79 | Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1; 80 | } 81 | } 82 | 83 | mxFree(tmpM); 84 | mxFree(tmpIx); 85 | mxFree(tmpIy); 86 | plhs[0] = mxM; 87 | plhs[1] = mxIx; 88 | plhs[2] = mxIy; 89 | } 90 | -------------------------------------------------------------------------------- /libraries/gdetect/fconv_var_dim.cc: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | #include 3 | #include 4 | 5 | /* 6 | * This code is used for computing filter responses. It computes the 7 | * response of a set of filters with a feature map. 8 | * 9 | * Basic version, relatively slow but very compatible. 
10 | */ 11 | 12 | struct thread_data { 13 | float *A; 14 | float *B; 15 | double *C; 16 | mxArray *mxC; 17 | const mwSize *A_dims; 18 | const mwSize *B_dims; 19 | mwSize C_dims[2]; 20 | }; 21 | 22 | // convolve A and B 23 | void process(void *thread_arg) { 24 | thread_data *args = (thread_data *)thread_arg; 25 | float *A = args->A; 26 | float *B = args->B; 27 | double *C = args->C; 28 | const mwSize *A_dims = args->A_dims; 29 | const mwSize *B_dims = args->B_dims; 30 | const mwSize *C_dims = args->C_dims; 31 | int num_features = args->A_dims[2]; 32 | 33 | for (int f = 0; f < num_features; f++) { 34 | double *dst = C; 35 | float *A_src = A + f*A_dims[0]*A_dims[1]; 36 | float *B_src = B + f*B_dims[0]*B_dims[1]; 37 | for (int x = 0; x < C_dims[1]; x++) { 38 | for (int y = 0; y < C_dims[0]; y++) { 39 | double val = 0; 40 | for (int xp = 0; xp < B_dims[1]; xp++) { 41 | float *A_off = A_src + (x+xp)*A_dims[0] + y; 42 | float *B_off = B_src + xp*B_dims[0]; 43 | switch(B_dims[0]) { 44 | case 20: val += A_off[19] * B_off[19]; 45 | case 19: val += A_off[18] * B_off[18]; 46 | case 18: val += A_off[17] * B_off[17]; 47 | case 17: val += A_off[16] * B_off[16]; 48 | case 16: val += A_off[15] * B_off[15]; 49 | case 15: val += A_off[14] * B_off[14]; 50 | case 14: val += A_off[13] * B_off[13]; 51 | case 13: val += A_off[12] * B_off[12]; 52 | case 12: val += A_off[11] * B_off[11]; 53 | case 11: val += A_off[10] * B_off[10]; 54 | case 10: val += A_off[9] * B_off[9]; 55 | case 9: val += A_off[8] * B_off[8]; 56 | case 8: val += A_off[7] * B_off[7]; 57 | case 7: val += A_off[6] * B_off[6]; 58 | case 6: val += A_off[5] * B_off[5]; 59 | case 5: val += A_off[4] * B_off[4]; 60 | case 4: val += A_off[3] * B_off[3]; 61 | case 3: val += A_off[2] * B_off[2]; 62 | case 2: val += A_off[1] * B_off[1]; 63 | case 1: val += A_off[0] * B_off[0]; 64 | break; 65 | default: 66 | for (int yp = 0; yp < B_dims[0]; yp++) { 67 | val += *(A_off++) * *(B_off++); 68 | } 69 | } 70 | } 71 | *(dst++) += val; 72 | } 73 | } 74 | } 75 | } 76 | 77 | // matlab entry point 78 | // C = fconv(A, cell of B, start, end); 79 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 80 | if (nrhs != 4) 81 | mexErrMsgTxt("Wrong number of inputs"); 82 | if (nlhs != 1) 83 | mexErrMsgTxt("Wrong number of outputs"); 84 | 85 | // get A 86 | const mxArray *mxA = prhs[0]; 87 | if (mxGetNumberOfDimensions(mxA) != 3 || 88 | mxGetClassID(mxA) != mxSINGLE_CLASS) 89 | mexErrMsgTxt("Invalid input: A"); 90 | 91 | // get B and start/end 92 | const mxArray *cellB = prhs[1]; 93 | mwSize num_bs = mxGetNumberOfElements(cellB); 94 | int start = (int)mxGetScalar(prhs[2]) - 1; 95 | int end = (int)mxGetScalar(prhs[3]) - 1; 96 | if (start < 0 || end >= num_bs || start > end) 97 | mexErrMsgTxt("Invalid input: start/end"); 98 | int len = end-start+1; 99 | 100 | // output cell 101 | plhs[0] = mxCreateCellMatrix(1, len); 102 | 103 | // do convolutions 104 | thread_data td; 105 | const mwSize *A_dims = mxGetDimensions(mxA); 106 | float *A = (float *)mxGetPr(mxA); 107 | for (int i = 0; i < len; i++) { 108 | const mxArray *mxB = mxGetCell(cellB, i+start); 109 | td.A_dims = A_dims; 110 | td.A = A; 111 | td.B_dims = mxGetDimensions(mxB); 112 | td.B = (float *)mxGetPr(mxB); 113 | if (mxGetNumberOfDimensions(mxB) != 3 || 114 | mxGetClassID(mxB) != mxSINGLE_CLASS || 115 | td.A_dims[2] != td.B_dims[2]) 116 | mexErrMsgTxt("Invalid input: B"); 117 | 118 | // compute size of output 119 | int height = td.A_dims[0] - td.B_dims[0] + 1; 120 | int width = td.A_dims[1] - 
td.B_dims[1] + 1; 121 | if (height < 1 || width < 1) 122 | mexErrMsgTxt("Invalid input: B should be smaller than A"); 123 | td.C_dims[0] = height; 124 | td.C_dims[1] = width; 125 | td.mxC = mxCreateNumericArray(2, td.C_dims, mxDOUBLE_CLASS, mxREAL); 126 | td.C = (double *)mxGetPr(td.mxC); 127 | process((void *)&td); 128 | mxSetCell(plhs[0], i, td.mxC); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /libraries/gdetect/gdetect.m: -------------------------------------------------------------------------------- 1 | function [ds, bs, trees] = gdetect(pyra, model, thresh, max_num) 2 | % Detect objects in a feature pyramid using a model and a score threshold. 3 | % Higher thresholds lead to fewer detections. 4 | % [ds, bs, trees] = gdetect(pyra, model, thresh, max_num) 5 | % 6 | % Return values (more details are below) 7 | % ds Detection windows 8 | % bs Bounding boxes for all filters used in each detection 9 | % trees Derivation trees corresponding to each detection 10 | % 11 | % Arguments 12 | % pyra Feature pyramid to get detections from (output of featpyramid.m) 13 | % model Model to use for detection 14 | % thresh Detection threshold (scores must be > thresh) 15 | % max_num Maximum number of detections to return 16 | % 17 | % ds 18 | % A matrix with 6 columns and one row per detection. Columns 1-4 19 | % give the pixel coordinates (x1,y1,x2,y2) of each detection bounding box. 20 | % Column 5 specifies the model component used for each detection and column 21 | % 6 gives the score of each detection. 22 | % 23 | % bs 24 | % A matrix with one row per detection and each sequential group 25 | % of 4 columns specifies the pixel coordinates of each model filter bounding 26 | % box (i.e., where the parts were placed). The index in the sequence is 27 | % the same as the index in model.filters. 28 | % 29 | % trees 30 | % Detailed information about each detection required for extracted feature 31 | % vectors during learning. Each entry in trees describes the derivation 32 | % tree, under the grammar model, that corresponds to each detection. 33 | 34 | if nargin < 4 35 | max_num = inf; 36 | end 37 | 38 | model = gdetect_dp(pyra, model); 39 | [ds, bs, trees] = gdetect_parse(model, pyra, thresh, max_num); 40 | -------------------------------------------------------------------------------- /libraries/gdetect/gdetect_parse.m: -------------------------------------------------------------------------------- 1 | function [ds, bs, trees] = gdetect_parse(model, pyra, thresh, max_num) 2 | % Compute the set of detections from the dynamic programming tables stored 3 | % in model. 4 | % [ds, bs, trees] = gdetect_parse(model, pyra, thresh, max_num) 5 | % 6 | % This function identifies the highest scoring placements of the grammar's 7 | % start symbol. It then traces back through the dynamic programming tables 8 | % in order to recover the derivation trees used for each detection. While 9 | % doing this, detection windows and bounding boxes for each placed filter 10 | % are recovered. 
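% (Added cross-reference) gdetect.m above calls this right after the dynamic
% programming pass:
%   model = gdetect_dp(pyra, model);
%   [ds, bs, trees] = gdetect_parse(model, pyra, thresh, max_num);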
11 | % 12 | % Return values (see gdetect.m) 13 | % 14 | % Arguments 15 | % pyra Feature pyramid to get detections from (output of featpyramid.m) 16 | % model Model to use for detection 17 | % thresh Detection threshold (scores must be > thresh) 18 | % max_num Maximum number of detections to return 19 | 20 | % Find scores above the threshold 21 | X = zeros(0, 'int32'); 22 | Y = zeros(0, 'int32'); 23 | I = zeros(0, 'int32'); 24 | L = zeros(0, 'int32'); 25 | S = []; 26 | for level = 1:pyra.num_levels 27 | score = model.symbols(model.start).score{level}; 28 | tmpI = find(score > thresh); 29 | [tmpY, tmpX] = ind2sub(size(score), tmpI); 30 | X = [X; tmpX]; 31 | Y = [Y; tmpY]; 32 | I = [I; tmpI]; 33 | L = [L; level*ones(length(tmpI), 1)]; 34 | S = [S; score(tmpI)]; 35 | end 36 | 37 | [ign, ord] = sort(S, 'descend'); 38 | if ~isempty(ord) 39 | ord = ord(1:min(length(ord), max_num)); 40 | end 41 | X = X(ord); 42 | Y = Y(ord); 43 | I = I(ord); 44 | L = L(ord); 45 | S = S(ord); 46 | 47 | get_loss = false; 48 | if isfield(model.rules{model.start}, 'loss') 49 | get_loss = true; 50 | end 51 | 52 | % Compute detection windows, filter bounding boxes, and derivation trees 53 | [ds, bs, trees] = get_detection_trees(model, pyra.padx, pyra.pady, ... 54 | pyra.scales, X, Y, L, S, get_loss); 55 | -------------------------------------------------------------------------------- /libraries/gdetect/gdetect_pos_prepare.m: -------------------------------------------------------------------------------- 1 | function [pyra, model_dp] = gdetect_pos_prepare(im, model, boxes, fg_overlap) 2 | % Prepare a set of foreground examples in the same image for processing 3 | % with gdetect_pos.m. 4 | % [pyra, model_dp] = gdetect_pos_prepare(im, model, boxes, fg_overlap) 5 | % 6 | % Return values 7 | % pyra Feature pyramid for image im 8 | % model_dp Model augmented with dynamic programming tables 9 | % 10 | % Arguments 11 | % im Foreground image with one or more foreground examples 12 | % model Object model 13 | % boxes Foreground example bounding boxes from foreground image im 14 | % fg_overlap Amount of overlap required between a belief 15 | % and a foreground example 16 | 17 | 18 | % get feature pyramid 19 | pyra = featpyramid(im, model); 20 | 21 | % mark valid levels (skip levels that don't have sufficient 22 | % overlap with any box in boxes 23 | pyra.valid_levels = validate_levels(model, pyra, boxes, fg_overlap); 24 | 25 | % compute dynamic programming tables (stored in model_dp) 26 | model_dp = gdetect_dp(pyra, model); 27 | 28 | % compute overlap info for each component, box, and valid pyramid level 29 | % (We end up computing overlap twice -- once here and once in 30 | % validate_levels. At the expense of making the code yet more complex 31 | % we could this computation only once. The reason this isn't straight- 32 | % forward is that the overlaps need to have exactly the same dimensions 33 | % as the score tables computed by gdetect_dp. But we don't want to call 34 | % gdetect_dp until we know which levels can be skipped, which requires 35 | % computing overlaps... At any rate, this isn't a major bottleneck.) 36 | pyra.overlaps = compute_overlaps(pyra, model_dp, boxes); 37 | -------------------------------------------------------------------------------- /libraries/gdetect/imgdetect.m: -------------------------------------------------------------------------------- 1 | function [ds, bs, trees] = imgdetect(im, model, thresh) 2 | % Wrapper around gdetect.m that computes detections in an image. 
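% (Added note) The body below is equivalent to building the pyramid yourself:
%   pyra = featpyramid(color(im), model);   % color() makes the image 3-channel
%   [ds, bs, trees] = gdetect(pyra, model, thresh);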
3 | % [ds, bs, trees] = imgdetect(im, model, thresh) 4 | % 5 | % Return values (see gdetect.m) 6 | % 7 | % Arguments 8 | % im Input image 9 | % model Model to use for detection 10 | % thresh Detection threshold (scores must be > thresh) 11 | 12 | im = color(im); % Make the image 3 channel 13 | pyra = featpyramid(im, model); 14 | [ds, bs, trees] = gdetect(pyra, model, thresh); 15 | -------------------------------------------------------------------------------- /libraries/gdetect/loss_func.m: -------------------------------------------------------------------------------- 1 | function losses = loss_func(o) 2 | % Compute the loss associated with the intersection over union 3 | % overlap between a ground-truth bounding box and any other 4 | % windows. 5 | % losses = loss_func(o) 6 | % 7 | % Return value 8 | % losses Loss for each element in the input 9 | % 10 | % Argument 11 | % o Vector of overlap values 12 | 13 | % The PASCAL VOC detection task loss 14 | % Loss is 0 for IoU >= 0.5 15 | % Loss is 1 for IoU < 0.5 16 | losses = zeros(size(o)); 17 | I = find(o < 0.5); 18 | losses(I) = 1.0; 19 | -------------------------------------------------------------------------------- /libraries/gdetect/loss_pyramid.m: -------------------------------------------------------------------------------- 1 | function model = loss_pyramid(h_loss_func, pyra, model, fg_box, ... 2 | bg_boxes, min_fg_overlap, max_bg_overlap) 3 | % Computes a pyramid of loss function values for each top-level 4 | % rule in the grammar. 5 | % model = loss_pyramid(h_loss_func, pyra, model, fg_box, ... 6 | % bg_boxes, min_fg_overlap, max_bg_overlap) 7 | % 8 | % These loss values are used for computing the loss adjusted inference: 9 | % \max_{s \in S(x)} w \dot \psi(x,s) + L_margin(y,s) 10 | % The set of valid outputs S(x) is enforced by making L(y,s) = -inf for 11 | % some values of s, which prevents them from being selected in the 12 | % maximization. 13 | % 14 | % Return value 15 | % model Model augmented to store the computed loss pyramids 16 | % 17 | % Arguments 18 | % h_loss_func Handle to loss function 19 | % model Model 20 | % (augmented with DP tables from gdetect_dp.m) 21 | % pyra Feature pyramid 22 | % (augmented with overlaps from gdetect_pos_prepare.m) 23 | % fg_box Selected foreground bounding box index 24 | % bg_boxes Indices of non-selected bounding boxes in image 25 | % min_fg_overlap Minimum required amount of overlap with fg box 26 | % max_bg_overlap Maximum allowed amount of overlap with bg bounding boxes 27 | 28 | num_bg_boxes = length(bg_boxes); 29 | 30 | % For each model component 31 | for comp = 1:length(model.rules{model.start}) 32 | % For each feature pyramid level 33 | for level = 1:pyra.num_levels 34 | if pyra.valid_levels(level) 35 | % Assign loss for root locations based on the selected foreground box 36 | o = pyra.overlaps(comp).box(fg_box).o{level}; 37 | losses = h_loss_func(o); 38 | model.rules{model.start}(comp).loss{level} = losses; 39 | 40 | % Require at least some overlap with the foreground bounding box 41 | % Rationale: 42 | % In an image with multiple objects, this constraint encourages a 43 | % diverse set of false positives (otherwise, they will tend to come 44 | % from the same high-scoring / low-overlapping region of the image 45 | % -- i.e. 
somewhere in the background) 46 | I = find(o < min_fg_overlap); 47 | model.rules{model.start}(comp).loss{level}(I) = -inf; 48 | 49 | % Mark root locations that have too much overlap with background boxes 50 | % as invalid 51 | % Rationale: 52 | % We don't want to select detections of other foreground objects 53 | % in the image as false positives (i.e., no true positive should 54 | % be allowed to be used as a false positive) 55 | for b = 1:num_bg_boxes 56 | o = pyra.overlaps(comp).box(bg_boxes(b)).o{level}; 57 | inds = find(o >= max_bg_overlap); 58 | model.rules{model.start}(comp).loss{level}(inds) = -inf; 59 | end 60 | else 61 | model.rules{model.start}(comp).loss{level} = 0; 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /libraries/gdetect/tree_mat_to_struct.m: -------------------------------------------------------------------------------- 1 | function s = tree_mat_to_struct(m) 2 | % Convert a tree matrix returned by get_detection_trees() into a struct. 3 | % s = tree_mat_to_struct(m) 4 | % 5 | % Return value 6 | % s Array struct with one entry per symbol (column in m) 7 | % 8 | % Argument 9 | % m Tree matrix from get_detection_trees() 10 | % Each column comes from a symbol in a derivation tree 11 | % Each row corresponds to a field (N_* below) 12 | 13 | % Indexes into tree from get_detection_trees.cc 14 | N_PARENT = 1; 15 | N_IS_LEAF = 2; 16 | N_SYMBOL = 3; 17 | N_RULE_INDEX = 4; 18 | N_RHS_INDEX = 5; 19 | N_X = 6; 20 | N_Y = 7; 21 | N_L = 8; 22 | N_DS = 9; 23 | N_DX = 10; 24 | N_DY = 11; 25 | N_SCORE = 12; 26 | N_LOSS = 13; 27 | N_SZ = 14; 28 | 29 | l = size(m, 2); 30 | f = @(i) mat2cell(m(i, :), 1, ones(1,l)); 31 | s = struct('parent', f(N_PARENT), ... 32 | 'is_leaf', f(N_IS_LEAF), ... 33 | 'symbol', f(N_SYMBOL), ... 34 | 'rule_index', f(N_RULE_INDEX), ... 35 | 'rhs_index', f(N_RHS_INDEX), ... 36 | 'x', f(N_X), ... 37 | 'y', f(N_Y), ... 38 | 'l', f(N_L), ... 39 | 'ds', f(N_DS), ... 40 | 'dx', f(N_DX), ... 41 | 'dy', f(N_DY), ... 42 | 'score', f(N_SCORE), ... 43 | 'loss', f(N_LOSS)); 44 | -------------------------------------------------------------------------------- /libraries/gdetect/validate_levels.m: -------------------------------------------------------------------------------- 1 | function do_levels = validate_levels(model, pyra, boxes, overlap) 2 | % Determine which feature pyramid levels permit high overlap between 3 | % the model and any of the input boxes. 
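% (Added cross-reference) Called from gdetect_pos_prepare.m above to mark which
% pyramid levels can be skipped when processing foreground examples.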
4 | % do_levels = validate_levels(model, pyra, boxes, overlap) 5 | % 6 | % Return value 7 | % do_levels Boolean array indicating on which feature pyramid levels 8 | % we need to compute convolutions 9 | % 10 | % Arguments 11 | % model Object model 12 | % pyra Feature pyramid 13 | % boxes Ground truth bounding boxes 14 | % overlap Overlap threshold 15 | 16 | num_boxes = size(boxes,1); 17 | do_levels = false(pyra.num_levels, 1); 18 | % for each pyramid level 19 | % for each box 20 | % for each component (in test overlap) 21 | for l = 1:pyra.num_levels 22 | for b = 1:num_boxes 23 | if testoverlap(l, model, pyra, boxes(b,:), overlap) 24 | do_levels(l) = true; 25 | % WARNING: assumes that models only have one level of parts 26 | if l - model.interval > 0 27 | do_levels(l-model.interval) = true; 28 | end 29 | end 30 | end 31 | end 32 | 33 | 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35 | % ok=true if any detection window has sufficient overlap at level 36 | % ok=false otherwise 37 | function ok = testoverlap(level, model, pyra, bbox, overlap) 38 | % level pyramid level 39 | % model object model 40 | % pyra feature pyramid 41 | % bbox ground truth bbox 42 | % overlap overlap threshold 43 | 44 | ok = false; 45 | scale = model.sbin/pyra.scales(level); 46 | for r = 1:length(model.rules{model.start}) 47 | detwin = model.rules{model.start}(r).detwindow; 48 | shift = model.rules{model.start}(r).shiftwindow; 49 | o = compute_overlap(bbox, detwin(1), detwin(2), ... 50 | size(pyra.feat{level},1), ... 51 | size(pyra.feat{level},2), ... 52 | scale, pyra.padx+shift(2), ... 53 | pyra.pady+shift(1), pyra.imsize); 54 | 55 | inds = find(o >= overlap); 56 | if ~isempty(inds) 57 | ok = true; 58 | break; 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /libraries/gdetect/write_zero_fv.m: -------------------------------------------------------------------------------- 1 | function write_zero_fv(from_pos, key) 2 | % Write a zero vector to the feature vector cache. 3 | % write_zero_fv(from_pos, key) 4 | % 5 | % Arguments 6 | % from_pos True if the zero vector is to be used as the background output 7 | % feature vector for a foreground example 8 | % False if the zero vector is to be used as the belief feature 9 | % vector for a background example 10 | % key Feature vector cache key (see fv_cache.h and gdetect_write.m) 11 | 12 | if from_pos 13 | % The zero vector is being used as the feature vector associated with the 14 | % background output for a foreground example 15 | loss = 1; 16 | is_mined = 0; 17 | is_belief = 0; 18 | else 19 | % The zero vector is being used as the feature vector associated with the 20 | % belief for a background example 21 | loss = 0; 22 | is_mined = 1; 23 | is_belief = 1; 24 | end 25 | 26 | byte_size = fv_cache('add', int32(key), int32([]), single([]), ... 27 | int32(is_belief), int32(is_mined), loss); 28 | -------------------------------------------------------------------------------- /libraries/vis/HOGpicture.m: -------------------------------------------------------------------------------- 1 | function im = HOGpicture(w, bs) 2 | % Make picture of positive HOG weights. 
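%
% Illustrative example: for a 6x6x9 array of positive weights and bs = 20,
% the result is a 120x120 image with one 20x20 oriented-bar glyph per cell:
%   im = HOGpicture(rand(6,6,9), 20); imagesc(im); colormap gray;
%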
3 | % im = HOGpicture(w, bs) 4 | 5 | % construct a "glyph" for each orientaion 6 | bim1 = zeros(bs, bs); 7 | bim1(:,round(bs/2):round(bs/2)+1) = 1; 8 | bim = zeros([size(bim1) 9]); 9 | bim(:,:,1) = bim1; 10 | for i = 2:9, 11 | bim(:,:,i) = imrotate(bim1, -(i-1)*20, 'crop'); 12 | end 13 | 14 | % make pictures of positive weights bs adding up weighted glyphs 15 | s = size(w); 16 | w(w < 0) = 0; 17 | im = zeros(bs*s(1), bs*s(2)); 18 | for i = 1:s(1), 19 | iis = (i-1)*bs+1:i*bs; 20 | for j = 1:s(2), 21 | jjs = (j-1)*bs+1:j*bs; 22 | for k = 1:9, 23 | im(iis,jjs) = im(iis,jjs) + bim(:,:,k) * w(i,j,k); 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /libraries/vis/draw_wire_frame.m: -------------------------------------------------------------------------------- 1 | function draw_wire_frame(points,edges) 2 | % Draw the points first 3 | plot3(points(:,1),points(:,2),points(:,3),'rx'); 4 | 5 | hold on; 6 | 7 | for edge_index=1:size(edges,1) 8 | edge_to_draw = edges(edge_index,:); 9 | plot3(points(edge_to_draw,1),... 10 | points(edge_to_draw,2),... 11 | points(edge_to_draw,3),'b'); 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /libraries/vis/foldHOG.m: -------------------------------------------------------------------------------- 1 | function f = foldHOG(w) 2 | % Condense HOG features into one orientation histogram. 3 | % f = foldHOG(w) 4 | % 5 | % Used for displaying features and filters 6 | 7 | % Return the contrast insensitive orientations 8 | f = w(:,:,19:27); 9 | 10 | % f=max(w(:,:,1:9),0)+max(w(:,:,10:18),0)+max(w(:,:,19:27),0); 11 | -------------------------------------------------------------------------------- /libraries/vis/showboxes.m: -------------------------------------------------------------------------------- 1 | function showboxes(im, boxes, out) 2 | % Draw bounding boxes on top of an image. 3 | % showboxes(im, boxes, out) 4 | % 5 | % If out is given, a pdf of the image is generated (requires export_fig). 
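%
% Illustrative calls (assumed layout: one detection per row, four columns
% [x1 y1 x2 y2] per filter, detection window in columns 1:4):
%   showboxes(im, ds(:,1:4));        % detection windows only
%   showboxes(im, bs, 'dets.pdf');   % all filter boxes, also saved to PDF
%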
6 | 7 | if nargin > 2 8 | % different settings for producing pdfs 9 | print = true; 10 | %wwidth = 2.25; 11 | %cwidth = 1.25; 12 | cwidth = 1.4; 13 | wwidth = cwidth + 1.1; 14 | imsz = size(im); 15 | % resize so that the image is 300 pixels per inch 16 | % and 1.2 inches tall 17 | scale = 1.2 / (imsz(1)/300); 18 | im = imresize(im, scale, 'method', 'cubic'); 19 | %f = fspecial('gaussian', [3 3], 0.5); 20 | %im = imfilter(im, f); 21 | boxes = (boxes-1)*scale+1; 22 | else 23 | print = false; 24 | cwidth = 2; 25 | end 26 | 27 | image(im); 28 | if print 29 | truesize(gcf); 30 | end 31 | axis image; 32 | axis off; 33 | set(gcf, 'Color', 'white'); 34 | 35 | if ~isempty(boxes) 36 | numfilters = floor(size(boxes, 2)/4); 37 | if print 38 | % if printing, increase the contrast around the boxes 39 | % by printing a white box under each color box 40 | for i = 1:numfilters 41 | x1 = boxes(:,1+(i-1)*4); 42 | y1 = boxes(:,2+(i-1)*4); 43 | x2 = boxes(:,3+(i-1)*4); 44 | y2 = boxes(:,4+(i-1)*4); 45 | % remove unused filters 46 | del = find(((x1 == 0) .* (x2 == 0) .* (y1 == 0) .* (y2 == 0)) == 1); 47 | x1(del) = []; 48 | x2(del) = []; 49 | y1(del) = []; 50 | y2(del) = []; 51 | if i == 1 52 | w = wwidth; 53 | else 54 | w = wwidth; 55 | end 56 | 57 | % if i == 13+1 || i == 14+1 58 | % c = 'k'; 59 | % w = cwidth + 0.5; 60 | % else 61 | c = 'w'; 62 | % end 63 | 64 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', 'color', c, 'linewidth', w); 65 | end 66 | end 67 | % draw the boxes with the detection window on top (reverse order) 68 | for i = numfilters:-1:1 69 | x1 = boxes(:,1+(i-1)*4); 70 | y1 = boxes(:,2+(i-1)*4); 71 | x2 = boxes(:,3+(i-1)*4); 72 | y2 = boxes(:,4+(i-1)*4); 73 | % remove unused filters 74 | del = find(((x1 == 0) .* (x2 == 0) .* (y1 == 0) .* (y2 == 0)) == 1); 75 | x1(del) = []; 76 | x2(del) = []; 77 | y1(del) = []; 78 | y2(del) = []; 79 | if i == 1 80 | c = 'r'; %[160/255 0 0]; 81 | s = '-'; 82 | % elseif i == 13+1 || i == 14+1 83 | % c = 'c'; 84 | % s = '--'; 85 | else 86 | c = 'b'; 87 | s = '-'; 88 | end 89 | line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', 'color', c, 'linewidth', cwidth, 'linestyle', s); 90 | end 91 | end 92 | 93 | % save to pdf 94 | if print 95 | % requires export_fig from http://www.mathworks.com/matlabcentral/fileexchange/23629-exportfig 96 | export_fig([out]); 97 | end 98 | -------------------------------------------------------------------------------- /libraries/vis/vis_derived_filter.m: -------------------------------------------------------------------------------- 1 | function f = vis_derived_filter(model, tree) 2 | 3 | conf = voc_config(); 4 | 5 | % indexes into info from get_detection_trees.cc 6 | % replace with tree_mat_to_struct 7 | N_PARENT = 1; 8 | N_IS_LEAF = 2; 9 | N_SYMBOL = 3; 10 | N_RULE_INDEX = 4; 11 | N_RHS_INDEX = 5; 12 | N_X = 6; 13 | N_Y = 7; 14 | N_L = 8; 15 | N_DS = 9; 16 | N_DX = 10; 17 | N_DY = 11; 18 | N_SCORE = 12; 19 | N_LOSS = 13; 20 | N_SZ = 14; 21 | 22 | rx = tree(N_X, 1); 23 | ry = tree(N_Y, 1); 24 | rl = tree(N_L, 1); 25 | 26 | f = zeros([0 0 conf.features.dim]); 27 | off_x = 0; 28 | off_y = 0; 29 | 30 | for i = 2:size(tree,2) 31 | s = tree(N_SYMBOL, i); 32 | if model.symbols(s).type == 'T' 33 | x = off_x + tree(N_X, i) - rx; 34 | y = off_y + tree(N_Y, i) - ry; 35 | l = tree(N_L, i) - rl; 36 | 37 | pad = [abs(min(0, [y x])) 0]; 38 | f = padarray(f, pad, 0, 'pre'); 39 | if pad(1) > 0 40 | off_y = off_y + pad(1); 41 | end 42 | if pad(2) > 0 43 | off_x = off_x + pad(2); 44 | end 45 | 46 | w = model_get_block(model, 
model.filters(model.symbols(s).filter)); 47 | wsz = size(w); 48 | fsz = size(f); 49 | req_fsz = [off_y + y + wsz(1), off_x + x + wsz(2), wsz(3)]; 50 | pad = max(0, req_fsz - fsz); 51 | f = padarray(f, pad, 0, 'post'); 52 | f(off_y+1+y:off_y+1+y+wsz(1)-1, off_x+1+x:off_x+1+x+wsz(2)-1, :) = ... 53 | f(off_y+1+y:off_y+1+y+wsz(1)-1, off_x+1+x:off_x+1+x+wsz(2)-1, :) + w; 54 | end 55 | end 56 | 57 | visualizeHOG(max(0, f)); 58 | -------------------------------------------------------------------------------- /libraries/vis/vis_grammar.m: -------------------------------------------------------------------------------- 1 | function vis_grammar(model) 2 | 3 | % visualize random derivations...forever 4 | while true 5 | f = vis_grammar_rand(model); 6 | visualizeHOG(max(0, f)); 7 | pause; 8 | end 9 | 10 | 11 | function f = vis_grammar_rand(model, s, p, f) 12 | 13 | conf = voc_config(); 14 | 15 | if nargin < 2 16 | s = model.start; 17 | p = [0 0 0]; 18 | f = zeros([0 0 conf.features.dim]); 19 | end 20 | 21 | if model.symbols(s).type == 'T' 22 | w = model_get_block(model, model.filters(model.symbols(s).filter)); 23 | wsz = size(w); 24 | fsz = size(f); 25 | req_fsz = [p(2) + wsz(1), p(1) + wsz(2), wsz(3)]; 26 | pad = max(0, req_fsz - fsz); 27 | f = padarray(f, pad, 0, 'post'); 28 | f(1+p(2):1+p(2)+wsz(1)-1, 1+p(1):1+p(1)+wsz(2)-1, :) = ... 29 | f(1+p(2):1+p(2)+wsz(1)-1, 1+p(1):1+p(1)+wsz(2)-1, :) + w; 30 | else 31 | % sample a rule weighted by production score 32 | len = length(model.rules{s}); 33 | z = zeros(len,1); 34 | for i = 1:len 35 | z(i) = model_get_block(model, model.rules{s}(i).offset); 36 | end 37 | z = exp(z); 38 | Z = sum(z); 39 | if Z ~= 0 40 | r = find(mnrnd(1, z./Z) == 1); 41 | else 42 | r = ceil(rand*length(model.rules{s})); 43 | end 44 | 45 | if model.rules{s}(r).type == 'D' 46 | cs = model.rules{s}(r).rhs(1); 47 | f = vis_grammar_rand(model, cs, p, f); 48 | else 49 | for i = 1:length(model.rules{s}(r).rhs) 50 | cs = model.rules{s}(r).rhs(i); 51 | anchor = model.rules{s}(r).anchor{i}; 52 | f = vis_grammar_rand(model, cs, p + anchor, f); 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /libraries/vis/visualizeHOG.m: -------------------------------------------------------------------------------- 1 | function visualizeHOG(w) 2 | % Visualize HOG features/weights. 3 | % visualizeHOG(w) 4 | 5 | % Make pictures of positive and negative weights 6 | bs = 20; 7 | w = w(:,:,1:9); 8 | scale = max(max(w(:)),max(-w(:))); 9 | pos = HOGpicture(w, bs) * 255/scale; 10 | neg = HOGpicture(-w, bs) * 255/scale; 11 | 12 | % Put pictures together and draw 13 | buff = 10; 14 | pos = padarray(pos, [buff buff], 128, 'both'); 15 | if min(w(:)) < 0 16 | neg = padarray(neg, [buff buff], 128, 'both'); 17 | im = uint8([pos; neg]); 18 | else 19 | im = uint8(pos); 20 | end 21 | imagesc(im); 22 | colormap gray; 23 | axis equal; 24 | axis off; 25 | -------------------------------------------------------------------------------- /libraries/vis/visualizemodel.m: -------------------------------------------------------------------------------- 1 | function visualizemodel(model, components, layers) 2 | % Visualize a mixture of star models. 
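%
% Example sketch (assumes 'model' holds a trained star-structured or
% mixture model compatible with this library):
%   visualizemodel(model);          % draw every component, first layer
%   visualizemodel(model, [1 2]);   % restrict to components 1 and 2
%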
3 | % visualizemodel(model) 4 | % 5 | % Arguments 6 | % model Model to visualize 7 | % components Which components to draw 8 | 9 | clf; 10 | if nargin < 2 11 | components = 1:length(model.rules{model.start}); 12 | end 13 | 14 | if nargin < 3 15 | layers = 1; 16 | end 17 | 18 | k = 1; 19 | for i = components 20 | for layer = layers 21 | visualizecomponent(model, i, length(layers)*length(components), k, layer); 22 | k = k+1; 23 | end 24 | end 25 | 26 | function visualizecomponent(model, c, nc, k, layer) 27 | 28 | rhs = model.rules{model.start}(c).rhs; 29 | root = -1; 30 | parts = []; 31 | defs = {}; 32 | anchors = {}; 33 | % assume the root filter is first on the rhs of the start rules 34 | if model.symbols(rhs(1)).type == 'T' 35 | % handle case where there's no deformation model for the root 36 | root = model.symbols(rhs(1)).filter; 37 | else 38 | % handle case where there is a deformation model for the root 39 | root = model.symbols(model.rules{rhs(1)}(layer).rhs).filter; 40 | end 41 | for i = 2:length(rhs) 42 | defs{end+1} = model_get_block(model, model.rules{rhs(i)}(layer).def); 43 | anchors{end+1} = model.rules{model.start}(c).anchor{i}; 44 | fi = model.symbols(model.rules{rhs(i)}(layer).rhs).filter; 45 | parts = [parts fi]; 46 | end 47 | % make picture of root filter 48 | pad = 2; 49 | bs = 20; 50 | w = foldHOG(model_get_block(model, model.filters(root))); 51 | scale = max(w(:)); 52 | im = HOGpicture(w, bs); 53 | im = imresize(im, 2); 54 | im = padarray(im, [pad pad], 0); 55 | im = uint8(im * (255/scale)); 56 | 57 | % draw root 58 | numparts = length(parts); 59 | if numparts > 0 60 | subplot(nc,3,1+3*(k-1)); 61 | else 62 | subplot(nc,1,k); 63 | end 64 | imagesc(im) 65 | colormap gray; 66 | axis equal; 67 | axis off; 68 | 69 | % draw parts and deformation model 70 | if numparts > 0 71 | def_im = zeros(size(im)); 72 | def_scale = 500; 73 | for i = 1:numparts 74 | % part filter 75 | w = model_get_block(model, model.filters(parts(i))); 76 | p = HOGpicture(foldHOG(w), bs); 77 | p = padarray(p, [pad pad], 0); 78 | p = uint8(p * (255/scale)); 79 | % border 80 | p(:,1:2*pad) = 128; 81 | p(:,end-2*pad+1:end) = 128; 82 | p(1:2*pad,:) = 128; 83 | p(end-2*pad+1:end,:) = 128; 84 | % paste into root 85 | x1 = (anchors{i}(1))*bs+1; 86 | y1 = (anchors{i}(2))*bs+1; 87 | x2 = x1 + size(p, 2)-1; 88 | y2 = y1 + size(p, 1)-1; 89 | im(y1:y2, x1:x2) = p; 90 | 91 | % deformation model 92 | probex = size(p,2)/2; 93 | probey = size(p,1)/2; 94 | for y = 2*pad+1:size(p,1)-2*pad 95 | for x = 2*pad+1:size(p,2)-2*pad 96 | px = ((probex-x)/bs); 97 | py = ((probey-y)/bs); 98 | v = [px^2; px; py^2; py]; 99 | p(y, x) = defs{i} * v * def_scale; 100 | end 101 | end 102 | def_im(y1:y2, x1:x2) = p; 103 | end 104 | 105 | % plot parts 106 | subplot(nc,3,2+3*(k-1)); 107 | imagesc(im); 108 | colormap gray; 109 | axis equal; 110 | axis off; 111 | 112 | % plot deformation model 113 | subplot(nc,3,3+3*(k-1)); 114 | imagesc(def_im); 115 | colormap gray; 116 | axis equal; 117 | axis off; 118 | end 119 | 120 | set(gcf, 'Color', 'white') 121 | -------------------------------------------------------------------------------- /loadBB.m: -------------------------------------------------------------------------------- 1 | function dt0 = loadBB(outputPath, charIndex) 2 | % Helper funtion to load the bounding box 3 | % TODO: need to put more information 4 | % 5 | % USAGE 6 | % dt0 = loadBB(outputPath,charIndex) 7 | % 8 | % INPUTS 9 | % outputPath - class IDs 10 | % charIndex - the index of the character you want to return 11 | % 12 | % 
OUTPUTS 13 | % y1 - new class IDs 14 | % ch2 - new (equivalent) character classes 15 | configs=configsgen; 16 | [fs,~] = bbGt('getFiles', {outputPath}); 17 | dt0 = cell(length(fs),1); 18 | parfor i=1:length(fs) 19 | lstruct = load(fs{i}); bbs = lstruct.bbs; 20 | 21 | % equivocate the class 22 | bbs(:,6)=equivClass(bbs(:,6),configs.alphabets); 23 | 24 | bbs = bbs(bbs(:,6)==charIndex,1:5); 25 | 26 | % take the top 100 if there are more than that 27 | if size(bbs,1) > 100 28 | [~,order] = sort(bbs(:,5),'descend'); 29 | bbs = bbs(order(1:100),:); 30 | end 31 | 32 | % run nms 33 | bbs = bbNms(bbs,'type','max','overlap',.5,'separate',1); 34 | 35 | % only take the character of interest 36 | dt0{i} = bbs; 37 | end 38 | end -------------------------------------------------------------------------------- /loadMixtureModels.m: -------------------------------------------------------------------------------- 1 | function models=loadMixtureModels(path) 2 | % this function load the entire mixtures models 3 | % 4 | % INPUTS 5 | % path: the folder where all the models are stored 6 | % 7 | % OUTPUTS 8 | % models - format models{charind}{clusterind} 9 | 10 | configs=configsgen; 11 | models = cell(length(configs.alphabets),configs.nMixtures); 12 | %models = cell(2,configs.nMixtures); 13 | for iChar=1:length(configs.alphabets) 14 | for iMixture=1:configs.nMixtures 15 | toload = fullfile(path,sprintf('%s_%d.mat',configs.alphabets(iChar),iMixture)); 16 | if ~exist(toload,'file'); continue; end 17 | lstruct= load(toload); 18 | models{iChar,iMixture} = lstruct.model; 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /loadModels.m: -------------------------------------------------------------------------------- 1 | function models=loadModels(path) 2 | % this function loa the trained models given the path 3 | % 4 | % INPUTS 5 | % path: the folder where all the models are stored 6 | % 7 | % OUTPUTS 8 | % models - format models{charind}{clusterind} 9 | 10 | configs=configsgen; 11 | models = cell(64,1); 12 | count = 1; 13 | for iChar=1:length(configs.alphabets) 14 | toload = fullfile(path,sprintf('%s.mat',configs.alphabets(iChar))); 15 | if ~exist(toload,'file'); continue; end 16 | lstruct= load(toload); 17 | models{count} = lstruct.model; 18 | count = count + 1; 19 | end 20 | models = models(1:count-1); 21 | end 22 | -------------------------------------------------------------------------------- /misc/checkValidGt.m: -------------------------------------------------------------------------------- 1 | function valid=checkValidGt(str) 2 | % Check if ground truth string is greater than three characters and doesn't 3 | % contain non alphanumeric symbols. 4 | % 5 | % CREDITS 6 | % Written and maintained by Kai Wang and Boris Babenko 7 | % Copyright notice: license.txt 8 | % Changelog: changelog.txt 9 | % Please email kaw006@cs.ucsd.edu if you have questions. 
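%
% Illustrative behaviour (assuming the default character set from globals
% contains only digits and letters):
%   checkValidGt('HELLO')  -> 1
%   checkValidGt('no')     -> 0   (the code requires at least three characters)
%   checkValidGt('$5.99')  -> 0   (contains non-alphanumeric symbols)
%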
10 | 11 | [~,~,ch1]=globals; 12 | valid=1; 13 | if(length(str)<3), valid=0; return; end 14 | for j=1:length(str), 15 | if(size(find(ch1==upper(str(j))),2)==0), valid=0; return; end; 16 | end 17 | end -------------------------------------------------------------------------------- /misc/equivClass.m: -------------------------------------------------------------------------------- 1 | function [y1,ch2]=equivClass(y,ch) 2 | % Helper function to specify equivalence of upper and lower case characters 3 | % 4 | % USAGE 5 | % [y1,ch2] = equivClass( y, ch ) 6 | % 7 | % INPUTS 8 | % y - class IDs 9 | % ch - string of all character classes 10 | % 11 | % OUTPUTS 12 | % y1 - new class IDs 13 | % ch2 - new (equivalent) character classes 14 | % 15 | % CREDITS 16 | % Written and maintained by Kai Wang and Boris Babenko 17 | % Copyright notice: license.txt 18 | % Changelog: changelog.txt 19 | % Please email kaw006@cs.ucsd.edu if you have questions. 20 | 21 | ch1=upper(ch); ch2=unique(ch1); y1=y; 22 | for k=1:length(ch2) 23 | y1(ch1(y)==ch2(k))=k; 24 | end 25 | end -------------------------------------------------------------------------------- /misc/filterValidGt.m: -------------------------------------------------------------------------------- 1 | function [gtOut,inds]=filterValidGt(gt) 2 | % For simplicity, filter out words that are fewer than three 3 | % characters and 4 | % 5 | % CREDITS 6 | % Written and maintained by Kai Wang and Boris Babenko 7 | % Copyright notice: license.txt 8 | % Changelog: changelog.txt 9 | % Please email kaw006@cs.ucsd.edu if you have questions. 10 | 11 | inds=false(1,length(gt)); 12 | for i=1:length(gt), inds(i)=checkValidGt(gt(i).lbl)>0; end 13 | gtOut=gt(inds); 14 | end -------------------------------------------------------------------------------- /misc/findRanks.m: -------------------------------------------------------------------------------- 1 | function m=findRanks(y,yh) 2 | % Return the rank of the correct result in the output 3 | % 4 | % USAGE 5 | % [m] = findRanks( y, yh ) 6 | % 7 | % INPUTS 8 | % y - vector of class IDs 9 | % yh - matrix of results 10 | % 11 | % OUTPUTS 12 | % m - rank of each output 13 | % 14 | % CREDITS 15 | % Written and maintained by Kai Wang and Boris Babenko 16 | % Copyright notice: license.txt 17 | % Changelog: changelog.txt 18 | % Please email kaw006@cs.ucsd.edu if you have questions. 19 | 20 | m=zeros(length(y),1); 21 | for k=1:length(y) 22 | t=find(y(k)==yh(k,:),1); if(isempty(t)), t=inf; end 23 | m(k)=t; 24 | end 25 | -------------------------------------------------------------------------------- /misc/hogOld.m: -------------------------------------------------------------------------------- 1 | function H = hogOld( I, binSize, nOrients) 2 | % A wrapper for Piotr Dollar's HOG that clips the cells at the edges, as 3 | % was done by his toolbox prior to version 3. 4 | % 5 | % CREDITS 6 | % Written and maintained by Kai Wang and Boris Babenko 7 | % Copyright notice: license.txt 8 | % Changelog: changelog.txt 9 | % Please email kaw006@cs.ucsd.edu if you have questions. 
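%
% Sketch of the effect (assuming hog() returns an nRows x nCols x nBins
% array): hogOld simply drops the outermost ring of cells, so
%   Hfeat = hogOld(I, 8, 9);   % size (nRows-2) x (nCols-2) x nBins,
%                              % matching toolbox versions prior to 3
%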
10 | 11 | if( nargin<2 ), binSize=8; end 12 | if( nargin<3 ), nOrients=9; end 13 | H = hog(I, binSize, nOrients); 14 | H = H(2:end-1, 2:end-1, :); -------------------------------------------------------------------------------- /misc/readAllImgs.m: -------------------------------------------------------------------------------- 1 | function [I,yC]=readAllImgs(d,lbls,maxn,bgd,maxbg) 2 | % Helper function to read all images from a directory into a matrix 3 | % 4 | % USAGE 5 | % [I,yC] = readAllImgs( d, lbls, maxn, bgd, maxbg ) 6 | % 7 | % INPUTS 8 | % d - base directory 9 | % lbls - cell array of class labels 10 | % maxn - max number of images per class to be read 11 | % bgd - background class directory 12 | % maxbg - max number of images for background class to be read 13 | % 14 | % OUTPUTS 15 | % I - array of read images 16 | % yC - labels for images 17 | % 18 | % CREDITS 19 | % Written and maintained by Kai Wang and Boris Babenko 20 | % Copyright notice: license.txt 21 | % Changelog: changelog.txt 22 | % Please email kaw006@cs.ucsd.edu if you have questions. 23 | 24 | if(nargin<3), maxn=inf; end 25 | if(nargin<4), bgd=[]; end 26 | if(nargin<5), maxbg=maxn; end 27 | I=[]; yC=zeros(1e5,1); k0=1; 28 | ticId=ticStatus('reading images',1,30,1); 29 | for k=1:length(lbls), lbl=lbls{k}; 30 | if(upper(lbl)~=lbl), lbl=['-',lbl]; end %#ok 31 | if(lbl=='_'), lbl='charBg'; dd=fullfile(bgd,lbl); maxi=maxbg; 32 | else dd=fullfile(d,lbl); maxi=maxn; end 33 | if(~exist(dd,'dir') || size(dir(fullfile(dd,'*.png')),1) == 0) 34 | fprintf(1,'Warning: directory %s empty/non-existant',dd); continue; 35 | end 36 | I1=imwrite2([],1,0,dd); n1=size(I1,4); 37 | 38 | if(n1>maxi) 39 | rids=randSample(n1,maxi); n1=length(rids); I1=I1(:,:,:,rids); 40 | end 41 | if(k==1), I=zeros([size(I1,1),size(I1,2),3,5e4],'uint8'); end 42 | I(:,:,:,k0:k0+n1-1)=I1; yC(k0:k0+n1-1)=k; 43 | k0=k0+n1; tocStatus(ticId,k/length(lbls)); 44 | end 45 | I=I(:,:,:,1:k0-1); yC=yC(1:k0-1); 46 | fprintf('\n'); 47 | end -------------------------------------------------------------------------------- /misc/readSwt.m: -------------------------------------------------------------------------------- 1 | function bbsByFile=readSwt(fpath) 2 | % Process output files created by Stroke Width Transform 3 | % 4 | % USAGE 5 | % bbsByFile = readSwt( fpath ) 6 | % 7 | % INPUTS 8 | % fpath - path of SWT output file 9 | % 10 | % OUTPUTS 11 | % bbsByFile - bounding boxes grouped by image file 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 
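%
% Expected input layout (inferred from the parser below): each block is an
% image name, then one "<id> <x> <y> <w> <h>" line per box, with blocks
% separated by blank lines, e.g.
%   I00042.jpg
%   0 34 120 56 22
%   1 210 98 80 30
%
%   bbsByFile = readSwt('swt_output.txt');  % {'I00042.jpg', [2x4 double]; ...}
%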
18 | 19 | bbsByFile=[]; state=0; fname=''; bbs=[]; 20 | fid = fopen(fpath); 21 | tline = fgets(fid); 22 | while ischar(tline) 23 | tline1=strtrim(tline); tline=fgets(fid); 24 | if(isempty(tline1) && ~isempty(fname)), bbsByFile{end+1,1}=fname; 25 | bbsByFile{end,2}=bbs; state=0; fname=''; bbs=[]; continue; end 26 | if(isempty(tline1)), state=0; continue; end 27 | if(state==0), fname=tline1; state=1; continue; end 28 | if(state==1), 29 | spinds=find(tline1==' '); 30 | xval=str2double(tline1(spinds(1)+1:spinds(2)-1)); 31 | yval=str2double(tline1(spinds(2)+1:spinds(3)-1)); 32 | wval=str2double(tline1(spinds(3)+1:spinds(4)-1)); 33 | hval=str2double(tline1(spinds(4)+1:end)); 34 | bbs(end+1,:)=[xval, yval, wval, hval]; 35 | end 36 | end 37 | bbsByFile{end+1,1}=fname; bbsByFile{end,2}=bbs; 38 | fclose(fid); 39 | 40 | 41 | end -------------------------------------------------------------------------------- /misc/spellCheck.m: -------------------------------------------------------------------------------- 1 | function dtOut=spellCheck(dt,gtStrs) 2 | % Correct OCR output to closest ground truth string 3 | % 4 | % USAGE 5 | % dtOut = spellCheck( dt, gtStrs ) 6 | % 7 | % INPUTS 8 | % dt - detected word objects 9 | % gtStrs - possible ground truth strings 10 | % 11 | % OUTPUTS 12 | % dtOut - detected word objects after spell check 13 | % 14 | % CREDITS 15 | % Written and maintained by Kai Wang and Boris Babenko 16 | % Copyright notice: license.txt 17 | % Changelog: changelog.txt 18 | % Please email kaw006@cs.ucsd.edu if you have questions. 19 | 20 | dtOut=[]; if(isempty(gtStrs)), return; end 21 | for i=1:length(dt) 22 | dtStr=dt(i).word; 23 | if(isempty(dtStr)), continue; end 24 | dvec=[]; 25 | for j=1:length(gtStrs) 26 | dvec(j)=EditDist(upper(dtStr),upper(gtStrs{j})); 27 | end 28 | [val,ind]=min(dvec); 29 | dtOut(end+1).word=upper(gtStrs{ind}); 30 | dtOut(end).bb=dt(i).bb; 31 | dtOut(end).bb(5)=val; 32 | end 33 | 34 | end -------------------------------------------------------------------------------- /misc/writeAllImgs.m: -------------------------------------------------------------------------------- 1 | function writeAllImgs(I,y,lbls,d,ctr) 2 | % Helper function to write all images in a matrix by class label 3 | % 4 | % USAGE 5 | % writeAllImgs( I, y, lbls, d, ctr ) 6 | % 7 | % INPUTS 8 | % I - image matrix 9 | % y - class labels for matrix 10 | % lbls - mapping from class id number to label 11 | % d - base directory 12 | % ctr - [0] write index offset 13 | % 14 | % CREDITS 15 | % Written and maintained by Kai Wang and Boris Babenko 16 | % Copyright notice: license.txt 17 | % Changelog: changelog.txt 18 | % Please email kaw006@cs.ucsd.edu if you have questions. 
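%
% Example sketch (I, y, chC as produced by readAllImgs or the prepSynth*
% scripts):
%   writeAllImgs(I, y, chC, fullfile(dPath,'synth_easy','train','char'));
% writes each image I(:,:,:,k) into a subdirectory named after its class
% label chC{y(k)}, creating the directories as needed.
%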
19 | 20 | if nargin<5, ctr=0; end 21 | ticId=ticStatus('writing images',1,30,1); 22 | for k=1:length(lbls) 23 | lbl=lbls{k}; if(lbl>=97&&lbl<=122), lbl=['-' lbl]; end %#ok 24 | if(lbl=='_'), continue; end 25 | dd=fullfile(d,lbl); if(~exist(dd,'dir')), mkdir(dd); end 26 | I1=I(:,:,:,y==k); 27 | if(size(I1,4)>1), imwrite2(uint8(I1),1,ctr,dd); 28 | elseif(size(I1,4)==1), imwrite2(uint8(I1),0,ctr,dd); 29 | end 30 | tocStatus(ticId,k/length(lbls)); 31 | end 32 | end -------------------------------------------------------------------------------- /mixture_models/A_1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/mixture_models/A_1.mat -------------------------------------------------------------------------------- /models_real_nomixture.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/models_real_nomixture.mat -------------------------------------------------------------------------------- /plex_icdar_video.m: -------------------------------------------------------------------------------- 1 | function plex_icdar_video(indeces) 2 | % this script is to run kai system out of the box on the icdar video system 3 | configs=configsgen; 4 | vidpath = fullfile(configs.icdar_video,'test','videos','mp4'); 5 | resFolder = fullfile(configs.icdar_video,'test','res'); 6 | lexicon_path = fullfile(configs.icdar_video,'test','lex'); 7 | vidps = dir(fullfile(vidpath,'*.mp4')); 8 | saveRes=@(f,words,bbs)save(f,'words','bbs'); 9 | 10 | % Loading the character models 11 | % Ferns + synthesis 12 | clfPath=fullfile('data','fern_synth.mat'); fModel=load(clfPath); 13 | svmPath=fullfile('data','svm_svt.mat'); model=load(svmPath); 14 | wordSvm=model.pNms1; wordSvm.thr=-1; 15 | 16 | if ~exist('indeces','var'); indeces=1:length(vidps); end; 17 | for vidindex = indeces 18 | vpath = fullfile(vidpath,vidps(vidindex).name); 19 | fprintf('Working on %s\n',vpath); 20 | 21 | % Get the video 22 | done = false; 23 | while ~done 24 | vidobject = VideoReader(vpath); 25 | if vidobject.NumberOfFrames > 0; done = true; end 26 | end 27 | 28 | % Parse the name 29 | [~,name,~] = fileparts(vidps(vidindex).name); 30 | [~,name,~] = fileparts(name); 31 | 32 | %create the folder 33 | if exist(fullfile(resFolder,name),'dir') == 0 34 | mkdir(fullfile(resFolder,name)); 35 | end 36 | 37 | % read in the lexicons 38 | lexpath = fullfile(lexicon_path,[name '.xml.lex']); 39 | fid=fopen(lexpath,'r'); lexS=textscan(fid,'%s'); lexS=lexS{1}'; fclose(fid); 40 | allframes = read(vidobject); 41 | nFrame = size(allframes,4); 42 | nDone = length(dir(fullfile(resFolder,name,'*,mat'))); 43 | if nFrame == nDone; continue; end 44 | 45 | clear allframes; 46 | parfor iFrame = 1:nFrame 47 | fprintf('%s: frame %d...',name,iFrame); 48 | sf = fullfile(resFolder,name,sprintf('%d.mat',iFrame)); 49 | if exist(sf,'file') > 0; fprintf('Skipped\n'); continue; end; 50 | 51 | try 52 | I = read(vidobject,iFrame); 53 | tic; [words,~,~,bbs]=wordSpot(I,lexS,fModel,wordSvm,[],{'minH',.04}); 54 | toc; 55 | saveRes(sf,words,bbs); 56 | catch e 57 | e 58 | end 59 | 60 | fprintf('Done\n'); 61 | end 62 | end 63 | end -------------------------------------------------------------------------------- /prep_script/prepC74k.m: -------------------------------------------------------------------------------- 1 | function prepC74k 2 | % 
Process the raw files downloaded from C74k into a common format 3 | % Download site, 4 | % http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/ 5 | % 6 | % Move the English and Lists folder here, 7 | % [dPath]/c74k/raw/ 8 | % After moving, the folder should look like, 9 | % [dPath]/c74k/raw/Kannada/. 10 | % [dPath]/c74k/raw/Lists/. 11 | % 12 | 13 | configs=globals; 14 | c74k_path = fullfile(configs.data_base, 'c74k','raw'); 15 | 16 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 17 | % process the IMG data 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 19 | img_list = load(fullfile(c74k_path, 'Lists','English','Img','lists.mat')); 20 | list = img_list.list; 21 | sz=100; padding=.5; 22 | I=zeros(sz,sz,3,sum(list.is_good)); 23 | Ipd=zeros(sz,sz,3,sum(list.is_good)); 24 | img_base = fullfile(c74k_path,'English','Img'); 25 | img_paths = list.ALLnames(list.is_good,:); 26 | img_labels = list.ALLlabels(list.is_good,:); 27 | for i = 1:size(img_paths,1) 28 | I1 = imread(fullfile(img_base,[list.ALLnames(i,:),'.png'])); 29 | 30 | % The commented out code instead 'squarifies' bounding in different 31 | % ways. This might be worth trying as an alternative to stretching. 32 | bb=[1 1 size(I1,2) size(I1,1)]; 33 | bb=bbApply('squarify',bb,3,1); 34 | P=bbApply('crop',I1,bb,'symmetric',[sz sz]); P=P{1}; 35 | %P=imResample(I1,[sz,sz]); 36 | if(size(P,3)==1), P=cat(3,P,P,P); end 37 | I(:,:,:,i)=P; 38 | end 39 | 40 | % crappy way of converting number array to cell array of strings 41 | all_classes = cell(max(img_labels),1); 42 | for i = 1:max(img_labels), all_classes{i} = num2str(i); end 43 | 44 | writeAllImgs(I,img_labels,all_classes,... 45 | fullfile(configs.data_base,'c74k','English','img','char')); -------------------------------------------------------------------------------- /prep_script/prepMsrc.m: -------------------------------------------------------------------------------- 1 | function prepMsrc 2 | % Process the raw files downloaded from MSRC into a common format 3 | % Download site, 4 | % http://research.microsoft.com/en-us/downloads/b94de342-60dc-45d0-830b-9f6eff91b301/default.aspx 5 | % 6 | % Move the scenes,buildings, and miscellaneous folders here, 7 | % [dPath]/msrc/raw/ 8 | % After moving, the folder should look like, 9 | % [dPath]/msrc/raw/scenes/. 10 | % [dPath]/msrc/raw/scenes/countryside/. 11 | % [dPath]/msrc/raw/scenes/office/. 12 | % [dPath]/msrc/raw/scenes/urban/. 13 | % [dPath]/msrc/raw/buildings 14 | % [dPath]/msrc/raw/miscellaneous 15 | % 16 | % CREDITS 17 | % Written and maintained by Kai Wang and Boris Babenko 18 | % Copyright notice: license.txt 19 | % Changelog: changelog.txt 20 | % Please email kaw006@cs.ucsd.edu if you have questions. 21 | 22 | dPath=globals; 23 | % common character dimensions and # background samples 24 | sz=100; nBg=5000; 25 | RandStream.getDefaultStream.reset(); 26 | 27 | % --This block needs to be run before the crop functions can be called 28 | subdirs={fullfile('scenes','countryside'),... 29 | fullfile('scenes','office'),... 30 | fullfile('scenes','urban'),... 31 | 'buildings','miscellaneous'}; 32 | repackage(dPath,fullfile('msrc','raw'),fullfile('msrc','train'),... 
33 | fullfile('msrc','test'),subdirs); 34 | 35 | cropChars('train',sz,nBg); 36 | cropChars('test',sz,nBg); 37 | 38 | end 39 | 40 | % This function needs to be called with easy = {0,1} to produce all the 41 | function cropChars(d,sz,nBg) 42 | [dPath]=globals; d1=fullfile(dPath,'msrc',d); 43 | files=dir(fullfile(d1,'images','*.jpg')); n=length(files); 44 | B=zeros([sz,sz,3,nBg],'uint8'); b0=1; nBg1=ceil(nBg/n); 45 | for k=1:n 46 | I1=imread(fullfile(d1,'images',files(k).name)); 47 | %bbBg=bbApply('random',size(I1,2),size(I1,1),... 48 | % [50 size(I1,2)],[50 size(I1,1)],nBg1*5); 49 | bbBg=bbApply('random','dims',[size(I1,1),size(I1,2)],... 50 | 'wRng',[50 size(I1,2)],'hRng',[50 size(I1,1)],'n',nBg1*5); 51 | bbBg=bbApply('squarify',bbBg,1); 52 | B1 = bbGt( 'sampleWins', I1,... 53 | {'bbs',bbBg,'dims',[sz sz],'thr',.1} ); 54 | B1=B1(1:min(nBg1,length(B1))); if(isempty(B1)), continue; end 55 | B(:,:,:,b0:b0+length(B1)-1)=cell2array(B1); b0=b0+length(B1); 56 | end 57 | B=B(:,:,:,1:b0-1); 58 | bgD=fullfile(dPath,'msrc',d,'charBg'); 59 | if(~exist(bgD,'dir')), mkdir(bgD); end; imwrite2(B,1,0,bgD); 60 | end 61 | 62 | % 1. Move MSRC images into train and test folder 63 | % Place every other image into train/test folder 64 | function repackage(basedir, datarel, outtrainrel, outtestrel, subdirs) 65 | 66 | [dPath]=globals; 67 | dtrain=fullfile(basedir,outtrainrel,'images'); 68 | dtest=fullfile(basedir,outtestrel,'images'); 69 | if(~exist(dtrain,'dir')), mkdir(dtrain); end 70 | if(~exist(dtest,'dir')), mkdir(dtest); end 71 | ctr=0; 72 | for i=1:length(subdirs) 73 | sd=subdirs{i}; 74 | d1=fullfile(dPath,datarel,sd) 75 | files=dir(fullfile(d1,'*.JPG')); n=length(files); 76 | for k=1:2:n-1 77 | I1=imread(fullfile(d1,files(k).name)); 78 | I2=imread(fullfile(d1,files(k+1).name)); 79 | 80 | newimgbase = sprintf('I%05d', ctr); 81 | 82 | imgdest = fullfile(outtrainrel,'images',[newimgbase, '.jpg']); 83 | imwrite(I1, fullfile(basedir, imgdest), 'jpg'); 84 | 85 | imgdest = fullfile(outtestrel,'images',[newimgbase, '.jpg']); 86 | imwrite(I2, fullfile(basedir, imgdest), 'jpg'); 87 | 88 | ctr=ctr+1; 89 | end 90 | end 91 | 92 | 93 | end -------------------------------------------------------------------------------- /prep_script/prepSvt.m: -------------------------------------------------------------------------------- 1 | function prepSvt 2 | % Process the raw files downloaded from Street View Text into a common 3 | % format. Download site, 4 | % http://vision.ucsd.edu/~kai/svt 5 | % 6 | % Move the img folder and xml files here, 7 | % [dPath]/svt/raw/ 8 | % After moving, the folder should look like, 9 | % [dPath]/svt/raw/img/. 10 | % [dPath]/svt/raw/img/test.xml 11 | % [dPath]/svt/raw/img/train.xml 12 | % 13 | % CREDITS 14 | % Written and maintained by Kai Wang and Boris Babenko 15 | % Copyright notice: license.txt 16 | % Changelog: changelog.txt 17 | % Please email kaw006@cs.ucsd.edu if you have questions. 
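%
% After running, the repackaged data ends up in (illustrative paths):
%   [dPath]/svt/train/images/I00000.jpg
%   [dPath]/svt/train/wordAnn/I00000.jpg.txt
%   [dPath]/svt/train/lex/I00000.jpg.txt
% plus cropped word images and annotations under words/, wordsPad/,
% wordCharAnn/, wordCharAnnPad/, wordLex/ and wordLexPad/.
%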
18 | 19 | dPath=globals; 20 | RandStream.getDefaultStream.reset(); 21 | 22 | datarel=fullfile('svt','raw'); 23 | repackage(dPath, datarel, 'train.xml', fullfile('svt','train')); 24 | repackage(dPath, datarel, 'test.xml', fullfile('svt','test')); 25 | 26 | cropWords('train',0); 27 | cropWords('train',1); 28 | cropWords('test',0); 29 | cropWords('test',1); 30 | 31 | end 32 | 33 | function cropWords(d,usePad) 34 | dPath=globals; d1=fullfile(dPath,'svt',d); 35 | 36 | if usePad 37 | wdir='wordsPad'; adir='wordCharAnnPad'; ldir='wordLexPad'; percpad=.2; 38 | else 39 | wdir='words'; adir='wordCharAnn'; ldir='wordLex'; percpad=0; 40 | end 41 | 42 | d2=fullfile(d1,wdir); if(~exist(d2,'dir')), mkdir(d2); end 43 | d2=fullfile(d1,adir); if(~exist(d2,'dir')), mkdir(d2); end 44 | d2=fullfile(d1,ldir); if(~exist(d2,'dir')), mkdir(d2); end 45 | 46 | files=dir(fullfile(d1,'images','*.jpg')); n=length(files); 47 | wctr = 0; 48 | for k=1:n 49 | fprintf('%d out of %d\n',k,n); 50 | I=imread(fullfile(d1,'images',files(k).name)); 51 | wobjs=bbGt('bbLoad',fullfile(d1,'wordAnn',[files(k).name,'.txt'])); 52 | fid=fopen(fullfile(d1,'lex',[files(k).name,'.txt'])); 53 | lexS=textscan(fid,'%s\n'); lexS=lexS{1}'; 54 | fclose(fid); 55 | 56 | % loop through wobjs 57 | for j=1:length(wobjs) 58 | wobj=wobjs(j); wbb=bbGt('get',wobj,'bb'); 59 | 60 | % crop and save word image 61 | xpad=wbb(3)*percpad; 62 | ypad=wbb(4)*percpad; 63 | pwbb=[wbb(1)-xpad,wbb(2)-ypad,wbb(3)+2*xpad,wbb(4)+2*ypad]; 64 | Iw=bbApply('crop',I,pwbb,'replicate'); 65 | newimgbase=sprintf('I%05d',wctr); wctr=wctr+1; 66 | imwrite(Iw{1},fullfile(d1,wdir,[newimgbase,'.jpg']),'jpg'); 67 | 68 | savecobjs=bbGt('create',length(wobj.lbl)); 69 | savecobjs=bbGt('set',savecobjs,'lbl',... 70 | mat2cell(wobj.lbl',ones(length(wobj.lbl),1))'); 71 | labdest=fullfile(d1,adir,[newimgbase,'.jpg.txt']); 72 | bbGt('bbSave',savecobjs,labdest); 73 | lexdest=fullfile(d1,ldir,[newimgbase,'.jpg.txt']); 74 | %fid=fopen(lexdest,'w'); fprintf(fid,'%s',lex{1}); fclose(fid); 75 | 76 | % save lexicon 77 | fid=fopen(lexdest,'w'); 78 | for jj=1:length(lexS), fprintf(fid,'%s\n',lexS{jj}); end 79 | fclose(fid); 80 | end 81 | end 82 | 83 | end 84 | 85 | 86 | % 1. Move all the images into a single folder 87 | % 2. Create a BB file for word labels output into single folder 88 | % 3. 
Create a BB file for char labels output into single folder 89 | function repackage(basedir, datarel, labfile, outrel) 90 | d2=fullfile(basedir,outrel,'images'); if(~exist(d2,'dir')), mkdir(d2); end 91 | d2=fullfile(basedir,outrel,'wordAnn'); if(~exist(d2,'dir')), mkdir(d2); end 92 | d2=fullfile(basedir,outrel,'lex'); if(~exist(d2,'dir')), mkdir(d2); end 93 | 94 | tree=xmlread(fullfile(basedir, datarel, labfile)); 95 | img_elms=tree.getElementsByTagName('image'); 96 | 97 | for i = 0:img_elms.getLength-1 98 | img_item=img_elms.item(i); 99 | img_path=char(img_item.getElementsByTagName('imageName').item(0).getFirstChild.getData); 100 | img_lex=char(img_item.getElementsByTagName('lex').item(0).getFirstChild.getData); 101 | wbb_list=img_item.getElementsByTagName('taggedRectangles').item(0).getElementsByTagName('taggedRectangle'); 102 | 103 | wlbls=[]; wbbs=[]; 104 | 105 | for j=0:wbb_list.getLength-1 106 | wbb=wbb_list.item(j); 107 | tag=char(wbb.getElementsByTagName('tag').item(0).getFirstChild.getData); 108 | 109 | wordx=str2double(wbb.getAttribute('x')); 110 | wordy=str2double(wbb.getAttribute('y')); 111 | wordw=str2double(wbb.getAttribute('width')); 112 | wordh=str2double(wbb.getAttribute('height')); 113 | wlbls{j+1}=tag; wbbs(j+1,:)=[wordx, wordy, wordw, wordh]; 114 | end 115 | 116 | I=imread(fullfile(basedir,datarel,img_path)); 117 | newimgbase = sprintf('I%05d',i); 118 | 119 | % save image to new location 120 | imgdest=fullfile(outrel,'images',[newimgbase,'.jpg']); 121 | imwrite(I,fullfile(basedir,imgdest),'jpg'); 122 | 123 | % save word bbs 124 | wobjs=bbGt('create',wbb_list.getLength); 125 | wobjs=bbGt('set',wobjs,'lbl',wlbls); 126 | wobjs=bbGt('set',wobjs,'bb',wbbs); 127 | 128 | labdest=fullfile(outrel,'wordAnn',[newimgbase,'.jpg.txt']); 129 | bbGt('bbSave',wobjs,fullfile(basedir,labdest)); 130 | 131 | % save lexicon 132 | lexdest=fullfile(basedir,outrel,'lex',[newimgbase,'.jpg.txt']); 133 | lexS=textscan(img_lex,'%s','Delimiter',','); lexS=lexS{1}; 134 | fid=fopen(lexdest,'w'); 135 | for j=1:length(lexS), fprintf(fid,'%s\n',lexS{j}); end 136 | fclose(fid); 137 | end 138 | 139 | end 140 | -------------------------------------------------------------------------------- /prep_script/prepSynthEasy.m: -------------------------------------------------------------------------------- 1 | function prepSynthEasy(type) 2 | % Generate synthetic training data for character classifiers 3 | % 4 | % USAGE 5 | % prepSynthEasy( type ) 6 | % 7 | % INPUTS 8 | % type - ['train'] should be either 'test' or 'train' 9 | % 10 | % CREDITS 11 | % Written and maintained by Kai Wang and Boris Babenko 12 | % Copyright notice: license.txt 13 | % Changelog: changelog.txt 14 | % Please email kaw006@cs.ucsd.edu if you have questions. 
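%
% Example (illustrative): build the 500-images-per-class synthetic sets,
%   prepSynthEasy('train');
%   prepSynthEasy('test');
% Both require validfonts.mat (run synthesis/validateFonts.m first), since
% every character is rendered through genChar.
%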
15 | 16 | if(nargin==0), 17 | type = 'train'; 18 | else 19 | if(~(strcmp(type,'train') || strcmp(type,'test'))), 20 | error('Must be either train or test'); 21 | end 22 | end 23 | 24 | [dPath,ch,ch1,chC]=globals; 25 | RandStream.setDefaultStream(RandStream('mrg32k3a', 'Seed', sum(type))); 26 | % render 500 instances per class at size 100 pixels 27 | n=500; sz=100; 28 | for k=1:length(ch) 29 | I=zeros(sz,sz,3,n,'uint8'); k0=1; 30 | for i=1:n % n synthetic examples per character 31 | if strcmp(type,'train') 32 | I(:,:,:,k0)=genChar(ch(k),sz, ' ', [1 1]); k0=k0+1; 33 | else 34 | I(:,:,:,k0)=genChar(ch(k),sz, ' ', [500 1]); k0=k0+1; 35 | end 36 | end 37 | y=k*ones(1,n); y=y(:); 38 | writeAllImgs(I,y,chC,fullfile(dPath,'synth_easy',type,'char')); 39 | clear I; 40 | end 41 | end -------------------------------------------------------------------------------- /prep_script/prepSynthHard.m: -------------------------------------------------------------------------------- 1 | function prepSynthHard(type) 2 | % Generate synthetic training data for character classifiers 3 | % 4 | % USAGE 5 | % prepSynthHard( type ) 6 | % 7 | % INPUTS 8 | % type - ['train'] should be either 'test' or 'train' 9 | % 10 | % CREDITS 11 | % Written and maintained by Kai Wang and Boris Babenko 12 | % Copyright notice: license.txt 13 | % Changelog: changelog.txt 14 | % Please email kaw006@cs.ucsd.edu if you have questions. 15 | 16 | if(nargin==0), 17 | type = 'train'; 18 | else 19 | if(~(strcmp(type,'train') || strcmp(type,'test'))), 20 | error('Must be either train or test'); 21 | end 22 | end 23 | 24 | [dPath,ch,ch1,chC]=globals; 25 | RandStream.setDefaultStream(RandStream('mrg32k3a', 'Seed', sum(type))); 26 | % render 500 instances per class at size 100 pixels 27 | n=500; sz=100; 28 | for k=1:length(ch) 29 | if(ch(k)=='_'), continue; end 30 | I=zeros(sz,sz,3,n,'uint8'); k0=1; 31 | fInfo=zeros(n,3); 32 | for i=1:n % n synthetic examples per character 33 | if strcmp(type,'train') 34 | [I(:,:,:,k0),fInfo(k0,1),fInfo(k0,2),fInfo(k0,3)]=genChar(ch(k),sz, [ch, ' '], [1 500]); k0=k0+1; 35 | else 36 | [I(:,:,:,k0),fInfo(k0,1),fInfo(k0,2),fInfo(k0,3)]=genChar(ch(k),sz, [ch, ' '], [500 500]); k0=k0+1; 37 | end 38 | end 39 | y=k*ones(1,n); y=y(:); 40 | writeAllImgs(I,y,chC,fullfile(dPath,'synth_w_fonts',type,'charHard')); 41 | lbl=ch(k); 42 | if(lbl>=97&&lbl<=122), lbl=['-' lbl]; end 43 | dlmwrite(fullfile(dPath,'synth_w_fonts',type,'charHard',lbl,'fids.dat'),fInfo); 44 | clear I; 45 | end 46 | end -------------------------------------------------------------------------------- /readVideoFromFrames.m: -------------------------------------------------------------------------------- 1 | function video = readVideoFromFrames(video_folder_path) 2 | % This function returns the videos read in from a frame folder 3 | frm_paths = dir(fullfile(video_folder_path,'*.jpg')); 4 | nFrame = length(frm_paths); 5 | 6 | % Read the first frame to initiate the structure 7 | I = imread(fullfile(video_folder_path,frm_paths(1).name)); 8 | [rows,cols,~] = size(I); 9 | video = zeros(rows,cols,3,nFrame); 10 | for frame_index = 1:length(frm_paths) 11 | video(:,:,:,frame_index) = uint8(imread(fullfile(video_folder_path,... 
12 | frm_paths(frame_index).name))); 13 | end 14 | end -------------------------------------------------------------------------------- /results/fscores.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/results/fscores.mat -------------------------------------------------------------------------------- /runExp.m: -------------------------------------------------------------------------------- 1 | %% This scripts run all the top level experiments 2 | plex_icdar_video(); 3 | 4 | %% load gts 5 | configs=configsgen; 6 | gtpath = fullfile(configs.icdar_video,'test','gt'); 7 | blpath = fullfile(configs.icdar_video,'test','baseline'); 8 | tspath = fullfile(configs.icdar_video,'test','textspotter'); 9 | xmls = dir(fullfile(gtpath,'*.xml')); 10 | gts = cell(length(xmls),1); 11 | bls = cell(length(xmls),1); 12 | for i=1:length(xmls) 13 | xmlpath = fullfile(gtpath,xmls(i).name); 14 | gts{i} = loadGT(xmlpath); 15 | bls{i} = loadDT(blpath); 16 | end 17 | 18 | %% Get ATA icdar_video baseline 19 | xmls = filf 20 | %% Get ATA icdar_video textspotter 21 | -------------------------------------------------------------------------------- /run_chardetSVM_exp.m: -------------------------------------------------------------------------------- 1 | % This script runs all the experiments for the chardet_exp 2 | [dPath,ch]=globals; 3 | clfPath=fullfile('data','models_real_nomixture.mat'); 4 | fmodel=load(clfPath); 5 | models = fmodel.models; 6 | test_dataset = fullfile(dPath,'icdar','test','images'); 7 | output_path = fullfile(dPath,'icdar','test','det_results_real'); 8 | 9 | %% Actually running the SVM 10 | image_paths = dir(fullfile(test_dataset,'*.jpg')); 11 | nImg = length(image_paths); 12 | %ticId=ticStatus('Running PLEX on full images',1,30,1); 13 | saveRes=@(f,bbs)save(f,'bbs'); 14 | parfor i=1:nImg 15 | bbs = []; 16 | current_image = image_paths(i).name; 17 | fprintf('Working on index: %d, image: %s\n',i,current_image); 18 | sF = fullfile(output_path,[current_image '.mat']); 19 | if exist(sF,'file') > 0 20 | fprintf('%s already exists. 
Skipped\n',sF); 21 | continue 22 | end 23 | I = imread(fullfile(test_dataset,current_image)); 24 | try 25 | bbs=charDetSVM(I,models,{}); 26 | catch e 27 | fprintf('Error at index %d\n',i); 28 | continue 29 | end 30 | 31 | % save the bbs 32 | saveRes(sF,bbs); 33 | %tocStatus(ticId,i/nImg); 34 | end 35 | 36 | 37 | %% Calculating the F-score for the characters 38 | gtDir = fullfile(dPath,'icdar','test','charAnn'); 39 | fscores = zeros(length(ch),1); 40 | ticId=ticStatus('Collecting Fscore',1,30,1); 41 | for char_index = 11 42 | [gt0,~] = bbGt('loadAll',gtDir,[],{'lbls',ch(char_index)}); 43 | dt0 = loadBB(output_path,char_index); 44 | current_char = ch(char_index); 45 | 46 | % filter out the groundtruth 47 | [gt,dt] = bbGt( 'evalRes', gt0, dt0); 48 | [xs,ys,sc]=bbGt('compRoc', gt, dt, 0); 49 | fs = Fscore(xs,ys); 50 | fscores(char_index) = fs; 51 | tocStatus(ticId,char_index/(length(ch))); 52 | end 53 | -------------------------------------------------------------------------------- /sandbox/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/sandbox/0.jpg -------------------------------------------------------------------------------- /sandbox/148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/sandbox/148.jpg -------------------------------------------------------------------------------- /sandbox/demoYoutube.m: -------------------------------------------------------------------------------- 1 | function demoYoutube 2 | % Demo of PLEX running on Youtube image 3 | % 4 | % USAGE 5 | % demoYoutube 6 | % 7 | % CREDITS 8 | % Written and maintained by Kai Wang and Boris Babenko 9 | % Copyright notice: license.txt 10 | % Changelog: changelog.txt 11 | % Please email kaw006@cs.ucsd.edu if you have questions. 12 | 13 | I = imread('sandbox/0.jpg'); 14 | lexS={'Parlophone','Records','Present','Mark','Trade','WINE','COFFEE','fight',... 
15 | 'Party','Idea','romantic','texting','cnn','movie','candy','record','action','generic'}; 16 | 17 | im(I); drawnow; 18 | 19 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | % Load classifiers 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | % word threshold 23 | wordThr=-1; 24 | % character fern 25 | clfPath=fullfile('data','fern_synth.mat'); 26 | if(~exist(clfPath,'file')), error('FERN DOES NOT EXIST?!\n'); end 27 | fModel=load(clfPath); 28 | % word svm 29 | svmPath=fullfile('data','svm_svt.mat'); 30 | if(~exist(svmPath,'file')), error('SVM MODEL DOES NOT EXIST?!\n'); end 31 | model=load(svmPath); wordSvm=model.pNms1; wordSvm.thr=wordThr; 32 | 33 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | % Run word recognition (PLEX) 35 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | tic; words=wordSpot(I,lexS,fModel,wordSvm,[],{'minH',.04}); toc 37 | wordDetDraw( words, 1, 1, 1, [0 1 0] ); 38 | 39 | -------------------------------------------------------------------------------- /sandbox/result_analysis.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pxnguyen/videotext/24d10a10d464b98436920c179263df3e128932c3/sandbox/result_analysis.m -------------------------------------------------------------------------------- /sw.m: -------------------------------------------------------------------------------- 1 | configs=configsgen; 2 | 3 | % Add piotr toolbox 4 | addpath(genpath(configs.piotr_toolbox)) 5 | addpath(genpath(configs.libsvm)); 6 | addpath(genpath(configs.liblinear)); 7 | 8 | % Add path ihog 9 | %addpath(genpath(configs.ihog)) 10 | 11 | % Add local libraries 12 | addpath(genpath('.')); 13 | 14 | % Open all the matlab works -------------------------------------------------------------------------------- /synthesis/genChar.m: -------------------------------------------------------------------------------- 1 | function [P fontid rweight rangle]=genChar(c,sz,alphabet,pos) 2 | % Generate a character with noise 3 | % 4 | % For this to work, you must first run validateFonts.m to get a list of 5 | % fonts that your machine can properly display. 6 | % 7 | % USAGE 8 | % [P,fontid,rweight,rangle] = genChar(c, sz, alphabet,pos) 9 | % 10 | % INPUTS 11 | % c - specifies the character to generate 12 | % sz - specifies the height and width 13 | % alphabet - specifies random characters to generate to left and right 14 | % if none is specified, then no other chars are rendered. 15 | % This can/should contain space characters. 16 | % pos - [1,1] location to open the figure for rendering 17 | % 18 | % OUTPUTS 19 | % P - image of rendered character 20 | % fontid - id for the font during render 21 | % rweight - weight during render 22 | % rangle - angle during render 23 | % 24 | % CREDITS 25 | % Written and maintained by Kai Wang and Boris Babenko 26 | % Copyright notice: license.txt 27 | % Changelog: changelog.txt 28 | % Please email kaw006@cs.ucsd.edu if you have questions. 
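%
% Example sketch (ch is assumed to be the character list from globals.m,
% mirroring the call in prepSynthHard):
%   [P,fontid,rweight,rangle] = genChar('A', 100, [ch ' '], [1 1]);
%   im(P);   % show the noisy 100x100 rendering of 'A' with random
%            % neighbouring characters
%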
29 | 30 | if nargin < 3, error('Not enough input arguments'); end 31 | if nargin < 4, pos = [1,1]; end 32 | 33 | if ~exist('validfonts.mat', 'file') 34 | error('You must first run validateFonts.m to filter font list'); 35 | end 36 | 37 | weights={'normal','bold'}; fangles={'normal','italic'}; 38 | r=@(mn,mx) (mx-mn)*rand+mn; 39 | bgH=rand; bgS=r(.5,1); bgV=[r(.1,.3) r(.7,.9)]; bgV=bgV((rand>0.5)+1); 40 | fgS=r(.5,1); fgH=rand; fgV=r(.1,.9); 41 | 42 | % make sure there's enough contrast 43 | while(abs(bgV-fgV)<.3 || min(abs(bgH-fgH),1-abs(bgH-fgH))<.4 ) 44 | fgH=rand; if(bgV<.5), fgV=r(.5,1); else fgV=r(0,.5); end 45 | end 46 | 47 | % select left and right chars 48 | lch = ' '; rch = ' '; 49 | if nargin >= 3 50 | lind = randi([1,length(alphabet)]); 51 | rind = randi([1,length(alphabet)]); 52 | lch = alphabet(lind); 53 | rch = alphabet(rind); 54 | while(lch=='_'), lind=randi([1,length(alphabet)]); lch=alphabet(lind); end 55 | while(rch=='_'), rind=randi([1,length(alphabet)]); rch=alphabet(rind); end 56 | end 57 | renderstr = [lch, c, rch]; 58 | 59 | bg=hsv2rgb([bgH bgS bgV]); fg=hsv2rgb([fgH fgS fgV]); 60 | P=repmat(permute(bg,[1 3 2]),[sz,sz,1]); 61 | 62 | % choose a random font that will actually render 63 | load('validfonts', 'validfonts'); 64 | fontid = randi([1,length(validfonts)]); 65 | fontname = validfonts{fontid}; 66 | 67 | % render left character alone for alignment 68 | rweight = (rand>0.5)+1; 69 | rangle = (rand>0.5)+1; 70 | rsz = sz/r(1,1.5); 71 | 72 | % get dimensions of left character 73 | hf=figure('Visible','off'); clf; im(P,[],0); truesize; hold on; 74 | ht=text(0,0,renderstr(1),'fontsize', rsz(1),'color',fg,... 75 | 'fontweight',weights{rweight},.... 76 | 'fontangle',fangles{rangle},... 77 | 'fontname',fontname,... 78 | 'horizontalalignment','center','units','pixels'); 79 | left_e=get(ht,'Extent'); 80 | close(hf); 81 | 82 | % get dimensions of center character 83 | hf=figure('Visible','off'); clf; im(P,[],0); truesize; hold on; 84 | ht=text(0,0,renderstr(2),'fontsize',rsz(1),'color',fg,... 85 | 'fontweight',weights{rweight},.... 86 | 'fontangle',fangles{rangle},... 87 | 'fontname',fontname,... 88 | 'horizontalalignment','center','units','pixels'); 89 | mid_e=get(ht,'Extent'); 90 | close(hf); 91 | 92 | hf=figure('Visible','off'); clf; im(P,[],0); 93 | truesize; 94 | hold on; 95 | ht=text(0,0,renderstr,'fontsize',rsz(1),'color',fg,... 96 | 'fontweight',weights{rweight},.... 97 | 'fontangle',fangles{rangle},... 98 | 'fontname',fontname,... 
99 | 'horizontalalignment','center','units','pixels'); 100 | all_e=get(ht,'Extent'); 101 | c=[all_e(1)+left_e(3)+mid_e(3)/2,all_e(2)+all_e(4)/2]; 102 | set(ht,'Position',[sz/2-c(1),sz/2-c(2)]); 103 | curp=get(hf,'Position'); 104 | set(hf,'Position',[pos, curp(3:4)]); 105 | 106 | tim=getframe(gca); 107 | %tim2 = im2frame(zbuffer_cdata(hf)); 108 | close(hf); 109 | P=tim.cdata; 110 | 111 | P=P(2:end-1,2:end-1,:); P=padarray(P,[sz sz],'replicate'); 112 | angle = 5 - randi(10); 113 | R = rotationMatrix(angle*pi/180); T=[0; 0]; H=[R T; 0 0 1]; 114 | P=fevalArrays(P,@(I)imtransform2(I,H,'method','linear','bbox','crop')); 115 | P=double(P)+randn(size(P))*25*rand; % noise 116 | b=r(.5,2); P=uint8(gaussSmooth(P,[b b 0],'same')); % smoothing 117 | P=uint8(P(sz:end-sz,sz:end-sz,:)); 118 | 119 | end 120 | 121 | % HAVEN'T GOTTEN THIS TO WORK 100% 122 | % Has weird alignment issues somewhat randomly 123 | function cdata = zbuffer_cdata(hfig) 124 | % Get CDATA from hardcopy using zbuffer 125 | 126 | % Need to have PaperPositionMode be auto 127 | orig_mode = get(hfig, 'PaperPositionMode'); 128 | set(hfig, 'PaperPositionMode', 'auto'); 129 | 130 | cdata = hardcopy(hfig, '-Dzbuffer', '-r0'); 131 | 132 | % Restore figure to original state 133 | set(hfig, 'PaperPositionMode', orig_mode); % end 134 | end 135 | -------------------------------------------------------------------------------- /synthesis/validateFonts.m: -------------------------------------------------------------------------------- 1 | function validateFonts 2 | % Generate a list for valid fonts on the machine 3 | % 4 | % A figure pops up for each font, to verify if it renders properly on your 5 | % machine. 6 | % 7 | % CREDITS 8 | % Written and maintained by Kai Wang and Boris Babenko 9 | % Copyright notice: license.txt 10 | % Changelog: changelog.txt 11 | % Please email kaw006@cs.ucsd.edu if you have questions. 12 | 13 | allfonts=listfonts; 14 | validfonts=[]; 15 | for i=1:length(allfonts) 16 | fontname = allfonts{i}; 17 | figure(1); clf; 18 | text(0,.5,fontname,'fontsize', 100,'fontname',fontname); 19 | in=input('Render properly y/n? ','s'); 20 | if in=='y', validfonts{end+1} = fontname; end 21 | end 22 | save('validfonts', 'validfonts'); 23 | end -------------------------------------------------------------------------------- /third_party/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 20906 5 | svn+ssh://kai@lumo.ucsd.edu/projects/p1/svnroot/kai/iccv11/alpha/third_party 6 | svn+ssh://kai@lumo.ucsd.edu/projects/p1/svnroot 7 | 8 | 9 | 10 | 2011-10-31T04:52:25.938429Z 11 | 20906 12 | kai 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 52fe0c90-79fe-0310-8a18-a0b98ad248f8 28 | 29 | EditDist.m 30 | file 31 | 32 | 33 | 34 | 35 | 2011-09-29T05:19:58.000000Z 36 | 77ec20033991389add1012520d56c255 37 | 2011-10-31T04:52:25.938429Z 38 | 20906 39 | kai 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 2783 62 | 63 | -------------------------------------------------------------------------------- /third_party/.svn/text-base/EditDist.m.svn-base: -------------------------------------------------------------------------------- 1 | function d = EditDist(s1,s2,varargin) 2 | %EDITDIST Finds the Edit Distance between strings s1 and s2. 
The Edit Distance 3 | % is defined as the minimum number of single-character edit operations 4 | % (deletions, insertions, and/or replacements) that would convert 5 | % s1 into s2 or vice-versa. Uses an efficient dynamic programming 6 | % algorithm. Useful for gene sequence matching, among other applications. 7 | % 8 | % Example: d = EditDist('cow','house') returns a value of 4. 9 | % Example: s1 = 'now'; s2 = 'cow'; EditDist(s1,s2) returns a value of 1. 10 | % Example from gene sequence matching: 11 | % EditDist('ATTTGCATTA','ATTGCTT') returns a value of 3. 12 | % 13 | % If there are more than two inputs, the 3d, 4th, and 5th inputs will be 14 | % interpreted as the costs of the three edit operations: DELETION, 15 | % INSERTION, and REPLACEMENT respectively. The default is 1 for all 16 | % three operations. Note that if the cost of replacement is at least twice 17 | % the respective costs of deletion and insertion, replacements will never be 18 | % performed. 19 | % 20 | % Example: EditDist('cow','house',1,1,1) returns a value of 4. 21 | % Example: EditDist('cow','house',1,2,1.5) returns a value of 5. 22 | % Example: EditDist('cow','house',1,1,2) returns a value of 6. 23 | % 24 | % 25 | %USAGE: d = EditDist('string1','string2'); 26 | % 27 | % d = EditDist('string1,'string2',1.5,1,2); 28 | % 29 | % 30 | 31 | %Written and tested in Matlab 5.3, Release 11.1 (should work with earlier versions). 32 | %Copyright 2000, Miguel A. Castro 6/4/2000 33 | %talk2miguel@yahoo.com 34 | %------------------------------------------------------------------------------------------ 35 | 36 | %Determine the number of inputs. If 2 inputs, set default edit costs to 1. 37 | %Otherwise, make sure there are exactly 5 inputs, and set edit costs accordingly. 38 | if ~isempty(varargin) 39 | if length(varargin) ~= 3 40 | error('Usage is: EditDist(''string1'',''string2'',DeleteCost,InsertCost,ReplaceCost)'); 41 | end; 42 | DelCost = varargin{1}; 43 | InsCost = varargin{2}; 44 | ReplCost = varargin{3}; 45 | else 46 | DelCost = 1; 47 | InsCost = 1; 48 | ReplCost = 1; 49 | end; 50 | 51 | [m1,n1] = size(s1); 52 | [m2,n2] = size(s2); 53 | 54 | %Make sure input strings are horizontal. 55 | if ~(ischar(s1) & ischar(s2) & m1 == 1 & m2 == 1) 56 | error('s1 and s2 must be horizontal strings.'); 57 | end; 58 | 59 | %Initialize dynamic matrix D with appropriate size: 60 | D = zeros(n1+1,n2+1); 61 | 62 | %This is dynamic programming algorithm: 63 | for i = 1:n1 64 | D(i+1,1) = D(i,1) + DelCost; 65 | end; 66 | 67 | for j = 1:n2 68 | D(1,j+1) = D(1,j) + InsCost; 69 | end; 70 | 71 | for i = 1:n1 72 | for j = 1:n2 73 | if s1(i) == s2(j) 74 | Repl = 0; 75 | else 76 | Repl = ReplCost; 77 | end; 78 | D(i+1,j+1) = min([D(i,j)+Repl D(i+1,j)+DelCost D(i,j+1)+InsCost]); 79 | end; 80 | end; 81 | 82 | d = D(n1+1,n2+1); -------------------------------------------------------------------------------- /third_party/EditDist.m: -------------------------------------------------------------------------------- 1 | function d = EditDist(s1,s2,varargin) 2 | %EDITDIST Finds the Edit Distance between strings s1 and s2. The Edit Distance 3 | % is defined as the minimum number of single-character edit operations 4 | % (deletions, insertions, and/or replacements) that would convert 5 | % s1 into s2 or vice-versa. Uses an efficient dynamic programming 6 | % algorithm. Useful for gene sequence matching, among other applications. 7 | % 8 | % Example: d = EditDist('cow','house') returns a value of 4. 
9 | % Example: s1 = 'now'; s2 = 'cow'; EditDist(s1,s2) returns a value of 1. 10 | % Example from gene sequence matching: 11 | % EditDist('ATTTGCATTA','ATTGCTT') returns a value of 3. 12 | % 13 | % If there are more than two inputs, the 3d, 4th, and 5th inputs will be 14 | % interpreted as the costs of the three edit operations: DELETION, 15 | % INSERTION, and REPLACEMENT respectively. The default is 1 for all 16 | % three operations. Note that if the cost of replacement is at least twice 17 | % the respective costs of deletion and insertion, replacements will never be 18 | % performed. 19 | % 20 | % Example: EditDist('cow','house',1,1,1) returns a value of 4. 21 | % Example: EditDist('cow','house',1,2,1.5) returns a value of 5. 22 | % Example: EditDist('cow','house',1,1,2) returns a value of 6. 23 | % 24 | % 25 | %USAGE: d = EditDist('string1','string2'); 26 | % 27 | % d = EditDist('string1,'string2',1.5,1,2); 28 | % 29 | % 30 | 31 | %Written and tested in Matlab 5.3, Release 11.1 (should work with earlier versions). 32 | %Copyright 2000, Miguel A. Castro 6/4/2000 33 | %talk2miguel@yahoo.com 34 | %------------------------------------------------------------------------------------------ 35 | 36 | %Determine the number of inputs. If 2 inputs, set default edit costs to 1. 37 | %Otherwise, make sure there are exactly 5 inputs, and set edit costs accordingly. 38 | if ~isempty(varargin) 39 | if length(varargin) ~= 3 40 | error('Usage is: EditDist(''string1'',''string2'',DeleteCost,InsertCost,ReplaceCost)'); 41 | end; 42 | DelCost = varargin{1}; 43 | InsCost = varargin{2}; 44 | ReplCost = varargin{3}; 45 | else 46 | DelCost = 1; 47 | InsCost = 1; 48 | ReplCost = 1; 49 | end; 50 | 51 | [m1,n1] = size(s1); 52 | [m2,n2] = size(s2); 53 | 54 | %Make sure input strings are horizontal. 55 | if ~(ischar(s1) & ischar(s2) & m1 == 1 & m2 == 1) 56 | error('s1 and s2 must be horizontal strings.'); 57 | end; 58 | 59 | %Initialize dynamic matrix D with appropriate size: 60 | D = zeros(n1+1,n2+1); 61 | 62 | %This is dynamic programming algorithm: 63 | for i = 1:n1 64 | D(i+1,1) = D(i,1) + DelCost; 65 | end; 66 | 67 | for j = 1:n2 68 | D(1,j+1) = D(1,j) + InsCost; 69 | end; 70 | 71 | for i = 1:n1 72 | for j = 1:n2 73 | if s1(i) == s2(j) 74 | Repl = 0; 75 | else 76 | Repl = ReplCost; 77 | end; 78 | D(i+1,j+1) = min([D(i,j)+Repl D(i+1,j)+DelCost D(i,j+1)+InsCost]); 79 | end; 80 | end; 81 | 82 | d = D(n1+1,n2+1); -------------------------------------------------------------------------------- /todo.txt: -------------------------------------------------------------------------------- 1 | %% TODO list 2 | %%%% Pipeline %%%%% 3 | 1. Convert every video from MP4 to avi using ffmpeg 4 | ffmpeg -i 17CLlZuiBkQ_0.mp4 -vcodec mpeg4 -acodec ac3 -ar 48000 -ab 192k output.avi 5 | %%% Done 6 | 7 | 2. Finish writing the code up for detection of characters in video 8 | 9 | 3. 
Run it on deepthought
10 | 
11 | All these must be done by today
12 | 
13 | %%%% Experiments %%%%
14 | 
15 | %%% Dataset %%%%
16 | 
17 | %%% Papers %%%%
--------------------------------------------------------------------------------
/train/extractAndTrain.m:
--------------------------------------------------------------------------------
1 | function model=extractAndTrain(charname)
2 | configs=configsgen;
3 | fprintf('Extracting and training for %s\n',charname);
4 | posf = fullfile(configs.clean_data,charname);
5 | path_list = dir(fullfile(posf,'*.jpg'));
6 | image_list = cell(length(path_list),1);
7 | size_list = zeros(length(path_list),2);
8 | 
9 | fprintf('Extracting positives...');
10 | %TODO: need to make this a function like get_positives
11 | for image_index = 1:length(image_list)
12 | current_img_path = path_list(image_index).name;
13 | I = imread(fullfile(posf,current_img_path));
14 | [sr,sc,~] = size(I);
15 | size_list(image_index,:) = [sr sc];
16 | image_list{image_index} = I;
17 | end
18 | 
19 | % Determine the mean aspect ratio
20 | mean_dims = floor(mean(size_list));
21 | ar = mean_dims(1)/mean_dims(2);
22 | canonical_dims = [80 80/ar];
23 | 
24 | fprintf('Done\n');
25 | fprintf('Extracting negatives...');
26 | neg_list = get_negative(charname,canonical_dims);
27 | fprintf('Done\n');
28 | % compute features for the positive and negative image lists
29 | pos_feat = get_features(image_list,canonical_dims);
30 | neg_feat = get_features(neg_list,canonical_dims);
31 | 
32 | pos_label = ones(size(pos_feat,1),1);
33 | neg_label = ones(size(neg_feat,1),1)+1;
34 | 
35 | % concatenate them together
36 | total_feat = [pos_feat; neg_feat];
37 | labels = [pos_label; neg_label];
38 | 
39 | fprintf('Training...');
40 | model = trainChar(total_feat,labels,charname,canonical_dims,configs);
41 | fprintf('Done\n');
42 | end
43 | 
--------------------------------------------------------------------------------
/train/get_negative.m:
--------------------------------------------------------------------------------
1 | function negative_images=get_negative(char,dims)
2 | % This function returns the list of negative images for the input char
3 | %
4 | % INPUTS
5 | % char: the character of the positive class
6 | % dims: 1x2 array, dimensions to rescale the initial negatives
7 | %
8 | % OUTPUTS
9 | % negative_images: the negative images
10 | configs=configsgen;
11 | if( nargin < 2 || isempty(dims)), dims=configs.canonical_scale; end
12 | raw_dirs_path = configs.clean_data;
13 | char_index = strfind(configs.alphabets,char);
14 | sr = dims(1); sc = dims(2);
15 | 
16 | % data structure to return the negative images
17 | negative_images = zeros(sr,sc,3,5e3); total = 1;
18 | 
19 | % Find out which classes are not similar to the current character
20 | excluded_set = configs.similar_classes(char_index,:)==0;
21 | to_be_added = configs.alphabets(excluded_set);
22 | for i=1:length(to_be_added)
23 | current_char = to_be_added(i);
24 | data_dir = fullfile(raw_dirs_path,current_char);
25 | files = dir(fullfile(data_dir,'*.jpg'));
26 | for file_index = 1:min(length(files),30)
27 | I = imread(fullfile(data_dir,files(file_index).name));
28 | negative_images(:,:,:,total) = imResample(I,[sr,sc]);
29 | total = total + 1;
30 | end
31 | end
32 | 
33 | negative_images = negative_images(:,:,:,1:total-1);
34 | end
--------------------------------------------------------------------------------
/train/mine_negative.m:
--------------------------------------------------------------------------------
1 | function hard_negative_patches=mine_negative(model,indeces,limit)
2 | % Mine hard negatives for SVM training
3 | %
4 | % INPUTS:
5 | % model: the trained model
6 | % indeces: indices that determine which of the images in the Flickr
7 | % dataset to use
8 | % limit: the upper limit of how many negatives to take
9 | %
10 | % OUTPUTS: hard_negative_patches, the mined hard negative patches
11 | configs = configsgen;
12 | char_dims = model.char_dims;
13 | files = dir(fullfile(configs.RandomFlickr,'*.jpg'));
14 | hard_negative_patches = zeros(char_dims(1),char_dims(2),3,limit);
15 | count = 1;
16 | for i=1:length(indeces)
17 | fprintf('working on %d out of %d - count: %d\n',i,length(indeces),count);
18 | current_index = indeces(i);
19 | filepath = fullfile(configs.RandomFlickr,files(current_index).name);
20 | I = imread(filepath); bbs = detect(I,{model},-0.01);
21 | if size(bbs,1) < 1; continue; end
22 | [patches,~] = bbApply('crop',I,bbs);
23 | 
24 | for j=1:length(patches)
25 | image = imResample(patches{j},char_dims);
26 | if count <= limit
27 | hard_negative_patches(:,:,:,count) = image;
28 | count = count + 1;
29 | else
30 | fprintf('Full, exiting\n');
31 | return
32 | end
33 | end
34 | end
35 | 
36 | hard_negative_patches = hard_negative_patches(:,:,:,1:count-1);
37 | end
38 | 
--------------------------------------------------------------------------------
/train/mining_hard_negative.m:
--------------------------------------------------------------------------------
1 | %% Setting up
2 | configs = globals;
3 | load svm_model_liblinear;
4 | train_dir = fullfile(configs.data_base,'ICDAR','Processed');
5 | 
6 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,configs.char_dims)),...
7 | configs.bin_size,configs.n_orients)),[],1);
8 | 
9 | disp('Reading images');
10 | files = dir(fullfile(configs.data_base,'ICDAR','RandomFlickr','*.jpg'));
11 | for i=50:60
12 | figure(i)
13 | filepath = fullfile(configs.data_base,'ICDAR','RandomFlickr',files(i).name);
14 | disp('Detecting text');
15 | I = imread(filepath);
16 | bbs = detecttext(I,model,true);
17 | [patches,~] = bbApply('crop',I,bbs);
18 | 
19 | [sr,sc,~] = size(patches{1});
20 | patch_stack = zeros(sr,sc,3,length(patches));
21 | base = length(dir(fullfile(configs.data_base,'ICDAR','Processed','nontext','*.png')));
22 | for j=1:length(patches)
23 | if bbs(j,5) > 4
24 | image = patches{j};
25 | image = imResample(image,configs.char_dims);
26 | fpath = fullfile(configs.data_base,'ICDAR','Processed','nontext',sprintf('I%05d.png',j+base-1));
27 | imwrite(image,fpath)
28 | end
29 | end
30 | end
31 | 
32 | %[images,labels] = readAllImgs(train_dir,{'morenontext'});
33 | 
34 | %% Extracting features
35 | disp('Extracting features');
36 | features=fevalArrays(images,cHogFtr)';
37 | 
38 | disp('Classification')
39 | [predicted_label,accuracy, prob] = svmpredict(labels, double(features), model, '-b 1');
40 | 
41 | disp('Getting false positives')
42 | false_positives = (predicted_label ~= labels);
43 | 
44 | disp('Writing false positives');
45 | images = images(:,:,:,false_positives);
46 | 
47 | base = length(dir(fullfile(configs.data_base,'ICDAR','Processed','morenontext2','*.png')));
48 | 
49 | % Write the hard negatives
50 | for i=1:size(images,4)
51 | i
52 | fpath = fullfile(configs.data_base,'ICDAR','Processed','morenontext2',sprintf('I%05d.png',i+base));
53 | imwrite(images(:,:,:,i),fpath);
54 | end
55 | 
56 | %% Combining nontext and hard negatives
57 | % read all images from hard negatives
58 | hardnegs = dir(fullfile(configs.data_base,'ICDAR','Processed','hardnegatives','*.png'));
59 | for i=1:length(hardnegs)
60 | end
61 | %% Train the new classifiers
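% The retraining step above is left as a stub. Below is a minimal, hypothetical
% sketch of how it could be completed, reusing helpers already used in this
% script (readAllImgs, fevalArrays, liblinear's train); the class folder names
% passed to readAllImgs are assumptions, not part of the original pipeline.
combined_classes = {'text','nontext','morenontext2'};     % assumed folder names under Processed
[images,labels] = readAllImgs(train_dir,combined_classes);
features = fevalArrays(images,cHogFtr)';                  % one HOG feature row per patch
model = train(labels, sparse(double(features)), '-s 2 -e 0.0001 -c 3 -q');
save('svm_model_liblinear','model');                      % replace the previous SVM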
-------------------------------------------------------------------------------- /train/trainAll.m: -------------------------------------------------------------------------------- 1 | function trainAll(chars) 2 | configs=globals; 3 | label_classes = configs.classes; 4 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,configs.char_dims{1})),... 5 | configs.bin_size,configs.n_orients)),[],1); 6 | 7 | for char_index=1:length(chars) 8 | class = chars{char_index}; 9 | alphabet_index = findstr(configs.alphabets,class); 10 | 11 | % read the files and files 12 | fprintf('Reading images...\n'); 13 | train_dir = fullfile(configs.path_to_processedChars74k,class); 14 | [images,labels] = readAllImgs(train_dir,label_classes); 15 | 16 | % extract the features 17 | features = fevalArrays(images,cHogFtr)'; 18 | 19 | model=train_cluster(features,labels,alphabet_index,configs); 20 | model.char_class = class; 21 | model_name = sprintf('%s.mat',class); 22 | 23 | fprintf('Saving %s...\n',model_name); 24 | model.name = model_name; 25 | model.char_index = alphabet_index; 26 | save(fullfile('models',model_name),'model'); 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /train/trainChar.m: -------------------------------------------------------------------------------- 1 | function trainChar(charName,saveDir,trainDims) 2 | configs = configsgen; 3 | if ~exist('trainDims','var'); trainDims = configs.canonical_scale; end; 4 | fprintf('Training a model for the character %s\n',charName); 5 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,trainDims)),... 6 | configs.bin_size,configs.n_orients)),[],1); 7 | 8 | fprintf('Loading positives\n'); 9 | train_dir = fullfile(configs.clean_data,charName); 10 | posFiles = dir(fullfile(train_dir,'*.jpg')); 11 | nFiles = length(posFiles); 12 | posFeats = []; 13 | for i=1:nFiles 14 | curPath = fullfile(train_dir,posFiles(i).name); 15 | I = imread(curPath); 16 | feat = cHogFtr(I); 17 | if isempty(posFeats) 18 | posFeats = zeros(nFiles,length(feat)); 19 | end 20 | 21 | posFeats(i,:) = feat; 22 | end 23 | 24 | fprintf('Loading negatives\n'); 25 | negimgs = get_negative(charName); 26 | negFeats=fevalArrays(negimgs,cHogFtr)'; 27 | 28 | totalFeats=cat(1,posFeats,negFeats); 29 | 30 | lbls=[ones(size(posFeats,1),1);ones(size(negFeats,1),1)+1]; 31 | model=train_cluster(totalFeats,lbls,charName,configs,trainDims); 32 | savePath = fullfile(saveDir,sprintf('%s.mat',charName)); 33 | save(savePath,'model'); 34 | end -------------------------------------------------------------------------------- /train/trainClassifier.m: -------------------------------------------------------------------------------- 1 | function model=trainClassifier(features,labels) 2 | % For every folder in Processed, train a seperate classifier 3 | % TODO: need to sweep for C in the training phase 4 | configs = globals; 5 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,configs.char_dims)),... 6 | configs.bin_size,configs.n_orients)),[],1); 7 | 8 | disp('Reading images'); 9 | 10 | disp('Extracting features'); 11 | features = fevalArrays(images,cHogFtr)'; 12 | 13 | number_of_examples = size(features,1); 14 | cutoff = floor(.8*number_of_examples); 15 | order = randperm(number_of_examples); 16 | features_train = features(order(1:cutoff),:); 17 | features_test = features(order(cutoff+1:end),:); 18 | labels_train = labels(order(1:cutoff)); 19 | labels_test = labels(order(cutoff+1:end)); 20 | 21 | %% Possibly do something regarding reduce the dimension of the feature vectors. 
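% One concrete, purely illustrative way to act on the dimensionality note above
% and the PCA/random-projection suggestion that follows: a Gaussian random
% projection of the HOG features. The value of k and the *_rp variable names
% are assumptions, and nothing below is used by the training call further down.
k = 256; D = size(features_train,2);       % assumed target dimension
R = randn(D,k)/sqrt(k);                    % Johnson-Lindenstrauss style projection matrix
features_train_rp = features_train*R;      % N_train x k
features_test_rp  = features_test*R;       % project test data with the same R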
22 | % Use random projection or PCA 23 | % Train the SVM and crossvalidation, model selections 24 | 25 | %% Find best model 26 | disp('Training SVM'); 27 | model = train(labels_train, sparse(double(features_train)), '-s 2 -e 0.0001 -c 3 -q'); 28 | 29 | disp('Training accuracy'); 30 | [predicted_label,accuracy, score] = predict(labels_train, sparse(double(features_train)), model); 31 | disp('Testing accuracy'); 32 | [predicted_label,accuracy, score] = predict(labels_test, sparse(double(features_test)), model); 33 | end -------------------------------------------------------------------------------- /train/trainMixtures.m: -------------------------------------------------------------------------------- 1 | function trainMixtures(chars) 2 | configs=configsgen; 3 | K=configs.nMixtures; 4 | 5 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,configs.canonical_scale)),... 6 | configs.bin_size,configs.n_orients)),[],1); 7 | savemodel = @(sp,model) save(sp,'model'); 8 | 9 | for char_index=1:length(chars) 10 | class = chars(char_index); 11 | fprintf('Training char %s\n',class); 12 | prevTrained = dir(fullfile('mixture_models',sprintf('%s*',class))); 13 | if (length(prevTrained) == K); continue; end; 14 | 15 | % read the files and files 16 | fprintf('Loading positives\n'); 17 | train_dir = fullfile(configs.synth_data,'train','charHard',class); 18 | imgs=imwrite2([],1,0,train_dir); 19 | 20 | % extract the features 21 | feats=fevalArrays(imgs,cHogFtr)'; 22 | 23 | % cluster data into cluster 24 | idx=cluster_traindata(feats,K); 25 | 26 | % Get the negative 27 | fprintf('Loading negatives\n'); 28 | negimgs = get_negative(class); 29 | negfeats=fevalArrays(negimgs,cHogFtr)'; 30 | % debugging 31 | %for i=1:K 32 | %figure(i*10);group=imgs(:,:,:,idx==i); montage(uint8(group)); 33 | %end 34 | 35 | % constructing featsmix, this helps running parallely 36 | featsmix = cell(K,1); 37 | for cluster_ind=1:K 38 | featsmix{cluster_ind} = feats(idx==cluster_ind,:); 39 | end 40 | 41 | % clear feats to free space; 42 | clear('feats') 43 | clear('negimgs') 44 | clear('imgs') 45 | 46 | % For each cluster, train a separate classifier 47 | parfor cluster_ind=1:K 48 | fprintf('Working on cluster %d\n',cluster_ind); 49 | cluster_feats=featsmix{cluster_ind}; 50 | total_features=cat(1,cluster_feats,negfeats); 51 | lbls=[ones(size(cluster_feats,1),1);ones(size(negfeats,1),1)+1]; 52 | model=train_cluster(total_features,lbls,class,configs); 53 | model_name = sprintf('%s_%d.mat',class,cluster_ind); 54 | model.name = model_name; 55 | savemodel(fullfile('mixture_models',model_name),model); 56 | end 57 | end 58 | end -------------------------------------------------------------------------------- /train/trainRoot.m: -------------------------------------------------------------------------------- 1 | function [ model ] = trainRoot(debug) 2 | %trainRoot Summary of this function goes here 3 | % This function takes care of training the filter for the root filter 4 | 5 | configs=globals; 6 | label_classes = configs.classes; 7 | 8 | limit = 1500; 9 | num_datamine = 15; 10 | 11 | char_dims = configs.root_dims; 12 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,char_dims)),... 
13 | configs.bin_size,configs.n_orients)),[],1); 14 | 15 | class = 'root'; 16 | 17 | train_dir = fullfile(configs.path_to_processedChars74k,class); 18 | [images,labels] = readAllImgs(train_dir,label_classes); 19 | 20 | iteration = 1; 21 | 22 | testingImage1 = imread('/home/phuc/Research/data/RandomFlickr/im1.jpg'); 23 | testingImage2 = imread('/home/phuc/Research/data/RandomFlickr/im109.jpg'); 24 | testingImage3 = imread('IMG_2533.JPG'); 25 | 26 | while iteration <= num_datamine 27 | disp('Training...'); 28 | features = fevalArrays(images,cHogFtr)'; 29 | model = train(labels, sparse(double(features)), '-s 2 -e 0.0001 -c 3 -q'); 30 | model.char_dims = char_dims; 31 | [~,~, scores] = predict(labels, sparse(double(features)), model); 32 | 33 | % Removing 100 easy examples 34 | [~,order] = sort(scores,'ascend'); 35 | 36 | negative_indeces = find(labels == 2); 37 | 38 | negative_scores = scores(negative_indeces); 39 | %[~,hard_order] = sort(negative_scores,'descend'); 40 | [~,easy_order] = sort(negative_scores,'ascend'); 41 | 42 | %hard_order = negative_indeces(hard_order); 43 | easy_order = negative_indeces(easy_order); 44 | %hard_order = hard_order(1:limit); 45 | easy_order = easy_order(1:limit); 46 | 47 | % Removing the pictures with the easy scores 48 | images(:,:,:,easy_order) = []; 49 | 50 | % debugging 51 | if debug 52 | bbs1=detect(testingImage1,{model},0); 53 | bbs2=detect(testingImage2,{model},0); 54 | bbs3=detect(testingImage3,{model},0); 55 | figure(10); 56 | subplot(3,5,iteration); imshow(testingImage1); bbApply('draw',bbs1(:,1:4)); 57 | figure(11); 58 | subplot(3,5,iteration); imshow(testingImage2); bbApply('draw',bbs2(:,1:4)); 59 | figure(12); 60 | subplot(3,5,iteration); imshow(testingImage3); bbApply('draw',bbs3(:,1:4)); 61 | end 62 | 63 | labels(easy_order) = []; 64 | 65 | % Performing hard negative finding 66 | disp('Mining negatives - Phase 1...'); 67 | 68 | indeces = floor(rand(400)*400)+1; 69 | 70 | % Pull new negatives 71 | hard_negative_patches = mine_negative(model,indeces,limit); 72 | 73 | labels = cat(1,labels, ones(limit,1)+1); 74 | images = cat(4,images,hard_negative_patches); 75 | iteration = iteration + 1; 76 | end 77 | 78 | % Save the model 79 | model_name = sprintf('root_model'); 80 | fprintf('Saving %s...\n',model_name); 81 | model.name = model_name; 82 | save(model_name,'model'); 83 | 84 | end 85 | 86 | -------------------------------------------------------------------------------- /train/trainSVM.m: -------------------------------------------------------------------------------- 1 | %% Reading images 2 | configs = globals; 3 | train_dir = fullfile(configs.data_base,'ICDAR','Processed'); 4 | 5 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,configs.char_dims)),... 6 | configs.bin_size,configs.n_orients)),[],1); 7 | 8 | % check if features already extracted 9 | if (exist('built_features.mat','file')) 10 | load('built_features'); 11 | else 12 | % load images 13 | %features_test=fevalArrays(images_test,cHogFtr)'; 14 | 15 | % save('built_features','features_train','features_test',... 
16 | % 'labels_train','labels_test','order') 17 | end 18 | 19 | disp('Reading images'); 20 | [images,labels] = readAllImgs(train_dir,configs.classes); 21 | 22 | %% extract features 23 | disp('Extracting features') 24 | features=fevalArrays(images,cHogFtr)'; 25 | 26 | number_of_examples = size(features,1); 27 | cutoff = floor(.8*number_of_examples); 28 | order = randperm(number_of_examples); 29 | features_train = features(order(1:cutoff),:); 30 | features_test = features(order(cutoff+1:end),:); 31 | labels_train = labels(order(1:cutoff)); 32 | labels_test = labels(order(cutoff+1:end)); 33 | 34 | %% Possibly do something regarding reduce the dimension of the feature vectors. 35 | % Use random projection or PCA 36 | % Train the SVM and crossvalidation, model selections 37 | 38 | %% Find best model 39 | disp('Training SVM'); 40 | sizes = [17000]; 41 | training_acc = zeros(length(sizes),1); 42 | test_acc = zeros(length(sizes),1); 43 | for i=1:length(sizes) 44 | current_size = sizes(i); 45 | model = train(labels_train(1:current_size,:), sparse(double(features_train(1:current_size,:))), '-s 2 -e 0.0001 -c .2 -q'); 46 | 47 | disp('Training accuracy'); 48 | [predicted_label,accuracy, score] = predict(labels_train(1:current_size,:), sparse(double(features_train(1:current_size,:))), model); 49 | training_acc(i) = accuracy(1); 50 | disp('Testing accuracy'); 51 | [predicted_label,accuracy, score] = predict(labels_test, sparse(double(features_test)), model); 52 | test_acc(i) = accuracy(1); 53 | end 54 | 55 | figure(1); 56 | plot(sizes,training_acc); hold on; 57 | plot(sizes,test_acc,'r'); 58 | legend('Training','Cross-validation'); 59 | xlabel('Training examples'); 60 | ylabel('Accuracy'); 61 | save('svm_model_liblinear','model'); -------------------------------------------------------------------------------- /train/train_cluster.m: -------------------------------------------------------------------------------- 1 | function model=train_cluster(features,labels,char_class,configs,char_dims,debug) 2 | if ~exist('char_dims','var'); char_dims = configs.canonical_scale; end 3 | if ~exist('debug','var'); debug = false; end 4 | limit = 200; nDatamine = configs.num_datamine; 5 | 6 | if debug 7 | testI = imread(fullfile(configs.extracted_frames,'7jjcAuEYW9M_0.mp4/0/0','72.jpg')); 8 | testI = imresize(testI,[405,720]); 9 | end 10 | cHogFtr=@(I)reshape((5*hog(single(imResample(I,char_dims)),... 
11 | configs.bin_size,configs.n_orients)),[],1); 12 | 13 | iteration = 1; 14 | disp('Training the first iteration'); 15 | model = train(labels, sparse(double(features)), '-s 2 -e 0.001 -c 3 -q'); 16 | model.char_dims = char_dims; 17 | model.char_index = char_class; 18 | %[predicted_label,~, scores] = predict(labels, sparse(double(features)), model); 19 | 20 | while iteration <= nDatamine 21 | fprintf('Data mining - Iteration %d\n',iteration); 22 | % Here we are not removing 23 | %labels(easy_order) = []; features(easy_order,:) = []; % removing 24 | 25 | if debug 26 | negative_indeces = find(predicted_label==2); 27 | negative_scores = scores(negative_indeces); 28 | [~,easy_order] = sort(negative_scores,'ascend'); 29 | easy_order = negative_indeces(easy_order); 30 | easy_order = easy_order(1:limit-100); 31 | bbs1=detect(testI,{model},0); 32 | figure(10); subplot(2,5,mod(iteration-1,10)+1); 33 | imshow(testI); bbApply('draw',bbs1(:,1:4)); 34 | end 35 | 36 | % Performing hard negative mining 37 | indeces = floor(rand(200,1)*600)+1; 38 | fprintf('Mining negatives - Phase %d...\n',iteration); 39 | hard_negative_patches = mine_negative(model,indeces,limit); 40 | 41 | if(size(hard_negative_patches,4)==0); break; end 42 | 43 | new_features=fevalArrays(hard_negative_patches,cHogFtr)'; 44 | features=cat(1,features,new_features); 45 | twos=ones(size(new_features,1),1)+1; labels=cat(1,labels, twos); 46 | 47 | fprintf('Retraining\n'); 48 | model=train(labels,sparse(double(features)), '-s 2 -e 0.0001 -c 3 -q'); 49 | model.char_dims = char_dims; 50 | %[predicted_label,~, scores] = predict(labels, sparse(double(features)), model); 51 | model.char_index = char_class; 52 | 53 | iteration = iteration + 1; 54 | end 55 | end -------------------------------------------------------------------------------- /train/train_temporal_weights.m: -------------------------------------------------------------------------------- 1 | function model=train_temporal_weights(training_folder) 2 | configs=globals; 3 | ground_truth_paths = configs.ground_truth_paths; 4 | training_files = dir(fullfile(training_folder,'*.mat')); 5 | 6 | N = 5; 7 | 8 | %limit = length(ground_truths); 9 | 10 | limit = 10; 11 | total_features = zeros(1e5,N*2); 12 | total_labels = zeros(1e5,1); 13 | current_count = 0; 14 | for i=6:limit 15 | file_name = training_files(i).name; 16 | disp(file_name); 17 | load(fullfile(ground_truth_paths,file_name)); 18 | load(fullfile(training_folder,file_name)); 19 | 20 | %features = [1 2 3 4 1 2 3 4 1 1; 1 2 3 4 1 2 3 4 1 1;]; 21 | %labels = [1 1]; 22 | 23 | [features,labels] = extract_temporal_features(predictions,gt_mat,N); 24 | 25 | current_size = length(labels); 26 | 27 | total_features(current_count+1:current_count+current_size,:) = features; 28 | total_labels(current_count+1:current_count+current_size,:) = labels; 29 | current_count = current_count + current_size; 30 | end 31 | 32 | total_features = total_features(1:current_count,:); 33 | total_labels = total_labels(1:current_count); 34 | 35 | model = train(total_labels, sparse(double(total_features)), '-s 2 -e 0.0001 -c 3 -q'); 36 | end -------------------------------------------------------------------------------- /training/trainChClfs.m: -------------------------------------------------------------------------------- 1 | function trainChClfs 2 | % Train character classifiers (FERNS) 3 | % 4 | % This function first trains FERNS using images for each character class 5 | % and their provided background class. 
It also does another round of 6 | % training after bootstrapping more negative examples. 7 | % 8 | % CREDITS 9 | % Written and maintained by Kai Wang and Boris Babenko 10 | % Copyright notice: license.txt 11 | % Changelog: changelog.txt 12 | % Please email kaw006@cs.ucsd.edu if you have questions. 13 | 14 | [dPath,ch,ch1,chC,chClfNm]=globals; 15 | % parameters that pretty much won't change 16 | sBin=8; oBin=8; chH=48; 17 | S=6; M=256; thrr=[0 1]; nTrn=Inf; 18 | cHogFtr=@(I)reshape((5*hogOld(imResample(single(I),[chH,chH]),sBin,oBin)),[],1); 19 | cFtr=cHogFtr; 20 | 21 | % Train character detectors specified in the param list 22 | % paramSet={train dataset,with/without neighboring chars, 23 | % bg dataset,# bg images,bootstrap} 24 | paramSets={{'icdar','charHard','icdar',5000,1},... 25 | {'synth','charHard','msrc',5000,1}}; 26 | 27 | fprintf('Training character classifiers.\n'); 28 | % Loop over param sets 29 | for p=1:length(paramSets) 30 | paramSet=paramSets{p}; 31 | trnD=paramSet{1}; trnT=paramSet{2}; trnBg=paramSet{3}; 32 | nBg=paramSet{4}; bs=paramSet{5}; 33 | cDir=fullfile(dPath,trnD,'clfs'); 34 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,'nBg',... 35 | nBg,'nTrn',nTrn}; 36 | cNm=chClfNm(clfPrms{:}); clfPath=fullfile(cDir,[cNm,'.mat']); 37 | newBg=[trnBg,'Bt']; 38 | 39 | fprintf('Working on: '); 40 | disp(paramSet); 41 | 42 | % load training images 43 | RandStream.getDefaultStream.reset(); 44 | [I,y]=readAllImgs(fullfile(dPath,trnD,'train',trnT),chC,nTrn,... 45 | fullfile(dPath,trnBg,'train'),nBg); 46 | x=fevalArrays(I,cFtr)'; 47 | % train char classifier 48 | RandStream.getDefaultStream.reset(); 49 | [ferns,yh]=fernsClfTrain(double(x),y,struct('S',S,'M',M,'thrr',thrr,'bayes',1)); 50 | fprintf('training error=%f\n',mean(y~=yh)); 51 | if(~exist(cDir,'dir')),mkdir(cDir); end 52 | save(clfPath,'ferns','sBin','oBin','chH'); 53 | fModel=[]; fModel.ferns=ferns; fModel.sBin=sBin; fModel.oBin=oBin; 54 | fModel.chH=chH; 55 | 56 | % bootstrap classifier if flag is on 57 | if ~bs, continue; end 58 | 59 | % copy base bg folder to new bootstrap folder 60 | fullBgD=fullfile(dPath,trnBg,'train','charBg'); 61 | fullNewBgD=fullfile(dPath,newBg,'train','charBg'); 62 | if(exist(fullNewBgD,'dir')), 63 | fprintf('Clearing out old hardnegative folder'); 64 | rmdir(fullNewBgD,'s'); 65 | end 66 | mkdir(fullNewBgD); 67 | copyfile(fullBgD,fullNewBgD); 68 | 69 | maxn=100; w=length(dir(fullfile(fullNewBgD,'*png'))); %<- starting index 70 | files=dir(fullfile(dPath,trnBg,'train','images','*.jpg')); files={files.name}; 71 | filesAnn=dir(fullfile(dPath,trnBg,'train','wordAnn','*.txt')); filesAnn={filesAnn.name}; 72 | % bootstrap 73 | ticId=ticStatus('Mining hard negatives',1,30,1); 74 | for f=1:length(files), 75 | I=imread(fullfile(dPath,trnBg,'train','images',files{f})); 76 | if ~isempty(filesAnn) 77 | gtBbs=bbGt('bbLoad',fullfile(dPath,trnBg,'train','wordAnn',filesAnn{f})); 78 | gtBbs1=reshape([gtBbs.bb],4,[])'; 79 | gtBbs1=[gtBbs1, zeros(size(gtBbs1,1),1)]; 80 | else 81 | gtBbs1=[]; 82 | end 83 | 84 | bbs=charDet(I,fModel,{'thr',0,'minH',.1}); 85 | bbs(:,6)=equivClass(bbs(:,6),ch); 86 | bbs=bbNms(bbs,'thr',0,'separate',0,'type','maxg',... 87 | 'resize',{1,1},'ovrDnm','union','overlap',.2,'maxn',inf); 88 | 89 | if(isempty(bbs)), continue; end 90 | P=bbGt('sampleWins',I,{'bbs',bbs,'n',maxn,'dims',[100 100],'ibbs',... 
91 | gtBbs1,'thr',.01}); 92 | if(isempty(P)), continue; end 93 | P=cell2array(P); 94 | imwrite2(P,size(P,4)>1,w,fullNewBgD); 95 | w=w+size(P,4); 96 | tocStatus(ticId,f/length(files)); 97 | end 98 | 99 | % re-train again 100 | nBtBg=2*nBg; 101 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',newBg,... 102 | 'nBg',nBtBg,'nTrn',nTrn}; 103 | cNm=chClfNm(clfPrms{:}); 104 | RandStream.getDefaultStream.reset(); 105 | [I,y]=readAllImgs(fullfile(dPath,trnD,'train',trnT),chC,nTrn,... 106 | fullfile(dPath,newBg,'train'),nBtBg); 107 | x=fevalArrays(I,cFtr)'; 108 | % train char classifier 109 | RandStream.getDefaultStream.reset(); 110 | [ferns,yh]=fernsClfTrain(double(x),y,struct('S',S,'M',M,'thrr',thrr,'bayes',1)); 111 | fprintf('training error=%f\n',mean(y~=yh)); 112 | if(~exist(cDir,'dir')),mkdir(cDir); end 113 | save(fullfile(cDir,cNm),'ferns','sBin','oBin','chH'); 114 | end 115 | 116 | end -------------------------------------------------------------------------------- /training/trainRescore.m: -------------------------------------------------------------------------------- 1 | function [model,xmin,xmax,dtE]=trainRescore(dtw,dt,gtw,nFold,pNms,thr) 2 | % Train SVM to score words 3 | % 4 | % USAGE 5 | % [model,xmin,xmax,dtE]=trainRescore( dtw, dt, gtw, nFold, pNms, thr ) 6 | % 7 | % INPUTS 8 | % dtw - detected word objects 9 | % dt - detected bounding boxes 10 | % gtw - ground truth word objects 11 | % nFold - Number of folds for cross validation 12 | % pNms - additional word-level nms params 13 | % .type - ['none'] NMS type (currently just 'none' or 'maxg') 14 | % .thr - [-inf] word threshold 15 | % thr - overlap threshold 16 | % 17 | % OUTPUTS 18 | % model - trained SVM object 19 | % xmin - min values of every feature 20 | % xmax - max values of every feature 21 | % 22 | % CREDITS 23 | % Written and maintained by Kai Wang and Boris Babenko 24 | % Copyright notice: license.txt 25 | % Changelog: changelog.txt 26 | % Please email kaw006@cs.ucsd.edu if you have questions. 
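% EXAMPLE
%  A usage sketch following how trainWdClfs.m calls this function
%  (nFold=5 and the .5 overlap threshold are the values used there):
%   pNms=struct('thr',-inf,'ovrDnm','min','overlap',.5,'type','max');
%   [model,xmin,xmax]=trainRescore(dtw,dt,gtw,5,pNms,.5);
%   pNms.clf={xmin,xmax,model}; pNms.type='none';
%   dtw1=wordNms(dtw{1},pNms);   % re-score one image's detected words with the SVM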
27 | 
28 | RandStream.setDefaultStream(RandStream('mrg32k3a', 'Seed', sum('iccv11')));
29 | n=length(gtw); fld=randint2(n,1,[1 nFold]); x=cell(n,1); y=x;
30 | 
31 | for k=1:n, y{k}=zeros(length(dtw{k}),1);
32 | for j=1:length(dtw{k}),
33 | y{k}(j)=2*dt{k}(j,6)-1;
34 | if(j==1), x{k}=repmat(computeWordFeatures(dtw{k}(1)),length(dtw{k}),1);
35 | else x{k}(j,:)=computeWordFeatures(dtw{k}(j)); end
36 | end
37 | if(isempty(dtw{k})),x{k}=zeros(0,17); end
38 | end
39 | xmin=min(cat(1,x{:}));
40 | x=cellfun(@(x1)bsxfun(@minus,x1,xmin),x,'UniformOutput',0);
41 | xmax=max(cat(1,x{:})); x=cellfun(@(x1)bsxfun(@times,x1,1./xmax),x,'UniformOutput',0);
42 | 
43 | Cs=[5e-1 1e0 5e0 1e1 5e1 1e2 5e2 1e3 5e3];
44 | fs=zeros(length(Cs),nFold);
45 | for c=1:length(Cs)
46 | for f=1:nFold,
47 | xtr=cat(1,x{fld~=f}); ytr=cat(1,y{fld~=f});
48 | fprintf('C=%i, fold=%i, numpos=%i\n',Cs(c),f,sum(ytr==1));
49 | prm=sprintf('-c %i -t 1 -d 2 -w1 %.3f -w-1 %.3f',Cs(c),1,1);
50 | model=svmtrain(ytr,xtr,prm);
51 | fs(c,f)=evalF(dtw(fld==f),gtw(fld==f),model,xmin,xmax,pNms,thr);
52 | fprintf(1,sprintf('PRMS: %s\nF=%.3f\n',prm,fs(c,f)));
53 | end
54 | end
55 | disp(fs);
56 | fs=mean(fs,2); [~,i]=max(fs);
57 | xtr=cat(1,x{:}); ytr=cat(1,y{:});
58 | prm=sprintf('-c %i -t 1 -d 2 -w1 %.3f -w-1 %.3f',Cs(i),1,1);
59 | fprintf(1,['TRAINING WITH FINAL PRMS:\n' prm '\n']);
60 | model=svmtrain(ytr,xtr,prm);
61 | [f,dtE]=evalF(dtw,gtw,model,xmin,xmax,pNms,thr);
62 | fprintf(1,'training fscore=%.3f\n',f);
63 | end
64 | 
65 | function [f,dt1]=evalF(dt,gt,model,xmin,xmax,pNms,thr)
66 | % run nms on all detections and then compute f-score
67 | pNms.clf={xmin,xmax,model};
68 | dt1=dt; for k=1:length(dt), dt1{k}=wordNms(dt{k},pNms); end
69 | gtE=gt; dtE=dt;
70 | for k=1:length(gt)
71 | [gtE{k},dtE{k},~,dt1{k}]=evalReading1(gt{k},dt1{k},thr,0);
72 | for j=1:length(dt1{k}), dt1{k}(j).cor=dtE{k}(j,6); end
73 | end
74 | [xs,ys]=bbGt('compRoc', gtE, dtE, 0); f=Fscore(xs,ys);
75 | end
76 | 
77 | function [gt, dt, gt0, dt0] = evalReading1( gt0, dt0, thr, mul )
78 | % check inputs
79 | if(nargin<3 || isempty(thr)), thr=.5; end
80 | if(nargin<4 || isempty(mul)), mul=0; end
81 | nd=length(dt0); ng=length(gt0);
82 | for g=1:ng, gt0(g).det=0; gt0(g).read=0; end
83 | if(ng==0), gt=zeros(0,5); else
84 | [~,ord]=sort([gt0.ign],'ascend'); gt0=gt0(ord,:);
85 | gt=reshape([gt0.bb],4,[])'; gt(:,5)=-[gt0.ign];
86 | end
87 | if(nd==0), dt=zeros(0,6); else
88 | wbbs=reshape([dt0.bb],5,[])'; [~,ord]=sort(wbbs(:,5),'descend');
89 | dt0=dt0(ord);
90 | dt=reshape([dt0.bb],5,[])'; dt(:,6)=0;
91 | end
92 | if(nd==0||ng==0), return; end
93 | 
94 | % Attempt to match each (sorted) dt to each (sorted) gt
95 | for d=1:nd
96 | bstOa=thr; bstg=0; bstm=0; % info about best match so far
97 | for g=1:ng
98 | % if this gt already matched, continue to next gt
99 | m=gt(g,5); if( m==1 && ~mul ), continue; end
100 | % if dt already matched, and on ignore gt, nothing more to do
101 | if( bstm~=0 && m==-1 ), break; end
102 | % compute overlap area, continue to next gt unless better match made
103 | oa=bbGt('compOa',dt0(d).bb(1:4),gt0(g).bb(1:4),m==-1);
104 | if(oa<bstOa), continue; end
105 | % to count as a correct reading, the recognized word must match the gt label
106 | if(m>=0 && ~strcmpi(gt0(g).lbl,dt0(d).word)), continue; end
107 | gt0(g).read=1;
108 | % match successful and best so far, store appropriately
109 | bstOa=oa; bstg=g; if(m==0), bstm=1; else bstm=-1; end
110 | end
111 | % store type of match for both dt and gt
112 | if(bstm~=0), gt(bstg,5)=bstm; dt(d,6)=bstm; end
113 | end
114 | end
115 | 
--------------------------------------------------------------------------------
/training/trainWdClfs.m: -------------------------------------------------------------------------------- 1 | function trainWdClfs 2 | % Train word-level classifiers (SVM); re-score words 3 | % 4 | % CREDITS 5 | % Written and maintained by Kai Wang and Boris Babenko 6 | % Copyright notice: license.txt 7 | % Changelog: changelog.txt 8 | % Please email kaw006@cs.ucsd.edu if you have questions. 9 | 10 | [dPath,ch,ch1,chC,chClfNm]=globals; 11 | RandStream.getDefaultStream.reset(); 12 | 13 | S=6; M=256; nTrn=Inf; 14 | trnT='charHard'; trnBg='msrcBt'; nBg=10000; 15 | clfPrms={'S',S,'M',M,'trnT',trnT,'bgDir',trnBg,'nBg',nBg,'nTrn',nTrn}; 16 | cNm=chClfNm(clfPrms{:}); 17 | 18 | % -- paramSet={dataset, test split, lexicon dir, results dir} 19 | paramSets={{'svt','test','lex',fullfile('res-synth')}%,... 20 | %{'icdar','test','lex50',fullfile('res-swtPad','res-synth')},... 21 | %{'icdar','test','lex50',fullfile('res-synth')} 22 | }; 23 | 24 | nFold=5; evalThr=.5; 25 | pNms=struct('thr',-inf,'ovrDnm','min','overlap',.5); 26 | saveRes=@(f,words)save(f,'words'); 27 | 28 | for p=1:length(paramSets) 29 | paramSet=paramSets{p}; 30 | tstD=paramSet{1}; tstSpl=paramSet{2}; 31 | lexD=paramSet{3}; resDir=paramSet{4}; 32 | 33 | iDir=fullfile(dPath,tstD,tstSpl,'images'); 34 | gtDir=fullfile(dPath,tstD,tstSpl,'wordAnn'); 35 | dtDir=fullfile(dPath,tstD,tstSpl,resDir,cNm,'images'); 36 | 37 | pNms1=pNms; pNms1.type='none'; 38 | % training directories 39 | gtDirTr=fullfile(dPath,tstD,'train','wordAnn'); 40 | dtDirTr=fullfile(dPath,tstD,'train',resDir,cNm,'images'); 41 | lexDirTr=fullfile(dPath,tstD,'train',lexD); 42 | evalPrmTr={'thr',evalThr,'imDir',iDir,'f0',1,'f1',inf,... 43 | 'lexDir',lexDirTr,'pNms',pNms1}; 44 | 45 | % directory to save to after re-scoring 46 | outDir=fullfile(dPath,tstD,tstSpl,[resDir,'-svm'],cNm,'images'); 47 | if(~exist(outDir,'dir')), mkdir(outDir); end 48 | 49 | % eval on training set 50 | [gtT,dtT,gtwT,dtwT]=evalReading(gtDirTr,dtDirTr,evalPrmTr); 51 | 52 | [xs,ys]=bbGt('compRoc', gtT, dtT, 0); 53 | Fscore(xs,ys) 54 | % train 55 | pNms1=pNms; pNms1.type='max'; 56 | [model,xmin,xmax]=trainRescore(dtwT,dtT,gtwT,nFold,pNms1,evalThr); 57 | pNms1.clf={xmin,xmax,model}; pNms1.type='none'; 58 | 59 | % apply svm to re-score test set 60 | files=dir(fullfile(gtDir,'*.txt')); files={files.name}; 61 | for i=1:length(files), fname=[files{i}(1:end-8),'.mat']; 62 | dtNm=fullfile(dtDir,fname); 63 | if(~exist(dtNm,'file')), dta=[]; else res=load(dtNm); dta=res.words; end 64 | % TODO: fix issue with signs of word scores (very confusing) 65 | for j=1:length(dta), dta(j).bb(:,5)=-dta(j).bb(:,5); end 66 | dta=wordNms(dta,pNms1); 67 | for j=1:length(dta), dta(j).bb(:,5)=-dta(j).bb(:,5); end 68 | saveRes(fullfile(outDir,fname),dta); 69 | end 70 | 71 | % save SVM 72 | save(fullfile(outDir,'..','wordSvm'),'pNms1'); 73 | end 74 | 75 | -------------------------------------------------------------------------------- /trainscript.m: -------------------------------------------------------------------------------- 1 | % script to train the mixtures of the characters 2 | configs=configsgen; 3 | tic; trainMixtures(configs.alphabets); toc; -------------------------------------------------------------------------------- /visualization/charDetDraw.m: -------------------------------------------------------------------------------- 1 | function hs = charDetDraw( bb, ch, lw, ls ) 2 | % Draw character bounding boxes 3 | % 4 | % USAGE 5 | % charDetDraw( bb, ch, lw, ls ) 6 | % 7 | % INPUTS 8 | % bb - character bounding boxes 9 | % 
ch - mapping classid to character 10 | % lw - [2] line width 11 | % ls - [-] line style 12 | % 13 | % OUTPUTS 14 | % hs - handle 15 | % 16 | % CREDITS 17 | % Written and maintained by Kai Wang and Boris Babenko 18 | % Copyright notice: license.txt 19 | % Changelog: changelog.txt 20 | % Please email kaw006@cs.ucsd.edu if you have questions. 21 | 22 | [n,m]=size(bb); if(n==0), hs=[]; return; end 23 | if(nargin<3 || isempty(lw)), lw=2; end 24 | if(nargin<4 || isempty(ls)), ls='-'; end 25 | % prepare display properties 26 | prop={'LineWidth' lw 'LineStyle' ls 'EdgeColor'}; 27 | tProp={'FontSize',10,'color','k','FontWeight','bold',... 28 | 'VerticalAlignment','bottom','BackgroundColor'}; 29 | 30 | hs=zeros(1,n); clrs=hsv(length(ch)); 31 | for b=1:n, hs(b)=rectangle('Position',bb(b,1:4),prop{:},clrs(bb(b,6),:)); end 32 | hs=[hs zeros(1,n)]; 33 | for b=1:n, hs(b+n)=text(bb(b,1),bb(b,2)+3,ch(bb(b,6)),tProp{:},clrs(bb(b,6),:)); end 34 | end -------------------------------------------------------------------------------- /visualization/displayTopDet.m: -------------------------------------------------------------------------------- 1 | function displayTopDet(bbs,N,configs,dfNP) 2 | for char_index=1:length(configs.alphabets) 3 | char_index 4 | bbs_char = bbs(bbs(:,6)==char_index,:); 5 | bbs_char = bbNms(bbs_char,dfNP); 6 | [~,sortorder] = sort(bbs_char(:,5),'descend'); 7 | td = min(N,size(bbs_char,1)); 8 | bbs_char = bbs_char(sortorder(1:td),:); 9 | charDetDraw(bbs_char,configs.alphabets); 10 | end 11 | end -------------------------------------------------------------------------------- /visualization/visualizeHOG.m: -------------------------------------------------------------------------------- 1 | function visualizeHOG(w) 2 | % Visualize HOG features/weights. 
3 | % visualizeHOG(w) 4 | 5 | % Make pictures of positive and negative weights 6 | bs = 20; 7 | w = w(:,:,1:9); 8 | scale = max(max(w(:)),max(-w(:))); 9 | pos = HOGpicture(w, bs) * 255/scale; 10 | neg = HOGpicture(-w, bs) * 255/scale; 11 | 12 | % Put pictures together and draw 13 | buff = 10; 14 | pos = padarray(pos, [buff buff], 128, 'both'); 15 | if min(w(:)) < 0 16 | neg = padarray(neg, [buff buff], 128, 'both'); 17 | im = uint8([pos; neg]); 18 | else 19 | im = uint8(pos); 20 | end 21 | imagesc(im); 22 | colormap gray; 23 | axis equal; 24 | axis off; 25 | -------------------------------------------------------------------------------- /visualization/visualizeModel.m: -------------------------------------------------------------------------------- 1 | function visualizeModel(model,is_inverse) 2 | configs=configsgen; 3 | char_dims = model.char_dims; 4 | r = floor(char_dims(1)/configs.bin_size); 5 | c = floor(char_dims(2)/configs.bin_size); 6 | filter = reshape(model.w,[r,c,configs.n_orients*4]); 7 | if is_inverse 8 | ihog=invertHOG(filter); 9 | imshow(ihog) 10 | else 11 | visualizeHOG(filter) 12 | end 13 | end -------------------------------------------------------------------------------- /visualization/visualizeResults.m: -------------------------------------------------------------------------------- 1 | function total_images=visualizeResults(filename,frames,frames_smooth,indeces) 2 | configs=globals; 3 | extracted_frames = configs.extracted_frames; 4 | segments = regexp(filename,'\.','split'); 5 | video_name = sprintf('%s.%s',segments{1},segments{2}); 6 | path_to_frame_folder = fullfile(extracted_frames,video_name); 7 | 8 | max_frame = length(indeces); 9 | 10 | % Run the detection on this frame 11 | total_images = zeros(405,720,3,max_frame); 12 | total_images_s = zeros(405,720,3,max_frame); 13 | current_count = 0; 14 | for frame_index=indeces 15 | frame_index 16 | first_index = floor(frame_index/1000); 17 | second_index = floor(frame_index/100); 18 | path_to_frame = fullfile(path_to_frame_folder,... 
19 | int2str(first_index),int2str(second_index),sprintf('%d.jpg',frame_index)); 20 | I = imread(path_to_frame); 21 | Is = imread(path_to_frame); 22 | 23 | bbs = frames{frame_index+1}; 24 | bbs_smooth = frames_smooth{frame_index+1}; 25 | if ~isempty(bbs) 26 | I = bbApply('embed',I,bbs); 27 | Is = bbApply('embed',Is,bbs_smooth); 28 | end 29 | 30 | total_images(:,:,:,current_count+1) = I; 31 | total_images_s(:,:,:,current_count+1) = Is; 32 | 33 | current_count = current_count + 1; 34 | end 35 | 36 | figure(1); montage(uint8(total_images),'Size',[1,max_frame]); 37 | figure(2); montage(uint8(total_images_s),'Size',[1,max_frame]); 38 | end 39 | -------------------------------------------------------------------------------- /visualization/wordDetDraw.m: -------------------------------------------------------------------------------- 1 | function wordDetDraw( words, showRank, showBbs, showText, col, ls, lw ) 2 | % Draw word bounding boxes 3 | % 4 | % USAGE 5 | % wordDetDraw( words, showRank, showBbs, showText, col, ls, lw ) 6 | % 7 | % INPUTS 8 | % words - array of word objects 9 | % showRank - [1] display the rank of detection 10 | % showBbs - [1] display the character bounding boxes 11 | % showText - [1] display the strings with bounding boxes 12 | % col - [0 1 0] vector for the color of bounding boxes 13 | % 14 | % CREDITS 15 | % Written and maintained by Kai Wang and Boris Babenko 16 | % Copyright notice: license.txt 17 | % Changelog: changelog.txt 18 | % Please email kaw006@cs.ucsd.edu if you have questions. 19 | 20 | if(nargin<2), showRank=1; end 21 | if(nargin<3), showBbs=1; end 22 | if(nargin<4), showText=1; end 23 | if(nargin<5), col=[0 1 0]; end 24 | if(nargin<6), ls='-'; end 25 | if(nargin<7), lw=2; end 26 | n=length(words); 27 | 28 | for i=1:n 29 | if(size(words(1).bb,2)==5) 30 | wbbs=reshape([words.bb],5,[])'; 31 | [~,ord]=sort(wbbs(:,5),'descend'); 32 | words=words(ord); 33 | end 34 | 35 | prop={'LineWidth' lw 'LineStyle' ls 'EdgeColor'}; 36 | tProp={'FontSize',8,'color','k'... 37 | 'VerticalAlignment','bottom','BackgroundColor'}; 38 | if(showBbs) 39 | for b=1:length(words), if(~isfield(words(b),'bbs')), continue; end 40 | bbs=words(b).bbs; alt=ones(1,size(bbs,1)); alt(1,2:2:length(alt))=2; 41 | bbApply('draw',bbs(:,1:4),[1-col; 1-col],2,'-',[],alt); 42 | end 43 | end 44 | for b=1:length(words), bb=words(b).bb; 45 | rectangle('Position',bb(1:4)+[-bb(3)*.05 0 bb(3)*.1 0],prop{:},col); 46 | end 47 | if(showText) 48 | for b=1:length(words), bb=words(b).bb; 49 | if(isfield(words(b),'word')), w=words(b).word; else 50 | w=words(b).lbl; end 51 | if(showRank && (size(words(b).bb,2)==5)) 52 | text(bb(1)-bb(3)*.05,bb(2)+3,... 
53 | sprintf('%i:%s (%.2f)',b,w,words(b).bb(5)),tProp{:},col); 54 | else 55 | text(bb(1)-bb(3)*.05,bb(2)+3,w,tProp{:},col); 56 | end 57 | end 58 | end 59 | 60 | end -------------------------------------------------------------------------------- /wordNms.m: -------------------------------------------------------------------------------- 1 | function outWords=wordNms(words,varargin) 2 | % Word-level non maximal suppression 3 | % 4 | % USAGE 5 | % [outWords] = wordNms( words, varargin ) 6 | % 7 | % INPUTS 8 | % words - word structure before suppression 9 | % varargin - additional params 10 | % .type - ['none'] NMS type (currently just 'none' or 'maxg') 11 | % .thr - [-inf] word threshold 12 | % .overlap - [.5] overlap threshold 13 | % .overDnm - ['min'] area of overlap denominator ('union' or 'min') 14 | % .clf - [] SVM classifier 15 | % 16 | % OUTPUTS 17 | % outWords - rescored and thresholded words 18 | % 19 | % NOTES 20 | % incoming scores: small=good, big=bad 21 | % outgoing scores: small=bad, big=good 22 | % 23 | % CREDITS 24 | % Written and maintained by Kai Wang and Boris Babenko 25 | % Copyright notice: license.txt 26 | % Changelog: changelog.txt 27 | % Please email kaw006@cs.ucsd.edu if you have questions. 28 | 29 | dfs={'type','none','thr',-inf,'overlap',.5,'ovrDnm','min','clf',[]}; 30 | [type,thr,overlap,ovrDnm,clf]=getPrmDflt(varargin,dfs,1); 31 | if(isempty(words)), outWords=[]; return; end 32 | 33 | % SVM rescore 34 | if(~isempty(clf) && isfield(words(1),'bbs')) 35 | x=zeros(length(words),size(clf{3}.SVs,2)); 36 | for j=1:length(words), x(j,:)=computeWordFeatures(words(j)); end 37 | x=bsxfun(@minus,x,clf{1}); x=bsxfun(@times,x,1./clf{2}); 38 | [~,~,py]=svmpredict(zeros(size(x,1),1),x,clf{3}); 39 | py=py*clf{3}.Label(1); 40 | for j=1:length(words), words(j).bb(:,5)=py(j); end 41 | end 42 | 43 | if(~strcmp(type,'none')) 44 | % word NMS 45 | wbb=reshape([words.bb],5,[])'; 46 | assert(all(isfinite(wbb(:,5)))); 47 | kp=nms1(wbb,overlap,ovrDnm,strcmp(type,'maxg')); 48 | words=words(kp); 49 | wbb1=reshape([words.bb],5,[])'; 50 | outWords=words(wbb1(:,5)>thr); 51 | else 52 | wbb1=reshape([words.bb],5,[])'; 53 | outWords=words(wbb1(:,5)>thr); 54 | end 55 | end 56 | 57 | function [kp,rat]=nms1(bbs,overlap,ovrDnm,greedy) 58 | [~,ord]=sort(bbs(:,5),'descend'); bbs=bbs(ord,:); n=size(bbs,1); 59 | O=(compOas(bbs(:,1:4),bbs(:,1:4),strcmp(ovrDnm,'union'))>overlap)-eye(n); 60 | 61 | kp=true(n,1); rat=cell(n,1); [rat{:}]=deal(inf); 62 | for i=1:n 63 | if(~kp(i) && greedy), continue; end 64 | nbrs=O(i,:)>0; if(sum(nbrs)==0), continue; end 65 | v=max(bbs(nbrs,5)); 66 | if(~isempty(v)), rat{i}=bbs(i,5)-v; end 67 | kp(nbrs & (1:n)>i)=false; 68 | end 69 | kp(ord)=kp; rat(ord)=rat; 70 | end 71 | 72 | function oa = compOas(dt,gt,ovrDnm) 73 | m=size(dt,1); n=size(gt,1); oa=zeros(m,n); 74 | de=dt(:,[1 2])+dt(:,[3 4]); da=dt(:,3).*dt(:,4); 75 | ge=gt(:,[1 2])+gt(:,[3 4]); ga=gt(:,3).*gt(:,4); 76 | for i=1:m 77 | for j=1:n 78 | w=min(de(i,1),ge(j,1))-max(dt(i,1),gt(j,1)); if(w<=0), continue; end 79 | h=min(de(i,2),ge(j,2))-max(dt(i,2),gt(j,2)); if(h<=0), continue; end 80 | t=w*h; if(ovrDnm), u=da(i)+ga(j)-t; else u=min(da(i),ga(j)); end 81 | oa(i,j)=t/u; 82 | end 83 | end 84 | end -------------------------------------------------------------------------------- /wordSpot.m: -------------------------------------------------------------------------------- 1 | function [words,t1,t2,bbs]=wordSpot(I,lexS,fModel,wordSvm,nmsPrms,frnPrms,plxPrms) 2 | % Function for End-to-end word spotting function 3 | % 4 | % Full 
description can be found in: 5 | % "End-to-end Scene Text Recognition," 6 | % K. Wang, B. Babenko, and S. Belongie. ICCV 2011 7 | % 8 | % USAGE 9 | % [words1,words] = wordSpot( I, lexS ) 10 | % 11 | % INPUTS 12 | % I - input image 13 | % lexS - input lexicon, comma-separated string 14 | % fModel - trained Fern character classifier 15 | % wordSvm - trained Svm word classifier 16 | % nmsPrms - character-level non max suppression parameters (see bbNms.m) 17 | % frnPrms - fern parameters (see charDet.m) 18 | % plxPrms - plex parameters (see wordDet.m) 19 | % 20 | % OUTPUTS 21 | % words - array of word objects with no threshold 22 | % 23 | % CREDITS 24 | % Written and maintained by Kai Wang and Boris Babenko 25 | % Copyright notice: license.txt 26 | % Changelog: changelog.txt 27 | % Please email kaw006@cs.ucsd.edu if you have questions. 28 | 29 | [dPath,ch,ch1,chC,chClfNm,dfNP]=globals; 30 | 31 | if nargin<3, error('not enough params'); end 32 | if ~exist('wordSvm','var'), wordSvm={}; end 33 | if (~exist('nmsPrms','var') || isempty(nmsPrms)), nmsPrms=dfNP; end 34 | if ~exist('frnPrms','var'), frnPrms={}; end 35 | if ~exist('plxPrms','var'), plxPrms={}; end 36 | 37 | % construct trie 38 | lex=wordDet('build',lexS); 39 | % run character detector (Ferns) 40 | t1S=tic; bbs=charDet(I,fModel,frnPrms); t1=toc(t1S); 41 | % upper and lower case are equivalent 42 | bbs(:,6)=equivClass(bbs(:,6),ch); 43 | % character NMS 44 | bbs=bbNms(bbs,nmsPrms); 45 | 46 | % run word detector (PLEX) 47 | t2S=tic; words=wordDet('plexApply',bbs,ch1,lex,plxPrms); t2=toc(t2S); 48 | if ~isempty(wordSvm) 49 | % if available, score using SVM 50 | fprintf('Rescoring...\n'); 51 | words=wordNms(words,wordSvm); 52 | fprintf('Done'); 53 | end 54 | 55 | --------------------------------------------------------------------------------
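% A minimal end-to-end usage sketch for wordSpot. The image/lexicon values and
% the variable assumed to live inside data/fern_synth.mat are illustrative
% only; see the scripts under demo/ for the maintained entry points.
%   I = imread(fullfile('data','demo.jpg'));
%   S = load(fullfile('data','fern_synth.mat'));  % assumed to hold the trained fern model
%   fn = fieldnames(S); fModel = S.(fn{1});       % grab whichever model variable it stores
%   lexS = 'SALE,MARKET,COFFEE';                  % comma-separated lexicon (made up)
%   [words,t1,t2,bbs] = wordSpot(I,lexS,fModel);  % defaults for the remaining arguments
%   figure; imshow(I); wordDetDraw(words,1,1,1);  % overlay the detected words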