├── LICENSE ├── PlaneSegmentation.m ├── README.md ├── SegmentProposal.m ├── Superpixel2Bbox.m ├── ext ├── EGBS3D │ ├── COPYING │ ├── Makefile │ ├── README │ ├── beach.jpg │ ├── cameraman.jpg │ ├── convolve.h │ ├── disjoint-set.h │ ├── filter.h │ ├── graphseg.cpp │ ├── graphseg.mexa64 │ ├── igraphseg.m │ ├── image.h │ ├── images.jpg │ ├── imconv.h │ ├── imutil.h │ ├── m_segmentWrapper.m │ ├── misc.h │ ├── pnmfile.h │ ├── segment-graph.h │ ├── segment-image.h │ ├── segment.cpp │ └── tmp_test.m ├── YAEL │ ├── Kmeans++.pdf │ ├── NIPS2011_1271.pdf │ ├── b2fvecs_read.m │ ├── bvecs_read.m │ ├── bvecs_size.m │ ├── bvecs_write.m │ ├── fvec_read.m │ ├── fvec_write.m │ ├── fvecs_read.m │ ├── fvecs_size.m │ ├── fvecs_write.m │ ├── gmm_read.m │ ├── ivec_read.m │ ├── ivec_write.m │ ├── ivecs_read.m │ ├── ivecs_size.m │ ├── ivecs_write.m │ ├── nips2011_shindler_largedatasets_01.pdf │ ├── uint8tobit.m │ ├── yael_L2sqr.m │ ├── yael_L2sqr.mexa64 │ ├── yael_cross_distances.m │ ├── yael_cross_distances.mexa64 │ ├── yael_eigs.m │ ├── yael_eigs.mexa64 │ ├── yael_fisher.m │ ├── yael_fisher.mexa64 │ ├── yael_fvecs_normalize.m │ ├── yael_fvecs_normalize.mexa64 │ ├── yael_getting_started_v300.pdf │ ├── yael_gmm.m │ ├── yael_gmm.mexa64 │ ├── yael_kmax.m │ ├── yael_kmax.mexa64 │ ├── yael_kmeans.m │ ├── yael_kmeans.mexa64 │ ├── yael_kmin.m │ ├── yael_kmin.mexa64 │ ├── yael_nn.m │ ├── yael_nn.mexa64 │ ├── yael_refman_v300.pdf │ ├── yael_svds.m │ ├── yael_svds.mexa64 │ └── yael_v300.tar.gz ├── m_Grabcut │ ├── GraphCut.m │ ├── GraphCut3dConstr.mexa64 │ ├── GraphCutConstr.mexa64 │ ├── GraphCutConstrSparse.mexa64 │ ├── GraphCutMex.mexa64 │ ├── m_GM_logPL.m │ ├── m_GrabCut_GUI.m │ ├── m_Grabcut.m │ ├── m_GraphCut.m │ ├── m_Unary_LogPL.m │ ├── m_assignGMM2pixels.m │ ├── m_calcNwt.m │ ├── m_init_GMMs.m │ └── m_learnGMMs.m ├── m_Grabcut_3D │ ├── GraphCut.m │ ├── GraphCut3dConstr.mexa64 │ ├── GraphCutConstr.mexa64 │ ├── GraphCutConstrSparse.mexa64 │ ├── GraphCutMex.mexa64 │ ├── m_GM_logPL_3D.m │ ├── 
m_GrabCut_GUI.m~ │ ├── m_GrabCut_GUI_3D.m │ ├── m_Grabcut_3D.m │ ├── m_GraphCut_3D.m │ ├── m_Unary_LogPL.m~ │ ├── m_Unary_LogPL_3D.m │ ├── m_assignGMM2pixels_3D.m │ ├── m_calcNwt_3D.m │ ├── m_init_GMMs_3D.m │ └── m_learnGMMs_3D.m └── toolbox_nyu_depth_v2 │ ├── README │ ├── apply_distortion.m │ ├── camera_params.m │ ├── cbf.cpp │ ├── cbf.h │ ├── cbf_windows.cpp │ ├── cbf_windows.h │ ├── compile.m │ ├── crop_image.m │ ├── demo_fill_depth_cross_bf_test.m │ ├── demo_project_depth_map.m │ ├── demo_synched_projected_frames.m │ ├── depth_plane2depth_world.m │ ├── depth_rel2depth_abs.m │ ├── depth_world2rgb_world.m │ ├── fill_depth_colorization.m │ ├── fill_depth_cross_bf.m │ ├── get_accel_data.cpp │ ├── get_accel_data.m │ ├── get_instance_masks.m │ ├── get_projection_mask.m │ ├── get_rgb_depth_overlay.m │ ├── get_scene_type_from_scene.m │ ├── get_synched_frames.m │ ├── get_timestamp_from_filename.m │ ├── mex_cbf.cpp │ ├── mex_cbf.mexa64 │ ├── mex_cbf_windows.cpp │ ├── project_depth_map.m │ ├── rgb_plane2rgb_world.m │ ├── rgb_world2rgb_plane.m │ ├── undistort.m │ └── undistort_depth.m └── src ├── eval ├── GetMasksGT.m ├── eval_BBs.m ├── eval_segments.m ├── m_BB_VS_GT.m ├── m_Jaccard_bbox.m ├── m_SEG_VS_GT.m ├── m_eval_segments.m~ └── overlap_care.mexa64 ├── planeDet ├── CalcNormals.m ├── GetCameraMatrix.m └── PlanesDet.m ├── segmentations ├── BBfromDPs.m ├── BBfromMPRs.m ├── BBfromNPRs.m ├── BBfromPRs.m ├── Depth2PCD.m ├── FindBorderPixels.m ├── GraphBasedSegmentation.m ├── HierClustering.m ├── Label2Mask.m ├── Mask2Bbox.m ├── Mask2Cell.m ├── NormalVectorGradient.mexa64 ├── RemoveBadBbox.m ├── RemoveDupBbox.m ├── RemoveDupGCxD.m ├── RemoveDupSeg.m ├── Rgb2Lab.m ├── WatershedSegmentation.m ├── Watershed_region.mexa64 ├── m_BB2mask.m ├── m_Normalize.m ├── m_any2gray.m ├── m_classify_planes.m ├── m_mask2bbox.m ├── m_mask5GC3D_cell.m ├── m_mask5GC_cell.m ├── m_pcd_clustering.out └── m_rescale_bbox.m ├── util └── ParSave.m └── vis ├── Label2Rgb.m └── mat2PCDfile.m 
/PlaneSegmentation.m: -------------------------------------------------------------------------------- 1 | % Zhuo Deng 2 | % 08/27/2015 3 | % Script for plane segmentation: for every image of the selected split, run PlanesDet and save the result (.mat) and a label visualization (.png) under result/nyuv2/planes. 4 | 5 | close all; 6 | addpath('ext/toolbox_nyu_depth_v2'); 7 | addpath('src/planeDet'); 8 | addpath('src/util'); 9 | addpath('src/vis'); 10 | 11 | 12 | % load split data (5000 is subtracted from the stored ids; presumably they are offset by 5000 in nyusplits.mat - confirm) 13 | var = load('data/nyuv2/nyusplits.mat'); 14 | set_type = 'test'; 15 | if strcmp(set_type, 'test') 16 | imlist = var.tst - 5000; 17 | else 18 | imlist = var.trainval - 5000; 19 | end 20 | 21 | % create result path 22 | res_path = 'result/nyuv2/planes'; 23 | if ~exist(res_path, 'dir') 24 | mkdir(res_path); 25 | end 26 | data_path = 'data/nyuv2'; 27 | 28 | % load intrinsic matrix (NOTE(review): K is not referenced again in this script) 29 | K = GetCameraMatrix(); 30 | 31 | 32 | parfor i = 1 : numel(imlist) 33 | fprintf('processing image %d\n', i); 34 | % skip images that already have a result file, so an interrupted run can be resumed 35 | if exist(fullfile(res_path, [num2str(imlist(i)), '.mat']), 'file') 36 | fprintf('file exists, skip ...\n'); 37 | continue; 38 | end 39 | 40 | % load color image (NOTE(review): I is only used by the commented-out imshow on the next line) 41 | I = imread(fullfile(data_path, 'color_crop', [num2str(imlist(i)), '.jpg'])); 42 | %figure; imshow(I); title('color image'); 43 | 44 | % raw depth 45 | var = load(fullfile(data_path, 'rawDepth_crop', [num2str(imlist(i)), '.mat'])); 46 | rawDepth = var.rawDepth; 47 | 48 | % gravity aligned pcd 49 | var = load(fullfile(data_path, 'pcd_align_crop', [num2str(imlist(i)), '.mat'])); 50 | pcd = var.points; 51 | 52 | % plane detection 53 | % Note: PlanesDet expects the point cloud (pcd) in centimetres 54 | Pinfo = PlanesDet(pcd, rawDepth); 55 | 56 | % save data (via ParSave) and a visualization of Pinfo.planesMap 57 | ParSave(fullfile(res_path, [num2str(imlist(i)), '.mat']), Pinfo); 58 | vis = Label2Rgb(Pinfo.planesMap); 59 | imwrite(vis,fullfile(res_path, [num2str(imlist(i)), '.png'])); 60 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RGBD-object-propsal 2 | 3 | This project addresses the 
problem of automatically generating high-quality, class-independent object bounding boxes and segmentations using color and depth images of indoor scenes. 4 | The software is licensed under the GNU General Public License. 5 | If you use this project for your research, please cite: 6 | 7 | @article{deng2016unsupervised, 8 | title={Unsupervised object region proposals for RGB-D indoor scenes}, 9 | author={Deng, Zhuo and Todorovic, Sinisa and Latecki, Longin Jan}, 10 | journal={Computer Vision and Image Understanding}, 11 | year={2016}, 12 | publisher={Elsevier} 13 | } 14 | 15 | 1. Get the NYU data: 16 | 17 | wget http://www.cis.temple.edu/~latecki/TestData/NYUv2data.zip 18 | 2. Get precomputed results (plane segmentations, bounding boxes, object segments): 19 | 20 | wget http://www.cis.temple.edu/~latecki/TestData/NYUv2result.zip 21 | -------------------------------------------------------------------------------- /SegmentProposal.m: -------------------------------------------------------------------------------- 1 | % propose object segments for each RGB-D image in the NYU Depth v2 dataset (all paths used below are under data/nyuv2). 
2 | % zhuo deng 3 | % 09/02/2015 4 | 5 | addpath('ext/toolbox_nyu_depth_v2'); 6 | addpath('src/planeDet'); 7 | addpath('ext/m_Grabcut'); 8 | addpath('ext/m_Grabcut_3D'); 9 | addpath('ext/YAEL'); 10 | addpath('src/segmentations'); 11 | 12 | % load split data 13 | var = load('data/nyuv2/nyusplits.mat'); 14 | set_type = 'test'; 15 | if strcmp(set_type, 'test') 16 | imlist = var.tst - 5000; 17 | else 18 | imlist = var.trainval - 5000; 19 | end 20 | 21 | % result paths: segments go to res_path, their bounding boxes to bb_path 22 | res_path ='result/nyuv2/Seg'; 23 | if ~exist(res_path, 'dir') 24 | mkdir(res_path); 25 | end 26 | data_path = 'data/nyuv2'; 27 | bb_path = 'result/nyuv2/BB'; 28 | if ~exist(bb_path, 'dir') 29 | mkdir(bb_path); 30 | end 31 | 32 | Kmat = GetCameraMatrix(); 33 | for i = 1 % numel(imlist) -- NOTE(review): only the first image is processed; the full range is commented out 34 | 35 | if exist(fullfile(res_path, [num2str(imlist(i)), '.mat']), 'file') 36 | fprintf('file exists, skip ...\n'); 37 | continue; 38 | end 39 | 40 | % load the initial bounding boxes from result/nyuv2/BB_init (the directory Superpixel2Bbox.m writes to) 41 | var = load(fullfile('result/nyuv2/BB_init', [num2str(imlist(i)), '.mat'])); 42 | BB = var.BB; 43 | 44 | % load rgb and rawDepth (meters) 45 | I = imread(fullfile(data_path, 'color_crop', [num2str(imlist(i)), '.jpg'])); 46 | var = load(fullfile(data_path, 'rawDepth_crop', [num2str(imlist(i)), '.mat'])); 47 | rawDepth = var.rawDepth; 48 | [h, w] = size(rawDepth); 49 | 50 | % set NaN depth to 0, then fill holes based on color 51 | rD = rawDepth; 52 | rD(isnan(rawDepth)) = 0; 53 | depth_fill = fill_depth_colorization(I, double(rD)); 54 | % point cloud from the filled depth; the factor 100 presumably converts meters to centimetres - confirm 55 | pcd = Depth2PCD(depth_fill) * 100; 56 | 57 | % GC2D: GrabCut on the color image, using the boxes in BB 58 | disp('Run GrabCut (RGB) ...'); 59 | seg_GC2D = m_mask5GC_cell(I, BB, true); 60 | 61 | % GC3D: GrabCut on the color image plus point cloud, using the boxes in BB 62 | disp('Run GrabCut (RGB-D) ...'); 63 | seg_GC3D = m_mask5GC3D_cell(I, pcd, BB, true); 64 | 65 | % load Watershed masks from cache/sp/<id>/WSMasks_c.mat (written by Superpixel2Bbox.m) 66 | var = load(fullfile('cache/sp', num2str(imlist(i)), 'WSMasks_c.mat')); 67 | seg_WS = var.masksWS_cell; 68 | 69 | % MS: graph-based segmentation at the scales listed in K 70 | K = [300, 500]; MIN = 200; sigma = 0.5; 71 | seg_MS = GraphBasedSegmentation( I, pcd, K, MIN, sigma); 72 | 73 | % DP: masks from cache/sp/<id>/DPMasks_c.mat (written by Superpixel2Bbox.m) 74 | var = 
load(fullfile('cache/sp', num2str(imlist(i)), 'DPMasks_c.mat')); 75 | seg_DP = var.masksDP_cell; 76 | 77 | % remove duplicates: first between the two GrabCut sets, then across all segment sources 78 | seg = RemoveDupGCxD(seg_GC2D, seg_GC3D, [h, w]); 79 | clear seg_GC2D seg_GC3D; 80 | segCells = cat(1, seg, seg_DP, seg_WS, seg_MS); 81 | clear seg_DP seg_WS seg_MS seg; 82 | fprintf('number of segs (before): %d\n', numel(segCells)); 83 | seg_Full = RemoveDupSeg(segCells, [h, w]); 84 | fprintf('number of segs (after): %d\n', numel(seg_Full)); 85 | save(fullfile(res_path, [num2str(imlist(i)), '.mat']), 'seg_Full', '-v7.3'); 86 | 87 | % one bounding box per segment. NOTE(review): this loops over segCells (the list before RemoveDupSeg), not the saved seg_Full - confirm that is intended 88 | N = numel(segCells); 89 | BB_Full_seg = zeros(N,4); 90 | for j = 1 : N 91 | tmp = zeros(h, w); 92 | tmp(segCells{j}) = 1; 93 | BB_Full_seg(j,:) = m_mask2bbox(tmp); 94 | end 95 | save(fullfile(bb_path, [num2str(imlist(i)), '.mat']), 'BB_Full_seg', '-v7.3'); 96 | 97 | end 98 | 99 | -------------------------------------------------------------------------------- /Superpixel2Bbox.m: -------------------------------------------------------------------------------- 1 | % bounding boxes proposal from superpixels 2 | % zhuo deng 3 | % 08/31/2015 4 | 5 | close all; 6 | 7 | addpath('src/segmentations'); 8 | addpath('ext/EGBS3D'); 9 | 10 | % load split data 11 | var = load('data/nyuv2/nyusplits.mat'); 12 | set_type = 'test'; 13 | if strcmp(set_type, 'test') 14 | imlist = var.tst - 5000; 15 | else 16 | imlist = var.trainval - 5000; 17 | end 18 | 19 | % result path 20 | res_path ='result/nyuv2/BB_init'; 21 | if ~exist(res_path, 'dir') 22 | mkdir(res_path); 23 | end 24 | data_path = 'data/nyuv2'; 25 | 26 | sp_path = 'cache/sp'; 27 | if ~exist(sp_path, 'dir') 28 | mkdir(sp_path); 29 | end 30 | 31 | % load intrinsic matrix 32 | Kmat = GetCameraMatrix(); 33 | 34 | parfor i = 1 : 1 %1 : numel(imlist) 35 | fprintf('processing image %d\n', i); 36 | 37 | if exist(fullfile(res_path, [num2str(imlist(i)), '.mat']), 'file') 38 | fprintf('file exists, skip ...\n'); 39 | continue; 40 | end 41 | 42 | if ~exist(fullfile(sp_path, 
num2str(imlist(i))), 'dir') 43 | mkdir(fullfile(sp_path, num2str(imlist(i)))); 44 | end 45 | 46 | % load color image and record its size 47 | I = imread(fullfile(data_path, 'color_crop', [num2str(imlist(i)), '.jpg'])); 48 | [h, w, d] = size(I); 49 | 50 | % raw depth 51 | var = load(fullfile(data_path, 'rawDepth_crop', [num2str(imlist(i)), '.mat'])); 52 | rawDepth = var.rawDepth; 53 | 54 | % gravity aligned pcd 55 | var = load(fullfile(data_path, 'pcd_align_crop', [num2str(imlist(i)), '.mat'])); 56 | pcd = var.points; 57 | 58 | %% multi-scale graph based segmentations 59 | % parameters 60 | K = [100, 300, 500]; MIN = 200; sigma = 0.5; 61 | 62 | % collect masks 63 | masks_cell = GraphBasedSegmentation( I, pcd, K, MIN, sigma); 64 | 65 | % load plane detections from result/nyuv2/planes (the directory PlaneSegmentation.m writes to) 66 | var = load (fullfile('result/nyuv2/planes', [num2str(imlist(i)), '.mat'])); 67 | Pinfo = var.Pinfo; 68 | planesMap = Pinfo.planesMap; 69 | planes = Pinfo.planes; 70 | inliers = Pinfo.inliers; 71 | 72 | % watershed on color, raw depth and hole-filled depth (NaN depth is set to 0 before filling) 73 | rD = rawDepth; 74 | rD(isnan(rawDepth)) = 0; 75 | depth_fill = fill_depth_colorization(I, double(rD)); 76 | masksWS_cell = WatershedSegmentation(I, rawDepth, depth_fill); 77 | 78 | % bounding boxes from non-planar regions; the boxes returned by m_rescale_bbox (factor 1.3) are appended to the originals, and the watershed masks are cached for SegmentProposal.m 79 | [bbox_np, ~] = BBfromNPRs(masks_cell, masksWS_cell, planesMap); 80 | BB1 = m_rescale_bbox(bbox_np, [h,w], 1.3); 81 | bbox_np = cat(1, bbox_np, BB1); 82 | ParSave(fullfile(sp_path, num2str(imlist(i)),'WSMasks_c.mat'), masksWS_cell); 83 | 84 | % big region proposals from planes 85 | [isV, isH, isB] = m_classify_planes(planes, pcd); 86 | bbox_b = BBfromMPRs(inliers(~isB), pcd); 87 | 88 | % object on vertical and horizontal plane proposals; the second argument of m_segmentWrapper is NaN-filled here, presumably to segment on color only - confirm 89 | tmp = []; 90 | for j = 1:numel(K) 91 | [mapColor, ~] = m_segmentWrapper(I, nan(size(I)), K(j), MIN, sigma); 92 | tmp = cat(1, tmp, Label2Mask(mapColor)); 93 | end 94 | [mapColor, ~] = m_segmentWrapper(I, nan(size(I)), 300, 200, 0.2); 95 | tmp = cat(1, tmp, Label2Mask(mapColor), masksWS_cell); 96 | [bbox_p, ~ ] = BBfromPRs (tmp, [h, w], 
inliers); 97 | 98 | 99 | % hierarchical clustering 100 | clusterTolerance = [2, 5, 10]; 101 | [bbox_hc, masksHC_cell] = HierClustering(pcd, clusterTolerance, inliers, isV, isH, isB, i); 102 | BB1 = m_rescale_bbox(bbox_hc, [h,w], 1.3); 103 | bbox_hc = cat(1, bbox_hc, BB1); 104 | 105 | % detected plane proposals 106 | [bbox_dp, masksDP] = BBfromDPs(inliers, [h, w]); 107 | masksDP_cell = Mask2Cell(masksDP); 108 | ParSave(fullfile(sp_path, num2str(imlist(i)),'DPMasks_c.mat'), masksDP_cell); 109 | masksNPR_cell = []; 110 | 111 | 112 | % all bbox 113 | BB = cat(1, bbox_np, bbox_b, bbox_p, bbox_hc, bbox_dp); 114 | validBB = (BB(:,3) > 1) & (BB(:,4) >1); 115 | BB = BB(validBB, :); 116 | area = BB(:,3).*BB(:,4); 117 | [~, ind] = sort(area, 'descend'); 118 | BB = BB(ind, :); 119 | [BB, ~] = RemoveDupBbox(BB, 0.98); 120 | 121 | % save 122 | ParSave(fullfile(res_path, [num2str(imlist(i)), '.mat']), BB); 123 | end 124 | -------------------------------------------------------------------------------- /ext/EGBS3D/Makefile: -------------------------------------------------------------------------------- 1 | INCDIR = -I. 2 | DBG = -g 3 | OPT = -O3 4 | CPP = g++ 5 | CFLAGS = $(DBG) $(OPT) $(INCDIR) 6 | LINK = -lm 7 | 8 | .cpp.o: 9 | $(CPP) $(CFLAGS) -c $< -o $@ 10 | # default goal: build the MEX wrapper via the igraphseg rule (this Makefile has no rule named graphseg) 11 | all: igraphseg 12 | 13 | igraphseg: graphseg.cpp segment-image.h segment-graph.h disjoint-set.h 14 | mex -I. graphseg.cpp 15 | 16 | 17 | segment: segment.cpp segment-image.h segment-graph.h disjoint-set.h 18 | $(CPP) $(CFLAGS) -o segment segment.cpp $(LINK) 19 | 20 | clean: 21 | /bin/rm -f segment *.o 22 | 23 | clean-all: clean 24 | /bin/rm -f *~ 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /ext/EGBS3D/README: -------------------------------------------------------------------------------- 1 | 2 | Implementation of the segmentation algorithm described in: 3 | 4 | Efficient Graph-Based Image Segmentation 5 | Pedro F. Felzenszwalb and Daniel P. 
Huttenlocher 6 | International Journal of Computer Vision, 59(2) September 2004. 7 | 8 | The program takes a color image (PPM format) and produces a segmentation 9 | with a random color assigned to each region. 10 | 11 | 1) Type "make" to compile "segment". 12 | 13 | 2) Run "segment sigma k min input output". 14 | 15 | The parameters are: (see the paper for details) 16 | 17 | sigma: Used to smooth the input image before segmenting it. 18 | k: Value for the threshold function. 19 | min: Minimum component size enforced by post-processing. 20 | input: Input image. 21 | output: Output image. 22 | 23 | Typical parameters are sigma = 0.5, k = 500, min = 20. 24 | Larger values for k result in larger components in the result. 25 | 26 | Get the source from http://people.cs.uchicago.edu/~pff/segment 27 | -------------------------------------------------------------------------------- /ext/EGBS3D/beach.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/EGBS3D/beach.jpg -------------------------------------------------------------------------------- /ext/EGBS3D/cameraman.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/EGBS3D/cameraman.jpg -------------------------------------------------------------------------------- /ext/EGBS3D/convolve.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* convolution: row-wise 1-D convolution helpers. NOTE(review): in this listing the header names after #include and the template arguments of image / std::vector appear to have been lost in extraction - restore them from the upstream source before compiling */ 20 | 21 | #ifndef CONVOLVE_H 22 | #define CONVOLVE_H 23 | 24 | #include 25 | #include 26 | #include 27 | #include "image.h" 28 | 29 | /* convolve each row of src with a symmetric kernel given by its half `mask` (mask[0] is the centre tap, mask[i] weights the pixels i columns to the left and to the right); columns outside the image are clamped to the border. dst is flipped: the result for (x, y) is stored at dst(y, x). */ 30 | static void convolve_even(image *src, image *dst, 31 | std::vector &mask) { 32 | int width = src->width(); 33 | int height = src->height(); 34 | int len = mask.size(); 35 | 36 | for (int y = 0; y < height; y++) { 37 | for (int x = 0; x < width; x++) { 38 | float sum = mask[0] * imRef(src, x, y); 39 | for (int i = 1; i < len; i++) { 40 | sum += mask[i] * 41 | (imRef(src, std::max(x-i,0), y) + 42 | imRef(src, std::min(x+i, width-1), y)); 43 | } 44 | imRef(dst, y, x) = sum; 45 | } 46 | } 47 | } 48 | 49 | /* same as convolve_even, but mask[i] weights the difference (left pixel minus right pixel) instead of the sum. dst is flipped: the result for (x, y) is stored at dst(y, x). 
*/ 50 | static void convolve_odd(image *src, image *dst, 51 | std::vector &mask) { 52 | int width = src->width(); 53 | int height = src->height(); 54 | int len = mask.size(); 55 | 56 | for (int y = 0; y < height; y++) { 57 | for (int x = 0; x < width; x++) { 58 | float sum = mask[0] * imRef(src, x, y); 59 | for (int i = 1; i < len; i++) { 60 | sum += mask[i] * 61 | (imRef(src, std::max(x-i,0), y) - 62 | imRef(src, std::min(x+i, width-1), y)); 63 | } 64 | imRef(dst, y, x) = sum; 65 | } 66 | } 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /ext/EGBS3D/disjoint-set.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef DISJOINT_SET 20 | #define DISJOINT_SET 21 | 22 | // disjoint-set forests using union-by-rank and path compression (sort of). 
23 | /* uni_elt: one forest entry - rank (used by join), p (index of the parent entry), size (element count, accumulated on the entry that join keeps as parent) */ 24 | typedef struct { 25 | int rank; 26 | int p; 27 | int size; 28 | } uni_elt; 29 | /* universe: disjoint-set forest over the integers 0 .. elements-1; num_sets() is the current number of sets */ 30 | class universe { 31 | public: 32 | universe(int elements); 33 | ~universe(); 34 | int find(int x); 35 | void join(int x, int y); 36 | int size(int x) const { return elts[x].size; } 37 | int num_sets() const { return num; } 38 | 39 | private: 40 | uni_elt *elts; 41 | int num; 42 | }; 43 | /* start with `elements` singleton sets: every entry is its own parent, rank 0, size 1 */ 44 | universe::universe(int elements) { 45 | elts = new uni_elt[elements]; 46 | num = elements; 47 | for (int i = 0; i < elements; i++) { 48 | elts[i].rank = 0; 49 | elts[i].size = 1; 50 | elts[i].p = i; 51 | } 52 | } 53 | 54 | universe::~universe() { 55 | delete [] elts; 56 | } 57 | /* follow parent links from x until an entry that is its own parent, re-point x at that root (only x itself is re-pointed, not the entries in between) and return the root */ 58 | int universe::find(int x) { 59 | int y = x; 60 | while (y != elts[y].p) 61 | y = elts[y].p; 62 | elts[x].p = y; 63 | return y; 64 | } 65 | /* link entries x and y: the lower-rank one becomes the child (on a tie x goes under y and y's rank is incremented), the parent's size absorbs the child's and num is decremented. find() is not called here - NOTE(review): presumably callers pass roots returned by find(); confirm at the call sites */ 66 | void universe::join(int x, int y) { 67 | if (elts[x].rank > elts[y].rank) { 68 | elts[y].p = x; 69 | elts[x].size += elts[y].size; 70 | } else { 71 | elts[x].p = y; 72 | elts[y].size += elts[x].size; 73 | if (elts[x].rank == elts[y].rank) 74 | elts[y].rank++; 75 | } 76 | num--; 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /ext/EGBS3D/filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* simple filters. NOTE(review): in this listing the header names after #include and the template arguments of image / std::vector appear to have been lost in extraction - restore them from the upstream source before compiling */ 20 | 21 | #ifndef FILTER_H 22 | #define FILTER_H 23 | 24 | #include 25 | #include 26 | #include "image.h" 27 | #include "misc.h" 28 | #include "convolve.h" 29 | #include "imconv.h" 30 | /* WIDTH: a generated mask has ceil(sigma * WIDTH) + 1 taps (see MAKE_FILTER) */ 31 | #define WIDTH 4.0 32 | 33 | /* normalize a half-mask so that the full symmetric kernel sums to one in absolute value: |mask[0]| is counted once, |mask[i]| for i >= 1 twice */ 34 | static void normalize(std::vector &mask) { 35 | int len = mask.size(); 36 | float sum = 0; 37 | for (int i = 1; i < len; i++) { 38 | sum += fabs(mask[i]); 39 | } 40 | sum = 2*sum + fabs(mask[0]); 41 | for (int i = 0; i < len; i++) { 42 | mask[i] /= sum; 43 | } 44 | } 45 | 46 | /* make filters: MAKE_FILTER(name, fun) defines make_<name>(sigma), which clamps sigma to at least 0.01 and fills mask[i] with the expression `fun` for i = 0 .. ceil(sigma * WIDTH) */ 47 | #define MAKE_FILTER(name, fun) \ 48 | static std::vector make_ ## name (float sigma) { \ 49 | sigma = std::max(sigma, 0.01F); \ 50 | int len = (int)ceil(sigma * WIDTH) + 1; \ 51 | std::vector mask(len); \ 52 | for (int i = 0; i < len; i++) { \ 53 | mask[i] = fun; \ 54 | } \ 55 | return mask; \ 56 | } 57 | 58 | MAKE_FILTER(fgauss, exp(-0.5*square(i/sigma))); 59 | 60 | /* convolve image with gaussian filter: two convolve_even passes with the normalized fgauss mask; each pass writes its result transposed, so dst has the dimensions of src. Returns a newly allocated image. */ 61 | static image *smooth(image *src, float sigma) { 62 | std::vector mask = make_fgauss(sigma); 63 | normalize(mask); 64 | 65 | image *tmp = new image(src->height(), src->width(), false); 66 | image *dst = new image(src->width(), src->height(), false); 67 | convolve_even(src, tmp, mask); 68 | convolve_even(tmp, dst, mask); 69 | 70 | delete tmp; 71 | return dst; 72 | } 73 | 74 | /* convolve image with gaussian filter: overload that first converts src with imageUCHARtoFLOAT and then calls smooth on the converted copy (the parameter's pixel type is not visible in this listing) */ 75 | image *smooth(image *src, float sigma) { 76 | image *tmp = imageUCHARtoFLOAT(src); 77 | image *dst = smooth(tmp, sigma); 78 | delete tmp; 79 | return dst; 80 | } 81 | 82 | /* compute laplacian: second differences in x and y summed, for interior pixels only (the loops skip the one-pixel border); returns a newly allocated image */ 83 | static image *laplacian(image *src) { 84 | int width = src->width(); 85 | int height = src->height(); 86 | image *dst = new 
image(width, height); 87 | 88 | for (int y = 1; y < height-1; y++) { 89 | for (int x = 1; x < width-1; x++) { 90 | float d2x = imRef(src, x-1, y) + imRef(src, x+1, y) - 91 | 2*imRef(src, x, y); 92 | float d2y = imRef(src, x, y-1) + imRef(src, x, y+1) - 93 | 2*imRef(src, x, y); 94 | imRef(dst, x, y) = d2x + d2y; 95 | } 96 | } 97 | return dst; 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /ext/EGBS3D/graphseg.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/EGBS3D/graphseg.mexa64 -------------------------------------------------------------------------------- /ext/EGBS3D/igraphseg.m: -------------------------------------------------------------------------------- 1 | %%%%MatlabWrapperFromRoboticsVisionMatlabToolboxes 2 | %IGRAPHSEG Graph-based image segmentation 3 | % 4 | % L = IGRAPHSEG(IM, PTS, K, MIN) is a graph-based segmentation of the color 5 | % image IM (HxWx3). L (HxW) is an image where each element is the label 6 | % assigned to the corresponding pixel in IM. K is the scale parameter, 7 | % and a larger value indicates a preference for larger regions, MIN is the 8 | % minimum region size (pixels). 9 | % PTS is forwarded unchanged to the GRAPHSEG MEX file (presumably the per-pixel 3-D points; confirm against graphseg.cpp). 10 | % L = IGRAPHSEG(IM, PTS, K, MIN, SIGMA) as above and SIGMA is the width of 11 | % a Gaussian which is used to initially smooth the image (default 0.5). 12 | % 13 | % [L,NREG] = IGRAPHSEG(IM, PTS, K, MIN, SIGMA) as above but NREG is the number of 14 | % regions found. 15 | % 16 | % Example:: 17 | % im = iread('58060.jpg'); 18 | % [labels,maxval] = igraphseg(im, pts, 1500, 100, 0.5); 19 | % idisp(labels) 20 | % 21 | % Reference:: 22 | % "Efficient graph-based image segmentation", 23 | % P. Felzenszwalb and D. Huttenlocher, 24 | % Int. Journal on Computer Vision, 25 | % vol. 59, pp. 167–181, Sept. 2004. 26 | % 27 | % Notes:: 28 | % - Requires a color uint8 image. 
29 | % - The hard work is done by the GRAPHSEG MEX file (built from graphseg.cpp). 30 | % - With zero smoothing the number of regions can be massive and can crash 31 | % MATLAB. 32 | % 33 | % Author:: 34 | % Pedro Felzenszwalb, 2006. 35 | % 36 | % See also ITHRESH, IMSER. 37 | 38 | % wrapper function by Peter Corke 39 | 40 | function [L_,M_] = igraphseg(im, pts, K, mn, sigma) 41 | if size(im, 3) ~= 3 42 | error('MVTB:igraphseg:badarg', 'Input image must be color'); 43 | end 44 | 45 | if ~isa(im, 'uint8') 46 | error('MVTB:igraphseg:badarg', 'image must be of uint8 type'); 47 | end 48 | 49 | if nargin < 4 50 | error('MVTB:igraphseg:badarg', 'must specify image, pts, K, min'); 51 | end 52 | 53 | if nargin < 5 54 | sigma = 0.5; 55 | end 56 | 57 | [L,M] = graphseg(im, pts, K, mn, sigma); 58 | 59 | % labels are not sequential but sparsely distributed over a large 60 | % numeric range; they are returned as-is (the relabelling below is disabled) 61 | 62 | % not efficient!!! 63 | % uniqLabels = unique(L(:)); 64 | % % create a map 65 | % map=containers.Map(uniqLabels, 1:M); 66 | % L = cell2mat( values(map, num2cell(L) ) ); % map the values 67 | 68 | if nargout >= 1 69 | L_ = L; 70 | end 71 | if nargout > 1 72 | M_ = M; 73 | end -------------------------------------------------------------------------------- /ext/EGBS3D/image.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* a simple image class. NOTE(review): in this listing the header name after #include and the template parameter lists / arguments appear to have been lost in extraction - restore them from the upstream source before compiling */ 20 | 21 | #ifndef IMAGE_H 22 | #define IMAGE_H 23 | 24 | #include 25 | 26 | template 27 | class image { 28 | public: 29 | /* create a width x height image; the pixel memory is zeroed only when init is true */ 30 | image(const int width, const int height, const bool init = true); 31 | 32 | /* delete an image (frees the pixel buffer and the row-pointer table) */ 33 | ~image(); 34 | 35 | /* init an image: set every pixel to val */ 36 | void init(const T &val); 37 | 38 | /* copy an image: returns a newly allocated image holding a memcpy of the pixel data */ 39 | image *copy() const; 40 | 41 | /* get the width of an image. */ 42 | int width() const { return w; } 43 | 44 | /* get the height of an image. */ 45 | int height() const { return h; } 46 | 47 | /* image data: w * h pixels in one buffer, row after row. */ 48 | T *data; 49 | 50 | /* row pointers: access[y] points at the first pixel of row y. */ 51 | T **access; 52 | 53 | private: 54 | int w, h; 55 | }; 56 | 57 | /* use imRef to access image data: pixel in column x of row y, no bounds check. */ 58 | #define imRef(im, x, y) (im->access[y][x]) 59 | 60 | /* use imPtr to get pointer to image data (address of the pixel in column x of row y). 
*/ 61 | #define imPtr(im, x, y) &(im->access[y][x]) 62 | 63 | template 64 | image::image(const int width, const int height, const bool init) { 65 | w = width; 66 | h = height; 67 | data = new T[w * h]; // allocate space for image data 68 | access = new T*[h]; // allocate space for row pointers 69 | 70 | // initialize row pointers 71 | for (int i = 0; i < h; i++) 72 | access[i] = data + (i * w); 73 | 74 | if (init) 75 | memset(data, 0, w * h * sizeof(T)); 76 | } 77 | 78 | template 79 | image::~image() { 80 | delete [] data; 81 | delete [] access; 82 | } 83 | 84 | template 85 | void image::init(const T &val) { 86 | T *ptr = imPtr(this, 0, 0); 87 | T *end = imPtr(this, w-1, h-1); 88 | while (ptr <= end) 89 | *ptr++ = val; 90 | } 91 | 92 | 93 | template 94 | image *image::copy() const { 95 | image *im = new image(w, h, false); 96 | memcpy(im->data, data, w * h * sizeof(T)); 97 | return im; 98 | } 99 | 100 | #endif 101 | 102 | -------------------------------------------------------------------------------- /ext/EGBS3D/imconv.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* image conversion */ 20 | 21 | #ifndef CONV_H 22 | #define CONV_H 23 | 24 | #include 25 | #include "image.h" 26 | #include "imutil.h" 27 | #include "misc.h" 28 | 29 | #define RED_WEIGHT 0.299 30 | #define GREEN_WEIGHT 0.587 31 | #define BLUE_WEIGHT 0.114 32 | 33 | static image *imageRGBtoGRAY(image *input) { 34 | int width = input->width(); 35 | int height = input->height(); 36 | image *output = new image(width, height, false); 37 | 38 | for (int y = 0; y < height; y++) { 39 | for (int x = 0; x < width; x++) { 40 | imRef(output, x, y) = (uchar) 41 | (imRef(input, x, y).r * RED_WEIGHT + 42 | imRef(input, x, y).g * GREEN_WEIGHT + 43 | imRef(input, x, y).b * BLUE_WEIGHT); 44 | } 45 | } 46 | return output; 47 | } 48 | 49 | static image *imageGRAYtoRGB(image *input) { 50 | int width = input->width(); 51 | int height = input->height(); 52 | image *output = new image(width, height, false); 53 | 54 | for (int y = 0; y < height; y++) { 55 | for (int x = 0; x < width; x++) { 56 | imRef(output, x, y).r = imRef(input, x, y); 57 | imRef(output, x, y).g = imRef(input, x, y); 58 | imRef(output, x, y).b = imRef(input, x, y); 59 | } 60 | } 61 | return output; 62 | } 63 | 64 | static image *imageUCHARtoFLOAT(image *input) { 65 | int width = input->width(); 66 | int height = input->height(); 67 | image *output = new image(width, height, false); 68 | 69 | for (int y = 0; y < height; y++) { 70 | for (int x = 0; x < width; x++) { 71 | imRef(output, x, y) = imRef(input, x, y); 72 | } 73 | } 74 | return output; 75 | } 76 | 77 | static image *imageINTtoFLOAT(image *input) { 78 | int width = input->width(); 79 | int height = input->height(); 80 | image *output = new image(width, height, false); 81 | 82 | for (int y = 0; y < height; y++) { 83 | for (int x = 
0; x < width; x++) { 84 | imRef(output, x, y) = imRef(input, x, y); 85 | } 86 | } 87 | return output; 88 | } 89 | /* Rescale [min, max] linearly onto [0, UCHAR_MAX]; values outside that range are clamped. */ 90 | static image *imageFLOATtoUCHAR(image *input, 91 | float min, float max) { 92 | int width = input->width(); 93 | int height = input->height(); 94 | image *output = new image(width, height, false); 95 | 96 | if (max == min) 97 | return output; 98 | 99 | float scale = UCHAR_MAX / (max - min); 100 | for (int y = 0; y < height; y++) { 101 | for (int x = 0; x < width; x++) { 102 | /* clamp in float, then narrow: casting to uchar first made bound() a no-op */ float val = (imRef(input, x, y) - min) * scale; 103 | imRef(output, x, y) = (uchar)bound(val, 0.0f, (float)UCHAR_MAX); 104 | } 105 | } 106 | return output; 107 | } 108 | 109 | static image *imageFLOATtoUCHAR(image *input) { 110 | float min, max; 111 | min_max(input, &min, &max); 112 | return imageFLOATtoUCHAR(input, min, max); 113 | } 114 | 115 | static image *imageUCHARtoLONG(image *input) { 116 | int width = input->width(); 117 | int height = input->height(); 118 | image *output = new image(width, height, false); 119 | 120 | for (int y = 0; y < height; y++) { 121 | for (int x = 0; x < width; x++) { 122 | imRef(output, x, y) = imRef(input, x, y); 123 | } 124 | } 125 | return output; 126 | } 127 | /* Rescale [min, max] linearly onto [0, UCHAR_MAX]; values outside that range are clamped. */ 128 | static image *imageLONGtoUCHAR(image *input, long min, long max) { 129 | int width = input->width(); 130 | int height = input->height(); 131 | image *output = new image(width, height, false); 132 | 133 | if (max == min) 134 | return output; 135 | 136 | float scale = UCHAR_MAX / (float)(max - min); 137 | for (int y = 0; y < height; y++) { 138 | for (int x = 0; x < width; x++) { 139 | /* clamp in float, then narrow: casting to uchar first made bound() a no-op */ float val = (imRef(input, x, y) - min) * scale; 140 | imRef(output, x, y) = (uchar)bound(val, 0.0f, (float)UCHAR_MAX); 141 | } 142 | } 143 | return output; 144 | } 145 | 146 | static image *imageLONGtoUCHAR(image *input) { 147 | long min, max; 148 | min_max(input, &min, &max); 149 | return imageLONGtoUCHAR(input, min, max); 150 | } 151 | /* Rescale [min, max] linearly onto [0, UCHAR_MAX]; values outside that range are clamped. */ 152 | static image *imageSHORTtoUCHAR(image 
*input, 153 | short min, short max) { 154 | int width = input->width(); 155 | int height = input->height(); 156 | image *output = new image(width, height, false); 157 | 158 | if (max == min) 159 | return output; 160 | 161 | float scale = UCHAR_MAX / (float)(max - min); 162 | for (int y = 0; y < height; y++) { 163 | for (int x = 0; x < width; x++) { 164 | /* clamp in float, then narrow: casting to uchar first made bound() a no-op */ float val = (imRef(input, x, y) - min) * scale; 165 | imRef(output, x, y) = (uchar)bound(val, 0.0f, (float)UCHAR_MAX); 166 | } 167 | } 168 | return output; 169 | } 170 | 171 | static image *imageSHORTtoUCHAR(image *input) { 172 | short min, max; 173 | min_max(input, &min, &max); 174 | return imageSHORTtoUCHAR(input, min, max); 175 | } 176 | 177 | #endif 178 | -------------------------------------------------------------------------------- /ext/EGBS3D/imutil.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* some image utilities */ 20 | 21 | #ifndef IMUTIL_H 22 | #define IMUTIL_H 23 | 24 | #include "image.h" 25 | #include "misc.h" 26 | 27 | /* compute minimum and maximum value in an image */ 28 | template 29 | void min_max(image *im, T *ret_min, T *ret_max) { 30 | int width = im->width(); 31 | int height = im->height(); 32 | 33 | T min = imRef(im, 0, 0); 34 | T max = imRef(im, 0, 0); 35 | for (int y = 0; y < height; y++) { 36 | for (int x = 0; x < width; x++) { 37 | T val = imRef(im, x, y); 38 | if (min > val) 39 | min = val; 40 | if (max < val) 41 | max = val; 42 | } 43 | } 44 | 45 | *ret_min = min; 46 | *ret_max = max; 47 | } 48 | 49 | /* threshold image */ 50 | template 51 | image *threshold(image *src, int t) { 52 | int width = src->width(); 53 | int height = src->height(); 54 | image *dst = new image(width, height); 55 | 56 | for (int y = 0; y < height; y++) { 57 | for (int x = 0; x < width; x++) { 58 | imRef(dst, x, y) = (imRef(src, x, y) >= t); 59 | } 60 | } 61 | 62 | return dst; 63 | } 64 | 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /ext/EGBS3D/m_segmentWrapper.m: -------------------------------------------------------------------------------- 1 | function [map, N] = m_segmentWrapper( I, pts, K, MIN, SIGMA) 2 | %M_SEGMENTWRAPPER Summary of this function goes here 3 | % remove the relabel section of Peter Corke's function igraphseg.m 4 | % add corresponding part here 5 | 6 | [L, N] = igraphseg(I, pts, K, MIN, SIGMA); 7 | Ids = unique(L(:)); 8 | A = zeros(max(Ids),1); 9 | A(Ids) = 1:numel(Ids); 10 | map = A(L); 11 | 12 | end 13 | 14 | -------------------------------------------------------------------------------- /ext/EGBS3D/misc.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | /* random stuff */ 20 | 21 | #ifndef MISC_H 22 | #define MISC_H 23 | 24 | #include 25 | 26 | #ifndef M_PI 27 | #define M_PI 3.141592653589793 28 | #endif 29 | 30 | typedef unsigned char uchar; 31 | 32 | typedef struct { uchar r, g, b; } rgb; 33 | 34 | inline bool operator==(const rgb &a, const rgb &b) { 35 | return ((a.r == b.r) && (a.g == b.g) && (a.b == b.b)); 36 | } 37 | 38 | template 39 | inline T abs(const T &x) { return (x > 0 ? x : -x); }; 40 | 41 | template 42 | inline int sign(const T &x) { return (x >= 0 ? 1 : -1); }; 43 | 44 | template 45 | inline T square(const T &x) { return x*x; }; 46 | 47 | template 48 | inline T bound(const T &x, const T &min, const T &max) { 49 | return (x < min ? min : (x > max ? 
max : x)); 50 | } 51 | 52 | template 53 | inline bool check_bound(const T &x, const T&min, const T &max) { 54 | return ((x < min) || (x > max)); 55 | } 56 | 57 | inline int vlib_round(float x) { return (int)(x + 0.5F); } 58 | 59 | inline int vlib_round(double x) { return (int)(x + 0.5); } 60 | 61 | inline double gaussian(double val, double sigma) { 62 | return exp(-square(val/sigma)/2)/(sqrt(2*M_PI)*sigma); 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /ext/EGBS3D/segment-graph.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef SEGMENT_GRAPH 20 | #define SEGMENT_GRAPH 21 | 22 | #include 23 | #include 24 | #include "disjoint-set.h" 25 | 26 | // threshold function 27 | #define THRESHOLD(size, c) (c/size) 28 | 29 | typedef struct { 30 | float w; 31 | int a, b; 32 | } edge; 33 | 34 | bool operator<(const edge &a, const edge &b) { 35 | return a.w < b.w; 36 | } 37 | 38 | /* 39 | * Segment a graph 40 | * 41 | * Returns a disjoint-set forest representing the segmentation. 42 | * 43 | * num_vertices: number of vertices in graph. 
44 | * num_edges: number of edges in graph 45 | * edges: array of edges (sorted in place by weight). 46 | * c: constant for threshold function. 47 | */ 48 | universe *segment_graph(int num_vertices, int num_edges, edge *edges, 49 | float c) { 50 | // sort edges by weight 51 | std::sort(edges, edges + num_edges); 52 | 53 | // make a disjoint-set forest 54 | universe *u = new universe(num_vertices); 55 | 56 | // init thresholds 57 | float *threshold = new float[num_vertices]; 58 | for (int i = 0; i < num_vertices; i++) 59 | threshold[i] = THRESHOLD(1,c); 60 | 61 | // for each edge, in non-decreasing weight order... 62 | for (int i = 0; i < num_edges; i++) { 63 | edge *pedge = &edges[i]; 64 | 65 | // components connected by this edge 66 | int a = u->find(pedge->a); 67 | int b = u->find(pedge->b); 68 | if (a != b) { 69 | if ((pedge->w <= threshold[a]) && 70 | (pedge->w <= threshold[b])) { 71 | u->join(a, b); 72 | a = u->find(a); 73 | threshold[a] = pedge->w + THRESHOLD(u->size(a), c); 74 | } 75 | } 76 | } 77 | 78 | // free up (array form: threshold was allocated with new[]) 79 | delete[] threshold; 80 | return u; 81 | } 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /ext/EGBS3D/segment.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2006 Pedro Felzenszwalb 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "segment-image.h" 25 | 26 | int main(int argc, char **argv) { 27 | if (argc != 6) { 28 | fprintf(stderr, "usage: %s sigma k min input(ppm) output(ppm)\n", argv[0]); 29 | return 1; 30 | } 31 | 32 | float sigma = atof(argv[1]); 33 | float k = atof(argv[2]); 34 | int min_size = atoi(argv[3]); 35 | 36 | printf("loading input image.\n"); 37 | image *input = loadPPM(argv[4]); 38 | 39 | printf("processing\n"); 40 | int num_ccs; 41 | image *seg = segment_image(input, sigma, k, min_size, &num_ccs); 42 | savePPM(seg, argv[5]); 43 | 44 | printf("got %d components\n", num_ccs); 45 | printf("done! uff...thats hard work.\n"); 46 | 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /ext/EGBS3D/tmp_test.m: -------------------------------------------------------------------------------- 1 | % parameters 2 | K = 100; 3 | MIN = 200; 4 | sigma = 0.5; 5 | pid = 34; 6 | 7 | % color image 8 | Img = imread(fullfile('./data/NYUV2/nyu_color_crop/',[num2str(pid) '.jpg'])); 9 | pts = nan(size(Img)); 10 | [mapColor, ~] = m_segmentWrapper(Img, pts, K, MIN, sigma); 11 | 12 | % depth image (grayscale) 13 | load (fullfile('./data/NYUV2/m_pcdAlign/',[num2str(pid) '.mat'])); 14 | rawDepth = points(:,:,3); 15 | maxi = max(rawDepth(:)); 16 | mini = min(rawDepth(:)); 17 | grayIm = uint8(255 * (rawDepth - mini)/(maxi-mini)); 18 | grayIm(isnan(rawDepth)) = 0; 19 | grayIm = cat(3, grayIm, grayIm, grayIm); 20 | [mapDepth, ~] = m_segmentWrapper(grayIm, pts, K, MIN, sigma); 21 | 22 | % color + depth; 23 | t = tic; 24 | % normalize 25 | x = points(:,:,1); 26 | y = points(:,:,2); 27 | z = points(:,:,3); 28 | x = uint8(255*(x - min(x(:))) / 
(max(x(:)) - min(x(:)))); 29 | y = uint8(255*(y - min(y(:))) / (max(y(:)) - min(y(:)))); 30 | z = uint8(255*(z - min(z(:))) / (max(z(:)) - min(z(:)))); 31 | pts = cat(3, x,y,z); 32 | pts = double(pts); 33 | [map, N] = m_segmentWrapper(Img, pts, K, MIN, sigma); 34 | toc(t) 35 | 36 | % visualization 37 | imc = ColorizeLabelImage(int32(mapColor)); 38 | imd = ColorizeLabelImage(int32(mapDepth)); 39 | imcd = ColorizeLabelImage(int32(map)); 40 | 41 | im1 = cat(2, Img, imc); 42 | im2 = cat(2, imd, imcd); 43 | im = cat(1, im1, im2); 44 | figure; 45 | imshow(im); 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /ext/YAEL/Kmeans++.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/Kmeans++.pdf -------------------------------------------------------------------------------- /ext/YAEL/NIPS2011_1271.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/NIPS2011_1271.pdf -------------------------------------------------------------------------------- /ext/YAEL/b2fvecs_read.m: -------------------------------------------------------------------------------- 1 | % Read a set of vectors stored in the bvec format (int + n * float) 2 | % The function returns a set of output floating point vector (one vector per column) 3 | % 4 | % Syntax: 5 | % v = b2fvecs_read (filename) -> read all vectors 6 | % v = b2fvecs_read (filename, n) -> read n vectors 7 | % v = b2fvecs_read (filename, [a b]) -> read the vectors from a to b (indices starts from 1) 8 | function v = b2fvecs_read (filename, bounds) 9 | 10 | % open the file and count the number of descriptors 11 | fid = fopen (filename, 'rb'); 12 | 13 | if fid == -1 14 | error ('I/O error : Unable to open 
the file %s\n', filename) 15 | end 16 | 17 | % Read the vector size 18 | d = fread (fid, 1, 'int'); 19 | 20 | vecsizeof = 1 * 4 + d; 21 | 22 | % Get the number of vectors 23 | fseek (fid, 0, 1); 24 | a = 1; 25 | bmax = ftell (fid) / vecsizeof; 26 | b = bmax; 27 | 28 | if nargin >= 2 29 | if length (bounds) == 1 30 | b = bounds; 31 | 32 | elseif length (bounds) == 2 33 | a = bounds(1); 34 | b = bounds(2); 35 | end 36 | end 37 | 38 | assert (a >= 1); 39 | if b > bmax 40 | b = bmax; 41 | end 42 | 43 | if b == 0 | b < a 44 | v = []; 45 | fclose (fid); 46 | return; 47 | end 48 | 49 | % compute the number of vectors that are really read and go in starting positions 50 | n = b - a + 1; 51 | 52 | 53 | 54 | fseek (fid, (a - 1) * vecsizeof, -1); 55 | 56 | fprintf ('b2fvecs_read -> pos=%d\n', ftell (fid)); 57 | 58 | % read n vectors 59 | v = fread (fid, (d + 4) * n, 'uint8=>single'); 60 | v = reshape (v, d + 4, n); 61 | 62 | 63 | % Check that the 4 header bytes (vector dimension) are identical for every vector 64 | assert (sum (v (1, 2:end) == v(1, 1)) == n - 1); 65 | assert (sum (v (2, 2:end) == v(2, 1)) == n - 1); 66 | assert (sum (v (3, 2:end) == v(3, 1)) == n - 1); 67 | assert (sum (v (4, 2:end) == v(4, 1)) == n - 1); 68 | v = v (5:end, :); 69 | 70 | fclose (fid); 71 | -------------------------------------------------------------------------------- /ext/YAEL/bvecs_read.m: -------------------------------------------------------------------------------- 1 | % Read a set of vectors stored in the bvec format (int + n * uint8) 2 | % The function returns a set of output uint8 vector (one vector per column) 3 | % 4 | % Syntax: 5 | % v = bvecs_read (filename) -> read all vectors 6 | % v = bvecs_read (filename, n) -> read n vectors 7 | % v = bvecs_read (filename, [a b]) -> read the vectors from a to b (indices starts from 1) 8 | function v = bvecs_read (filename, bounds) 9 | 10 | % open the file and count the number of descriptors 11 | fid = fopen (filename, 'rb'); 12 | 13 | if 
fid == -1 14 | error ('I/O error : Unable to open the file %s\n', filename) 15 | end 16 | 17 | % Read the vector size 18 | d = fread (fid, 1, 'int'); 19 | 20 | vecsizeof = 1 * 4 + d; 21 | 22 | % Get the number of vectrors 23 | fseek (fid, 0, 1); 24 | a = 1; 25 | bmax = ftell (fid) / vecsizeof; 26 | b = bmax; 27 | 28 | if nargin >= 2 29 | if length (bounds) == 1 30 | b = bounds; 31 | 32 | elseif length (bounds) == 2 33 | a = bounds(1); 34 | b = bounds(2); 35 | end 36 | end 37 | 38 | assert (a >= 1); 39 | if b > bmax 40 | b = bmax; 41 | end 42 | 43 | if b == 0 | b < a 44 | v = []; 45 | fclose (fid); 46 | return; 47 | end 48 | 49 | % compute the number of vectors that are really read and go in starting positions 50 | n = b - a + 1; 51 | fseek (fid, (a - 1) * vecsizeof, -1); 52 | 53 | % read n vectors 54 | v = fread (fid, (d + 4) * n, 'uint8=>uint8'); 55 | v = reshape (v, d + 4, n); 56 | 57 | 58 | % Check if the first column (dimension of the vectors) is correct 59 | assert (sum (v (1, 2:end) == v(1, 1)) == n - 1); 60 | assert (sum (v (2, 2:end) == v(2, 1)) == n - 1); 61 | assert (sum (v (3, 2:end) == v(3, 1)) == n - 1); 62 | assert (sum (v (4, 2:end) == v(4, 1)) == n - 1); 63 | v = v (5:end, :); 64 | 65 | fclose (fid); 66 | -------------------------------------------------------------------------------- /ext/YAEL/bvecs_size.m: -------------------------------------------------------------------------------- 1 | % Return the number of vectors contained in a bvecs files and their dimension 2 | % 3 | % Syntax: [n,d] = bvecs_size (filename) 4 | function [n, d] = bvecs_size (filename) 5 | 6 | % open the file and count the number of descriptors 7 | fid = fopen (filename, 'rb'); 8 | 9 | if fid == -1 10 | error ('I/O error : Unable to open the file %s\n', filename) 11 | end 12 | 13 | % Read the vector size 14 | d = fread (fid, 1, 'int'); 15 | 16 | % Read the number of vectors 17 | fseek (fid, 0, 1); 18 | n = ftell (fid) / (1 * 4 + d); 19 | fseek (fid, 0, -1); 20 | 21 | fclose 
(fid); 22 | -------------------------------------------------------------------------------- /ext/YAEL/bvecs_write.m: -------------------------------------------------------------------------------- 1 | % This function writes a set of uint8 vectors (one per column of v) to a file in the bvecs format 2 | 3 | function bvecs_write (filename, v) 4 | 5 | % open the file for writing; d is the vector dimension, n the number of vectors 6 | fid = fopen (filename, 'wb'); 7 | d = size (v, 1); 8 | n = size (v, 2); 9 | 10 | for i = 1:n 11 | 12 | % first write the vector size 13 | count = fwrite (fid, d, 'int'); 14 | 15 | if count ~= 1 16 | error ('Unable to write vector dimension: count !=1 \n'); 17 | end 18 | 19 | % write the vector components 20 | count = fwrite (fid, v(:,i), 'uint8'); 21 | 22 | if count ~= d 23 | error ('Unable to write vector elements: count !=1 \n'); 24 | end 25 | end 26 | 27 | fclose (fid); -------------------------------------------------------------------------------- /ext/YAEL/fvec_read.m: -------------------------------------------------------------------------------- 1 | % This function reads a vector from an already-open file (fid) in the libit format 2 | 3 | function [v,d] = fvec_read (fid) 4 | 5 | % first read the vector size 6 | d = fread (fid, 1, 'int'); 7 | 8 | % read the elements 9 | v = fread (fid, d, 'float=>single'); 10 | 11 | -------------------------------------------------------------------------------- /ext/YAEL/fvec_write.m: -------------------------------------------------------------------------------- 1 | % This function writes a vector to an already-open file (fid) in the libit format 2 | 3 | function fvec_write (fid, v) 4 | 5 | % first write the vector size 6 | count = fwrite (fid, length(v), 'int'); 7 | 8 | if (count ~= 1) 9 | error ('Unable to write vector dimension: count !=1 \n'); 10 | end 11 | 12 | % write the vector components 13 | count = fwrite (fid, v, 'float'); 14 | 15 | if (count ~= length (v)) 16 | error ('Unable to write vector elements: count !=1 \n'); 17 | end 18 | 19 | 
-------------------------------------------------------------------------------- /ext/YAEL/fvecs_read.m: -------------------------------------------------------------------------------- 1 | % Read a set of vectors stored in the fvec format (int + n * float) 2 | % The function returns a set of output vector (one vector per column) 3 | % 4 | % Syntax: 5 | % v = fvecs_read (filename) -> read all vectors 6 | % v = fvecs_read (filename, n) -> read n vectors 7 | % v = fvecs_read (filename, [a b]) -> read the vectors from a to b (indices starts from 1) 8 | function v = fvecs_read (filename, bounds) 9 | 10 | % open the file and count the number of descriptors 11 | fid = fopen (filename, 'rb'); 12 | 13 | if fid == -1 14 | error ('I/O error : Unable to open the file %s\n', filename) 15 | end 16 | 17 | % Read the vector size 18 | d = fread (fid, 1, 'int'); 19 | vecsizeof = 1 * 4 + d * 4; 20 | 21 | % Get the number of vectors 22 | fseek (fid, 0, 1); 23 | a = 1; 24 | bmax = ftell (fid); 25 | if bmax == 0 26 | v = []; fclose (fid); 27 | return; 28 | end 29 | bmax = floor(bmax / vecsizeof); 30 | if bmax == 0 31 | v = []; fclose (fid); 32 | return; 33 | end 34 | b = bmax; 35 | 36 | if nargin >= 2 37 | if length (bounds) == 1 38 | b = bounds; 39 | 40 | elseif length (bounds) == 2 41 | a = bounds(1); 42 | b = bounds(2); 43 | end 44 | end 45 | 46 | assert (a >= 1); 47 | if b > bmax 48 | b = bmax; 49 | end 50 | 51 | if b == 0 | b < a 52 | v = []; 53 | fclose (fid); 54 | return; 55 | end 56 | 57 | % compute the number of vectors that are really read and go in starting positions 58 | n = b - a + 1; 59 | fseek (fid, (a - 1) * vecsizeof, -1); 60 | 61 | % read n vectors 62 | v = fread (fid, (d + 1) * n, 'float=>single'); 63 | v = reshape (v, d + 1, n); 64 | 65 | % Check if the first column (dimension of the vectors) is correct 66 | assert (sum (v (1, 2:end) == v(1, 1)) == n - 1); 67 | v = v (2:end, :); 68 | 69 | fclose (fid); 70 | -------------------------------------------------------------------------------- 
/ext/YAEL/fvecs_size.m: -------------------------------------------------------------------------------- 1 | % Return the number of vectors contained in a fvecs files and their dimension 2 | % 3 | % Syntax: [n,d] = fvecs_size (filename) 4 | function [n, d] = fvecs_size (filename) 5 | 6 | % open the file and count the number of descriptors 7 | fid = fopen (filename, 'rb'); 8 | 9 | if fid == -1 10 | error ('I/O error : Unable to open the file %s\n', filename) 11 | end 12 | 13 | % Read the vector size 14 | d = fread (fid, 1, 'int'); 15 | 16 | % Read the number of vectors 17 | fseek (fid, 0, 1); 18 | n = ftell (fid) / (1 * 4 + d * 4); 19 | fseek (fid, 0, -1); 20 | 21 | fclose (fid); 22 | -------------------------------------------------------------------------------- /ext/YAEL/fvecs_write.m: -------------------------------------------------------------------------------- 1 | % This function reads a vector of float vectors 2 | % 3 | % Usage: fvecs_write (filename, v) 4 | % where v is a set of vector (stored columnwise) 5 | function fvecs_write (filename, v) 6 | 7 | % open the file and count the number of descriptors 8 | fid = fopen (filename, 'wb'); 9 | d = size (v, 1); 10 | n = size (v, 2); 11 | 12 | for i = 1:n 13 | 14 | % first write the vector size 15 | count = fwrite (fid, d, 'int'); 16 | 17 | if count ~= 1 18 | error ('Unable to write vector dimension: count !=1 \n'); 19 | end 20 | 21 | % write the vector components 22 | count = fwrite (fid, v(:, i), 'float'); 23 | 24 | if count ~= d 25 | error ('Unable to write vector elements: count !=1 \n'); 26 | end 27 | end 28 | 29 | fclose (fid); -------------------------------------------------------------------------------- /ext/YAEL/gmm_read.m: -------------------------------------------------------------------------------- 1 | % This function reads the parameters of a gmm file 2 | % 3 | % Usage: [w, mu, sigma] = gmm_read (filename) 4 | function [w, mu, sigma] = gmm_read (filename) 5 | 6 | % open the file and count the 
number of descriptors 7 | fid = fopen (filename, 'rb'); 8 | 9 | if fid == -1 10 | error ('I/O error : Unable to open the file %s\n', filename) 11 | end 12 | 13 | 14 | % first read the vector size and the number of centroids 15 | d = fread (fid, 1, 'int'); 16 | k = fread (fid, 1, 'int'); 17 | 18 | % read the elements 19 | w = fread (fid, k, 'float=>single'); 20 | mu = fread (fid, d*k, 'float=>single'); 21 | sigma = fread (fid, d*k, 'float=>single'); 22 | 23 | mu = reshape (mu, d, k); 24 | sigma = reshape (sigma, d, k); 25 | 26 | fclose (fid); 27 | -------------------------------------------------------------------------------- /ext/YAEL/ivec_read.m: -------------------------------------------------------------------------------- 1 | % This function reads a vector from a file in the libit format 2 | 3 | function [v,d] = ivec_read (fid) 4 | 5 | % first read the vector size 6 | d = fread (fid, 1, 'int'); 7 | 8 | % read the elements 9 | v = fread (fid, d, 'int'); 10 | 11 | -------------------------------------------------------------------------------- /ext/YAEL/ivec_write.m: -------------------------------------------------------------------------------- 1 | % This function writes a vector from a file in the libit format 2 | 3 | function [v,d] = ivec_write (fid, v) 4 | 5 | 6 | 7 | % first write the vector size 8 | count = fwrite (fid, length(v), 'int'); 9 | 10 | if count ~= 1 11 | error ('Unable to write vector dimension: count !=1 \n'); 12 | end 13 | 14 | % write the vector components 15 | count = fwrite (fid, v, 'int'); 16 | 17 | if count ~= length (v) 18 | error ('Unable to write vector elements: count !=1 \n'); 19 | end 20 | 21 | -------------------------------------------------------------------------------- /ext/YAEL/ivecs_read.m: -------------------------------------------------------------------------------- 1 | % Read a set of vectors stored in the ivec format (int + n * int) 2 | % The function returns a set of output vector (one vector per column) 3 | % 4 | % 
Syntax: 5 | % v = ivecs_read (filename) -> read all vectors 6 | % v = ivecs_read (filename, n) -> read n vectors 7 | % v = ivecs_read (filename, [a b]) -> read the vectors from a to b (indices starts from 1) 8 | function v = ivecs_read (filename, bounds) 9 | 10 | % open the file and count the number of descriptors 11 | fid = fopen (filename, 'rb'); 12 | 13 | if fid == -1 14 | error ('I/O error : Unable to open the file %s\n', filename) 15 | end 16 | 17 | % Read the vector size 18 | d = fread (fid, 1, 'int'); 19 | vecsizeof = 1 * 4 + d * 4; 20 | 21 | % Get the number of vectors 22 | fseek (fid, 0, 1); 23 | a = 1; 24 | bmax = ftell (fid); 25 | if bmax == 0 26 | v = []; fclose (fid); 27 | return; 28 | end 29 | bmax = floor(bmax / vecsizeof); 30 | if bmax == 0 31 | v = []; fclose (fid); 32 | return; 33 | end 34 | b = bmax; 35 | 36 | if nargin >= 2 37 | if length (bounds) == 1 38 | b = bounds; 39 | 40 | elseif length (bounds) == 2 41 | a = bounds(1); 42 | b = bounds(2); 43 | end 44 | end 45 | 46 | assert (a >= 1); 47 | if b > bmax 48 | b = bmax; 49 | end 50 | 51 | if b == 0 | b < a 52 | v = []; 53 | fclose (fid); 54 | return; 55 | end 56 | 57 | % compute the number of vectors that are really read and go in starting positions 58 | n = b - a + 1; 59 | fseek (fid, (a - 1) * vecsizeof, -1); 60 | 61 | % read n vectors 62 | v = fread (fid, (d + 1) * n, 'int=>int32'); 63 | v = reshape (v, d + 1, n); 64 | 65 | % Check if the first column (dimension of the vectors) is correct 66 | assert (sum (v (1, 2:end) == v(1, 1)) == n - 1); 67 | v = v (2:end, :); 68 | 69 | fclose (fid); 70 | 71 | -------------------------------------------------------------------------------- /ext/YAEL/ivecs_size.m: -------------------------------------------------------------------------------- 1 | % Return the number of vectors contained in a ivecs file and their dimension 2 | % 3 | % Syntax: [n,d] = ivecs_size (filename) 4 | function [n, d] = ivecs_size (filename) 5 | 6 | % open the file and count the number of descriptors 7 | fid = 
fopen (filename, 'rb'); 8 | 9 | if fid == -1 10 | error ('I/O error : Unable to open the file %s\n', filename) 11 | end 12 | 13 | % Read the vector size 14 | d = fread (fid, 1, 'int'); 15 | 16 | % Read the number of vectors 17 | fseek (fid, 0, 1); 18 | n = ftell (fid) / (1 * 4 + d * 4); 19 | fseek (fid, 0, -1); 20 | 21 | fclose (fid); 22 | -------------------------------------------------------------------------------- /ext/YAEL/ivecs_write.m: -------------------------------------------------------------------------------- 1 | % This function writes a vector from a file in the libit format 2 | 3 | function ivecs_write (filename, v) 4 | 5 | % open the file and count the number of descriptors 6 | fid = fopen (filename, 'wb'); 7 | d = size (v, 1); 8 | n = size (v, 2); 9 | 10 | for i = 1:n 11 | 12 | % first write the vector size 13 | count = fwrite (fid, d, 'int'); 14 | 15 | if count ~= 1 16 | error ('Unable to write vector dimension: count !=1 \n'); 17 | end 18 | 19 | % write the vector components 20 | count = fwrite (fid, v(:,i), 'int'); 21 | 22 | if count ~= d 23 | error ('Unable to write vector elements: count !=1 \n'); 24 | end 25 | end 26 | 27 | fclose (fid); 28 | -------------------------------------------------------------------------------- /ext/YAEL/nips2011_shindler_largedatasets_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/nips2011_shindler_largedatasets_01.pdf -------------------------------------------------------------------------------- /ext/YAEL/uint8tobit.m: -------------------------------------------------------------------------------- 1 | % This function translates a uint8 vector into a binary vector 2 | % Usage: b = uint8tobit (v) 3 | % The vectors are column-stored 4 | function b = uint8tobit (v) 5 | 6 | n = size (v, 2); 7 | dbytes = size (v, 1); 8 | d = dbytes * 8; 9 | 10 | b = zeros(d, n, 
'uint8'); 11 | 12 | for i = 1:n 13 | for j = 1:dbytes 14 | b((j-1)*8+1:j*8 ,i) = bitget (v(j, i), 1:8); 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /ext/YAEL/yael_L2sqr.m: -------------------------------------------------------------------------------- 1 | % Compute all the distances between two sets of vectors 2 | % 3 | % Usage: [dis] = dis_L2sqr(q, v) 4 | % 5 | % Parameters: 6 | % q, v sets of vectors (1 vector per column) 7 | % 8 | % Returned values 9 | % dis the corresponding *square* distances 10 | % vectors of q corresponds to row, and columns for v 11 | function dis = dis_L2sqr (q, v) 12 | 13 | 14 | % vector dimension and number of vectors in the dataset 15 | n = size (v, 2); 16 | d = size (v, 1); 17 | k = n; 18 | 19 | % number of query vectors 20 | nq = size (q, 2); 21 | 22 | % Compute the square norm of the dataset vectors (sum along dim 1 explicitly so 1-D vectors are not collapsed to a scalar) 23 | v_nr = sum (v .* v, 1); 24 | 25 | % first compute the square norm the queries of the slice 26 | d_nr = sum (q .* q, 1)'; 27 | 28 | % the most efficient way I found to compute distances in matlab 29 | dis = repmat (v_nr, nq, 1) + repmat (d_nr, 1, n) - 2 * q' * v; 30 | 31 | 32 | neg = find (dis < 0) ; 33 | 34 | dis(neg) = 0; 35 | 36 | 37 | -------------------------------------------------------------------------------- /ext/YAEL/yael_L2sqr.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_L2sqr.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_cross_distances.m: -------------------------------------------------------------------------------- 1 | % Compute all the distances between two sets of vectors 2 | % 3 | % Usage: [dis] = dis_cross_distances(q, v, distype, nt) 4 | % 5 | % Parameters: 6 | % q, v sets of 
vectors (1 vector per column) 7 | % distype distance type: 1=L1, 8 | % 2=L2 -> Warning: return the square L2 distance 9 | % 3=chi-square -> Warning: return the square Chi-square 10 | % 4=signed chi-square 11 | % 16=cosine 12 | % nt number of threads (not used for L2 distance) 13 | % 14 | % Returned values 15 | % dis the corresponding distances 16 | % vectors of q corresponds to row, and columns for v 17 | function dis = yael_cross_distances (q, v, distype, nt) 18 | 19 | error ('This function is available only if compiled (Mex-file)'); 20 | -------------------------------------------------------------------------------- /ext/YAEL/yael_cross_distances.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_cross_distances.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_eigs.m: -------------------------------------------------------------------------------- 1 | % Computes a few eigenvalues/vectors for a symmetric matrix a. 2 | % 3 | % [eigenvecs, eigenvals] = yael_eigs (a, nev) 4 | % 5 | % compared to the Matlab version, this one operates on single matrices, 6 | % and eigenvals is a vector (instead of a matrix). 7 | % 8 | % requires nev <= size(v, 1) / 2 (otherwise it's better to compute all eigenvectors anyway). 
9 | % 10 | 11 | -------------------------------------------------------------------------------- /ext/YAEL/yael_eigs.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_eigs.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_fisher.m: -------------------------------------------------------------------------------- 1 | % This function computes the fisher vector representation of a set of vectors 2 | % See "Fisher kernels on visual vocabularies for image categorization" 3 | % by F. Perronnin and C. Dance, CVPR'2007 4 | % 5 | % Usage: 6 | % fishervector = yael_fisher (v, w, mu, sigma) 7 | % fishervector = yael_fisher (v, w, mu, sigma, 'opt1', 'opt2', ...) 8 | % 9 | % where 10 | % v is the set of descriptors to describe by the Fisher Kernel representation 11 | % w, mu and sigma are the parameters of the mixture (learned by, e.g., yael_gmm) 12 | % 13 | % 14 | % By default, only the derivatives associated with the mu component are computed 15 | % 16 | % Options: 17 | % 'weights' includes the mixture weights in the representation 18 | % 'sigma' includes the terms associated with variance 19 | % 'nomu' do not compute the terms associated with mean 20 | % 'nonorm' do not normalize the fisher vector 21 | % 'verbose' 22 | -------------------------------------------------------------------------------- /ext/YAEL/yael_fisher.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_fisher.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_fvecs_normalize.m: -------------------------------------------------------------------------------- 1 | % This function normalizes a set of
vectors 2 | % Parameters: 3 | % v the set of vectors to be normalized (column stored) 4 | % nr the norm for which the normalization is performed (Default: Euclidean) 5 | % 6 | % Output: 7 | % vout the normalized vectors (single precision, one per column) 8 | % vnr the norms of the input vectors 9 | % 10 | % Remark: the function returns NaN for vectors of null norm 11 | function [vout, vnr] = yael_fvecs_normalize (v, nr) 12 | 13 | fprintf ('# Warning: consider using the Mex implementation instead of this pure Matlab one\n'); 14 | 15 | if nargin < 2 16 | nr = 2; 17 | end 18 | 19 | % nr-norm of each column: abs() keeps odd norms (e.g. L1) correct, explicit dim handles 1-row input 20 | vnr = (sum (abs (v) .^ nr, 1)) .^ (1 / nr); 21 | 22 | % scale every column by the inverse of its norm (no n-by-n diagonal matrix is built) 23 | vout = single (bsxfun (@times, double (v), double (1 ./ vnr))); 24 | -------------------------------------------------------------------------------- /ext/YAEL/yael_fvecs_normalize.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_fvecs_normalize.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_getting_started_v300.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_getting_started_v300.pdf -------------------------------------------------------------------------------- /ext/YAEL/yael_gmm.m: -------------------------------------------------------------------------------- 1 | % This function learn a diagonal form Gaussian mixture 2 | % 3 | % Usage: 4 | % [w, mu, sigma] = yael_gmm (v, k) 5 | % [w, mu, sigma] = yael_gmm (v, k, opts) 6 | % 7 | % where w, mu and sigma are the parameters of the mixture: 8 | % w is the weights of the different Gaussian 9 | % mu contains the mean of all Gaussians (one Gaussian per column) 10 | % sigma contains the diagonal
variance of all Gaussian (one Gaussian per column) 11 | % 12 | % Available options are: 13 | % redo number of times the initializing k-means is run (best clustering returned) 14 | % niter number of iterations in the k-means 15 | % verbose the verbosity level. 0: no output, 1 (default), 2: detailed 16 | % nt number of threads. For octave users. 17 | % Warning: nt=1 (default) should provide 18 | % multi-threading depending on matlab version or architecture. 19 | % Warning: do not use nt>1 in that case, as it will cause 20 | % memory leaks 21 | % seed 0 by default. Specify a value !=0 to randomize initialization 22 | 23 | -------------------------------------------------------------------------------- /ext/YAEL/yael_gmm.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_gmm.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_kmax.m: -------------------------------------------------------------------------------- 1 | % This function returns the k largest values of a vector 2 | % 3 | % Usage: [val, idx] = yael_kmax (v,k) 4 | % 5 | % Parameters: 6 | % v the input vector. If v is a matrix, then the k largest values 7 | % of each column are returned (similar to the max function) 8 | % k the number of neighbors to be returned. Must be smaller than vector length 9 | % 10 | % Output: 11 | % val a k-dimensional vector containing the (ordered) set of largest values 12 | % In case v was a matrix, val is a k*n matrix with one column per vector 13 | % idx the indexes, in the original vector, where the largest values have been found 14 | % This output parameter is not mandatory 15 | % 16 | % Remarks: if k=1, this function is equivalent to searching the max.
17 | % if k is equal to the vector length, it is equivalent to the sort function 18 | function [val, idx] = yael_kmax (v, k) 19 | 20 | fprintf ('# Warning: This is NOT the fast implementation. \n#You should use the Mex version instead\n'); 21 | 22 | [val, idx] = sort (v, 'descend'); 23 | 24 | val = val (1:k, :); 25 | idx = idx (1:k, :); 26 | -------------------------------------------------------------------------------- /ext/YAEL/yael_kmax.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_kmax.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_kmeans.m: -------------------------------------------------------------------------------- 1 | % This function performs the clustering of a set of vector v in k clusters 2 | % 3 | % C = yael_kmeans (v, k) 4 | % [C, I] = yael_kmeans (v, k) 5 | % [C, D, I] = yael_kmeans (v, k) 6 | % [C, D, I, Nassign] = yael_kmeans (v, k) 7 | % returns a set of k centroids, stored column-wise in C 8 | % The input vectors are given in the matrix V (one vector per column) 9 | % 10 | % Optionally the function can returns 11 | % I: the cluster index associated with each input vector, 12 | % D: the square distance D between each vector and its centroid, 13 | % Nassign: the total number of centroids assigned to each cluster 14 | % 15 | % Options: typical usage: 16 | % C = yael_kmeans (v, k, 'redo', redo, 'verbose', verbose, 'seed', seed, 'niter', niter) 17 | % 18 | % Available options are: 19 | % redo number of times the k-means is run (best clustering returned) 20 | % verbose the verbosity level. 0: no output, 1 (default), 2: detailled 21 | % seed 0 by default. Specify a value !=0 to set a specific seed 22 | % init 1 by default. 
0=kmeans++ initialization, 1=random centers 23 | % niter number of iterations (default: 50) 24 | -------------------------------------------------------------------------------- /ext/YAEL/yael_kmeans.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_kmeans.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_kmin.m: -------------------------------------------------------------------------------- 1 | % This function returns the k smallest values of a vector 2 | % 3 | % Usage: [val, idx] = yael_kmin (v,k) 4 | % 5 | % Parameters: 6 | % v the vector to be normalized. If v is a matrix, then the k smallest values 7 | % of each column are returned (similar to the min function) 8 | % k the number of neighbors to be returned. Must be smaller than vector length 9 | % 10 | % Output: 11 | % val a k-dimensional vector containing the (ordered) set of smallest values 12 | % In case v was a matrix, val is a k*n matrix with one column per vector 13 | % idx the indexes, in the original vector, where the smallest values have been found 14 | % This output parameter is not mandatory 15 | % 16 | % Remarks: if k=1, this function is equivalent to searching the min. 17 | % if k is equal to the vector length, it is equivalent to the sort function 18 | function [val, idx] = yael_kmin (v, k) 19 | 20 | fprintf ('# Warning: This is NOT the fast implementation. 
\n#You should use the Mex version instead\n'); 21 | 22 | [val, idx] = sort (v); 23 | 24 | val = val (1:k, :); 25 | idx = idx (1:k, :); 26 | -------------------------------------------------------------------------------- /ext/YAEL/yael_kmin.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_kmin.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_nn.m: -------------------------------------------------------------------------------- 1 | % Return the k nearest neighbors of a set of query vectors 2 | % 3 | % Usage: [idx,dis] = yael_nn(v, q, k, distype) 4 | % v the dataset to be searched (one vector per column) 5 | % q the set of queries (one query per column) 6 | % k (default:1) the number of nearest neighbors we want 7 | % distype distance type: 1=L1, 8 | % 2=L2 -> Warning: return the square L2 distance 9 | % 3=chi-square -> Warning: return the square Chi-square 10 | % 4=signed chi-square 11 | % 16=cosine -> Warning: return the *smallest* cosine values 12 | % Use -query to obtain the largest 13 | % available in Mex-version only 14 | % 15 | % Returned values 16 | % idx the vector index of the nearest neighbors 17 | % dis the corresponding *square* distances 18 | % 19 | % Both v and q contains vectors stored in columns, so transpose them if needed 20 | function [idx, dis] = yael_nn (v, q, k, distype) 21 | 22 | % distype is accepted only so that an explicit distance type reaches the error below 23 | if nargin >= 4 24 | error ('yael_nn is defined for arbitrary distances only in the Mex version\nConsider using the Mex-compiled version instead\n'); 25 | end 26 | 27 | fprintf ('* Warning: this is the slow version of nn\nConsider using the Mex-compiled version instead\n'); 28 | 29 | if nargin < 3, k = 1; end 30 | 31 | assert (size (v, 1) == size (q, 1)); 32 | 33 | n = size (v, 2); 34 | nq = size (q, 2); 35 | 36 | v_nr = sum (v.^2, 1); 37 | q_nr = sum (q.^2, 1); 38 | dis = 
repmat (v_nr', 1, nq) + repmat (q_nr, n, 1) - 2 * v' * q; 39 | [dis, idx] = sort (dis); 40 | 41 | dis = dis (1:k, :); 42 | idx = idx (1:k, :); 43 | -------------------------------------------------------------------------------- /ext/YAEL/yael_nn.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_nn.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_refman_v300.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_refman_v300.pdf -------------------------------------------------------------------------------- /ext/YAEL/yael_svds.m: -------------------------------------------------------------------------------- 1 | % Computes a few singular values for a matrix a. The full call is 2 | % 3 | % [u, s, v] = yael_svds (a, nev) 4 | % 5 | % if nev = min(size(a)), then a = u * diag(s) * v'. 6 | % 7 | % Reduced calls: 8 | % 9 | % s = yael_svds (a, nev) 10 | % [u, s] = yael_svds (a, nev) 11 | % [s, v] = yael_svds (a, nev, 'v') 12 | % 13 | % 14 | % 15 | % compared to the Matlab version, this one operates on single matrices, 16 | % and s is a vector (instead of a matrix). 
17 | % 18 | 19 | 20 | -------------------------------------------------------------------------------- /ext/YAEL/yael_svds.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_svds.mexa64 -------------------------------------------------------------------------------- /ext/YAEL/yael_v300.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/YAEL/yael_v300.tar.gz -------------------------------------------------------------------------------- /ext/m_Grabcut/GraphCut3dConstr.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut/GraphCut3dConstr.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut/GraphCutConstr.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut/GraphCutConstr.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut/GraphCutConstrSparse.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut/GraphCutConstrSparse.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut/GraphCutMex.mexa64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut/GraphCutMex.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut/m_GM_logPL.m: -------------------------------------------------------------------------------- 1 | function [ LogPL ] = m_GM_logPL( x, mu, detcovar, icovar) 2 | % compute negative gaussian log likelihood 3 | % f(x) = {det(cov)}^(-0.5) * exp{-0.5 * (x-mu)' * inv(cov) * (x-mu)} 4 | % - log{f(x)} = 0.5 * log{det(cov)} + 0.5 * (x-mu)' * inv(cov) * (x-mu) 5 | 6 | % Inputs: 7 | % x : N x d matrix each row is one example 8 | % mu: mean vector dx1 9 | % covar: covariance matrix dxd 10 | 11 | % Outputs: 12 | % LogPL : N x 1 13 | 14 | N = size(x, 1); 15 | LogPL = 0.5 * log (detcovar) + 0.5 * sum ((x- repmat(mu',N, 1)) * icovar ... 16 | .* (x - repmat(mu',N,1)), 2); 17 | 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_GrabCut_GUI.m: -------------------------------------------------------------------------------- 1 | function m_GrabCut_GUI 2 | close all 3 | % Create and hide the UI as it is being constructed. 4 | f = figure('Visible','on','Position',[360,500,1320,350]); 5 | 6 | % Construct the components 7 | h_load = uicontrol('Style','pushbutton',... 8 | 'String','Image','Position',[410,250,70,25], ... 9 | 'Callback', @loadImage_Callback); 10 | h_poly = uicontrol('Style','pushbutton',... 11 | 'String','Polygon','Position',[410,180,70,25], ... 12 | 'Callback', @MarkPolygon_Callback); 13 | h_run = uicontrol('Style','pushbutton',... 14 | 'String','Run','Position',[410,110,70,25], ... 
15 | 'Callback', @Run_Callback); 16 | 17 | align([h_load, h_poly, h_run],'Center','None'); 18 | 19 | handles = guihandles(f); 20 | 21 | handles.h_display_1 = axes('Units','Pixels','Position',[10,30,400,300]); 22 | handles.h_display_2 = axes('Units','Pixels','Position',[485,30,400,300]); 23 | handles.h_display_3 = axes('Units','Pixels','Position',[900,30,400,300]); 24 | 25 | guidata(f, handles); 26 | 27 | end 28 | 29 | function loadImage_Callback(source, eventdata) 30 | FilterSpec = ['*']; 31 | [FileName,PathName,FilterIndex] = uigetfile(FilterSpec); 32 | if isequal(FileName, 0), return; end % dialog cancelled: nothing to load 33 | fullFileName = strcat(PathName, FileName); 34 | global I; I = imread(fullFileName); 35 | handles = guidata(source); 36 | imshow(I, 'Parent', handles.h_display_1); 37 | end 38 | 39 | 40 | function MarkPolygon_Callback(hObject, eventdata) 41 | global fixedBG; 42 | global I; 43 | 44 | handles = guidata(gcbo); 45 | disp('select ROI ...'); 46 | 47 | fixedBG = ~roipoly(I); 48 | 49 | 50 | %%% show red bounds: 51 | % imBounds = I; 52 | % bounds = double(abs(edge(fixedBG))); 53 | % se = strel('square',3); 54 | % bounds = 1 - imdilate(bounds,se); 55 | % imBounds(:,:,2) = imBounds(:,:,2).*uint8(bounds); 56 | % imBounds(:,:,3) = imBounds(:,:,3).*uint8(bounds); 57 | % imshow(imBounds, 'Parent', handles.h_display_1); 58 | 59 | % show seg 60 | im = I.* repmat(uint8(~fixedBG) , [1 1 3]); 61 | imshow(im, 'Parent', handles.h_display_1); 62 | end 63 | 64 | function Run_Callback(hObject, eventdata) 65 | 66 | global fixedBG; 67 | global I; 68 | disp('Run GrabCut ...'); 69 | im = double(I); 70 | [h, w, d] = size(I); 71 | mask_fixed_fg = false(h, w); 72 | mask_fixed_bg = false(h, w); 73 | seg = m_Grabcut( im, ~fixedBG, mask_fixed_fg, mask_fixed_bg); 74 | result = I.* repmat(uint8(seg), [1 1 3]); 75 | handles = guidata(gcbo); 76 | imshow(result, 'Parent', handles.h_display_2); 77 | imshow(logical(seg), 'Parent', handles.h_display_3); 78 | 79 | 80 | end -------------------------------------------------------------------------------- 
/ext/m_Grabcut/m_GraphCut.m: -------------------------------------------------------------------------------- 1 | function [seg, E] = m_GraphCut(fgLogPL, bgLogPL, sc, v_edge_wt, h_edge_wt, ini_Labelset) 2 | % construct the ST graph and solve it by graph cut 3 | % 4 | % Inputs: 5 | % fgLogPL, bgLogPL: unary potentials for fg/bg 6 | % sc: label smooth cost matrix e.g., V(L1, L2) = gamma and V(L1,L1) = 0 7 | % v_edge_wt: exp{-beta*L2(z1, z2)} for vertical edges 8 | % h_edge_wt: for horizontal edges 9 | % ini_Labelset: initialize labels before inference 10 | 11 | % Outputs: 12 | % seg: binary segmentation 13 | % E : energy 14 | 15 | dc = cat(3, fgLogPL, bgLogPL); 16 | graphHandle = GraphCut('open', dc , sc, v_edge_wt, h_edge_wt); 17 | graphHandle = GraphCut('set', graphHandle, int32(ini_Labelset)); 18 | [graphHandle, seg] = GraphCut('expand', graphHandle); 19 | [graphHandle, E] = GraphCut('energy', graphHandle); 20 | GraphCut('close', graphHandle); 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_Unary_LogPL.m: -------------------------------------------------------------------------------- 1 | function [fgLogPL, bgLogPL] = m_Unary_LogPL(examples, fgGMMs, bgGMMs, ... 2 | mask_u, mask_fixed_fg, mask_fixed_bg, lambda) 3 | % compute data terms for graph cut 4 | % Inputs: 5 | % examples: N x 3 color image (double) 6 | % fgGMMs, bgGMMs : GMMs model for fg/bg 7 | % mask_u: initial unknown region 8 | % mask_fixed_fg: fixed fg pixels within unknown region 9 | % mask_fixed_bg: fixed bg pixels within unknown region 10 | % lambda: penalty for fixed labels 11 | 12 | % Outputs: 13 | % fgLogPL, bgLogPL: negative log likelihoods for each pixel. 
NxM 14 | 15 | [h, w] = size(mask_u); 16 | % determine unknown region, fixed fg/bg region 17 | mask_U = mask_u & (~mask_fixed_fg) & (~mask_fixed_bg); 18 | mask_BG = (~mask_u) | mask_fixed_bg; 19 | mask_FG = mask_fixed_fg; 20 | 21 | % compute negative log likelihoods 22 | fgLogPL = zeros(h, w); 23 | bgLogPL = zeros(h, w); 24 | 25 | fgLogPL(mask_FG) = lambda; 26 | bgLogPL(mask_BG) = lambda; 27 | 28 | U_ids = find(mask_U); 29 | uExamples = examples(U_ids, :); 30 | fgLogPL_U = m_unary_helper(uExamples, fgGMMs, h, w, U_ids); 31 | bgLogPL_U = m_unary_helper(uExamples, bgGMMs, h, w, U_ids); 32 | 33 | fgLogPL = fgLogPL + bgLogPL_U; 34 | bgLogPL = bgLogPL + fgLogPL_U; 35 | 36 | end 37 | 38 | function [LogPL] = m_unary_helper(examples, GMMs, h, w, ids) 39 | K = size(GMMs.mu, 2); 40 | N = size(examples, 1); 41 | PL = zeros(N, K); 42 | for i = 1 : K 43 | if GMMs.wt(i) ~= 0 44 | PL(:, i) = exp( -m_GM_logPL(examples, GMMs.mu(:,i), GMMs.detcov(i), GMMs.icov(:,:,i)) ); 45 | end 46 | end 47 | 48 | PL = PL * GMMs.wt; 49 | 50 | LogPL = zeros(h * w, 1); 51 | LogPL(ids) = -log(PL); 52 | LogPL = reshape(LogPL, h, w); 53 | 54 | end 55 | 56 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_assignGMM2pixels.m: -------------------------------------------------------------------------------- 1 | function [fgkids, bgkids] = m_assignGMM2pixels(examples, fgGMMs, bgGMMs, fgIds, bgIds) 2 | % Assign GMMs component id to each pixel by choosing the component which 3 | % has the minimum negative log likelihood of producing the pixel's color. 
4 | % (do not consider the component weight here) 5 | % 6 | 7 | % Inputs: 8 | % examples : N x 3 in color space 9 | % fgGMMs: previous foreground GMMs 10 | % bgGMMs: previous background GMMs 11 | % fgIds: current foregroud pixel ids 12 | % bgIds: current background pixel ids 13 | 14 | % Outputs: 15 | % fgkids: foreground pixel component ids 16 | % bgkids: background pixel component ids 17 | 18 | % fgExamples = examples(fgIds, :); 19 | % bgExamples = examples(bgIds, :); 20 | % 21 | % fgkids = m_assignment_Helper(fgExamples, fgGMMs); 22 | % bgkids = m_assignment_Helper(bgExamples, bgGMMs); 23 | 24 | fgkids = m_assignment_Helper(examples(fgIds,:), fgGMMs); 25 | bgkids = m_assignment_Helper(examples(bgIds,:), bgGMMs); 26 | 27 | end 28 | 29 | function ids = m_assignment_Helper(examples, GMMs) 30 | num_samples = size(examples, 1); 31 | K = size(GMMs.mu, 2); 32 | LogPL = zeros(num_samples, K); 33 | 34 | for i = 1 : K 35 | if GMMs.wt(i) == 0 36 | LogPL(:,i) = 1e4; 37 | else 38 | LogPL(:,i) = m_GM_logPL(examples, GMMs.mu(:,i), GMMs.detcov(i), GMMs.icov(:,:,i)); 39 | end 40 | end 41 | 42 | [~, ids] = min(LogPL, [], 2); 43 | 44 | end 45 | 46 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_calcNwt.m: -------------------------------------------------------------------------------- 1 | function [v_edge_wt, h_edge_wt ] = m_calcNwt(I) 2 | % compute neighborhood edge weights 3 | % exp{-beta*L2(z1, z2)} 4 | 5 | gradH = I(:, 2:end, :) - I(:, 1:end-1, :); 6 | gradV = I(2:end, :, :) - I(1:end-1, :, :); 7 | 8 | gradH = sum(gradH.^2, 3); 9 | gradV = sum(gradV.^2, 3); 10 | 11 | % Calculate beta - parameter of GrabCut algorithm. 
12 | % beta = 1/(2*avg(sqr(||color[i] - color[j]||))) 13 | % 4 connection average 14 | [h, w, d] = size(I); 15 | num_C = 2*h*w - (h + w); 16 | beta = 1 / ( 2 * (sum(gradH(:)) + sum(gradV(:))) /num_C ); 17 | 18 | % hC = exp(-beta.*gradH./mean(gradH(:))); 19 | % vC = exp(-beta.*gradV./mean(gradV(:))); 20 | 21 | hC = exp(-beta * gradH); 22 | vC = exp(-beta * gradV); 23 | 24 | h_edge_wt = [hC zeros(size(hC,1),1)]; 25 | v_edge_wt = [vC ;zeros(1, size(vC,2))]; 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_init_GMMs.m: -------------------------------------------------------------------------------- 1 | function [fgGMMs, bgGMMs, flag] = m_init_GMMs(fgExamples, bgExamples, K) 2 | % INPUTS: 3 | % fgExamples: Nx3 4 | % bgExamples: Mx3 5 | % K: number of clusters 6 | 7 | % OUTPUTS: 8 | % fgGMMs: foreground GMM models (means, icovs, detcovs and weights) struct 9 | % bgGMMs: background GMM models (means, icovs, detcovs and weights) struct 10 | % flag: true indicates error 11 | flag = false; 12 | [num_fg, width] = size(fgExamples); 13 | num_bg = size(bgExamples, 1); 14 | 15 | % initialize models 16 | fgGMMs.mu = zeros(width, K); 17 | fgGMMs.icov = zeros(width, width, K); 18 | fgGMMs.detcov = zeros(K, 1); 19 | fgGMMs.wt = zeros(K, 1); 20 | 21 | bgGMMs.mu = zeros(width, K); 22 | bgGMMs.icov = zeros(width, width, K); 23 | bgGMMs.detcov = zeros(K, 1); 24 | bgGMMs.wt = zeros(K, 1); 25 | 26 | % K-means 27 | opts = statset('kmeans'); 28 | % opts.MaxIter = 40; 29 | 30 | % assert(num_fg ~= 0); 31 | % assert(num_bg ~= 0); 32 | if (num_fg < K) || (num_bg < K) 33 | flag = true; 34 | return; 35 | end 36 | 37 | % [fgClusterIds, fgCenters] = kmeans(fgExamples, K, 'emptyaction','singleton' ,'Options',opts); 38 | % [bgClusterIds, bgCenters] = kmeans(bgExamples, K, 'emptyaction','singleton' ,'Options',opts); 39 | %tic; 40 | [~,fgClusterIds] = yael_kmeans(single(fgExamples'),K,'redo',1,'niter',20,'init',0,'verbose',0); 41 | 
[~,bgClusterIds] = yael_kmeans(single(bgExamples'),K,'redo',1,'niter',20,'init',0,'verbose',0); 42 | %fprintf('yael: %d\n', toc); 43 | 44 | 45 | 46 | % compute sample mean and covariance for GMMs 47 | for i = 1 : K 48 | fg_egs = fgExamples(fgClusterIds == i, :); 49 | if ~isempty(fg_egs) 50 | fgGMMs.mu(:, i) = mean(fg_egs, 1)'; 51 | fg_covar = cov(fg_egs); 52 | fgGMMs.icov(:,:,i) = pinv(fg_covar); 53 | fgGMMs.detcov(i) = det(fg_covar); 54 | fgGMMs.wt(i) = size(fg_egs, 1)/num_fg; 55 | end 56 | 57 | bg_egs = bgExamples(bgClusterIds == i, :); 58 | if ~isempty(bg_egs) 59 | bgGMMs.mu(:,i) = mean(bg_egs, 1)'; 60 | bg_covar = cov(bg_egs); 61 | bgGMMs.icov(:,:,i) = pinv(bg_covar); 62 | bgGMMs.detcov(i) = det(bg_covar); 63 | bgGMMs.wt(i) = size(bg_egs, 1)/num_bg; 64 | end 65 | end 66 | 67 | assert(abs(sum(fgGMMs.wt) - 1) < 1e-6 && abs(sum(bgGMMs.wt) - 1) < 1e-6 ); 68 | 69 | end 70 | 71 | -------------------------------------------------------------------------------- /ext/m_Grabcut/m_learnGMMs.m: -------------------------------------------------------------------------------- 1 | function [fgGMMs, bgGMMs, flag] = m_learnGMMs(examples,fgIds, bgIds, fgkids, bgkids, K) 2 | % *********************************************************** 3 | % estimate parameters of GMMs 4 | 5 | % Inputs: 6 | % examples: N x 3 color image 7 | % fgIds : pixel ids for foreground 8 | % bgIds : pixel ids for background 9 | % fgkids: GMMs component ids for foreground pixels 10 | % bgkids: GMMs component ids for background pixels 11 | % K: number of components 12 | 13 | % Outputs: 14 | % fgGMMs: GMMs model for foreground 15 | % bgGMMs: GMMs model for background 16 | % flag: if it is true, it means bad segment happened 17 | %************************************************************ 18 | flag = false; 19 | [~, d] = size(examples); 20 | 21 | fgExamples = examples(fgIds, :); 22 | bgExamples = examples(bgIds, :); 23 | 24 | % initialize models 25 | fgGMMs.mu = zeros(d, K); 26 | fgGMMs.icov = zeros(d, d, K); 
27 | fgGMMs.detcov = zeros(K, 1); 28 | fgGMMs.wt = zeros(K, 1); 29 | 30 | bgGMMs.mu = zeros(d, K); 31 | bgGMMs.icov = zeros(d, d, K); 32 | bgGMMs.detcov = zeros(K, 1); 33 | bgGMMs.wt = zeros(K, 1); 34 | 35 | num_fg = numel(fgIds); 36 | num_bg = numel(bgIds); 37 | 38 | % assert(num_fg ~= 0); 39 | % assert(num_bg ~= 0); 40 | 41 | if num_fg == 0 || num_bg == 0 42 | flag = true; 43 | return; 44 | end 45 | 46 | 47 | for i = 1 : K 48 | 49 | fg_egs = fgExamples(fgkids == i, :); 50 | if ~isempty(fg_egs) 51 | fgGMMs.mu(:, i) = mean(fg_egs, 1)'; 52 | fg_covar = cov(fg_egs); 53 | fgGMMs.icov(:,:,i) = pinv(fg_covar); 54 | fgGMMs.detcov(i) = det(fg_covar); 55 | fgGMMs.wt(i) = size(fg_egs, 1)/num_fg; 56 | end 57 | 58 | bg_egs = bgExamples(bgkids == i, :); 59 | if ~isempty(bg_egs) 60 | bgGMMs.mu(:, i) = mean(bg_egs, 1)'; 61 | bg_covar = cov(bg_egs); 62 | bgGMMs.icov(:,:,i) = pinv(bg_covar); 63 | bgGMMs.detcov(i) = det(bg_covar); 64 | bgGMMs.wt(i) = size(bg_egs, 1)/num_bg; 65 | end 66 | end 67 | 68 | 69 | 70 | end 71 | 72 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/GraphCut3dConstr.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut_3D/GraphCut3dConstr.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/GraphCutConstr.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut_3D/GraphCutConstr.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/GraphCutConstrSparse.mexa64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut_3D/GraphCutConstrSparse.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/GraphCutMex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/m_Grabcut_3D/GraphCutMex.mexa64 -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_GM_logPL_3D.m: -------------------------------------------------------------------------------- 1 | function [ LogPL ] = m_GM_logPL_3D( x, mu, detcov, icov) 2 | % compute negative gaussian log likelihood 3 | % f(x) = {det(cov)}^(-0.5) * exp{-0.5 * (x-mu)' * inv(cov) * (x-mu)} 4 | % - log{f(x)} = 0.5 * log{det(cov)} + 0.5 * (x-mu)' * inv(cov) * (x-mu) 5 | 6 | % Inputs: 7 | % x : N x d matrix each row is one example 8 | % mu: mean vector dx1 9 | % covar: covariance matrix dxd 10 | 11 | % Outputs: 12 | % LogPL : N x 1 13 | 14 | N = size(x, 1); 15 | LogPL = 0.5 * log (detcov) + 0.5 * sum ((x- repmat(mu',N, 1)) * icov ... 16 | .* (x - repmat(mu',N,1)), 2); 17 | 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_GrabCut_GUI.m~: -------------------------------------------------------------------------------- 1 | function m_GrabCut_GUI 2 | close all 3 | 4 | m_create_components(); 5 | end 6 | 7 | function m_create_components() 8 | % Create and hide the UI as it is being constructed. 9 | f = figure('Visible','on','Position',[360,500,1320,750]); 10 | 11 | % Construct the pushbuttons 12 | h_load = uicontrol('Style','pushbutton','String','Color','Position', ... 13 | [450,20,70,25], 'Callback', @loadImage_Callback); 14 | 15 | h_load_d = uicontrol('Style','pushbutton','String','Depth','Position', ... 
16 | [600,20,70,25], 'Callback', @loadDepth_Callback); 17 | 18 | h_poly = uicontrol('Style','pushbutton','String','Polygon','Position', ... 19 | [800,20,70,25],'Callback', @MarkPolygon_Callback); 20 | 21 | h_run = uicontrol('Style','pushbutton','String','Run','Position', ... 22 | [1200,20,70,25], 'Callback', @Run_Callback); 23 | 24 | 25 | align([h_load, h_load_d, h_poly, h_run],'Fixed', 30,'bottom'); 26 | 27 | % create axes and texts 28 | handles = guihandles(f); 29 | 30 | handles.h_display_1 = axes('Units','Pixels','Position',[10,420,400,300]); 31 | uicontrol('Style','text','Position',[100,725,200,20],'String', ... 32 | 'Original Color Image', 'FontSize',12, 'FontWeight', 'bold'); 33 | 34 | handles.h_display_2 = axes('Units','Pixels','Position',[455,420,400,300]); 35 | uicontrol('Style','text','Position',[520,725,250,20],'String', ... 36 | 'GrabCut(RGB) Segment', 'FontSize',12, 'FontWeight', 'bold'); 37 | 38 | handles.h_display_3 = axes('Units','Pixels','Position',[900,420,400,300]); 39 | uicontrol('Style','text','Position',[1000,725,200,20],'String', ... 40 | 'GrabCut(RGB) Mask', 'FontSize',12, 'FontWeight', 'bold'); 41 | 42 | handles.h_display_4 = axes('Units','Pixels','Position',[10,70,400,300]); 43 | uicontrol('Style','text','Position',[100,380,200,20],'String', ... 44 | 'ROI Polygon', 'FontSize',12, 'FontWeight', 'bold'); 45 | 46 | handles.h_display_5 = axes('Units','Pixels','Position',[455,70,400,300]); 47 | uicontrol('Style','text','Position',[520,380,250,20],'String', ... 48 | 'GrabCut(RGB-D) Segment', 'FontSize',12, 'FontWeight', 'bold'); 49 | 50 | handles.h_display_6 = axes('Units','Pixels','Position',[900,70,400,300]); 51 | uicontrol('Style','text','Position',[1000,380,200,20],'String', ... 
52 | 'GrabCut(RGB-D) Mask', 'FontSize',12, 'FontWeight', 'bold'); 53 | guidata(f, handles); 54 | 55 | 56 | end 57 | 58 | function loadImage_Callback(source, eventdata) 59 | FilterSpec = ['*']; 60 | [FileName,PathName,FilterIndex] = uigetfile(FilterSpec); 61 | fullFileName = strcat(PathName, FileName); 62 | global I; 63 | I = imread(fullFileName); 64 | handles = guidata(gcbo); 65 | imshow(I, 'Parent', handles.h_display_1); 66 | end 67 | 68 | function loadDepth_Callback(source, eventdata) 69 | FilterSpec = ['*']; 70 | [FileName,PathName,FilterIndex] = uigetfile(FilterSpec); 71 | fullFileName = strcat(PathName, FileName); 72 | global D; 73 | D = double(imread(fullFileName)); 74 | D = D/100; % centimeters 75 | end 76 | 77 | 78 | function MarkPolygon_Callback(hObject, eventdata) 79 | global fixedBG; 80 | global I; 81 | 82 | handles = guidata(gcbo); 83 | disp('select ROI ...'); 84 | axes(handles.h_display_4); 85 | fixedBG = ~roipoly(I); 86 | 87 | %%% show red bounds: 88 | % imBounds = I; 89 | % bounds = double(abs(edge(fixedBG))); 90 | % se = strel('square',3); 91 | % bounds = 1 - imdilate(bounds,se); 92 | % imBounds(:,:,2) = imBounds(:,:,2).*uint8(bounds); 93 | % imBounds(:,:,3) = imBounds(:,:,3).*uint8(bounds); 94 | % imshow(imBounds, 'Parent', handles.h_display_1); 95 | 96 | % show seg 97 | im = I.* repmat(uint8(~fixedBG) , [1 1 3]); 98 | imshow(im, 'Parent', handles.h_display_4); 99 | end 100 | 101 | function Run_Callback(hObject, eventdata) 102 | 103 | global fixedBG; 104 | global I; 105 | global D; 106 | disp('Run GrabCut ...'); 107 | fx_d = 5.8262448167737955e+02; 108 | fy_d = 5.8269103270988637e+02; 109 | cx_d = 3.1304475870804731e+02; 110 | cy_d = 2.3844389626620386e+02; 111 | 112 | Kd = [fx_d 0 (cx_d-40); 113 | 0 fy_d (cy_d-45); 114 | 0 0 1]; 115 | [h, w, ~] = size(I); 116 | [xx,yy] = meshgrid(1:w, 1:h); 117 | 118 | X = (xx - Kd(1,3)) .* D / fx_d; 119 | Y = (yy - Kd(2,3)) .* D / fy_d; 120 | Z = D; 121 | points = cat(3, X, Y, Z); 122 | 123 | im = double(I); 124 
| mask_fixed_fg = false(h, w); 125 | mask_fixed_bg = false(h, w); 126 | handles = guidata(gcbo); 127 | 128 | % run RGB grab cut 129 | cd ../m_Grabcut/; 130 | seg_rgb = m_Grabcut(im, ~fixedBG, mask_fixed_fg, mask_fixed_bg); 131 | result_rgb = I.* repmat(uint8(seg_rgb), [1 1 3]); 132 | imshow(result_rgb, 'Parent', handles.h_display_2); 133 | imshow(logical(seg_rgb), 'Parent', handles.h_display_3); 134 | cd ../InstanceSEG/; 135 | 136 | % run RGBD grab cut 137 | seg = m_Grabcut( im,points, ~fixedBG, mask_fixed_fg, mask_fixed_bg); 138 | result = I.* repmat(uint8(seg), [1 1 3]); 139 | imshow(result, 'Parent', handles.h_display_5); 140 | imshow(logical(seg), 'Parent', handles.h_display_6); 141 | 142 | 143 | 144 | 145 | 146 | end -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_Grabcut_3D.m: -------------------------------------------------------------------------------- 1 | function [ seg ] = m_Grabcut_3D( I, pts, mask_u, mask_fixed_fg, mask_fixed_bg) 2 | % ************************************************************************* 3 | % INPUTS: 4 | % I: input color image 5 | % pts: 3d aligned points mxnx3 unit(meter) 6 | % mask_u: interactive mask for unknown label region which contains 7 | % foreground (logical matrix). It indicates that its complement 8 | % mask (~mask_u) is a hard pixel assignment for background 9 | % 10 | % mask_fixed_fg: hard pixel assignment mask for forground. 11 | % mask_fixed_bg: hard pixel assignment mask for background. 12 | % (typically, for pixels inside mask_u) 13 | 14 | % OUTPUTS: 15 | % segment: the binary segmentation result. 16 | 17 | % Author: Zhuo Deng, Temple University. Mar, 2015. 18 | 19 | % This is implementation of image segmentation algorithm GrabCut described in 20 | % "GrabCut — Interactive Foreground Extraction using Iterated Graph Cuts". 21 | % Carsten Rother, Vladimir Kolmogorov, Andrew Blake. SIGGRAPH, 2004. 
22 | % ************************************************************************* 23 | 24 | 25 | % parameters setting 26 | K = 5; % clusters 27 | %gamma = 500; % original image 28 | gamma = 250; % resized image 29 | lambda = 9 * gamma; 30 | 31 | 32 | % initialize fg and bg GMMs models by k-means 33 | I = double(I); 34 | [h, w, ~] = size(I); 35 | %I = I./255; 36 | points = reshape(pts, [], 3); 37 | 38 | %tmp 39 | BB = m_mask2bbox(mask_u); %[c, r, w, h] 40 | bbox = [BB(2), BB(2)+BB(4)-1, BB(1), BB(1)+BB(3)-1]; % [rmin, rmax, cmin, cmax] 41 | bbox_ext = [max(1, bbox(1)-round(1)), min(h, bbox(2)+ round(1)), ... 42 | max(1, bbox(3)-round(1)), min(w, bbox(4)+ round(1))]; 43 | 44 | % assume that fixed fg and bg are within unknown area 45 | mask_fixed_bg = mask_fixed_bg & mask_u; 46 | mask_fixed_fg = mask_fixed_fg & mask_u; 47 | 48 | fgIds = find(mask_u & (~mask_fixed_bg)); 49 | bgIds = find(~mask_u | mask_fixed_bg); 50 | assert( (numel(fgIds) + numel(bgIds)) == (h*w) ); 51 | 52 | examples = reshape(I, [], 3); 53 | examples = [examples points]; 54 | fgExamples = examples(fgIds, :); 55 | bgExamples = examples(bgIds, :); 56 | 57 | %tic; 58 | [fgGMMs, bgGMMs, flag_ini] = m_init_GMMs_3D(fgExamples, bgExamples, K); 59 | %fprintf('init_GMMs_3D: %d\n', toc); 60 | 61 | if flag_ini 62 | seg = mask_u; 63 | return; 64 | end 65 | 66 | % 67 | ini_labels = mask_u & (~mask_fixed_bg); 68 | label_cost = [0 gamma; gamma 0]; 69 | 70 | % compute N edge weights: 71 | rgbd = cat(3, I, pts); 72 | %tic; 73 | [v_edge_wt, h_edge_wt] = m_calcNwt_3D(rgbd); 74 | %fprintf('m_calcNwt_3D: %d\n', toc); 75 | 76 | %% iterative minimization 77 | % t_assign = 0; 78 | % t_learn = 0; 79 | % t_unary = 0; 80 | % t_graph = 0; 81 | 82 | 83 | % add for loop here 84 | max_iter = 10; 85 | last_seg = mask_u; 86 | dupCount = 0; 87 | for i = 1 : max_iter 88 | % assign GMM components to pixels 89 | % kn = argmin Dn(alpha_n, kn, theta, zn); 90 | %tic; 91 | [fgkids, bgkids]= m_assignGMM2pixels_3D(examples, fgGMMs, bgGMMs, 
fgIds, bgIds); 92 | %t_assign = t_assign + toc; 93 | 94 | 95 | % learn GMM parameters 96 | % theta = argmin U(alpha, k, theta, z); 97 | %tic; 98 | [fgGMMs, bgGMMs, flag] = m_learnGMMs_3D(examples,fgIds, bgIds, fgkids, bgkids, K); 99 | %t_learn = t_learn + toc; 100 | 101 | if flag 102 | seg = mask_u; 103 | return; 104 | end 105 | 106 | % update unary energy 107 | %tic; 108 | [fgLogPL, bgLogPL] = m_Unary_LogPL_3D(examples, fgGMMs, bgGMMs, ... 109 | mask_u, mask_fixed_fg, mask_fixed_bg, lambda); 110 | %t_unary = t_unary + toc; 111 | % estimate segmentation 112 | % alpha = argmin E(alpha, k, theta, z); 113 | %[seg, energy] = m_GraphCut_3D(fgLogPL, bgLogPL, label_cost, v_edge_wt, h_edge_wt, ini_labels); 114 | %tic; 115 | [seg, energy] = m_GraphCut_3D(fgLogPL(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4)), ... 116 | bgLogPL(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4)), ... 117 | label_cost, ... 118 | v_edge_wt(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4)), ... 119 | h_edge_wt(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4)), ... 
120 | ini_labels(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4))); 121 | %t_graph = t_graph + toc; 122 | % tmp 123 | tmp = false(h, w); 124 | tmp(bbox_ext(1):bbox_ext(2), bbox_ext(3):bbox_ext(4)) = seg; 125 | seg = tmp; 126 | 127 | % compare 128 | % cmp = seg & (~last_seg); 129 | % if (nnz(cmp) == 0) 130 | % dupCount = dupCount + 1; 131 | % else 132 | % dupCount = 0; 133 | % end 134 | % 135 | % if dupCount >= 2 136 | % break; 137 | % else 138 | % last_seg = seg; 139 | % end 140 | if isequal(seg, last_seg) 141 | break; 142 | else 143 | last_seg = seg; 144 | end 145 | 146 | 147 | fgIds = find(seg == 1); 148 | bgIds = find(seg == 0); 149 | 150 | end 151 | %fprintf('iterations: %d\n', i); 152 | % fprintf('the data term energy: %d\n', energy.data); 153 | % fprintf('the smooth term energy: %d\n', energy.smooth); 154 | % fprintf('assign pixels 3D: %d\n', t_assign); 155 | % fprintf('learnGMM3D: %d\n', t_learn); 156 | % fprintf('unaryLog3D: %d\n', t_unary); 157 | % fprintf('graphcut: %d \n', t_graph); 158 | end 159 | 160 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_GraphCut_3D.m: -------------------------------------------------------------------------------- 1 | function [seg, E] = m_GraphCut_3D(fgLogPL, bgLogPL, sc, v_edge_wt, h_edge_wt, ini_Labelset) 2 | % construct the ST graph and solve it by graph cut 3 | % 4 | % Inputs: 5 | % fgLogPL, bgLogPL: unary potentials for fg/bg 6 | % sc: label smooth cost matrix e.g., V(L1, L2) = gamma and V(L1,L1) = 0 7 | % v_edge_wt: exp{-beta*L2(z1, z2)} for vertial edges 8 | % h_edge_wt: for horizontal edges 9 | % ini_Labelset: intialize labels before inference 10 | 11 | % Outputs: 12 | % seg: binary segmentation 13 | % E : energy 14 | 15 | dc = cat(3, fgLogPL, bgLogPL); 16 | graphHandle = GraphCut('open', dc , sc, v_edge_wt, h_edge_wt); 17 | graphHandle = GraphCut('set', graphHandle, int32(ini_Labelset)); 18 | [graphHandle, seg] = GraphCut('expand', graphHandle); 19 | 
[graphHandle, E.smooth, E.data] = GraphCut('energy', graphHandle); 20 | GraphCut('close', graphHandle); 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_Unary_LogPL.m~: -------------------------------------------------------------------------------- 1 | function [fgLogPL, bgLogPL] = m_Unary_LogPL(examples, fgGMMs, bgGMMs, ... 2 | mask_u, mask_fixed_fg, mask_fixed_bg, lambda) 3 | % compute date terms for graph cut 4 | % Inputs: 5 | % examples: N x 3 color image (double) 6 | % fgGMMs, bgGMMs : GMMs model for fg/bg 7 | % mask_u: initial unknown region 8 | % mask_fixed_fg: fixed fg pixels within unknown region 9 | % mask_fixed_bg: fixed bg pxiels within unknown region 10 | % lambda: penalty for fixed labels 11 | 12 | % Outputs: 13 | % fgLogPL, bgLogPL: negative log likelihoods for each pixel. NxM 14 | 15 | [h, w] = size(mask_u); 16 | % determine unknown region, fixed fg/bg region 17 | mask_U = mask_u & (~mask_fixed_fg) & (~mask_fixed_bg); 18 | mask_BG = (~mask_u) | mask_fixed_bg; 19 | mask_FG = mask_fixed_fg; 20 | 21 | % compute negative log likelihoods 22 | fgLogPL = zeros(h, w); 23 | bgLogPL = zeros(h, w); 24 | 25 | fgLogPL(mask_FG) = lambda; 26 | bgLogPL(mask_BG) = lambda; 27 | 28 | U_ids = find(mask_U); 29 | uExamples = examples(U_ids, :); 30 | fgLogPL_U = m_unary_helper(uExamples, fgGMMs, h, w, U_ids); 31 | bgLogPL_U = m_unary_helper(uExamples, bgGMMs, h, w, U_ids); 32 | 33 | min_fg = min(fgLogPL_U(:)); 34 | min_bg = min(bgLogPL_U(:)); 35 | if min_fg < min_bg 36 | base = min_fg; 37 | else 38 | base = min_bg; 39 | end 40 | 41 | if base < 0 42 | 43 | fgLogPL = fgLogPL + bgLogPL_U; 44 | bgLogPL = bgLogPL + fgLogPL_U; 45 | 46 | end 47 | 48 | function [LogPL] = m_unary_helper(examples, GMMs, h, w, ids) 49 | K = numel(GMMs.mu); 50 | N = size(examples, 1); 51 | PL = zeros(N, K); 52 | for i = 1 : K 53 | PL(:, i) = exp( -m_GM_logPL(examples, GMMs.mu{i}, GMMs.cov{i}) ); 54 | end 55 | 56 | PL = PL * 
GMMs.wt; 57 | 58 | LogPL = zeros(h * w, 1); 59 | LogPL(ids) = -log(PL); 60 | LogPL = reshape(LogPL, h, w); 61 | 62 | end 63 | 64 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_Unary_LogPL_3D.m: -------------------------------------------------------------------------------- 1 | function [fgLogPL, bgLogPL] = m_Unary_LogPL_3D(examples, fgGMMs, bgGMMs, ... 2 | mask_u, mask_fixed_fg, mask_fixed_bg, lambda) 3 | % compute date terms for graph cut 4 | % Inputs: 5 | % examples: N x 3 color image (double) 6 | % fgGMMs, bgGMMs : GMMs model for fg/bg 7 | % mask_u: initial unknown region 8 | % mask_fixed_fg: fixed fg pixels within unknown region 9 | % mask_fixed_bg: fixed bg pxiels within unknown region 10 | % lambda: penalty for fixed labels 11 | 12 | % Outputs: 13 | % fgLogPL, bgLogPL: negative log likelihoods for each pixel. NxM 14 | 15 | [h, w] = size(mask_u); 16 | % determine unknown region, fixed fg/bg region 17 | mask_U = mask_u & (~mask_fixed_fg) & (~mask_fixed_bg); 18 | mask_BG = (~mask_u) | mask_fixed_bg; 19 | mask_FG = mask_fixed_fg; 20 | 21 | % compute negative log likelihoods 22 | fgLogPL = zeros(h, w); 23 | bgLogPL = zeros(h, w); 24 | 25 | fgLogPL(mask_FG) = lambda; 26 | bgLogPL(mask_BG) = lambda; 27 | 28 | U_ids = find(mask_U); 29 | uExamples = examples(U_ids, :); 30 | fgLogPL_U = m_unary_helper(uExamples, fgGMMs, h, w, U_ids); 31 | bgLogPL_U = m_unary_helper(uExamples, bgGMMs, h, w, U_ids); 32 | 33 | min_fg = min(fgLogPL_U(:)); 34 | min_bg = min(bgLogPL_U(:)); 35 | if min_fg < min_bg 36 | base = min_fg; 37 | else 38 | base = min_bg; 39 | end 40 | 41 | if base < 0 42 | inc = abs(base); 43 | else 44 | inc = 0; 45 | end 46 | 47 | fgLogPL = fgLogPL + bgLogPL_U + inc*mask_U; 48 | bgLogPL = bgLogPL + fgLogPL_U + inc*mask_U; 49 | 50 | end 51 | 52 | function [LogPL] = m_unary_helper(examples, GMMs, h, w, ids) 53 | K = size(GMMs.mu, 2); 54 | N = size(examples, 1); 55 | PL = zeros(N, K); 56 | for i = 1 : K 57 | if 
GMMs.wt(i) ~= 0 58 | PL(:, i) = exp( -m_GM_logPL_3D(examples, GMMs.mu(:, i), GMMs.detcov(i), GMMs.icov(:,:,i)) ); 59 | end 60 | end 61 | 62 | PL = PL * GMMs.wt; 63 | 64 | LogPL = zeros(h * w, 1); 65 | LogPL(ids) = -log(PL); 66 | LogPL = reshape(LogPL, h, w); 67 | 68 | end 69 | 70 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_assignGMM2pixels_3D.m: -------------------------------------------------------------------------------- 1 | function [fgkids, bgkids] = m_assignGMM2pixels_3D(examples, fgGMMs, bgGMMs, fgIds, bgIds) 2 | % Assign GMMs component id to each pixel by choosing the component which 3 | % has the minimum negative log likelihood of producing the pixel's color. 4 | % (do not consider the component weight here) 5 | % 6 | 7 | % Inputs: 8 | % examples : N x 3 in color space 9 | % fgGMMs: previous foreground GMMs 10 | % bgGMMs: previous background GMMs 11 | % fgIds: current foregroud pixel ids 12 | % bgIds: current background pixel ids 13 | 14 | % Outputs: 15 | % fgkids: foreground pixel component ids 16 | % bgkids: background pixel component ids 17 | 18 | 19 | fgkids = m_assignment_Helper(examples(fgIds, :), fgGMMs); 20 | bgkids = m_assignment_Helper(examples(bgIds, :), bgGMMs); 21 | 22 | end 23 | 24 | function ids = m_assignment_Helper(examples, GMMs) 25 | num_samples = size(examples, 1); 26 | K = size(GMMs.mu, 2); 27 | LogPL = zeros(num_samples, K); 28 | 29 | for i = 1 : K 30 | if GMMs.wt(i) ~= 0 31 | LogPL(:,i) = m_GM_logPL_3D(examples, GMMs.mu(:,i), GMMs.detcov(i), GMMs.icov(:,:,i)); 32 | else 33 | LogPL(:,i) = 1e4; 34 | end 35 | end 36 | 37 | [~, ids] = min(LogPL, [], 2); 38 | 39 | end 40 | 41 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_calcNwt_3D.m: -------------------------------------------------------------------------------- 1 | function [v_edge_wt, h_edge_wt ] = m_calcNwt_3D(I) 2 | % compute neighborhood edge weights 3 | % 
exp{-beta*L2(z1, z2)} 4 | 5 | gradH = I(:, 2:end, :) - I(:, 1:end-1, :); 6 | gradV = I(2:end, :, :) - I(1:end-1, :, :); 7 | 8 | gradH = sum(gradH.^2, 3); 9 | gradV = sum(gradV.^2, 3); 10 | 11 | % tmp code 12 | % gradH_c = gradH(:,:,1:3); gradH_c = sum(gradH_c.^2, 3); 13 | % gradH_d = gradH(:,:,4:6); gradH_d = sum(gradH_d.^2, 3); 14 | % gradV_c = gradV(:,:,1:3); gradV_c = sum(gradV_c.^2, 3); 15 | % gradV_d = gradV(:,:,4:6); gradV_d = sum(gradV_d.^2, 3); 16 | % 17 | % gradH_c = gradH_c/max(gradH_c(:)); 18 | % gradH_d = gradH_d/max(gradH_d(:)); 19 | % gradV_c = gradV_c/max(gradV_c(:)); 20 | % gradV_d = gradV_d/max(gradV_d(:)); 21 | % 22 | % mask_H = gradH_c > gradH_d; 23 | % gradH = mask_H.* gradH_c + (~mask_H).*gradH_d; 24 | % mask_V = gradV_c > gradV_d; 25 | % gradV = mask_V.* gradV_c + (~mask_V).*gradV_d; 26 | 27 | 28 | 29 | % Calculate beta - parameter of GrabCut algorithm. 30 | % beta = 1/(2*avg(sqr(||color[i] - color[j]||))) 31 | % 4 connection average 32 | [h, w, ~] = size(I); 33 | num_C = 2*h*w - (h + w); 34 | denom = 2 * (sum(gradH(:)) + sum(gradV(:))) / num_C; 35 | if denom > 0 36 | beta = 1 / denom; 37 | else 38 | % constant (or single-pixel) input: 1/0 would give beta = Inf and 39 | % exp(-Inf * 0) = NaN edge weights, so use uniform weights instead 40 | beta = 0; 41 | end 42 | 43 | % hC = exp(-beta.*gradH./mean(gradH(:))); 44 | % vC = exp(-beta.*gradV./mean(gradV(:))); 45 | 46 | hC = exp(-beta * gradH); 47 | vC = exp(-beta * gradV); 48 | 49 | h_edge_wt = [hC zeros(size(hC,1),1)]; 50 | v_edge_wt = [vC ;zeros(1, size(vC,2))]; 51 | 52 | end 53 | 54 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_init_GMMs_3D.m: -------------------------------------------------------------------------------- 1 | function [fgGMMs, bgGMMs, flag] = m_init_GMMs_3D(fgExamples, bgExamples, K) 2 | % Initialize the fg/bg GMMs: cluster each example set with k-means and 3 | % fit one Gaussian (mean, inverse covariance, det, weight) per cluster. 4 | % 5 | % INPUTS: 6 | % fgExamples: Nx6 7 | % bgExamples: Mx6 8 | % K: number of clusters 9 | % 10 | % OUTPUTS: 11 | % fgGMMs: foreground GMM models (means, icovs, detcovs and weights) struct 12 | % bgGMMs: background GMM models (means, icovs, detcovs and weights) struct 13 | % flag: true when either set has fewer than K examples; both models are 14 | % then returned zero-filled 15 | 16 | flag = false; 17 | [num_fg, d] = size(fgExamples); 18 | num_bg = 
size(bgExamples, 1); 19 | 20 | % initialize models 21 | fgGMMs.mu = zeros(d, K); 22 | fgGMMs.icov = zeros(d, d, K); 23 | fgGMMs.detcov = zeros(K, 1); 24 | fgGMMs.wt = zeros(K, 1); 25 | 26 | bgGMMs.mu = zeros(d, K); 27 | bgGMMs.icov = zeros(d, d, K); 28 | bgGMMs.detcov = zeros(K, 1); 29 | bgGMMs.wt = zeros(K, 1); 30 | 31 | % not enough examples to seed K clusters 32 | if (num_fg < K) || (num_bg < K) 33 | flag = true; 34 | return; 35 | end 36 | 37 | % K-means (yael). MATLAB's kmeans alternative, kept for reference; it needs 38 | % the Statistics Toolbox and opts = statset('kmeans'): 39 | % [fgClusterIds, fgCenters] = kmeans(fgExamples, K, 'emptyaction','singleton' ,'Options',opts); 40 | % [bgClusterIds, bgCenters] = kmeans(bgExamples, K, 'emptyaction','singleton' ,'Options',opts); 41 | 42 | %tic; 43 | [~,fgClusterIds] = yael_kmeans(single(fgExamples'),K,'redo',1,'niter',20,'init',0,'verbose',0); 44 | [~,bgClusterIds] = yael_kmeans(single(bgExamples'),K,'redo',1,'niter',20,'init',0,'verbose',0); 45 | %fprintf('yael_3D: %d\n', toc); 46 | 47 | % compute sample mean and covariance for GMMs 48 | for i = 1 : K 49 | fg_egs = fgExamples(fgClusterIds == i, :); 50 | if ~isempty(fg_egs) 51 | fgGMMs.mu(:, i) = mean(fg_egs, 1)'; 52 | [fg_icov, fg_det] = m_component_cov(fg_egs); 53 | fgGMMs.icov(:,:,i) = fg_icov; 54 | fgGMMs.detcov(i) = fg_det; 55 | fgGMMs.wt(i) = size(fg_egs, 1)/num_fg; 56 | end 57 | 58 | bg_egs = bgExamples(bgClusterIds == i, :); 59 | if ~isempty(bg_egs) 60 | bgGMMs.mu(:, i) = mean(bg_egs, 1)'; 61 | [bg_icov, bg_det] = m_component_cov(bg_egs); 62 | bgGMMs.icov(:,:,i) = bg_icov; 63 | bgGMMs.detcov(i) = bg_det; 64 | bgGMMs.wt(i) = size(bg_egs, 1)/num_bg; 65 | end 66 | end 67 | 68 | assert(abs(sum(fgGMMs.wt) - 1) < 1e-6 && abs(sum(bgGMMs.wt) - 1) < 1e-6 ); 69 | 70 | end 71 | 72 | function [icov, detcov] = m_component_cov(egs) 73 | % Inverse and determinant of the sample covariance of egs (rows = samples). 74 | % cov() collapses a single-row input to a scalar variance, and a singular 75 | % covariance makes log(detcov) -Inf or complex downstream, so both cases 76 | % are handled explicitly. 77 | [n, d] = size(egs); 78 | if n > 1 79 | covar = cov(egs); 80 | else 81 | covar = zeros(d); % one sample: no spread 82 | end 83 | detcov = det(covar); 84 | if ~(detcov > 0) 85 | % degenerate component: add a small ridge to keep it positive definite 86 | covar = covar + max(1e-6, 1e-6 * trace(covar) / d) * eye(d); 87 | detcov = det(covar); 88 | end 89 | icov = pinv(covar); 90 | end 91 | 92 | -------------------------------------------------------------------------------- /ext/m_Grabcut_3D/m_learnGMMs_3D.m: -------------------------------------------------------------------------------- 1 | function [fgGMMs, bgGMMs, flag] = 
m_learnGMMs_3D(examples,fgIds, bgIds, fgkids, bgkids, K) 2 | % *********************************************************** 3 | % estimate parameters of GMMs 4 | 5 | % Inputs: 6 | % examples: N x 6 color image + points 7 | % fgIds : pixel ids for foreground 8 | % bgIds : pixel ids for background 9 | % fgkids: GMMs component ids for foreground pixels 10 | % bgkids: GMMs component ids for background pixels 11 | % K: number of components 12 | 13 | % Outputs: 14 | % fgGMMs: GMMs model for foreground 15 | % bgGMMs: GMMs model for background 16 | % flag: true when the foreground or background set is empty; both models 17 | % are then returned zero-filled 18 | %************************************************************ 19 | flag = false; 20 | fgExamples = examples(fgIds, :); 21 | bgExamples = examples(bgIds, :); 22 | [num_fg, d] = size(fgExamples); 23 | num_bg = numel(bgIds); 24 | 25 | % initialize models 26 | fgGMMs.mu = zeros(d, K); 27 | fgGMMs.icov = zeros(d, d, K); 28 | fgGMMs.detcov = zeros(K, 1); 29 | fgGMMs.wt = zeros(K, 1); 30 | 31 | bgGMMs.mu = zeros(d, K); 32 | bgGMMs.icov = zeros(d, d, K); 33 | bgGMMs.detcov = zeros(K, 1); 34 | bgGMMs.wt = zeros(K, 1); 35 | 36 | if num_fg == 0 || num_bg == 0 37 | flag = true; 38 | return; 39 | end 40 | 41 | for i = 1 : K 42 | fg_egs = fgExamples(fgkids == i, :); 43 | if ~isempty(fg_egs) 44 | fgGMMs.mu(:, i) = mean(fg_egs, 1)'; 45 | [fg_icov, fg_det] = m_component_cov(fg_egs); 46 | fgGMMs.icov(:,:,i) = fg_icov; 47 | fgGMMs.detcov(i) = fg_det; 48 | fgGMMs.wt(i) = size(fg_egs, 1)/num_fg; 49 | end 50 | 51 | bg_egs = bgExamples(bgkids == i, :); 52 | if ~isempty(bg_egs) 53 | bgGMMs.mu(:, i) = mean(bg_egs, 1)'; 54 | [bg_icov, bg_det] = m_component_cov(bg_egs); 55 | bgGMMs.icov(:,:,i) = bg_icov; 56 | bgGMMs.detcov(i) = bg_det; 57 | bgGMMs.wt(i) = size(bg_egs, 1)/num_bg; 58 | end 59 | end 60 | 61 | end 62 | 63 | function [icov, detcov] = m_component_cov(egs) 64 | % Inverse and determinant of the sample covariance of egs (rows = samples). 65 | % cov() collapses a single-row input to a scalar variance, and a singular 66 | % covariance makes log(detcov) -Inf or complex downstream, so both cases 67 | % are handled explicitly. 68 | [n, d] = size(egs); 69 | if n > 1 70 | covar = cov(egs); 71 | else 72 | covar = zeros(d); % one sample: no spread 73 | end 74 | detcov = det(covar); 75 | if ~(detcov > 0) 76 | % degenerate component: add a small ridge to keep it positive definite 77 | covar = covar + max(1e-6, 1e-6 * trace(covar) / d) * eye(d); 78 | detcov = det(covar); 79 | end 80 | icov = pinv(covar); 81 | end 82 | 83 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/README: 
-------------------------------------------------------------------------------- 1 | NYU Depth V2 Dataset Matlab Toolbox 2 | Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus 3 | 4 | ************************* 5 | RETRIEVING THE ACCEL DATA 6 | ************************* 7 | To compile the get_accel_data function, you must alter compile.m with the 8 | absolute location of the libFreenect include directory. Once compiled, 9 | get_accel_data.mex* can be called to extract the accelerometer data from the 10 | *.a files in each scene dump directory. 11 | 12 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/apply_distortion.m: -------------------------------------------------------------------------------- 1 | % Applies distortion to the given image. 2 | % 3 | % Note that this code was taken from Jean-Yves Bouguet's excellent Camera 4 | % Calibration Toolbox for matlab which can be found in its entirety here: 5 | % http://www.vision.caltech.edu/bouguetj/calib_doc/ 6 | function [xd,dxddk] = apply_distortion(x,k) 7 | 8 | 9 | % Complete the distortion vector if you are using the simple distortion model: 10 | length_k = length(k); 11 | if length_k <5 , 12 | k = [k ; zeros(5-length_k,1)]; 13 | end; 14 | 15 | 16 | [m,n] = size(x); 17 | 18 | % Add distortion: 19 | 20 | r2 = x(1,:).^2 + x(2,:).^2; 21 | 22 | r4 = r2.^2; 23 | 24 | r6 = r2.^3; 25 | 26 | 27 | % Radial distortion: 28 | 29 | cdist = 1 + k(1) * r2 + k(2) * r4 + k(5) * r6; 30 | 31 | if nargout > 1, 32 | dcdistdk = [ r2' r4' zeros(n,2) r6']; 33 | end; 34 | 35 | 36 | xd1 = x .* (ones(2,1)*cdist); 37 | 38 | coeff = (reshape([cdist;cdist],2*n,1)*ones(1,3)); 39 | 40 | if nargout > 1, 41 | dxd1dk = zeros(2*n,5); 42 | dxd1dk(1:2:end,:) = (x(1,:)'*ones(1,5)) .* dcdistdk; 43 | dxd1dk(2:2:end,:) = (x(2,:)'*ones(1,5)) .* dcdistdk; 44 | end; 45 | 46 | 47 | % tangential distortion: 48 | 49 | a1 = 2.*x(1,:).*x(2,:); 50 | a2 = r2 + 2*x(1,:).^2; 51 | a3 = r2 + 
2*x(2,:).^2; 52 | 53 | delta_x = [k(3)*a1 + k(4)*a2 ; 54 | k(3) * a3 + k(4)*a1]; 55 | 56 | aa = (2*k(3)*x(2,:)+6*k(4)*x(1,:))'*ones(1,3); 57 | bb = (2*k(3)*x(1,:)+2*k(4)*x(2,:))'*ones(1,3); 58 | cc = (6*k(3)*x(2,:)+2*k(4)*x(1,:))'*ones(1,3); 59 | 60 | if nargout > 1, 61 | ddelta_xdk = zeros(2*n,5); 62 | ddelta_xdk(1:2:end,3) = a1'; 63 | ddelta_xdk(1:2:end,4) = a2'; 64 | ddelta_xdk(2:2:end,3) = a3'; 65 | ddelta_xdk(2:2:end,4) = a1'; 66 | end; 67 | 68 | xd = xd1 + delta_x; 69 | 70 | if nargout > 1, 71 | dxddk = dxd1dk + ddelta_xdk ; 72 | if length_k < 5, 73 | dxddk = dxddk(:,1:length_k); 74 | end; 75 | end; 76 | 77 | 78 | return; 79 | 80 | % Test of the Jacobians: 81 | 82 | n = 10; 83 | 84 | lk = 1; 85 | 86 | x = 10*randn(2,n); 87 | k = 0.5*randn(lk,1); 88 | 89 | [xd,dxddk] = apply_distortion(x,k); 90 | 91 | 92 | % Test on k: OK!! 93 | 94 | dk = 0.001 * norm(k)*randn(lk,1); 95 | k2 = k + dk; 96 | 97 | [x2] = apply_distortion(x,k2); 98 | 99 | x_pred = xd + reshape(dxddk * dk,2,n); 100 | 101 | 102 | norm(x2-xd)/norm(x2 - x_pred) 103 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/camera_params.m: -------------------------------------------------------------------------------- 1 | % Calibrated using the RGBDemo Calibration tool: 2 | % http://labs.manctl.com/rgbdemo/ 3 | % 4 | 5 | % The maximum depth used, in meters. 
6 | maxDepth = 10; 7 | 8 | % RGB Intrinsic Parameters 9 | fx_rgb = 5.1885790117450188e+02; 10 | fy_rgb = 5.1946961112127485e+02; 11 | cx_rgb = 3.2558244941119034e+02; 12 | cy_rgb = 2.5373616633400465e+02; 13 | 14 | % RGB Distortion Parameters 15 | k1_rgb = 2.0796615318809061e-01; 16 | k2_rgb = -5.8613825163911781e-01; 17 | p1_rgb = 7.2231363135888329e-04; 18 | p2_rgb = 1.0479627195765181e-03; 19 | k3_rgb = 4.9856986684705107e-01; 20 | 21 | % Depth Intrinsic Parameters 22 | fx_d = 5.8262448167737955e+02; 23 | fy_d = 5.8269103270988637e+02; 24 | cx_d = 3.1304475870804731e+02; 25 | cy_d = 2.3844389626620386e+02; 26 | 27 | % Depth Distortion Parameters 28 | k1_d = -9.9897236553084481e-02; 29 | k2_d = 3.9065324602765344e-01; 30 | p1_d = 1.9290592870229277e-03; 31 | p2_d = -1.9422022475975055e-03; 32 | k3_d = -5.1031725053400578e-01; 33 | 34 | % Rotation 35 | R = -[ 9.9997798940829263e-01, 5.0518419386157446e-03, ... 36 | 4.3011152014118693e-03, -5.0359919480810989e-03, ... 37 | 9.9998051861143999e-01, -3.6879781309514218e-03, ... 38 | -4.3196624923060242e-03, 3.6662365748484798e-03, ... 39 | 9.9998394948385538e-01 ]; 40 | 41 | R = reshape(R, [3 3]); 42 | R = inv(R'); 43 | 44 | % 3D Translation 45 | t_x = 2.5031875059141302e-02; 46 | t_z = -2.9342312935846411e-04; 47 | t_y = 6.6238747008330102e-04; 48 | 49 | % Parameters for making depth absolute. 50 | depthParam1 = 351.3; 51 | depthParam2 = 1092.5; 52 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/cbf.h: -------------------------------------------------------------------------------- 1 | #ifndef CBF_H_ 2 | #define CBF_H_ 3 | 4 | #include <stdint.h> 5 | 6 | namespace cbf { 7 | 8 | // Filters the given depth image using a Cross Bilateral Filter. 9 | // 10 | // Args: 11 | // height - height of the images. 12 | // width - width of the images. 13 | // depth - HxW row-major ordered matrix. 14 | // intensity - HxW row-major ordered matrix. 
15 | // mask - HxW row-major ordered matrix. 16 | // result - HxW row-major ordered matrix. 17 | // num_scales - the number of scales at which to perform the filtering. 18 | // sigma_s - the space sigma (in pixels) 19 | // sigma_r - the range sigma (in intensity values, 0-1) 20 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity, 21 | bool* mask, uint8_t* result, unsigned num_scales, double* sigma_s, 22 | double* sigma_r); 23 | 24 | } // namespace 25 | 26 | #endif // CBF_H_ 27 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/cbf_windows.h: -------------------------------------------------------------------------------- 1 | #ifndef CBF_H_ 2 | #define CBF_H_ 3 | 4 | #define NUM_SCALES 3 5 | #define IMG_HEIGHT 427 6 | #define IMG_WIDTH 561 7 | 8 | typedef unsigned char uint8_t; 9 | 10 | namespace cbf { 11 | 12 | // Filters the given depth image using a Cross Bilateral Filter. 13 | // 14 | // Args: 15 | // depth - HxW row-major ordered matrix. 16 | // intensity - HxW row-major ordered matrix. 17 | // mask - HxW row-major ordered matrix. 18 | // result - HxW row-major ordered matrix. 19 | // sigma_s - the space sigma (in pixels) 20 | // sigma_r - the range sigma (in intensity values, 0-1) 21 | void cbf(uint8_t* depth, uint8_t* intensity, 22 | bool* mask, uint8_t* result, double* sigma_s, 23 | double* sigma_r); 24 | 25 | } // namespace 26 | 27 | #endif // CBF_H_ 28 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/compile.m: -------------------------------------------------------------------------------- 1 | libFreenectIncludeDir = '[Path to LibFreenect Include directory]'; 2 | eval(sprintf('mex -I%s get_accel_data.cpp', libFreenectIncludeDir)); 3 | 4 | % Use this for linux\mac. 
if ispc
  mex mex_cbf_windows.cpp cbf_windows.cpp
else
  mex mex_cbf.cpp cbf.cpp
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/crop_image.m:
--------------------------------------------------------------------------------
% Crops the given image to use only the portion where the projected depth
% image exists.
%
% The region is defined by the mask returned from get_projection_mask(), so
% the image must have the same height and width as that mask.
%
% Args:
%   img - either a HxW image or a HxWxD image.
%
% Returns:
%   img - a cropped version of the image.
function img = crop_image(img)
  [mask, sz] = get_projection_mask();
  [maskH, maskW] = size(mask);

  % A 2-D image of the wrong size would otherwise be indexed by the mask
  % without complaint and produce a crop of the wrong region.
  if size(img, 1) ~= maskH || size(img, 2) ~= maskW
    error('crop_image:sizeMismatch', ...
          'image must be %dx%d to match the projection mask', maskH, maskW);
  end

  switch ndims(img)
    case 2
      img = reshape(img(mask), sz);
    case 3
      D = size(img, 3);
      % Flatten the pixels so that the mask selects whole rows (one row per
      % pixel, one column per channel).
      img = reshape(img, [maskH*maskW D]);
      img = reshape(img(mask,:), [sz D]);
    otherwise
      error('not supported');
  end
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/demo_fill_depth_cross_bf_test.m:
--------------------------------------------------------------------------------
% Demos the in-painting function fill_depth_cross_bf.m

DATASET_PATH = '~/data1/kinect/spatial_relations_data/labeled_data.mat';

load(DATASET_PATH, 'images', 'rawDepths');

%%
imageInd = 1;

imgRgb = images(:,:,:,imageInd);
imgDepthAbs = rawDepths(:,:,imageInd);

% Crop the images to include the areas where we have depth information.
imgRgb = crop_image(imgRgb);
imgDepthAbs = crop_image(imgDepthAbs);

imgDepthFilled = fill_depth_cross_bf(imgRgb, double(imgDepthAbs));

figure(1);
subplot(1,3,1); imagesc(imgRgb);
subplot(1,3,2); imagesc(imgDepthAbs);
subplot(1,3,3); imagesc(imgDepthFilled);
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/demo_project_depth_map.m:
--------------------------------------------------------------------------------
% Demos the use of project_depth_map.m

% The location of the RAW dataset.
CLIPS_DIR = '[]';

% The path to the labeled dataset.
LABELED_DATASET_PATH = '[]';

load(LABELED_DATASET_PATH, 'rawDepthFilenames', 'rawRgbFilenames');

%% Load a pair of frames and align them.
imgRgb = imread(sprintf('%s/%s', CLIPS_DIR, rawRgbFilenames{1}));

imgDepth = imread(sprintf('%s/%s', CLIPS_DIR, rawDepthFilenames{1}));
imgDepth = swapbytes(imgDepth);

[imgDepth2, imgRgb2] = project_depth_map(imgDepth, imgRgb);

%% Now visualize the pair before and after alignment.
imgDepthAbsBefore = depth_rel2depth_abs(double(imgDepth));
imgOverlayBefore = get_rgb_depth_overlay(imgRgb, imgDepthAbsBefore);

imgOverlayAfter = get_rgb_depth_overlay(imgRgb2, imgDepth2);

figure;
subplot(1,2,1);
imagesc(crop_image(imgOverlayBefore));
title('Before projection');

subplot(1,2,2);
imagesc(crop_image(imgOverlayAfter));
title('After projection');
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/demo_synched_projected_frames.m:
--------------------------------------------------------------------------------
% The directory where you extracted the raw dataset.
datasetDir = '[PATH TO THE NYU DEPTH V2 RAW DATASET]';

% The name of the scene to demo.
sceneName = '[NAME OF A SCENE YOU WANT TO VIEW]';

% The absolute directory of the scene.
sceneDir = sprintf('%s/%s', datasetDir, sceneName);

% Reads the list of frames.
frameList = get_synched_frames(sceneDir);

% Displays each pair of synchronized RGB and Depth frames.
for ii = 1 : 15 : numel(frameList)
  imgRgb = imread([sceneDir '/' frameList(ii).rawRgbFilename]);
  imgDepthRaw = swapbytes(imread([sceneDir '/' frameList(ii).rawDepthFilename]));

  figure(1);
  % Show the RGB image.
  subplot(1,3,1);
  imagesc(imgRgb);
  axis off;
  axis equal;
  title('RGB');

  % Show the Raw Depth image.
  subplot(1,3,2);
  imagesc(imgDepthRaw);
  axis off;
  axis equal;
  title('Raw Depth');
  caxis([800 1100]);

  % Show the projected depth image.
  imgDepthProj = project_depth_map(imgDepthRaw, imgRgb);
  subplot(1,3,3);
  imagesc(imgDepthProj);
  axis off;
  axis equal;
  title('Projected Depth');

  pause(0.01);
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/depth_plane2depth_world.m:
--------------------------------------------------------------------------------
% Projects the given depth image to world coordinates. Note that this 3D
% coordinate space is defined by a horizontal plane made from the X and Z
% axes and the Y axis points up.
%
% Args:
%   imgDepthAbs - 480x640 depth image whose values indicate depth in
%                 meters.
%
% Returns:
%   points3d - Nx3 matrix of 3D world points (X,Y,Z).
function points3d = depth_plane2depth_world(imgDepthAbs)
  % Only full-resolution frames are accepted.
  [numRows, numCols] = size(imgDepthAbs);
  assert(numRows == 480);
  assert(numCols == 640);

  % Script call: brings the depth-camera intrinsics used below (fx_d, fy_d,
  % cx_d, cy_d) into this workspace.
  camera_params;

  [colIdx, rowIdx] = meshgrid(1:numCols, 1:numRows);

  % Back-project every pixel through the pinhole model.
  xWorld = imgDepthAbs .* (colIdx - cx_d) / fx_d;
  yWorld = imgDepthAbs .* (rowIdx - cy_d) / fy_d;

  % One row per pixel, pixels taken in column-major order.
  points3d = [xWorld(:), yWorld(:), imgDepthAbs(:)];
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/depth_rel2depth_abs.m:
--------------------------------------------------------------------------------
% Converts a raw Kinect depth image into absolute depth. The result is
% clamped to the range [0, maxDepth], where maxDepth comes from
% camera_params.
%
% Args:
%   imgDepthOrig - 480x640 raw depth image from the Kinect. Note that the
%                  bytes of the original uint16 image must have already
%                  been swapped via swapbytes.m
%
% Returns:
%   imgDepthAbs - 480x640 depth image whose values are in meters, rather
%                 than the internal depth values used by the kinect.
function imgDepthAbs = depth_rel2depth_abs(imgDepthOrig)
  assert(isa(imgDepthOrig, 'double'));

  [numRows, numCols] = size(imgDepthOrig);
  assert(numRows == 480);
  assert(numCols == 640);

  % Script call: defines depthParam1, depthParam2 and maxDepth.
  camera_params;

  imgDepthAbs = depthParam1 ./ (depthParam2 - imgDepthOrig);

  % Clamp: anything beyond the maximum is pulled back to it, then anything
  % negative is zeroed.
  beyondMax = imgDepthAbs > maxDepth;
  imgDepthAbs(beyondMax) = maxDepth;
  belowZero = imgDepthAbs < 0;
  imgDepthAbs(belowZero) = 0;
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/depth_world2rgb_world.m:
--------------------------------------------------------------------------------
% Performs the affine transformation between the Depth-world coordinate
% frame and the RGB-world coordinate frame.
%
% Args:
%   points3d - the 3D points in the depth camera's world coordinate frame,
%              an Nx3 matrix where N=480*640.
%
% Returns:
%   points3d - the 3D points in the RGB camera's world coordinate frame,
%              an Nx3 matrix where N=480*640.
function points3d = depth_world2rgb_world(points3d)
  % Script call: defines the rotation R and the translation components
  % t_x, t_y, t_z used below.
  camera_params;

  numPoints = size(points3d, 1);

  % NOTE(review): the components are stacked as (x, z, y), exactly as in the
  % original code -- confirm against camera_params that this is intended.
  translation = [t_x; t_z; t_y];

  % Work on 3xN column vectors: rotate, shift every column by the same
  % translation, then go back to the Nx3 layout.
  rotated = R * points3d';
  points3d = (rotated + translation * ones(1, numPoints))';
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/fill_depth_colorization.m:
--------------------------------------------------------------------------------
% Preprocesses the kinect depth image using a gray scale version of the
% RGB image as a weighting for the smoothing. This code is a slight
% adaptation of Anat Levin's colorization code:
%
% See: www.cs.huji.ac.il/~yweiss/Colorization/
%
% Args:
%   imgRgb - HxWx3 matrix, the rgb image for the current frame. This must
%            be between 0 and 1.
%   imgDepth - HxW matrix, the depth image for the current frame in
%              absolute (meters) space.
%   alpha - a penalty value between 0 and 1 for the current depth values.
%
% Returns:
%   denoisedDepthImg - HxW matrix, the filled-in depth image, rescaled back
%                      to the units of imgDepth.
function denoisedDepthImg = fill_depth_colorization(imgRgb, imgDepth, alpha)
  narginchk(2, 3);
  if nargin < 3
    alpha = 1;
  end

  % Depth values of exactly 0 or 10 are treated as missing measurements.
  imgIsNoise = (imgDepth == 0 | imgDepth == 10);

  maxImgAbsDepth = max(imgDepth(~imgIsNoise));
  if isempty(maxImgAbsDepth)
    error('fill_depth_colorization:noValidDepth', ...
          'imgDepth contains no valid depth values to propagate.');
  end
  imgDepth = imgDepth ./ maxImgAbsDepth;
  imgDepth(imgDepth > 1) = 1;

  assert(ndims(imgDepth) == 2);
  [H, W] = size(imgDepth);
  numPix = H * W;

  % Linear (column-major) index of every pixel.
  indsM = reshape(1:numPix, H, W);

  knownValMask = ~imgIsNoise;

  grayImg = rgb2gray(imgRgb);

  winRad = 1;

  % Triplets (rows, cols, vals) of the sparse system, preallocated for the
  % largest possible window around every pixel.
  len = 0;
  absImgNdx = 0;
  cols = zeros(numPix * (2*winRad+1)^2,1);
  rows = zeros(numPix * (2*winRad+1)^2,1);
  vals = zeros(numPix * (2*winRad+1)^2,1);
  gvals = zeros(1, (2*winRad+1)^2);

  % Visit the pixels in column-major order so that absImgNdx equals the
  % linear index of pixel (i, j).
  for j = 1 : W
    for i = 1 : H
      absImgNdx = absImgNdx + 1;

      nWin = 0; % Counts the number of points in the current window.
      for ii = max(1, i-winRad) : min(i+winRad, H)
        for jj = max(1, j-winRad) : min(j+winRad, W)
          if ii == i && jj == j
            continue;
          end

          len = len+1;
          nWin = nWin+1;
          rows(len) = absImgNdx;
          cols(len) = indsM(ii,jj);
          gvals(nWin) = grayImg(ii, jj);
        end
      end

      % Intensity variance of the window, including the centre pixel.
      curVal = double(grayImg(i, j));
      gvals(nWin+1) = curVal;
      c_var = mean((gvals(1:nWin+1)-mean(gvals(1:nWin+1))).^2);

      % Bandwidth of the intensity weighting, with two lower bounds.
      csig = c_var*0.6;
      mgv = min((gvals(1:nWin)- curVal).^2);
      if csig < (-mgv/log(0.01))
        csig=-mgv/log(0.01);
      end

      if csig < 0.000002
        csig = 0.000002;
      end

      % Normalised neighbour weights; they enter the matrix negated.
      gvals(1:nWin) = exp(-(gvals(1:nWin)-curVal).^2/csig);
      gvals(1:nWin) = gvals(1:nWin) / sum(gvals(1:nWin));
      vals(len-nWin+1 : len) = -gvals(1:nWin);

      % Now the self-reference (along the diagonal).
      len = len + 1;
      rows(len) = absImgNdx;
      cols(len) = absImgNdx;
      vals(len) = 1; %sum(gvals(1:nWin));
    end
  end

  vals = vals(1:len);
  cols = cols(1:len);
  rows = rows(1:len);
  A = sparse(rows, cols, vals, numPix, numPix);

  % Diagonal data term: alpha on pixels with a known depth, 0 elsewhere.
  rows = 1:numel(knownValMask);
  cols = 1:numel(knownValMask);
  vals = knownValMask(:) * alpha;
  G = sparse(rows, cols, vals, numPix, numPix);

  new_vals = (A + G) \ (vals .* imgDepth(:));
  new_vals = reshape(new_vals, [H, W]);

  % Undo the normalisation applied at the top.
  denoisedDepthImg = new_vals * maxImgAbsDepth;
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/fill_depth_cross_bf.m:
--------------------------------------------------------------------------------
% In-paints the depth image using a cross-bilateral filter. The operation
% is implemented via several filterings at various scales. The number of
% scales is determined by the number of spatial and range sigmas provided.
% 3 spatial/range sigmas translate into filtering at 3 scales.
%
% Args:
%   imgRgb - the RGB image, a uint8 HxWx3 matrix
%   imgDepthAbs - the absolute depth map, a HxW double matrix whose values
%                 indicate depth in meters.
%   spaceSigmas - (optional) sigmas for the spatial gaussian term.
%   rangeSigmas - (optional) sigmas for the intensity gaussian term.
%
% Returns:
%   imgDepthAbs - the inpainted depth image.
function imgDepthAbs = fill_depth_cross_bf(imgRgb, imgDepthAbs, ...
    spaceSigmas, rangeSigmas)

  narginchk(2, 4);
  assert(isa(imgRgb, 'uint8'), 'imgRgb must be uint8');
  assert(isa(imgDepthAbs, 'double'), 'imgDepthAbs must be a double');

  if nargin < 3
    spaceSigmas = [12 5 8];
  end
  if nargin < 4
    rangeSigmas = [0.2 0.08 0.02];
  end

  assert(numel(spaceSigmas) == numel(rangeSigmas));
  assert(isa(rangeSigmas, 'double'));
  assert(isa(spaceSigmas, 'double'));

  % Create the 'noise' image and get the maximum observed depth.
  imgIsNoise = imgDepthAbs == 0 | imgDepthAbs == 10;
  maxDepthObs = max(imgDepthAbs(~imgIsNoise));
  if isempty(maxDepthObs)
    error('fill_depth_cross_bf:noValidDepth', ...
          'imgDepthAbs contains no valid depth values to propagate.');
  end

  % Convert the depth image to uint8.
  imgDepth = imgDepthAbs ./ maxDepthObs;
  imgDepth(imgDepth > 1) = 1;
  imgDepth = uint8(imgDepth * 255);

  % Run the cross-bilateral filter. The sigmas are passed as column vectors.
  if ispc
    imgDepthAbs = mex_cbf_windows(imgDepth, rgb2gray(imgRgb), imgIsNoise, spaceSigmas(:), rangeSigmas(:));
  else
    imgDepthAbs = mex_cbf(imgDepth, rgb2gray(imgRgb), imgIsNoise, spaceSigmas(:), rangeSigmas(:));
  end

  % Convert back to absolute depth (meters).
  imgDepthAbs = im2double(imgDepthAbs) .* maxDepthObs;
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_accel_data.cpp:
--------------------------------------------------------------------------------
// NOTE(review): the names of the three system headers were lost when this
// listing was extracted (only bare "#include" lines remained). They are
// reconstructed here from the symbols the file uses -- confirm against the
// upstream source.
#include <libfreenect.h>  // freenect_raw_tilt_state
#include "mex.h"
#include <stdint.h>
#include <stdio.h>        // FILE, fopen, fseek, ftell, fread, fclose

// Returns the total size of the file in bytes, or -1 if it cannot be
// determined. The current read position is restored before returning.
int get_data_size(FILE *fp) {
  const long orig = ftell(fp);
  if (orig < 0 || fseek(fp, 0L, SEEK_END) != 0) {
    return -1;
  }
  const long out = ftell(fp);
  fseek(fp, orig, SEEK_SET);
  return (int) out;
}

// MEX entry point: data = get_accel_data(filename)
//
// Reads one freenect_raw_tilt_state record from the given file and returns
// a 4x1 double vector [accelerometer_x; accelerometer_y; accelerometer_z;
// tilt_angle].
void mexFunction(int nlhs, mxArray* plhs[], const int nrhs, const mxArray* prhs[]) {
  if (nrhs != 1) {
    mexErrMsgTxt("Number of arguments must be exactly 1.");
  } else if (!mxIsChar(prhs[0])) {
    mexErrMsgTxt("Input must be a string.");
  }
  if (nlhs > 1) {
    mexErrMsgTxt("Too many output arguments.");
  }

  char *filename = mxArrayToString(prhs[0]);
  if (filename == NULL) {
    mexErrMsgTxt("Could not convert the filename to a C string.");
  }

  // The dump is a raw struct image, so it must be read in binary mode.
  FILE* fp = fopen(filename, "rb");
  if (fp == NULL) {
    mexErrMsgIdAndTxt("filename:notFound", "file %s not found", filename);
  }

  // The mexErr* calls do not return, so the file has to be closed before
  // each of them.
  const int data_size = get_data_size(fp);
  if (data_size != (int) sizeof(freenect_raw_tilt_state)) {
    fclose(fp);
    mexErrMsgIdAndTxt("filename:notAccel",
        "file %s's size doesnt match freenect_raw_tilt_state.", filename);
  }

  freenect_raw_tilt_state state;
  const size_t num_read = fread(&state, sizeof(state), 1, fp);
  fclose(fp);
  if (num_read != 1) {
    mexErrMsgIdAndTxt("filename:readFailed",
        "could not read the accelerometer record from %s", filename);
  }

  mwSize ndim = 2;
  const mwSize dim_size[] = {4, 1};
  plhs[0] = mxCreateNumericArray(ndim, dim_size, mxDOUBLE_CLASS, mxREAL);
  double* output_data = (double*) mxGetData(plhs[0]);
  output_data[0] = state.accelerometer_x;
  output_data[1] = state.accelerometer_y;
  output_data[2] = state.accelerometer_z;
  output_data[3] = state.tilt_angle;

  mxFree(filename);
}

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_accel_data.m:
--------------------------------------------------------------------------------
% Returns the accelerometer data coming from the kinect.
%
% Args:
%   filename - the filename of the .dump file.
%
% Returns:
%   data - a 4x1 vector where the first three elements are the x, y, and z
%          components of the accelerometer and the fourth element is the
%          tilt angle.
%
% data = get_accel_data(filename);
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_instance_masks.m:
--------------------------------------------------------------------------------
% Returns a series of masks for each object instance in the given scene.
%
% Args:
%   imgObjectLabels - HxW label map. 0 indicates a missing label.
%   imgInstances - HxW instance map.
%
% Returns:
%   instanceMasks - binary masks of size HxWxN where N is the number of
%                   total objects in the room.
%   instanceLabels - Nx1 vector of class labels for each instance mask.
function [instanceMasks, instanceLabels] = get_instance_masks(...
    imgObjectLabels, imgInstances)

  [numRows, numCols] = size(imgObjectLabels);

  % Every distinct (class label, instance id) combination in the scene.
  labelInstancePairs = unique( ...
      [imgObjectLabels(:), uint16(imgInstances(:))], 'rows');

  % Discard the combination whose two entries sum to zero.
  sumsToZero = sum(labelInstancePairs, 2) == 0;
  labelInstancePairs(sumsToZero, :) = [];

  numInstances = size(labelInstancePairs, 1);

  % The class label of each instance is the first entry of its pair.
  instanceLabels = double(labelInstancePairs(:, 1));

  % One mask per pair: pixels carrying both that label and that instance id.
  instanceMasks = false(numRows, numCols, numInstances);
  for idx = 1 : numInstances
    hasLabel = imgObjectLabels == labelInstancePairs(idx, 1);
    hasInstance = imgInstances == labelInstancePairs(idx, 2);
    instanceMasks(:, :, idx) = hasLabel & hasInstance;
  end
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_projection_mask.m:
--------------------------------------------------------------------------------
% Gets a mask for the projected images that is most conservative with
% respect to the regions that maintain the kinect depth signal following
% projection.
%
% Returns:
%   mask - HxW binary image where the projection falls.
%   sz - the size of the valid region.
function [mask, sz] = get_projection_mask()
  % Gupta CVPR2013 mask. (The original mask was rows 45:471 and columns
  % 41:601, i.e. sz = [427 561].)
  validRows = 46:470;
  validCols = 41:600;

  mask = false(480, 640);
  mask(validRows, validCols) = true;

  sz = [numel(validRows), numel(validCols)];
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_rgb_depth_overlay.m:
--------------------------------------------------------------------------------
% Returns an overlay of RGB and Depth frames to evaluate the alignment.
%
% Args:
%   imgRgb - the RGB image, an HxWx3 matrix of type uint8.
%   imgDepthAbs - the absolute-depth image, an HxW matrix of type double
%                 whose values indicate depth in meters.
%
% Returns:
%   imgOverlay - an image visualizing RGB and Depth alignment. It is a
%                uint8 HxWx3 image whose channels are the red channel of
%                imgRgb, the rescaled depth and the green channel of imgRgb.
function imgOverlay = get_rgb_depth_overlay(imgRgb, imgDepthAbs)
  % Check dims.
  assert(ndims(imgRgb) == 3);
  assert(ndims(imgDepthAbs) == 2);

  % Check sizes.
  [H, W, D] = size(imgRgb);
  assert(D == 3);
  assert(all(size(imgDepthAbs) == [H, W]));

  % Check types.
  assert(isa(imgRgb, 'uint8'));
  assert(isa(imgDepthAbs, 'double'));

  % Stretch the depth values over the full uint8 range.
  imgDepth = imgDepthAbs - min(imgDepthAbs(:));
  imgDepth = imgDepth ./ max(imgDepth(:));
  imgDepth = uint8(imgDepth * 255);

  % Red stays in place, depth takes the second channel and green moves to
  % the third; the blue channel is dropped.
  imgOverlay = cat(3, imgRgb(:,:,1), imgDepth, imgRgb(:,:,2));
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_scene_type_from_scene.m:
--------------------------------------------------------------------------------
% Returns the scene type (living room, starbucks, subway, etc) from the
% scene (living_room_0002k, office_0013, etc).
%
% Args:
%   scene - the scene name: [sceneType]_[sceneNumber]
%
% Returns:
%   sceneType - the name of the scene type.
function sceneType = get_scene_type_from_scene(scene)
  % 'once' returns only the start of the first digit run. Without it a name
  % containing several digit runs yields a vector of positions, and the
  % colon expression below would silently use just its first element.
  ind = regexp(scene, '\d+\w?', 'once');

  % Drop the digits and the single separator character in front of them.
  sceneType = scene(1:ind-2);
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_synched_frames.m:
--------------------------------------------------------------------------------
% Returns a struct with synchronized RGB and depth frames, as well as the
% accelerometer data. Note that this script considers the depth frames as
% 'primary' in the sense that it keeps every depth frame and matches the
% nearest RGB frame.
%
% Args:
%   sceneDir - the directory containing the raw kinect dump for a
%              particular scene.
%
% Returns:
%   frameList - a struct array with one element per depth frame and the
%               fields rawDepthFilename, rawRgbFilename and accelFilename.
function frameList = get_synched_frames(sceneDir)

  rgbImages = struct();
  frameList = struct();
  accelRecs = struct();

  % Faster than matlab's Dir function for big directories and slow
  % distributed file systems...
  files = regexp(ls(sceneDir), '(\s+|\n)', 'split');
  % NOTE(review): drops the last token, presumably the empty string left by
  % the trailing newline of the ls output -- confirm on the target platform.
  files(end) = [];

  files = sort(files);

  % Count the number of files of each type found in the scene.
  numDepth = 0;
  numRgb = 0;
  numAccel = 0;

  % The type is given by the 'd-', 'r-' or 'a-' prefix. The dash has to be
  % matched literally: the previous patterns ('^d-*' etc.) accepted zero
  % dashes and therefore any file name starting with that letter.
  for ii = 1 : numel(files)
    if ~isempty(regexp(files{ii}, '^d-', 'once'))
      numDepth = numDepth + 1;
      frameList(numDepth).rawDepthFilename = files{ii};
    elseif ~isempty(regexp(files{ii}, '^r-', 'once'))
      numRgb = numRgb + 1;
      rgbImages(numRgb).name = files{ii};
    elseif ~isempty(regexp(files{ii}, '^a-', 'once'))
      numAccel = numAccel + 1;
      accelRecs(numAccel).name = files{ii};
    end
  end

  fprintf('Found %d depth, %d rgb images, and %d accel dumps.\n', ...
      numDepth, numRgb, numAccel);

  % Matching needs at least one candidate of each kind; fail with a clear
  % message instead of a missing-field error further down.
  if numDepth > 0 && (numRgb == 0 || numAccel == 0)
    error('get_synched_frames:missingFiles', ...
        'Cannot synchronize %s: found %d rgb images and %d accel dumps.', ...
        sceneDir, numRgb, numAccel);
  end

  % Now, go through both images sets, grabbing the rgb and accelerometer
  % data that is nearest to the current timestamp.

  jj = 1; % Current RGB pointer.
  kk = 1; % Current Accel pointer.
  for ii = 1 : numDepth
    fprintf('Matching depth image %d/%d\n', ii, numDepth);

    tDepth = parse_timestamp(frameList(ii).rawDepthFilename);

    % Both pointers only ever move forward, which relies on the file names
    % having been sorted above.
    jj = advance_to_nearest(rgbImages, jj, numRgb, tDepth);
    kk = advance_to_nearest(accelRecs, kk, numAccel, tDepth);

    fprintf('Matched depth %d to rgb %d and accel %d.\n', ii, jj, kk);

    % Now save the current RGB filename and ACCEL filename.
    frameList(ii).rawRgbFilename = rgbImages(jj).name;
    frameList(ii).accelFilename = accelRecs(kk).name;
  end
  fprintf('\n');
end

% Moves idx forward through records for as long as the next record is at
% least as close in time to tTarget as the current one.
function idx = advance_to_nearest(records, idx, numRecords, tTarget)
  tDiff = abs(tTarget - parse_timestamp(records(idx).name));
  while idx < numRecords
    tmpDiff = abs(tTarget - parse_timestamp(records(idx+1).name));
    if tmpDiff > tDiff
      break;
    end
    tDiff = tmpDiff;
    idx = idx + 1;
  end
end

% Parses the first '-'-separated field that follows the two-character type
% prefix of a raw file name.
function t = parse_timestamp(name)
  parts = regexp(name(3:end), '-', 'split');
  t = str2double(parts{1});
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/get_timestamp_from_filename.m:
--------------------------------------------------------------------------------
% Extracts the timestamp from the filename.
%
% Example usage:
%   filename = [CLIPS_DIR '/r-1339729868.166858-2965701968.ppm']
%   matlabTime = get_timestamp_from_filename(filename);
%   disp(datestr(matlabTime, 'mm/dd/yy HH:MM:SS.FFF'));
%
%
%
% Args:
%   filename - the path to the raw kinect output file.
%
% Returns:
%   matlabTime - the matlab time which can be passed to datestr
function matlabTime = get_timestamp_from_filename(filename)
  % Parse the bare file name only, so that the result does not depend on
  % whether a directory is present or on dashes inside the directory part.
  [~, baseName, extension] = fileparts(filename);
  parts = regexp([baseName extension], '-', 'split');
  if numel(parts) < 2
    error('get_timestamp_from_filename:badFilename', ...
          'expected a name of the form <type>-<timestamp>-<counter>: %s', ...
          filename);
  end

  % parts{1} is the one-letter type prefix and parts{2} the timestamp. It is
  % divided by the number of seconds per day below, i.e. it is in seconds.
  seconds = str2double(parts{2});

  % Time since the epoch, correcting for offset between UTC and EST
  % (technically a good chunk of the film was shot in CST, but se la vi).
  unixEpoch = datenum(1969,12,31,20,0,0);
  matlabTime = seconds ./ 86400 + unixEpoch;
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/mex_cbf.cpp:
--------------------------------------------------------------------------------

#include "cbf.h"
#include "mex.h"

// Positions of the MEX input arguments.
#define ARG_DEPTH 0
#define ARG_INTENSITY 1
#define ARG_NOISE 2
#define ARG_SIG_S 3
#define ARG_SIG_R 4

#define LOG_LEVEL 1

// Checks the class of each input: uint8 depth and intensity images, a
// logical noise mask and double sigma vectors. Raises a MATLAB error on the
// first mismatch.
void validate_types(const mxArray* prhs[]) {
  if (mxGetClassID(prhs[ARG_DEPTH]) != mxUINT8_CLASS) {
    mexErrMsgTxt("Depth image must be of type uint8.");
  }

  if (mxGetClassID(prhs[ARG_INTENSITY]) != mxUINT8_CLASS) {
    mexErrMsgTxt("Intensity image must be of type uint8.");
  }

  if (mxGetClassID(prhs[ARG_NOISE]) != mxLOGICAL_CLASS) {
    mexErrMsgTxt("Noise image must be logical.");
  }

  if (mxGetClassID(prhs[ARG_SIG_S]) != mxDOUBLE_CLASS) {
    mexErrMsgTxt("SigmaS image must be double.");
  }

  if (mxGetClassID(prhs[ARG_SIG_R]) != mxDOUBLE_CLASS) {
    mexErrMsgTxt("SigmaR image must be double.");
  }
}

// Checks that all of the images are of the same size.
36 | void validate_sizes(const mxArray* prhs[]) { 37 | if (mxGetNumberOfDimensions(prhs[ARG_DEPTH]) != 2) { 38 | mexErrMsgTxt("Depth image must be HxW"); 39 | } 40 | 41 | if (mxGetNumberOfDimensions(prhs[ARG_INTENSITY]) != 2) { 42 | mexErrMsgTxt("Intensity image must be HxW"); 43 | } 44 | 45 | if (mxGetNumberOfDimensions(prhs[ARG_NOISE]) != 2) { 46 | mexErrMsgTxt("Noise image must be HxW"); 47 | } 48 | 49 | if (mxGetNumberOfDimensions(prhs[ARG_SIG_S]) != 2 || mxGetN(prhs[ARG_SIG_S]) != 1) { 50 | mexErrMsgTxt("SigamS must be Hx1"); 51 | } 52 | 53 | if (mxGetNumberOfDimensions(prhs[ARG_SIG_R]) != 2 || mxGetN(prhs[ARG_SIG_R]) != 1) { 54 | mexErrMsgTxt("SigamR must be Hx1"); 55 | } 56 | 57 | int M = mxGetM(prhs[ARG_DEPTH]); 58 | int N = mxGetN(prhs[ARG_DEPTH]); 59 | 60 | if (mxGetM(prhs[ARG_INTENSITY]) != M || 61 | mxGetN(prhs[ARG_INTENSITY]) != N) { 62 | mexErrMsgTxt("Intensity image is of a different size than the depth image"); 63 | } 64 | 65 | if (mxGetM(prhs[ARG_NOISE]) != M || 66 | mxGetN(prhs[ARG_NOISE]) != N) { 67 | mexErrMsgTxt("Noise image is of a different size than the depth image"); 68 | } 69 | 70 | int num_scales = mxGetM(prhs[ARG_SIG_S]); 71 | if (mxGetM(prhs[ARG_SIG_R]) != num_scales) { 72 | mexErrMsgTxt("SigmaS and SigmaR must be the same size (Sx1)"); 73 | } 74 | } 75 | 76 | // Args: 77 | // depth - the HxW depth image (read in column major order). 78 | // intensity - the HxW intensity image (read in column major order). 79 | // noise_mask - the HxW logical noise mask. Values of 1 indicate that the 80 | // corresponding depth value is missing or noisy. 81 | // sigma_s - Sx1 vector of sigmas. 82 | // sigma_r - Sx1 vector of range sigmas. 
83 | void mexFunction(int nlhs, mxArray* plhs[], 84 | const int nrhs, const mxArray* prhs[]) { 85 | 86 | if (nrhs != 5) { 87 | mexErrMsgTxt("Usage: mex_cbf(depth, intensity, noise, sigmaS, sigmaR);"); 88 | } 89 | 90 | validate_types(prhs); 91 | validate_sizes(prhs); 92 | 93 | int H = mxGetM(prhs[ARG_DEPTH]); 94 | int W = mxGetN(prhs[ARG_INTENSITY]); 95 | int num_scales = mxGetM(prhs[ARG_SIG_S]); 96 | 97 | if (LOG_LEVEL >= 1) { 98 | mexPrintf("Found images of size %dx%d, filtering at %d scales.\n", 99 | H, W, num_scales); 100 | } 101 | 102 | uint8_t* depth = (uint8_t*) mxGetData(prhs[ARG_DEPTH]); 103 | uint8_t* intensity = (uint8_t*) mxGetData(prhs[ARG_INTENSITY]); 104 | bool* noise_mask = (bool*) mxGetData(prhs[ARG_NOISE]); 105 | double* sigma_s = (double*) mxGetData(prhs[ARG_SIG_S]); 106 | double* sigma_r = (double*) mxGetData(prhs[ARG_SIG_R]); 107 | 108 | mwSize ndim = 2; 109 | mwSize dims[] = {H, W}; 110 | plhs[0] = mxCreateNumericArray(ndim, &dims[0], mxUINT8_CLASS, mxREAL); 111 | uint8_t* result = (uint8_t*) mxGetData(plhs[0]); 112 | 113 | cbf::cbf(H, W, depth, intensity, noise_mask, result, num_scales, sigma_s, sigma_r); 114 | } 115 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/mex_cbf.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/ext/toolbox_nyu_depth_v2/mex_cbf.mexa64 -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/mex_cbf_windows.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "cbf_windows.h" 3 | #include "mex.h" 4 | 5 | #define ARG_DEPTH 0 6 | #define ARG_INTENSITY 1 7 | #define ARG_NOISE 2 8 | #define ARG_SIG_S 3 9 | #define ARG_SIG_R 4 10 | 11 | #define LOG_LEVEL 1 12 | 13 | void validate_types(const mxArray* prhs[]) { 14 | if 
(mxGetClassID(prhs[ARG_DEPTH]) != mxUINT8_CLASS) { 15 | mexErrMsgTxt("Depth image must be of type uint8."); 16 | } 17 | 18 | if (mxGetClassID(prhs[ARG_INTENSITY]) != mxUINT8_CLASS) { 19 | mexErrMsgTxt("Intensity image must be of type uint8."); 20 | } 21 | 22 | if (mxGetClassID(prhs[ARG_NOISE]) != mxLOGICAL_CLASS) { 23 | mexErrMsgTxt("Noise image must be logical."); 24 | } 25 | 26 | if (mxGetClassID(prhs[ARG_SIG_S]) != mxDOUBLE_CLASS) { 27 | mexErrMsgTxt("SigmaS image must be double."); 28 | } 29 | 30 | if (mxGetClassID(prhs[ARG_SIG_R]) != mxDOUBLE_CLASS) { 31 | mexErrMsgTxt("SigmaR image must be double."); 32 | } 33 | } 34 | 35 | // Checks that all of the images are of the same size. 36 | void validate_sizes(const mxArray* prhs[]) { 37 | if (mxGetNumberOfDimensions(prhs[ARG_DEPTH]) != 2) { 38 | mexErrMsgTxt("Depth image must be HxW"); 39 | } 40 | 41 | if (mxGetNumberOfDimensions(prhs[ARG_INTENSITY]) != 2) { 42 | mexErrMsgTxt("Intensity image must be HxW"); 43 | } 44 | 45 | if (mxGetNumberOfDimensions(prhs[ARG_NOISE]) != 2) { 46 | mexErrMsgTxt("Noise image must be HxW"); 47 | } 48 | 49 | if (mxGetNumberOfDimensions(prhs[ARG_SIG_S]) != 2 || mxGetN(prhs[ARG_SIG_S]) != 1) { 50 | mexErrMsgTxt("SigamS must be Hx1"); 51 | } else if (mxGetNumberOfElements(prhs[ARG_SIG_S]) != NUM_SCALES) { 52 | mexErrMsgTxt("SigmaS's length is fixed"); 53 | } 54 | 55 | if (mxGetNumberOfDimensions(prhs[ARG_SIG_R]) != 2 || mxGetN(prhs[ARG_SIG_R]) != 1) { 56 | mexErrMsgTxt("SigamR must be Hx1"); 57 | } else if (mxGetNumberOfElements(prhs[ARG_SIG_R]) != NUM_SCALES) { 58 | mexErrMsgTxt("SigmaR's length is fixed"); 59 | } 60 | 61 | if (mxGetM(prhs[ARG_DEPTH]) != IMG_HEIGHT || 62 | mxGetN(prhs[ARG_DEPTH]) != IMG_WIDTH) { 63 | mexErrMsgTxt("Depth image must be 480x640"); 64 | } 65 | 66 | if (mxGetM(prhs[ARG_INTENSITY]) != IMG_HEIGHT || 67 | mxGetN(prhs[ARG_INTENSITY]) != IMG_WIDTH) { 68 | mexErrMsgTxt("Intensity image must be 480x640"); 69 | } 70 | 71 | if (mxGetM(prhs[ARG_NOISE]) != IMG_HEIGHT || 
72 | mxGetN(prhs[ARG_NOISE]) != IMG_WIDTH) { 73 | mexErrMsgTxt("Noise image must be 480x640"); 74 | } 75 | 76 | int num_scales = mxGetM(prhs[ARG_SIG_S]); 77 | if (mxGetM(prhs[ARG_SIG_R]) != num_scales) { 78 | mexErrMsgTxt("SigmaS and SigmaR must be the same size (Sx1)"); 79 | } 80 | } 81 | 82 | // Args: 83 | // depth - the HxW depth image (read in column major order). 84 | // intensity - the HxW intensity image (read in column major order). 85 | // noise_mask - the HxW logical noise mask. Values of 1 indicate that the 86 | // corresponding depth value is missing or noisy. 87 | // sigma_s - Sx1 vector of sigmas. 88 | // sigma_r - Sx1 vector of range sigmas. 89 | void mexFunction(int nlhs, mxArray* plhs[], 90 | const int nrhs, const mxArray* prhs[]) { 91 | 92 | if (nrhs != 5) { 93 | mexErrMsgTxt("Usage: mex_cbf(depth, intensity, noise, sigmaS, sigmaR);"); 94 | } 95 | 96 | validate_types(prhs); 97 | validate_sizes(prhs); 98 | 99 | uint8_t* depth = (uint8_t*) mxGetData(prhs[ARG_DEPTH]); 100 | uint8_t* intensity = (uint8_t*) mxGetData(prhs[ARG_INTENSITY]); 101 | bool* noise_mask = (bool*) mxGetData(prhs[ARG_NOISE]); 102 | double* sigma_s = (double*) mxGetData(prhs[ARG_SIG_S]); 103 | double* sigma_r = (double*) mxGetData(prhs[ARG_SIG_R]); 104 | 105 | mwSize ndim = 2; 106 | mwSize dims[] = {IMG_HEIGHT, IMG_WIDTH}; 107 | plhs[0] = mxCreateNumericArray(ndim, &dims[0], mxUINT8_CLASS, mxREAL); 108 | uint8_t* result = (uint8_t*) mxGetData(plhs[0]); 109 | 110 | cbf::cbf(depth, intensity, noise_mask, result, sigma_s, sigma_r); 111 | } 112 | -------------------------------------------------------------------------------- /ext/toolbox_nyu_depth_v2/project_depth_map.m: -------------------------------------------------------------------------------- 1 | % Projects the depth values onto the RGB image. 
%
% Usage:
%   imgDepth = imread('raw_clips/bedroom_0001/d-12942.665769-31455701.pgm');
%   imgDepth = swapbytes(imgDepth);
%   rgb = imread('raw_clips/bedroom_0001/r-12941.6324869-2938947.ppm');
%
%   [depthOut, rgbOut] = project_depth_map(imgDepth, rgb);
%
%
%
% Args:
%   imgDepth - the original uint16 output from the Kinect. The bytes MUST
%              have already been swapped via swapbytes.m
%   rgb - the original uint8 RGB image.
%
% Returns:
%   depthOut - the depth image output in meters (double).
%   rgbUndistorted - the undistorted RGB image (uint8).
function [depthOut, rgbUndistorted] = project_depth_map(imgDepth, rgb)
  % Script call: brings the calibration constants used below into this
  % workspace.
  camera_params;
  [numRows, numCols] = size(imgDepth);

  % Depth camera: focal lengths, principal point, distortion coefficients.
  fc_d = [fx_d, fy_d];
  cc_d = [cx_d, cy_d];
  kc_d = [k1_d, k2_d, p1_d, p2_d, k3_d];

  % RGB camera, same layout.
  fc_rgb = [fx_rgb, fy_rgb];
  cc_rgb = [cx_rgb, cy_rgb];
  kc_rgb = [k1_rgb, k2_rgb, p1_rgb, p2_rgb, k3_rgb];

  % Undistort the colour image one channel at a time, in double precision.
  rgbUndistorted = zeros(size(rgb));
  for chan = 1 : size(rgb, 3)
    channelIn = double(rgb(:, :, chan));
    rgbUndistorted(:, :, chan) = undistort(channelIn, fc_rgb, cc_rgb, kc_rgb, 0);
  end
  rgbUndistorted = uint8(rgbUndistorted);

  % Pixels holding the largest raw value form the noise mask, which is
  % undistorted like the images and then turned back into a logical mask.
  rawMax = max(imgDepth(:));
  noiseMask = 255 * double(imgDepth == rawMax);
  noiseMask = undistort(noiseMask, fc_d, cc_d, kc_d, 0) > 0;

  imgDepth = undistort_depth(double(imgDepth), fc_d, cc_d, kc_d, 0, noiseMask);

  % Fix issues introduced by distortion.
  invalid = (imgDepth < 600) | noiseMask;
  imgDepth(invalid) = 2047;

  % Raw values -> meters -> 3D points in the depth frame -> RGB frame.
  depthMeters = depth_rel2depth_abs(imgDepth);
  points3d = depth_world2rgb_world(depth_plane2depth_world(depthMeters));

  [xProj, yProj] = rgb_world2rgb_plane(points3d);

  % Finally, project back onto the RGB plane.
  xProj = round(xProj);
  yProj = round(yProj);

  % NOTE(review): the strict upper bounds leave out the last column and the
  % last row -- confirm whether that is intended.
  inBounds = xProj(:) > 0 & xProj(:) < numCols & ...
             yProj(:) > 0 & yProj(:) < numRows;
  goodInds = find(inBounds);

  % Order the points from farthest to nearest.
  depthOut = zeros(size(imgDepth));
  [depthSorted, order] = sort(-depthMeters(goodInds));
  depthSorted = -depthSorted;

  % Z-buffer projection: nearer points are written later, so they overwrite
  % farther points that land on the same pixel.
  rowsSorted = yProj(goodInds(order));
  colsSorted = xProj(goodInds(order));
  for k = 1 : length(order)
    depthOut(rowsSorted(k), colsSorted(k)) = depthSorted(k);
  end

  % Fix weird values...
  depthOut(depthOut > maxDepth) = maxDepth;
  depthOut(depthOut < 0) = 0;
  depthOut(isnan(depthOut)) = 0;
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/rgb_plane2rgb_world.m:
--------------------------------------------------------------------------------
% Back-projects a depth image through the RGB camera intrinsics.
%
% Args:
%   imgDepth - HxW depth image.
%
% Returns:
%   points3d - Nx3 matrix with one (X, -Y, Z) row per pixel; note that the
%              second column is negated.
function points3d = rgb_plane2rgb_world(imgDepth)
  % Script call: defines fx_rgb, fy_rgb, cx_rgb and cy_rgb.
  camera_params;
  [numRows, numCols] = size(imgDepth);

  % Make the original consistent with the camera location:
  [colIdx, rowIdx] = meshgrid(1:numCols, 1:numRows);

  xWorld = (colIdx - cx_rgb) .* imgDepth / fx_rgb;
  yWorld = (rowIdx - cy_rgb) .* imgDepth / fy_rgb;

  points3d = [xWorld(:), -yWorld(:), imgDepth(:)];
end
--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/rgb_world2rgb_plane.m:
--------------------------------------------------------------------------------
% Performs the camera projection from the RGB-world coordinate frame onto
% the RGB plane.
%
% Args:
%   points3d - Nx3 matrix of (X,Y,Z) points in the RGB-world coordinate
%              frame.
%
% Returns:
%   X_plane - the X coordinates in the RGB plane.
%   Y_plane - the Y coordinates in the RGB plane.
function [X_plane, Y_plane] = rgb_world2rgb_plane(points3d)
  % fx_rgb, fy_rgb, cx_rgb and cy_rgb are expected to come from this script.
  camera_params;

  % Perspective division by Z, then scale by the focal length and shift by
  % the principal point.
  depth = points3d(:,3);
  X_plane = (points3d(:,1) .* fx_rgb ./ depth) + cx_rgb;
  Y_plane = (points3d(:,2) .* fy_rgb ./ depth) + cy_rgb;
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/undistort.m:
--------------------------------------------------------------------------------
% Undistorts the given image using a set of intrinsic parameters.
%
% Note that this code was taken from Jean-Yves Bouguet's excellent Camera
% Calibration Toolbox for matlab which can be found in its entirety here:
% http://www.vision.caltech.edu/bouguetj/calib_doc/
%
% Args:
%   I - the distorted image, an HxW double matrix between 0 and 255.
%   fc - 2x1 vector, the focal length parameters.
%   cc - 2x1 vector, the camera center parameters.
%   kc - 5x1 vector, the distortion parameters.
%   alpha_c - the skew coefficient, a scalar.
%
% Returns:
%   I2 - the undistorted image, an HxW double matrix between 0 and 255.
function I2 = undistort(I, fc, cc, kc, alpha_c)
  % Camera matrix of the undistorted image: same intrinsics, skew included.
  KK_new = [fc(1) alpha_c*fc(1) cc(1); 0 fc(2) cc(2); 0 0 1];
  % Pass the skew explicitly (as undistort_depth does). With the previous
  % 6-argument call rect() assumed zero skew when re-projecting, which
  % disagreed with KK_new whenever alpha_c ~= 0. Results for alpha_c == 0
  % are unchanged.
  I2 = rect(I, eye(3), fc, cc, kc, alpha_c, KK_new);
end

% Resamples image I through the distortion model.
%
% Args:
%   I      - HxW image (double).
%   R      - 3x3 rotation applied to the viewing rays (default: identity).
%   f, c   - focal lengths and principal point of the distorted camera.
%   k      - distortion coefficients handed to apply_distortion.
%   alpha  - skew of the distorted camera. In the legacy 6-argument form
%            this slot carries KK_new instead and the skew is taken as 0.
%   KK_new - 3x3 camera matrix of the output image.
%
% Returns:
%   Irec - HxW image; pixels whose source falls outside I stay at 255.
function [Irec] = rect(I,R,f,c,k,alpha,KK_new)
  if nargin < 1
    error('ERROR: Need an image to rectify');
  end
  if nargin < 2, R = eye(3); end
  if nargin < 3, f = [1;1]; end
  if nargin < 4, c = [0;0]; end
  if nargin < 5, k = [0;0;0;0;0]; end

  if nargin < 7
    if nargin < 6
      KK_new = [f(1) 0 c(1); 0 f(2) c(2); 0 0 1];
    else
      KK_new = alpha; % the 6th argument is actually KK_new
    end
    alpha = 0;
  end

  % Note: R is the motion of the points in space
  % So: X2 = R*X where X: coord in the old reference frame, X2: coord in the new ref frame.

  [nr,nc] = size(I);

  % Output pixels that cannot be sampled keep this fill value.
  Irec = 255*ones(nr,nc);

  % Pixel coordinates of the output image, enumerated row by row.
  [mx,my] = meshgrid(1:nc, 1:nr);
  px = reshape(mx',nc*nr,1);
  py = reshape(my',nc*nr,1);

  % Viewing rays of the output pixels (0-based pixel coordinates).
  rays = inv(KK_new)*[(px - 1)';(py - 1)';ones(1,length(px))];

  % Rotation: (or affine transformation):
  rays2 = R'*rays;

  x = [rays2(1,:)./rays2(3,:);rays2(2,:)./rays2(3,:)];

  % Add distortion:
  xd = apply_distortion(x,k);

  % Reconvert in pixels (0-based coordinates in the distorted image):
  px2 = f(1)*(xd(1,:)+alpha*xd(2,:))+c(1);
  py2 = f(2)*xd(2,:)+c(2);

  % Interpolate between the closest pixels:
  px_0 = floor(px2);
  py_0 = floor(py2);

  % Keep only samples whose 2x2 neighbourhood lies inside the image.
  good_points = find((px_0 >= 0) & (px_0 <= (nc-2)) & (py_0 >= 0) & (py_0 <= (nr-2)));

  px2 = px2(good_points);
  py2 = py2(good_points);
  px_0 = px_0(good_points);
  py_0 = py_0(good_points);

  % Bilinear interpolation weights.
  alpha_x = px2 - px_0;
  alpha_y = py2 - py_0;

  a1 = (1 - alpha_y).*(1 - alpha_x);
  a2 = (1 - alpha_y).*alpha_x;
  a3 = alpha_y .* (1 - alpha_x);
  a4 = alpha_y .* alpha_x;

  % Linear (column-major, 1-based) indices of the four source neighbours.
  ind_lu = px_0 * nr + py_0 + 1;
  ind_ru = (px_0 + 1) * nr + py_0 + 1;
  ind_ld = px_0 * nr + (py_0 + 1) + 1;
  ind_rd = (px_0 + 1) * nr + (py_0 + 1) + 1;

  % Linear indices of the destination pixels.
  ind_new = (px(good_points)-1)*nr + py(good_points);

  Irec(ind_new) = a1 .* I(ind_lu) + a2 .* I(ind_ru) + a3 .* I(ind_ld) + a4 .* I(ind_rd);
end

--------------------------------------------------------------------------------
/ext/toolbox_nyu_depth_v2/undistort_depth.m:
--------------------------------------------------------------------------------
% Undistorts the given depth image using a set of intrinsic parameters,
% leaving out source pixels that are flagged as noise.
%
% Note that this code was taken from Jean-Yves Bouguet's excellent Camera
% Calibration Toolbox for matlab which can be found in its entirety here:
% http://www.vision.caltech.edu/bouguetj/calib_doc/
%
% Args:
%   I - the distorted image, an HxW double matrix.
%   fc - 2x1 vector, the focal length parameters.
%   cc - 2x1 vector, the camera center parameters.
%   kc - 5x1 vector, the distortion parameters.
%   alpha_c - the skew coefficient, a scalar.
%   noiseMask - HxW logical mask; source pixels flagged true are excluded
%               from the interpolation.
%
% Returns:
%   I2 - the undistorted image, an HxW double matrix.
function I2 = undistort_depth(I, fc, cc, kc, alpha_c, noiseMask)
  % Camera matrix of the undistorted image: same intrinsics, skew included.
  KK_new = [fc(1) alpha_c*fc(1) cc(1); 0 fc(2) cc(2); 0 0 1];
  I2 = rect(I, eye(3), fc, cc, kc, alpha_c, KK_new, noiseMask);
end

% Resamples image I through the distortion model, dropping the bilinear
% weights of source pixels flagged in noiseMask and renormalising the rest.
%
% Args:
%   I         - HxW image (double).
%   R         - 3x3 rotation applied to the viewing rays.
%   f, c      - focal lengths and principal point of the distorted camera.
%   k         - distortion coefficients handed to apply_distortion.
%   alpha     - skew of the distorted camera.
%   KK_new    - 3x3 camera matrix of the output image.
%   noiseMask - HxW logical mask of source pixels to ignore.
%
% Returns:
%   Irec - HxW image; pixels with no usable source sample are 0.
function [Irec] = rect(I, R, f, c, k, alpha, KK_new, noiseMask)
  if nargin < 1
    error('ERROR: Need an image to rectify');
  end
  if nargin < 2, R = eye(3); end
  if nargin < 3, f = [1;1]; end
  if nargin < 4, c = [0;0]; end
  if nargin < 5, k = [0;0;0;0;0]; end

  if nargin < 7
    if nargin < 6
      KK_new = [f(1) 0 c(1); 0 f(2) c(2); 0 0 1];
    else
      KK_new = alpha; % the 6th argument is actually KK_new
    end
    alpha = 0;
  end

  % Note: R is the motion of the points in space
  % So: X2 = R*X where X: coord in the old reference frame, X2: coord in the new ref frame.

  [nr,nc] = size(I);

  % Pixel coordinates of the output image, enumerated row by row.
  [mx,my] = meshgrid(1:nc, 1:nr);
  px = reshape(mx',nc*nr,1);
  py = reshape(my',nc*nr,1);

  % Viewing rays of the output pixels (0-based pixel coordinates).
  rays = inv(KK_new)*[(px - 1)';(py - 1)';ones(1,length(px))];

  % Rotation: (or affine transformation):
  rays2 = R'*rays;

  x = [rays2(1,:)./rays2(3,:);rays2(2,:)./rays2(3,:)];

  % Add distortion:
  xd = apply_distortion(x,k);

  % Reconvert in pixels (0-based coordinates in the distorted image):
  px2 = f(1)*(xd(1,:)+alpha*xd(2,:))+c(1);
  py2 = f(2)*xd(2,:)+c(2);

  % Interpolate between the closest pixels:
  px_0 = floor(px2);
  py_0 = floor(py2);

  % Keep only samples whose 2x2 neighbourhood lies inside the image.
  good_points = find((px_0 >= 0) & (px_0 <= (nc-2)) & (py_0 >= 0) & (py_0 <= (nr-2)));

  px2 = px2(good_points);
  py2 = py2(good_points);
  px_0 = px_0(good_points);
  py_0 = py_0(good_points);

  % Bilinear interpolation weights.
  alpha_x = px2 - px_0;
  alpha_y = py2 - py_0;

  a1 = (1 - alpha_y).*(1 - alpha_x);
  a2 = (1 - alpha_y).*alpha_x;
  a3 = alpha_y .* (1 - alpha_x);
  a4 = alpha_y .* alpha_x;

  % Linear (column-major, 1-based) indices of the four source neighbours.
  ind_lu = px_0 * nr + py_0 + 1;
  ind_ru = (px_0 + 1) * nr + py_0 + 1;
  ind_ld = px_0 * nr + (py_0 + 1) + 1;
  ind_rd = (px_0 + 1) * nr + (py_0 + 1) + 1;

  % Linear indices of the destination pixels.
  ind_new = (px(good_points)-1)*nr + py(good_points);

  % Ignore coeffs when they are indexing into noise.
  a1 = a1 .* ~noiseMask(ind_lu);
  a2 = a2 .* ~noiseMask(ind_ru);
  a3 = a3 .* ~noiseMask(ind_ld);
  a4 = a4 .* ~noiseMask(ind_rd);

  % Renormalise the surviving weights so they sum to one.
  s = a1 + a2 + a3 + a4;
  badPix = s == 0;

  a1 = a1 ./ s;
  a2 = a2 ./ s;
  a3 = a3 ./ s;
  a4 = a4 ./ s;

  % All four neighbours were noise: 0/0 produced NaN, force the weights to 0.
  a1(badPix) = 0;
  a2(badPix) = 0;
  a3(badPix) = 0;
  a4(badPix) = 0;

  Irec = zeros(nr, nc);
  Irec(ind_new) = a1 .* I(ind_lu) + a2 .* I(ind_ru) + a3 .* I(ind_ld) + a4 .* I(ind_rd);
end

--------------------------------------------------------------------------------
/src/eval/GetMasksGT.m:
--------------------------------------------------------------------------------
function [objMasks, objLabels] = GetMasksGT(m_dataDir, img_id)
% Get the ground-truth object masks and corresponding labels of one image.
%
% Inputs:
%   m_dataDir: dataset root containing 'label_crop' and 'instances_crop'
%   img_id: numeric image id; the files are named <img_id>.mat
%
% Outputs:
%   objMasks, objLabels: as returned by get_instance_masks

% The two .mat files are expected to provide the variables 'label' and
% 'instance' used below.
load (fullfile(m_dataDir, 'label_crop', [num2str(img_id) '.mat']));
load (fullfile(m_dataDir, 'instances_crop', [num2str(img_id) '.mat']));
[objMasks, objLabels] = get_instance_masks(label, instance);

end

--------------------------------------------------------------------------------
/src/eval/eval_BBs.m:
--------------------------------------------------------------------------------
% evaluate the quality of proposed bounding boxes

% addpath('./m_common/');
% addpath('./Evaluation/');

close all;

% load split data
var = load('data/nyuv2/nyusplits.mat');
set_type = 'test';
if strcmp(set_type, 'test')
    imlist = var.tst - 5000;
else
    imlist = var.trainval - 5000;
end

% result path
res_path ='result/nyuv2/Eval/BB';
if ~exist(res_path, 'dir')
    mkdir(res_path);
end
data_path = 'data/nyuv2';

% compute Jmat for each image (images that already have a result are skipped)
BB_path = 'result/nyuv2/BB';
parfor i = 1 : numel(imlist)
    pid = imlist(i);
    fprintf('**************processing %d ***************\n', pid);
    if exist(fullfile(res_path, [num2str(pid), '.mat']), 'file')
        fprintf('skip %d\n', pid);
        continue;
    end

    var = load(fullfile(BB_path, [num2str(pid), '.mat']));
    BB = var.BB;
    [GtMasks, ~] = GetMasksGT(data_path, pid);
    GtBB = m_mask2bbox(GtMasks);
    Jmat = m_BB_VS_GT(BB, GtBB);
    ParSave(fullfile(res_path, [num2str(pid), '.mat']), Jmat);
end

%% extract stat info from Jmats
ncandSet = [10:5:100,125:25:1000,1500:500:6000,10000]; % use the first ncand proposals
num_ncand = numel(ncandSet);

% get the total number of objects in tst dataset
num_objects = 0;
num_images = numel(imlist);
for i = 1 : numel(imlist)
    var = load(fullfile(res_path, [num2str(imlist(i)), '.mat']));
    Jmat = var.Jmat;
    num_objects = num_objects + size(Jmat,1);
end

% compute best Jaccard for each objects
Jmax = zeros(num_objects, num_ncand);
n_mask_sel = zeros(num_images, num_ncand);
for i = 1 : num_ncand
    ncand = ncandSet(i);
    k = 1;   % first row of Jmax belonging to the current image
    for j = 1 : num_images
        pid = imlist(j);
        var = load(fullfile(res_path, [num2str(pid), '.mat']));
        Jmat = var.Jmat;
        [nobjs, nprops] = size(Jmat);
        nsel = min(nprops, ncand);
        % choose first nsel proposals
        Jmat = Jmat(:, 1:nsel);
        Jmax_1 = max(Jmat, [], 2);
        % save to Jmax
        Jmax(k:(k+nobjs-1),i) = Jmax_1;
        % update k
        k = k + nobjs;
        % save nsel
        n_mask_sel(j,i) = nsel;
    end
end

% Average over images (dimension 1). Without the explicit dimension a
% single-image run would collapse the row vector to one scalar.
avg_n_mask_sel = mean(n_mask_sel, 1);
save(fullfile(res_path, 'res_s.mat'), 'Jmax','n_mask_sel');

%% multi-level recall score (Rs)
lineColors = {'k-', 'r-', 'c-', 'b+', 'gs', 'bo', 'r^', 'm*', 'b>', 'ks', 'r+', 'g^','k--', 'b-'};
overlap_levels = 0.5;
figure;
grid on;
grid minor;
xlabel('Number of candidates');
ylabel('Recall');
title('bounding box proposals on NYUV2 dataset', 'Interpreter','none');
Methods = {'ours'};
for k = 1 : numel(Methods)
    hold on;
    % an object counts as recalled when its best proposal exceeds the overlap level
    num_recalls = sum(Jmax > overlap_levels, 1);
    plot(avg_n_mask_sel, num_recalls/num_objects, lineColors{k});
    legend(Methods{k});
end

--------------------------------------------------------------------------------
/src/eval/eval_segments.m:
--------------------------------------------------------------------------------
% evaluate the quality of proposed segments

% load split data
var = load('data/nyuv2/nyusplits.mat');
set_type = 'test';
if strcmp(set_type, 'test')
    imlist = var.tst - 5000;
else
    imlist = var.trainval - 5000;
end

% result path
res_path ='result/nyuv2/Eval/Seg';
if ~exist(res_path, 'dir')
    mkdir(res_path);
end
data_path = 'data/nyuv2';

% compute Jmat for each image (images that already have a result are skipped)
seg_path = 'result/nyuv2/Seg';
parfor i = 1 : numel(imlist)
    pid = imlist(i);
    fprintf('**************processing image %d ***************\n', pid);
    if exist(fullfile(res_path, [num2str(pid) '.mat']), 'file')
        fprintf('skip %d\n', pid);
        continue;
    end
    var = load(fullfile(seg_path, [num2str(pid) '.mat']));
    segMasks = var.segCells;
    [GtMasks, ~] = GetMasksGT(data_path, pid);
    Jmat = m_SEG_VS_GT(segMasks, GtMasks);
    ParSave(fullfile(res_path, [num2str(pid) '.mat']), Jmat);
end

%% extract stat info from Jmats
ncandSet = [10:5:100,125:25:1000,1500:500:6000,10000]; % use the first ncand proposals
num_ncand = numel(ncandSet);

% get the total number of objects in tst dataset
num_objects = 0;
% num_images was used below without ever being defined in this script.
num_images = numel(imlist);
for i = 1 : numel(imlist)
    pid = imlist(i);
    var = load(fullfile(res_path, [num2str(pid) '.mat']));
    Jmat = var.Jmat;
    num_objects = num_objects + size(Jmat,1);
end

% compute best Jaccard for each objects
Jmax = zeros(num_objects, num_ncand);
n_mask_sel = zeros(num_images, num_ncand);
for i = 1 : num_ncand
    ncand = ncandSet(i);
    k = 1;   % first row of Jmax belonging to the current image
    for j = 1 : numel(imlist)
        pid = imlist(j);
        var = load(fullfile(res_path, [num2str(pid) '.mat']));
        Jmat = var.Jmat;
        [nobjs, nprops] = size(Jmat);
        nsel = min(nprops, ncand);
        % choose first nsel proposals
        Jmat = Jmat(:, 1:nsel);
        Jmax_1 = max(Jmat, [], 2);
        % save to Jmax
        Jmax(k:(k+nobjs-1),i) = Jmax_1;
        % update k
        k = k + nobjs;
        % save nsel
        n_mask_sel(j,i) = nsel;
    end
end

% Average over images (dimension 1). Without the explicit dimension a
% single-image run would collapse the row vector to one scalar.
avg_n_mask_sel = mean(n_mask_sel, 1);
save(fullfile(res_path, 'res.mat'), 'Jmax','n_mask_sel');

%% JI
figure;
grid on;
grid minor;
xlabel('Number of candidates');
ylabel('Jaccard');
title('Segment proposals on NYUV2 dataset');
Methods = {'ours'};
for k = 1 : 1
    hold on;
    Ji = sum(Jmax, 1);
    plot(avg_n_mask_sel, Ji/num_objects, 'r-');
    legend(Methods{k});
end

--------------------------------------------------------------------------------
/src/eval/m_BB_VS_GT.m:
--------------------------------------------------------------------------------
function Jmat = m_BB_VS_GT(BB, GtBB)
% compute Jaccard matrix between proposed bounding boxes and ground
% truth objects
%
% Inputs:
%   BB: proposals N x 4;
%   GtBB: M x 4;
%
% Outputs:
%   Jmat: M x N, Jmat(i,j) is the Jaccard index of ground truth i and
%         proposal j

N = size(BB, 1);
M = size(GtBB, 1);
Jmat = zeros(M, N);

for i = 1 : M
    gt = GtBB(i,:);
    for j = 1 : N
        Jmat(i,j) = m_Jaccard_bbox(gt, BB(j,:));
    end
end

end

--------------------------------------------------------------------------------
/src/eval/m_Jaccard_bbox.m:
--------------------------------------------------------------------------------
function [ J ] = m_Jaccard_bbox( gt, bbox )
% compute Jaccard index for overlapping between gt and bbox
% Inputs:
%   gt: 1 x 4; [Cmin, Rmin, width, height]
%   bbox : 1 x 4; [Cmin, Rmin, width, height]
% Outputs:
%   J: Jaccard index (intersection area / union area)

% box areas
a1 = gt(3) * gt(4);
a2 = bbox(3) * bbox(4);

% convert to inclusive corners [Cmin, Rmin, Cmax, Rmax]
minc1 = gt(1);
minr1 = gt(2);
maxc1 = gt(1) + gt(3) - 1;
maxr1 = gt(2) + gt(4) - 1;

minc2 = bbox(1);
minr2 = bbox(2);
maxc2 = bbox(1) + bbox(3) - 1;
maxr2 = bbox(2) + bbox(4) - 1;

% intersection area (zero when the boxes do not overlap)
hi = min(maxr1, maxr2) - max(minr1, minr2) + 1;
wi = min(maxc1, maxc2) - max(minc1, minc2) + 1;
IA = 0;
if (hi > 0) && (wi > 0)
    IA = hi * wi;
end

% union area
UA = a1 + a2 - IA;
assert(UA > 0);

J = IA / UA;

end

--------------------------------------------------------------------------------
/src/eval/m_SEG_VS_GT.m:
--------------------------------------------------------------------------------
function Jmat = m_SEG_VS_GT( segCell, GtMasks)
% compare segments with ground truth
%
% Inputs:
%   segCell -- segments stored in a cell; each entry indexes the pixels of
%              one segment
%   GtMasks -- ground truth masks, h x w x nGt
%
% Outputs:
%   Jmat -- nGt x numel(segCell) matrix of overlap scores

nobjs = numel(segCell);
[h, w, nGt] = size(GtMasks);
% Jmat
Jmat = zeros(nGt, nobjs);
% care: pixels that belong to at least one ground-truth instance
instancesMap = zeros(h, w);
for i = 1 : nGt
    instancesMap(logical(GtMasks(:,:,i))) = i;
end
care = (instancesMap ~= 0);

% score every segment against all ground-truth masks
parfor i = 1 : nobjs
    mask = zeros(h, w);
    mask(segCell{i}) = 1;
    Jmat(:,i) = m_overlap(mask, GtMasks, care);
end

end

function J = m_overlap(mask, GtMasks, care)
% Overlap of one segment mask with every ground-truth mask, computed by the
% overlap_care MEX function.
S = size(GtMasks,3);
J = zeros(S,1);
for i = 1 : S
    tmp = logical(GtMasks(:,:,i));
    if isempty(tmp)
        J(i) = 0;
    else
        J(i) = overlap_care(logical(mask), tmp , care);
    end
end
end

--------------------------------------------------------------------------------
/src/eval/m_eval_segments.m~:
--------------------------------------------------------------------------------
% NOTE(review): this is an editor backup of eval_segments.m. It uses
% num_images, tst, res_Jmat_Path, res_label_Path and res_eval_Path, none of
% which are defined in this file, so it cannot run as is - confirm whether
% it should be kept at all.

% load split data
var = load('data/nyuv2/nyusplits.mat');
set_type = 'test';
if strcmp(set_type, 'test')
    imlist = var.tst - 5000;
else
    imlist = var.trainval - 5000;
end


% parfor i = 1 : num_images
%     pid = tst(i);
%     fprintf('**************processing image %d ***************\n', pid);
%     if exist(fullfile(res_Jmat_Path, [num2str(pid) '.mat']), 'file')
%         fprintf('skip %d\n', pid);
%         continue;
%     end
%     var = load(fullfile(segPath, [num2str(pid) '.mat']));
%     %segMasks = var.masks;
%     segMasks = var.segMasks;
%     % segMasks = m_parload(fullfile(segPath, [num2str(pid) '.mat']), 'segMasks');
%     [GtMasks, GtLabels] = m_getMasksGT(GtPath, pid);
%
%
%     Jmat = m_SEG_VS_GT(segMasks, GtMasks);
%     m_parsave(fullfile(res_Jmat_Path,[num2str(pid) '.mat']), Jmat);
%     m_parsave(fullfile(res_label_Path,[num2str(pid) '.mat']), GtLabels);
%     %save(fullfile(res_Jmat_Path,[num2str(pid) '.mat']), 'Jmat');
% end

%% extract stat info from Jmats
ncandSet = [10:5:100,125:25:1000,1500:500:6000,10000]; % use the first ncand proposals
num_ncand = numel(ncandSet);

% get the total number of objects in tst dataset
num_objects = 0;
for i = 1 : num_images
    pid = tst(i);
    var = load(fullfile(res_Jmat_Path, [num2str(pid) '.mat']));
    Jmat = var.Jmat;
    num_objects = num_objects + size(Jmat,1);
end

% get the total object classes
objLabels = zeros(num_objects, 1);
k = 1;
for i = 1 : num_images
    % pid was previously left at the last value of the loop above, so the
    % same label file was loaded for every image.
    pid = tst(i);
    var = load(fullfile(res_label_Path, [num2str(pid) '.mat']));
    GtLabels = var.GtLabels;
    nobjs = numel(GtLabels);
    objLabels(k:(k+nobjs-1)) = GtLabels;
    k = k + nobjs;
end

% compute best Jaccard for each objects
Jmax = zeros(num_objects, num_ncand);
n_mask_sel = zeros(num_images, num_ncand);
for i = 1 : num_ncand
    ncand = ncandSet(i);
    k = 1;
    for j = 1 : num_images
        pid = tst(j);
        var = load(fullfile(res_Jmat_Path, [num2str(pid) '.mat']));
        Jmat = var.Jmat;
        [nobjs, nprops] = size(Jmat);
        nsel = min(nprops, ncand);
        % choose first nsel proposals
        Jmat = Jmat(:, 1:nsel);
        Jmax_1 = max(Jmat, [], 2);
        % save to Jmax
        Jmax(k:(k+nobjs-1),i) = Jmax_1;
        % update k
        k = k + nobjs;
        % save nsel
        n_mask_sel(j,i) = nsel;
    end
end

% Average over images (dimension 1). Without the explicit dimension a
% single-image run would collapse the row vector to one scalar.
avg_n_mask_sel = mean(n_mask_sel, 1);
save(fullfile(res_eval_Path, 'res.mat'), 'Jmax','n_mask_sel','objLabels');

%% JI
figure;
grid on;
grid minor;
xlabel('Number of candidates');
ylabel('Jaccard');
title('Segment proposals on NYUV2 dataset');
Methods = {'ours','others'};
for k = 1 : 1
    hold on;
    Ji = sum(Jmax, 1);
    plot(avg_n_mask_sel, Ji/num_objects, 'r-');
    legend(Methods{k});
end


delete(gcp('nocreate'));
--------------------------------------------------------------------------------
/src/eval/overlap_care.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/src/eval/overlap_care.mexa64
--------------------------------------------------------------------------------
/src/planeDet/CalcNormals.m:
--------------------------------------------------------------------------------
function [ normals ] = CalcNormals( points )
% compute normals for each pixel by fitting a plane to nearby 3D points
%
% Inputs:
%   points: mxnx3 point cloud (cm); points(:,:,3) is used as the depth
%
% Output:
%   normals: mxnx4 plane parameters per pixel, scaled so that the first
%            three components have unit norm; NaN where no plane was fitted
rawDepth = points(:,:,3); % delete later!!!
[h, w] = size(rawDepth);
pts = reshape(points, [], 3);
pts_homo = [pts ones(h*w, 1)];

% sparse neighbourhood offsets: every second pixel up to +-9, plus 0
bw = [-9:2:-1 0 1 : 2 : 9];
[bc, br] = meshgrid(bw, bw);

non_missing = ~isnan(rawDepth(:));
normals = NaN(h*w, 4);
[ci, ri] = meshgrid(1 : w, 1 : h);

parfor k = 1 : (h*w)

    if non_missing(k) == false
        continue;
    end

    u = ri(k);
    v = ci(k);
    ROI_r = u + br;
    ROI_c = v + bc;
    % ensure that ROI is within image.
    valid = (ROI_r >= 1) & (ROI_c >= 1) & (ROI_r <= h) & (ROI_c <= w);
    u1 = ROI_r(valid);
    v1 = ROI_c(valid);
    ind = u1 + (v1-1)*h;
    % ensure that neighbor pixels have similar depth value (not good!)
    valid = abs(rawDepth(ind) - rawDepth(k)) ...
            < (rawDepth(k) * 0.05);
    ind = ind(valid);
    % ensure that NaN depth is removed
    valid = non_missing(ind);
    ind = ind(valid);

    % calculate normal by fitting plane to points with RANSAC
    % pts_candidates = pts_homo(ind, :);
    % p = m_normal_ransac(pts_candidates, pts_homo(k,:));
    % if ~isempty(p)
    %    normals(k, :) = p';
    % end
    if numel(ind) >= 3
        % Least-squares plane: the eigenvector of A'*A with the smallest
        % eigenvalue. It is selected explicitly rather than relying on the
        % order in which eig returns the eigenvalues.
        A = pts_homo(ind, :);
        [V, D] = eig(A'*A);
        [~, iMin] = min(diag(D));
        p = V(:, iMin);
        p = p/norm(p(1:3));
        normals(k,:) = p';
    end
end

normals = reshape(normals, [h,w,4]);
end

--------------------------------------------------------------------------------
/src/planeDet/GetCameraMatrix.m:
--------------------------------------------------------------------------------
function K = GetCameraMatrix()
% Returns the 3x3 intrinsic matrix built from the RGB parameters provided
% by camera_params, with the principal point shifted by (40, 45) pixels.
% NOTE(review): the shift presumably accounts for the image crop - confirm.
camera_params;
K = [fx_rgb, 0, cx_rgb-40;
    0, fy_rgb, cy_rgb-45;
    0, 0, 1];
end

--------------------------------------------------------------------------------
/src/segmentations/BBfromDPs.m:
--------------------------------------------------------------------------------
function [bbox_dp, masksDP] = BBfromDPs(inliers, sz)
% bounding boxes directly converted from detected planes
%
% Inputs:
%   inliers: N x 1 cell, pixel indices of each detected plane
%   sz: image size [h, w]
%
% Outputs:
%   bbox_dp: bounding boxes computed by m_mask2bbox
%   masksDP: h x w x K stack of the region masks the boxes come from

N = numel(inliers);
if N == 0
    bbox_dp = [];
    masksDP = [];
    return;
end

% cc for each plane region
% SE = strel('square', 5);
regions = [];
for i = 1 : N
    PR = zeros(sz);
    PR(inliers{i}) = 1;
    % morphological process
    % PR = imdilate(PR, SE);
    % PR = imerode(PR, SE);
    % cc
    cc = bwconncomp(PR, 8);
    masks = zeros(sz(1), sz(2), cc.NumObjects);
    for j = 1 : cc.NumObjects
        tmp = zeros(sz);
        tmp(cc.PixelIdxList{j}) = 1;
        masks(:,:,j) = tmp;
    end
    regions = cat(3, regions, masks);
    % a plane split into several components is also kept as a whole
    if cc.NumObjects > 1
        regions = cat(3, regions, PR);
    end
end

%
bbox_dp = m_mask2bbox(regions, 1.0);
masksDP = regions;

end

--------------------------------------------------------------------------------
/src/segmentations/BBfromMPRs.m:
--------------------------------------------------------------------------------
function [ bbox, Masks ] = BBfromMPRs( inliers, points )
% propose object bounding box by merging plane regions
%
% Inputs:
%   inliers: N x 1 cell
%   points: n x m x 3 pcd
%
% Outputs:
%   bbox: bounding boxes
%   Masks: corresponding regions for bbox

[h, w, ~] = size(points);
N = numel(inliers);

% nothing to merge without detected planes (same guard as BBfromDPs)
if N == 0
    bbox = [];
    Masks = [];
    return;
end

% cc for each plane region
% SE = strel('square', 5);
regions = [];
for i = 1 : N
    PR = zeros(h,w);
    PR(inliers{i}) = 1;
    % morphological process
    % PR = imdilate(PR, SE);
    % PR = imerode(PR, SE);
    % cc
    cc = bwconncomp(PR, 8);
    masks = zeros(h, w, cc.NumObjects);
    for j = 1 : cc.NumObjects
        tmp = zeros(h, w);
        tmp(cc.PixelIdxList{j}) = 1;
        masks(:,:,j) = tmp;
    end
    regions = cat(3, regions, masks);
end

% find borders for each region
NR = size(regions,3);
borders = cell(NR, 1);
for i = 1 : NR
    borders{i} = find( FindBorderPixels(regions(:,:,i)));
end

% create min dist matrix: smallest 3D distance between the border points
% of every pair of regions
pts = reshape(points, [], 3);
distMat = zeros(NR, NR);
for i = 1 : (NR-1)
    pcd1 = pts(borders{i},:);
    for j = (i+1) : NR
        pcd2 = pts(borders{j},:);
        dist = pdist2(pcd1, pcd2);
        distMat(i,j) = min(dist(:));
    end
end
distMat = distMat + distMat';

% merge PRs whose borders are closer than th
th = 10; % cm
CC = m_graphCC(NR, distMat, th);

% convert to bounding box (only groups of two or more regions)
Masks = [];
bbox = [];
for i = 1 : numel(CC)
    if numel(CC{i}) > 1
        PR = regions(:,:,CC{i});
        PR = sum(PR, 3);
        Masks = cat(3, Masks, PR);
        bb = m_mask2bbox(PR, 1.0);
        bbox = cat(1,bbox, bb);
    end
end

end

function cc = m_graphCC(N_node, distMat, th)
% Connected components of the graph whose nodes are joined when their
% distance is below th. Returns a cell column, one vector of node ids per
% component.
edges = (distMat < th);
edges(logical(eye(N_node, N_node))) = false;   % no self loops
cc = {};
isExplored = false(N_node,1);

for i = 1 : N_node
    if ~isExplored(i)
        % breadth-first search from node i
        Q = i;
        ids = i;
        isExplored(i) = true;
        while ~isempty(Q)
            v = Q(1);
            Q(1) = [];
            % find neighbors
            Ne = find(edges(v,:)' & ~isExplored);
            % update
            ids = [ids; Ne];
            Q = [Q; Ne];
            isExplored(Ne) = true;
        end
        cc = cat(1, cc, ids);
    end
end

end

--------------------------------------------------------------------------------
/src/segmentations/BBfromNPRs.m:
--------------------------------------------------------------------------------
function [ bbox, segMasks] = BBfromNPRs( masks, masksWS, planesMap )
% propose bounding box from Non-planar regions

% Inputs:
%   masks: Lx1 cell segment masks
%   masksWS: M x 1 cell
%   planesMap: mxn plane map from plane detection
%
% Outputs:
%   bbox: bounding boxes
%   segMasks: corresponding segments Nx1 cell
debug = false;

% masks = logical(masks);
% [h, w, d] = size (masks);
[h, w] = size(planesMap);
d = numel(masks);
fprintf('# of masks from multi-scale segmentation: %d\n', d + numel(masksWS));

% morphological processing to fill small holes
planar = (planesMap ~= 0);
SE = strel('square', 3);
planar = imdilate(planar, SE);
planar = imerode(planar, SE);
non_planar = ~planar;

% morphological processing to remove little wires
SE = strel('square', 5);
survive = true(d, 1);
for i = 1 : d
    mask = false(h, w);
    mask(masks{i}) = true;
    mask = imerode(mask, SE);
    mask = imdilate(mask, SE);
    masks{i} = find(mask);
    if numel(masks{i}) == 0
        survive(i) = false;
    end
end
masks = masks(survive);

% add wshed here
masks = cat(1, masks, masksWS);
d = numel(masks);
if debug
    fprintf('# of masks after little wires criteria: %d\n', d);
end

% consider use other strategy to filter out masks further
% p/np ratio: drop masks with less than 20% of their pixels on non-planar area
survive = true(d,1);
for i = 1 : d
    mask = false(h, w);
    mask(masks{i}) = true;
    tmp = non_planar(mask);
    rt = sum(tmp(:))/sum(mask(:));
    if rt < 0.2
        survive(i) = false;
    end
end
d = sum(survive);
if debug
    fprintf('# of masks after p/np ratio criteria: %d\n', d);
end
masks = masks(survive);

% compute bounding box
scale = 1.0;
bbox = Mask2Bbox(masks, [h, w], scale);

% remove duplicated boxes (largest boxes first)
area = bbox(:,3).* bbox(:,4);
[~, ind] = sort(area, 'descend');
bbox = bbox(ind,:);
masks = masks(ind);
[bbox, ind] = RemoveDupBbox(bbox, 0.98);
masks = masks(ind);
if debug
    fprintf('# of masks after remove duplicated criteria: %d\n', size(bbox,1));
end

% filter out bbox which have small overlap with non-planar
[bbox, ind] = RemoveBadBbox(bbox, non_planar, 0.1);
masks = masks(ind);
if debug
    fprintf('# of masks after box overlap criteria: %d\n', size(bbox,1));
end

% nms
[bbox, ind] = RemoveDupBbox(bbox, 0.9);
segMasks = masks(ind);
if debug
    fprintf('# of masks after nms criteria: %d\n', size(bbox,1));
end


end

--------------------------------------------------------------------------------
/src/segmentations/BBfromPRs.m:
--------------------------------------------------------------------------------
function [ bbox, segMasks ] = BBfromPRs (segMasks,sz, inliers)
% propose bounding box on planar regions
%
% Inputs:
%   segMasks: L x 1 cell segment masks from segmentation
%   sz: image size [h, w]
%   inliers: N x 1 cell plane points
%
% Outputs:
%   bbox: bounding boxes on planes
%   segMasks: the segments that survived, in the same order as bbox
debug = false;

h = sz(1);
w = sz(2);
num_seg = numel(segMasks);
N = numel(inliers);

if N == 0
    bbox = [];
    segMasks = [];
    return;
end

% reform planes map: every connected component of every plane gets its own
% id; NPX(id) is its pixel count
planesMap = zeros(h, w);
SE = strel('square', 5);
id = 1;
NPX = [];
for i = 1 : N
    PR = zeros(h, w);
    PR(inliers{i}) = 1;
    % morphological process
    PR = imdilate(PR, SE);
    PR = imerode(PR, SE);
    % cc
    cc = bwconncomp(PR, 8);
    for j = 1 : cc.NumObjects
        planesMap(cc.PixelIdxList{j}) = id;
        id = id + 1;
        NPX = [NPX; numel(cc.PixelIdxList{j})];
    end
end
num_planar = numel(NPX);
if debug
    fprintf('# masks on planes (before filter): %d\n', num_seg);
end

% filter out segments
survive = true(num_seg,1);
for i = 1 : num_seg
    mask = false(h, w);
    mask(segMasks{i}) = true;

    tmp = planesMap(mask);
    num_pixels = numel(tmp);
    tmp(tmp==0) = [];
    bins = histc(tmp, 1:num_planar);
    % the code expects bins as a column, like NPX; a single-element tmp
    % is transposed to match
    if numel(tmp) == 1
        bins = bins';
    end
    r = bins./NPX;                % share of each planar region covered
    maxr = max(r);
    r1 = sum(bins)/num_pixels;    % share of the segment lying on planes
    if maxr > 1.0 || r1 < 0.2
        survive(i) = false;
    end
end

segMasks = segMasks(survive);
if debug
    % segMasks is a cell array, so count its entries; size(segMasks,3)
    % always printed 1
    fprintf('# masks on planes (after filter): %d\n', numel(segMasks));
end

% change to bbox
bbox = Mask2Bbox(segMasks, [h, w], 1.0);

% remove duplicated boxes (largest boxes first)
area = bbox(:,3).* bbox(:,4);
[~, ind] = sort(area, 'descend');
bbox = bbox(ind,:);
segMasks = segMasks(ind);
[bbox, ids] = RemoveDupBbox(bbox, 0.98);
segMasks = segMasks(ids);
if debug
    fprintf('# of masks on planes after remove duplicated criteria: %d\n', size(bbox,1));
end

% nms
[bbox, ids] = RemoveDupBbox(bbox, 0.8);
segMasks = segMasks(ids);
num_seg = numel(segMasks);
if debug
    fprintf('# of masks after nms criteria: %d\n', num_seg);
end


% bad bounding box
% [bbox, ids] = m_removeBadBB_planar(bbox, planesMap, NPX, 0.9);
% segMasks = segMasks(:,:,ids);
% num_seg = size(segMasks,3);
% if debug
%     fprintf('# of masks after remove bad bbs: %d\n', num_seg);
% end

% aspect ratio: drop extremely elongated boxes
aspect1 = bbox(:,3)./bbox(:,4);
aspect2 = bbox(:,4)./bbox(:,3);
aspect = min([aspect1 aspect2], [],2);
survive = aspect > 0.04;
bbox = bbox(survive,:);
segMasks = segMasks(survive);
num_seg = numel(segMasks);
if debug
    fprintf('# of masks after remove aspect bb: %d\n', num_seg);
end


end

function [out, survive] = m_removeBadBB_planar(bbox, planesMap, NPX, th)
% Drops boxes that cover more than the fraction th of any single planar
% region. Currently unused: its only call site above is commented out.
N = size(bbox,1);
survive = true(N,1);
num_planar = numel(NPX);
for i = 1 : N
    rBegin = bbox(i,2);
    rEnd = rBegin + bbox(i,4) -1;
    cBegin = bbox(i,1);
    cEnd = cBegin + bbox(i,3) -1;
    tmp = planesMap(rBegin:rEnd, cBegin:cEnd);
    tmp = tmp(:);
    tmp(tmp==0) = [];
    bins = histc(tmp, 1:num_planar);
    if numel(tmp) == 1
        bins = bins';
    end
    r = bins./NPX;
    maxr = max(r);
    if maxr > th
        survive(i) = false;
    end
end

out = bbox(survive,:);

end

--------------------------------------------------------------------------------
/src/segmentations/Depth2PCD.m:
--------------------------------------------------------------------------------
function [ points] = Depth2PCD(D)
% this function is dedicated to the KINECT used by NYUV2
% and the image itself is cropped to size [425 560].
4 | % You can modify the camera parameters accordingly 5 | % 6 | % Inputs: 7 | % D: inpainted depth matrix mxn 8 | % 9 | % Outputs: 10 | % points: 3d point clouds mxnx3 11 | 12 | Kd = GetCameraMatrix(); 13 | 14 | [h, w] = size(D); 15 | [xx,yy] = meshgrid(1:w, 1:h); 16 | 17 | X = (xx - Kd(1,3)) .* D / Kd(1,1); 18 | Y = (yy - Kd(2,3)) .* D / Kd(2,2); 19 | Z = D; 20 | 21 | points = cat(3, X, Y, Z); 22 | 23 | end 24 | 25 | -------------------------------------------------------------------------------- /src/segmentations/FindBorderPixels.m: -------------------------------------------------------------------------------- 1 | function [ borderMask ] = FindBorderPixels( spMask ) 2 | 3 | % find boundary pixels given one superpixel 4 | % Input: 5 | % spMask: superpixel mask 6 | % Output: 7 | % borderMask: boundary pixels mask 8 | 9 | B = bwboundaries(spMask, 'noholes'); 10 | sz = size(spMask); 11 | borderMask = zeros(sz); 12 | for i = 1 : numel(B) 13 | b = B{i}; 14 | b = b(1:end-1, :); 15 | ind = sub2ind(sz, b(:,1), b(:,2)); 16 | borderMask(ind) = 1; 17 | end 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /src/segmentations/GraphBasedSegmentation.m: -------------------------------------------------------------------------------- 1 | function [ masks_cell ] = GraphBasedSegmentation( I, points, K, MIN, sigma) 2 | % generate segment masks from multiscale, multi-channel graph 3 | % based segmentation 4 | debug = false; 5 | % grayscale depth image 6 | rawDepth = points(:,:,3); 7 | maxi = max(rawDepth(:)); 8 | mini = min(rawDepth(:)); 9 | grayIm = uint8(255 * (rawDepth - mini)/(maxi-mini)); 10 | grayIm(isnan(rawDepth)) = 0; 11 | grayIm = cat(3, grayIm, grayIm, grayIm); 12 | 13 | % normalized x y z (uint8) 14 | x = points(:,:,1); 15 | y = points(:,:,2); 16 | z = points(:,:,3); 17 | x = uint8(255*(x - min(x(:))) / (max(x(:)) - min(x(:)))); 18 | y = uint8(255*(y - min(y(:))) / (max(y(:)) - min(y(:)))); 19 | z = uint8(255*(z - 
min(z(:))) / (max(z(:)) - min(z(:)))); 20 | pts = cat(3, x,y,z); 21 | pts = double(pts); 22 | 23 | masks_cell = []; 24 | for i = 1 : numel(K) 25 | % color image 26 | [mapColor, ~] = m_segmentWrapper(I, nan(size(I)), K(i), MIN, sigma); 27 | tmp = Label2Mask(mapColor); 28 | masks_cell = cat(1, masks_cell, tmp); 29 | 30 | % depth image (grayscale) 31 | [mapDepth, ~] = m_segmentWrapper(grayIm, nan(size(I)), K(i), MIN, sigma); 32 | tmp = Label2Mask(mapDepth); 33 | masks_cell = cat(1, masks_cell, tmp); 34 | 35 | % color + depth 36 | [map, ~] = m_segmentWrapper(I, pts, K(i), MIN, sigma); 37 | tmp = Label2Mask(map); 38 | masks_cell = cat(1, masks_cell, tmp); 39 | 40 | % visualization 41 | if (debug) 42 | imc = ColorizeLabelImage(int32(mapColor)); 43 | imd = ColorizeLabelImage(int32(mapDepth)); 44 | imcd = ColorizeLabelImage(int32(map)); 45 | 46 | im1 = cat(2, I, imc); 47 | im2 = cat(2, imd, imcd); 48 | figure; 49 | im = cat(1, im1, im2); 50 | imshow(im); 51 | end 52 | end 53 | 54 | end 55 | 56 | -------------------------------------------------------------------------------- /src/segmentations/HierClustering.m: -------------------------------------------------------------------------------- 1 | function [bbox, segMasks] = HierClustering(points, clusterTolerance, inliers, isV, isH, isB, pid) 2 | % spatial pcd partition by euclidean clustering 3 | % note plane points are removed 4 | % 5 | % Inputs: 6 | % points: mxnx3 pcd 7 | % clusterTolerance: Lx1 (cm) 8 | % inliers: NX1 cell for plane inliers 9 | % isV, isH, isB: plane types 10 | % 11 | % Outputs: 12 | % bbox: bounding boxes 13 | % segMasks: corresponding masks 14 | 15 | %% remove plane points from pcd 16 | [h,w,~] = size(points); 17 | pts = reshape(points, [], 3); 18 | N = size(pts, 1); 19 | ind = (1 : N)'; 20 | planeIdx = []; 21 | %planeId = find(isV | isH | isB); 22 | 23 | new_isH = m_filter_HVP(inliers, isH, [h, w], 6000); 24 | %planeId = find(isV | new_isH | isB); 25 | 26 | new_isV = m_filter_HVP(inliers, isV, [h, 
w], 25000); 27 | planeId = find(new_isH | isB |new_isV); 28 | for i = 1 : numel(planeId) 29 | planeIdx = cat(1, planeIdx, inliers{planeId(i)}'); 30 | end 31 | 32 | % get remaining point index 33 | isMissing = isnan(pts(:,3)); 34 | ind = ind(~isMissing); 35 | ind = setdiff(ind, planeIdx); 36 | % get remaining points 37 | pcd = pts(ind, :); 38 | pcd = pcd/100; % convert to meters 39 | pcdFile = fullfile('src/segmentations', [num2str(pid) '.pcd']); 40 | mat2PCDfile(pcdFile,double(pcd)); 41 | 42 | 43 | 44 | %% clustering 45 | segMasks = []; 46 | clusterTolerance = clusterTolerance/100; % use meters 47 | for i = 1 : numel(clusterTolerance) 48 | % euclidean clustering 49 | system(sprintf('src/segmentations/m_pcd_clustering.out %s %d %d', ... 50 | pcdFile, clusterTolerance(i), pid)); 51 | % read in result from text file 52 | txtFile = ['clusters_', num2str(pid), '.txt']; 53 | clusters = load(fullfile('./', txtFile)); 54 | % save mask 55 | nC = max(clusters); 56 | for j = 1 : nC 57 | tmp = zeros(h, w); 58 | idx = ind(clusters == j); 59 | tmp(idx) = 1; 60 | mask = cell(1,1); 61 | mask{1,1} = find(tmp); 62 | segMasks = cat(1, segMasks, mask); 63 | end 64 | system(['rm ./', txtFile]); 65 | end 66 | 67 | 68 | %% convert to bounding box 69 | bbox = Mask2Bbox(segMasks,[h, w], 1.0); 70 | area = bbox(:,3).* bbox(:,4); 71 | [~,idx] = sort(area, 'descend'); 72 | bbox = bbox(idx,:); 73 | segMasks = segMasks(idx); 74 | [bbox, ind] = RemoveDupBbox(bbox, 0.8); 75 | segMasks = segMasks(ind); 76 | 77 | system(['rm ' pcdFile]); 78 | end 79 | 80 | % function m_vis_clustering(pcd, clusters) 81 | % c = m_label2rgb(clusters); 82 | % c = uint8(255 * reshape(c,[],3)); 83 | % pointRGB = cat(2, pcd, single(c)); 84 | % mat2PCDfile('./visualization/vis_clustering.pcd',double(pointRGB)); 85 | % system('./visualization/m_pclViewer.out ./visualization/vis_clustering.pcd'); 86 | % end 87 | 88 | function new_isH = m_filter_HVP(inliers, isH, sz, th) 89 | N = numel(isH); 90 | 91 | for i = 1 : N 92 | if 
~isH(i) 93 | continue; 94 | end 95 | 96 | PR = zeros(sz); 97 | PR(inliers{i}) = 1; 98 | % cc 99 | cc = bwconncomp(PR, 8); 100 | count = zeros(cc.NumObjects,1); 101 | for j = 1 : cc.NumObjects 102 | count(j) = numel(cc.PixelIdxList{j}); 103 | end 104 | % 105 | ind = find(count > th); 106 | if isempty(ind) 107 | isH(i) = false; 108 | end 109 | end 110 | new_isH = isH; 111 | end -------------------------------------------------------------------------------- /src/segmentations/Label2Mask.m: -------------------------------------------------------------------------------- 1 | function [masks_cell, N] = Label2Mask( map ) 2 | % convert label map into region masks 3 | % 4 | % inputs: 5 | % map: mxn (start from 1) 6 | % outputs: 7 | % masks_cell: N x 1 8 | % N: number of masks 9 | 10 | N = max(map(:)); 11 | masks_cell = cell(N, 1); 12 | for i = 1 : N 13 | masks_cell{i} = find(map == i); 14 | end 15 | 16 | end 17 | 18 | -------------------------------------------------------------------------------- /src/segmentations/Mask2Bbox.m: -------------------------------------------------------------------------------- 1 | function bbox = Mask2Bbox(masks, sz, scale) 2 | % convert masks into bounding boxes 3 | % Inputs: 4 | % masks: d x 1 cell of object masks (each cell holds the linear pixel indices of one mask) 5 | % sz: [h, w] 6 | % scale: scale ratio for the bounding box (optional, default 1.0) 7 | % 8 | % outputs: 9 | % bbox: d x 4 [col, row, width, height] 10 | if nargin < 3 % scale is the third argument 11 | scale = 1.0; 12 | end 13 | 14 | h = sz(1); 15 | w = sz(2); 16 | d = numel(masks); 17 | bbox = zeros(d, 4); 18 | 19 | for i = 1 : d 20 | mask = false(h, w); 21 | mask(masks{i}) = true; 22 | bbox(i,:) = m_helper(mask, scale); 23 | end 24 | 25 | 26 | end 27 | 28 | function bbox = m_helper(mask, scale) 29 | [h, w] = size(mask); 30 | 31 | % calc borders 32 | [c, r] = meshgrid(1:w,1:h); 33 | 34 | R = r(mask); 35 | C = c(mask); 36 | maxr = max(R); 37 | minr = min(R); 38 | maxc = max(C); 39 | minc = min(C); 40 | width = maxc -minc + 1; 41 | height = maxr- minr + 1; 42 | center_r = 
floor((minr+maxr)/2); 43 | center_c = floor((minc+maxc)/2); 44 | 45 | % scale 46 | new_width = width * scale; 47 | new_height = height * scale; 48 | 49 | % update borders 50 | Rmin = center_r - floor(new_height/2); 51 | Cmin = center_c - floor(new_width/2); 52 | % floor() puts the center on the lower pixel when min+max is odd, so shift that axis by one; rows and columns are checked separately 53 | if mod(minr+maxr,2) ~= 0 54 | Rmin = Rmin + 1; 55 | end 56 | if mod(minc+maxc,2) ~= 0 57 | Cmin = Cmin + 1; 58 | end 59 | 60 | if Rmin <= 0 61 | Rmin = 1; 62 | end 63 | 64 | if Rmin > h 65 | Rmin = h; 66 | end 67 | 68 | Rmax = center_r + floor(new_height/2); 69 | if Rmax > h 70 | Rmax = h; 71 | end 72 | 73 | if Rmax <=0 74 | Rmax = 1; 75 | end 76 | 77 | if Cmin <= 0 78 | Cmin = 1; 79 | end 80 | 81 | if Cmin > w 82 | Cmin = w; 83 | end 84 | 85 | Cmax = center_c + floor(new_width/2); 86 | if Cmax > w 87 | Cmax = w; 88 | end 89 | 90 | if Cmax <=0 91 | Cmax = 1; 92 | end 93 | 94 | if Cmax < Cmin 95 | Cmax = Cmin; 96 | end 97 | 98 | if Rmax < Rmin 99 | Rmax = Rmin; 100 | end 101 | 102 | bbox = [Cmin, Rmin, (Cmax-Cmin+1), (Rmax-Rmin+1)]; 103 | 104 | end 105 | 106 | -------------------------------------------------------------------------------- /src/segmentations/Mask2Cell.m: -------------------------------------------------------------------------------- 1 | function segCell = Mask2Cell( segMasks ) 2 | % convert segment mask to cell structure 3 | 4 | N = size(segMasks,3); 5 | segCell = cell(N, 1); 6 | for i = 1 : N 7 | segCell{i} = find(segMasks(:,:,i)); 8 | end 9 | 10 | end 11 | 12 | -------------------------------------------------------------------------------- /src/segmentations/NormalVectorGradient.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/src/segmentations/NormalVectorGradient.mexa64 -------------------------------------------------------------------------------- /src/segmentations/RemoveBadBbox.m: 
-------------------------------------------------------------------------------- 1 | function [bbox, survive] = RemoveBadBbox(bbox, non_planar, th) 2 | % bbox: N x 4 rows of [col, row, width, height]; non_planar: 2-D map summed inside each box; th: minimum kept ratio 3 | % remove bounding boxes that have small overlap with the non-planar area 4 | N = size(bbox,1); 5 | survive = true(N,1); 6 | area = bbox(:,3).*bbox(:,4); 7 | for i = 1 : N 8 | rBegin = bbox(i,2); 9 | rEnd = rBegin + bbox(i,4) -1; 10 | cBegin = bbox(i,1); 11 | cEnd = cBegin + bbox(i,3) -1; 12 | region = non_planar(rBegin:rEnd, cBegin:cEnd); 13 | ratio = sum(region(:))/area(i); 14 | if ratio < th 15 | survive(i) = false; 16 | end 17 | end 18 | 19 | bbox = bbox(survive,:); 20 | 21 | end 22 | 23 | -------------------------------------------------------------------------------- /src/segmentations/RemoveDupBbox.m: -------------------------------------------------------------------------------- 1 | function [ out, survive ] = RemoveDupBbox( bbox, th ) 2 | % remove duplicated bounding boxes 3 | % a box is dropped when its intersection-over-union with an earlier surviving box exceeds th 4 | % Inputs: 5 | % bbox : m x 4; [Cmin, Rmin, width, height] 6 | % th: threshold for overlap 7 | % outputs: 8 | % out : n x 4 9 | % survive: m x 1 logical, true for the rows of bbox kept in out 10 | N = size(bbox, 1); 11 | coor = bbox; 12 | coor(:,3) = coor(:,1) + coor(:,3) -1; 13 | coor(:,4) = coor(:,2) + coor(:,4) -1; 14 | 15 | survive = true(N, 1); 16 | for i = 1 : (N-1) 17 | 18 | if ~survive(i) 19 | continue; 20 | end 21 | 22 | minr1 = coor(i,2); 23 | maxr1 = coor(i,4); 24 | minc1 = coor(i,1); 25 | maxc1 = coor(i,3); 26 | a1 = bbox(i,3) * bbox(i,4); 27 | 28 | for j = (i+1) : N 29 | minr2 = coor(j, 2); 30 | maxr2 = coor(j, 4); 31 | minc2 = coor(j, 1); 32 | maxc2 = coor(j, 3); 33 | a2 = bbox(j,3) * bbox(j,4); 34 | % intersection area 35 | ri_1 = max(minr1, minr2); 36 | ci_1 = max(minc1, minc2); 37 | ri_2 = min(maxr1, maxr2); 38 | ci_2 = min(maxc1, maxc2); 39 | 40 | hi = ri_2 - ri_1 + 1; 41 | wi = ci_2 - ci_1 + 1; 42 | IA = 0; 43 | if (hi > 0) && (wi > 0) 44 | IA = hi * wi; 45 | end 46 | 47 | % union area 48 | UA = a1 + a2 - IA; 49 | 50 | % suppress box j if the overlap ratio is above the threshold 51 | ratio = IA/UA; 52 | if ratio > th 53 | 
survive(j) = false; 54 | end 55 | end 56 | end 57 | 58 | out = bbox(survive, :); 59 | 60 | end 61 | 62 | -------------------------------------------------------------------------------- /src/segmentations/RemoveDupGCxD.m: -------------------------------------------------------------------------------- 1 | function seg = RemoveDupGCxD(seg_GC2D, seg_GC3D, sz) 2 | % remove duplicated segments between GC2D and GC3D 3 | 4 | % seg_GC2D -- cell 5 | % seg_GC3D -- cell 6 | % sz - size of image 7 | 8 | % seg -- combined unique segments cell 9 | 10 | N = numel(seg_GC2D); 11 | survive = true(N,1); 12 | 13 | parfor i = 1 : N 14 | J = false; 15 | if numel(seg_GC2D{i}) == numel(seg_GC3D{i}) 16 | avg2D = mean(seg_GC2D{i}); 17 | avg3D = mean(seg_GC3D{i}); 18 | 19 | if (avg2D == avg3D) 20 | mask1 = zeros(sz); 21 | mask1(seg_GC2D{i}) = 1; 22 | mask2 = zeros(sz); 23 | mask2(seg_GC3D{i}) = 1; 24 | J = isequal(mask1, mask2); 25 | end 26 | end 27 | 28 | if J 29 | survive(i) = false; 30 | end 31 | end 32 | segGC = seg_GC2D(survive); 33 | seg = cat(1, segGC, seg_GC3D); 34 | 35 | end 36 | 37 | -------------------------------------------------------------------------------- /src/segmentations/RemoveDupSeg.m: -------------------------------------------------------------------------------- 1 | function out = RemoveDupSeg( segCells, sz ) 2 | % remove duplicated segments from multiple sources 3 | 4 | N = numel(segCells); 5 | th = 1; 6 | care = true(sz); 7 | 8 | % precompute area, centroid 9 | Area = zeros(N, 1); 10 | Centers = zeros(N, 1); 11 | for i = 1 : N 12 | Area(i) = numel(segCells{i}); 13 | Centers(i) = mean(segCells{i}); 14 | end 15 | 16 | % 17 | cpmat = zeros(N,N); 18 | parfor i = 1 : N 19 | % mask1 = zeros(sz); 20 | % mask1(segCells{i}) = 1; 21 | % np = numel(segCells{i}); 22 | % cpmat(i,:) = m_helper_compare(logical(mask1), segCells, care, th, i, np); 23 | cpmat(i,:) = m_compare(segCells, i, sz, Area, Centers); 24 | end 25 | 26 | sel = triu(cpmat); 27 | sel(logical(eye(N))) = 0; 28 | 
29 | sel = sum(sel, 1); 30 | survive = (sel == 0); 31 | 32 | Segs = segCells(survive); 33 | 34 | % remove empty masks 35 | N = numel(Segs); 36 | survive = true(N, 1); 37 | for i = 1 : N 38 | if (numel(Segs{i}) == 0) 39 | survive(i) = false; 40 | end 41 | end 42 | out = Segs(survive); 43 | 44 | end 45 | 46 | function out = m_compare(segCells, j, sz, Area, Centers) 47 | N = numel(segCells); 48 | out = zeros(1, N); 49 | for i = 1 : N 50 | if i > j 51 | % out(i) = 1 only when segment i has exactly the same pixel set as segment j 52 | J = false; 53 | if Area(j) == Area(i) 54 | % equal area and equal mean index are cheap pre-checks; confirm with an exact mask comparison 55 | if (Centers(j) == Centers(i)) 56 | mask1 = zeros(sz); 57 | mask1(segCells{j}) = 1; 58 | mask2 = zeros(sz); 59 | mask2(segCells{i}) = 1; 60 | J = isequal(mask1, mask2); 61 | end 62 | 63 | end 64 | 65 | if J 66 | out(i) = 1; 67 | end 68 | 69 | end 70 | end 71 | 72 | 73 | end 74 | 75 | 76 | function out = m_helper_compare(mask1, segCells, care, th, j, np1) 77 | [h,w] = size(mask1); 78 | N = numel(segCells); 79 | out = zeros(1, N); 80 | 81 | for i = 1 : N 82 | if j >= i 83 | continue; 84 | end 85 | 86 | mask2 = zeros(h,w); 87 | mask2(segCells{i}) = 1; 88 | 89 | np2 = numel(segCells{i}); 90 | if (abs(np1 - np2)/(min(np1,np2) + eps)) > 0 91 | continue; 92 | end 93 | 94 | J = overlap_care(mask1, logical(mask2) , care); 95 | if J >= th 96 | out(i) = 1; 97 | end 98 | end 99 | end 100 | 101 | -------------------------------------------------------------------------------- /src/segmentations/Rgb2Lab.m: -------------------------------------------------------------------------------- 1 | % 20130604 Zhuo Deng Temple University 2 | % Convert a rgb image into Lab space 3 | 4 | 5 | function [L,A,B,lab] = Rgb2Lab(img) 6 | 7 | cform = makecform('srgb2lab'); 8 | lab = applycform(img,cform); 9 | L=lab(:,:,1); 10 | A=lab(:,:,2); 11 | B=lab(:,:,3); 12 | 13 | end -------------------------------------------------------------------------------- /src/segmentations/WatershedSegmentation.m: -------------------------------------------------------------------------------- 1 | function masksWS_cell 
= WatershedSegmentation(I, rawDepth, D) 2 | % generate segments based on watershed from different signal channels 3 | % 4 | th_L = 0.1; 5 | th_rD = 0.3; 6 | th_d = 0.2; 7 | th_N = 0.1; 8 | 9 | G1 = fspecial('gaussian',[9 9],1); 10 | %% process RGB info (intensity) 11 | [L,~,~] = Rgb2Lab(I); 12 | L = imfilter(L,G1,'same','replicate'); 13 | gradient_L = imgradient(L); 14 | gradient_L = m_Normalize(gradient_L); 15 | gradient_L = (gradient_L > th_L).*gradient_L; 16 | segMap = Watershed_region(gradient_L,false); 17 | masksL = m_segMap2masks(segMap, 85); % 85 18 | %% process rawDepth info (holes) 19 | isvalid = (rawDepth ~= 0); 20 | SE = strel('square',3); 21 | isvalid = imerode(isvalid,SE); 22 | rawDepth(~isvalid) = 0; 23 | rD = m_any2gray(rawDepth); 24 | rD = imfilter(rD, G1,'same','replicate'); 25 | gradient_rD = imgradient(rD); 26 | gradient_rD = m_Normalize(gradient_rD); 27 | gradient_rD = (gradient_rD > th_rD).*gradient_rD; 28 | segMap = Watershed_region(gradient_rD,false); 29 | masksRD = m_segMap2masks(segMap, 20); 30 | 31 | %% process depth info (depth gradient) 32 | d = m_any2gray(D); 33 | d = imfilter(d,G1,'same','replicate'); 34 | gradient_d = imgradient(d); 35 | gradient_d = m_Normalize(gradient_d); 36 | gradient_d = (gradient_d > th_d).*gradient_d; 37 | segMap = Watershed_region(gradient_d,false); 38 | masksD = m_segMap2masks(segMap, 20); 39 | 40 | %% normals 41 | points = Depth2PCD(D); 42 | normals= CalcNormals( points); 43 | gradient_n = NormalVectorGradient(normals(:,:,1:3)); 44 | gradient_n = m_Normalize(gradient_n); 45 | gradient_n = medfilt2(gradient_n, [5,5], 'symmetric'); 46 | gradient_n = (gradient_n > th_N) .* gradient_n; 47 | segMap = Watershed_region(gradient_n,false); 48 | masksN = m_segMap2masks(segMap, 85); %85 49 | 50 | masksWS_cell = cat(1,masksL, masksRD, masksD, masksN); 51 | end 52 | 53 | function masks_cell = m_segMap2masks(segMap, th) 54 | % Note segMap starts from 0 here 55 | segMap = segMap + 1; 56 | N = max(segMap(:)); 57 | count = 
1; 58 | [h, w] = size(segMap); 59 | max_pixels = round(h*w/4); 60 | masks_cell = []; 61 | 62 | for i = 1 : N 63 | tmp = (segMap == i); 64 | num_pixels = sum(tmp(:)); 65 | if (num_pixels > th) && (num_pixels < max_pixels) 66 | tmp1 = cell(1,1); 67 | tmp1{1,1} = find(tmp); 68 | masks_cell = cat(1, masks_cell, tmp1); 69 | count = count + 1; 70 | end 71 | end 72 | 73 | end 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/segmentations/Watershed_region.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/src/segmentations/Watershed_region.mexa64 -------------------------------------------------------------------------------- /src/segmentations/m_BB2mask.m: -------------------------------------------------------------------------------- 1 | function [ masks ] = m_BB2mask( BB, sz ) 2 | % convert bounding box to mask 3 | % 4 | % Inputs: 5 | % BB: N x 4. 
[Cmin, Rmin, Width, Height] 6 | % sz: mask size 7 | % Outputs: 8 | % masks: m x n x N 9 | 10 | N = size(BB, 1); 11 | masks = zeros(sz(1), sz(2), N); 12 | 13 | for i = 1 : N 14 | rBegin = BB(i,2); 15 | rEnd = BB(i,2) + BB(i,4) -1; 16 | cBegin = BB(i,1); 17 | cEnd = BB(i,1) + BB(i,3) -1; 18 | masks(rBegin:rEnd, cBegin:cEnd,i) = 1; 19 | end 20 | 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /src/segmentations/m_Normalize.m: -------------------------------------------------------------------------------- 1 | % 20130604 Zhuo Deng Temple University 2 | % normalize an input matrix of which values fall in [0,1] 3 | % currently Input is a 1D or 2D matrix 4 | 5 | function Mat_norm = m_Normalize(Matrix) 6 | M = max(Matrix(:)); 7 | N = min(Matrix(:)); 8 | diff = double(M-N); 9 | if diff == 0 10 | diff = diff + eps; 11 | end 12 | Mat_norm = (Matrix - N) / diff; 13 | end -------------------------------------------------------------------------------- /src/segmentations/m_any2gray.m: -------------------------------------------------------------------------------- 1 | function [ grayIm ] = m_any2gray( data ) 2 | % convert matrix to uint8 format for visualization purpose 3 | 4 | mask = isnan(data); 5 | data(mask) = 0; 6 | 7 | maxi = max(data(:)); 8 | mini = min(data(:)); 9 | grayIm = uint8(255 * (data - mini)/(maxi-mini)); 10 | 11 | end 12 | 13 | -------------------------------------------------------------------------------- /src/segmentations/m_classify_planes.m: -------------------------------------------------------------------------------- 1 | function [isV, isH, isB] = m_classify_planes(planes, points) 2 | % classify planes into horizontal, vertical, boundary 3 | 4 | % Inputs: 5 | % planes: N x 4 parametric matrix 6 | % points: m x n x 3 3d coordinates 7 | % 8 | % Outputs: indicators for type 9 | % isV: 10 | % isH: 11 | % isB: 12 | 13 | num_planes = size(planes,1); 14 | % angles with ny: cos(alpha) = ny 15 | angles = 
acos(abs(planes(:,2))) * 180 /pi; 16 | th = 10; 17 | % vertical 18 | isV = angles > (90-th); 19 | % horizontal 20 | isH = angles < th; 21 | 22 | % make planes normal point to viewer 23 | direction = planes(:,4) < 0 ; 24 | sign = ones(num_planes, 1); 25 | sign(direction) = -1; 26 | sign = repmat(sign, 1,4); 27 | planes = planes .* sign; 28 | 29 | % compute distance to planes 30 | [h, w, ~] = size(points); 31 | rawDepth = points(:,:,3); 32 | tolerance_d = 1 * 2.85e-5 * rawDepth(:).^2; 33 | pts = reshape(points, [], 3); 34 | pts_homo = [pts ones(h*w, 1)]; 35 | 36 | dist = planes * pts_homo'; 37 | offset = repmat(3 * tolerance_d', num_planes, 1); 38 | dist = dist + offset; 39 | outbound = dist < 0; 40 | non_missing = ~isnan(rawDepth); 41 | outbound = outbound(:, non_missing); 42 | N_out = size(outbound,2); 43 | num_outbound = sum(outbound, 2); 44 | ratio = num_outbound / N_out; 45 | % a plane is flagged isB when under 1% of the non-missing points have dist < 0 46 | isB = ratio < 0.01; 47 | 48 | end 49 | 50 | -------------------------------------------------------------------------------- /src/segmentations/m_mask2bbox.m: -------------------------------------------------------------------------------- 1 | function bbox = m_mask2bbox(masks, scale) 2 | % convert masks into bounding boxes 3 | % Inputs: 4 | % masks: m x n x d object masks 5 | % scale: scale ratio for the bounding box (optional, default 1.0) 6 | % 7 | % outputs: 8 | % bbox: d x 4 [col, row, width, height] 9 | if nargin < 2 10 | scale = 1.0; 11 | end 12 | 13 | 14 | [h, w, d] = size(masks); 15 | bbox = zeros(d, 4); 16 | 17 | for i = 1 : d 18 | mask = logical(masks(:,:,i)); 19 | bbox(i,:) = m_helper(mask, scale); 20 | end 21 | 22 | 23 | end 24 | 25 | function bbox = m_helper(mask, scale) 26 | [h, w] = size(mask); 27 | 28 | % calc borders 29 | [c, r] = meshgrid(1:w,1:h); 30 | 31 | R = r(mask); 32 | C = c(mask); 33 | maxr = max(R); 34 | minr = min(R); 35 | maxc = max(C); 36 | minc = min(C); 37 | width = maxc -minc + 1; 38 | height = maxr- minr + 1; 39 | center_r = floor((minr+maxr)/2); 40 | center_c = 
floor((minc+maxc)/2); 41 | 42 | % scale 43 | new_width = width * scale; 44 | new_height = height * scale; 45 | 46 | % update borders 47 | Rmin = center_r - floor(new_height/2); 48 | Cmin = center_c - floor(new_width/2); 49 | % floor() puts the center on the lower pixel when min+max is odd, so shift that axis by one; rows and columns are checked separately 50 | if mod(minr+maxr,2) ~= 0 51 | Rmin = Rmin + 1; 52 | end 53 | if mod(minc+maxc,2) ~= 0 54 | Cmin = Cmin + 1; 55 | end 56 | 57 | if Rmin <= 0 58 | Rmin = 1; 59 | end 60 | 61 | if Rmin > h 62 | Rmin = h; 63 | end 64 | 65 | Rmax = center_r + floor(new_height/2); 66 | if Rmax > h 67 | Rmax = h; 68 | end 69 | 70 | if Rmax <=0 71 | Rmax = 1; 72 | end 73 | 74 | if Cmin <= 0 75 | Cmin = 1; 76 | end 77 | 78 | if Cmin > w 79 | Cmin = w; 80 | end 81 | 82 | Cmax = center_c + floor(new_width/2); 83 | if Cmax > w 84 | Cmax = w; 85 | end 86 | 87 | if Cmax <=0 88 | Cmax = 1; 89 | end 90 | 91 | if Cmax < Cmin 92 | Cmax = Cmin; 93 | end 94 | 95 | if Rmax < Rmin 96 | Rmax = Rmin; 97 | end 98 | 99 | bbox = [Cmin, Rmin, (Cmax-Cmin+1), (Rmax-Rmin+1)]; 100 | 101 | end -------------------------------------------------------------------------------- /src/segmentations/m_mask5GC3D_cell.m: -------------------------------------------------------------------------------- 1 | function segCells = m_mask5GC3D_cell(I, points, BB, isRescale) 2 | % get segments from bounding box by using grabcut rgbd 3 | % 4 | % Inputs: 5 | % I: color image 6 | % points: 3d points 7 | % BB: bounding boxes. 
N x 4 8 | % Outputs: 9 | % segCells: Nx1 10 | if nargin < 4 11 | isRescale = false; 12 | end 13 | [org_h, org_w, ~] = size(I); 14 | if isRescale 15 | I = imresize(I, 0.5); 16 | points = imresize(points, 0.5); 17 | end 18 | 19 | [h, w, ~] = size(I); 20 | im = double(I); 21 | mask_fixed_fg = false(h, w); 22 | mask_fixed_bg = false(h, w); 23 | 24 | 25 | % run RGBD grab cut 26 | N = size(BB, 1); 27 | segCells = cell(N, 1); 28 | parfor i = 1 : N 29 | tmp = m_BB2mask(BB(i,:), [org_h, org_w]); 30 | if isRescale 31 | tmp = tmp(1:2:end, 1:2:end); 32 | end 33 | segMask = m_Grabcut_3D( im, points, tmp, mask_fixed_fg, mask_fixed_bg); 34 | if isRescale 35 | segMask = imresize(segMask, [org_h, org_w], 'nearest'); 36 | end 37 | segCells{i} = find(segMask); 38 | end 39 | 40 | end 41 | 42 | -------------------------------------------------------------------------------- /src/segmentations/m_mask5GC_cell.m: -------------------------------------------------------------------------------- 1 | function segCells = m_mask5GC_cell(I, BB, isRescale) 2 | % get segments from bounding box by using grabcut 3 | % 4 | % Inputs: 5 | % I: color image 6 | % BB: bounding boxes. 
N x 4 7 | % Outputs: 8 | % segCells: Nx1 9 | if nargin < 3 10 | isRescale = false; 11 | end 12 | 13 | [org_h, org_w, ~] = size(I); 14 | if isRescale 15 | I = imresize(I, 0.5); 16 | end 17 | 18 | im = double(I); 19 | [h, w, ~] = size(I); 20 | N = size(BB, 1); 21 | 22 | % hard constraints 23 | mask_fixed_fg = false(h, w); 24 | mask_fixed_bg = false(h, w); 25 | 26 | % 27 | segCells = cell(N, 1); 28 | parfor i = 1 : N 29 | tmp = m_BB2mask(BB(i,:), [org_h, org_w]); 30 | if isRescale 31 | tmp = tmp(1:2:end, 1:2:end); 32 | end 33 | 34 | segMask = m_Grabcut(im, tmp, mask_fixed_fg, mask_fixed_bg); 35 | if isRescale 36 | segMask = imresize(segMask, [org_h, org_w], 'nearest'); 37 | end 38 | 39 | segCells{i} = find(segMask); 40 | end 41 | 42 | end 43 | 44 | -------------------------------------------------------------------------------- /src/segmentations/m_pcd_clustering.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phoenixnn/RGBD-object-propsal/e88c9997e0342e3bc0bee65bead17bb058f342fe/src/segmentations/m_pcd_clustering.out -------------------------------------------------------------------------------- /src/segmentations/m_rescale_bbox.m: -------------------------------------------------------------------------------- 1 | function BB = m_rescale_bbox(bbox, sz, scale) 2 | N = size(bbox,1); 3 | BB = zeros(N, 4); 4 | h = sz(1); 5 | w = sz(2); 6 | for i = 1 : N 7 | bb = bbox(i,:); 8 | % bb is [col, row, width, height]; the box is scaled about its center and clipped to the h x w image 9 | width = bb(3); 10 | height = bb(4); 11 | minr = bb(2); 12 | minc = bb(1); 13 | 14 | maxc = minc + width -1; 15 | maxr = minr + height -1; 16 | 17 | center_r = floor((minr+maxr)/2); 18 | center_c = floor((minc+maxc)/2); 19 | 20 | % scale 21 | new_width = width * scale; 22 | new_height = height * scale; 23 | 24 | % update borders 25 | Rmin = center_r - floor(new_height/2); 26 | Cmin = center_c - floor(new_width/2); 27 | % floor() puts the center on the lower pixel when min+max is odd, so shift that axis by one; rows and columns are checked separately 28 | if mod(minr+maxr,2) ~= 0 29 | Rmin = Rmin + 1; 30 | end 
31 | if mod(minc+maxc,2) ~= 0 32 | Cmin = Cmin + 1; 33 | end 34 | 35 | if Rmin <= 0 36 | Rmin = 1; 37 | end 38 | 39 | if Rmin > h 40 | Rmin = h; 41 | end 42 | 43 | Rmax = center_r + floor(new_height/2); 44 | if Rmax > h 45 | Rmax = h; 46 | end 47 | 48 | if Rmax <=0 49 | Rmax = 1; 50 | end 51 | 52 | if Cmin <= 0 53 | Cmin = 1; 54 | end 55 | 56 | if Cmin > w 57 | Cmin = w; 58 | end 59 | 60 | Cmax = center_c + floor(new_width/2); 61 | if Cmax > w 62 | Cmax = w; 63 | end 64 | 65 | if Cmax <=0 66 | Cmax = 1; 67 | end 68 | 69 | if Cmax < Cmin 70 | Cmax = Cmin; 71 | end 72 | 73 | if Rmax < Rmin 74 | Rmax = Rmin; 75 | end 76 | 77 | BB(i,:) = [Cmin, Rmin, (Cmax-Cmin+1), (Rmax-Rmin+1)]; 78 | 79 | end 80 | 81 | 82 | 83 | 84 | 85 | 86 | end -------------------------------------------------------------------------------- /src/util/ParSave.m: -------------------------------------------------------------------------------- 1 | function ParSave(fname, varargin) 2 | % zhuo deng 3 | % temple university 4 | 5 | % save within the parfor loop 6 | % e.g., ParSave('./filename.mat', x); 7 | % ParSave('./filename.mat', x, y, z); 8 | 9 | %fprintf('number of arguments is %d\n ', nargin); 10 | 11 | if nargin < 2 12 | assert(false, 'not enough parameters\n'); 13 | else 14 | for i = 2 : nargin 15 | var_name = inputname(i); 16 | eval([var_name sprintf('= varargin{%d};', i-1)]); 17 | try 18 | save(fname,var_name,'-append'); 19 | catch 20 | save(fname,var_name, '-v7.3'); 21 | end 22 | end 23 | end 24 | 25 | end 26 | 27 | 28 | % function helper(fname, data) 29 | % 30 | % var_name = inputname(2); 31 | % eval([var_name '= data']); 32 | % 33 | % try 34 | % save(fname,var_name,'-append'); 35 | % catch 36 | % save(fname,var_name); 37 | % end 38 | % 39 | % end 40 | 41 | -------------------------------------------------------------------------------- /src/vis/Label2Rgb.m: -------------------------------------------------------------------------------- 1 | function rgb = Label2Rgb( 
labels ) 2 | % colorize labeled image 3 | % assume that 0 for unlabeled pixels 4 | 5 | L = max(labels(:)); 6 | if L <= 7 7 | cmap = lines(L); 8 | else 9 | cmap = cat(1,lines(7), hsv(L-7)); 10 | end 11 | 12 | cmap = [0 0 0; cmap]; 13 | rgb = ind2rgb(labels+1, cmap); 14 | 15 | end 16 | 17 | -------------------------------------------------------------------------------- /src/vis/mat2PCDfile.m: -------------------------------------------------------------------------------- 1 | function mat2PCDfile(fileName, points, mode) 2 | 3 | % zhuo deng 4 | % temple university 5 | % 20140918 6 | 7 | % convert the 3d points represented by matlab matrix into .pcd file 8 | 9 | % PCD v.7 file format 10 | % ========================= 11 | % VERSION .7 12 | % FIELDS x y z rgb 13 | % SIZE 4 4 4 4 14 | % TYPE F F F F 15 | % COUNT 1 1 1 1 16 | % WIDTH 213 17 | % HEIGHT 1 18 | % VIEWPOINT 0 0 0 1 0 0 0 19 | % POINTS 213 20 | % DATA ascii 21 | % 0.93773 0.33763 0 4.2108e+06 22 | % 0.90805 0.35641 0 4.2108e+06 23 | % 0.81915 0.32 0 4.2108e+06 24 | % ... 25 | % ========================== 26 | 27 | % inputs: 28 | % fileName: filePath + fileName (e.g. /usr/local/MATLAB/points.pcd) 29 | % points: 3D points data in matlab matrix mxnxd or mxd 30 | % mode: string 'binary' or 'ascii' 31 | 32 | if nargin < 3 33 | 34 | mode = 'ascii'; 35 | end 36 | 37 | assert( strcmp(mode, 'ascii') || strcmp(mode, 'binary'), ... 38 | 'invalid mode'); 39 | 40 | if strcmp(mode, 'ascii') 41 | 42 | fid = fopen(fileName,'w'); 43 | save_PCD(fid, points, 'ascii'); 44 | fclose(fid); 45 | 46 | else 47 | 48 | fprintf('binary mode is under construction ... 
\n'); 49 | end 50 | 51 | 52 | 53 | end 54 | 55 | function save_PCD(fileID, points, mode) 56 | 57 | fprintf(fileID, 'VERSION .7\n'); 58 | 59 | if ndims(points) == 2 60 | % unorganized points 61 | 62 | % write attributes 63 | num_attributes = size(points,2); 64 | print_helper(fileID, num_attributes); 65 | fprintf(fileID, 'WIDTH %d\n', size(points,1)); 66 | fprintf(fileID, 'HEIGHT 1\n'); 67 | fprintf(fileID, 'VIEWPOINT 0 0 0 1 0 0 0 \n'); 68 | fprintf(fileID, 'POINTS %d\n', size(points,1)); 69 | fprintf(fileID, 'DATA %s\n',mode); 70 | 71 | % write data 72 | print_helper_1(fileID, points'); 73 | 74 | else 75 | % organized points 76 | 77 | % write attributes 78 | num_attributes = size(points,3); 79 | print_helper(fileID, num_attributes); 80 | fprintf(fileID, 'WIDTH %d\n', size(points,2)); 81 | fprintf(fileID, 'HEIGHT %d\n', size(points,1)); 82 | fprintf(fileID, 'VIEWPOINT 0 0 0 1 0 0 0 \n'); 83 | fprintf(fileID, 'POINTS %d\n', size(points,2) * size(points,1)); 84 | fprintf(fileID, 'DATA %s\n',mode); 85 | 86 | % write data 87 | points_t = reshape(points, [], size(points,3)); 88 | print_helper_1(fileID, points_t'); 89 | 90 | 91 | end 92 | 93 | 94 | 95 | end 96 | 97 | function print_helper(fileID, num_attributes) 98 | % print 'fields', 'size', 'type', 'count' 99 | 100 | switch num_attributes 101 | case 3 102 | % x y z 103 | fprintf(fileID, 'FIELDS x y z\n'); 104 | fprintf(fileID, 'SIZE 4 4 4\n'); 105 | fprintf(fileID, 'TYPE F F F\n'); 106 | fprintf(fileID, 'COUNT 1 1 1\n'); 107 | 108 | case 6 109 | % x y z r g b 110 | fprintf(fileID, 'FIELDS x y z rgb\n'); 111 | fprintf(fileID, 'SIZE 4 4 4 4\n'); 112 | fprintf(fileID, 'TYPE F F F F\n'); 113 | fprintf(fileID, 'COUNT 1 1 1 1\n'); 114 | 115 | otherwise 116 | disp('other attribute format is under construction ...\n'); 117 | 118 | end 119 | 120 | 121 | 122 | end 123 | 124 | function print_helper_1 (fileID, points) 125 | % print data : each column represents one point 126 | num_dim = size(points,1); 127 | 128 | switch num_dim 
129 | case 3 130 | fprintf(fileID,'%.10f %.10f %.10f\n', points); 131 | % (points is 3 x N here, so fprintf emits one 'x y z' row per column) 132 | case 6 133 | % encode r g b into rgb 134 | color = encodeRGB(points(4:6,:)); 135 | fprintf(fileID, '%.10f %.10f %.10f %d\n',[points(1:3,:); color]); 136 | 137 | 138 | otherwise 139 | disp('other attribute format is under construction ...\n'); 140 | end 141 | 142 | end 143 | 144 | function color = encodeRGB(rgb) 145 | % rgb is a 3xN matrix (r, g, b); each column is packed as (r shifted left 16 bits) OR (g shifted left 8 bits) OR b 146 | color = bitor( bitshift(rgb(1,:), 16), bitshift(rgb(2,:),8)); 147 | color = bitor (color, rgb(3,:)); 148 | 149 | end 150 | 151 | 152 | 153 | 154 | 155 | --------------------------------------------------------------------------------