├── FeatureExtractor_torch.py
├── MaximalCliqueAlgorithm
│   ├── maximalCliques.m
│   ├── pairImgs.m
│   ├── run_per_folder.sh
│   └── vec2Mat.m
├── Pascal3dPlus.py
├── README.md
├── aggregate_3d_wPars.py
├── assign_synthetic_img.py
├── calculate_mAP.py
├── crop_synthetic_img.py
├── eval
│   ├── VOCap.py
│   └── eval_AP.py
├── extractLayerFeat.py
├── featPickle2featCell.py
├── get_vp_examples.py
├── refine_pool3.py
├── solve_nearest.py
├── solve_nearest_score.py
├── util.py
└── vertex_id_picker.py

--------------------------------------------------------------------------------
/FeatureExtractor_torch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | from torchvision import transforms
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from PIL import Image
7 | import numpy as np
8 | import os
9 | 
10 | # VGGPool4 Model
11 | class VGGPool4(nn.Module):
12 |     def __init__(self, layers):
13 |         super(VGGPool4, self).__init__()
14 |         self.pool4 = nn.Sequential(*layers)
15 | 
16 |     def forward(self, x):
17 |         x = self.pool4(x)
18 |         return x
19 | 
20 | 
21 | class FeatureExtractor_torch:
22 |     def __init__(self, state_file="/mnt/4TB_b/qing/VC_journal/vgg_pretrain/vgg_pool4_state.pth", scale_size=224, layer='pool4'):
23 |         self.img_mean = np.array([123.68, 116.779, 103.939])  # RGB
24 | 
25 |         vgg_template = torchvision.models.vgg16_bn()
26 |         # Collect the layers up to and including the requested pooling layer
27 |         layers = []
28 | 
29 |         # 34 for pool4, 24 for pool3
30 |         if layer == 'pool4':
31 |             layer_n = 34
32 |         elif layer == 'pool3':
33 |             layer_n = 24
34 |         else:
35 |             print('Unknown layer setting, falling back to pool4 (default)')
36 |             layer_n = 34
37 | 
38 | 
39 |         for i in range(layer_n):
40 |             layers.append(vgg_template.features[i])
41 | 
42 | 
43 |         # Initialize the VGGPool4 model
44 |         self.model = VGGPool4(layers).cuda()
45 |         # Load pre-trained weights
46 |         pretrained_dict = torch.load(state_file)
47 |         model_dict = self.model.state_dict()
48 | 
49 |         # 1. filter out unnecessary keys
50 |         pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
51 |         # 2. overwrite entries in the existing state dict
52 |         # model_dict.update(pretrained_dict)
53 |         # 3. load the new state dict
54 |         self.model.load_state_dict(pretrained_dict)
55 | 
56 | 
57 |         # self.model.load_state_dict(torch.load(state_file))
58 | 
59 |         print("VGG Model Loaded")
60 | 
61 |         # RGB
62 |         normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
63 | 
64 |         self.trans = transforms.Compose([transforms.Resize(scale_size), transforms.ToTensor(), normalize])
65 |         self.trans2 = transforms.Compose([transforms.ToTensor(), normalize])
66 | 
67 |     def extract_feature_image_from_path(self, img_path, resize=True):
68 |         assert(os.path.exists(img_path))
69 |         with open(img_path, 'rb') as fh:
70 |             with Image.open(fh) as img_o:
71 |                 img = img_o.convert('RGB')
72 | 
73 |         if resize:
74 |             img = self.trans(img)
75 |         else:
76 |             img = self.trans2(img)
77 | 
78 |         img = img.view(1, img.size(0), img.size(1), img.size(2))  # Batch mode [BxCxHxW], where B=1 in this case
79 |         img_var = Variable(img).cuda()
80 |         pool4 = self.model(img_var)
81 |         pool4_normed = nn.functional.normalize(pool4, p=2, dim=1, eps=1e-12)
82 |         pool4_feature_normed = pool4_normed.data.cpu().numpy()
83 | 
84 |         return pool4_feature_normed
85 | 
86 | 
87 | if __name__ == "__main__":
88 |     featureExtractor = FeatureExtractor_torch()
89 |     featureExtractor.extract_feature_image_from_path('/mnt/1TB_SSD/dataset/PASCAL3D+_release1.1/Images/car_imagenet/n04166281_7958.JPEG')
90 | 
--------------------------------------------------------------------------------
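The L2-normalized pool4 (or pool3) feature maps returned above are what the MATLAB matching code below consumes; `maximalCliques.m` is a Bron-Kerbosch maximal-clique enumeration (version v1 without and v2 with pivoting). A minimal sketch of the feature-to-pixel geometry (ours, not part of the repo; the helper name `pool4_cell_to_patch` is hypothetical), mirroring `hi = Astride*(ihi+offset)-Apad` in pairImgs.m and the rectangles drawn in refine_pool3.py:

```python
# Sketch only: maps a 0-based pool4 cell (row, col) back to the 100x100
# image patch it describes, using the pool4 constants from pairImgs.m
# (stride 16, padding 42, receptive field 100) and the offset used for
# the cropped feature maps (pool4_offset=2 in refine_pool3.py).
ASTRIDE, APAD, ARF, OFFSET = 16, 42, 100, 2

def pool4_cell_to_patch(row, col):
    y0 = ASTRIDE * (row + OFFSET) - APAD
    x0 = ASTRIDE * (col + OFFSET) - APAD
    return (x0, y0, x0 + ARF, y0 + ARF)  # (x_min, y_min, x_max, y_max)
```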
/MaximalCliqueAlgorithm/maximalCliques.m:
--------------------------------------------------------------------------------
1 | function [ MC ] = maximalCliques( A, v_str )
2 |     if size(A,1) ~= size(A,2)
3 |         error('MATLAB:maximalCliques', 'Adjacency matrix is not square.');
4 |     elseif ~all(all((A==1) | (A==0)))
5 |         error('MATLAB:maximalCliques', 'Adjacency matrix is not boolean (zero-one valued).')
6 |     elseif ~all(all(A==A.'))
7 |         error('MATLAB:maximalCliques', 'Adjacency matrix is not undirected (symmetric).')
8 |     elseif trace(abs(A)) ~= 0
9 |         error('MATLAB:maximalCliques', 'Adjacency matrix contains self-edges (check your diagonal).');
10 |     end
11 | 
12 |     if ~exist('v_str','var')
13 |         v_str = 'v2';
14 |     end
15 | 
16 |     if ~strcmp(v_str,'v1') && ~strcmp(v_str,'v2')
17 |         warning('MATLAB:maximalCliques', 'Version not recognized, defaulting to v2.');
18 |         v_str = 'v2';
19 |     end
20 | 
21 |     n = size(A,2);  % number of vertices
22 |     MC = [];        % storage for maximal cliques
23 |     R = [];         % currently growing clique
24 |     P = 1:n;        % prospective nodes connected to all nodes in R
25 |     X = [];         % nodes already processed
26 | 
27 | 
28 |     if strcmp(v_str,'v1')
29 |         BKv1(R,P,X);
30 |     else
31 |         BKv2(R,P,X);
32 |     end
33 | 
34 | 
35 |     function [] = BKv1 ( R, P, X )
36 | 
37 |         if isempty(P) && isempty(X)
38 |             newMC = zeros(1,n);
39 |             newMC(R) = 1;
40 |             MC = [MC newMC.'];
41 |         else
42 |             for u = P
43 |                 P = setxor(P,u);
44 |                 Rnew = [R u];
45 |                 Nu = find(A(u,:));
46 |                 Pnew = intersect(P,Nu);
47 |                 Xnew = intersect(X,Nu);
48 |                 BKv1(Rnew, Pnew, Xnew);
49 |                 X = [X u];
50 |             end
51 |         end
52 | 
53 |     end
54 | 
55 |     function [] = BKv2 ( R, P, X )
56 | 
57 |         ignore = [];
58 |         if (isempty(P) && isempty(X))
59 |             % report R as a maximal clique
60 |             newMC = zeros(1,n);
61 |             newMC(R) = 1;  % newMC contains ones at indices equal to the values in R
62 |             MC = [MC newMC.'];
63 |         else
64 |             % choose pivot
65 |             ppivots = union(P,X);  % potential pivots
66 |             binP = zeros(1,n);
67 |             binP(P) = 1;
68 |             pcounts = A(ppivots,:)*binP.';
69 |             [ignore,ind] = max(pcounts);
70 |             u_p = ppivots(ind);
71 | 
72 |             for u = intersect(find(~A(u_p,:)),P)
73 |                 P = setxor(P,u);
74 |                 Rnew = [R u];
75 |                 Nu = find(A(u,:));
76 |                 Pnew = intersect(P,Nu);
77 |                 Xnew = intersect(X,Nu);
78 |                 BKv2(Rnew, Pnew, Xnew);
79 |                 X = [X u];
80 |             end
81 |         end
82 | 
83 |     end
84 | 
85 | 
86 | end
87 | 
88 | 
--------------------------------------------------------------------------------
/MaximalCliqueAlgorithm/pairImgs.m:
--------------------------------------------------------------------------------
1 | %% VGG parameters (we fix pooling layer 4 here)
2 | % clear all;
3 | % close all;
4 | Apad_set = [2, 6, 18, 42, 90]; % padding size = 42
5 | Astride_set = [2, 4, 8, 16, 32]; % stride size = 16
6 | featDim_set = [64, 128, 256, 512, 512]; % feature dimension = 512
7 | 
8 | Arf_set = [6, 16, 44, 100, 212]; % Arf = 100
9 | offset_set = round(Apad_set./Astride_set)-1; %% round() rounds to the nearest integer; offset = 2
10 | layer_n = 3; %% this is the Python (0-based) index of pooling layer 4
11 | Apad = Apad_set(layer_n+1);
12 | Astride = Astride_set(layer_n+1);
13 | featDim = featDim_set(layer_n+1);
14 | Arf = Arf_set(layer_n+1); %% add 1 here
15 | offset = offset_set(layer_n+1);
16 | 
17 | 
18 | 
19 | %% Correspondence between the matrix and img_idx: each feature is normally 512-dimensional (extracted from the pooling-4 layer), with 3 extra dimensions appended.
20 | %% The extra 3 dimensions are:
21 | %% feat_cell_locinfo.mat: the last 3 entries of each feat store its source location info (img_index, i, j)
22 | % dir1='/mnt/4TB_b/qing/SPMatch/vp_test_sedan/a40e0/';
23 | 
24 | load(strcat(dir1, 'feat_cell_locinfo_3syn.mat'));
25 | for img_idx2 = drange(size(feat_cell_locinfo,2)-2:size(feat_cell_locinfo,2))
26 |     temp_feat_tensor_info2=feat_cell_locinfo{1,img_idx2};
27 |     feat_num2=size(temp_feat_tensor_info2,1)*size(temp_feat_tensor_info2,2);
28 | 
29 |     for img_idx1 = drange(1:size(feat_cell_locinfo,2)-3)
30 |         % for img_idx1 = drange(1:1)
31 |         img_idx1
32 |         temp_feat_tensor_info1=feat_cell_locinfo{1,img_idx1};
33 |         feat_num1=size(temp_feat_tensor_info1,1)*size(temp_feat_tensor_info1,2);
34 | 
35 | 
36 |         %% Initialize the compare vector:
37 | 
38 |         compare_vec_len = feat_num1*feat_num2;
39 |         compare_vec=zeros(compare_vec_len,1+3+3);
40 | 
41 |         %% Compute the compare_vec
42 |         for i=1:compare_vec_len
43 |             [idx_r,idx_c]=vec2Mat(i,feat_num1,feat_num2);
44 |             [i1,j1]=vec2Mat(idx_r, size(temp_feat_tensor_info1,1), size(temp_feat_tensor_info1,2));
45 |             [i2,j2]=vec2Mat(idx_c, size(temp_feat_tensor_info2,1), size(temp_feat_tensor_info2,2));
46 | 
47 |             % temp_feat_tensor_info = feat + info,
48 |             % where info = (img_index, i, j), the source location of the feature (see note above)
49 |             feat1=reshape(temp_feat_tensor_info1(i1,j1,1:512),1,512); % size(feat1) = 1 512
50 |             feat2=reshape(temp_feat_tensor_info2(i2,j2,1:512),1,512); % size(feat2) = 1 512
51 | 
52 |             info1=reshape(temp_feat_tensor_info1(i1,j1,512+1:512+3),1,3);
53 |             info2=reshape(temp_feat_tensor_info2(i2,j2,512+1:512+3),1,3);
54 | 
55 |             % The composition of the compare_vec, including 7 dimensions:
56 |             % Is compare_vec equal to the adjacency matrix? Needs confirmation.
57 |             compare_vec(i,1)=dot(feat1,feat2);
58 |             compare_vec(i,2:4)=info1;
59 |             compare_vec(i,5:7)=info2;
60 | 
61 |         end
62 |         %% Get the compare_vec_sort.
63 |         [sortval, sortpos]=sort(compare_vec(:,1),'descend');
64 |         compare_vec_sort=compare_vec(sortpos,:);
65 | 
66 | 
67 |         %% Find the best matches between two images
68 |         % Process image1 first
69 |         posinimg1=compare_vec_sort(:,3)*1E3+compare_vec_sort(:,4);
70 |         [~,unique_pos1]=unique(posinimg1); % With two outputs, unique returns in unique_pos1 the index of the first occurrence of each element.
71 |         compare_vec_sort_tmp1=compare_vec_sort(unique_pos1,:);
72 | 
73 |         posinimg2=compare_vec_sort_tmp1(:,6)*1E3+compare_vec_sort_tmp1(:,7);
74 |         [~,unique_pos2]=unique(posinimg2);
75 |         to_select1 = unique_pos1(unique_pos2);
76 | 
77 |         compare_vec_sort1 = compare_vec_sort(to_select1,:);
78 |         %% Again, find the best matches between two images in the remaining features
79 |         % posinimg1_1=compare_vec_sort1(:,3)*1E3+compare_vec_sort1(:,4);
80 |         % posinimg2_1=compare_vec_sort1(:,6)*1E3+compare_vec_sort1(:,7);
81 |         % 
82 |         % posinimg1=compare_vec_sort(:,3)*1E3+compare_vec_sort(:,4);
83 |         % posinimg2=compare_vec_sort(:,6)*1E3+compare_vec_sort(:,7);
84 | 
85 |         % compare_vec_sort_r = compare_vec_sort(~(ismember(posinimg1, posinimg1_1) | ismember(posinimg2, posinimg2_1)),:);
86 | 
87 |         % compare_vec_sort_r = compare_vec_sort(~ismember(1:compare_vec_len, to_select1),:);
88 |         % posinimg1_r=compare_vec_sort_r(:,3)*1E3+compare_vec_sort_r(:,4);
89 |         % [~,unique_pos1_r]=unique(posinimg1_r); % With two outputs, unique returns the index of the first occurrence of each element.
90 |         % compare_vec_sort_tmp1_r=compare_vec_sort_r(unique_pos1_r,:);
91 |         % 
92 |         % posinimg2_r=compare_vec_sort_tmp1_r(:,6)*1E3+compare_vec_sort_tmp1_r(:,7);
93 |         % [~,unique_pos2_r]=unique(posinimg2_r);
94 |         % to_select2 = unique_pos1_r(unique_pos2_r);
95 |         % 
96 |         % compare_vec_sort2 = compare_vec_sort_r(to_select2, :);
97 |         %% Cut the two compare_vec_sort matrices
98 |         % the program will freeze if comparing more than 200 feature pairs
99 |         % compare_vec_sort1 = vertcat(compare_vec_sort1,compare_vec_sort2);
100 | 
101 |         max_num_to_compare1 = 100;
102 |         % max_num_to_compare2 = 100;
103 | 
104 |         [sortval2, sortpos2]=sort(compare_vec_sort1(:,1),'descend');
105 |         compare_vec_sort1=compare_vec_sort1(sortpos2,:);
106 |         if (size(compare_vec_sort1, 1) > max_num_to_compare1)
107 |             compare_vec_sort1 = compare_vec_sort1(1:max_num_to_compare1, :);
108 |         end
109 | 
110 |         % [sortval2, sortpos2]=sort(compare_vec_sort2(:,1),'descend');
111 |         % compare_vec_sort2=compare_vec_sort2(sortpos2,:);
112 |         % if (size(compare_vec_sort2, 1) > max_num_to_compare2)
113 |         %     compare_vec_sort2 = compare_vec_sort2(1:max_num_to_compare2, :);
114 |         % end
115 | 
116 |         % compare_vec_sort1 = vertcat(compare_vec_sort1,compare_vec_sort2);
117 |         %% MCP
118 |         maximal_clique_q1=MCP(compare_vec_sort1);
119 | 
120 |         % maximal_clique_q=find(MC(:,2)==1);
121 |         % temp_test=rela_adj_matrix(:,maximal_clique_q);
122 |         % temp_test=temp_test(maximal_clique_q,:);
123 | 
124 |         make_filepath=[dir1,'img',num2str(img_idx1-1),'VSimg',num2str(img_idx2-1)];
125 |         % mkdir(make_filepath);
126 | 
127 |         % compare_vec_sort2 = compare_vec_sort1(~ismember(1:size(compare_vec_sort1, 1), ...
128 |         %     maximal_clique_q1),:);
129 |         % maximal_clique_q2=MCP(compare_vec_sort2);
130 | 
131 |         rst = compare_vec_sort1(maximal_clique_q1,:);
132 |         save_filename = strcat(make_filepath, '.mat');
133 |         save(save_filename, 'rst');
134 | 
135 | 
136 |         %% Show the result; change the path of the image here:
137 |         % img_filepath=['/mnt/1TB_SSD/dataset/PASCAL3D+_cropped/car_imagenet/'];
138 |         % img_list_file=['/mnt/1TB_SSD/dataset/PASCAL3D+_release1.1/Image_sets/car_imagenet_train.txt'];
139 |         % file_list=textread(img_list_file, '%s', 'delimiter', '\n', 'whitespace', '');
140 | 
141 |         % img_list_file1=strcat(dir1, 'file_list.txt');
142 |         % file_list=textread(img_list_file1, '%s', 'delimiter', '\n', 'whitespace', '');
143 |         % img1_o=imread([file_list{img_idx1}]);
144 |         % % scl1 = 224/min([size(img1_o,1),size(img1_o,2)]);
145 |         % % img1 = imresize(img1_o,scl1);
146 |         % img1 = img1_o;
147 |         % img2_o=imread([file_list{img_idx2}]);
148 |         % scl2 = 224/min([size(img2_o,1),size(img2_o,2)]);
149 |         % img2 = imresize(img2_o,scl2);
150 | 
151 | 
152 |         %
153 |         % two_img_screen=zeros( max(size(img1,1),size(img2,1)) , size(img1,2)+size(img2,2)+10 , 3 );
154 |         % two_img_screen(1:size(img1,1),1:size(img1,2),:)=img1;
155 |         % two_img_screen(1:size(img2,1),...
156 |         %     (size(img1,2)+10+1):(size(img1,2)+10+size(img2,2)), : )=img2;
157 |         % two_img_screen=uint8(two_img_screen);
158 |         % hold on;
159 |         %
160 |         % for i=maximal_clique_q'
161 |         %     imshow(two_img_screen);
162 |         %     hold on;
163 |         %     %% parameters of loc_set: [ii, hi, wi, hi+Arf, wi+Arf]
164 |         %     ihi1=compare_vec_sort(i,3)-1;
165 |         %     iwi1=compare_vec_sort(i,4)-1;
166 |         %     hi1 = Astride * (ihi1 + offset) - Apad; %same
167 |         %     wi1 = Astride * (iwi1 + offset) - Apad; %same
168 |         %     x1=[wi1,wi1+Arf,wi1+Arf,wi1];
169 |         %     y1=[hi1,hi1,hi1+Arf,hi1+Arf];
170 |         %     patch(x1,y1,'r','FaceAlpha',0.35,'edgealpha',0);
171 |         %     hold on;
172 |         %
173 |         %     ihi2=compare_vec_sort(i,6)-1;
174 |         %     iwi2=compare_vec_sort(i,7)-1;
175 |         %     hi2 = Astride * (ihi2 + offset) - Apad; %same
176 |         %     wi2 = Astride * (iwi2 + offset) - Apad; %same
177 |         %     x2=[wi2,wi2+Arf,wi2+Arf,wi2]+size(img1,2)+10;
178 |         %     y2=[hi2,hi2,hi2+Arf,hi2+Arf];
179 |         %     patch(x2,y2,'r','FaceAlpha',0.35,'edgealpha',0);
180 |         %     % hold on;
181 |         %     saveas(gcf,[make_filepath,'\img1_',num2str(img_idx1-1),'VSimg1_',num2str(img_idx2-1),...
182 |         %         '_order',num2str(i),'of',num2str(MCP_test_range),...
183 |         %         '_cos',num2str(compare_vec_sort(i,1)),...
184 |         %         '_h1',num2str(compare_vec_sort(i,3)),...
185 |         %         '_w1',num2str(compare_vec_sort(i,4)),...
186 |         %         '_h2',num2str(compare_vec_sort(i,6)),...
187 |         %         '_w2',num2str(compare_vec_sort(i,7)),...
188 |         %         '.jpg']);
189 |         % end
190 | 
191 |     end
192 | end
193 | 
--------------------------------------------------------------------------------
/MaximalCliqueAlgorithm/run_per_folder.sh:
--------------------------------------------------------------------------------
1 | xlist="0 10 80 90 100 170 180 190 260 270 280 350"
2 | for azi in $(seq 0 10 350)
3 | do
4 |     if [[ ${xlist} =~ (^|[[:space:]])${azi}($|[[:space:]]) ]]
5 |     then
6 |         continue
7 |     fi
8 |     for ele in $(seq -5 5 35)
9 |     do
10 |         if [ -d "/home/yutong/SPMatch/vp_test_sedan/a${azi}e${ele}/" ]
11 |         then
12 |             /usr/local/MATLAB/R2017a/bin/matlab -nodisplay -nosplash -nodesktop -r "dir1='/mnt/4TB_b/qing/SPMatch/vp_test_sedan/a${azi}e${ele}/';pairImgs_split;exit"
13 |         else
14 |             echo "does not exist: /home/yutong/SPMatch/vp_test_sedan/a${azi}e${ele}/"
15 |         fi
16 |     done
17 | done
18 | 
--------------------------------------------------------------------------------
/MaximalCliqueAlgorithm/vec2Mat.m:
--------------------------------------------------------------------------------
1 | function [idx_r,idx_c]=vec2Mat(idx_in_vec,mat_h,mat_w)
2 |     idx_c=ceil(idx_in_vec/mat_h);
3 |     idx_r=mod(idx_in_vec-1,mat_h)+1;
4 | 
5 | %% Tips:
6 | % Y = ceil(X) rounds each element of X to the nearest integer greater than or equal to that element.
7 | % A small helper that converts a linear index back to matrix subscripts, following MATLAB's default column-major ordering.
8 | 
--------------------------------------------------------------------------------
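For reference, a NumPy sketch (ours, not part of the repo) of the same mapping as `vec2Mat`: MATLAB linear indices are 1-based and column-major, and `np.unravel_index` with Fortran order is the 0-based analogue.

```python
import numpy as np

# Sketch: NumPy equivalent of vec2Mat.m, shifting between MATLAB's 1-based
# indexing and NumPy's 0-based indexing.
def vec2mat(idx_in_vec, mat_h, mat_w):
    idx_r, idx_c = np.unravel_index(idx_in_vec - 1, (mat_h, mat_w), order='F')
    return idx_r + 1, idx_c + 1  # back to 1-based, matching the .m file

assert vec2mat(5, 3, 4) == (2, 2)  # 5th element of a 3x4 matrix, column-major
```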
/Pascal3dPlus.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | import scipy.io as sio
4 | import h5py
5 | import numpy as np
6 | from scipy import misc
7 | import pickle
8 | from util import *
9 | import sys
10 | import cv2
11 | 
12 | class Pascal3dPlus:
13 |     def __init__(self, category='car', source='imagenet', split='train', crop=True, rescale=224.0, first_n_debug=9999,
14 |                  base_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_release1.1',
15 |                  sp_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_sp',
16 |                  detection_split_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_sp/file_list',
17 |                  imagenet_split_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_release1.1/Image_sets',
18 |                  pascal_split_dir='',
19 |                  occ_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_occ/occ',
20 |                  black_dir='/mnt/4TB_b/qing/dataset/PASCAL3D+_black'):
21 |         self.base_dir = base_dir
22 |         self.sp_dir = sp_dir
23 |         self.detection_split_dir = detection_split_dir
24 |         if split[0:3]=='occ':
25 |             self.detection_split_dir = occ_dir
26 | 
27 |         self.imagenet_split_dir = imagenet_split_dir
28 |         self.pascal_split_dir = pascal_split_dir
29 | 
30 |         self.category = category
31 |         self.source = source
32 |         self.split = split
33 |         if split[0:3]=='occ':
34 |             self.split='occ'
35 |             self.occ_level = split.split('_')[1]
36 |         elif split[0:5]=='black':
37 |             self.split='black'
38 |             self.black_level = split.split('_')[1]
39 | 
40 |         self.crop = crop
41 |         self.rescale = rescale
42 |         self.first_n_debug = first_n_debug  # TODO: dirty truncation for fast debugging, should be deleted
43 | 
44 |         self.image_dir = os.path.join(self.base_dir, 'Images', category + '_' + source)
45 |         if self.split=='occ':
46 |             self.image_dir = os.path.join(occ_dir, category + 'LEVEL' + self.occ_level)
47 |         elif self.split=='black':
48 |             self.image_dir = os.path.join(black_dir, category, 'gray_img_30_300_loc2', 'patch_'+self.black_level)
49 |             if self.black_level=='0':
50 |                 self.image_dir = os.path.join(black_dir, category, 'gray_img_30_300', 'patch_1')
51 | 
52 |         self.classification_dir = os.path.join(self.base_dir, 'Annotations', category + '_' + source)
53 |         self.detection_dir = os.path.join(sp_dir, category + '_' + source, 'transfered')
54 | 
55 |         print('initialized Pascal3D+:\n category={}\n split={}\n crop={}'.format(self.category, split, self.crop))
56 | 
57 |     def get_classification_instances(self):
58 |         return_list = []
59 |         # TODO: should be reading the split lists
60 |         for file in os.listdir(self.image_dir):
61 |             dot = file.find('.')
62 |             img_name = file[:dot]
63 |             return_list.append([img_name, -1])
64 |         return return_list
65 | 
66 |     def get_classification_images(self):
67 |         instances = self.get_classification_instances()
68 |         return self.get_images_from_instances(instances)
69 | 
70 |     def get_classification_all(self):
71 |         raise NotImplementedError
72 | 
73 |     def get_detection_instances(self):
74 |         file_list = os.path.join(self.detection_split_dir, self.split + '_list', self.category + '_' + self.split + '.txt')
75 |         if self.split == 'black':
76 |             file_list = os.path.join(self.detection_split_dir, 'test_list', self.category + '_test.txt')
77 | 
78 |         return_list = []
79 | 
80 |         with open(file_list, 'r') as file:
81 |             for line in file:
82 |                 if len(return_list) == self.first_n_debug:
83 |                     break
84 |                 if line == '\n':
85 |                     continue
86 |                 segments = line.split(' ')
87 |                 img_name = segments[0]
88 |                 instance_id = int(segments[1][:-1]) - 1  # MATLAB (1-based) to Python (0-based)
89 |                 return_list.append([img_name, instance_id])
90 |         return return_list
91 | 
92 |     def get_detection_all(self, target_len=None):
93 |         return_list = []
94 |         instances = self.get_detection_images(target_len)
95 |         # max_spid = 0
96 |         for (inst_i,(img_name, instance_id, bbox, delta_xy, delta_scale, img, ins)) in enumerate(instances):
97 |             zhishuai_bbox = bound_bbox_to_int(img, bbox)
98 |             zhishuai_shift = np.array([zhishuai_bbox[0], zhishuai_bbox[1], zhishuai_bbox[0], zhishuai_bbox[1]])
99 |             zhishuai_scale = bbox_to_delta_scale(zhishuai_bbox, 224.0)
100 | 
101 |             this_shift = np.array([delta_xy[0], delta_xy[1], delta_xy[0], delta_xy[1]])
102 |             this_scale = delta_scale
103 | 
104 |             mat_name = os.path.join(self.detection_dir, img_name + '.mat')
105 |             anno = sio.loadmat(mat_name)['anno']
106 |             sp_anno = anno[instance_id][1]
107 |             sp_list = []
108 |             sp_id = -1
109 |             for sp in sp_anno:
110 |                 sp_id += 1
111 | 
112 |                 for sp_bbox in sp[0]:
113 |                     sp_bbox -= 1
114 |                     original = sp_bbox[0:4] / zhishuai_scale + zhishuai_shift
115 |                     this = (original - this_shift) * this_scale
116 |                     try:
117 |                         assert(sp_bbox[8] == sp_id)
118 |                     except:
119 |                         # print(sp_bbox, sp_id)
120 |                         continue
121 | 
122 |                     sp_list.append([sp_id, this])
123 | 
124 |             if len(sp_list) == 0:
125 |                 continue
126 | 
127 |             bbox = (bbox - this_shift) * this_scale
128 |             return_list.append([img_name, instance_id, bbox, delta_xy, delta_scale, img, ins, sp_list])
129 | 
130 |         return return_list
131 | 
132 | 
133 |     def get_detection_images(self, target_len):
134 |         instances = self.get_detection_instances()
135 |         return self.get_images_from_instances(instances, target_len)
136 | 
137 |     def get_images_from_instances(self, instances, target_len=None):
138 |         return_list = []
139 |         for (inst_i, (img_name, instance_id)) in enumerate(instances):
140 |             if self.split == 'occ':
141 |                 img_path = os.path.join(self.image_dir, '{}_{}.mat'.format(img_name, instance_id+1))
142 |                 f = h5py.File(img_path)
143 |                 img = np.array(f['record']['img']).T  # RGB
144 |             else:
145 |                 img_path = os.path.join(self.image_dir, img_name + '.JPEG')
146 |                 try:
147 |                     img = cv2.imread(img_path)[:,:,::-1]
148 |                 except:
149 |                     continue
150 | 
151 |             img = im2rgb(img)
152 | 
153 |             if self.split == 'black' and self.black_level=='0':
154 |                 RGB_mean = np.array([[[123.675, 116.28, 103.53]]])
155 |                 img = np.tile(RGB_mean, (img.shape[0], img.shape[1], 1))
156 | 
157 |             mat_full_name = os.path.join(self.classification_dir, img_name + '.mat')
158 |             record = sio.loadmat(mat_full_name)['record']
159 |             if not instance_id == -1:  # specifying instance_id
160 |                 category_name = record[0, 0]['objects'][0, instance_id]['class'][0]
161 |                 assert(category_name == self.category)
162 |                 bbox = record[0, 0]['objects'][0, instance_id]['bbox'][0] - 1
163 |                 if target_len is None:
164 |                     ins, delta_xy, delta_scale = bbox_crop_and_bbox_rescale(img, bbox, self.rescale, self.crop)
165 |                     return_list.append([img_name, instance_id, bbox, delta_xy, delta_scale, img, ins])
166 |                 else:
167 |                     bbox_ = np.array([0, 0, img.shape[1]-1, img.shape[0]-1])
168 |                     if target_len == 'area':
169 |                         ins, delta_xy, delta_scale = area_rescale(img, 300.0)
170 |                     elif type(target_len) is list:
171 |                         ins, delta_xy, delta_scale = bbox_crop_and_bbox_rescale(img, bbox_, target_len[inst_i], self.crop)
172 |                     else:
173 |                         ins, delta_xy, delta_scale = bbox_crop_and_bbox_rescale(img, bbox_, target_len, self.crop)
174 | 
175 |                     return_list.append([img_name, instance_id, bbox, delta_xy, delta_scale, img, ins])
176 | 
177 |             else:  # not specifying instance_id
178 |                 for real_instance_id in range(len(record[0, 0]['objects'][0])):
179 |                     category_name = record[0, 0]['objects'][0, real_instance_id]['class'][0]
180 |                     if not category_name == self.category:
181 |                         continue
182 |                     bbox = record[0, 0]['objects'][0, real_instance_id]['bbox'][0] - 1
183 |                     ins, delta_xy, delta_scale = bbox_crop_and_bbox_rescale(img, bbox, self.rescale, self.crop)
184 |                     return_list.append([img_name, real_instance_id, bbox, delta_xy, delta_scale, img, ins])
185 |         return return_list
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Semantic Part Detection via Matching: Learning to Generalize to Novel Viewpoints from Limited Training Data
2 | 
3 | This repo contains the PyTorch implementation for the ICCV 2019 paper: Semantic Part Detection via Matching: Learning to Generalize to Novel Viewpoints from Limited Training Data [arxiv](https://arxiv.org/abs/1811.11823).
4 | 
5 | ```
6 | @inproceedings{bai2019semantic,
7 |   title={Semantic Part Detection via Matching: Learning to Generalize to Novel Viewpoints from Limited Training Data},
8 |   author={Bai, Yutong and Liu, Qing and Xie, Lingxi and Qiu, Weichao and Zheng, Yan and Yuille, Alan L},
9 |   booktitle={Proceedings of the IEEE International Conference on Computer Vision},
10 |   pages={7535--7545},
11 |   year={2019}
12 | }
13 | ```
14 | 
15 | ## Highlight
16 | - We present an approach which can learn from a small annotated dataset containing a limited range of viewpoints and generalize to detect semantic parts for a much larger range of viewpoints.
17 | - The approach is based on our matching algorithm, which is used for finding accurate spatial correspondence between two images and transplanting semantic parts annotated on one image to the other.
18 | 
19 | ## Usage
20 | Our code is implemented in PyTorch.
21 | Experiments are performed on the [VehicleSemanticPart (VSP)](https://arxiv.org/abs/1511.06855) dataset and synthetic images generated by [UnrealCV](https://unrealcv.org/).
22 | Please refer to these websites for details of data preparation.
23 | 
24 | - supports python3 and [pytorch=1.1.0](http://pytorch.org)
25 | 
26 | - requires a MATLAB installation (for the matching step)
27 | 
28 | - clone this repo: `git clone https://github.com/ytongbai/SemanticPartDetection.git`
29 | 
30 | - Pipeline:
31 |   For both the training and testing stages (a driver sketch chaining these commands follows the list):
32 |   - Generate synthetic data.
33 |   - Get training images from different viewpoints.
34 | 
35 |     `python get_vp_examples.py`, `python assign_synthetic_img.py`
36 |   - Extract features from an ImageNet-pretrained VGG-16 network.
37 | 
38 |     `python extractLayerFeat.py`
39 |   - Convert the feature format for MATLAB.
40 | 
41 |     `python featPickle2featCell.py`
42 |   - Run the maximal clique algorithm for the matching stage.
43 | 
44 |     `bash MaximalCliqueAlgorithm/run_per_folder.sh`
45 |   - Refine the matches using pool-3 features from the ImageNet-pretrained VGG-16.
46 | 
47 |     `python refine_pool3.py`
48 |   - Aggregate the 2D parts onto the 3D CAD model.
49 | 
50 |     `python aggregate_3d_wPars.py`, `python solve_nearest_score.py`
51 |   - Calculate the mAP score.
52 | 
53 |     `python calculate_mAP.py`
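A minimal driver sketch (ours, not part of the repo) chaining the steps above in order; paths, environments, and the MATLAB step will need adjusting to your setup:

```python
# Sketch: runs the pipeline commands listed above in sequence.
import subprocess

steps = [
    'python get_vp_examples.py',
    'python assign_synthetic_img.py',
    'python extractLayerFeat.py',
    'python featPickle2featCell.py',
    'bash MaximalCliqueAlgorithm/run_per_folder.sh',
    'python refine_pool3.py',
    'python aggregate_3d_wPars.py',
    'python solve_nearest_score.py',
    'python calculate_mAP.py',
]
for cmd in steps:
    subprocess.run(cmd, shell=True, check=True)
```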
54 | 
55 | ## Contact
56 | 
57 | For any questions, please feel free to reach out:
58 | ```
59 | Yutong Bai: ytongbai@gmail.com
60 | ```
61 | 
--------------------------------------------------------------------------------
/aggregate_3d_wPars.py:
--------------------------------------------------------------------------------
1 | import os, json, pickle
2 | import d3, vdb
3 | from vertex_id_picker import *
4 | import numpy as np
5 | import imageio
6 | import matplotlib.pyplot as plt
7 | import cv2
8 | import glob
9 | from scipy.spatial.distance import pdist,squareform
10 | 
11 | def cluster_3d(pnts, thres=1, cn=None):
12 |     # pnts=[(id1,np.array([x1,y1,z1])),...,(idn,np.array([xn,yn,zn]))]
13 |     groups = []
14 |     centers = []
15 | 
16 |     groups.append([pnts[0]])
17 |     centers.append(pnts[0][1])
18 | 
19 |     for pp in pnts[1:]:
20 |         pp_ci = []
21 |         for ci,cc in enumerate(centers):
22 |             cc_dist = np.linalg.norm(pp[1]-cc)
23 |             if cc_dist < thres:
24 |                 pp_ci.append(ci)
25 | 
26 |         if len(pp_ci)>0:
27 |             new_gg = [groups[ci] for ci in range(len(centers)) if ci in pp_ci]
28 |             new_gg = sum(new_gg, [])
29 |             new_gg += [pp]
30 |             new_c = np.mean(np.array([pgg[1] for pgg in new_gg]), axis=0)
31 | 
32 |             groups = [groups[ci] for ci in range(len(centers)) if ci not in pp_ci]
33 |             centers = [centers[ci] for ci in range(len(centers)) if ci not in pp_ci]
34 |             groups.append(new_gg)
35 |             centers.append(new_c)
36 |         else:
37 |             groups.append([pp])
38 |             centers.append(pp[1])
39 | 
40 |     group_size = [len(gg) for gg in groups]
41 |     if cn is None:
42 |         size_thres = len(pnts)/2
43 |         selected = np.where(np.array(group_size)>size_thres)[0]
44 |     else:
45 |         selected = np.argsort(-np.array(group_size))[0:cn]
46 | 
47 |     rst_ids = []
48 |     for ss in selected:
49 |         ss_g = groups[ss]
50 |         ss_c = centers[ss]
51 |         ss_dist = [np.linalg.norm(pp[1]-ss_c) for pp in ss_g]
52 |         ss_id = ss_g[np.argmin(ss_dist)][0]
53 |         rst_ids.append(ss_id)
54 | 
55 |     return rst_ids
56 | 
57 | 
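# (illustrative note, not in the original file) cluster_3d() above greedily
# merges candidate 3D vertices whose running cluster center lies within
# `thres` of the new point, then keeps either the clusters covering more
# than half of the points (cn=None) or the `cn` largest ones, returning one
# representative vertex id per kept cluster. For example, with two tight
# pairs of points:
#   pnts = [(0, np.array([0., 0., 0.])), (1, np.array([.1, 0., 0.])),
#           (2, np.array([5., 5., 5.])), (3, np.array([5.1, 5., 5.]))]
#   cluster_3d(pnts, thres=1, cn=2)  # -> [0, 2], one id per cluster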
58 | db_root = '/home/yutong/SPMatch/from_weichao/DenseMatching_CarOnly'
59 | obj_name = 'Sedan_4Door_Sedan4Door_LOD0_11'
60 | cam_name = 'sedan4door'
61 | 
62 | root_dir = '/home/yutong/SPMatch/vp_examples/'
63 | fid_vp_ls = [(144,90),(153,45),(162,0),(171,315),(180,270),(189,225),(198,180),(207,135)]
64 | 
65 | test_dir = '/home/yutong/SPMatch/vp_test_sedan/'
66 | dir_ls = glob.glob(os.path.join(test_dir,'a*e*'))
67 | 
68 | total_spnum=39
69 | 
70 | 
71 | '''
72 | [(0,4,10),(1,4,10),(2,4,10),(3,4,10),(4,4,10),(5,4,10),(6,4,10),(7,4,10,*20),(8,4,10),\ (9,4,20),(10,2,10),(11,4,40,*5),(12,2,15),(13,4,15,*1-10), (14,2,10,*1),(15,4,10),(16,2,10),\
73 | (20,1,10),(23,1,10,*20),(24,1,10),(25,1,10,*???), (27,1,10,*20)\]
74 | '''
75 | 
76 | # sp_pars = [(0,4,10),(1,4,10),(2,4,10),(3,4,10),(4,4,10),(5,4,10),(6,4,10),(7,4,10),(8,4,10),(9,4,20),(10,2,10)]
77 | 
78 | # These are the parameters for aggregation; they vary across different image sets.
79 | sp_par_file = '/home/yutong/SPMatch/aggregate_3d_sp_pars_32train.txt'
80 | with open(sp_par_file,'r') as fh:
81 |     contents = fh.readlines()
82 | 
83 | sp_pars = [cc.strip().split() for cc in contents]
84 | sp_pars = [[int(sppp) for sppp in spp[:-1]]+[spp[-1]] for spp in sp_pars]
85 | 
86 | # gather training data
87 | input_ls = [[] for _ in range(total_spnum)]
88 | for (frame_id,vp) in fid_vp_ls:
89 |     src_lit_filename, src_cam_pose, src_depth_filename, src_vertexs_3d, src_bbox = \
90 |         get_frame_info(db_root, obj_name, cam_name, frame_id)
91 | 
92 |     img = cv2.imread(src_lit_filename)
93 |     img_cropped = img[src_bbox[2]-3:src_bbox[3]+4, src_bbox[0]-3:src_bbox[1]+4]
94 | 
95 |     ratio = 224/np.min(img_cropped.shape[0:2])
96 | 
97 |     img_dir = os.path.join(root_dir,str(vp))
98 | 
99 |     filelist = os.path.join(img_dir, 'file_list.txt')
100 |     with open(filelist, 'r') as fh:
101 |         contents = fh.readlines()
102 | 
103 |     img_list = [cc.strip() for cc in contents]
104 |     img_idx2 = len(img_list)-1
105 | 
106 |     for img_idx1 in range(img_idx2):
107 |         sp_transfer1 = os.path.join(img_dir, 'img{}VSimg{}_transSP.pickle'.format(img_idx1, img_idx2))
108 |         predict_sp_info = pickle.load(open(sp_transfer1,'rb'))
109 | 
110 |         for tar_sp in range(total_spnum):
111 |             sp_ls = [pp[1] for pp in predict_sp_info if pp[0]==tar_sp]
112 |             for spp in sp_ls:
113 |                 input_vertex = np.array(spp)/ratio + np.array([src_bbox[0]-3, src_bbox[2]-3])
114 | 
115 |                 vertex_id, matched_2d = get_vertex_id(input_vertex, src_vertexs_3d, src_cam_pose, src_depth_filename)
116 |                 input_ls[tar_sp].append((vertex_id, np.array(src_vertexs_3d[vertex_id])))
117 | 
118 | # run the clustering algorithm
119 | sp_3d_id = []
120 | for sp_par in sp_pars:
121 |     tar_sp, tar_cn, cpar = sp_par[0:3]
122 |     print('total number of transferred sp{}: {}'.format(tar_sp, len(input_ls[tar_sp])))
123 |     merged_id = cluster_3d(input_ls[tar_sp], cpar, tar_cn)
124 |     sp_3d_id.append(merged_id)
125 | 
126 | # project the aggregated parts back onto each 2D image used in testing
127 | 
128 | for dd in dir_ls:
129 |     info = dd.split('/')[-1]
130 |     i1 = info.index('a')
131 |     i2 = info.index('e')
132 |     azi_s = int(info[i1+1:i2])
133 |     ele = int(info[i2+1:])
134 | 
135 |     for azi in [azi_s,azi_s+5,azi_s+10]:
136 |         azi = azi%360
137 |         if azi > 90:
138 |             j1 = (90 - (azi-360))//5
139 |         else:
140 |             j1 = (90 - azi)//5
141 | 
142 |         j2 = ele//5+1
143 |         frame_id = j2*72+j1
144 | 
145 |         src_lit_filename, src_cam_pose, src_depth_filename, src_vertexs_3d, src_bbox = \
146 |             get_frame_info(db_root, obj_name, cam_name, frame_id)
147 |         depth = np.load(src_depth_filename)
148 | 
149 | 
150 |         img = cv2.imread(src_lit_filename)
151 |         img_cropped = img[src_bbox[2]-3:src_bbox[3]+4, src_bbox[0]-3:src_bbox[1]+4]
152 |         ratio = 224/np.min(img_cropped.shape[0:2])
153 |         img_cropped = cv2.resize(img_cropped, (0,0), fx=ratio, fy=ratio)
154 | 
155 |         sp_info = []
156 |         plt.close()
157 |         fig,ax = plt.subplots(1,1,figsize=(20,10))
158 |         ax.imshow(img_cropped[:,:,::-1])
159 | 
160 |         for sp_par in sp_pars:
161 | 
162 |             tar_sp = sp_par[0]
163 |             dep_thrh = sp_par[3]
164 |             if len(sp_par)>4:
165 |                 vp_thrh = sp_par[4].split(',')
166 |                 vp_thrh[0] = vp_thrh[0][1:]
167 |                 vp_thrh[-1] = 
vp_thrh[-1][:-1] 168 | vp_thrh = [int(vpt) for vpt in vp_thrh ] 169 | else: 170 | vp_thrh = None 171 | candidates = [] 172 | 173 | for mi in sp_3d_id[tar_sp]: 174 | matched_2d = src_cam_pose.project_to_cam_space(src_vertexs_3d[mi:mi+1,:]) 175 | ds = depth[(int(matched_2d[:,1]), int(matched_2d[:,0]))] 176 | 177 | if abs(matched_2d[0,2] - ds) < dep_thrh: 178 | if not vp_thrh is None: 179 | in_range = False 180 | for vpti in range(len(vp_thrh)//2): 181 | if (azi >= vp_thrh[2*vpti] and azi <= vp_thrh[2*vpti+1]): 182 | in_range = True 183 | 184 | if not in_range: 185 | continue 186 | 187 | 188 | matched_2d_shift = (matched_2d[0,0:2].astype(int) - np.array([src_bbox[0]-3, src_bbox[2]-3]))*ratio 189 | candidates.append(matched_2d_shift) 190 | 191 | if len(candidates)>1: 192 | candidates_new = [] 193 | c_pdist = squareform(pdist(np.array(candidates))) 194 | for ci in range(len(candidates)): 195 | ci_f = True 196 | for cj in range(len(candidates)): 197 | if c_pdist[ci,cj]<30: 198 | ci_f = False 199 | if cj>ci: 200 | candidates_new.append((candidates[ci]+candidates[cj])/2) 201 | 202 | if ci_f: 203 | candidates_new.append(candidates[ci]) 204 | 205 | candidates = candidates_new 206 | 207 | for cc in candidates: 208 | sp_info.append([tar_sp, cc]) 209 | ax.plot(int(cc[0]), int(cc[1]), 'b*') 210 | 211 | plt.savefig(os.path.join(dd, 'sp_anno_%08d.png' % frame_id)) 212 | save_file = os.path.join(dd, 'sp_info_%08d.pickle' % frame_id) 213 | pickle.dump(sp_info, open(save_file,'wb')) -------------------------------------------------------------------------------- /assign_synthetic_img.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | from shutil import copyfile 4 | 5 | root_dir = '/mnt/4TB_b/qing/SPMatch' 6 | source_dir = os.path.join(root_dir, 'from_weichao','DenseMatching_CarOnly','sedan4door','lit_cropped') 7 | target_dir = os.path.join(root_dir, 'vp_test_sedan', 'a*e*') 8 | dir_list = glob.glob(target_dir) 9 | for dd in dir_list: 10 | info = dd.split('/')[-1] 11 | i1 = info.index('a') 12 | i2 = info.index('e') 13 | azi = int(info[i1+1:i2])+10 14 | if azi == 360: 15 | azi=0 16 | ele = int(info[i2+1:]) 17 | 18 | if azi > 90: 19 | j1 = (90 - (azi-360))//5 20 | else: 21 | j1 = (90 - azi)//5 22 | 23 | j2 = ele//5+1 24 | frame_id = j2*72+j1 25 | 26 | s_file = os.path.join(source_dir, '%08d.png' % frame_id) 27 | t_file = os.path.join(dd, '%08d.png' % frame_id) 28 | copyfile(s_file,t_file) -------------------------------------------------------------------------------- /calculate_mAP.py: -------------------------------------------------------------------------------- 1 | from eval.eval_AP import eval_AP 2 | import os, glob, pickle 3 | import numpy as np 4 | from Pascal3dPlus import Pascal3dPlus 5 | 6 | root_dir = '/home/yutong/SPMatch/vp_test_sedan/' 7 | dir_ls = glob.glob(os.path.join(root_dir,'a*e*')) 8 | sp_num = 39 9 | category = 'car' 10 | set_type = 'test' 11 | crop = True 12 | sp_detection = [] 13 | img_names = [] 14 | img_size = [] 15 | 16 | img_id = 0 17 | for img_dir in dir_ls: 18 | if not 'a20e0' in img_dir: 19 | continue 20 | 21 | filelist = os.path.join(img_dir, 'file_list.txt') 22 | with open(filelist, 'r') as fh: 23 | contents = fh.readlines() 24 | 25 | img_list = [cc.strip() for cc in contents][0:-1] 26 | 27 | for img_ii in range(len(img_list)): 28 | sp_detection.append([]) 29 | 30 | img_path = img_list[img_ii] 31 | img_name = img_path.split('/')[-1].split('.')[0] 32 | img_names.append(img_name) 33 | 34 | sp_file = 
os.path.join(img_dir, 'img{}VSimg{}_transSP.pickle'.format(img_ii, len(img_list))) 35 | sp_info = pickle.load(open(sp_file, 'rb')) 36 | if len(sp_info)==0: 37 | continue 38 | 39 | assert(sp_info[0][0] == img_name) 40 | for spi in range(sp_num): 41 | locs = np.array([ss[2] for ss in sp_info if ss[1]==spi]) 42 | scores = np.array([ss[3] for ss in sp_info if ss[1]==spi]) 43 | 44 | if len(locs)==0: 45 | sp_detection[-1].append(np.zeros((0,6))) 46 | continue 47 | 48 | c_list = locs[:,0] 49 | r_list = locs[:,1] 50 | 51 | bb_loc = np.column_stack((c_list - 49.5, r_list - 49.5, c_list + 49.5, r_list + 49.5, scores)) 52 | bb_loc = np.concatenate((np.ones((bb_loc.shape[0], 1)) * img_id, bb_loc), axis=1) 53 | 54 | sp_detection[-1].append(bb_loc) 55 | 56 | img_id += 1 57 | 58 | print('total number of testing images: {}'.format(len(sp_detection))) 59 | # read in ground truth from Pascal 3D+ 60 | pascal = Pascal3dPlus(category=category, split=set_type, crop=crop, first_n_debug=9999) 61 | detection_all = pascal.get_detection_all() 62 | 63 | spanno = [] 64 | 65 | img_names2 = [dd[0] for dd in detection_all] 66 | 67 | for img_name in img_names: 68 | img_id2 = img_names2.index(img_name) 69 | 70 | img_name, instance_id, bbox, delta_xy, delta_scale, img, ins, sp_list = detection_all[img_id2] 71 | 72 | spanno.append([[] for _ in range(sp_num)]) 73 | for [sp_id, this] in sp_list: 74 | if sp_id < sp_num: 75 | bb_o = this + 1 # also to Matlab 76 | bb_o = np.array([max(np.ceil(bb_o[0]), 1), max(np.ceil(bb_o[1]), 1), 77 | min(np.floor(bb_o[2]), ins.shape[1]), min(np.floor(bb_o[3]), ins.shape[0])]) 78 | spanno[-1][sp_id].append(bb_o) 79 | 80 | for i in range(sp_num): 81 | if len(spanno[-1][i]) == 0: 82 | spanno[-1][i] = np.array([]) 83 | else: 84 | spanno[-1][i] = np.array(spanno[-1][i]) 85 | img_size.append(ins.shape[0:2]) 86 | 87 | eval_AP(sp_detection, spanno, img_size) 88 | -------------------------------------------------------------------------------- /crop_synthetic_img.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | import d3, vdb 3 | from vertex_id_picker import * 4 | import numpy as np 5 | import imageio 6 | import matplotlib.pyplot as plt 7 | import cv2 8 | 9 | 10 | db_root = '/home/yutong/SPMatch/from_weichao/DenseMatching_CarOnly' 11 | obj_name = 'Sedan_4Door_Sedan4Door_LOD0_11' 12 | cam_name = 'sedan4door' 13 | 14 | for frame_id in range(648): 15 | 16 | src_lit_filename, src_cam_pose, src_depth_filename, src_vertexs_3d, src_bbox = \ 17 | get_frame_info(db_root, obj_name, cam_name, frame_id) 18 | 19 | img = cv2.imread(src_lit_filename) 20 | img_cropped = img[src_bbox[2]-3:src_bbox[3]+4, src_bbox[0]-3:src_bbox[1]+4] 21 | 22 | save_dir = os.path.join(db_root,cam_name,'lit_cropped') 23 | if not os.path.exists(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | save_file = os.path.join(save_dir,'%08d.png' % frame_id) 27 | cv2.imwrite(save_file, img_cropped) -------------------------------------------------------------------------------- /eval/VOCap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def VOCap(rec, prec): 5 | mrec = np.append(0, rec) 6 | mrec = np.append(mrec, 1) 7 | 8 | mpre = np.append(0, prec) 9 | mpre = np.append(mpre, 0) 10 | 11 | for ii in range(len(mpre)-2,-1,-1): 12 | mpre[ii] = max(mpre[ii], mpre[ii+1]) 13 | 14 | msk = [i!=j for i,j in zip(mrec[1:], mrec[0:-1])] 15 | ap = np.sum((mrec[1:][msk]-mrec[0:-1][msk])*mpre[1:][msk]) 16 | return ap 
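VOCap computes PASCAL-VOC-style average precision: it pads the recall/precision arrays, makes the precision envelope monotonically non-increasing from right to left, and sums rectangle areas over the recall steps. A toy sanity check (our numbers; e.g. 4 ranked detections against 2 ground truths with both hits ranked first, so cumulative tp=[1,2,2,2] and fp=[0,0,1,2]):

```python
import numpy as np
from eval.VOCap import VOCap

rec = np.array([0.5, 1.0, 1.0, 1.0])
prec = np.array([1.0, 1.0, 2.0 / 3.0, 0.5])
print(VOCap(rec, prec))  # -> 1.0: full recall is reached while precision is still 1.0
```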
-------------------------------------------------------------------------------- /eval/eval_AP.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from joblib import Parallel, delayed 3 | import numpy as np 4 | 5 | SP = dict() 6 | SP['criteria'] = 'iou' 7 | SP['iou_thresh'] = 0.5 8 | 9 | def VOCap(rec, prec): 10 | mrec = np.append(0, rec) 11 | mrec = np.append(mrec, 1) 12 | 13 | mpre = np.append(0, prec) 14 | mpre = np.append(mpre, 0) 15 | 16 | for ii in range(len(mpre)-2,-1,-1): 17 | mpre[ii] = max(mpre[ii], mpre[ii+1]) 18 | 19 | msk = [i!=j for i,j in zip(mrec[1:], mrec[0:-1])] 20 | ap = np.sum((mrec[1:][msk]-mrec[0:-1][msk])*mpre[1:][msk]) 21 | return ap 22 | 23 | def eval_AP_inner(inp): 24 | sp_detection, spanno, img_size = inp 25 | N = len(spanno) 26 | kp_pos = np.sum([spanno[nn].shape[0] for nn in range(N)]) 27 | 28 | tot = sp_detection.shape[0] 29 | sort_idx = np.argsort(-sp_detection[:, 5]) 30 | id_list = sp_detection[sort_idx, 0] 31 | col_list = (sp_detection[sort_idx, 1] + sp_detection[sort_idx, 3]) / 2 32 | row_list = (sp_detection[sort_idx, 2] + sp_detection[sort_idx, 4]) / 2 33 | bbox_list = sp_detection[sort_idx, 1:5].astype(int) 34 | 35 | tp = np.zeros(tot) 36 | fp = np.zeros(tot) 37 | flag = np.zeros((N, 20)) 38 | for dd in range(tot): 39 | if np.sum(flag) == kp_pos: 40 | fp[dd:] = 1 41 | break 42 | 43 | img_id = int(id_list[dd]) 44 | col_c = col_list[dd] 45 | row_c = row_list[dd] 46 | if SP['criteria'] == 'dist': 47 | min_dist = np.inf 48 | inst = spanno[img_id] 49 | for ii in range(inst.shape[0]): 50 | xx = (inst[ii, 0] + inst[ii, 2]) / 2 51 | yy = (inst[ii, 1] + inst[ii, 3]) / 2 52 | 53 | if np.sqrt((xx - col_c) ** 2 + (yy - row_c) ** 2) < min_dist: 54 | min_dist = np.sqrt((xx - col_c) ** 2 + (yy - row_c) ** 2) 55 | min_idx = ii 56 | 57 | if min_dist < SP['dist_thresh'] and flag[img_id, min_idx] == 0: 58 | tp[dd] = 1 59 | flag[img_id, min_idx] = 1 60 | else: 61 | fp[dd] = 1 62 | 63 | elif SP['criteria'] == 'iou': 64 | max_iou = -np.inf 65 | inst = spanno[img_id] 66 | for ii in range(inst.shape[0]): 67 | bbgt = inst[ii] 68 | bb = bbox_list[dd] 69 | bb = np.array([max(np.ceil(bb[0]), 1), max(np.ceil(bb[1]), 1), 70 | min(np.floor(bb[2]), img_size[img_id][1]), min(np.floor(bb[3]), img_size[img_id][0])]) 71 | 72 | bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])] 73 | iw = bi[2] - bi[0] + 1 74 | ih = bi[3] - bi[1] + 1 75 | 76 | if iw > 0 and ih > 0: 77 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \ 78 | (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - \ 79 | iw * ih 80 | ov = iw * ih / ua 81 | if ov > max_iou: 82 | max_iou = ov 83 | max_idx = ii 84 | 85 | if max_iou > SP['iou_thresh'] and flag[img_id, max_idx] == 0: 86 | tp[dd] = 1 87 | flag[img_id, max_idx] = 1 88 | else: 89 | fp[dd] = 1 90 | 91 | fp = np.cumsum(fp) 92 | tp = np.cumsum(tp) 93 | rec = tp / kp_pos 94 | prec = tp / (tp + fp) 95 | ap = VOCap(rec, prec) 96 | return ap 97 | 98 | 99 | def eval_AP(sp_detection, spanno, img_size): 100 | paral_num = 6 101 | inp_ls = [(np.concatenate(tuple([sp_detection[img_id][sp_id] for img_id in range(len(spanno))]), axis=0), 102 | [spanno[img_id][sp_id] for img_id in range(len(spanno))], 103 | img_size) for sp_id in range(len(sp_detection[0]))] 104 | 105 | ap_ls = np.array(Parallel(n_jobs=paral_num)(delayed(eval_AP_inner)(i) for i in inp_ls)) 106 | for sp_id in range(len(sp_detection[0])): 107 | print('{:3.1f}'.format(ap_ls[sp_id]*100)) 108 | 109 | print(np.nanmean(ap_ls) * 
100) 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /extractLayerFeat.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import cdist 2 | from FeatureExtractor_torch import FeatureExtractor_torch 3 | import os, glob 4 | import numpy as np 5 | import math 6 | import pickle 7 | from copy import * 8 | 9 | def extractLayerFeat(img_dir, extractor, scale_size=224): 10 | img_list = glob.glob(os.path.join(img_dir, '*.JPEG')) 11 | N = len(img_list) 12 | print('Extracting features from {} images...'.format(N)) 13 | 14 | 15 | feat_set = [None for nn in range(N)] 16 | for nn,impath in enumerate(img_list): 17 | layer_feature = extractor.extract_feature_image_from_path(impath)[0] 18 | layer_feature = layer_feature.transpose([1,2,0]) 19 | feat_set[nn] = layer_feature 20 | 21 | print('extracted feature shape: {}'.format(feat_set[0].shape)) 22 | 23 | return feat_set 24 | 25 | 26 | def extractLayerFeat_one(img_path, extractor, scale_size=224): 27 | layer_feature = extractor.extract_feature_image_from_path(img_path)[0] 28 | layer_feature = layer_feature.transpose([1,2,0]) 29 | 30 | # print('extracted feature shape: {}'.format(feat_set[0].shape)) 31 | return layer_feature 32 | 33 | 34 | if __name__=='__main__': 35 | layer_n = 'pool3' 36 | extractor = FeatureExtractor_torch(layer=layer_n) 37 | 38 | for vpi in [0, 45, 90, 135, 180, 225, 270, 315]: 39 | img_dir = '/mnt/4TB_b/qing/SPMatch/vp_examples/{}/'.format(vpi) 40 | img_list = glob.glob(os.path.join(img_dir, '*.JPEG')) 41 | N = len(img_list) 42 | print('Extracting features from {} images...'.format(N)) 43 | for nn,impath in enumerate(img_list): 44 | layer_feature = extractLayerFeat_one(impath, extractor) 45 | file_cache_feat = '.'.join(impath.split('.')[0:-1])+'_{}.pickle'.format(layer_n) 46 | with open(file_cache_feat, 'wb') as fh: 47 | pickle.dump(layer_feature, fh) -------------------------------------------------------------------------------- /featPickle2featCell.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pickle 4 | import scipy.io as sio 5 | import glob 6 | 7 | featDim = 512 8 | offset = 3 9 | layer_n = 'pool4' 10 | 11 | for vpi in [0, 45, 90, 135, 180, 225, 270, 315]: 12 | img_dir = '/mnt/4TB_b/qing/SPMatch/vp_examples/{}/'.format(vpi) 13 | pickle_list = glob.glob(os.path.join(img_dir, '*_{}.pickle'.format(layer_n))) 14 | N = len(pickle_list) 15 | print('Transfer features for {} images...'.format(N)) 16 | 17 | for file_cache_feat in pickle_list: 18 | with open(file_cache_feat, 'rb') as fh: 19 | feat_org = pickle.load(fh) 20 | 21 | feat_r, feat_c = feat_org.shape[0:2] 22 | feat = feat_org[offset:feat_r-offset, offset:feat_c-offset, :] 23 | 24 | feat_r, feat_c = feat.shape[0:2] 25 | lff = feat.reshape(-1, featDim) 26 | lff_norm = lff/np.sqrt(np.sum(lff**2, 1)).reshape(-1,1) 27 | feat = lff_norm.reshape(feat_r,feat_c,-1) 28 | 29 | assert(feat.shape[2]==featDim) 30 | 31 | info_r = np.tile(np.arange(feat_r).reshape(feat_r,1, 1), [1, feat_c, 1])+1 32 | info_c = np.tile(np.arange(feat_c).reshape(1,feat_c, 1), [feat_r, 1, 1])+1 33 | info_nn = np.ones((feat_r, feat_c, 1))*1 34 | 35 | feat_info = np.concatenate([feat, info_nn, info_r, info_c], axis=2) 36 | feat_cell_locinfo = np.array([feat_info], dtype=np.object) 37 | 38 | save_file = os.path.join(img_dir, 'feat_cell_locinfo.mat') 39 | sio.savemat(save_file, 
{'feat_cell_locinfo': feat_cell_locinfo}) 40 | -------------------------------------------------------------------------------- /get_vp_examples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pickle 4 | import scipy.io as sio 5 | import os,cv2 6 | from Pascal3dPlus import Pascal3dPlus 7 | 8 | category='car' 9 | set_type='train' 10 | dataset_root = '/mnt/1TB_SSD/dataset/' 11 | list_dir = os.path.join(dataset_root, 'PASCAL3D+_release1.1','Image_sets') 12 | anno_dir = os.path.join(dataset_root, 'PASCAL3D+_release1.1','Annotations', '{}_imagenet'.format(category)) 13 | 14 | 15 | split_dir='/mnt/1TB_SSD/dataset/PASCAL3D+_sp/file_list' 16 | filelist = os.path.join(split_dir, set_type + '_list', category + '_' + set_type + '.txt') 17 | 18 | with open(filelist, 'r') as fh: 19 | contents = fh.readlines() 20 | 21 | file_names = [cc.strip().split()[0] for cc in contents if cc != '\n'] 22 | instance_id = [int(cc.strip().split()[1])-1 for cc in contents if cc != '\n'] 23 | 24 | pascal = Pascal3dPlus(category=category, split=set_type, crop=True, first_n_debug=9999) 25 | detection_all = pascal.get_detection_all() 26 | print(len(detection_all)) 27 | 28 | fname_set = [tmp[0] for tmp in detection_all] 29 | 30 | vp_list = [] 31 | subtype_list = [] 32 | fname_index = 0 33 | for fi, ff in enumerate(file_names): 34 | if ff not in fname_set: 35 | continue 36 | 37 | assert(ff == fname_set[fname_index]) 38 | fname_index += 1 39 | 40 | mat_file = os.path.join(anno_dir, '{}.mat'.format(ff)) 41 | assert(os.path.isfile(mat_file)) 42 | mat_contents = sio.loadmat(mat_file) 43 | record = mat_contents['record'] 44 | objects = record['objects'] 45 | azi = objects[0,0]['viewpoint'][0,instance_id[fi]]['azimuth_coarse'][0,0][0,0] 46 | ele = objects[0,0]['viewpoint'][0,instance_id[fi]]['elevation_coarse'][0,0][0,0] 47 | subtype = objects[0,0]['subtype'][0,instance_id[fi]][0] 48 | vp_list.append((azi, ele)) 49 | subtype_list.append(subtype) 50 | 51 | print('done') 52 | 53 | for target_vp in [0,45,90,135,180,225,270,315]: 54 | target_range = [target_vp-5, target_vp+5] 55 | image_name_list = [] 56 | instance_info_list = [] 57 | sp_info_list = [] 58 | vp_set_list = [] 59 | 60 | save_img_dir = os.path.join('/mnt/4TB_b/qing/SPMatch', 'vp_examples_less', str(target_vp)) 61 | if not os.path.exists(save_img_dir): 62 | os.makedirs(save_img_dir) 63 | 64 | print(save_img_dir) 65 | for fi, ff in enumerate(fname_set): 66 | if subtype_list[fi] == 'sedan': 67 | vp_curr = vp_list[fi] 68 | if len(image_name_list) < 4 and vp_curr[1]==0 and vp_curr[0]<=target_range[1] and vp_curr[0]>=target_range[0]: 69 | img_name, instance_id, bbox, delta_xy, delta_scale, img, ins, sp_list = detection_all[fi] 70 | assert(ff == img_name) 71 | image_name_list.append(ff) 72 | instance_info_list.append(instance_id) 73 | sp_info_list.append(sp_list) 74 | vp_set_list.append(vp_curr) 75 | cv2.imwrite(os.path.join(save_img_dir, '{}.JPEG'.format(ff)), ins[:,:,::-1]) 76 | 77 | 78 | print(len(image_name_list)) 79 | 80 | save_info_file = os.path.join(save_img_dir, 'info.pickle') 81 | with open(save_info_file, 'wb') as fh: 82 | pickle.dump([image_name_list, instance_info_list, sp_info_list, vp_set_list], fh) -------------------------------------------------------------------------------- /refine_pool3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import pickle 4 | from 
scipy.spatial.distance import cdist 5 | import cv2 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | import matplotlib.patches as patches 10 | import scipy.io as sio 11 | 12 | def process_pool3_patch(lowpool_1, lowpool_2, row1, col1, row2, col2): 13 | pat1_core = lowpool_1[row1-1:row1+3,col1-1:col1+3].reshape(-1,256) 14 | pat2_core = lowpool_2[row2-1:row2+3,col2-1:col2+3].reshape(-1,256) 15 | cor_dist = cdist(pat1_core, pat2_core, 'cosine') 16 | 17 | sort_idx = np.argsort(cor_dist,axis=None) 18 | pat_dist_ls = [] 19 | for ii in sort_idx[0:3]: 20 | pat1_idx_core, pat2_idx_core = np.unravel_index(ii, cor_dist.shape) 21 | pat1_r_core, pat1_c_core = np.unravel_index(pat1_idx_core, (4,4)) 22 | pat2_r_core, pat2_c_core = np.unravel_index(pat2_idx_core, (4,4)) 23 | pat1_r = pat1_r_core + (row1-1) 24 | pat1_c = pat1_c_core + (col1-1) 25 | pat2_r = pat2_r_core + (row2-1) 26 | pat2_c = pat2_c_core + (col2-1) 27 | 28 | rr1 = np.max((0,pat1_r-3)) 29 | rr2 = np.max((0,pat2_r-3)) 30 | cc1 = np.max((0,pat1_c-3)) 31 | cc2 = np.max((0,pat2_c-3)) 32 | 33 | # 6 by 6 context 34 | pat1 = lowpool_1[rr1:pat1_r+3, cc1:pat1_c+3] 35 | pat2 = lowpool_2[rr2:pat2_r+3, cc2:pat2_c+3] 36 | 37 | pat_dist = np.mean(cdist(pat1.reshape(-1,256), pat2.reshape(-1,256), 'cosine'),axis=None) 38 | pat_dist_ls.append(pat_dist) 39 | 40 | pat1_idx_core, pat2_idx_core = np.unravel_index(sort_idx[np.argmin(pat_dist_ls)], cor_dist.shape) 41 | pat1_r_core, pat1_c_core = np.unravel_index(pat1_idx_core, (4,4)) 42 | pat2_r_core, pat2_c_core = np.unravel_index(pat2_idx_core, (4,4)) 43 | 44 | pat1_r = pat1_r_core + (row1-1) 45 | pat1_c = pat1_c_core + (col1-1) 46 | pat2_r = pat2_r_core + (row2-1) 47 | pat2_c = pat2_c_core + (col2-1) 48 | 49 | return(pat1_r, pat1_c, pat2_r, pat2_c, 1-np.min(pat_dist_ls)) 50 | 51 | 52 | category = 'car' 53 | set_type = 'train' 54 | featDim = 256 55 | offset = 2 56 | dataset_root = '/home/yutong/dataset/' 57 | list_dir = os.path.join(dataset_root, 'PASCAL3D+_release1.1','Image_sets') 58 | proj_root ='/mnt/4TB_b/qing/VC_journal/' 59 | cache_dir = os.path.join(proj_root, 'feat') 60 | 61 | pool4_offset=2 62 | 63 | root_dir = '/home/yutong/SPMatch/vp_examples_less/' 64 | for vp in [0,45,90,135,180,225,270,315]: 65 | # for vp in [45]: 66 | img_dir = os.path.join(root_dir, str(vp)) 67 | # file list 68 | filelist = os.path.join(img_dir, 'file_list.txt') 69 | with open(filelist, 'r') as fh: 70 | contents = fh.readlines() 71 | 72 | img_list = [cc.strip() for cc in contents] 73 | img_idx2 = len(img_list)-1 74 | 75 | for img_idx1 in range(img_idx2): 76 | 77 | # load pool4 match result 78 | mat_filename = os.path.join(img_dir, 'img{}VSimg{}.mat'.format(img_idx1, img_idx2)) 79 | mat_content = sio.loadmat(mat_filename) 80 | pool4_rst = mat_content['rst'] 81 | 82 | filelist = os.path.join(img_dir, 'file_list.txt') 83 | with open(filelist, 'r') as fh: 84 | contents = fh.readlines() 85 | 86 | img_list = [cc.strip() for cc in contents] 87 | img_file1 = img_list[img_idx1] 88 | img_file2 = img_list[img_idx2] 89 | pool3_file1 = '.'.join(img_file1.split('.')[0:-1])+'_pool3.pickle' 90 | pool3_file2 = '.'.join(img_file2.split('.')[0:-1])+'_pool3.pickle' 91 | 92 | pool3_img1 = pickle.load(open(pool3_file1, 'rb')) 93 | pool3_img2 = pickle.load(open(pool3_file2, 'rb')) 94 | img1 = cv2.imread(img_file1) 95 | img2 = cv2.imread(img_file2) 96 | rratio = 224.0/np.min(img2.shape[0:2]) 97 | img2 = cv2.resize(img2, (0,0), fx=rratio, fy=rratio) 98 | 99 | save_path = mat_filename.replace('.mat','_refine/') 100 
| if not os.path.exists(save_path): 101 | os.makedirs(save_path) 102 | 103 | # to save pool4 and pool3 matched x1,y1,x1',y1' 104 | matched = [[],[]] 105 | 106 | for pi in range(pool4_rst.shape[0]): 107 | pool4_info = pool4_rst[pi] 108 | p4_score, img1_id, row1, col1, img2_id, row2, col2 = pool4_info.astype(int) 109 | 110 | pool3_row1 = (row1+pool4_offset-1)*2 111 | pool3_col1 = (col1+pool4_offset-1)*2 112 | pool3_row2 = (row2+pool4_offset-1)*2 113 | pool3_col2 = (col2+pool4_offset-1)*2 114 | 115 | # plot images 116 | plt.close() 117 | fig,ax = plt.subplots(1,2,figsize=(20,5)) 118 | 119 | # Display the image 120 | ax[0].imshow(img1[:,:,::-1]) 121 | ax[1].imshow(img2[:,:,::-1]) 122 | 123 | pool4_matched = [] 124 | # Create a Rectangle patch 125 | x_pool4 = 16*(col1+pool4_offset-1)-42 126 | y_pool4 = 16*(row1+pool4_offset-1)-42 127 | rect = patches.Rectangle((x_pool4,y_pool4),100,100,linewidth=1,edgecolor='r',facecolor='none') 128 | circ = patches.Circle((x_pool4+50,y_pool4+50),3) 129 | pool4_matched = pool4_matched + [x_pool4+50,y_pool4+50] 130 | ax[0].add_patch(rect) 131 | ax[0].add_patch(circ) 132 | 133 | x_pool4 = 16*(col2+pool4_offset-1)-42 134 | y_pool4 = 16*(row2+pool4_offset-1)-42 135 | rect = patches.Rectangle((x_pool4,y_pool4),100,100,linewidth=1,edgecolor='r',facecolor='none') 136 | circ = patches.Circle((x_pool4+50,y_pool4+50),3) 137 | pool4_matched = pool4_matched + [x_pool4+50,y_pool4+50] 138 | ax[1].add_patch(rect) 139 | ax[1].add_patch(circ) 140 | 141 | pool4_matched = pool4_matched + [p4_score] 142 | matched[0].append(pool4_matched) 143 | 144 | pool3_matched = [] 145 | # refine the matches on pool3 146 | pat1_r, pat1_c, pat2_r, pat2_c, p3_score = process_pool3_patch(pool3_img1,pool3_img2, pool3_row1, pool3_col1, pool3_row2, pool3_col2) 147 | 148 | # Create a Rectangle patch -- pool3 149 | x_pool3 = 8*(pat1_c)-18 150 | y_pool3 = 8*(pat1_r)-18 151 | rect = patches.Rectangle((x_pool3,y_pool3),44,44,linewidth=1,edgecolor='r',facecolor='none') 152 | circ = patches.Circle((x_pool3+22,y_pool3+22),3, edgecolor='g',facecolor='g') 153 | pool3_matched = pool3_matched + [x_pool3+22,y_pool3+22] 154 | ax[0].add_patch(rect) 155 | ax[0].add_patch(circ) 156 | 157 | x_pool3 = 8*(pat2_c)-18 158 | y_pool3 = 8*(pat2_r)-18 159 | rect = patches.Rectangle((x_pool3,y_pool3),44,44,linewidth=1,edgecolor='r',facecolor='none') 160 | circ = patches.Circle((x_pool3+22,y_pool3+22),3, edgecolor='g',facecolor='g') 161 | pool3_matched = pool3_matched + [x_pool3+22,y_pool3+22] 162 | ax[1].add_patch(rect) 163 | ax[1].add_patch(circ) 164 | 165 | pool3_matched = pool3_matched + [p3_score] 166 | matched[1].append(pool3_matched) 167 | 168 | plt.savefig(os.path.join(save_path, '{}.png'.format(str(pi)))) 169 | 170 | print(matched) 171 | save_pickle = mat_filename.replace('.mat','_matched.pickle') 172 | with open(save_pickle,'wb') as fh: 173 | pickle.dump(matched, fh) -------------------------------------------------------------------------------- /solve_nearest.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os,sys 3 | import numpy as np 4 | import matplotlib 5 | matplotlib.use('agg') 6 | import matplotlib.pyplot as plt 7 | import matplotlib.patches as patches 8 | import cv2 9 | 10 | def predict(pnt1, matched_arr, width1, height1, width2, height2): 11 | # pnt1 = [x1, y1] 12 | matched = matched_arr 13 | 14 | matched_1 = matched[:,0:2] 15 | matched_1_dist = np.sum((matched_1-pnt1.reshape(1,-1))**2,axis=1) 16 | nn_ls = np.argsort(matched_1_dist)[0:4] 17 | 
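# (added note) predict() transfers pnt1 from image 1 into image 2: the four
# nearest matched points (pool4 and pool3 matches stacked in matched_arr)
# vote with inverse-distance weights, and their average normalized
# displacement is added to pnt1's normalized position before rescaling.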
18 | matched_nearest = matched[nn_ls.astype(int)]
19 | weights = 1.0/((np.sum((matched_nearest[:,0:2] - pnt1)**2, axis=1))**0.5)
20 | 
21 | transfer = np.average(matched_nearest[:,2:4]/np.array([[width2, height2]]) - \
22 | matched_nearest[:,0:2]/np.array([[width1, height1]]), axis=0, weights = weights)
23 | mscore = np.average(matched_nearest[:,4], weights = weights)
24 | 
25 | pnt2 = (pnt1/np.array([width1, height1]) + transfer)*np.array([width2, height2])
26 | 
27 | return pnt2,mscore
28 | 
29 | 
30 | def find_nearest(pnt1, arr, num=1):
31 | # arr = [[x1,y1],...,[xn,yn]]
32 | dist = (np.sum((pnt1.reshape(1,-1)-arr[:,0:4])**2,axis=1))**0.5
33 | return np.argsort(dist)[0:num]
34 | 
35 | 
36 | def predict2(pnt1, matched_arr, width1, height1, width2, height2):
37 | # pnt1 = [x1, y1]
38 | vertices = np.array([[0,0,0,0],\
39 | [width1,0,width2,0],\
40 | [0,height1,0,height2],\
41 | [width1,height1,width2,height2]])
42 | 
43 | # matched = np.concatenate((matched_arr, vertices), axis=0)
44 | matched = matched_arr
45 | 
46 | nn_ls = []
47 | 
48 | matched_1 = matched[:,0:2]
49 | field1 = np.where(np.logical_and(matched_1[:,0]<=pnt1[0], matched_1[:,1]<=pnt1[1]))[0]
50 | if len(field1)>0:
51 | nn1 = field1[find_nearest(pnt1, matched_1[field1])]
52 | nn_ls = np.concatenate((nn_ls,nn1))
53 | 
54 | field2 = np.where(np.logical_and(matched_1[:,0]>=pnt1[0], matched_1[:,1]<=pnt1[1]))[0]
55 | if len(field2)>0:
56 | nn2 = field2[find_nearest(pnt1, matched_1[field2])]
57 | nn_ls = np.concatenate((nn_ls,nn2))
58 | 
59 | 
60 | field3 = np.where(np.logical_and(matched_1[:,0]<=pnt1[0], matched_1[:,1]>=pnt1[1]))[0]
61 | if len(field3)>0:
62 | nn3 = field3[find_nearest(pnt1, matched_1[field3])]
63 | nn_ls = np.concatenate((nn_ls,nn3))
64 | 
65 | field4 = np.where(np.logical_and(matched_1[:,0]>=pnt1[0], matched_1[:,1]>=pnt1[1]))[0]
66 | if len(field4)>0:
67 | nn4 = field4[find_nearest(pnt1, matched_1[field4])]
68 | nn_ls = np.concatenate((nn_ls,nn4))
69 | 
70 | matched_nearest = matched[nn_ls.astype(int)]
71 | transfer = np.mean(matched_nearest[:,2:4] - matched_nearest[:,0:2], axis=0)
72 | mscore = np.mean(matched_nearest[:,4])
73 | 
74 | pnt2 = pnt1 + transfer
75 | 
76 | return pnt2,mscore
77 | 
78 | 
79 | 
80 | def draw_img(img1,img2,pnt_img1,pnt_img2,savefile,edge=0):
81 | plt.close()
82 | fig,ax = plt.subplots(1,2,figsize=(20,5))
83 | ax[0].imshow(img1[:,:,::-1])
84 | ax[1].imshow(img2[:,:,::-1])
85 | 
86 | for pp in pnt_img1:
87 | circ = patches.Circle(pp,3)
88 | ax[0].add_patch(circ)
89 | if edge > 0:
90 | edge_half = int(edge/2)
91 | rect = patches.Rectangle((pp[0]-edge_half,pp[1]-edge_half),edge,edge,linewidth=1,edgecolor='r',facecolor='none')
92 | ax[0].add_patch(rect)
93 | 
94 | for pp in pnt_img2:
95 | circ = patches.Circle(pp,3)
96 | ax[1].add_patch(circ)
97 | if edge > 0:
98 | edge_half = int(edge/2)
99 | rect = patches.Rectangle((pp[0]-edge_half,pp[1]-edge_half),edge,edge,linewidth=1,edgecolor='r',facecolor='none')
100 | ax[1].add_patch(rect)
101 | 
102 | plt.savefig(savefile)
103 | 
104 | 
105 | root_dir = '/mnt/4TB_b/qing/SPMatch/vp_examples_less/'
106 | for vp in [0,45,90,135,180,225,270,315]:
107 | img_dir = os.path.join(root_dir, str(vp))
108 | # file list
109 | filelist = os.path.join(img_dir, 'file_list.txt')
110 | with open(filelist, 'r') as fh:
111 | contents = fh.readlines()
112 | 
113 | img_list = [cc.strip() for cc in contents]
114 | img_idx2 = len(img_list)-1
115 | img_file2 = img_list[img_idx2]
116 | img2 = cv2.imread(img_file2)
117 | rratio = 224.0/np.min(img2.shape[0:2])
118 | img2 = cv2.resize(img2, (0,0), fx=rratio,
fy=rratio) 119 | 120 | info_file = os.path.join(img_dir, 'info.pickle') 121 | image_name_list, instance_info_list, sp_info_list, vp_set_list = pickle.load(open(info_file, 'rb')) 122 | 123 | for img_idx1 in range(img_idx2): 124 | img_file1 = img_list[img_idx1] 125 | 126 | # read in images 127 | img1 = cv2.imread(img_file1) 128 | # rratio = 224.0/np.min(img1.shape[0:2]) 129 | # img1 = cv2.resize(img1, (0,0), fx=rratio, fy=rratio) 130 | 131 | 132 | # load pool4 match result 133 | matched_file = os.path.join(img_dir, 'img{}VSimg{}_matched.pickle'.format(img_idx1, img_idx2)) 134 | with open(matched_file, 'rb') as fh: 135 | matched = pickle.load(fh) 136 | 137 | N = len(matched[0]) + len(matched[1]) 138 | matched_arr = np.zeros((N, 5)) 139 | 140 | for nn in range(N): 141 | if nn < len(matched[0]): 142 | matched_arr[nn] = np.array(matched[0][nn]) 143 | else: 144 | matched_arr[nn] = np.array(matched[1][nn-len(matched[0])]) 145 | 146 | 147 | # print(matched_arr.shape) 148 | 149 | for ispinfo,iname in zip(sp_info_list, image_name_list): 150 | if iname in img_list[img_idx1]: 151 | sp_info = ispinfo 152 | 153 | save_dir = os.path.join(img_dir, 'img{}VSimg{}_transferSP_nearest'.format(img_idx1, img_idx2)) 154 | if not os.path.exists(save_dir): 155 | os.makedirs(save_dir) 156 | 157 | 158 | predict_sp_info = [] 159 | for isp in range(39): 160 | isp_ls = [spp[1] for spp in sp_info if spp[0]==isp] 161 | 162 | if len(isp_ls)==0: 163 | continue 164 | 165 | save_file = os.path.join(save_dir, 'transfer_sp{}.png'.format(isp)) 166 | pnt_ls1 = [] 167 | pnt_ls2 = [] 168 | for sppos in isp_ls: 169 | pnt1 = np.array([min(img1.shape[1],max(0,(sppos[0]+sppos[2])/2)),\ 170 | min(img1.shape[0],max(0,(sppos[1]+sppos[3])/2))]) 171 | # pnt1 = np.array([(sppos[0]+sppos[2])/2, (sppos[1]+sppos[3])/2]) 172 | pnt2,_ = predict(pnt1,matched_arr,img1.shape[1],img1.shape[0],img2.shape[1],img2.shape[0] ) 173 | pnt2 = np.array([min(img2.shape[1],max(0,pnt2[0])),\ 174 | min(img2.shape[0],max(0,pnt2[1]))]) 175 | 176 | pnt_ls1.append(pnt1) 177 | pnt_ls2.append(pnt2) 178 | predict_sp_info.append([isp,pnt2]) 179 | 180 | draw_img(img1,img2,pnt_ls1,pnt_ls2,save_file,100) 181 | 182 | sp_file = os.path.join(img_dir, 'img{}VSimg{}_transSP.pickle'.format(img_idx1, img_idx2)) 183 | pickle.dump(predict_sp_info, open(sp_file, 'wb')) 184 | -------------------------------------------------------------------------------- /solve_nearest_score.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os,sys,glob 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import matplotlib.patches as patches 6 | import cv2 7 | from scipy.spatial.distance import cdist 8 | from scipy.interpolate import interp1d 9 | 10 | 11 | def find_nearest(pnt1, arr, num=1): 12 | dist = (np.sum((pnt1.reshape(1,-1)-arr[:,0:4])**2,axis=1))**0.5 13 | return np.argsort(dist)[0:num] 14 | 15 | 16 | def predict2(pnt1, matched_arr, width1, height1, width2, height2): 17 | vertices = np.array([[0,0,0,0],\ 18 | [width1,0,width2,0],\ 19 | [0,height1,0,height2],\ 20 | [width1,height1,width2,height2]]) 21 | matched = matched_arr 22 | 23 | nn_ls = [] 24 | 25 | matched_1 = matched[:,0:2] 26 | field1 = np.where(np.logical_and(matched_1[:,0]<=pnt1[0], matched_1[:,1]<=pnt1[1]))[0] 27 | if len(field1)>0: 28 | nn1 = field1[find_nearest(pnt1, matched_1[field1], 1)] 29 | nn_ls = np.concatenate((nn_ls,nn1)) 30 | 31 | field2 = np.where(np.logical_and(matched_1[:,0]>=pnt1[0], matched_1[:,1]<=pnt1[1]))[0] 32 | if len(field2)>0: 33 | nn2 = 
field2[find_nearest(pnt1, matched_1[field2], 1)] 34 | nn_ls = np.concatenate((nn_ls,nn2)) 35 | 36 | 37 | field3 = np.where(np.logical_and(matched_1[:,0]<=pnt1[0], matched_1[:,1]>=pnt1[1]))[0] 38 | if len(field3)>0: 39 | nn3 = field3[find_nearest(pnt1, matched_1[field3], 1)] 40 | nn_ls = np.concatenate((nn_ls,nn3)) 41 | 42 | field4 = np.where(np.logical_and(matched_1[:,0]>=pnt1[0], matched_1[:,1]>=pnt1[1]))[0] 43 | if len(field4)>0: 44 | nn4 = field4[find_nearest(pnt1, matched_1[field4], 1)] 45 | nn_ls = np.concatenate((nn_ls,nn4)) 46 | 47 | 48 | matched_nearest = matched[nn_ls.astype(int)] 49 | weights = 1.0/((np.sum((matched_nearest[:,0:2] - pnt1)**2, axis=1))**0.5) 50 | 51 | transfer = np.average(matched_nearest[:,2:4] - matched_nearest[:,0:2], axis=0, weights = weights) 52 | mscore = np.average(matched_nearest[:,4], weights = weights) 53 | 54 | pnt2 = pnt1 + transfer 55 | 56 | return pnt2,mscore 57 | 58 | def predict(pnt1, matched_arr, width1, height1, width2, height2): 59 | vertices = np.array([[0,0,0,0],\ 60 | [width1,0,width2,0],\ 61 | [0,height1,0,height2],\ 62 | [width1,height1,width2,height2]]) 63 | matched = matched_arr 64 | 65 | nn_ls = [] 66 | 67 | matched_1 = matched[:,0:2] 68 | matched_1_dist = np.sum((matched_1-pnt1.reshape(1,-1))**2,axis=1) 69 | nn_ls = np.argsort(matched_1_dist)[0:4] 70 | 71 | 72 | matched_nearest = matched[nn_ls.astype(int)] 73 | weights = 1.0/((np.sum((matched_nearest[:,0:2] - pnt1)**2, axis=1))**0.5) 74 | 75 | transfer = np.average(matched_nearest[:,2:4] - matched_nearest[:,0:2], axis=0, weights = weights) 76 | mscore = np.average(matched_nearest[:,4], weights = weights) 77 | 78 | pnt2 = pnt1 + transfer 79 | 80 | return pnt2,mscore 81 | 82 | 83 | def predict_inter(pnt1, matched_arr, width1, height1, width2, height2): 84 | # pnt1 = [x1, y1] 85 | vertices = np.array([[0,0,0,0],\ 86 | [width1,0,width2,0],\ 87 | [0,height1,0,height2],\ 88 | [width1,height1,width2,height2]]) 89 | 90 | matched = matched_arr 91 | 92 | nn_ls = [] 93 | 94 | matched_1 = matched[:,0:2] 95 | matched_1_dist = np.sum((matched_1-pnt1.reshape(1,-1))**2,axis=1) 96 | nn_ls = np.argsort(matched_1_dist)[0:10] 97 | 98 | matched_nearest = matched[nn_ls.astype(int)] 99 | sidx = np.argsort(matched_nearest[:,0]) 100 | try: 101 | fx = interp1d(matched_nearest[sidx,0], matched_nearest[sidx,2], kind='linear', fill_value="extrapolate") 102 | except: 103 | print(matched_nearest[sidx,0], matched_nearest[sidx,2]) 104 | sidx = np.argsort(matched_nearest[:,1]) 105 | fy = interp1d(matched_nearest[sidx,1], matched_nearest[sidx,3], kind='linear', fill_value="extrapolate") 106 | pnt2 = np.array([fx(pnt1[0]), fy(pnt1[1])]) 107 | 108 | weights = 1.0/((np.sum((matched_nearest[:,0:2] - pnt1)**2, axis=1))**0.5) 109 | mscore = np.average(matched_nearest[:,4], weights = weights) 110 | 111 | return pnt2,mscore 112 | 113 | 114 | def draw_img(img1,img2,pnt_img1,pnt_img2,savefile,edge=0): 115 | plt.close() 116 | fig,ax = plt.subplots(1,2,figsize=(20,5)) 117 | ax[0].imshow(img1[:,:,::-1]) 118 | ax[1].imshow(img2[:,:,::-1]) 119 | 120 | for pp in pnt_img1: 121 | circ = patches.Circle(pp,3) 122 | ax[0].add_patch(circ) 123 | if edge > 0: 124 | edge_half = int(edge/2) 125 | rect = patches.Rectangle((pp[0]-edge_half,pp[1]-edge_half),edge,edge,linewidth=1,edgecolor='r',facecolor='none') 126 | ax[0].add_patch(rect) 127 | 128 | for pp in pnt_img2: 129 | circ = patches.Circle(pp,3) 130 | ax[1].add_patch(circ) 131 | if edge > 0: 132 | edge_half = int(edge/2) 133 | rect = 
patches.Rectangle((pp[0]-edge_half,pp[1]-edge_half),edge,edge,linewidth=1,edgecolor='r',facecolor='none') 134 | ax[1].add_patch(rect) 135 | 136 | plt.savefig(savefile) 137 | 138 | 139 | root_dir = '/home/yutong/SPMatch/vp_test_sedan/' 140 | dir_ls = glob.glob(os.path.join(root_dir,'a*e*')) 141 | total_spnum=39 142 | draw_p=True 143 | for img_dir in dir_ls: 144 | if not 'a20e0' in img_dir: 145 | continue 146 | 147 | draw=False 148 | 149 | print(img_dir) 150 | # file list 151 | filelist = os.path.join(img_dir, 'file_list.txt') 152 | with open(filelist, 'r') as fh: 153 | contents = fh.readlines() 154 | 155 | img_list = [cc.strip() for cc in contents] 156 | syn_n = 3 157 | 158 | for img_idx2 in range(len(img_list)-syn_n, len(img_list)): 159 | img_file2 = img_list[img_idx2] 160 | img2 = cv2.imread(img_file2) 161 | rratio = 224.0/np.min(img2.shape[0:2]) 162 | img2 = cv2.resize(img2, (0,0), fx=rratio, fy=rratio) 163 | 164 | img2_name = img_file2.split('/')[-1].split('.')[0] 165 | 166 | info_file = os.path.join(img_dir, 'sp_info_{}.pickle'.format(img2_name)) 167 | sp_info = pickle.load(open(info_file, 'rb')) 168 | 169 | img2_pool4_file = '.'.join(img_file2.split('.')[0:-1])+'_pool4.pickle' 170 | img2_pool4 = pickle.load(open(img2_pool4_file, 'rb')) 171 | sp_features = [] 172 | for isp in range(total_spnum): 173 | sp_features.append([]) 174 | 175 | isp_ls = [spp[1] for spp in sp_info if spp[0]==isp] 176 | if len(isp_ls)==0: 177 | continue 178 | 179 | for sppos in isp_ls: 180 | pnt2 = np.array([min(img2.shape[1],max(0,sppos[0])),\ 181 | min(img2.shape[0],max(0,sppos[1]))]) 182 | pnt2_pool4 = (pnt2//16).astype(int) 183 | sp_features[-1].append(img2_pool4[min(img2_pool4.shape[0]-1,pnt2_pool4[1]), \ 184 | min(img2_pool4.shape[1]-1,pnt2_pool4[0])]) 185 | 186 | 187 | for img_idx1 in range(len(img_list)-syn_n): 188 | img_file1 = img_list[img_idx1] 189 | img_file_name = img_file1.split('/')[-1] 190 | img_file_name = img_file_name.split('.')[0] 191 | 192 | img1_pool4_file = '.'.join(img_file1.split('.')[0:-1])+'_pool4.pickle' 193 | img1_pool4 = pickle.load(open(img1_pool4_file, 'rb')) 194 | 195 | # read in images 196 | img1 = cv2.imread(img_file1) 197 | matched_file = os.path.join(img_dir, 'img{}VSimg{}_matched.pickle'.format(img_idx1, img_idx2)) 198 | with open(matched_file, 'rb') as fh: 199 | matched = pickle.load(fh) 200 | 201 | matched_arr = np.array(matched[1]) 202 | # print(matched_arr.shape) 203 | 204 | matched_arr_rv = np.zeros_like(matched_arr) 205 | matched_arr_rv[:,0:2] = matched_arr[:,2:4] 206 | matched_arr_rv[:,2:4] = matched_arr[:,0:2] 207 | matched_arr_rv[:,4] = matched_arr[:,4] 208 | 209 | 210 | save_dir = os.path.join(img_dir, 'img{}VSimg{}_transferSP_nearest'.format(img_idx1, img_idx2)) 211 | if not os.path.exists(save_dir): 212 | os.makedirs(save_dir) 213 | 214 | 215 | predict_sp_info = [] 216 | for isp in range(total_spnum): 217 | isp_ls = [spp[1] for spp in sp_info if spp[0]==isp] 218 | 219 | if len(isp_ls)==0: 220 | continue 221 | 222 | save_file = os.path.join(save_dir, 'transfer_sp{}.png'.format(isp)) 223 | pnt_ls1 = [] 224 | pnt_ls2 = [] 225 | for spii,sppos in enumerate(isp_ls): 226 | pnt2 = np.array([min(img2.shape[1],max(0,sppos[0])),\ 227 | min(img2.shape[0],max(0,sppos[1]))]) 228 | 229 | # pnt1 = np.array([(sppos[0]+sppos[2])/2, (sppos[1]+sppos[3])/2]) 230 | pnt1,_ = predict(pnt2,matched_arr_rv,img2.shape[1],img2.shape[0],img1.shape[1],img1.shape[0] ) 231 | pnt1 = np.array([min(img1.shape[1],max(0,pnt1[0])),\ 232 | min(img1.shape[0],max(0,pnt1[1]))]) 233 | 234 | pnt1_pool4 = 
(pnt1//16).astype(int) 235 | pnt1_pool4_f = img1_pool4[min(img1_pool4.shape[0]-1,pnt1_pool4[1]), \ 236 | min(img1_pool4.shape[1]-1,pnt1_pool4[0])] 237 | mscore = 1-cdist(pnt1_pool4_f.reshape(-1,512), np.array(sp_features[isp][spii:spii+1]), 'cosine')[0,0] 238 | 239 | img1_pool4_h, img1_pool4_w = img1_pool4.shape[0:2] 240 | img1_pool4_dist = cdist(img1_pool4.reshape(-1,512), np.array(sp_features[isp][spii:spii+1]), 'cosine') 241 | 242 | img1_pool4_dist = img1_pool4_dist.reshape(img1_pool4_h*img1_pool4_w,) 243 | 244 | to_select = int(img1_pool4_h*img1_pool4_w*0.03) # top 3% close features 245 | img1_pool4_cans = np.argsort(img1_pool4_dist)[0:to_select] 246 | 247 | img1_cans = [] 248 | for p4c in img1_pool4_cans: 249 | prow,pcol = np.unravel_index(p4c, (img1_pool4_h, img1_pool4_w)) 250 | img1_cans.append(np.array([pcol,prow])*16-42+50) 251 | 252 | img1_nearest_idx = np.argmin(cdist(np.array(img1_cans), pnt1.reshape(1,2)).reshape(-1,)) 253 | img1_nearest = img1_cans[img1_nearest_idx] 254 | img1_nearest_score = 1-img1_pool4_dist[img1_pool4_cans[img1_nearest_idx]] 255 | 256 | pnt_ls1.append(pnt1) 257 | pnt_ls2.append(pnt2) 258 | predict_sp_info.append([img_file_name,isp,pnt1,mscore]) 259 | 260 | if draw: 261 | draw_img(img1,img2,pnt_ls1,pnt_ls2,save_file,100) 262 | 263 | sp_file = os.path.join(img_dir, 'img{}VSimg{}_transSP.pickle'.format(img_idx1, img_idx2)) 264 | pickle.dump(predict_sp_info, open(sp_file, 'wb')) -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import scipy.io as sio 4 | import h5py 5 | import numpy as np 6 | from scipy import misc 7 | import pickle 8 | import torch 9 | import math 10 | 11 | def img_rescale(img, delta_scale): 12 | new_shape0 = int(img.shape[0] * delta_scale) 13 | new_shape1 = int(img.shape[1] * delta_scale) 14 | return misc.imresize(img, (new_shape0, new_shape1)) 15 | 16 | 17 | def bbox_rescale(img, bbox, rescale): 18 | # calculate min_edge and calculate scale only based on bbox 19 | delta_scale = bbox_to_delta_scale(bbox, rescale) 20 | return img_rescale(img, delta_scale), bbox * delta_scale, delta_scale 21 | 22 | 23 | def bbox_to_delta_scale(bbox, rescale): 24 | min_edge = min(bbox[3] - bbox[1], bbox[2] - bbox[0]) 25 | assert (min_edge > 0.5) 26 | if rescale > 0: 27 | delta_scale = rescale / float(min_edge) 28 | else: 29 | delta_scale = -rescale 30 | return delta_scale 31 | 32 | 33 | def bbox_crop(img, bbox): 34 | """ 35 | :param img: 36 | :param bbox: 37 | :return: cropped image, pixel delta at [x, y] 38 | """ 39 | bbox = bound_bbox_to_int(img, bbox) 40 | cropped = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :] 41 | return cropped, np.array([bbox[0], bbox[1]]) 42 | 43 | 44 | def bound_bbox_to_int(img, bbox): 45 | """ 46 | :param img: np.array(RGB_img) shape = [y, x, 3] 47 | :param bbox: [x0, y0, x1, y1] 48 | :return: bounded bbox [x0:x1), [y0:y1) => [x0, y0, x1, y1] 49 | """ 50 | if bbox[1] < -0.5: 51 | # print(str(bbox[1]) + ' < -0.5') 52 | bbox[1] = -0.5 53 | if bbox[0] < -0.5: 54 | # print(str(bbox[0]) + ' < -0.5') 55 | bbox[0] = -0.5 56 | if bbox[3] > img.shape[0] - 0.5: 57 | # print(str(bbox[3]) + ' > ' + str(img.shape[0] - 0.5)) 58 | bbox[3] = img.shape[0] - 0.5 59 | if bbox[2] > img.shape[1] - 0.5: 60 | # print(str(bbox[2]) + ' > ' + str(img.shape[1] - 0.5)) 61 | bbox[2] = img.shape[1] - 0.5 62 | if bbox[1] >= bbox[3] or bbox[0] >= bbox[2]: 63 | print('invalid bbox') 64 | return np.round((bbox + 
0.5)).astype(np.int32)
65 | 
66 | 
67 | def bbox_rescale_and_bbox_crop(img, bbox, rescale, crop=True):
68 | """
69 | :param img: RGB images
70 | :param bbox: rescale and crop based on bbox [x0, y0, x1, y1]; pass the full image extent if no sub-box is wanted
71 | :param rescale: negative means delta_scale, positive means pixel scale of short edge
72 | :param crop: crop the bbox or not
73 | :return: img, new = old * delta_scale - delta_xy
74 | """
75 | delta_xy = np.array([0.0, 0.0])
76 | img, bbox, delta_scale = bbox_rescale(img, bbox, rescale)
77 | if crop:
78 | img, delta_xy = bbox_crop(img, bbox)
79 | return img, delta_scale, delta_xy
80 | 
81 | 
82 | def area_rescale(img, sqrt_area=300.0):
83 | """
84 | :param img: RGB images
85 | :param sqrt_area: desired sqrt of area
86 | :return: img, new = (old - delta_xy) * delta_scale
87 | """
88 | delta_xy = np.array([0.0, 0.0])
89 | img_sqrt_area = math.sqrt(img.shape[0] * img.shape[1])
90 | delta_scale = sqrt_area / img_sqrt_area
91 | return img_rescale(img, delta_scale), delta_xy, delta_scale
92 | 
93 | 
94 | 
95 | def bbox_crop_and_bbox_rescale(img, bbox, rescale, crop=True):
96 | """
97 | :param img: RGB images
98 | :param bbox: rescale and crop based on bbox [x0, y0, x1, y1]; pass the full image extent if no sub-box is wanted
99 | :param rescale: negative means delta_scale, positive means pixel scale of short edge
100 | :param crop: crop the bbox or not
101 | :return: img, new = (old - delta_xy) * delta_scale
102 | """
103 | delta_xy = np.array([0.0, 0.0])
104 | 
105 | if crop:
106 | img, delta_xy = bbox_crop(img, bbox)
107 | bbox = np.array([-0.5, -0.5, img.shape[1] - 0.5, img.shape[0] - 0.5])
108 | else:
109 | bbox = bound_bbox_to_int(img, bbox)
110 | 
111 | img, bbox, delta_scale = bbox_rescale(img, bbox, rescale)
112 | return img, delta_xy, delta_scale
113 | 
114 | 
115 | def im2rgb(img):
116 | if len(img.shape) == 3 and img.shape[2] == 3:
117 | return img
118 | if len(img.shape) == 3 and img.shape[2] == 4:
119 | return img[:, :, :3]
120 | if len(img.shape) == 2:
121 | return np.expand_dims(img, axis=2).repeat(3, axis=2)
122 | if len(img.shape) == 3 and img.shape[2] == 1:
123 | return img.repeat(3, axis=2)
124 | print('im2rgb error')
125 | return img
126 | 
127 | 
128 | # def np_rgb2img_var(img):
129 | # numpy_array = np.float32(img) # H, W, RGB
130 | # numpy_array -= np.float32([[[123.68, 116.779, 103.939]]]) # H, W, RGB
131 | # numpy_array = numpy_array[:, :, ::-1] # H, W, BGR
132 | # numpy_array = numpy_array.transpose([2, 0, 1]) # BGR, H, W
133 | # numpy_array = np.expand_dims(numpy_array, axis=0) # 1, BGR, H, W
134 | # 
135 | # input_tensor = torch.from_numpy(numpy_array.copy())
136 | # return torch.autograd.Variable(input_tensor)
137 | 
138 | def get_coverage_firing_stats(encoding, mask):
139 | # encoding: C,H,W. mask: H,W
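# "Firing" below is the mean number of active channels per spatial position;
# "coverage" is the fraction of positions with at least one active channel.
# The _b / _ob variants restrict the statistic to positions inside (mask==1)
# and outside (mask==0) the mask. Small hypothetical example: per-pixel
# channel counts [[2,0],[1,3]] give firing 1.5 and coverage 0.75.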
140 | firecnt_pixel = np.sum(encoding, axis=0)
141 | fire_pixel = firecnt_pixel!=0
142 | 
143 | firing = np.mean(firecnt_pixel, axis=None)
144 | firing_b = np.mean(firecnt_pixel[np.where(mask==1)])
145 | firing_ob = np.mean(firecnt_pixel[np.where(mask==0)])
146 | 
147 | coverage = np.mean(fire_pixel, axis=None)
148 | coverage_b = np.mean(fire_pixel[np.where(mask==1)])
149 | coverage_ob = np.mean(fire_pixel[np.where(mask==0)])
150 | return (firing, firing_b, firing_ob, coverage, coverage_b, coverage_ob)
151 | 
152 | def predict_bbox(vc_encoding, short_edge_len = 14, coverage_thres = 0.3):
153 | height = vc_encoding.shape[2]
154 | width = vc_encoding.shape[3]
155 | 
156 | mask = np.zeros((height, width))
157 | center = (height//2, width//2)
158 | mask[center[0]-1:center[0]+2, center[1]-1:center[1]+2] = 1 # start with 3 by 3
159 | rlist, clist = np.where(mask==1)
160 | short_edge = np.min((np.max(rlist)-np.min(rlist), np.max(clist)-np.min(clist)))+1
161 | while True:
162 | rmin = np.min(rlist)
163 | rmax = np.max(rlist)
164 | cmin = np.min(clist)
165 | cmax = np.max(clist)
166 | 
167 | mask_ls = []
168 | fb_ls = []
169 | cb_ls = []
170 | if rmin>0:
171 | mask1 = np.copy(mask)
172 | mask1[rmin-1, cmin:cmax+1] = 1
173 | _, fb1, _, _, cb1, _ = get_coverage_firing_stats(vc_encoding[0], mask1)
174 | mask_ls.append(mask1)
175 | fb_ls.append(fb1)
176 | cb_ls.append(cb1)
177 | 
178 | if rmax<height-1:
179 | mask2 = np.copy(mask)
180 | mask2[rmax+1, cmin:cmax+1] = 1
181 | _, fb2, _, _, cb2, _ = get_coverage_firing_stats(vc_encoding[0], mask2)
182 | mask_ls.append(mask2)
183 | fb_ls.append(fb2)
184 | cb_ls.append(cb2)
185 | 
186 | if cmin>0:
187 | mask3 = np.copy(mask)
188 | mask3[rmin:rmax+1, cmin-1] = 1
189 | _, fb3, _, _, cb3, _ = get_coverage_firing_stats(vc_encoding[0], mask3)
190 | mask_ls.append(mask3)
191 | fb_ls.append(fb3)
192 | cb_ls.append(cb3)
193 | 
194 | if cmax<width-1:
195 | mask4 = np.copy(mask)
196 | mask4[rmin:rmax+1, cmax+1] = 1
197 | _, fb4, _, _, cb4, _ = get_coverage_firing_stats(vc_encoding[0], mask4)
198 | mask_ls.append(mask4)
199 | fb_ls.append(fb4)
200 | cb_ls.append(cb4)
201 | 
202 | mask_new = np.copy(mask)
203 | for mm in mask_ls:
204 | mask_new = np.logical_or(mask_new, mm)
205 | 
206 | rlist, clist = np.where(mask_new==1)
207 | short_edge = np.min((np.max(rlist)-np.min(rlist), np.max(clist)-np.min(clist)))+1
208 | _, _, _, _, cb_curr, _ = get_coverage_firing_stats(vc_encoding[0], mask_new)
209 | 
210 | 
211 | if short_edge > short_edge_len or cb_curr < coverage_thres:
212 | break
213 | 
214 | for mm in mask_ls:
215 | mask = np.logical_or(mask, mm)
216 | 
217 | return mask
--------------------------------------------------------------------------------
/vertex_id_picker.py:
--------------------------------------------------------------------------------
1 | import os, json
2 | 
3 | import d3, vdb
4 | 
5 | import numpy as np
6 | 
7 | import imageio
8 | 
9 | import matplotlib.pyplot as plt
10 | 
11 | import matplotlib.patches as patches
12 | 
13 | 
14 | 
15 | def read_vertex_list(obj_filename):
16 | 
17 | with open(obj_filename) as f:
18 | 
19 | lines = f.readlines()
20 | 
21 | 
22 | 
23 | lines = [v.split(' ') for v in lines]
24 | 
25 | lines = [(float(v[1]),float(v[2]),float(v[3])) for v in lines]
26 | 
27 | vertexs = np.array(lines)
28 | 
29 | 
30 | 
31 | return vertexs
32 | 
33 | 
34 | 
35 | def get_vertex_id(pixel_coords, vertexs_3d, cam_pose, depth_filename):
36 | 
37 | ''' Get vertex id
38 | 
39 | pixel_coords: (x, y)
40 | 
41 | vertexs_3d: Nx3 vertex array read from the *.obj (car or bike)
42 | 
43 | cam_pose: camera pose, defines the viewpoint
44 | 
45 | depth_filename: depth for this view, to filter invisible points
46 | 
47 | '''
48 | 
49 | vertexs_2d = cam_pose.project_to_cam_space(vertexs_3d)
50 | 
51 | # plt.plot(vertexs_2d[:,0], vertexs_2d[:,1], '*')
52 | 
53 | depth = np.load(depth_filename)
54 | 
55 | xs = vertexs_2d[:,0].astype('int'); ys = vertexs_2d[:,1].astype('int')
56 | 
57 | zs = vertexs_2d[:,2]
58 | 
59 | ds = depth[(ys, xs)]
60 | 
61 | # print(zs)
62 | 
63 | # print(ds)
64 | 
65 | # print(abs(zs - ds))
66 | 
67 | visible = abs(zs - ds) < 10
68 | 
69 | invisible = abs(zs - ds) > 10
70 | 
71 | # print(visible.sum())
72 | 
73 | # print(invisible.sum())
74 | 
75 | 
76 | 
77 | # visible_index = np.where(visible)[0]
78 | 
79 | # print(visible_index)
80 | 
81 | # visible_vertexs = vertexs_2d[visible_index, :]
82 | 
83 | # print(visible_vertexs.shape)
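# Visibility test: a projected vertex counts as visible when its camera-space
# depth agrees with the rendered depth map within 10 units; occluded vertices
# are pushed to a huge coordinate below so the pixel-distance argmin that
# follows can never select them.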
84 | 85 | vertexs_2d[invisible, :] = 10e10 86 | 87 | dist = np.linalg.norm(vertexs_2d[:,[0,1]] - np.array(pixel_coords), axis=1) 88 | 89 | vertex_index = np.argmin(dist) 90 | 91 | assert dist[vertex_index] == min(dist) 92 | 93 | # print('Match a match for coord %s, min distance is %.2f, vertex id is %d' % (pixel_coords, min(dist), vertex_index)) 94 | 95 | 96 | return vertex_index, vertexs_2d[vertex_index, :] 97 | 98 | 99 | 100 | def project_vertex_id(vertex_id, vertexs_3d, cam_pose): 101 | 102 | vertexs_2d = cam_pose.project_to_cam_space(vertexs_3d) 103 | 104 | point_2d = vertexs_2d[vertex_id, :] 105 | 106 | return point_2d 107 | 108 | 109 | 110 | def draw_bb(ax, bbox): 111 | 112 | x0, x1, y0, y1 = bbox 113 | 114 | cx = (x0 + x1) / 2; cy = (y0 + y1) / 2 115 | 116 | w = x1 - x0; h = y1 - y0 117 | 118 | rect = patches.Rectangle((x0, y0), w, h, linewidth=1, edgecolor='r', facecolor='none') 119 | 120 | ax.add_patch(rect) 121 | 122 | 123 | 124 | def get_frame_info(db_root, obj_name, cam_name, frame_id): 125 | 126 | obj_filename = os.path.join(db_root, obj_name + '.obj') 127 | 128 | lit_filename = os.path.join(db_root, cam_name, 'lit', '%08d.png' % frame_id) 129 | 130 | depth_filename = os.path.join(db_root, cam_name, 'depth', '%08d.npy' % frame_id) 131 | 132 | cam_filename = os.path.join(db_root, cam_name, 'caminfo', '%08d.json' % frame_id) 133 | 134 | scene_filename = os.path.join(db_root, 'scene', '%08d.json' % frame_id) 135 | 136 | seg_filename = os.path.join(db_root, cam_name, 'seg', '%08d.png' % frame_id) 137 | 138 | with open(scene_filename) as f: 139 | 140 | data = json.load(f) 141 | 142 | 143 | 144 | obj_info = data[obj_name] 145 | 146 | obj_location = np.array([ 147 | 148 | obj_info['Location']['X'], 149 | 150 | obj_info['Location']['Y'], 151 | 152 | obj_info['Location']['Z'] 153 | 154 | ]) 155 | 156 | 157 | 158 | with open(cam_filename) as f: 159 | 160 | data = json.load(f) 161 | 162 | 163 | 164 | loc = data['Location']; rot = data['Rotation'] 165 | 166 | assert data['Fov'] == 90 167 | 168 | cam_pose = d3.CameraPose(loc['X'], loc['Y'], loc['Z'], 169 | 170 | rot['Pitch'], rot['Yaw'], rot['Roll'], 171 | 172 | data['FilmWidth'], data['FilmHeight'], data['FilmWidth'] / 2) 173 | 174 | 175 | 176 | vertexs_3d = read_vertex_list(obj_filename) 177 | 178 | vertexs_3d = vertexs_3d + obj_location 179 | 180 | 181 | 182 | seg_im = imageio.imread(seg_filename) 183 | 184 | obj_mask = vdb.get_obj_mask(seg_im, obj_info['AnnotationColor']) 185 | 186 | bbox = vdb.seg2bb(obj_mask) 187 | 188 | 189 | 190 | return lit_filename, cam_pose, depth_filename, vertexs_3d, bbox 191 | 192 | 193 | 194 | def main(): 195 | 196 | db_root = '/mnt/c/data/temp/DenseMatching/20181026_1324' 197 | 198 | obj_name = 'Sedan2Door_Vehicle_Sedan2Door_LOD0_8' 199 | 200 | cam_name = 'sedan2door' 201 | 202 | src_frame_id = 0 203 | 204 | tgt_frame_id = 1 205 | 206 | 207 | 208 | src_lit_filename, src_cam_pose, src_depth_filename, src_vertexs_3d, src_bbox = get_frame_info(db_root, obj_name, cam_name, src_frame_id) 209 | 210 | tgt_lit_filename, tgt_cam_pose, d, tgt_vertexs_3d, tgt_bbox = get_frame_info(db_root, obj_name, cam_name, tgt_frame_id) 211 | 212 | 213 | 214 | ax1 = plt.subplot(121); 215 | 216 | ax2 = plt.subplot(122); 217 | 218 | ax1.imshow(plt.imread(src_lit_filename)) 219 | 220 | ax2.imshow(plt.imread(tgt_lit_filename)) 221 | 222 | for input_vertex in [(10,10), (320, 190), (353, 105), (345, 220), (410, 210)]: 223 | 224 | vertex_id, matched_2d = get_vertex_id(input_vertex, src_vertexs_3d, src_cam_pose, src_depth_filename) 225 | 
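# Ground-truth correspondence transfer: get_vertex_id snapped the input pixel
# to the nearest visible mesh vertex in the source view; projecting that same
# vertex id with the target camera pose yields the matching pixel in the
# target view.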
226 | predicted_2d = project_vertex_id(vertex_id, tgt_vertexs_3d, tgt_cam_pose) 227 | 228 | 229 | 230 | ax1.plot(input_vertex[0], input_vertex[1], 'ro') 231 | 232 | draw_bb(ax1, src_bbox) 233 | 234 | ax1.plot(matched_2d[0], matched_2d[1], 'b*') 235 | 236 | ax2.plot(predicted_2d[0], predicted_2d[1], 'b*') 237 | 238 | draw_bb(ax2, tgt_bbox) 239 | 240 | plt.show() 241 | 242 | 243 | 244 | if __name__ == '__main__': 245 | 246 | main() 247 | 248 | --------------------------------------------------------------------------------
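Taken together, these scripts form a simple keypoint-transfer pipeline: refine_pool3.py writes *_matched.pickle files whose entries are [x1, y1, x2, y2, score] rows for matched pool4/pool3 patches, and predict() in solve_nearest.py turns those rows into a point-to-point warp. Below is a minimal usage sketch; the match filename and image sizes are hypothetical, and predict() is copied inline because importing solve_nearest.py directly would also run its top-level processing loop.

import pickle
import numpy as np

# predict() as defined in solve_nearest.py: inverse-distance-weighted average
# of the 4 nearest matches, with offsets taken in normalized coordinates.
def predict(pnt1, matched_arr, width1, height1, width2, height2):
    nn_ls = np.argsort(np.sum((matched_arr[:, 0:2] - pnt1.reshape(1, -1))**2, axis=1))[0:4]
    matched_nearest = matched_arr[nn_ls.astype(int)]
    weights = 1.0 / np.sqrt(np.sum((matched_nearest[:, 0:2] - pnt1)**2, axis=1))
    transfer = np.average(matched_nearest[:, 2:4] / np.array([[width2, height2]]) -
                          matched_nearest[:, 0:2] / np.array([[width1, height1]]),
                          axis=0, weights=weights)
    mscore = np.average(matched_nearest[:, 4], weights=weights)
    pnt2 = (pnt1 / np.array([width1, height1]) + transfer) * np.array([width2, height2])
    return pnt2, mscore

# Hypothetical match file from refine_pool3.py: matched[0] holds pool4-level
# rows, matched[1] pool3-level rows, each [x1, y1, x2, y2, score].
with open('img0VSimg1_matched.pickle', 'rb') as fh:
    matched = pickle.load(fh)
matched_arr = np.array(matched[0] + matched[1], dtype=float)

# Transfer one keypoint from a 640x480 image 1 into an 800x600 image 2.
pnt1 = np.array([320.0, 240.0])
pnt2, score = predict(pnt1, matched_arr, 640, 480, 800, 600)
print(pnt2, score)  # predicted location in image 2 and its match confidence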