├── .gitignore ├── README.md ├── datasets └── linemod │ └── dataset_posecnn.py ├── eval_net.py ├── lib ├── HoughVoting │ ├── .gitignore │ ├── __init__.py │ ├── houghvoting.cc │ ├── houghvoting.py │ └── setup.py ├── center_est_funcs.py ├── loss_funcions.py ├── roi_pool_pytorch.py ├── vgg16_convs.py ├── vgg16_convs_combine_mask.py └── vgg16_convs_combine_seg_center.py ├── testpytorch.py └── train_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | pretrained_model/vgg16-397923af.pth 2 | log/test 3 | log/train 4 | trained_model/pretrained-vgg-05-1-weight 5 | trained_model/pretrained-posecnn-test 6 | trained_model/pretrained-posecnn-linemod 7 | *.pyc 8 | *.zip 9 | trained_model/pretrained-center-linemod 10 | trained_model/checkpoint.pth.tar 11 | log/* 12 | trained_model/* 13 | pretrained_model/* 14 | *.ipynb 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EECS442_CourseProject 2 | * EECS 442: Computer Vision 2019 Winter Course Project Workspace 3 | * Instructor: David Fouhey 4 | * University of Michigan 5 | 6 | -------------------------------------------------------------------------------- /datasets/linemod/dataset_posecnn.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import errno 6 | import torch 7 | import json 8 | import codecs 9 | import numpy as np 10 | import sys 11 | import torchvision.transforms as transforms 12 | import argparse 13 | import json 14 | import time 15 | import random 16 | import numpy.ma as ma 17 | import copy 18 | import scipy.misc 19 | import scipy.io as scio 20 | import yaml 21 | import lib.center_est_funcs as center_img_gt 22 | import matplotlib.pyplot as plt 23 | 24 | class PoseDataset(data.Dataset): 25 | def __init__(self, mode, num, add_noise, root, noise_trans, refine, onehot, 26 | seg = False, vertex_reg = False, vertex_reg_hough = False): 27 | # self.objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] 28 | # self.objlist = [1, 2] 29 | self.objlist = [2] 30 | 31 | self.mode = mode 32 | 33 | self.seg_list = [] 34 | self.list_segmentation = [] # only the 2th class have the ground truth segmentation 35 | self.list_rgb = [] # save the path of the rgbd image 36 | self.list_depth = [] # save the path of the depth image 37 | self.list_label = [] # save the path of the label image 38 | self.list_obj = [] # save the list of objlist(the folder name) 39 | self.list_rank = [] # save the index of data in the folder 40 | self.meta = {} # meta_file have the ground truth information 41 | self.pt = {} 42 | self.root = root 43 | self.noise_trans = noise_trans 44 | self.refine = refine 45 | self.onehot = onehot 46 | self.seg_mode = seg 47 | self.vertex_reg_mode = vertex_reg 48 | self.vertex_reg_hough_mode = vertex_reg_hough 49 | 50 | item_count = 0 51 | for item in self.objlist: 52 | if self.seg_mode and item!=2: 53 | continue 54 | if self.mode != 'train': 55 | input_file = open('{0}/data/{1}/train.txt'.format(self.root, '%02d' % item)) 56 | else: 57 | input_file = open('{0}/data/{1}/test.txt'.format(self.root, '%02d' % item)) 58 | while 1: 59 | item_count += 1 60 | input_line = input_file.readline() 61 | if self.mode == 'test' and item_count % 10 != 0: 62 | continue 63 | if not input_line: 64 | break 65 | if input_line[-1:] == '\n': 66 | input_line = 
input_line[:-1] 67 | 68 | self.list_rgb.append('{0}/data/{1}/rgb/{2}.png'.format(self.root, '%02d' % item, input_line)) 69 | self.list_depth.append('{0}/data/{1}/depth/{2}.png'.format(self.root, '%02d' % item, input_line)) 70 | 71 | if self.mode == 'eval': 72 | self.list_label.append('{0}/segnet_results/{1}_label/{2}_label.png'.format(self.root, '%02d' % item, input_line)) 73 | elif self.seg_mode: 74 | self.list_label.append('{0}/data/{1}/mask_all/{2}.png'.format(self.root, '%02d' % item, input_line)) 75 | else: 76 | self.list_label.append('{0}/data/{1}/mask/{2}.png'.format(self.root, '%02d' % item, input_line)) 77 | 78 | self.list_obj.append(item) 79 | self.list_rank.append(int(input_line)) 80 | 81 | 82 | meta_file = open('{0}/data/{1}/gt.yml'.format(self.root, '%02d' % item), 'r') 83 | self.meta[item] = yaml.load(meta_file, Loader=yaml.FullLoader) 84 | # self.pt[item] = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % item)) 85 | print("Object {0} buffer loaded".format(item)) 86 | 87 | self.length = len(self.list_rgb) 88 | 89 | self.cam_cx = 325.26110 90 | self.cam_cy = 242.04899 91 | self.cam_fx = 572.41140 92 | self.cam_fy = 573.57043 93 | self.intrin_matrix = np.array([[self.cam_fx, 0 , self.cam_cx, 0], 94 | [0, self.cam_fy, self.cam_cy, 0], 95 | [0, 0, 1, 0]]) 96 | 97 | self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 98 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 99 | 100 | self.num = num # this if the number of points 101 | self.add_noise = add_noise 102 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05) 103 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 104 | self.border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 105 | self.num_pt_mesh_large = 500 106 | self.num_pt_mesh_small = 500 107 | self.symmetry_obj_idx = [7, 8] 108 | # This is the pixel value for each class, it has the same order with data in the folder 109 | # pixel value == 21 => the object is 01 in the folder 110 | # self.seg_list = [0, 21, 43, 64, 85,106,128, 149, 170, 191, 213, 234, 255] 111 | self.seg_list = [0, 21, 43, 106,128, 170, 191, 213, 234, 255] 112 | self.weight_clsss = np.array([0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1]) 113 | self.cls_indexes = [i for i in range(len(self.seg_list))] 114 | self.num_classes = len(self.seg_list) 115 | self.seg_label_to_gt_label = [0, 1, 2, 5, 6, 8, 9, 10, 11, 12] 116 | self.gt_label_to_seg_label = [0, 1, 2, -1, -1, 3, 4, -1, 5, 6, 7, 8, 9] 117 | self.extents = self.get_extents() 118 | 119 | def get_extents(self): 120 | extents = np.zeros((self.num_classes, 3)) 121 | for i in range(1,len(self.gt_label_to_seg_label)): 122 | if self.gt_label_to_seg_label[i]>0: 123 | pt = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % i)) 124 | model_points = pt / 1000.0 125 | points_arr = np.array(model_points) 126 | xyz_min = np.min(points_arr, axis = 0) 127 | xyz_max = np.max(points_arr, axis = 0) 128 | extents[self.gt_label_to_seg_label[i], :] = xyz_max - xyz_min 129 | return -np.sort(-extents, axis = 1) 130 | 131 | 132 | 133 | 134 | def __getitem__(self, index): 135 | img = Image.open(self.list_rgb[index]) 136 | ori_img = np.array(img) 137 | depth = np.array(Image.open(self.list_depth[index])) 138 | label_in = np.array(Image.open(self.list_label[index])) 139 | obj = self.list_obj[index] # what the label for this index 140 | rank = self.list_rank[index] 141 | img = np.array(img)[:, :, :3] 142 | img = np.transpose(img, (2, 0, 1)) 
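# At this point (for the standard 640x480 LINEMOD frames) img is a (3, 480, 640)
# channel-first array ready for torch, depth is the raw (480, 640) depth map, and
# label_in is the (480, 640) mask image whose pixel values index into self.seg_list.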
143 | 144 | # Since the second object have more information 145 | if obj == 2: 146 | # if the object is the second object 147 | if not self.vertex_reg_mode: 148 | # if we only need the information of the second object 149 | for i in range(0, len(self.meta[obj][rank])): 150 | if self.meta[obj][rank][i]['obj_id'] == 2: 151 | meta = self.meta[obj][rank][i] 152 | break 153 | else: 154 | # if we need all the information for all objects 155 | meta = self.meta[obj][rank] 156 | else: 157 | meta = self.meta[obj][rank][0] 158 | 159 | label = None 160 | # unique, counts = np.unique(label_in, return_counts=True) 161 | # print(unique, counts) 162 | if self.seg_mode: 163 | label = np.zeros((len(self.seg_list), label_in.shape[0], label_in.shape[1])) 164 | for j in range(len(self.seg_list)): 165 | label[j, :] = label_in == self.seg_list[j] 166 | if not self.onehot: 167 | label = np.argmax(label, axis = 0) 168 | 169 | # plt.imshow(label, cmap = 'hot', interpolation = 'nearest') 170 | # plt.show() 171 | # plt.pause(100) 172 | 173 | # without vertex_reg mod, only return the data for segmentation 174 | if not self.vertex_reg_mode: 175 | return torch.from_numpy(img.astype(np.float32)), \ 176 | torch.from_numpy(label.astype(int)) 177 | 178 | # with vertex_reg mode 179 | # Relate with meta 180 | bboxs = np.zeros((self.num_classes, 5)) # all the bounding box in the same image for different class 181 | extrin_matrixs = np.zeros((self.num_classes, 4, 4)) 182 | extrin_matrixs[:,3,3] = 1 183 | centers = np.zeros((self.num_classes, 2)) 184 | depth_centers = np.zeros((self.num_classes, 1)) 185 | for sub_meta in meta: 186 | seg_index = self.gt_label_to_seg_label[int(sub_meta['obj_id'])] 187 | # preprocess the bounding box information 188 | bboxs[seg_index, 0] = 1 189 | rmin, rmax, cmin, cmax = get_bbox(sub_meta['obj_bb']) 190 | bboxs[seg_index, 1:] = [cmin, rmin, cmax, rmax] 191 | 192 | # preprocess the pose information 193 | extrin_matrixs[seg_index, 0:3, 0:3] = np.resize(np.array(sub_meta['cam_R_m2c']), (3, 3)) 194 | extrin_matrixs[seg_index, 0:3, 3] = np.array(sub_meta['cam_t_m2c']) 195 | obj_center = np.ones((4,1)) 196 | obj_center[0:3, 0] = extrin_matrixs[seg_index, 0:3, 3] 197 | center_homo = self.intrin_matrix.dot(obj_center) 198 | centers[seg_index, :] = center_homo[0:2].reshape(-1)/center_homo[2] 199 | depth_centers[seg_index, :] = extrin_matrixs[seg_index, 2, 3] 200 | if self.onehot: 201 | label_single_channel = np.argmax(label, axis = 0) 202 | else: 203 | label_single_channel = label 204 | vertex_targets, vertex_weights = center_img_gt._vote_centers_train(label_single_channel, self.cls_indexes, 205 | centers, depth_centers, self.num_classes) 206 | 207 | # with vertex reg and hough voting 208 | # load the point cloud and set the size of the point clout to be num_pt_mesh_small 209 | 210 | # meta data include camera intrinsic matrix 211 | meta = np.zeros((48,)) 212 | meta[0] = self.cam_fx 213 | meta[4] = self.cam_fy 214 | meta[2] = self.cam_cx 215 | meta[5] = self.cam_cy 216 | 217 | # gt give to hough voting information to calculate weight 218 | gt_hough = np.zeros((10,1)) 219 | 220 | 221 | 222 | return (torch.from_numpy(img.astype(np.float32)), 223 | torch.from_numpy(label.astype(int)), 224 | torch.from_numpy(vertex_targets.astype(np.float32)), 225 | torch.from_numpy(vertex_weights.astype(np.float32)), 226 | torch.from_numpy(self.extents.astype(np.float32)), 227 | torch.from_numpy(meta.astype(np.float32)), 228 | torch.from_numpy(gt_hough.astype(np.float32)), 229 | 
torch.from_numpy(extrin_matrixs.astype(np.float32)), 230 | torch.from_numpy(bboxs.astype(np.float32))) 231 | 232 | """ 233 | return torch.from_numpy(cloud.astype(np.float32)), \ 234 | torch.LongTensor(choose.astype(np.int32)), \ 235 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 236 | torch.from_numpy(target.astype(np.float32)), \ 237 | torch.from_numpy(model_points.astype(np.float32)), \ 238 | torch.LongTensor([self.objlist.index(obj)]) 239 | """ 240 | 241 | def __len__(self): 242 | return self.length 243 | 244 | def get_sym_list(self): 245 | return self.symmetry_obj_idx 246 | 247 | def get_num_points_mesh(self): 248 | if self.refine: 249 | return self.num_pt_mesh_large 250 | else: 251 | return self.num_pt_mesh_small 252 | 253 | 254 | 255 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 256 | img_width = 480 257 | img_length = 640 258 | 259 | def get_bbox(bbox): 260 | bbx = [bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2]] 261 | if bbx[0] < 0: 262 | bbx[0] = 0 263 | if bbx[1] >= 480: 264 | bbx[1] = 479 265 | if bbx[2] < 0: 266 | bbx[2] = 0 267 | if bbx[3] >= 640: 268 | bbx[3] = 639 269 | rmin, rmax, cmin, cmax = bbx[0], bbx[1], bbx[2], bbx[3] 270 | r_b = rmax - rmin 271 | for tt in range(len(border_list)): 272 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 273 | r_b = border_list[tt + 1] 274 | break 275 | c_b = cmax - cmin 276 | for tt in range(len(border_list)): 277 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 278 | c_b = border_list[tt + 1] 279 | break 280 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 281 | rmin = center[0] - int(r_b / 2) 282 | rmax = center[0] + int(r_b / 2) 283 | cmin = center[1] - int(c_b / 2) 284 | cmax = center[1] + int(c_b / 2) 285 | if rmin < 0: 286 | delt = -rmin 287 | rmin = 0 288 | rmax += delt 289 | if cmin < 0: 290 | delt = -cmin 291 | cmin = 0 292 | cmax += delt 293 | if rmax > 480: 294 | delt = rmax - 480 295 | rmax = 480 296 | rmin -= delt 297 | if cmax > 640: 298 | delt = cmax - 640 299 | cmax = 640 300 | cmin -= delt 301 | return rmin, rmax, cmin, cmax 302 | 303 | 304 | def ply_vtx(path): 305 | f = open(path) 306 | assert f.readline().strip() == "ply" 307 | f.readline() 308 | f.readline() 309 | N = int(f.readline().split()[-1]) 310 | while f.readline().strip() != "end_header": 311 | continue 312 | pts = [] 313 | for _ in range(N): 314 | pts.append(np.float32(f.readline().split()[:3])) 315 | return np.array(pts) 316 | -------------------------------------------------------------------------------- /eval_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Apr 15 11:04:43 2019 4 | 5 | @author: sunhu 6 | """ 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from sklearn.metrics import average_precision_score as ap_score 13 | from tqdm import tqdm 14 | 15 | 16 | 17 | def cal_AP(testloader, net, criterion, device, num_obj, opt): 18 | ''' 19 | Calculate Average Precision 20 | Evaluation for the semantic segmentation part 21 | ''' 22 | cnt = 0 23 | aps = [] 24 | with torch.no_grad(): 25 | net = net.eval() 26 | preds = [[] for _ in range(num_obj)] 27 | heatmaps = [[] for _ in range(num_obj)] 28 | for data in tqdm(testloader): 29 | if opt.vertex_reg == True: 30 | # Only train the center-voting part 31 | images, labels, vertex_targets, vertex_weights, extents = data 32 
| images = images.to(device) 33 | labels = labels.type('torch.LongTensor').to(device) 34 | extents = extents.to(device) 35 | output_seg, _, _ = net(images, extents) 36 | else: 37 | # Only train the segmentation part 38 | images, labels = data 39 | images = images.to(device) 40 | labels = labels.type('torch.LongTensor').to(device) 41 | output_seg = net(images) 42 | output = output_seg.cpu().numpy() 43 | for c in range(num_obj): 44 | preds[c].append(output[:, c].reshape(-1)) 45 | heatmaps[c].append(labels[:, c].cpu().numpy().reshape(-1)) 46 | 47 | for c in range(num_obj): 48 | preds[c] = np.concatenate(preds[c]) 49 | heatmaps[c] = np.concatenate(heatmaps[c]) 50 | if heatmaps[c].max() == 0: 51 | ap = float('nan') 52 | else: 53 | ap = ap_score(heatmaps[c], preds[c]) 54 | aps.append(ap) 55 | print("AP = {}".format(ap)) 56 | 57 | # print(losses / cnt) 58 | return aps -------------------------------------------------------------------------------- /lib/HoughVoting/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nxu96/PoseCNN_PyTorch/92a9a005f3e4f61540cee11650b4288c615b9beb/lib/HoughVoting/.gitignore -------------------------------------------------------------------------------- /lib/HoughVoting/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pose CNN Pytorch Implementation: Hough Voting Layer 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # -------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /lib/HoughVoting/houghvoting.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "opencv2/opencv.hpp" 11 | 12 | #include 13 | // #include "opencv2/core/matx.hpp" 14 | #include "opencv2/core/core.hpp" 15 | // #include 16 | #include 17 | #include "torch/script.h" 18 | #include 19 | #include 20 | 21 | #define VERTEX_CHANNELS 3 22 | using namespace torch; 23 | // typedef Eigen::Matrix Vec; 24 | using namespace at; 25 | 26 | int clamp(int val, int min_val, int max_val) 27 | { 28 | return std::max(min_val, std::min(max_val, val)); 29 | } 30 | 31 | // // Hough Voting layer main function 32 | // std::vector> Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, //const Tensor& meta_data, const Tensor& gt, int is_train); 33 | //std::vector> Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, //const Tensor& meta_data, const Tensor& gt, int is_train); 34 | torch::Tensor Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, const Tensor& meta_data, const Tensor& gt, int is_train); 35 | 36 | 37 | // Get ground truth model 3D geometry 38 | void getBb3Ds(const Tensor& extents, std::vector>& bb3Ds, int num_classes); 39 | 40 | // // Get ground truth model 3D bounding box 41 | inline std::vector getBB3D(const cv::Vec& extent); 42 | 43 | // // Projected 2D Bounding box 44 | // inline cv::Rect getBB2D(int imageWidth, int imageHeight, const std::vector& bb3D, const cv::Mat& camMat, const cv::Mat& rvec, const cv::Mat& tvec); 45 | 46 | inline float angle_distance(cv::Point2f x, cv::Point2f n, cv::Point2f p); 47 | 48 | void projectPoints(std::vector bb3Ds, float& bb_distance, Eigen::MatrixXf camMat, std::vector& bb2D); 
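// Each ROI pushed into `outputs` below is a 14-element vector laid out as:
//   [0] batch index                [1] class id (-1 marks the dummy "no detection" entry)
//   [2..5] x1, y1, x2, y2 of the 2D box (voted center +/- half width/height, padded by 5%)
//   [6] hough vote count           [7..10] identity quaternion placeholder (1, 0, 0, 0)
//   [11..13] estimated translation (rx * d, ry * d, d) recovered from the voted center and depth d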
49 | // Hough voting functionality 50 | void hough_voting(const Tensor& v_label, const Tensor& v_vertex, const int labelmap, const int vertmap, std::vector> bb3Ds, int batch, int height, int width, int num_classes, int is_train,float fx, float fy, float px, float py, std::vector >& outputs); 51 | 52 | // // Find better bb2D geometry 53 | // inline void compute_width_height(const Tensor& label, const Tensor& vertex, const int labelmap, const int vertmap, cv::Point2f center, std::vector> bb3Ds, cv::Mat camMat, float inlierThreshold, int height, int width, int channel, int num_classes, int & bb_width, int & bb_height, float & bb_distance); 54 | inline void compute_width_height(const Tensor& v_label, const Tensor& v_vertex, const int labelmap, const float vertmap, cv::Point2f center, std::vector> bb3Ds, Eigen::Matrix3f camMat,float inlierThreshold, int height, int width, int channel, int num_classes, int & bb_width, int & bb_height, float & bb_distance); 55 | 56 | // /////////////////////////////// 57 | 58 | // std::vector> Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, //const Tensor& meta_data, const Tensor& gt, int is_train) 59 | //std::vector> Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, //const Tensor& meta_data, const Tensor& gt, int is_train) 60 | torch::Tensor Forward(const Tensor& label, const Tensor& vertex, const Tensor& extents, const Tensor& meta_data, const Tensor& gt, int is_train) 61 | 62 | { 63 | // Grab the input tensor 64 | /////////////////////////////// 65 | // format of the meta_data 66 | // intrinsic matrix: meta_data[0 ~ 8] 67 | // inverse intrinsic matrix: meta_data[9 ~ 17] 68 | // pose_world2live: meta_data[18 ~ 29] 69 | // pose_live2world: meta_data[30 ~ 41] 70 | // voxel step size: meta_data[42, 43, 44] 71 | // voxel min value: meta_data[45, 46, 47] 72 | auto v_meta_data = meta_data.view(-1); 73 | 74 | // const float* v_gt = gt.flat().data(); 75 | // const float* v_gt = >.view(-1); 76 | auto v_gt = gt.view(-1); 77 | // batch size 78 | int batch_size = label.size(0); 79 | // height 80 | int height = label.size(1); 81 | // width 82 | int width = label.size(2); 83 | 84 | auto v_label = label.contiguous().view(-1); 85 | auto v_vertex = vertex.contiguous().view(-1); 86 | // num of classes 87 | int num_classes = vertex.size(3) / VERTEX_CHANNELS; 88 | int num_meta_data = meta_data.size(1); 89 | // int num_gt = gt.size(0); 90 | 91 | std::vector > outputs; 92 | auto v_extents = extents.view(-1); 93 | std::vector> bb3Ds; 94 | 95 | getBb3Ds(v_extents, bb3Ds, num_classes); 96 | 97 | int index_meta_data = 0; 98 | float fx, fy, px, py; 99 | auto acc_v_meta_data = v_meta_data.accessor(); 100 | 101 | for (int n = 0; n < batch_size; n++) 102 | { 103 | // these map are the starting index 104 | const int labelmap = n * height * width; 105 | const int vertmap = n * height * width * VERTEX_CHANNELS * num_classes; 106 | // find camera parameters 107 | fx = acc_v_meta_data[index_meta_data + 0]; 108 | fy = acc_v_meta_data[index_meta_data + 4]; 109 | px = acc_v_meta_data[index_meta_data + 2]; 110 | py = acc_v_meta_data[index_meta_data + 5]; 111 | 112 | hough_voting(v_label, v_vertex, labelmap, vertmap, bb3Ds, n, height, width, num_classes, is_train, fx, fy, px, py, outputs); 113 | 114 | index_meta_data += num_meta_data; 115 | } 116 | if (outputs.size() == 0) 117 | { 118 | std::cout << "no detection" << std::endl; 119 | // add a dummy detection to the output 120 | cv::Vec roi; 121 | roi(0) = 0; 122 | roi(1) = -1; 123 | 
outputs.push_back(roi); 124 | } 125 | // to change the datatype from vector to tensor 126 | int n_output = outputs.size(); 127 | int size_single_roi = outputs[0].rows; 128 | // std::cout<<"Size_single_roi: "<> result(n_output, std::vector(size_single_roi)); 139 | // for (int i = 0; i> bb3Ds, int batch, int height, int width, int num_classes, int is_train, float fx, float fy, float px, float py, std::vector >& outputs){ 162 | 163 | float inlierThreshold = 0.9; 164 | int votingThreshold = 50; 165 | 166 | // camera intrinsic matrix 3 X 3 167 | // cv::Mat camMat=cv::Mat::zeros(3,3,CV_32F); 168 | // int sz[] = {3,3}; 169 | // cv::Mat camMat; 170 | // camMat.create(2,sz,CV_32FC1); 171 | // camMat = Scalar(0); 172 | // std::vector > camMat(3, std::vector(0)); 173 | // camMat.at([0][0]) = fx; 174 | 175 | 176 | // cv::Mat_ camMat = cv::Mat_::zeros(3, 3); 177 | // camMat(0, 0) = fx; 178 | // camMat(1, 1) = fy; 179 | // camMat(2, 2) = 1.f; 180 | // camMat(0, 2) = px; 181 | // camMat(1, 2) = py; 182 | 183 | 184 | // Xu Ning 185 | Eigen::Matrix3f camMat; 186 | // camMat << fx, 0.0, px. 187 | // 0.0, fy, py, 188 | // 0.0, 0.0, 1.0; 189 | camMat(0,0) = fx; 190 | camMat(1,1) = fy; 191 | camMat(2,2) = 1.f; 192 | camMat(0,2) = px; 193 | camMat(1,2) = py; 194 | camMat(0,1) = 0; 195 | camMat(1,0) = 0; 196 | camMat(2,0) = 0; 197 | camMat(2,1) = 0; 198 | 199 | 200 | // initialize hough space 201 | // H X W X N integer 202 | int* hough_space = (int*)malloc(sizeof(int) * height * width * num_classes); 203 | // Initialize all values to 0 204 | memset(hough_space, 0, height * width * num_classes); 205 | // N integer 206 | int* flags = (int*)malloc(sizeof(int) * num_classes); 207 | // Initialize all values in memory space to 0 208 | memset(flags, 0, num_classes); 209 | auto acc_label = v_label.accessor(); 210 | auto acc_vertex = v_vertex.accessor(); 211 | // for each pixel 212 | for (int x = 0; x < width; x++) 213 | { 214 | for (int y = 0; y < height; y++) 215 | { 216 | // here need to understand the value of label map 217 | int c = acc_label[labelmap+y * width + x]; // label map is one dimension array contains pixel wise image label map, map to class 1-13 etc.. 218 | if (c > 0) // this pixel is in this object class 219 | { 220 | flags[c] = 1; // this is a flag of whether there is this object in this image. 
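// The flattened vertex map has shape (H, W, 3 * num_classes): for pixel (x, y), the three
// channels belonging to class c hold the predicted unit direction (u, v) towards that
// class's object center plus a depth encoding. Each foreground pixel below walks a ray
// along (u, v) and casts a vote for the hough-space cells it passes through.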
221 | // read the predict center direction 222 | int offset = VERTEX_CHANNELS * c + VERTEX_CHANNELS * num_classes * (y * width + x); // Don't understand this 223 | float u = acc_vertex[vertmap+offset]; 224 | float v = acc_vertex[vertmap+offset + 1]; 225 | float norm = sqrt(u * u + v * v); // u and v here are the delta_x and delta_y 226 | u /= norm; 227 | v /= norm;// (u,v) is the unit vector indicates center direction 228 | 229 | // voting 230 | float delta = 1.0 / fabs(u); 231 | float cx = x; 232 | float cy = y; 233 | while(1) 234 | { 235 | cx += delta * u; 236 | cy += delta * v; 237 | int center_x = int(cx); 238 | int center_y = int(cy); 239 | if (center_x >= 0 && center_x < width && center_y >= 0 && center_y < height) 240 | { 241 | offset = c + num_classes * (center_y * width + center_x); 242 | hough_space[offset] += 1; 243 | } 244 | else 245 | break; 246 | } 247 | } 248 | } 249 | } 250 | // find the maximum in hough space 251 | for (int c = 1; c < num_classes; c++) 252 | { 253 | if (flags[c]) 254 | { 255 | int max_vote = 0; 256 | int max_x, max_y; 257 | for (int x = 0; x < width; x++) 258 | { 259 | for (int y = 0; y < height; y++) 260 | { 261 | int offset = c + num_classes * (y * width + x); 262 | if (hough_space[offset] > max_vote) 263 | { 264 | max_vote = hough_space[offset]; 265 | max_x = x; 266 | max_y = y; 267 | } 268 | } 269 | } 270 | if (max_vote < votingThreshold) 271 | continue; 272 | 273 | // center 274 | cv::Point2f center(max_x, max_y); 275 | int bb_width, bb_height; 276 | float bb_distance; 277 | 278 | compute_width_height(v_label, v_vertex, labelmap, vertmap, center, bb3Ds, camMat, inlierThreshold, height, width, c, num_classes, bb_width, bb_height, bb_distance); 279 | 280 | // construct output 281 | cv::Vec roi; 282 | roi(0) = batch; //batch number index 0 to batchsize -1 283 | roi(1) = c; //cls number index 1 to 13 284 | 285 | // bounding box 286 | float scale = 0.05; 287 | roi(2) = center.x - bb_width * (0.5 + scale); 288 | roi(3) = center.y - bb_height * (0.5 + scale); 289 | roi(4) = center.x + bb_width * (0.5 + scale); 290 | roi(5) = center.y + bb_height * (0.5 + scale); 291 | // score 292 | roi(6) = max_vote; 293 | 294 | // pose 295 | float rx = (center.x - px) / fx; 296 | float ry = (center.y - py) / fy; 297 | roi(7) = 1; 298 | roi(8) = 0; 299 | roi(9) = 0; 300 | roi(10) = 0; 301 | roi(11) = rx * bb_distance; 302 | roi(12) = ry * bb_distance; 303 | roi(13) = bb_distance; 304 | 305 | outputs.push_back(roi); 306 | // ///////////// 307 | // // TODO 308 | // if (is_train) 309 | // { 310 | // // add jittering rois 311 | // float x1 = roi(2); 312 | // float y1 = roi(3); 313 | // float x2 = roi(4); 314 | // float y2 = roi(5); 315 | // float ww = x2 - x1; 316 | // float hh = y2 - y1; 317 | 318 | // // (-1, -1) 319 | // roi(2) = x1 - 0.05 * ww; 320 | // roi(3) = y1 - 0.05 * hh; 321 | // roi(4) = roi(2) + ww; 322 | // roi(5) = roi(3) + hh; 323 | // outputs.push_back(roi); 324 | 325 | // // (+1, -1) 326 | // roi(2) = x1 + 0.05 * ww; 327 | // roi(3) = y1 - 0.05 * hh; 328 | // roi(4) = roi(2) + ww; 329 | // roi(5) = roi(3) + hh; 330 | // outputs.push_back(roi); 331 | 332 | // // (-1, +1) 333 | // roi(2) = x1 - 0.05 * ww; 334 | // roi(3) = y1 + 0.05 * hh; 335 | // roi(4) = roi(2) + ww; 336 | // roi(5) = roi(3) + hh; 337 | // outputs.push_back(roi); 338 | 339 | // // (+1, +1) 340 | // roi(2) = x1 + 0.05 * ww; 341 | // roi(3) = y1 + 0.05 * hh; 342 | // roi(4) = roi(2) + ww; 343 | // roi(5) = roi(3) + hh; 344 | // outputs.push_back(roi); 345 | 346 | // // (0, -1) 347 | // roi(2) = 
x1; 348 | // roi(3) = y1 - 0.05 * hh; 349 | // roi(4) = roi(2) + ww; 350 | // roi(5) = roi(3) + hh; 351 | // outputs.push_back(roi); 352 | 353 | // // (-1, 0) 354 | // roi(2) = x1 - 0.05 * ww; 355 | // roi(3) = y1; 356 | // roi(4) = roi(2) + ww; 357 | // roi(5) = roi(3) + hh; 358 | // outputs.push_back(roi); 359 | 360 | // // (0, +1) 361 | // roi(2) = x1; 362 | // roi(3) = y1 + 0.05 * hh; 363 | // roi(4) = roi(2) + ww; 364 | // roi(5) = roi(3) + hh; 365 | // outputs.push_back(roi); 366 | 367 | // // (+1, 0) 368 | // roi(2) = x1 + 0.05 * ww; 369 | // roi(3) = y1; 370 | // roi(4) = roi(2) + ww; 371 | // roi(5) = roi(3) + hh; 372 | // outputs.push_back(roi); 373 | // } 374 | } 375 | } 376 | } 377 | 378 | 379 | // get 3D bounding boxes 380 | void getBb3Ds(const Tensor& extents, std::vector>& bb3Ds, int num_classes) 381 | { 382 | // for each object 383 | auto acc_extents = extents.packed_accessor(); 384 | for (int i = 1; i < num_classes; i++) 385 | { 386 | cv::Vec extent; 387 | 388 | extent(0) = acc_extents[i * 3]; 389 | extent(1) = acc_extents[i * 3 + 1]; 390 | extent(2) = acc_extents[i * 3 + 2]; 391 | bb3Ds.push_back(getBB3D(extent)); 392 | } 393 | } 394 | 395 | 396 | inline std::vector getBB3D(const cv::Vec& extent) 397 | { 398 | std::vector bb; 399 | float xHalf = extent[0] * 0.5; 400 | float yHalf = extent[1] * 0.5; 401 | float zHalf = extent[2] * 0.5; 402 | 403 | bb.push_back(cv::Point3f(xHalf, yHalf, zHalf)); 404 | bb.push_back(cv::Point3f(-xHalf, yHalf, zHalf)); 405 | bb.push_back(cv::Point3f(xHalf, -yHalf, zHalf)); 406 | bb.push_back(cv::Point3f(-xHalf, -yHalf, zHalf)); 407 | 408 | bb.push_back(cv::Point3f(xHalf, yHalf, -zHalf)); 409 | bb.push_back(cv::Point3f(-xHalf, yHalf, -zHalf)); 410 | bb.push_back(cv::Point3f(xHalf, -yHalf, -zHalf)); 411 | bb.push_back(cv::Point3f(-xHalf, -yHalf, -zHalf)); 412 | 413 | return bb; 414 | } 415 | 416 | 417 | // inline cv::Rect getBB2D(int imageWidth, int imageHeight, const std::vector& bb3D, const cv::Mat& camMat, const cv::Mat& rvec, const cv::Mat& tvec) 418 | // { 419 | // // project 3D bounding box vertices into the image 420 | // std::vector bb2D; 421 | // cv::projectPoints(bb3D, rvec, tvec, camMat, cv::Mat(), bb2D); 422 | 423 | // // get min-max of projected vertices 424 | // int minX = imageWidth - 1; 425 | // int maxX = 0; 426 | // int minY = imageHeight - 1; 427 | // int maxY = 0; 428 | 429 | // for(unsigned j = 0; j < bb2D.size(); j++) 430 | // { 431 | // minX = std::min((float) minX, bb2D[j].x); 432 | // minY = std::min((float) minY, bb2D[j].y); 433 | // maxX = std::max((float) maxX, bb2D[j].x); 434 | // maxY = std::max((float) maxY, bb2D[j].y); 435 | // } 436 | 437 | // // clamp at image border 438 | // minX = clamp(minX, 0, imageWidth - 1); 439 | // maxX = clamp(maxX, 0, imageWidth - 1); 440 | // minY = clamp(minY, 0, imageHeight - 1); 441 | // maxY = clamp(maxY, 0, imageHeight - 1); 442 | 443 | // return cv::Rect(minX, minY, (maxX - minX + 1), (maxY - minY + 1)); 444 | // } 445 | 446 | inline void compute_width_height(const Tensor& v_label, const Tensor& v_vertex, const int labelmap, const float vertmap, cv::Point2f center, std::vector> bb3Ds, Eigen::Matrix3f camMat, float inlierThreshold, int height, int width, int channel, int num_classes, int & bb_width, int & bb_height, float & bb_distance) 447 | { 448 | float d = 0; 449 | int count = 0; 450 | 451 | // for each pixel 452 | std::vector dx; 453 | std::vector dy; 454 | auto acc_label = v_label.accessor(); 455 | auto acc_vertex = v_vertex.accessor(); 456 | for (int x = 0; x < width; x++) 
457 | { 458 | for (int y = 0; y < height; y++) 459 | { 460 | if (acc_label[labelmap+y * width + x] == channel) 461 | { 462 | cv::Point2f point(x, y); 463 | int offset = VERTEX_CHANNELS * channel + VERTEX_CHANNELS * num_classes * (y * width + x); 464 | float u = acc_vertex[vertmap+offset]; 465 | float v = acc_vertex[vertmap+offset + 1]; 466 | float distance = exp(acc_vertex[vertmap+offset + 2]/1000.0); 467 | float norm = sqrt(u * u + v * v); 468 | u /= norm; 469 | v /= norm; 470 | cv::Point2f direction(u, v); 471 | 472 | // inlier check 473 | if(angle_distance(center, direction, point) > inlierThreshold) 474 | { 475 | dx.push_back(fabs(point.x - center.x)); 476 | dy.push_back(fabs(point.y - center.y)); 477 | d += distance; 478 | count++; 479 | } 480 | } 481 | } 482 | } 483 | bb_distance = d / count; 484 | // estimate a projection 485 | // cv::Mat tvec(3, 1, CV_64F); 486 | // cv::Mat rvec(3, 1, CV_64F); 487 | // for(int i = 0; i < 3; i++) 488 | // { 489 | // tvec.at(i, 0) = 0.0; 490 | // rvec.at(i, 0) = 0.0; 491 | // } 492 | // tvec.at(2, 0) = bb_distance; 493 | // // jp::cv_trans_t pose(rvec, tvec); 494 | 495 | // std::vector bb2D; 496 | // // cv::projectPoints(bb3Ds[objID-1], pose.first, pose.second, camMat, cv::Mat(), bb2D); 497 | // cv::projectPoints(bb3Ds[channel-1], rvec, tvec, camMat, cv::Mat(), bb2D); 498 | 499 | //Xu Ning 500 | Eigen::MatrixXf tvec(3,1); 501 | Eigen::MatrixXf rvec(3,1); 502 | for(int i = 0; i < 3; i++) 503 | { 504 | tvec(i,0) = 0.0; 505 | rvec(i,0) = 0.0; 506 | } 507 | tvec(2, 0) = bb_distance; 508 | std::vector bb2D; 509 | projectPoints(bb3Ds[channel-1], bb_distance, camMat, bb2D); 510 | 511 | 512 | // get min-max of projected vertices 513 | int minX = 1e8; 514 | int maxX = -1e8; 515 | int minY = 1e8; 516 | int maxY = -1e8; 517 | for(unsigned int i = 0; i < bb2D.size(); i++) 518 | { 519 | minX = std::min((float) minX, bb2D[i].x); 520 | minY = std::min((float) minY, bb2D[i].y); 521 | maxX = std::max((float) maxX, bb2D[i].x); 522 | maxY = std::max((float) maxY, bb2D[i].y); 523 | } 524 | cv::Rect bb = cv::Rect(0, 0, (maxX - minX + 1), (maxY - minY + 1)); 525 | std::vector::iterator it; 526 | it = std::remove_if(dx.begin(), dx.end(), std::bind2nd(std::greater(), std::max(bb.width, bb.height) )); 527 | dx.erase(it, dx.end()); 528 | 529 | it = std::remove_if(dy.begin(), dy.end(), std::bind2nd(std::greater(), std::max(bb.width, bb.height) )); 530 | dy.erase(it, dy.end()); 531 | std::sort(dx.begin(), dx.end()); 532 | std::sort(dy.begin(), dy.end()); 533 | int index1 = int(dx.size() * 0.95); 534 | int index2 = int(dy.size() * 0.95); 535 | if (dx.size() == 0 || dy.size() == 0){ 536 | bb_width = 2; 537 | bb_height = 2; 538 | }else{ 539 | bb_width = 2 * dx[index1]; 540 | bb_height = 2 * dy[index2]; 541 | } 542 | } 543 | 544 | void projectPoints(std::vector bb3Ds, float& bb_distance, Eigen::MatrixXf camMat, std::vector& bb2D){ 545 | Eigen::MatrixXf extrinsic = Eigen::MatrixXf::Zero(3,4); 546 | Eigen::MatrixXf intrinsic(3,3); 547 | // extrinsic << 1,0,0,0,0,1,0,0,0,0,1,bb_distance; 548 | extrinsic(0,0) = 1; 549 | extrinsic(1,1) = 1; 550 | extrinsic(2,2) = 1; 551 | extrinsic(2,3) = bb_distance; 552 | intrinsic = camMat; 553 | // Eigen::Matrix2d mat; 554 | // mat << 1, 2, 555 | // 3, 4; 556 | // Eigen::Vector2d u(-1,1), v(2,0); 557 | // std::cout << "Here is mat*mat:\n" << mat*u << std::endl; 558 | 559 | for (auto bb3D:bb3Ds){ 560 | cv::Point2f pt(0,0); 561 | Eigen::Vector4f vecpt(bb3D.x, bb3D.y, bb3D.z, 1); 562 | // extrinsic*vecpt; 563 | Eigen::Vector3f pt_vec = intrinsic * 
extrinsic * vecpt; 564 | pt.x = pt_vec[0]/pt_vec[2]; 565 | pt.y = pt_vec[1]/pt_vec[2]; 566 | bb2D.push_back(pt); 567 | } 568 | } 569 | 570 | inline float angle_distance(cv::Point2f x, cv::Point2f n, cv::Point2f p) 571 | { 572 | return n.dot(x - p) / (cv::norm(n) * cv::norm(x - p)); 573 | } 574 | 575 | PYBIND11_MODULE(HoughVoting, m) { 576 | m.def("forward", &Forward, "HoughVoting forward"); 577 | // m.def("backward", &Backward, "HoughVoting backward"); 578 | } 579 | 580 | 581 | -------------------------------------------------------------------------------- /lib/HoughVoting/houghvoting.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import HoughVoting 5 | 6 | # class HoughVoting(nn.Module): 7 | # # initialization 8 | # def __init__(self): 9 | # self.vertex_channels = 3 10 | 11 | # def forward(self, label, vertex, extents, meta_data, gt, is_train): 12 | ## label : (batch size, height, weight) 13 | ## vertex: b, h, w, 3 * num_cls 14 | 15 | 16 | # # flatten 17 | # v_meta_data = meta_data.view(-1) 18 | # v_gt = gt.view(-1) 19 | # v_extents = extents.view(-1) 20 | # # batch size 21 | # batch_size = label.shape[0] 22 | # # height 23 | # height = label.shape[1] 24 | # # width 25 | # width = label.shape[2] 26 | # # num of cls 27 | # num_cls = vertex.shape[3] / self.vertex_channels 28 | # # num of meta data 29 | # num_meta_data = meta_data.shape[3] 30 | # # num of gt 31 | # num_gt = gt.shape[0] 32 | 33 | # getBb3Ds(v_extents, num_cls) 34 | # index_meta_data = 0 35 | # # for each image run hough voting 36 | # for n in range(batch_size): 37 | # idx_label = 38 | # idx_vertex = 39 | # fx = v_meta_data(index_meta_data+0) 40 | # fy = v_meta_data(index_meta_data+4) 41 | # px = v_meta_data(index_meta_data+2) 42 | # py = v_meta_data(index_meta_data+5) 43 | # outputs = voting(idx_label, idx_vertex, label, vertex, bb3Ds, n, height, weight, num_cls, is_train, fx, fy, px ,py) 44 | # index_meta_data = index_meta_data + 1 45 | # if (outputs.size() == 0): 46 | # print("No detection") 47 | # # add a dummy detection to the output? 
48 | # roi = torch.empty((14,1)) 49 | # roi[0] = 0 50 | # roi[1] = -1 51 | # # add back to outputs 52 | # outputs[] 53 | # class HoughVotingFunction(torch.autograd.Function): 54 | # @staticmethod 55 | # def forward(ctx, input, weights, bias, old_h, old_cell): 56 | # outputs = HoughVoting.forward(label, vertex, extents, meta_data, gt, is_train =True) 57 | # # new_h, new_cell = outputs[:2] 58 | # # variables = outputs[1:] + [weights] 59 | # # ctx.save_for_backward(*variables) 60 | # return outputs 61 | 62 | # @staticmethod 63 | # def backward(ctx, grad_h, grad_cell): 64 | # outputs = lltm_cpp.backward( 65 | # grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables) 66 | # d_old_h, d_input, d_weights, d_bias, d_old_cell = outputs 67 | # return d_input, d_weights, d_bias, d_old_h, d_old_cell 68 | 69 | 70 | class HF(torch.nn.Module): 71 | def __init__(self): 72 | super(HF, self).__init__() 73 | # self.input_features = input_features 74 | # self.state_size = state_size 75 | # self.weights = torch.nn.Parameter( 76 | # torch.empty(3 * state_size, input_features + state_size)) 77 | # self.bias = torch.nn.Parameter(torch.empty(3 * state_size)) 78 | # self.reset_parameters() 79 | self.vertex_channels = 3 80 | 81 | # def reset_parameters(self): 82 | # stdv = 1.0 / math.sqrt(self.state_size) 83 | # for weight in self.parameters(): 84 | # weight.data.uniform_(-stdv, +stdv) 85 | 86 | def forward(self, label, vertex, extents, meta_data, gt, is_train): 87 | outputs = HoughVoting.forward(label, vertex, extents, meta_data, gt, is_train) 88 | return outputs 89 | 90 | def backward(self, label, vertex): 91 | label_grad = torch.zeros(label.size()) 92 | vertex_grad = torch.zeros(vertex.size()) 93 | return label_grad, vertex_grad 94 | -------------------------------------------------------------------------------- /lib/HoughVoting/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | # from torch.utils.cpp_extension import CUDAExtension, BuildExtension 3 | from torch.utils.cpp_extension import CppExtension, BuildExtension 4 | 5 | 6 | setup(name='HoughVoting', 7 | ext_modules=[CppExtension('HoughVoting', ['houghvoting.cc'], 8 | library_dirs = ['/home/parallels/conda/lib/'])], 9 | cmdclass={'build_ext': BuildExtension}) 10 | 11 | -------------------------------------------------------------------------------- /lib/center_est_funcs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Apr 21 15:14:11 2019 4 | 5 | @author: Junzhe Xu 6 | """ 7 | 8 | import numpy as np 9 | import torch 10 | 11 | 12 | 13 | # FOR CENTER ESTIMATION 14 | 15 | # Center-voting for validation 16 | def _vote_centers_val(im_label, cls_indexes, centers, poses, num_classes, extents): 17 | width = im_label.shape[1] 18 | height = im_label.shape[0] 19 | vertex_targets = np.zeros((height, width, 3), dtype=np.float32) 20 | center = np.zeros((2, 1), dtype=np.float32) 21 | 22 | for i in range(1, num_classes): 23 | y, x = np.where(im_label == i) 24 | I = np.where(im_label == i) 25 | ind = np.where(cls_indexes == i)[0] 26 | 27 | if len(x) > 0 and len(ind) > 0: 28 | center[0] = centers[ind, 0] 29 | center[1] = centers[ind, 1] 30 | z = poses[2, 3, ind] 31 | R = np.tile(center, (1, len(x))) - np.vstack((x, y)) 32 | # compute the norm 33 | N = np.linalg.norm(R, axis=0) + 1e-10 34 | # normalization 35 | R = np.divide(R, np.tile(N, (2,1))) 36 | # assignment 37 | vertex_targets[y, x, 0] = R[0,:] 
38 | vertex_targets[y, x, 1] = R[1,:] 39 | vertex_targets[y, x, 2] = z 40 | 41 | return vertex_targets 42 | 43 | 44 | 45 | 46 | # Center voting for training 47 | def _vote_centers_train(im_label, cls_indexes, center, depth_centers, num_classes): 48 | height = im_label.shape[0] 49 | width = im_label.shape[1] 50 | vertex_targets = np.zeros((3*num_classes, height, width), dtype=np.float32) 51 | vertex_weights = np.zeros(vertex_targets.shape, dtype=np.float32) 52 | c = np.zeros((2, 1), dtype=np.float32) 53 | 54 | for i in range(1, num_classes): 55 | y, x = np.where(im_label == i) 56 | if len(x) > 0: 57 | c[0] = center[i, 0] 58 | c[1] = center[i, 1] 59 | R = np.tile(c, (1, len(x))) - np.vstack((x, y)) 60 | # compute the norm 61 | N = np.linalg.norm(R, axis=0) + 1e-10 62 | # normalization 63 | R = np.divide(R, np.tile(N, (2,1))) 64 | # assignment 65 | start = 3 * i 66 | end = start + 3 67 | vertex_targets[3*i, y, x] = R[0,:] 68 | vertex_targets[3*i+1, y, x] = R[1,:] 69 | vertex_targets[3*i+2, y, x] = depth_centers[i, 0] 70 | vertex_weights[start:end, y, x] = 10.0 71 | 72 | return vertex_targets, vertex_weights 73 | 74 | 75 | 76 | def smooth_l1_loss_vertex(vertex_pred, vertex_targets, vertex_weights, sigma=1.0, VERTEX_W=5.0): 77 | 78 | sigma_2 = sigma ** 2 79 | vertex_diff = vertex_pred - vertex_targets 80 | diff = vertex_weights * vertex_diff 81 | abs_diff = torch.abs(diff) 82 | smoothL1_sign = torch.tensor((abs_diff < 1. / sigma_2).float(), requires_grad=False) 83 | in_loss = torch.pow(diff, 2) * (sigma_2 / 2.) * smoothL1_sign \ 84 | + (abs_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign) 85 | loss = torch.div(torch.sum(in_loss), torch.sum(vertex_weights) + 1e-10 ) 86 | loss = VERTEX_W * torch.tensor(loss, requires_grad=True) 87 | 88 | return loss 89 | -------------------------------------------------------------------------------- /lib/loss_funcions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | ####################################################################### 7 | ########## Loss functions adapted from the tensorflow version ######### 8 | ####################################################################### 9 | 10 | def loss_cross_entropy_single_frame(scores, labels): 11 | """ 12 | scores: a tensor [batch_size, height, width, num_classes] 13 | labels: a tensor [batch_size, height, width, num_classes] 14 | """ 15 | 16 | with tf.name_scope('loss'): 17 | cross_entropy = -tf.reduce_sum(labels * scores, reduction_indices=[3]) 18 | loss = tf.div(tf.reduce_sum(cross_entropy), tf.reduce_sum(labels)+1e-10) 19 | 20 | return loss 21 | 22 | 23 | def torch_loss_cross_entropy_single_frame(scores, labels): 24 | 25 | """ 26 | scores: a tensor [batch_size, height, width, num_classes] 27 | labels: a tensor [batch_size, height, width, num_classes] 28 | """ 29 | 30 | cross_entropy = -torch.sum(labels * scores, dim=3) 31 | loss = torch.div(torch.sum(cross_entropy), torch.sum(labels)+1e-10) 32 | loss = torch.tensor(loss, requires_grad=True) 33 | 34 | return loss 35 | 36 | ########################################################################### 37 | 38 | def smooth_l1_loss_vertex(vertex_pred, vertex_targets, vertex_weights, sigma=1.0): 39 | 40 | sigma_2 = sigma ** 2 41 | vertex_diff = vertex_pred - vertex_targets 42 | diff = tf.multiply(vertex_weights, vertex_diff) 43 | abs_diff = tf.abs(diff) 44 | smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_diff, 
1. / sigma_2))) 45 | in_loss = tf.pow(diff, 2) * (sigma_2 / 2.) * smoothL1_sign \ 46 | + (abs_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign) 47 | loss = tf.div( tf.reduce_sum(in_loss), tf.reduce_sum(vertex_weights) + 1e-10 ) 48 | 49 | return loss 50 | 51 | def torch_smooth_l1_loss_vertex(vertex_pred, vertex_targets, vertex_weights, sigma=1.0, VERTEX_W=5.0): 52 | 53 | sigma_2 = sigma ** 2 54 | vertex_diff = vertex_pred - vertex_targets 55 | diff = vertex_weights * vertex_diff 56 | abs_diff = torch.abs(diff) 57 | smoothL1_sign = torch.tensor((abs_diff < 1. / sigma_2).float(), requires_grad=False) 58 | in_loss = torch.pow(diff, 2) * (sigma_2 / 2.) * smoothL1_sign \ 59 | + (abs_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign) 60 | loss = torch.div(torch.sum(in_loss), torch.sum(vertex_weights) + 1e-10 ) 61 | loss = VERTEX_W * torch.tensor(loss, requires_grad=True) 62 | 63 | return loss 64 | 65 | ########################################################################### 66 | """ 67 | logits should be domain_score, labels should be label_domain 68 | """ 69 | 70 | def torch_loss_domain(logits, labels, ADAPT_WEIGHT=0.1): 71 | 72 | loss = ADAPT_WEIGHT * torch.mean(F.nll_loss(F.softmax(logits), labels)) 73 | loss = torch.tensor(loss, requires_grad=True) 74 | 75 | return loss 76 | 77 | ########################################################################### 78 | 79 | 80 | -------------------------------------------------------------------------------- /lib/roi_pool_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | 7 | class RoIPool(nn.Module): 8 | def __init__(self, pooled_height, pooled_width, spatial_scale): 9 | super(RoIPool, self).__init__() 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | """ 16 | features shape is (batch_size, num_channels, img_height, img_width) 17 | rois shape is (num_rois, 5), where the first index is batch index, the last 4 indexes are the coordinate 18 | of the upper left corner and the lower right corner 19 | spatial scale should be like 1/16, 1/8, etc. 
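        Each row of rois is therefore [batch_idx, x1, y1, x2, y2] in input-image pixel
        coordinates, and the returned tensor has shape
        (num_rois, num_channels, pooled_height, pooled_width).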
20 | """ 21 | 22 | batch_size, num_channels, data_height, data_width = features.size() 23 | num_rois = rois.size()[0] 24 | outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda() 25 | 26 | for roi_idx, roi in enumerate(rois): 27 | batch_idx = int(roi[0]) 28 | if batch_idx > batch_size - 1: 29 | raise ValueError("Batch index out of range!") 30 | upleft_x, upleft_y, downright_x, downright_y = np.round(roi[1:].cpu().numpy() * self.spatial_scale).astype(int) 31 | roi_width = max(downright_x - upleft_x + 1, 1) 32 | roi_height = max(downright_y - upleft_y + 1, 1) 33 | bin_size_w = float(roi_width) / float(self.pooled_width) 34 | bin_size_h = float(roi_height) / float(self.pooled_height) 35 | 36 | for ph in range(self.pooled_height): 37 | hstart = int(np.floor(ph * bin_size_h)) 38 | hend = int(np.ceil((ph + 1) * bin_size_h)) 39 | hstart = min(data_height, max(0, hstart + upleft_y)) 40 | hend = min(data_height, max(0, hend + upleft_y)) 41 | 42 | for pw in range(self.pooled_width): 43 | wstart = int(np.floor(pw * bin_size_w)) 44 | wend = int(np.ceil((pw + 1) * bin_size_w)) 45 | wstart = min(data_width, max(0, wstart + upleft_x)) 46 | wend = min(data_width, max(0, wend + upleft_x)) 47 | is_error = (hend <= hstart) or (wend <= wstart) 48 | 49 | if is_error: 50 | outputs[roi_idx, :, ph, pw] = 0 51 | 52 | else: 53 | data = features[batch_idx] 54 | outputs[roi_idx, :, ph, pw] = torch.max(torch.max(data[:, hstart:hend, wstart:wend], dim=1)[0], dim=2)[0].view(-1) 55 | 56 | return outputs 57 | 58 | -------------------------------------------------------------------------------- /lib/vgg16_convs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 12 13:41:56 2019 4 | 5 | @author: Junzhe Xu 6 | """ 7 | 8 | import torch 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | import time 15 | #from networks.network import Network 16 | from lib.HoughVoting.houghvoting import * 17 | 18 | 19 | class vgg16_convs(nn.Module): 20 | def __init__(self, input_format, num_classes, num_units, scales, threshold_label, vote_threshold, 21 | vertex_reg_2d=False, vertex_reg_3d=False, vertex_reg_hough_in = False, pose_reg=False, 22 | adaptation=False, trainable=True, is_train=True): 23 | super(vgg16_convs, self).__init__() 24 | 25 | self.inputs = [] 26 | self.input_format = input_format 27 | self.num_classes = num_classes 28 | self.num_units = num_units 29 | self.scale = 1.0 30 | self.threshold_label = threshold_label 31 | self.vertex_reg_2d = vertex_reg_2d 32 | self.vertex_reg_3d = vertex_reg_3d 33 | self.vertex_reg = vertex_reg_2d or vertex_reg_3d 34 | self.vertex_reg_hough = vertex_reg_hough_in 35 | self.pose_reg = pose_reg 36 | self.adaptation = adaptation 37 | self.trainable = trainable 38 | 39 | # if vote_threshold < 0, only detect single instance (default). 
40 | # Otherwise, multiple instances are detected if hough voting score larger than the threshold 41 | 42 | if is_train: 43 | self.is_train = 1 44 | self.skip_pixels = 10 45 | self.vote_threshold = vote_threshold 46 | self.vote_percentage = 0.02 47 | else: 48 | self.is_train = 0 49 | self.skip_pixels = 10 50 | self.vote_threshold = vote_threshold 51 | self.vote_percentage = 0.02 52 | 53 | 54 | 55 | # VGG-16 for feature extraction 56 | self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 57 | self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 58 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/2 of the origin image 59 | 60 | self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 61 | self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 62 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/4 of the origin image 63 | 64 | self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 65 | self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 66 | self.conv3_3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 67 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/8 of the origin image 68 | 69 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 70 | self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 71 | self.conv4_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 72 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/16 of the origin image 73 | 74 | self.conv5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 75 | self.conv5_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 76 | self.conv5_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 77 | 78 | # If input format is RGBD we use another network 79 | """ 80 | self.conv1_1_p = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 81 | self.conv1_2_p = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 82 | self.poo1_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 83 | 84 | self.conv2_1_p = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 85 | self.conv2_2_p = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 86 | self.pool2_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 87 | 88 | self.conv3_1_p = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 89 | self.conv3_2_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 90 | self.conv3_3_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 91 | self.pool3_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 92 | 93 | self.conv4_1_p = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 94 | self.conv4_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 95 | self.conv4_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 96 | self.pool4_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 97 | 98 | self.conv5_1_p = 
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 99 | self.conv5_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 100 | self.conv5_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 101 | """ 102 | 103 | 104 | # For combination layer 105 | # For semantic segmentation 106 | self.conv6_seman_a = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 107 | self.conv6_seman_b = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 108 | self.dconv6_seman_a = nn.ConvTranspose2d(in_channels=64, out_channels=64, 109 | kernel_size=4, stride=2, padding=1, output_padding=0) 110 | self.dropout = nn.Dropout2d() 111 | self.dconv7_seman = nn.ConvTranspose2d(in_channels=64, out_channels=64, 112 | kernel_size=16, stride=8, padding=4, output_padding=0) 113 | self.conv8_seman = nn.Conv2d(in_channels=64, out_channels=self.num_classes, kernel_size=3, stride=1, padding=1) 114 | 115 | 116 | # For Center estimation 117 | self.conv6_center_a = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 118 | self.conv6_center_b = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 119 | self.dconv6_center_a = nn.ConvTranspose2d(in_channels=128, out_channels=128, 120 | kernel_size=4, stride=2, padding=1, output_padding=0) 121 | self.dconv7_center = nn.ConvTranspose2d(in_channels=128, out_channels=128, 122 | kernel_size=16, stride=8, padding=4, output_padding=0) 123 | self.conv8_center = nn.Conv2d(in_channels=128, out_channels=3 * self.num_classes, kernel_size=3, stride=1, padding=1) 124 | 125 | 126 | self.relu = nn.ReLU() 127 | 128 | self.hough_voting = HF() 129 | 130 | 131 | def conv_fun(self, x): 132 | x = self.relu(self.conv1_1(x)) 133 | x = self.relu(self.conv1_2(x)) 134 | x = self.pool1(x) # 1/2 of the original image 135 | 136 | x = self.relu(self.conv2_1(x)) 137 | x = self.relu(self.conv2_2(x)) 138 | x = self.pool2(x) # 1/4 of the original image 139 | 140 | x = self.relu(self.conv3_1(x)) 141 | x = self.relu(self.conv3_2(x)) 142 | x = self.relu(self.conv3_3(x)) 143 | x = self.pool3(x) # 1/8 of the original image 144 | 145 | x = self.relu(self.conv4_1(x)) 146 | x = self.relu(self.conv4_2(x)) 147 | f_conv4 = self.relu(self.conv4_3(x)) 148 | x = self.pool3(f_conv4) # 1/16 of the original image 149 | 150 | x = self.relu(self.conv5_1(x)) 151 | x = self.relu(self.conv5_2(x)) 152 | f_conv5 = self.relu(self.conv5_3(x)) 153 | x = self.pool3(f_conv5) # 1/32 of the original image 154 | return x, f_conv4, f_conv5 155 | 156 | 157 | def seman_net(self, f_conv4, f_conv5): 158 | x_a = self.dconv6_seman_a(self.conv6_seman_a(f_conv5)) 159 | x_b = self.conv6_seman_b(f_conv4) 160 | x = x_a + x_b 161 | x = self.dconv7_seman(x) 162 | x = self.conv8_seman(x) 163 | return x 164 | 165 | 166 | def center_net(self, f_conv4, f_conv5): 167 | x_a = self.dconv6_center_a(self.conv6_center_a(f_conv5)) 168 | x_b = self.conv6_center_b(f_conv4) 169 | x = x_a + x_b 170 | x = self.dconv7_center(x) 171 | x = self.conv8_center(x) 172 | return x 173 | 174 | 175 | def forward(self, x, extents, meta, gt_hough, is_train, device_cpu): 176 | x, f_conv4, f_conv5 = self.conv_fun(x) 177 | x_seman = self.seman_net(f_conv4, f_conv5) # the output of semantic segmentation 178 | 179 | if self.vertex_reg == True: 180 | x_center_dir = self.center_net(f_conv4, f_conv5) # the output of the center estimation 181 | x_center = None 182 | if self.vertex_reg_hough: 
183 | x_seman_single = torch.argmax(x_seman, dim = 1).type('torch.IntTensor') 184 | x_seman_single = x_seman_single.to(device_cpu) 185 | 186 | x_center_dir_hough = x_center_dir.permute(0,2,3,1) 187 | x_center_dir_hough = x_center_dir_hough.to(device_cpu) 188 | 189 | x_hough = self.hough_voting(x_seman_single, x_center_dir_hough, extents, meta, gt_hough, is_train) 190 | 191 | return x_seman, x_center_dir, x_hough 192 | else: 193 | return x_seman 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /lib/vgg16_convs_combine_mask.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 12 13:41:56 2019 4 | 5 | @author: Junzhe Xu 6 | """ 7 | 8 | import torch 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | import time 15 | #from networks.network import Network 16 | 17 | 18 | class vgg16_convs_comb_seg_center(nn.Module): 19 | def __init__(self, input_format, num_classes, num_units, scales, threshold_label, vote_threshold, 20 | vertex_reg_2d=False, vertex_reg_3d=False, combine_seg_center_in = False, pose_reg=False, 21 | adaptation=False, trainable=True, is_train=True): 22 | super(vgg16_convs_comb_seg_center, self).__init__() 23 | 24 | self.inputs = [] 25 | self.input_format = input_format 26 | self.num_classes = num_classes 27 | self.num_units = num_units 28 | self.scale = 1.0 29 | self.threshold_label = threshold_label 30 | self.vertex_reg_2d = vertex_reg_2d 31 | self.vertex_reg_3d = vertex_reg_3d 32 | self.vertex_reg = vertex_reg_2d or vertex_reg_3d 33 | self.combine_seg_center = combine_seg_center_in 34 | self.pose_reg = pose_reg 35 | self.adaptation = adaptation 36 | self.trainable = trainable 37 | 38 | # if vote_threshold < 0, only detect single instance (default). 
39 | # Otherwise, multiple instances are detected if hough voting score larger than the threshold 40 | 41 | if is_train: 42 | self.is_train = 1 43 | self.skip_pixels = 10 44 | self.vote_threshold = vote_threshold 45 | self.vote_percentage = 0.02 46 | else: 47 | self.is_train = 0 48 | self.skip_pixels = 10 49 | self.vote_threshold = vote_threshold 50 | self.vote_percentage = 0.02 51 | 52 | 53 | 54 | # VGG-16 for feature extraction 55 | self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 56 | self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 57 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/2 of the origin image 58 | 59 | self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 60 | self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 61 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/4 of the origin image 62 | 63 | self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 64 | self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 65 | self.conv3_3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 66 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/8 of the origin image 67 | 68 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 69 | self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 70 | self.conv4_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 71 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/16 of the origin image 72 | 73 | self.conv5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 74 | self.conv5_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 75 | self.conv5_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 76 | 77 | # If input format is RGBD we use another network 78 | """ 79 | self.conv1_1_p = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 80 | self.conv1_2_p = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 81 | self.poo1_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 82 | 83 | self.conv2_1_p = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 84 | self.conv2_2_p = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 85 | self.pool2_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 86 | 87 | self.conv3_1_p = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 88 | self.conv3_2_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 89 | self.conv3_3_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 90 | self.pool3_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 91 | 92 | self.conv4_1_p = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 93 | self.conv4_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 94 | self.conv4_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 95 | self.pool4_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 96 | 97 | self.conv5_1_p = 
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 98 | self.conv5_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 99 | self.conv5_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 100 | """ 101 | 102 | 103 | # For combination layer 104 | # For semantic segmentation 105 | self.conv6_seman_a = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 106 | self.conv6_seman_b = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 107 | self.dconv6_seman_a = nn.ConvTranspose2d(in_channels=64, out_channels=64, 108 | kernel_size=4, stride=2, padding=1, output_padding=0) 109 | self.dropout = nn.Dropout2d() 110 | self.dconv7_seman = nn.ConvTranspose2d(in_channels=64, out_channels=64, 111 | kernel_size=16, stride=8, padding=4, output_padding=0) 112 | self.conv8_seman = nn.Conv2d(in_channels=64, out_channels=self.num_classes, kernel_size=3, stride=1, padding=1) 113 | 114 | 115 | # For Center estimation 116 | self.conv6_center_a = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 117 | self.conv6_center_b = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 118 | self.dconv6_center_a = nn.ConvTranspose2d(in_channels=128, out_channels=128, 119 | kernel_size=4, stride=2, padding=1, output_padding=0) 120 | self.dconv7_center = nn.ConvTranspose2d(in_channels=128, out_channels=128, 121 | kernel_size=16, stride=8, padding=4, output_padding=0) 122 | self.conv8_center = nn.Conv2d(in_channels=128, out_channels=3 * self.num_classes, kernel_size=3, stride=1, padding=1) 123 | 124 | 125 | # Combine seg with center estimation 126 | self.dconv7_center_comb = nn.ConvTranspose2d(in_channels=192, out_channels=192, 127 | kernel_size=16, stride=8, padding=4, output_padding=0) 128 | self.conv8_center_comb = nn.Conv2d(in_channels=192, out_channels=3 * self.num_classes, kernel_size=3, stride=1, padding=1) 129 | 130 | 131 | self.relu = nn.ReLU() 132 | 133 | 134 | def conv_fun(self, x): 135 | x = self.relu(self.conv1_1(x)) 136 | x = self.relu(self.conv1_2(x)) 137 | x = self.pool1(x) # 1/2 of the original image 138 | 139 | x = self.relu(self.conv2_1(x)) 140 | x = self.relu(self.conv2_2(x)) 141 | x = self.pool2(x) # 1/4 of the original image 142 | 143 | x = self.relu(self.conv3_1(x)) 144 | x = self.relu(self.conv3_2(x)) 145 | x = self.relu(self.conv3_3(x)) 146 | x = self.pool3(x) # 1/8 of the original image 147 | 148 | x = self.relu(self.conv4_1(x)) 149 | x = self.relu(self.conv4_2(x)) 150 | f_conv4 = self.relu(self.conv4_3(x)) 151 | x = self.pool3(f_conv4) # 1/16 of the original image 152 | 153 | x = self.relu(self.conv5_1(x)) 154 | x = self.relu(self.conv5_2(x)) 155 | f_conv5 = self.relu(self.conv5_3(x)) 156 | x = self.pool3(f_conv5) # 1/32 of the original image 157 | return x, f_conv4, f_conv5 158 | 159 | def seman_net(self, f_conv4, f_conv5): 160 | x_a = self.dconv6_seman_a(self.conv6_seman_a(f_conv5)) 161 | x_b = self.conv6_seman_b(f_conv4) 162 | x_mid = x_a + x_b 163 | x = self.dconv7_seman(x_mid) 164 | x = self.conv8_seman(x) 165 | return x, x_mid 166 | 167 | def center_net(self, f_conv4, f_conv5): 168 | x_a = self.dconv6_center_a(self.conv6_center_a(f_conv5)) 169 | x_b = self.conv6_center_b(f_conv4) 170 | x = x_a + x_b 171 | x = self.dconv7_center(x) 172 | x = self.conv8_center(x) 173 | return x 174 | 175 | 176 | def combine_seg_center_net(self, x_seg_mid, f_conv4, f_conv5): 177 | # the input shape of 
x_seg is batch X channel X ... 178 | # the channel is the # class 179 | x_a = self.dconv6_center_a(self.conv6_center_a(f_conv5)) 180 | x_b = self.conv6_center_b(f_conv4) 181 | x = x_a + x_b 182 | x = torch.cat((x, x_seg_mid), 1) # channel becomes 192 183 | x = self.dconv7_center_comb(x) 184 | x = self.conv8_center_comb(x) 185 | return x 186 | 187 | 188 | 189 | def forward(self, x): 190 | x, f_conv4, f_conv5 = self.conv_fun(x) 191 | x_seman, x_seg_mid = self.seman_net(f_conv4, f_conv5) # the output of semantic segmentation 192 | 193 | if self.vertex_reg == True: 194 | if self.combine_seg_center: 195 | x_center = self.combine_seg_center_net(x_seg_mid, f_conv4, f_conv5) 196 | else: 197 | x_center = self.center_net(f_conv4, f_conv5) # the output of the center estimation 198 | return x_seman, x_center 199 | 200 | else: 201 | return x_seman 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /lib/vgg16_convs_combine_seg_center.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 12 13:41:56 2019 4 | 5 | @author: Junzhe Xu 6 | """ 7 | 8 | import torch 9 | import torchvision 10 | import torchvision.transforms as transforms 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | import time 15 | #from networks.network import Network 16 | 17 | 18 | class vgg16_convs_comb_seg_center(nn.Module): 19 | def __init__(self, input_format, num_classes, num_units, scales, threshold_label, vote_threshold, 20 | vertex_reg_2d=False, vertex_reg_3d=False, combine_seg_center_in = False, pose_reg=False, 21 | adaptation=False, trainable=True, is_train=True): 22 | super(vgg16_convs_comb_seg_center, self).__init__() 23 | 24 | self.inputs = [] 25 | self.input_format = input_format 26 | self.num_classes = num_classes 27 | self.num_units = num_units 28 | self.scale = 1.0 29 | self.threshold_label = threshold_label 30 | self.vertex_reg_2d = vertex_reg_2d 31 | self.vertex_reg_3d = vertex_reg_3d 32 | self.vertex_reg = vertex_reg_2d or vertex_reg_3d 33 | self.combine_seg_center = combine_seg_center_in 34 | self.pose_reg = pose_reg 35 | self.adaptation = adaptation 36 | self.trainable = trainable 37 | 38 | # if vote_threshold < 0, only detect single instance (default). 
39 | # Otherwise, multiple instances are detected if hough voting score larger than the threshold 40 | 41 | if is_train: 42 | self.is_train = 1 43 | self.skip_pixels = 10 44 | self.vote_threshold = vote_threshold 45 | self.vote_percentage = 0.02 46 | else: 47 | self.is_train = 0 48 | self.skip_pixels = 10 49 | self.vote_threshold = vote_threshold 50 | self.vote_percentage = 0.02 51 | 52 | 53 | 54 | # VGG-16 for feature extraction 55 | self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 56 | self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 57 | self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/2 of the origin image 58 | 59 | self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 60 | self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 61 | self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/4 of the origin image 62 | 63 | self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 64 | self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 65 | self.conv3_3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 66 | self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/8 of the origin image 67 | 68 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 69 | self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 70 | self.conv4_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 71 | self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 1/16 of the origin image 72 | 73 | self.conv5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 74 | self.conv5_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 75 | self.conv5_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 76 | 77 | # If input format is RGBD we use another network 78 | """ 79 | self.conv1_1_p = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1) 80 | self.conv1_2_p = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) 81 | self.poo1_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 82 | 83 | self.conv2_1_p = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) 84 | self.conv2_2_p = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1) 85 | self.pool2_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 86 | 87 | self.conv3_1_p = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1) 88 | self.conv3_2_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 89 | self.conv3_3_p = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 90 | self.pool3_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 91 | 92 | self.conv4_1_p = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1) 93 | self.conv4_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 94 | self.conv4_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 95 | self.pool4_p = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) 96 | 97 | self.conv5_1_p = 
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 98 | self.conv5_2_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 99 | self.conv5_3_p = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1) 100 | """ 101 | 102 | 103 | # For combination layer 104 | # For semantic segmentation 105 | self.conv6_seman_a = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 106 | self.conv6_seman_b = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3, stride=1, padding=1) 107 | self.dconv6_seman_a = nn.ConvTranspose2d(in_channels=64, out_channels=64, 108 | kernel_size=4, stride=2, padding=1, output_padding=0) 109 | self.dropout = nn.Dropout2d() 110 | self.dconv7_seman = nn.ConvTranspose2d(in_channels=64, out_channels=64, 111 | kernel_size=16, stride=8, padding=4, output_padding=0) 112 | self.conv8_seman = nn.Conv2d(in_channels=64, out_channels=self.num_classes, kernel_size=3, stride=1, padding=1) 113 | 114 | 115 | # For Center estimation 116 | self.conv6_center_a = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 117 | self.conv6_center_b = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1) 118 | self.dconv6_center_a = nn.ConvTranspose2d(in_channels=128, out_channels=128, 119 | kernel_size=4, stride=2, padding=1, output_padding=0) 120 | self.dconv7_center = nn.ConvTranspose2d(in_channels=128, out_channels=128, 121 | kernel_size=16, stride=8, padding=4, output_padding=0) 122 | self.conv8_center = nn.Conv2d(in_channels=128, out_channels=3 * self.num_classes, kernel_size=3, stride=1, padding=1) 123 | 124 | 125 | # Combine seg with center estimation 126 | self.dconv7_center_comb = nn.ConvTranspose2d(in_channels=192, out_channels=192, 127 | kernel_size=16, stride=8, padding=4, output_padding=0) 128 | self.conv8_center_comb = nn.Conv2d(in_channels=192, out_channels=3 * self.num_classes, kernel_size=3, stride=1, padding=1) 129 | 130 | 131 | self.relu = nn.ReLU() 132 | 133 | 134 | def conv_fun(self, x): 135 | x = self.relu(self.conv1_1(x)) 136 | x = self.relu(self.conv1_2(x)) 137 | x = self.pool1(x) # 1/2 of the original image 138 | 139 | x = self.relu(self.conv2_1(x)) 140 | x = self.relu(self.conv2_2(x)) 141 | x = self.pool2(x) # 1/4 of the original image 142 | 143 | x = self.relu(self.conv3_1(x)) 144 | x = self.relu(self.conv3_2(x)) 145 | x = self.relu(self.conv3_3(x)) 146 | x = self.pool3(x) # 1/8 of the original image 147 | 148 | x = self.relu(self.conv4_1(x)) 149 | x = self.relu(self.conv4_2(x)) 150 | f_conv4 = self.relu(self.conv4_3(x)) 151 | x = self.pool3(f_conv4) # 1/16 of the original image 152 | 153 | x = self.relu(self.conv5_1(x)) 154 | x = self.relu(self.conv5_2(x)) 155 | f_conv5 = self.relu(self.conv5_3(x)) 156 | x = self.pool3(f_conv5) # 1/32 of the original image 157 | return x, f_conv4, f_conv5 158 | 159 | def seman_net(self, f_conv4, f_conv5): 160 | x_a = self.dconv6_seman_a(self.conv6_seman_a(f_conv5)) 161 | x_b = self.conv6_seman_b(f_conv4) 162 | x_mid = x_a + x_b 163 | x = self.dconv7_seman(x_mid) 164 | x = self.conv8_seman(x) 165 | return x, x_mid 166 | 167 | def center_net(self, f_conv4, f_conv5): 168 | x_a = self.dconv6_center_a(self.conv6_center_a(f_conv5)) 169 | x_b = self.conv6_center_b(f_conv4) 170 | x = x_a + x_b 171 | x = self.dconv7_center(x) 172 | x = self.conv8_center(x) 173 | return x 174 | 175 | 176 | def combine_seg_center_net(self, x_seg_mid, f_conv4, f_conv5): 177 | # the input shape of 
x_seg is batch X channel X ... 178 | # the channel is the # class 179 | x_a = self.dconv6_center_a(self.conv6_center_a(f_conv5)) 180 | x_b = self.conv6_center_b(f_conv4) 181 | x = x_a + x_b 182 | x = torch.cat((x, x_seg_mid), 1) # channel becomes 192 183 | x = self.dconv7_center_comb(x) 184 | x = self.conv8_center_comb(x) 185 | return x 186 | 187 | 188 | 189 | def forward(self, x): 190 | x, f_conv4, f_conv5 = self.conv_fun(x) 191 | x_seman, x_seg_mid = self.seman_net(f_conv4, f_conv5) # the output of semantic segmentation 192 | 193 | if self.vertex_reg == True: 194 | if self.combine_seg_center: 195 | x_center = self.combine_seg_center_net(x_seg_mid, f_conv4, f_conv5) 196 | else: 197 | x_center = self.center_net(f_conv4, f_conv5) # the output of the center estimation 198 | return x_seman, x_center 199 | 200 | else: 201 | return x_seman 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /testpytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import HoughVoting 3 | print(torch.__version__) 4 | a = torch.Tensor([[1,2],[3,4]]) 5 | print (a) 6 | b = a.flatten() 7 | print(b) 8 | 9 | print(HoughVoting.forward) 10 | # help(HoughVoting.forward) 11 | -------------------------------------------------------------------------------- /train_net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # PoseCNN with pytorch 3 | # Author: university of michigan EECS442 4 | # -------------------------------------------------------- 5 | 6 | import os 7 | import random 8 | import time 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.parallel 13 | #import torch.backends.cudnn as cudnn 14 | import torch.optim as optim 15 | import torch.utils.data 16 | import torchvision.datasets as dset 17 | import torchvision.transforms as transforms 18 | import torchvision.utils as vutils 19 | from torch.autograd import Variable 20 | from tqdm import tqdm 21 | from eval_net import cal_AP 22 | import shutil 23 | import cv2 24 | import matplotlib.pyplot as plt 25 | import matplotlib.patches as patches 26 | from PIL import Image 27 | import pdb 28 | import copy 29 | 30 | #from datasets.ycb.dataset import PoseDataset as PoseDataset_ycb 31 | from datasets.linemod.dataset_posecnn import PoseDataset as PoseDataset_linemod 32 | from datasets.YCB.dataset import Posedataset as PoseDataset_ycb 33 | 34 | #from lib.network import PoseNet, PoseRefineNet 35 | #from lib.loss import Loss 36 | from lib.vgg16_convs import vgg16_convs 37 | from lib.vgg16_convs_combine_seg_center import vgg16_convs_comb_seg_center 38 | from lib.center_est_funcs import * 39 | 40 | class arguments(): 41 | def __init__(self): 42 | self.dataset = 'linemod' 43 | self.dataset_root = '/home/ubuntu/EECS442_CourseProject/datasets/linemod/Linemod_preprocessed' 44 | self.num_objects = 13 45 | 46 | self.flag_pretrained_vgg = False 47 | self.flag_pretrained = True 48 | self.path_pretrained = 'trained_model/pretrained-posecnn-linemod/checkpoint.pth.tar' 49 | # self.num_pretrain_param = 36 50 | # 26 for vgg part 36 for vgg+seg 51 | # 46 for vgg+seg+center 42 for vgg+seg+center(combine seg and center) 52 | self.num_pretrain_param_load = 46 53 | self.num_pretrain_param_freeze = 0 54 | 55 | self.save_model = True 56 | self.save_test_result = True 57 | self.save_train_result = True 58 | 
self.save_hough_result = True 59 | self.color = [(255, 255, 255), (0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), 60 | (255, 0, 255), (0, 255, 255), (128, 0, 0), (0, 128, 0), (0, 0, 128), 61 | (128, 128, 0), (128, 0, 128), (0, 128, 128),(64, 0, 0), (0, 64, 0), (0, 0, 64)] 62 | self.arch = 'Semantic_Segmentation' 63 | self.gpu = True 64 | self.niter_print = 50 65 | self.nepoch_save = 1 66 | self.num_pretrain_param_vgg = 26 67 | 68 | self.batch_size = 4 69 | self.workers = 0 70 | 71 | self.lr = 2e-4 72 | 73 | self.iteration = 2 74 | self.nepoch = 5 75 | 76 | self.repeat_epoch = 1 77 | 78 | self.noise_trans = 0.03 79 | self.manualSeed = 0 80 | self.num_units = 10 81 | self.scales = 1 82 | self.threshold_label = 1 83 | self.vote_threshold = 1 84 | self.refine_start = False 85 | 86 | # FOR CENTER ESTIMATION 87 | self.train_single_frame = True 88 | self.vertex_reg = True 89 | self.vertex_reg_hough = True 90 | self.combine_seg_center = True 91 | self.combine_loss = True 92 | 93 | opt = arguments() 94 | 95 | def save_image_fun(img, name): 96 | if img.ndim == 3: 97 | img = np.transpose(img, (1,2,0)).astype(np.int) 98 | name = os.path.join('log', name+'.jpg') 99 | cv2.imwrite(name, img) 100 | 101 | def outputtoimg(output): 102 | if output.ndim == 2: 103 | return output 104 | else: 105 | img = np.argmax(output, axis = 0) 106 | return img 107 | 108 | def imgscale(img, scale, offset): 109 | global opt 110 | return img*scale + offset 111 | 112 | def save_image( images, labels, output_seg, vertex_targets, output_center, epoch, mode = 'train', i=0, index = 0): 113 | start_str_img = None 114 | start_str_class = None 115 | if mode == 'train': 116 | start_str_img = 'report_result/seg_center_dir/epoch{0}_iter{1}_image{2}'.format(epoch, i, index) 117 | start_str_class = 'report_result/seg_center_dir/epoch{0}_iter{1}_image{2}'.format(epoch, i, index) 118 | else: 119 | start_str_img = 'test/image{0}'.format(index) 120 | start_str_class = 'test/image{0}'.format(index) 121 | 122 | save_image_fun(images.cpu().numpy()[0], start_str_img) 123 | save_image_fun(imgscale(labels[0].cpu().detach().numpy(), 255//opt.num_objects, 0), start_str_img+'_seg_gt') 124 | save_image_fun(imgscale(outputtoimg(output_seg.cpu().detach().numpy()[0]), 255//opt.num_objects, 0), start_str_img+'_seg') 125 | if opt.vertex_reg: 126 | for j in range(1, opt.num_objects): 127 | save_image_fun(imgscale(vertex_targets[0, j*3].cpu().detach().numpy(), 100, 100), start_str_class + '_class{0}_vertex_x_gt'.format(j)) 128 | save_image_fun(imgscale(vertex_targets[0, j*3+1].cpu().detach().numpy(), 100, 100), start_str_class + '_class{0}_vertex_y_gt'.format(j)) 129 | save_image_fun(imgscale(vertex_targets[0, j*3+2].cpu().detach().numpy(), 0.1, 0), start_str_class + '_class{0}_vertex_depth_gt'.format(j)) 130 | 131 | save_image_fun(imgscale(output_center[0, j*3].cpu().detach().numpy(), 100, 100), start_str_class + '_class{0}_vertex_x'.format(j)) 132 | save_image_fun(imgscale(output_center[0, j*3+1].cpu().detach().numpy(), 100, 100), start_str_class + '_class{0}_vertex_y'.format(j)) 133 | save_image_fun(imgscale(output_center[0, j*3+2].cpu().detach().numpy(), 0.1, 0), start_str_class + '_class{0}_vertex_depth'.format(j)) 134 | # save the image for the inverse of the difference between gt and prediction 135 | 136 | 137 | def save_bbox_center(images, output_hough, epoch, i): 138 | index_bbox = 0 139 | output_hough = output_hough.cpu().numpy() 140 | images_plot = images.cpu().numpy() 141 | images_plot = images_plot.transpose(0,2,3,1).astype(np.uint8) 142 | 
for ii in range(images.shape[0]): 143 | img3 = copy.deepcopy(images_plot[ii]) 144 | for jj in range(index_bbox, output_hough.shape[0]): 145 | if output_hough[jj,0] != ii: 146 | break 147 | index_bbox +=1 148 | width = output_hough[jj,4] - output_hough[jj,2] 149 | height = output_hough[jj,5] - output_hough[jj,3] 150 | if width<4 or height<4: 151 | continue 152 | img2 = np.zeros((480,640,3), np.uint8) 153 | index_class = int(output_hough[jj,1]) 154 | cv2.rectangle(img2, (int(output_hough[jj,2]), int(output_hough[jj,3])), 155 | (int(output_hough[jj,4]), int(output_hough[jj,5])),opt.color[index_class],3) 156 | c_x = int(output_hough[jj,2] + 0.5*width) 157 | c_y = int(output_hough[jj,3] + 0.5*height) 158 | cv2.circle(img2, (c_x, c_y), 3, opt.color[index_class], 2) 159 | index = np.where(img2>0) 160 | img3[index] = img2[index] 161 | name = 'log/report_result/bbox_center/epoch{0}_iter{1}_image{2}_bboxs.png'.format(epoch, i, ii) 162 | cv2.imwrite(name, img3) 163 | 164 | 165 | 166 | def train(trainloader, net, criterion, criterion_center, optimizer, device, device_cpu): 167 | global opt 168 | loss_his = [] 169 | images = None 170 | labels = None 171 | vertex_targets = None 172 | vertex_weights = None 173 | extents = None 174 | meta = None 175 | gt_hough = None 176 | extrin_matrixs_gt= None 177 | bboxs_gt = None 178 | for epoch in range(opt.nepoch): #TODO decide epochs 179 | print('-----------------Epoch = %d-----------------' % (epoch+1)) 180 | if torch.cuda.is_available(): 181 | torch.cuda.empty_cache() 182 | net.train() 183 | start = time.time() 184 | running_loss = 0.0 185 | for i, data in enumerate(trainloader): 186 | optimizer.zero_grad() 187 | if opt.train_single_frame: 188 | # train the network part by part 189 | if opt.vertex_reg == True: 190 | # Only train the center-voting part 191 | images, labels, vertex_targets, vertex_weights, extents, meta, gt_hough, extrin_matrixs_gt, bboxs_gt = data 192 | images = images.to(device) 193 | labels = labels.type('torch.LongTensor').to(device) 194 | vertex_targets = vertex_targets.to(device) 195 | vertex_weights = vertex_weights.to(device) 196 | extents = extents.to(device_cpu) 197 | meta = meta.to(device_cpu) 198 | gt_hough = gt_hough.to(device_cpu) 199 | # change all the tensor type to CPU 200 | output_seg, output_center_dir, output_hough = net(images, extents, meta, gt_hough, 1, device_cpu) 201 | # print("This is the output of hough voting: ", output_hough.cpu().numpy()) 202 | loss_seg = criterion(output_seg, labels) 203 | loss_center = criterion_center(output_center_dir, vertex_targets) 204 | loss = loss_seg + loss_center 205 | else: 206 | # Only train the segmentation part 207 | images, labels = data 208 | images = images.to(device) 209 | labels = labels.type('torch.LongTensor').to(device) 210 | output_seg = net(images) 211 | loss = criterion(output_seg, labels) 212 | else: 213 | # from the begining to the end 214 | print('Empty for this part') 215 | loss = 0 216 | loss.backward() 217 | optimizer.step() 218 | running_loss += loss.item() 219 | if i %opt.niter_print == opt.niter_print-1: 220 | end = time.time() 221 | print('[epoch %d, iter %5d] loss: %.3f eplased time %.3f' % 222 | (epoch + 1, i + 1, running_loss / opt.niter_print, end-start)) 223 | start = time.time() 224 | loss_his.append(running_loss / opt.niter_print) 225 | running_loss = 0.0 226 | if opt.save_train_result and (epoch % opt.nepoch_save == 0 or epoch == opt.nepoch-1) and i%50 == 0: 227 | save_image(images, labels, output_seg, vertex_targets, output_center_dir, epoch, 'train', i, 
i//50) 228 | if opt.save_hough_result: 229 | save_bbox_center(images, output_hough, epoch, i) 230 | # pdb.set_trace() 231 | return loss_his 232 | 233 | 234 | def test(testloader, net, criterion, criterion_center, device, device_cpu): 235 | ''' 236 | Function for testing. 237 | ''' 238 | global opt 239 | losses = 0. 240 | cnt = 0 241 | cnt_image = 0 242 | with torch.no_grad(): 243 | net = net.eval() 244 | loss = 0.0 245 | vertex_targets = None 246 | output_center = None 247 | for data in tqdm(testloader): 248 | if opt.train_single_frame: 249 | if opt.vertex_reg == True: 250 | # Only train the center-voting part 251 | images, labels, vertex_targets, vertex_weights, extents, meta, gt_hough, extrin_matrixs_gt, bboxs_gt = data 252 | images = images.to(device) 253 | labels = labels.type('torch.LongTensor').to(device) 254 | vertex_targets = vertex_targets.to(device) 255 | vertex_weights = vertex_weights.to(device) 256 | extents = extents.to(device_cpu) 257 | meta = meta.to(device_cpu) 258 | gt_hough = gt_hough.to(device_cpu) 259 | output_seg, output_center_dir, output_center= net(images, extents, meta, gt_hough, 0, device_cpu) 260 | 261 | loss_seg = criterion(output_seg, labels) 262 | loss_center = criterion_center(output_center_dir, vertex_targets) 263 | loss_temp = loss_seg + loss_center 264 | loss += loss_temp.item() 265 | else: 266 | # Only train the segmentation part 267 | images, labels = data 268 | images = images.to(device) 269 | labels = labels.type('torch.LongTensor').to(device) 270 | output_seg = net(images) 271 | loss_temp = criterion(output_seg, labels) 272 | loss += loss_temp.item() 273 | else: 274 | # this part corresponding to the network is end to end 275 | # and only have one loss 276 | print('Empty for this part') 277 | loss = 0 278 | pass 279 | 280 | if opt.save_test_result and cnt%4 == 3: 281 | cnt_image+=1 282 | save_image(images, labels, output_seg, vertex_targets, output_center_dir, 0, 'test', i, cnt_image) 283 | 284 | cnt += 1 285 | print(loss / cnt) 286 | return (loss/cnt) 287 | 288 | 289 | 290 | def loadpretrain(net, pretrained_dic, device, num): 291 | pretrained_list = list(pretrained_dic.items()) 292 | net_dic = net.state_dict() 293 | net_dic_new = net_dic 294 | count = 0 295 | for k, v in net_dic.items(): 296 | name_temp, value_pretrained = pretrained_list[count] 297 | net_dic_new[k] = value_pretrained 298 | count+=1 299 | if count >= num: 300 | break 301 | return net.load_state_dict(net_dic_new) 302 | 303 | 304 | def save_checkpoint(state, is_best, filename='trained_model/checkpoint.pth.tar'): 305 | torch.save(state, filename) 306 | if is_best: 307 | shutil.copyfile(filename, 'trained_model/model_best.pth.tar') 308 | 309 | 310 | def main(): 311 | opt.manualSeed = random.randint(1, 10000) 312 | random.seed(opt.manualSeed) 313 | torch.manual_seed(opt.manualSeed) 314 | device_cpu = torch.device('cpu') 315 | if opt.gpu: 316 | if torch.cuda.is_available(): 317 | device = torch.device('cuda:0') 318 | else: 319 | device = torch.device('cpu') 320 | else: 321 | device = torch.device('cpu') 322 | if torch.cuda.is_available(): 323 | torch.cuda.empty_cache() 324 | torch.backends.cudnn.benchmark = True 325 | 326 | if opt.dataset == 'ycb': 327 | opt.num_objects = 21 #number of object classes in the dataset 328 | opt.num_points = 1000 #number of points on the input pointcloud 329 | opt.outf = 'trained_models/ycb' #folder to save trained models 330 | opt.log_dir = 'experiments/logs/ycb' #folder to save logs 331 | opt.repeat_epoch = 1 #number of repeat times for one epoch training 
332 | elif opt.dataset == 'linemod': 333 | opt.num_objects = 10 334 | opt.num_points = 500 335 | opt.outf = 'trained_models/linemod' 336 | opt.log_dir = 'experiments/logs/linemod' 337 | opt.repeat_epoch = 20 338 | else: 339 | print('Unknown dataset') 340 | return 341 | 342 | # check for the network mode 343 | if not opt.vertex_reg and opt.vertex_reg_hough: 344 | assert ValueError('Mode Incorrect') 345 | 346 | if opt.dataset == 'ycb': 347 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, 348 | opt.noise_trans, opt.refine_start) 349 | elif opt.dataset == 'linemod': 350 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, 351 | opt.refine_start, False, True, opt.vertex_reg, opt.vertex_reg_hough) 352 | trainloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, 353 | shuffle=True, num_workers=opt.workers) 354 | 355 | 356 | if opt.dataset == 'ycb': 357 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 358 | 0.0, opt.refine_start) 359 | elif opt.dataset == 'linemod': 360 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 361 | 0.0, opt.refine_start, 362 | False, True, opt.vertex_reg, opt.vertex_reg_hough) 363 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, 364 | shuffle=False, num_workers=opt.workers) 365 | 366 | if opt.dataset == 'ycb': 367 | pass 368 | else: 369 | ap_data = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, 370 | opt.refine_start, True, True, opt.vertex_reg, opt.vertex_reg_hough) 371 | ap_loader = torch.utils.data.DataLoader(ap_data, batch_size=1, shuffle=False, num_workers=opt.workers) 372 | 373 | opt.sym_list = dataset.get_sym_list() 374 | opt.num_points_mesh = dataset.get_num_points_mesh() 375 | # print(opt.sym_list) 376 | # print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list)) 377 | 378 | 379 | # Network, optimizer and loss 380 | net = vgg16_convs(None, opt.num_objects, opt.num_objects, opt.scales, opt.threshold_label, 381 | opt.vote_threshold, opt.vertex_reg, opt.vertex_reg, opt.vertex_reg_hough) 382 | # net = vgg16_convs_comb_seg_center(None, opt.num_objects, opt.num_objects, opt.scales, opt.threshold_label, 383 | # opt.vote_threshold, opt.vertex_reg, opt.combine_seg_center) 384 | 385 | optimizer = optim.Adam(net.parameters(), lr = opt.lr) 386 | 387 | weight_class = torch.from_numpy(dataset.weight_clsss).type('torch.FloatTensor').to(device) 388 | # criterion = nn.CrossEntropyLoss(weight_class) 389 | criterion = nn.CrossEntropyLoss() 390 | criterion_center = nn.SmoothL1Loss() 391 | 392 | # Load pretrained model 393 | if opt.flag_pretrained and not opt.flag_pretrained_vgg: 394 | # load out model trained before as initialization to continue 395 | if os.path.isfile(opt.path_pretrained): 396 | print("=> Loading Checkpoint '{}'".format(opt.path_pretrained)) 397 | pre_trained = torch.load(opt.path_pretrained) 398 | net_dic = net.state_dict() 399 | net_dic_new = net_dic 400 | pretrained_dic = pre_trained['state_dict'] 401 | pretrained_list = list(pretrained_dic.items()) 402 | # net.load_state_dict() 403 | if opt.num_pretrain_param_load > 0: 404 | count = 0 405 | for k, v in net_dic.items(): 406 | if count >= opt.num_pretrain_param_load: 407 | break 408 | name_temp, 
value_pretrained = pretrained_list[count] 409 | if opt.gpu: 410 | net_dic_new[k] = value_pretrained 411 | else: 412 | net_dic_new[k] = value_pretrained.cpu() 413 | count+=1 414 | 415 | 416 | net.load_state_dict(net_dic_new) 417 | """ 418 | optimizer.load_state_dict(pre_trained['optimizer']) 419 | for state in optimizer.state.values(): 420 | for k, v in state.items(): 421 | if torch.cuda.is_available: 422 | if isinstance(v, torch.Tensor): 423 | state[k] = v.cuda() 424 | """ 425 | print("=> Loaded Checkpoint '{}'".format(opt.path_pretrained)) 426 | else: 427 | raise ValueError("No pretrained model found at {}".format(opt.path_pretrained)) 428 | 429 | count = 0 430 | for param in net.parameters(): 431 | if count >= opt.num_pretrain_param_freeze: 432 | break 433 | param.requires_grad = False 434 | count+=1 435 | elif not opt.flag_pretrained and opt.flag_pretrained_vgg: 436 | # load the pretrained weights of the VGG16 net 437 | # 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth' 438 | pretrained_dic = torch.load('pretrained_model/vgg16-397923af.pth') 439 | pretrained_list = list(pretrained_dic.items()) 440 | net_dic = net.state_dict() 441 | net_dic_new = net_dic 442 | count = 0 443 | for k, v in net_dic.items(): 444 | name_temp, value_pretrained = pretrained_list[count] 445 | net_dic_new[k] = value_pretrained 446 | count+=1 447 | if count >= opt.num_pretrain_param_vgg: 448 | break 449 | net.load_state_dict(net_dic_new) 450 | count = 0 451 | for param in net.parameters(): 452 | param.requires_grad = False 453 | count+=1 454 | if count>=opt.num_pretrain_param_vgg: 455 | break 456 | elif not opt.flag_pretrained and not opt.flag_pretrained_vgg: 457 | print('No pretrained model loaded') 458 | else: 459 | print('Conflicting flags: flag_pretrained and flag_pretrained_vgg cannot both be True') 460 | 461 | 462 | net.to(device) 463 | loss_his = [] 464 | loss_his = train(trainloader, net, criterion, criterion_center, optimizer, device, device_cpu) 465 | 466 | print('>>>>>>>>----------Training Finished!---------<<<<<<<<') 467 | 468 | test_loss = 0 469 | test_loss = test(testdataloader, net, criterion, criterion_center, device, device_cpu) 470 | 471 | print('>>>>>>>>----------AP---------<<<<<<<<') 472 | aps = None 473 | # if opt.train_single_frame: 474 | # aps = cal_AP(ap_loader, net, criterion, device, opt.num_objects, opt) 475 | # aps = np.array(aps) 476 | # print('Final mean AP : {}'.format(np.mean(aps))) 477 | 478 | print('>>>>>>>>----------Save the model weights!---------<<<<<<<<') 479 | if opt.save_model: 480 | # save the trained model 481 | save_checkpoint({ 482 | 'epoch': opt.nepoch, 483 | 'arch': opt.arch, 484 | 'state_dict': net.state_dict(), 485 | 'test_loss': test_loss, 486 | 'aps': aps, 487 | 'optimizer' : optimizer.state_dict(), 488 | }, False) 489 | 490 | print('>>>>>>>>----------Loss History---------<<<<<<<<') 491 | np.save('log/loss/loss', np.array(loss_his)) 492 | plt.figure() 493 | plt.plot(loss_his) 494 | plt.savefig('/home/ubuntu/EECS442_CourseProject/log/loss/unfreeze_seg_ctr.png') 495 | # plt.savefig('log/loss/loss.png') 496 | plt.show() 497 | 498 | 499 | if __name__ == '__main__': 500 | main() 501 | --------------------------------------------------------------------------------
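The segmentation and center heads above share the same FCN-style fusion: conv5 features at 1/16 resolution are upsampled by the stride-2 transposed convolution, added to the conv4 features at 1/8 resolution, and the sum is upsampled back to the input resolution by the stride-8 transposed convolution. The sketch below only checks the spatial arithmetic of those two layers; the 64-channel dummy tensors, the 480x640 input size (the LINEMOD image size used by the dataset loader) and the variable names are illustrative assumptions, not code from the repository.

```python
import torch
import torch.nn as nn

# Output size of ConvTranspose2d (dilation=1, output_padding=0):
#   H_out = (H_in - 1) * stride - 2 * padding + kernel_size
up2 = nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1)   # exact x2, as in dconv6_seman_a
up8 = nn.ConvTranspose2d(64, 64, kernel_size=16, stride=8, padding=4)  # exact x8, as in dconv7_seman

f_conv5 = torch.randn(1, 64, 30, 40)   # 1/16 of a 480x640 image, after the 3x3 channel-reduction conv
f_conv4 = torch.randn(1, 64, 60, 80)   # 1/8 of a 480x640 image, after the 3x3 channel-reduction conv

fused = up2(f_conv5) + f_conv4         # both operands are 60x80 after the x2 upsampling
print(fused.shape)                     # torch.Size([1, 64, 60, 80])
print(up8(fused).shape)                # torch.Size([1, 64, 480, 640]) -> back to input resolution
```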
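In vgg16_convs.forward, the inputs to the custom HoughVoting layer are prepared by taking a per-pixel argmax over the segmentation logits and moving the channel axis of the center-direction map to the end. The snippet below reproduces only that tensor bookkeeping on random data; the batch size, the 480x640 resolution and num_classes = 10 (the LINEMOD setting in train_net.py) are assumptions, and the compiled HoughVoting extension itself is not called here.

```python
import torch

batch, num_classes, H, W = 2, 10, 480, 640
x_seman = torch.randn(batch, num_classes, H, W)             # segmentation logits from seman_net
x_center_dir = torch.randn(batch, 3 * num_classes, H, W)    # per-class (x, y, depth) maps from center_net

x_seman_single = torch.argmax(x_seman, dim=1).int()         # per-pixel class labels, shape (B, H, W)
x_center_dir_hough = x_center_dir.permute(0, 2, 3, 1)       # NCHW -> NHWC, shape (B, H, W, 3 * num_classes)

print(x_seman_single.shape)       # torch.Size([2, 480, 640])
print(x_center_dir_hough.shape)   # torch.Size([2, 480, 640, 30])
# Both tensors are then moved to the CPU before being handed to the Hough-voting layer.
```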
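combine_seg_center_net concatenates the 128-channel center features with the 64-channel intermediate segmentation features (x_seg_mid) along the channel axis, which is why dconv7_center_comb and conv8_center_comb are declared with 192 input channels. A minimal shape check, assuming the 60x80 feature-map size that a 480x640 input produces at 1/8 resolution:

```python
import torch

center_feat = torch.randn(1, 128, 60, 80)   # fused center-branch features at 1/8 resolution
seg_mid = torch.randn(1, 64, 60, 80)        # x_seg_mid from seman_net at the same resolution

combined = torch.cat((center_feat, seg_mid), dim=1)
print(combined.shape)                        # torch.Size([1, 192, 60, 80]) -> matches in_channels=192
```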
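conv8_center (and its _comb variant) emits 3 * num_classes channels, and save_image in train_net.py reads them back in groups of three per class: the x component of the center direction, the y component, and the regressed depth. The slicing below illustrates that layout on a random tensor; num_classes = 10 and the chosen class index are assumptions made for the example.

```python
import torch

num_classes = 10
output_center = torch.randn(1, 3 * num_classes, 480, 640)   # same layout as vertex_targets

j = 2                                   # pick one class index
vx = output_center[0, 3 * j]            # x component of the center-direction field
vy = output_center[0, 3 * j + 1]        # y component
vz = output_center[0, 3 * j + 2]        # regressed depth
print(vx.shape, vy.shape, vz.shape)     # three 480x640 maps for this class
```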
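Both loadpretrain and the checkpoint-loading branch in main copy the first num_pretrain_param entries of a pretrained state_dict into the network purely by position, relying on the shared VGG backbone tensors appearing first and in the same order in both models. The helper name load_first_n and the two toy Sequential models below are illustrative assumptions; the repository's own code inlines this loop over the real networks.

```python
import torch
import torch.nn as nn

def load_first_n(net, pretrained_state, n):
    """Copy the first n tensors of pretrained_state into net, matched by position."""
    pretrained_items = list(pretrained_state.items())
    new_state = net.state_dict()
    for count, key in enumerate(new_state):
        if count >= n:
            break
        _, value = pretrained_items[count]   # the source key name is ignored; order must match
        new_state[key] = value
    net.load_state_dict(new_state)

# Two toy models whose first conv layer has identical shapes (stand-ins for the shared VGG stem).
src = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
dst = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 16, 3))

load_first_n(dst, src.state_dict(), n=2)            # copy conv1 weight and bias only
print(torch.equal(dst[0].weight, src[0].weight))    # True: first layer now carries the pretrained values
```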