├── .gitignore
├── INSTALL.md
├── README.md
├── data
│   ├── __init__.py
│   ├── kitti_dataset.py
│   ├── kitti_gt_dataset.py
│   ├── kitti_object_dataset.py
│   ├── kitti_object_roi_dataset.py
│   ├── kitti_prediction_dataset.py
│   └── load_dataset.py
├── datasets
│   └── KITTI_object
│       ├── annotations
│       │   ├── data_object_training_annotations.json
│       │   ├── inference_annotations.json
│       │   ├── object_inference_annotations.json
│       │   ├── train_annotations.json
│       │   └── val_annotations.json
│       ├── test.txt
│       ├── train.txt
│       ├── training
│       │   ├── depth
│       │   ├── image_2
│       │   ├── label_2
│       │   └── planes
│       ├── trainval.txt
│       └── val.txt
├── experiments
│   └── foresee
│       ├── depth_normal_model.py
│       ├── lateral_net.py
│       ├── loss.py
│       ├── pc
│       │   ├── bin2obj.py
│       │   ├── gen.sh
│       │   ├── gen_colorps.sh
│       │   ├── kitti_prediction.py
│       │   └── kitti_prediction_colorps.py
│       ├── test.sh
│       ├── train.sh
│       ├── train_kitti.py
│       └── val_kitti.py
├── lib
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   └── config.py
│   ├── models
│   │   ├── MobileNetV2.py
│   │   ├── ResNeXt.py
│   │   ├── __init__.py
│   │   ├── image_transfer.py
│   │   ├── lateral_net.py
│   │   └── loss.py
│   └── utils
│       ├── __init__.py
│       ├── bounding_box.py
│       ├── chamfer_distance
│       │   ├── __init__.py
│       │   ├── chamfer_distance.cpp
│       │   ├── chamfer_distance.cu
│       │   └── chamfer_distance.py
│       ├── collections.py
│       ├── evaluate_depth_error.py
│       ├── logging.py
│       ├── misc.py
│       ├── mobilenetv2_weight_helper.py
│       ├── net_tools.py
│       ├── obj_utils.py
│       ├── resnext_weights_helper.py
│       ├── timer.py
│       └── training_stats.py
├── prepocessing
│   ├── gen_depth.sh
│   ├── generate_depth.py
│   ├── generate_disp.py
│   ├── generate_lidar.py
│   ├── kitti_process_RANSAC.py
│   └── kitti_util.py
├── pretrained_model
│   └── ResNeXt_ImageNet
└── tools
    ├── __init__.py
    ├── kitti_prediction.py
    ├── kitti_prediction_sample.py
    ├── kitti_prediction_sample_diff.py
    ├── parse_arg_base.py
    ├── parse_arg_test.py
    ├── parse_arg_train.py
    ├── parse_arg_val.py
    ├── test_kitti.py
    ├── train_kitti.py
    └── val_kitti.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pth
2 | *.pyc
3 | __pycache__/
4 | 
5 | 
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Requirements
4 | - PyTorch >= 0.4.1
5 | - torchvision == 0.2.1
6 | - matplotlib
7 | - opencv-python
8 | - dill
9 | - scipy
10 | - pyyaml
11 | 
12 | ### Step-by-step installation
13 | ```bash
14 | # First, make sure conda is set up properly and create a dedicated environment
15 | 
16 | conda create -n foresee python=3.6
17 | conda activate foresee
18 | 
19 | 
20 | # basic packages
21 | conda install matplotlib dill pyyaml opencv scipy
22 | 
23 | # follow the PyTorch installation guide at https://pytorch.org/get-started/locally/
24 | # the instructions below are for CUDA 9.0
25 | conda install -c pytorch torchvision=0.2.1 cudatoolkit=9.0
26 | 
27 | ```
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Task-Aware Monocular Depth Estimation for 3D Object Detection
2 | 
3 | This project hosts the code for implementing the ForeSeE algorithm for depth estimation.
4 | 
5 | 
6 | > [**Task-Aware Monocular Depth Estimation for 3D Object Detection**](https://arxiv.org/abs/1909.07701),
7 | > Xinlong Wang, Wei Yin, Tao Kong, Yuning Jiang, Lei Li, Chunhua Shen
8 | > *AAAI, 2020*
9 | 
10 | 
11 | ## Installation
12 | 
13 | This implementation is based on [VNL](https://github.com/YvanYin/VNL_Monocular_Depth_Prediction). Please refer to [INSTALL.md](INSTALL.md) for installation.
14 | 
15 | ## Dataset
16 | 
17 | Please refer to the [KITTI dataset](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_prediction) for details.
18 | The annotation files of the [KITTI Object subset](https://github.com/WXinlong/ForeSeE/tree/master/datasets/KITTI_object/annotations) used in our work are provided.
19 | 
20 | ## Models
21 | Download the trained model from this [link](https://cloudstor.aarnet.edu.au/plus/s/M3LFxiDPZkMKrtw) and put it under experiments/foresee/.
22 | 
23 | ## Testing
24 | 
25 |     cd experiments/foresee
26 |     sh test.sh
27 | 
28 | ## Training
29 | 
30 |     cd experiments/foresee
31 |     sh train.sh
32 | 
33 | ## Citations
34 | 
35 | Please consider citing our paper in your publications if the project helps your research. The BibTeX reference is as follows.
36 | 
37 | ```
38 | @InProceedings{wang2020foresee,
39 |   title = {Task-Aware Monocular Depth Estimation for 3D Object Detection},
40 |   author = {Wang, Xinlong and Yin, Wei and Kong, Tao and Jiang, Yuning and Li, Lei and Shen, Chunhua},
41 |   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
42 |   year = {2020}
43 | }
44 | ```
45 | 
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/data/__init__.py
--------------------------------------------------------------------------------
/data/kitti_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | import torch
4 | import numpy as np
5 | from lib.core.config import cfg
6 | import cv2
7 | import json
8 | from lib.utils.logging import setup_logging
9 | logger = setup_logging(__name__)
10 | 
11 | class KITTIDataset():
12 |     def initialize(self, opt):
13 |         self.opt = opt
14 |         self.root = opt.dataroot
15 |         self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json')
16 |         self.A_paths, self.B_paths = self.getData()
17 |         self.data_size = len(self.A_paths)
18 |         self.depth_normalize = 255. * 80.
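# The depth .png files are read raw below and divided by depth_normalize = 255 * 80.
# Assuming they follow the usual KITTI export convention (uint16 value ~ meters * 256),
# this maps depth into roughly [0, 1], with 1.0 corresponding to the 80 m range used
# elsewhere in this repo (predictions are later multiplied by 80 to recover meters).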
19 | self.uniform_size = (385, 1243) 20 | 21 | def getData(self): 22 | with open(self.dir_anno, 'r') as load_f: 23 | AB_anno = json.load(load_f) 24 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 25 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 26 | logger.info('Loaded Kitti data!') 27 | return A_list, B_list 28 | 29 | def __getitem__(self, anno_index): 30 | if 'train' in self.opt.phase: 31 | try: 32 | data = self.online_aug_train(anno_index) 33 | except: 34 | print('Error: {}'.format(self.B_paths[anno_index])) 35 | data = self.online_aug_train(anno_index - 1) 36 | else: 37 | data = self.online_aug_val_test(anno_index) 38 | return data 39 | 40 | def online_aug_train(self, idx): 41 | A_path = self.A_paths[idx] 42 | B_path = self.B_paths[idx] 43 | 44 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 45 | B = cv2.imread(B_path, -1) / self.depth_normalize #[0.0, 1.0] 46 | 47 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 48 | 49 | A_crop = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size, pad, 128) 50 | B_crop = self.flip_pad_reshape_crop(B, flip_flg, resize_size, crop_size, pad, -1) 51 | 52 | A_crop = A_crop.transpose((2, 0, 1)) 53 | B_crop = B_crop[np.newaxis, :, :] 54 | 55 | # change the color channel, bgr->rgb 56 | A_crop = A_crop[::-1, :, :] 57 | 58 | # to torch, normalize 59 | A_crop = self.scale_torch(A_crop, 255.) 60 | B_crop = self.scale_torch(B_crop, resize_ratio) 61 | 62 | B_classes = self.depth_to_class(B_crop) 63 | 64 | invalid_side = [0, 0, 0, 0] if crop_size[1] != 0 else [int((pad[0] + 50)*resize_ratio), 0, 0, 0] 65 | 66 | A = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 67 | B = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 68 | 69 | data = {'A': A_crop, 'B': B_crop, 'A_raw': A, 'B_raw': B, 'B_classes': B_classes, 'A_paths': A_path, 70 | 'B_paths': B_path, 'invalid_side': np.array(invalid_side), 'pad_raw': np.array(pad)} 71 | return data 72 | 73 | def online_aug_val_test(self, idx): 74 | A_path = self.A_paths[idx] 75 | B_path = self.B_paths[idx] 76 | 77 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 78 | 79 | B = cv2.imread(B_path, -1) / self.depth_normalize # [0.0, 1.0] 80 | 81 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 82 | 83 | crop_size_l = [pad[2], 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 84 | crop_size_m = [cfg.CROP_SIZE[1] + pad[2] - 20, 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 85 | crop_size_r = [self.uniform_size[1] - cfg.CROP_SIZE[1], 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 86 | 87 | A_crop_l = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_l, pad, 128) 88 | A_crop_l = A_crop_l.transpose((2, 0, 1)) 89 | A_crop_l = A_crop_l[::-1, :, :] 90 | 91 | A_crop_m = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_m, pad, 128) 92 | A_crop_m = A_crop_m.transpose((2, 0, 1)) 93 | A_crop_m = A_crop_m[::-1, :, :] 94 | 95 | A_crop_r = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_r, pad, 128) 96 | A_crop_r = A_crop_r.transpose((2, 0, 1)) 97 | A_crop_r = A_crop_r[::-1, :, :] 98 | 99 | A_crop_l = self.scale_torch(A_crop_l, 255.) 100 | A_crop_m = self.scale_torch(A_crop_m, 255.) 101 | A_crop_r = self.scale_torch(A_crop_r, 255.) 
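# The three crops built above cover the left / middle / right of the padded
# 385 x 1243 KITTI image, since a single CROP_SIZE window cannot span the full width;
# crop_lmr below records the crop windows so that inference_kitti() in
# depth_normal_model.py can stitch the per-crop predictions back together with
# kitti_merge_imgs (imported from lib.models.image_transfer).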
102 | A = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 103 | B = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 104 | crop_lmr = np.array((crop_size_l, crop_size_m, crop_size_r)) 105 | 106 | A_crop = A.transpose((2, 0, 1)) 107 | B_crop = B[np.newaxis, :, :] 108 | # change the color channel, bgr->rgb 109 | A_crop = A_crop[::-1, :, :] 110 | # to torch, normalize 111 | A_crop = self.scale_torch(A_crop, 255.) 112 | B_crop = self.scale_torch(B_crop, 1.0) 113 | 114 | data = {'A': A_crop, 'B': B_crop,'A_l': A_crop_l, 'A_m': A_crop_m, 'A_r': A_crop_r, 115 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad), 'crop_lmr': crop_lmr} 116 | return data 117 | 118 | def set_flip_pad_reshape_crop(self, A): 119 | """ 120 | Set flip, padding, reshaping and cropping flags. 121 | :param A: Input image, [H, W, C] 122 | :return: Data augamentation parameters 123 | """ 124 | # flip 125 | flip_prob = np.random.uniform(0.0, 1.0) 126 | flip_flg = True if flip_prob > 0.5 and 'train' in self.opt.phase else False 127 | 128 | # pad 129 | pad_height = self.uniform_size[0] - A.shape[0] 130 | pad_width = self.uniform_size[1] - A.shape[1] 131 | pad = [pad_height, 0, pad_width, 0] #[up, down, left, right] 132 | 133 | # reshape 134 | ratio_list = [1.0, 1.2, 1.5, 1.8, 2.0]# 135 | resize_ratio = ratio_list[np.random.randint(len(ratio_list))] if 'train' in self.opt.phase else 1.0 136 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 137 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 138 | 139 | # crop 140 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 141 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 142 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 143 | crop_height = cfg.CROP_SIZE[0] 144 | crop_width = cfg.CROP_SIZE[1] 145 | crop_size = [start_x, start_y, crop_width, crop_height] 146 | return flip_flg, resize_size, crop_size, pad, resize_ratio 147 | 148 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 149 | """ 150 | Preprocessing input image or ground truth depth. 
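The transform is applied in the order: flip -> pad -> resize -> crop.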
151 | :param img: RGB image or depth image 152 | :param flip: Flipping flag, True or False 153 | :param resize_size: Resizing size 154 | :param crop_size: Cropping size 155 | :param pad: Padding region 156 | :param pad_value: Padding value 157 | :return: Processed image 158 | """ 159 | if len(img.shape) == 1: 160 | return img 161 | # Flip 162 | if flip: 163 | img = np.flip(img, axis=1) 164 | 165 | # Pad the raw image 166 | if len(img.shape) == 3: 167 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 168 | constant_values=(pad_value, pad_value)) 169 | else: 170 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 171 | constant_values=(pad_value, pad_value)) 172 | # Resize the raw image 173 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 174 | # Crop the resized image 175 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 176 | 177 | return img_crop 178 | 179 | def depth_to_class(self, depth): 180 | """ 181 | Discretize depth into depth bins 182 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 183 | :param depth: 1-channel depth, [1, h, w] 184 | :return: depth bins [1, h, w] 185 | """ 186 | invalid_mask = depth < 0. 187 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 188 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 189 | classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int) 190 | classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1 191 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 192 | return classes 193 | 194 | def scale_torch(self, img, scale): 195 | # scale image 196 | img = img.astype(np.float32) 197 | img /= scale 198 | img = torch.from_numpy(img.copy()) 199 | if img.size(0) == 3: 200 | img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img) 201 | else: 202 | img = transforms.Normalize((0,), (1,))(img) 203 | return img 204 | 205 | 206 | def __len__(self): 207 | return self.data_size 208 | 209 | def name(self): 210 | return 'KITTI' 211 | -------------------------------------------------------------------------------- /data/kitti_gt_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torchvision.transforms as transforms 5 | import torch 6 | import numpy as np 7 | import cv2 8 | import json 9 | 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(ROOT_DIR) 13 | 14 | from lib.core.config import cfg 15 | from lib.utils.logging import setup_logging 16 | from lib.utils.obj_utils import read_labels, rois2mask, rois2mask_shrink, rois2boxlist 17 | logger = setup_logging(__name__) 18 | 19 | from IPython import embed 20 | 21 | class KITTIGtDataset(): 22 | def initialize(self, opt): 23 | self.opt = opt 24 | self.root = opt.dataroot 25 | self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json') 26 | self.A_paths, self.B_paths, self.AB_anno, self.rois_paths = self.getData() 27 | self.data_size = len(self.AB_anno) 28 | self.depth_normalize = 255. * 80. 
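# Unlike KITTIDataset, this loader also reads the per-image KITTI object labels
# ('rois_path' in the annotation json) via read_labels/rois2boxlist, so each sample
# additionally carries the 2D boxes of the foreground objects ('bbox' in the output dict).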
29 | self.ignore_cate_list = ['Person_sitting', 'Misc', 'DontCare'] 30 | self.uniform_size = (385, 1243) 31 | 32 | def getData(self): 33 | with open(self.dir_anno, 'r') as load_f: 34 | AB_anno = json.load(load_f) 35 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 36 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 37 | rois_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rois_path']) for i in range(len(AB_anno))] 38 | logger.info('Loaded Kitti data!') 39 | return A_list, B_list, AB_anno, rois_list 40 | 41 | def __getitem__(self, anno_index): 42 | 43 | data = self.online_aug_val_test(anno_index) 44 | return data 45 | 46 | def online_aug_val_test(self, idx): 47 | A_path = self.A_paths[idx] 48 | B_path = self.B_paths[idx] 49 | rois_path = self.rois_paths[idx] 50 | 51 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 52 | 53 | #B = np.zeros((A.shape[0], A.shape[1]), dtype=np.float32 ) 54 | B = cv2.imread(B_path, -1) / self.depth_normalize 55 | 56 | rois = read_labels(rois_path, ignore_cate=self.ignore_cate_list) # list of instances of class ObjectLabel, see obj_utils.py 57 | raw_boxlist = rois2boxlist(rois, (A.shape[1], A.shape[0])) 58 | 59 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 60 | 61 | A_crop = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 62 | B_crop = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 63 | 64 | raw_boxlist.bbox[:, 0::2] += pad[2] 65 | raw_boxlist.bbox[:, 1::2] += pad[0] 66 | boxes = raw_boxlist.bbox 67 | 68 | A_crop = A_crop.transpose((2, 0, 1)) 69 | B_crop = B_crop[np.newaxis, :, :] 70 | # change the color channel, bgr->rgb 71 | A_crop = A_crop[::-1, :, :] 72 | # to torch, normalize 73 | A_crop = self.scale_torch(A_crop, 255.) 
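# scale_torch divides the RGB crop by 255 and applies the ImageNet mean/std
# normalization; the depth map below is passed with scale 1.0, i.e. it is only
# converted to a tensor (Normalize((0,), (1,)) is an identity transform).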
74 | B_crop = self.scale_torch(B_crop, 1.0) 75 | 76 | data = {'A': A_crop, 'B': B_crop, 'bbox': boxes, 77 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad)} 78 | 79 | return data 80 | 81 | def set_flip_pad_reshape_crop(self, A): 82 | flip_flg = False 83 | 84 | # pad 85 | pad_height = self.uniform_size[0] - A.shape[0] 86 | pad_width = self.uniform_size[1] - A.shape[1] 87 | pad = [pad_height, 0, pad_width, 0] # [up, down, left, right] 88 | 89 | # reshape 90 | resize_ratio = 1.0 91 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 92 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 93 | 94 | # crop 95 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 96 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 97 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 98 | crop_height = cfg.CROP_SIZE[0] 99 | crop_width = cfg.CROP_SIZE[1] 100 | crop_size = [start_x, start_y, crop_width, crop_height] 101 | return flip_flg, resize_size, crop_size, pad, resize_ratio 102 | 103 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 104 | if len(img.shape) == 1: 105 | return img 106 | # Flip 107 | if flip: 108 | img = np.flip(img, axis=1) 109 | 110 | # Pad the raw image 111 | if len(img.shape) == 3: 112 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 113 | constant_values=(pad_value, pad_value)) 114 | else: 115 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 116 | constant_values=(pad_value, pad_value)) 117 | # Resize the raw image 118 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 119 | # Crop the resized image 120 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 121 | return img_crop 122 | 123 | def depth_to_class(self, depth): 124 | """ 125 | Transfer 1-channel depth to 1-channel depth in n depth ranges 126 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 127 | :param depth: 1-channel depth, [1, h, w] 128 | :return: classes [1, h, w] 129 | """ 130 | invalid_mask = depth < 0. 
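# Depth is discretized in log10 space:
#   class = int((log10(d) - DATA_MIN_LOG) / DEPTH_RANGE_INTERVAL)
# As an illustration only (the real constants live in lib/core/config.py): with
# DATA_MIN = 1 m, DATA_MAX = 80 m and DECODER_OUTPUT_C = 100 bins, the interval is
# (log10(80) - log10(1)) / 100 ~= 0.019, so a pixel at 10 m falls into bin 52.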
131 |         depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN
132 |         depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX
133 |         classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int)
134 |         classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1
135 |         classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1
136 |         return classes
137 | 
138 |     def scale_torch(self, img, scale):
139 |         # scale image
140 |         img = img.astype(np.float32)
141 |         img /= scale
142 |         img = torch.from_numpy(img.copy())
143 |         if img.size(0) == 3:
144 |             img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img)
145 |         else:
146 |             img = transforms.Normalize((0,), (1,))(img)
147 |         return img
148 | 
149 | 
150 |     def __len__(self):
151 |         return self.data_size
152 | 
153 |     def name(self):
154 |         return 'KITTIGtDataset'
155 | 
156 | if __name__ == "__main__":
157 |     class test_opt:
158 |         def __init__(self):
159 |             self.phase = "tongji"
160 |             self.dataroot = "../datasets/KITTI_object"
161 | 
162 |     opt = test_opt()
163 |     dataset = KITTIGtDataset()
164 |     dataset.initialize(opt)
165 | 
166 |     #idx = 0
167 |     #data = dataset.__getitem__(idx)
168 | 
169 |     #embed()
170 | 
171 |     # statistics ("tongji"): foreground vs. background depth distribution
172 |     num_fg = 0
173 |     num_bg = 0
174 |     num_all = 0
175 |     num_iter = len(dataset)
176 |     #num_iter = 20
177 | 
178 |     fg_list = []
179 |     bg_list = []
180 | 
181 |     fg_hist_cnt = 0
182 |     bg_hist_cnt = 0
183 | 
184 |     def cal_grad(B):
185 |         B = B[0, ...]
186 |         H, W = B.shape
187 |         # Laplacian of the depth map as a simple gradient measure (helper, not called below);
188 |         # cv2 works directly on numpy arrays, so no conversion is needed
189 | 
190 |         B_lap = cv2.Laplacian(B, cv2.CV_64F)
191 | 
192 | 
193 |         return np.array(B_lap)
194 | 
195 |     for i in range(num_iter):
196 |         print(i)
197 |         #idx = np.random.randint(0, len(dataset))
198 |         data = dataset.__getitem__(i)
199 | 
200 |         bbox = data['bbox']
201 |         B = data['B'] * 80
202 | 
203 |         num_box = bbox.shape[0]
204 |         rois_mask = np.zeros_like(B)
205 |         for j in range(num_box):
206 | 
207 |             box = bbox[j]
208 |             x1, y1, x2, y2 = map(int, box)
209 |             rois_mask[0, y1:y2, x1:x2] = 1
210 |         rois_mask = torch.from_numpy(rois_mask.astype(np.uint8))
211 | 
212 |         mask_0 = B != 0
213 |         cur_fg_list = torch.masked_select(B, mask_0 & rois_mask)
214 |         cur_bg_list = torch.masked_select(B, mask_0 & (1 - rois_mask))
215 | 
216 |         cur_fg_list = list(cur_fg_list.numpy())
217 |         cur_bg_list = list(cur_bg_list.numpy())
218 | 
219 |         cur_fg_hist_cnt, bins = np.histogram(cur_fg_list, bins=10, range=(0,80))
220 |         cur_bg_hist_cnt, bins = np.histogram(cur_bg_list, bins=10, range=(0,80))
221 | 
222 |         fg_hist_cnt += cur_fg_hist_cnt
223 |         bg_hist_cnt += cur_bg_hist_cnt
224 | 
225 |         #fg_list.extend(list(cur_fg_list.numpy()))
226 |         #bg_list.extend(list(cur_bg_list.numpy()))
227 | 
228 |     print(fg_hist_cnt)
229 |     print(bg_hist_cnt)
230 | 
231 |     import matplotlib
232 |     import matplotlib.pyplot as plt
233 |     import seaborn as sns
234 |     sns.set(color_codes=True)
235 | 
236 |     center = (bins[:-1] + bins[1:]) / 2
237 |     width = 0.3 * (bins[1] - bins[0])
238 | 
239 |     fg_hist_frq = 1. * fg_hist_cnt / fg_hist_cnt.sum()
240 |     bg_hist_frq = 1.
* bg_hist_cnt / bg_hist_cnt.sum() 241 | 242 | print("fg_hist_frq: {}".format(fg_hist_frq)) 243 | print("bg_hist_frq: {}".format(bg_hist_frq)) 244 | 245 | labels = [str(a) for a in range(8, 88, 8)] 246 | x = np.arange(len(labels)) 247 | width = 0.35 248 | 249 | fig, ax = plt.subplots() 250 | rect1 = ax.bar(x-width/2, fg_hist_frq, color='salmon', width=width, label="Foreground") 251 | rect2 = ax.bar(x+width/2, bg_hist_frq, color="darkseagreen", width=width, label="Background") 252 | 253 | ax.set_xticks(x) 254 | ax.set_xticklabels(labels) 255 | ax.legend() 256 | 257 | fig.tight_layout() 258 | plt.show() 259 | 260 | embed() 261 | 262 | """ 263 | plt.subplot(1, 2, 1) 264 | plt.bar(center, fg_hist_frq, align='center', color='salmon') 265 | plt.xlim((0,80)) 266 | plt.ylim((0,0.2)) 267 | 268 | plt.subplot(1, 2, 2) 269 | plt.bar(center, bg_hist_frq, align='center', color='salmon') 270 | plt.xlim((0,80)) 271 | plt.ylim((0,0.2)) 272 | 273 | plt.show() 274 | 275 | embed() 276 | """ 277 | -------------------------------------------------------------------------------- /data/kitti_prediction_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | import torch 4 | import numpy as np 5 | from lib.core.config import cfg 6 | import cv2 7 | import json 8 | from lib.utils.logging import setup_logging 9 | from lib.utils.obj_utils import read_labels, rois2mask, rois2mask_shrink, rois2boxlist 10 | logger = setup_logging(__name__) 11 | 12 | 13 | class KITTIPredictionDataset(): 14 | def initialize(self, opt): 15 | self.opt = opt 16 | self.root = opt.dataroot 17 | self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json') 18 | self.A_paths, self.B_paths, self.AB_anno, self.rois_paths = self.getData() 19 | self.data_size = len(self.AB_anno) 20 | self.depth_normalize = 255. * 80. 
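# This loader is meant for inference on images without ground-truth depth:
# online_aug_val_test below fills B with a dummy all-zero map of the image size,
# and the 2D boxes read from the label files are shifted by the padding offsets
# (pad[2] in x, pad[0] in y) so that they stay aligned with the padded image.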
21 | self.ignore_cate_list = ['Person_sitting', 'Misc', 'DontCare'] 22 | self.uniform_size = (385, 1243) 23 | 24 | def getData(self): 25 | with open(self.dir_anno, 'r') as load_f: 26 | AB_anno = json.load(load_f) 27 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 28 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 29 | rois_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rois_path']) for i in range(len(AB_anno))] 30 | logger.info('Loaded Kitti data!') 31 | return A_list, B_list, AB_anno, rois_list 32 | 33 | def __getitem__(self, anno_index): 34 | 35 | data = self.online_aug_val_test(anno_index) 36 | return data 37 | 38 | def online_aug_val_test(self, idx): 39 | A_path = self.A_paths[idx] 40 | B_path = self.B_paths[idx] 41 | rois_path = self.rois_paths[idx] 42 | 43 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 44 | 45 | B = np.zeros((A.shape[0], A.shape[1]), dtype=np.float32 ) 46 | 47 | rois = read_labels(rois_path, ignore_cate=self.ignore_cate_list) # list of instances of class ObjectLabel, see obj_utils.py 48 | raw_boxlist = rois2boxlist(rois, (A.shape[1], A.shape[0])) 49 | 50 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 51 | 52 | A_crop = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 53 | B_crop = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 54 | 55 | raw_boxlist.bbox[:, 0::2] += pad[2] 56 | raw_boxlist.bbox[:, 1::2] += pad[0] 57 | boxes = raw_boxlist.bbox 58 | 59 | A_crop = A_crop.transpose((2, 0, 1)) 60 | B_crop = B_crop[np.newaxis, :, :] 61 | # change the color channel, bgr->rgb 62 | A_crop = A_crop[::-1, :, :] 63 | # to torch, normalize 64 | A_crop = self.scale_torch(A_crop, 255.) 
65 | B_crop = self.scale_torch(B_crop, 1.0) 66 | 67 | data = {'A': A_crop, 'B': B_crop, 'bbox': boxes, 68 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad)} 69 | return data 70 | 71 | def set_flip_pad_reshape_crop(self, A): 72 | flip_flg = False 73 | 74 | # pad 75 | pad_height = self.uniform_size[0] - A.shape[0] 76 | pad_width = self.uniform_size[1] - A.shape[1] 77 | pad = [pad_height, 0, pad_width, 0] # [up, down, left, right] 78 | 79 | # reshape 80 | resize_ratio = 1.0 81 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 82 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 83 | 84 | # crop 85 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 86 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 87 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 88 | crop_height = cfg.CROP_SIZE[0] 89 | crop_width = cfg.CROP_SIZE[1] 90 | crop_size = [start_x, start_y, crop_width, crop_height] 91 | return flip_flg, resize_size, crop_size, pad, resize_ratio 92 | 93 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 94 | if len(img.shape) == 1: 95 | return img 96 | # Flip 97 | if flip: 98 | img = np.flip(img, axis=1) 99 | 100 | # Pad the raw image 101 | if len(img.shape) == 3: 102 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 103 | constant_values=(pad_value, pad_value)) 104 | else: 105 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 106 | constant_values=(pad_value, pad_value)) 107 | # Resize the raw image 108 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 109 | # Crop the resized image 110 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 111 | return img_crop 112 | 113 | def depth_to_class(self, depth): 114 | """ 115 | Transfer 1-channel depth to 1-channel depth in n depth ranges 116 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 117 | :param depth: 1-channel depth, [1, h, w] 118 | :return: classes [1, h, w] 119 | """ 120 | invalid_mask = depth < 0. 
121 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 122 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 123 | classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int) 124 | classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1 125 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 126 | return classes 127 | 128 | def scale_torch(self, img, scale): 129 | # scale image 130 | img = img.astype(np.float32) 131 | img /= scale 132 | img = torch.from_numpy(img.copy()) 133 | if img.size(0) == 3: 134 | img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img) 135 | else: 136 | img = transforms.Normalize((0,), (1,))(img) 137 | return img 138 | 139 | 140 | def __len__(self): 141 | return self.data_size 142 | 143 | def name(self): 144 | return 'NYUDepthV2Dataset' 145 | -------------------------------------------------------------------------------- /data/load_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | import importlib 3 | from lib.utils.logging import setup_logging 4 | logger = setup_logging(__name__) 5 | 6 | class CustomerDataLoader(): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.dataset = create_dataset(opt) 10 | self.dataloader = torch.utils.data.DataLoader( 11 | self.dataset, 12 | batch_size=opt.batchsize, 13 | shuffle= True if 'train' in opt.phase else False, 14 | num_workers=opt.thread) 15 | 16 | def load_data(self): 17 | return self 18 | 19 | def __len__(self): 20 | return len(self.dataset) 21 | 22 | def __iter__(self): 23 | for i, data in enumerate(self.dataloader): 24 | if i * self.opt.batchsize >= float("inf"): 25 | break 26 | yield data 27 | 28 | def create_dataset(opt): 29 | dataset = find_dataset_lib(opt.dataset)() 30 | dataset.initialize(opt) 31 | logger.info("%s is created." % opt.dataset) 32 | return dataset 33 | 34 | 35 | def find_dataset_lib(dataset_name): 36 | """ 37 | Give the option --dataset [datasetname], import "data/datasetname_dataset.py" 38 | :param dataset_name: --dataset 39 | :return: "data/datasetname_dataset.py" 40 | """ 41 | dataset_filename = "data." + dataset_name + "_dataset" 42 | datasetlib = importlib.import_module(dataset_filename) 43 | 44 | dataset = None 45 | target_dataset_name = dataset_name.replace('_', '') + 'dataset' 46 | for name, cls in datasetlib.__dict__.items(): 47 | if name.lower() == target_dataset_name.lower(): 48 | dataset = cls 49 | if dataset is None: 50 | logger.info("In %s.py, there should be a class name that matches %s in lowercase." 
% ( 51 | dataset_filename, target_dataset_name)) 52 | exit(0) 53 | return dataset -------------------------------------------------------------------------------- /datasets/KITTI_object/annotations/data_object_training_annotations.json: -------------------------------------------------------------------------------- 1 | [{"rgb_path": "training/image_2/000048.png", "depth_path": "training/image_2/000048.png"}] 2 | -------------------------------------------------------------------------------- /datasets/KITTI_object/training/depth: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/Kitti/object/training/depth -------------------------------------------------------------------------------- /datasets/KITTI_object/training/image_2: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/data_object_image_2/training/image_2 -------------------------------------------------------------------------------- /datasets/KITTI_object/training/label_2: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/data_object_label_2/training/label_2 -------------------------------------------------------------------------------- /datasets/KITTI_object/training/planes: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/planes -------------------------------------------------------------------------------- /experiments/foresee/depth_normal_model.py: -------------------------------------------------------------------------------- 1 | import lateral_net 2 | from lib.utils.net_tools import * 3 | from lib.models.image_transfer import * 4 | from loss import weight_crossentropy_loss, rois_weight_crossentropy_loss 5 | from lib.core.config import cfg 6 | 7 | 8 | 9 | class DepthNormal(nn.Module): 10 | def __init__(self): 11 | super(DepthNormal, self).__init__() 12 | self.loss_names = ['Weighted_Cross_Entropy', 'Global_Normal'] 13 | self.depth_normal_model = DepthModel() 14 | 15 | def forward(self, data): 16 | # Input data is a_real, predicted data is b_fake, groundtruth is b_real 17 | self.a_real = data['A'].cuda() 18 | self.boxes = data['bbox'].to(device=self.a_real.device) 19 | self.b_fake, self.b_roi_fake = self.depth_normal_model(self.a_real, self.boxes) 20 | return {'b_fake': self.b_fake[1], 'b_fake_nosoftmax': self.b_fake[0], 'b_fake_roi': self.b_roi_fake[1], 'b_fake_roi_nosoftmax': self.b_roi_fake[0]} 21 | 22 | def inference(self, data): 23 | with torch.no_grad(): 24 | out = self.forward(data) 25 | 26 | class_conf_final = fg_bg_maxpooling(out['b_fake_nosoftmax'], out['b_fake_roi_nosoftmax']) 27 | out_depth_final = class_depth(class_conf_final) 28 | 29 | class_conf = out['b_fake'] 30 | return {'b_fake': out_depth_final, 'b_fake_conf': class_conf} 31 | 32 | def inference_kitti(self, data): 33 | #crop kitti images into 3 parts 34 | with torch.no_grad(): 35 | self.a_l_real = data['A_l'].cuda() 36 | self.boxes_l = data['bbox_l'].to(device=self.a_l_real.device) 37 | [b_l_classes_nosoftmax, b_l_classes], [b_l_roi_classes_nosoftmax, b_l_roi_classes] = self.depth_normal_model(self.a_l_real, self.boxes_l) 38 | b_l_classes_final = fg_bg_maxpooling(b_l_classes_nosoftmax, b_l_roi_classes_nosoftmax) 39 | self.b_l_fake_final = class_depth(b_l_classes_final) 40 | 41 | self.a_m_real = data['A_m'].cuda() 42 | self.boxes_m = 
data['bbox_m'].to(device=self.a_m_real.device) 43 | [b_m_classes_nosoftmax, b_m_classes], [b_m_roi_classes_nosoftmax, b_m_roi_classes] = self.depth_normal_model(self.a_m_real, self.boxes_m) 44 | b_m_classes_final = fg_bg_maxpooling(b_m_classes_nosoftmax, b_m_roi_classes_nosoftmax) 45 | self.b_m_fake_final = class_depth(b_m_classes_final) 46 | 47 | self.a_r_real = data['A_r'].cuda() 48 | self.boxes_r = data['bbox_r'].to(device=self.a_r_real.device) 49 | [b_r_classes_nosoftmax, b_r_classes], [b_r_roi_classes_nosoftmax, b_r_roi_classes] = self.depth_normal_model(self.a_r_real, self.boxes_r) 50 | b_r_classes_final = fg_bg_maxpooling(b_r_classes_nosoftmax, b_r_roi_classes_nosoftmax) 51 | self.b_r_fake_final = class_depth(b_r_classes_final) 52 | 53 | out = kitti_merge_imgs(self.b_l_fake_final, self.b_m_fake_final, self.b_r_fake_final, torch.squeeze(data['B_raw']).shape, data['crop_lmr']) 54 | return {'b_fake': out} 55 | 56 | 57 | class ModelLoss(object): 58 | def __init__(self): 59 | super(ModelLoss, self).__init__() 60 | self.weight_cross_entropy_loss =weight_crossentropy_loss 61 | self.rois_weight_cross_entropy_loss =rois_weight_crossentropy_loss 62 | 63 | 64 | def criterion(self, pred_softmax, pred_nosoftmax, pred_softmax_roi, pred_nosoftmax_roi, data, epoch): 65 | loss = {} 66 | # transfer output and gt 67 | pred_depth = class_depth(pred_softmax) 68 | 69 | #alpha = 0.99 70 | add_alpha = 0.2 71 | add_beta = 0.2 72 | 73 | # bg 74 | loss_entropy, valid_num = self.weight_cross_entropy_loss(pred_nosoftmax, data['B_classes'], data) 75 | loss_entropy_rois, valid_num_roi = self.rois_weight_cross_entropy_loss(pred_nosoftmax, data['B_rois_classes'], data) 76 | 77 | loss['bg_wcel_loss_fg'] = loss_entropy_rois / valid_num_roi 78 | loss['bg_wcel_loss_bg'] = (loss_entropy - loss_entropy_rois) / (valid_num - valid_num_roi) 79 | loss['bg_wcel_loss'] = (1 - add_beta) * loss['bg_wcel_loss_bg'] + add_beta * loss['bg_wcel_loss_fg'] 80 | 81 | # fg 82 | fg_loss_entropy, fg_valid_num = self.weight_cross_entropy_loss(pred_nosoftmax_roi, data['B_classes'], data) 83 | fg_loss_entropy_rois, fg_valid_num_roi = self.rois_weight_cross_entropy_loss(pred_nosoftmax_roi, data['B_rois_classes'], data) 84 | 85 | loss['fg_wcel_loss_bg'] = (fg_loss_entropy - fg_loss_entropy_rois) / (fg_valid_num - fg_valid_num_roi) 86 | loss['fg_wcel_loss_fg'] = fg_loss_entropy_rois / fg_valid_num_roi 87 | loss['fg_wcel_loss'] = (1 - add_alpha) * loss['fg_wcel_loss_fg'] + add_alpha * loss['fg_wcel_loss_bg'] 88 | 89 | loss['total_loss'] = loss['bg_wcel_loss'] + loss['fg_wcel_loss'] 90 | return loss 91 | 92 | 93 | class ModelOptimizer(object): 94 | def __init__(self, model): 95 | super(ModelOptimizer, self).__init__() 96 | backbone_params = [] 97 | backbone_params_names = [] 98 | nonbackbone_others_params = [] 99 | nonbackbone_others_params_names = [] 100 | nograd_param_names = [] 101 | 102 | for key, value in dict(model.named_parameters()).items(): 103 | if value.requires_grad: 104 | if 'res' in key: 105 | backbone_params.append(value) 106 | backbone_params_names.append(key) 107 | else: 108 | nonbackbone_others_params.append(value) 109 | nonbackbone_others_params_names.append(key) 110 | else: 111 | nograd_param_names.append(key) 112 | 113 | lr_resnet = cfg.TRAIN.BASE_LR 114 | lr_fcn = cfg.TRAIN.BASE_LR * cfg.TRAIN.DIFF_LR 115 | weight_decay = 0.0005 116 | 117 | net_params = [ 118 | {'params': backbone_params, 119 | 'lr': lr_resnet, 120 | 'weight_decay': weight_decay}, 121 | {'params': nonbackbone_others_params, 122 | 'lr': lr_fcn, 123 | 
'weight_decay': weight_decay}, 124 | ] 125 | self.optimizer = torch.optim.SGD(net_params, momentum=0.9) 126 | def optim(self, loss): 127 | self.optimizer.zero_grad() 128 | loss_all = loss['total_loss'] 129 | loss_all.backward() 130 | self.optimizer.step() 131 | 132 | 133 | class DepthModel(nn.Module): 134 | def __init__(self): 135 | super(DepthModel, self).__init__() 136 | bottom_up_model = 'lateral_net.lateral_' + cfg.MODEL.ENCODER 137 | self.lateral_modules = get_func(bottom_up_model)() 138 | self.topdown_modules = lateral_net.fcn_topdown(cfg.MODEL.ENCODER) 139 | 140 | def forward(self, x, boxlist): 141 | lateral_out, backbone_stage_size = self.lateral_modules(x) 142 | # out: [nosoftmax, softmax] 143 | out, out_roi = self.topdown_modules(lateral_out, backbone_stage_size, boxlist) 144 | return out, out_roi 145 | 146 | def cal_params(model): 147 | model_dict = model.state_dict() 148 | paras = np.sum(p.numel() for p in model.parameters() if p.requires_grad) 149 | sum = 0 150 | 151 | for key in model_dict.keys(): 152 | print(key) 153 | if 'layer5' not in key: 154 | if 'running' not in key: 155 | print(key) 156 | ss = model_dict[key].size() 157 | temp = 1 158 | for s in ss: 159 | temp = temp * s 160 | print(temp) 161 | sum = sum + temp 162 | print(sum) 163 | print(paras) 164 | -------------------------------------------------------------------------------- /experiments/foresee/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.core.config import cfg 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def cross_entropy_loss(pred_nosoftmax, gt_class): 8 | """ 9 | Standard cross-entropy loss 10 | :param pred_nosoftmax: predicted label 11 | :param gt_class: target label 12 | :return: 13 | """ 14 | gt_class = torch.squeeze(gt_class) 15 | gt_class = gt_class.to(device=pred_nosoftmax.device, dtype=torch.int64) 16 | entropy = torch.nn.CrossEntropyLoss(ignore_index=cfg.MODEL.DECODER_OUTPUT_C+1) 17 | loss = entropy(pred_nosoftmax, gt_class) 18 | return loss 19 | 20 | 21 | def weight_crossentropy_loss(pred_nosoftmax, gt, data): 22 | """ 23 | Weighted Cross-entropy Loss 24 | :param pred_nosoftmax: predicted label 25 | :param gt: target label 26 | """ 27 | invalid_side = data['invalid_side'] 28 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 29 | weight = cfg.DATA.WCE_LOSS_WEIGHT 30 | weight /= torch.sum(weight, 1, keepdim=True) 31 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 32 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 33 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 34 | 35 | gt_reshape = gt.reshape(-1, 1) 36 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 37 | weight = torch.matmul(one_hot, weight) 38 | weight_log_pred = weight * log_pred 39 | 40 | valid_pixes = torch.tensor([0], device=pred_nosoftmax.device, dtype=torch.float) 41 | for i in range(gt.size(0)): 42 | valid_gt = gt[i, :, int(invalid_side[i][0]):gt.size(2)-int(invalid_side[i][1]), :] 43 | valid_pixes += valid_gt.size(1) * valid_gt.size(2) 44 | loss_sum = -1 * torch.sum(weight_log_pred) 45 | return loss_sum, valid_pixes 46 | 47 | def rois_weight_crossentropy_loss(pred_nosoftmax, gt, data): 48 | """ 49 | Weighted Cross-entropy Loss 50 | :param pred_nosoftmax: predicted label 51 | :param gt: target label 52 | """ 53 | invalid_side = 
data['invalid_side'] 54 | rois_mask = data['rois_mask'] 55 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 56 | weight = cfg.DATA.WCE_LOSS_WEIGHT 57 | weight /= torch.sum(weight, 1, keepdim=True) 58 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 59 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 60 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 61 | 62 | gt_reshape = gt.reshape(-1, 1) 63 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 64 | weight = torch.matmul(one_hot, weight) 65 | weight_log_pred = weight * log_pred 66 | 67 | valid_pixels = max(rois_mask.sum(), 1) 68 | loss_sum = -1 * torch.sum(weight_log_pred) 69 | return loss_sum, valid_pixels 70 | 71 | def rois_scale_invariant_loss(pred_depth, data): 72 | """ 73 | Follow Eigen paper, add silog loss, for KITTI benchmark 74 | :param pred_depth: 75 | :param data: 76 | :return: 77 | """ 78 | invalid_side = data['invalid_side'] 79 | gt_depth = data['B'].cuda() 80 | 81 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 82 | 83 | loss_mean = torch.tensor([0.]).cuda() 84 | for j in range(pred_depth.size(0)): 85 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 86 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 87 | valid_rois_mask = rois_mask[j, :, int(invalid_side[j][0]): rois_mask.size(2) - int(invalid_side[j][1]), :] 88 | 89 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 90 | diff_log = diff_log * valid_rois_mask.to(dtype=diff_log.dtype) 91 | 92 | #size = torch.numel(diff_log) 93 | size = torch.sum(valid_rois_mask) 94 | if size == 0: 95 | continue 96 | 97 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 98 | loss = loss_mean / pred_depth.size(0) 99 | return loss 100 | 101 | 102 | def scale_invariant_loss(pred_depth, data): 103 | """ 104 | Follow Eigen paper, add silog loss, for KITTI benchmark 105 | :param pred_depth: 106 | :param data: 107 | :return: 108 | """ 109 | invalid_side = data['invalid_side'] 110 | gt_depth = data['B'].cuda() 111 | 112 | 113 | loss_mean = torch.tensor([0.]).cuda() 114 | for j in range(pred_depth.size(0)): 115 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 116 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 117 | 118 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 119 | 120 | size = torch.numel(diff_log) 121 | #size = torch.sum(valid_rois_mask) 122 | #if size == 0: 123 | # continue 124 | 125 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 126 | loss = loss_mean / pred_depth.size(0) 127 | return loss 128 | 129 | 130 | def berhu_loss(pred_depth, data, scale=80.): 131 | """ 132 | :param pred_depth: 133 | :param data: 134 | :return: 135 | """ 136 | huber_threshold = 0.2 137 | 138 | invalid_side = data['invalid_side'] 139 | gt_depth = data['B'].cuda() 140 | 141 | mask = gt_depth > 0 142 | 143 | pred_depth = pred_depth * mask.to(dtype=pred_depth.dtype) 144 | gt_depth = gt_depth * mask.to(dtype=gt_depth.dtype) 145 | 146 | diff = torch.abs(gt_depth - pred_depth) 147 | delta = huber_threshold * torch.max(diff).data.cpu() 148 | 149 | part1 = -F.threshold(-diff, -delta, 0.) 
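# berHu (reverse Huber): |x| for |x| <= delta, (x^2 + delta^2) / (2 * delta) otherwise.
# part1 above keeps the linear branch (clipped to 0 beyond delta); part2 below
# reconstructs the quadratic branch via F.threshold and rescales it by 2 * delta.
# For example, with delta = 2 an error of 1 m contributes 1, while an error of
# 4 m contributes (16 + 4) / 4 = 5.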
150 | part2 = F.threshold(diff**2 + delta**2, 2*delta**2, 0.) 151 | part2 = part2 / (2.*delta) 152 | 153 | loss = part1 + part2 154 | 155 | loss = loss[mask] 156 | loss = torch.mean(loss) 157 | 158 | return loss 159 | 160 | 161 | def rmse_log_loss(pred_depth, data, scale=80.): 162 | """ 163 | :param pred_depth: 164 | :param data: 165 | :return: 166 | """ 167 | 168 | gt_depth = data['B'].cuda() 169 | mask = gt_depth > 0 170 | 171 | pred_depth = pred_depth * scale 172 | gt_depth = gt_depth * scale 173 | 174 | diff = torch.log(gt_depth) - torch.log(pred_depth) 175 | diff = diff[mask] 176 | 177 | loss = torch.sqrt(torch.mean(diff**2)) 178 | return loss 179 | 180 | 181 | def rmse_loss(pred_depth, data, scale=80.): 182 | """ 183 | :param pred_depth: 184 | :param data: 185 | :return: 186 | """ 187 | 188 | gt_depth = data['B'].cuda() 189 | mask = gt_depth > 0 190 | 191 | pred_depth = pred_depth 192 | gt_depth = gt_depth 193 | 194 | diff = gt_depth - pred_depth 195 | diff = diff[mask] 196 | 197 | loss = torch.sqrt(torch.mean(diff**2)) 198 | return loss 199 | 200 | def mse_loss(pred_depth, data, scale=80.): 201 | """ 202 | :param pred_depth: 203 | :param data: 204 | :return: 205 | """ 206 | 207 | gt_depth = data['B'].cuda() 208 | mask = gt_depth > 0 209 | 210 | pred_depth = pred_depth 211 | gt_depth = gt_depth 212 | 213 | diff = gt_depth - pred_depth 214 | diff = diff[mask] 215 | 216 | loss = torch.mean(diff**2) 217 | return loss 218 | 219 | 220 | def rois_rmse_log_loss(pred_depth, data, scale=80.): 221 | """ 222 | :param pred_depth: 223 | :param data: 224 | :return: 225 | """ 226 | 227 | gt_depth = data['B'].cuda() 228 | 229 | mask = gt_depth > 0 230 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 231 | mask = mask & rois_mask 232 | 233 | pred_depth = pred_depth 234 | gt_depth = gt_depth 235 | 236 | diff = torch.log(gt_depth) - torch.log(pred_depth) 237 | diff = diff[mask] 238 | 239 | loss = torch.sqrt(torch.mean(diff**2)) 240 | return loss 241 | -------------------------------------------------------------------------------- /experiments/foresee/pc/bin2obj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def bin2obj(path, output_path): 6 | 7 | pts = np.fromfile(open(path, 'rb'), np.single).reshape([-1, 4]) 8 | 9 | f = open(output_path, 'w') 10 | 11 | for i in range(pts.shape[0]): 12 | f.write('v %f %f %f %f %f %f\n' % (pts[i][0], pts[i][1], pts[i][2], pts[i][3], pts[i][3], pts[i][3])) 13 | 14 | f.close 15 | 16 | 17 | if __name__ == "__main__": 18 | src_path = "pseudo-lidar/foresee/training/000039.bin" 19 | output_path = './' + os.path.basename(src_path).split('.')[0] + '.obj' 20 | bin2obj(src_path, output_path) 21 | -------------------------------------------------------------------------------- /experiments/foresee/pc/gen.sh: -------------------------------------------------------------------------------- 1 | python kitti_prediction.py \ 2 | --dataroot ../../../datasets/KITTI_object \ 3 | --dataset kitti_prediction \ 4 | --load_ckpt ../epoch19_step18000.pth \ 5 | --pcd_dir pseudo-lidar/foresee/training \ 6 | --encoder ResNeXt101_32x4d_body_stride16 \ 7 | --decoder_out_c 100 \ 8 | --phase inference 9 | 10 | -------------------------------------------------------------------------------- /experiments/foresee/pc/gen_colorps.sh: -------------------------------------------------------------------------------- 1 | python kitti_prediction_colorps.py \ 2 | --dataroot ../../../datasets/KITTI_object \ 3 | 
--dataset kitti_prediction \ 4 | --load_ckpt ../epoch19_step18000.pth \ 5 | --pcd_dir pseudo-lidar/ForSeE.colorps/training \ 6 | --encoder ResNeXt101_32x4d_body_stride16 \ 7 | --decoder_out_c 100 \ 8 | --phase val 9 | 10 | -------------------------------------------------------------------------------- /experiments/foresee/pc/kitti_prediction.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(BASE_DIR))) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | sys.path.append(os.path.dirname(BASE_DIR)) 11 | 12 | from tools.parse_arg_test import TestOptions 13 | from data.load_dataset import CustomerDataLoader 14 | from lib.utils.net_tools import load_ckpt 15 | from lib.utils.logging import setup_logging, SmoothedValue 16 | 17 | from depth_normal_model_maxp_bfsoftmax import DepthNormal 18 | 19 | logger = setup_logging(__name__) 20 | 21 | from IPython import embed 22 | 23 | # Add by users 24 | #pcd_folder = os.path.join(BASE_DIR, 'output') 25 | calib_fold = os.path.join(ROOT_DIR, 'datasets/KITTI_object/training/calib') 26 | 27 | def main(): 28 | test_args = TestOptions().parse() 29 | test_args.thread = 1 # test code only supports thread = 1 30 | test_args.batchsize = 1 # test code only supports batchSize = 1 31 | 32 | pcd_folder = test_args.pcd_dir 33 | if not os.path.exists(pcd_folder): 34 | os.makedirs(pcd_folder) 35 | 36 | data_loader = CustomerDataLoader(test_args) 37 | test_datasize = len(data_loader) 38 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 39 | # load model 40 | model = DepthNormal() 41 | # evaluate mode 42 | model.eval() 43 | 44 | # load checkpoint 45 | if test_args.load_ckpt: 46 | load_ckpt(test_args, model) 47 | model.cuda() 48 | model = torch.nn.DataParallel(model) 49 | 50 | for i, data in enumerate(data_loader): 51 | out = model.module.inference(data) 52 | pred_depth = np.squeeze(out['b_fake']) * 80. 
# [h, w] 53 | pred_conf = np.squeeze(out['b_fake_conf']) # [c, h, w] 54 | 55 | # the image size has been padded to the size (385, 1243) 56 | pred_depth_crop = pred_depth[data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 57 | pred_conf_crop = pred_conf[:, data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 58 | 59 | #sample_th = 0.15 60 | #sample_mask = get_sample_mask(pred_conf_crop.cpu().numpy(), threshold=sample_th) # [h, w] 61 | 62 | ####################################################################################### 63 | # add by users 64 | img_name = data['A_paths'][0].split('/')[-1][:-4] 65 | calib_name = img_name + '.txt' 66 | calib_dir = os.path.join(calib_fold, calib_name) 67 | camera_para = np.genfromtxt(calib_dir, delimiter=' ', skip_footer= 3, dtype=None) 68 | P3_0 = camera_para[3] 69 | P2_0 = camera_para[2] 70 | P3_2 = P3_0 71 | P3_2[4] -= P2_0[4] 72 | R0_rect = np.genfromtxt(calib_dir, delimiter=' ', skip_header=4, skip_footer=2) 73 | Tr_velo_to_cam0 = np.genfromtxt(calib_dir, delimiter=' ', skip_header=5, skip_footer=1) 74 | 75 | pcd_cam2 = reconstruct_3D(pred_depth_crop.cpu().numpy(), P3_2[3], P3_2[7], P3_2[1], P3_2[6]) 76 | # Transfer points in cam2 coordinate to cam0 coordinate 77 | pcd_cam0 = pcd_cam2 - np.array([[[P2_0[4] / P2_0[1]]], [[P2_0[8] / P2_0[1]]], [[P2_0[12] / P2_0[1]]]]) 78 | 79 | # Transfer points in cam0 coordinate to velo coordinate 80 | pcd_velo = transfer_points_in_cam0_to_velo(pcd_cam0, R0_rect, Tr_velo_to_cam0) 81 | 82 | rgb = data['A_raw'][0].cpu().numpy() 83 | 84 | save_bin(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.bin', sample_mask=None) 85 | #save_ply(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.ply', sample_mask=None) 86 | #save_ply(pcd_cam2, rgb, os.path.join(pcd_folder, img_name) + '.ply') 87 | print('saved', img_name) 88 | ####################################################################################### 89 | 90 | 91 | ########################################################################## 92 | # others 93 | def get_sample_mask(conf, threshold): 94 | max_conf = np.amax(conf, axis=0) # [h, w] 95 | print(max_conf.shape) 96 | return max_conf >= threshold 97 | 98 | def transfer_points_in_cam0_to_velo(pcd_cam0, R_rect0, T_velo_cam0): 99 | pcd_cam0_3n = pcd_cam0.reshape((3, -1)) 100 | R_rect0 = np.array(R_rect0[1:], dtype=np.float64).reshape((3, 3)) 101 | R_rect0_inv = np.linalg.inv(R_rect0) 102 | 103 | # X_cam0_raw = (R_rect0)^-1 * X_cam0 104 | pcd_cam0_raw = np.matmul(R_rect0_inv, pcd_cam0_3n) 105 | 106 | T_velo_cam0 = np.array(T_velo_cam0[1:], dtype=np.float64).reshape((3, 4)) 107 | R_velo_cam0 = T_velo_cam0[:, 0:3] 108 | T_velo_cam0 = T_velo_cam0[:, 3] 109 | R_cam0_velo = np.linalg.inv(R_velo_cam0) 110 | T_cam0_velo = -np.matmul(R_cam0_velo, T_velo_cam0) 111 | 112 | # X_velo = R*X_cam0 + T 113 | T_cam0_velo = T_cam0_velo[:, np.newaxis] 114 | pcd_velo_3n = np.matmul(R_cam0_velo, pcd_cam0_raw) + T_cam0_velo 115 | pcd_velo = pcd_velo_3n.reshape(3, pcd_cam0.shape[1], pcd_cam0.shape[2]) 116 | return pcd_velo 117 | 118 | 119 | def reconstruct_3D(depth, cu, cv, fx, fy): 120 | width = depth.shape[1] 121 | height = depth.shape[0] 122 | row = np.arange(0, width, 1) 123 | u = np.array([row for _ in np.arange(height)]) 124 | col = np.arange(0, height, 1) 125 | v = np.array([col for _ in np.arange(width)]) 126 | v = v.transpose(1, 0) 127 | 128 | x = (u - cu) * depth / fx 129 | y = (v - cv) * depth / fy 130 | z = depth 131 | 132 | x = x[np.newaxis, :, :] 133 | y = y[np.newaxis, :, :] 134 | z = z[np.newaxis, :, :] 135 | return 
np.concatenate([x, y, z], axis=0) 136 | 137 | def save_ply(pcd, rgb, path, sample_mask=None): 138 | width = rgb.shape[1] 139 | height = rgb.shape[0] 140 | x = np.reshape(pcd[0], width * height) 141 | y = np.reshape(pcd[1], width * height) 142 | z = np.reshape(pcd[2], width * height) 143 | 144 | rgb = np.reshape(rgb, (width * height, 3)) 145 | 146 | if sample_mask is not None: 147 | sample_mask = np.reshape(sample_mask, width * height) 148 | x = x[sample_mask] 149 | y = y[sample_mask] 150 | z = z[sample_mask] 151 | 152 | rgb = rgb[sample_mask, :] 153 | 154 | r = rgb[:, 2] 155 | g = rgb[:, 1] 156 | b = rgb[:, 0] 157 | r = np.squeeze(r) 158 | g = np.squeeze(g) 159 | b = np.squeeze(b) 160 | 161 | ply_head = 'ply\n' \ 162 | 'format ascii 1.0\n' \ 163 | 'element vertex %d\n' \ 164 | 'property float x\n' \ 165 | 'property float y\n' \ 166 | 'property float z\n' \ 167 | 'property uchar red\n' \ 168 | 'property uchar green\n' \ 169 | 'property uchar blue\n' \ 170 | 'end_header' % r.shape[0] 171 | # ---- Save ply data to disk 172 | np.savetxt(path, np.column_stack((x, y, z, r, g, b)), fmt="%f %f %f %d %d %d", header=ply_head, comments='') 173 | ########################################################################## 174 | 175 | 176 | def save_bin(pcd, rgb, path, sample_mask=None): 177 | width = rgb.shape[1] 178 | height = rgb.shape[0] 179 | xyz = np.reshape(pcd, [3, width * height]) 180 | xyz = np.swapaxes(xyz, 0, 1) 181 | 182 | #rgb = np.reshape(rgb, (width * height, 3)) 183 | 184 | if sample_mask is not None: 185 | sample_mask = np.reshape(sample_mask, width * height) 186 | xyz = xyz[sample_mask, :] 187 | #rgb = rgb[sample_mask, :] 188 | 189 | # remove points higher than 1m. 190 | height_filter = xyz[:, 2] <= 1. 191 | xyz = xyz[height_filter, :] 192 | 193 | # add reflectance 194 | r = np.ones([xyz.shape[0], 1]) 195 | xyzr = np.hstack([xyz, r]).astype(np.single) 196 | 197 | f = open(path, 'wb') 198 | f.write(xyzr) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /experiments/foresee/pc/kitti_prediction_colorps.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(BASE_DIR))) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | sys.path.append(os.path.dirname(BASE_DIR)) 11 | 12 | from tools.parse_arg_test import TestOptions 13 | from data.load_dataset import CustomerDataLoader 14 | from lib.utils.net_tools import load_ckpt 15 | from lib.utils.logging import setup_logging, SmoothedValue 16 | 17 | from depth_normal_model_maxp_bfsoftmax import DepthNormal 18 | 19 | logger = setup_logging(__name__) 20 | 21 | from IPython import embed 22 | 23 | # Add by users 24 | #pcd_folder = os.path.join(BASE_DIR, 'output') 25 | calib_fold = os.path.join(ROOT_DIR, 'datasets/KITTI_object/training/calib') 26 | 27 | def main(): 28 | test_args = TestOptions().parse() 29 | test_args.thread = 1 # test code only supports thread = 1 30 | test_args.batchsize = 1 # test code only supports batchSize = 1 31 | 32 | pcd_folder = test_args.pcd_dir 33 | if not os.path.exists(pcd_folder): 34 | os.makedirs(pcd_folder) 35 | 36 | data_loader = CustomerDataLoader(test_args) 37 | test_datasize = len(data_loader) 38 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 39 | # load model 40 | 
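# Overall flow of this script: predict a dense depth map with DepthNormal, strip the
# padding added by the dataloader, back-project pixels with the intrinsics parsed from
# the calib file (reconstruct_3D), and save a colored .ply point cloud. Unlike
# kitti_prediction.py, the points here are kept in the camera frame (pcd_cam2) and
# written with save_ply rather than converted to velodyne coordinates and saved as .bin.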
model = DepthNormal() 41 | # evaluate mode 42 | model.eval() 43 | 44 | # load checkpoint 45 | if test_args.load_ckpt: 46 | load_ckpt(test_args, model) 47 | model.cuda() 48 | model = torch.nn.DataParallel(model) 49 | 50 | for i, data in enumerate(data_loader): 51 | out = model.module.inference(data) 52 | pred_depth = np.squeeze(out['b_fake']) * 80. # [h, w] 53 | pred_conf = np.squeeze(out['b_fake_conf']) # [c, h, w] 54 | 55 | # the image size has been padded to the size (385, 1243) 56 | pred_depth_crop = pred_depth[data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 57 | pred_conf_crop = pred_conf[:, data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 58 | 59 | #sample_th = 0.15 60 | #sample_mask = get_sample_mask(pred_conf_crop.cpu().numpy(), threshold=sample_th) # [h, w] 61 | 62 | ####################################################################################### 63 | # add by users 64 | img_name = data['A_paths'][0].split('/')[-1][:-4] 65 | calib_name = img_name + '.txt' 66 | calib_dir = os.path.join(calib_fold, calib_name) 67 | camera_para = np.genfromtxt(calib_dir, delimiter=' ', skip_footer= 3, dtype=None) 68 | P3_0 = camera_para[3] 69 | P2_0 = camera_para[2] 70 | P3_2 = P3_0 71 | P3_2[4] -= P2_0[4] 72 | R0_rect = np.genfromtxt(calib_dir, delimiter=' ', skip_header=4, skip_footer=2) 73 | Tr_velo_to_cam0 = np.genfromtxt(calib_dir, delimiter=' ', skip_header=5, skip_footer=1) 74 | 75 | pcd_cam2 = reconstruct_3D(pred_depth_crop.cpu().numpy(), P3_2[3], P3_2[7], P3_2[1], P3_2[6]) 76 | # Transfer points in cam2 coordinate to cam0 coordinate 77 | pcd_cam0 = pcd_cam2 - np.array([[[P2_0[4] / P2_0[1]]], [[P2_0[8] / P2_0[1]]], [[P2_0[12] / P2_0[1]]]]) 78 | 79 | # Transfer points in cam0 coordinate to velo coordinate 80 | pcd_velo = transfer_points_in_cam0_to_velo(pcd_cam0, R0_rect, Tr_velo_to_cam0) 81 | 82 | rgb = data['A_raw'][0].cpu().numpy() 83 | 84 | #save_bin(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.bin', sample_mask=None) 85 | #save_ply(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.ply', sample_mask=None) 86 | save_ply(pcd_cam2, rgb, os.path.join(pcd_folder, img_name) + '.ply') 87 | print('saved', img_name) 88 | ####################################################################################### 89 | 90 | 91 | ########################################################################## 92 | # others 93 | def get_sample_mask(conf, threshold): 94 | max_conf = np.amax(conf, axis=0) # [h, w] 95 | print(max_conf.shape) 96 | return max_conf >= threshold 97 | 98 | def transfer_points_in_cam0_to_velo(pcd_cam0, R_rect0, T_velo_cam0): 99 | pcd_cam0_3n = pcd_cam0.reshape((3, -1)) 100 | R_rect0 = np.array(R_rect0[1:], dtype=np.float64).reshape((3, 3)) 101 | R_rect0_inv = np.linalg.inv(R_rect0) 102 | 103 | # X_cam0_raw = (R_rect0)^-1 * X_cam0 104 | pcd_cam0_raw = np.matmul(R_rect0_inv, pcd_cam0_3n) 105 | 106 | T_velo_cam0 = np.array(T_velo_cam0[1:], dtype=np.float64).reshape((3, 4)) 107 | R_velo_cam0 = T_velo_cam0[:, 0:3] 108 | T_velo_cam0 = T_velo_cam0[:, 3] 109 | R_cam0_velo = np.linalg.inv(R_velo_cam0) 110 | T_cam0_velo = -np.matmul(R_cam0_velo, T_velo_cam0) 111 | 112 | # X_velo = R*X_cam0 + T 113 | T_cam0_velo = T_cam0_velo[:, np.newaxis] 114 | pcd_velo_3n = np.matmul(R_cam0_velo, pcd_cam0_raw) + T_cam0_velo 115 | pcd_velo = pcd_velo_3n.reshape(3, pcd_cam0.shape[1], pcd_cam0.shape[2]) 116 | return pcd_velo 117 | 118 | 119 | def reconstruct_3D(depth, cu, cv, fx, fy): 120 | width = depth.shape[1] 121 | height = depth.shape[0] 122 | row = np.arange(0, width, 1) 123 | u = 
np.array([row for _ in np.arange(height)]) 124 | col = np.arange(0, height, 1) 125 | v = np.array([col for _ in np.arange(width)]) 126 | v = v.transpose(1, 0) 127 | 128 | x = (u - cu) * depth / fx 129 | y = (v - cv) * depth / fy 130 | z = depth 131 | 132 | x = x[np.newaxis, :, :] 133 | y = y[np.newaxis, :, :] 134 | z = z[np.newaxis, :, :] 135 | return np.concatenate([x, y, z], axis=0) 136 | 137 | def save_ply(pcd, rgb, path, sample_mask=None): 138 | width = rgb.shape[1] 139 | height = rgb.shape[0] 140 | x = np.reshape(pcd[0], width * height) 141 | y = np.reshape(pcd[1], width * height) 142 | z = np.reshape(pcd[2], width * height) 143 | 144 | rgb = np.reshape(rgb, (width * height, 3)) 145 | 146 | if sample_mask is not None: 147 | sample_mask = np.reshape(sample_mask, width * height) 148 | x = x[sample_mask] 149 | y = y[sample_mask] 150 | z = z[sample_mask] 151 | 152 | rgb = rgb[sample_mask, :] 153 | 154 | r = rgb[:, 2] 155 | g = rgb[:, 1] 156 | b = rgb[:, 0] 157 | r = np.squeeze(r) 158 | g = np.squeeze(g) 159 | b = np.squeeze(b) 160 | 161 | ply_head = 'ply\n' \ 162 | 'format ascii 1.0\n' \ 163 | 'element vertex %d\n' \ 164 | 'property float x\n' \ 165 | 'property float y\n' \ 166 | 'property float z\n' \ 167 | 'property uchar red\n' \ 168 | 'property uchar green\n' \ 169 | 'property uchar blue\n' \ 170 | 'end_header' % r.shape[0] 171 | # ---- Save ply data to disk 172 | np.savetxt(path, np.column_stack((x, y, z, r, g, b)), fmt="%f %f %f %d %d %d", header=ply_head, comments='') 173 | ########################################################################## 174 | 175 | 176 | def save_bin(pcd, rgb, path, sample_mask=None): 177 | width = rgb.shape[1] 178 | height = rgb.shape[0] 179 | xyz = np.reshape(pcd, [3, width * height]) 180 | xyz = np.swapaxes(xyz, 0, 1) 181 | 182 | #rgb = np.reshape(rgb, (width * height, 3)) 183 | 184 | if sample_mask is not None: 185 | sample_mask = np.reshape(sample_mask, width * height) 186 | xyz = xyz[sample_mask, :] 187 | #rgb = rgb[sample_mask, :] 188 | 189 | # remove points higher than 1m. 190 | height_filter = xyz[:, 2] <= 1. 
191 | xyz = xyz[height_filter, :] 192 | 193 | # add reflectance 194 | r = np.ones([xyz.shape[0], 1]) 195 | xyzr = np.hstack([xyz, r]).astype(np.single) 196 | 197 | f = open(path, 'wb') 198 | f.write(xyzr) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /experiments/foresee/test.sh: -------------------------------------------------------------------------------- 1 | python val_kitti.py \ 2 | --dataroot ../../datasets/KITTI_object \ 3 | --dataset kitti_object_roi \ 4 | --load_ckpt epoch19_step18000.pth \ 5 | --encoder ResNeXt101_32x4d_body_stride16 \ 6 | --decoder_out_c 100 7 | -------------------------------------------------------------------------------- /experiments/foresee/train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train_kitti.py \ 2 | --dataroot ../../datasets/KITTI_object \ 3 | --dataset kitti_object_roi \ 4 | --encoder ResNeXt101_32x4d_body_stride16 \ 5 | --decoder_out_c 100 \ 6 | --lr 0.001 \ 7 | --batchsize 4 \ 8 | --epoch 0 20 \ 9 | --use_tfboard 10 | 11 | -------------------------------------------------------------------------------- /experiments/foresee/train_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import traceback 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | 11 | from data.load_dataset import CustomerDataLoader 12 | from lib.utils.training_stats import TrainingStats 13 | from lib.utils.evaluate_depth_error import validate_err_kitti 14 | from lib.core.config import cfg, train_args, val_args, merge_cfg_from_file 15 | from lib.utils.net_tools import save_ckpt, load_ckpt 16 | from lib.utils.logging import setup_logging, SmoothedValue 17 | 18 | from depth_normal_model import * 19 | logger = setup_logging(__name__) 20 | 21 | 22 | def train(train_dataloader, model, epoch, loss_func, 23 | optimizer, scheduler, training_stats, val_dataloader=None, val_err=[], ignore_step=-1): 24 | model.train() 25 | epoch_steps = math.ceil(len(train_dataloader) / cfg.TRAIN.BATCH_SIZE) 26 | base_steps = epoch_steps * epoch + ignore_step if ignore_step != -1 else epoch_steps * epoch 27 | for i, data in enumerate(train_dataloader): 28 | if ignore_step != -1 and i > epoch_steps - ignore_step: 29 | return 30 | scheduler.step() # decay lr every iteration 31 | training_stats.IterTic() 32 | out = model(data) 33 | losses = loss_func.criterion(out['b_fake'], out['b_fake_nosoftmax'], out['b_fake_roi'], out['b_fake_roi_nosoftmax'], data, epoch) 34 | optimizer.optim(losses) 35 | 36 | step = base_steps + i + 1 37 | training_stats.UpdateIterStats(losses) 38 | training_stats.IterToc() 39 | training_stats.LogIterStats(step, epoch, optimizer.optimizer, val_err[0]) 40 | 41 | # validate the model 42 | if step % cfg.TRAIN.VAL_STEP == 0 and step != 0 and val_dataloader is not None:# 43 | model.eval() 44 | val_err[0] = val_kitti(val_dataloader, model) 45 | # training mode 46 | model.train() 47 | # save checkpoint 48 | if step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0: 49 | save_ckpt(train_args, step, epoch, model, optimizer.optimizer, scheduler, val_err[0]) 50 | 51 | 52 | def val_kitti(val_dataloader, model): 53 | """ 54 | Validate the model. 
55 | """ 56 | smoothed_absRel = SmoothedValue(len(val_dataloader)) 57 | smoothed_silog = SmoothedValue(len(val_dataloader)) 58 | smoothed_silog2 = SmoothedValue(len(val_dataloader)) 59 | smoothed_criteria = {'err_absRel': smoothed_absRel, 'err_silog': smoothed_silog, 'err_silog2': smoothed_silog2} 60 | # rois-level 61 | rois_smoothed_absRel = SmoothedValue(len(val_dataloader)) 62 | rois_smoothed_silog = SmoothedValue(len(val_dataloader)) 63 | rois_smoothed_silog2 = SmoothedValue(len(val_dataloader)) 64 | rois_smoothed_criteria = {'err_absRel': rois_smoothed_absRel, 'err_silog': rois_smoothed_silog, 'err_silog2': rois_smoothed_silog2} 65 | 66 | # bg-level 67 | bg_smoothed_absRel = SmoothedValue(len(val_dataloader)) 68 | bg_smoothed_silog = SmoothedValue(len(val_dataloader)) 69 | bg_smoothed_silog2 = SmoothedValue(len(val_dataloader)) 70 | bg_smoothed_criteria = {'err_absRel': bg_smoothed_absRel, 'err_silog': bg_smoothed_silog, 'err_silog2': bg_smoothed_silog2} 71 | for i, data in enumerate(val_dataloader): 72 | pred_depth = model.module.inference_kitti(data) 73 | smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw'], smoothed_criteria) 74 | rois_smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw_rois'], rois_smoothed_criteria) 75 | bg_smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw_bg'], bg_smoothed_criteria) 76 | #print(np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)) 77 | val_metrics = {'abs_rel': smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 78 | 'silog': np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 79 | rois_val_metrics = {'abs_rel': rois_smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 80 | 'silog': np.sqrt(rois_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (rois_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 81 | bg_val_metrics = {'abs_rel': bg_smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 82 | 'silog': np.sqrt(bg_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (bg_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 83 | print("global: ", val_metrics) 84 | print("roi: ", rois_val_metrics) 85 | print("bg: ", bg_val_metrics) 86 | return val_metrics 87 | 88 | if __name__=='__main__': 89 | train_dataloader = CustomerDataLoader(train_args) 90 | train_datasize = len(train_dataloader) 91 | gpu_num = torch.cuda.device_count() 92 | merge_cfg_from_file(train_datasize, gpu_num) 93 | 94 | val_dataloader = CustomerDataLoader(val_args) 95 | val_datasize = len(val_dataloader) 96 | 97 | # tensorboard logger 98 | if train_args.use_tfboard: 99 | from tensorboardX import SummaryWriter 100 | tblogger = SummaryWriter(cfg.TRAIN.LOG_DIR) 101 | 102 | # training status for logging 103 | training_stats = TrainingStats(train_args, cfg.TRAIN.LOG_INTERVAL, 104 | tblogger if train_args.use_tfboard else None) 105 | 106 | # total iterations 107 | total_iters = math.ceil(train_datasize / train_args.batchsize) * train_args.epoch[-1] 108 | 109 | # load model 110 | model = DepthNormal() 111 | 112 | if gpu_num != -1: 113 | logger.info('{:>15}: {:<30}'.format('GPU_num', gpu_num)) 114 | logger.info('{:>15}: {:<30}'.format('train_data_size', train_datasize)) 115 | logger.info('{:>15}: {:<30}'.format('val_data_size', val_datasize)) 116 | logger.info('{:>15}: {:<30}'.format('total_iterations', total_iters)) 117 | model.cuda() 
118 | #optimizer 119 | optimizer = ModelOptimizer(model) 120 | #loss function 121 | loss_func = ModelLoss() 122 | 123 | val_err = [{'abs_rel': 0, 'silog': 0}] 124 | 125 | ignore_step = -1 126 | 127 | # Lerning strategy 128 | lr_optim_lambda = lambda iter: (1.0 - iter / (float(total_iters))) ** 0.9 129 | scheduler = torch.optim.lr_scheduler.LambdaLR( 130 | optimizer.optimizer, lr_lambda=lr_optim_lambda) 131 | 132 | #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer.optimizer, milestones=train_args.epoch, gamma=0.1) 133 | 134 | # load checkpoint 135 | if train_args.load_ckpt: 136 | load_ckpt(train_args, model, optimizer.optimizer, scheduler, val_err) 137 | ignore_step = train_args.start_step - train_args.start_epoch * math.ceil(train_datasize / train_args.batchsize) 138 | 139 | if gpu_num != -1: 140 | model = torch.nn.DataParallel(model) 141 | try: 142 | for epoch in range(train_args.start_epoch, cfg.TRAIN.EPOCH[-1]): 143 | # training 144 | train(train_dataloader, model, epoch, loss_func, optimizer, scheduler, training_stats, 145 | val_dataloader, val_err, ignore_step) 146 | ignore_step = -1 147 | 148 | except (RuntimeError, KeyboardInterrupt): 149 | logger.info('Save ckpt on exception ...') 150 | stack_trace = traceback.format_exc() 151 | print(stack_trace) 152 | 153 | finally: 154 | if train_args.use_tfboard: 155 | tblogger.close() 156 | -------------------------------------------------------------------------------- /experiments/foresee/val_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | 11 | from tools.parse_arg_val import ValOptions 12 | from data.load_dataset import CustomerDataLoader 13 | from lib.utils.net_tools import load_ckpt 14 | from lib.utils.evaluate_depth_error import evaluate_err 15 | from lib.utils.net_tools import save_images 16 | from lib.utils.logging import setup_logging, SmoothedValue 17 | 18 | from depth_normal_model import DepthNormal 19 | logger = setup_logging(__name__) 20 | 21 | 22 | if __name__ == '__main__': 23 | test_args = ValOptions().parse() 24 | test_args.thread = 1 # test code only supports thread = 1 25 | test_args.batchsize = 1 # test code only supports batchSize = 1 26 | 27 | data_loader = CustomerDataLoader(test_args) 28 | test_datasize = len(data_loader) 29 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 30 | # load model 31 | model = DepthNormal() 32 | # evaluate mode 33 | model.eval() 34 | 35 | # load checkpoint 36 | if test_args.load_ckpt: 37 | load_ckpt(test_args, model) 38 | model.cuda() 39 | model = torch.nn.DataParallel(model) 40 | 41 | # global 42 | smoothed_absRel = SmoothedValue(test_datasize) 43 | smoothed_rms = SmoothedValue(test_datasize) 44 | smoothed_logRms = SmoothedValue(test_datasize) 45 | smoothed_squaRel = SmoothedValue(test_datasize) 46 | smoothed_silog = SmoothedValue(test_datasize) 47 | smoothed_silog2 = SmoothedValue(test_datasize) 48 | smoothed_log10 = SmoothedValue(test_datasize) 49 | smoothed_delta1 = SmoothedValue(test_datasize) 50 | smoothed_delta2 = SmoothedValue(test_datasize) 51 | smoothed_delta3 = SmoothedValue(test_datasize) 52 | smoothed_criteria = {'err_absRel':smoothed_absRel, 'err_squaRel': smoothed_squaRel, 'err_rms': smoothed_rms, 53 | 'err_silog': smoothed_silog, 'err_logRms': 
smoothed_logRms, 'err_silog2': smoothed_silog2, 54 | 'err_delta1': smoothed_delta1, 'err_delta2': smoothed_delta2, 'err_delta3': smoothed_delta3, 55 | 'err_log10': smoothed_log10} 56 | 57 | # rois 58 | rois_smoothed_absRel = SmoothedValue(test_datasize) 59 | rois_smoothed_rms = SmoothedValue(test_datasize) 60 | rois_smoothed_logRms = SmoothedValue(test_datasize) 61 | rois_smoothed_squaRel = SmoothedValue(test_datasize) 62 | rois_smoothed_silog = SmoothedValue(test_datasize) 63 | rois_smoothed_silog2 = SmoothedValue(test_datasize) 64 | rois_smoothed_log10 = SmoothedValue(test_datasize) 65 | rois_smoothed_delta1 = SmoothedValue(test_datasize) 66 | rois_smoothed_delta2 = SmoothedValue(test_datasize) 67 | rois_smoothed_delta3 = SmoothedValue(test_datasize) 68 | rois_smoothed_criteria = {'err_absRel':rois_smoothed_absRel, 'err_squaRel': rois_smoothed_squaRel, 'err_rms': rois_smoothed_rms, 69 | 'err_silog': rois_smoothed_silog, 'err_logRms': rois_smoothed_logRms, 'err_silog2': rois_smoothed_silog2, 70 | 'err_delta1': rois_smoothed_delta1, 'err_delta2': rois_smoothed_delta2, 'err_delta3': rois_smoothed_delta3, 71 | 'err_log10': rois_smoothed_log10} 72 | 73 | # bg 74 | bg_smoothed_absRel = SmoothedValue(test_datasize) 75 | bg_smoothed_rms = SmoothedValue(test_datasize) 76 | bg_smoothed_logRms = SmoothedValue(test_datasize) 77 | bg_smoothed_squaRel = SmoothedValue(test_datasize) 78 | bg_smoothed_silog = SmoothedValue(test_datasize) 79 | bg_smoothed_silog2 = SmoothedValue(test_datasize) 80 | bg_smoothed_log10 = SmoothedValue(test_datasize) 81 | bg_smoothed_delta1 = SmoothedValue(test_datasize) 82 | bg_smoothed_delta2 = SmoothedValue(test_datasize) 83 | bg_smoothed_delta3 = SmoothedValue(test_datasize) 84 | bg_smoothed_criteria = {'err_absRel':bg_smoothed_absRel, 'err_squaRel': bg_smoothed_squaRel, 'err_rms': bg_smoothed_rms, 85 | 'err_silog': bg_smoothed_silog, 'err_logRms': bg_smoothed_logRms, 'err_silog2': bg_smoothed_silog2, 86 | 'err_delta1': bg_smoothed_delta1, 'err_delta2': bg_smoothed_delta2, 'err_delta3': bg_smoothed_delta3, 87 | 'err_log10': bg_smoothed_log10} 88 | for i, data in enumerate(data_loader): 89 | out = model.module.inference_kitti(data) 90 | pred_depth = np.squeeze(out['b_fake']) 91 | img_path = data['A_paths'] 92 | 93 | if len(data['B_raw'].shape) != 2: 94 | smoothed_criteria = evaluate_err(pred_depth, data['B_raw'], smoothed_criteria, scale=80.) 95 | rois_smoothed_criteria = evaluate_err(pred_depth, data['B_raw_rois'], rois_smoothed_criteria, scale=80.) 96 | bg_smoothed_criteria = evaluate_err(pred_depth, data['B_raw_bg'], bg_smoothed_criteria, scale=80.) 97 | print('processing (%04d)-th image... %s' % (i, img_path)) 98 | print(smoothed_criteria['err_absRel'].GetGlobalAverageValue()) 99 | save_images(data, pred_depth, scale=256.*80.) 
100 | 101 | 102 | LOG_FOUT = open(os.path.join('object_val_results.txt'), 'w') 103 | def log_string(out_str): 104 | LOG_FOUT.write(out_str+'\n') 105 | LOG_FOUT.flush() 106 | print(out_str) 107 | 108 | 109 | if len(data['B_raw'].shape) != 2: 110 | log_string("---image-level----") 111 | log_string("###############absREL ERROR: {}".format(smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 112 | log_string("###############silog ERROR: {}".format(np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 113 | log_string("###############log10 ERROR: {}".format(smoothed_criteria['err_log10'].GetGlobalAverageValue())) 114 | log_string("###############RMS ERROR: {}".format(np.sqrt(smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 115 | log_string("###############delta_1 ERROR: {}".format(smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 116 | log_string("###############delta_2 ERROR: {}".format(smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 117 | log_string("###############delta_3 ERROR: {}".format(smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 118 | log_string("###############squaRel ERROR: {}".format(smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 119 | log_string("###############logRms ERROR: {}".format(np.sqrt(smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 120 | 121 | 122 | log_string("---rois-level----") 123 | log_string("###############absREL ERROR: {}".format(rois_smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 124 | log_string("###############silog ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (rois_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 125 | log_string("###############log10 ERROR: {}".format(rois_smoothed_criteria['err_log10'].GetGlobalAverageValue())) 126 | log_string("###############RMS ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 127 | log_string("###############delta_1 ERROR: {}".format(rois_smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 128 | log_string("###############delta_2 ERROR: {}".format(rois_smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 129 | log_string("###############delta_3 ERROR: {}".format(rois_smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 130 | log_string("###############squaRel ERROR: {}".format(rois_smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 131 | log_string("###############logRms ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 132 | 133 | log_string("---bg-level----") 134 | log_string("###############absREL ERROR: {}".format(bg_smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 135 | log_string("###############silog ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (bg_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 136 | log_string("###############log10 ERROR: {}".format(bg_smoothed_criteria['err_log10'].GetGlobalAverageValue())) 137 | log_string("###############RMS ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 138 | log_string("###############delta_1 ERROR: {}".format(bg_smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 139 | log_string("###############delta_2 ERROR: {}".format(bg_smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 140 | log_string("###############delta_3 ERROR: 
{}".format(bg_smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 141 | log_string("###############squaRel ERROR: {}".format(bg_smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 142 | log_string("###############logRms ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 143 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/__init__.py -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/core/__init__.py -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import numpy as np 8 | from lib.utils.collections import AttrDict 9 | from lib.utils.misc import get_run_name 10 | from tools.parse_arg_train import TrainOptions 11 | from tools.parse_arg_val import ValOptions 12 | 13 | # ---------------------------------------------------------------------------- # 14 | # Load parse for training, val, and test 15 | # ---------------------------------------------------------------------------- # 16 | train_opt = TrainOptions() 17 | train_args = train_opt.parse() 18 | train_opt.print_options(train_args) 19 | 20 | val_opt = ValOptions() 21 | val_args = val_opt.parse() 22 | val_args.batchsize = 1 23 | val_args.thread = 0 24 | val_opt.print_options(val_args) 25 | 26 | __C = AttrDict() 27 | # Consumers can get config by: 28 | cfg = __C 29 | 30 | # Random note: avoid using '.ON' as a config key since yaml converts it to True; 31 | # prefer 'ENABLED' instead 32 | # Root directory of project 33 | __C.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) 34 | __C.EXP_NAME = os.path.dirname(__file__).split('/')[-1] 35 | __C.DATASET = train_args.dataset 36 | # "Fun" fact: the history of where these values comes from is lost (From Detectron lol) 37 | __C.RGB_PIXEL_MEANS = (102.9801, 115.9465, 122.7717) 38 | __C.RGB_PIXEL_VARS = (1, 1, 1) 39 | __C.CROP_SIZE = (385, 513) if 'kitti' in train_args.dataset else (385, 385) #height * width 40 | 41 | # ---------------------------------------------------------------------------- # 42 | # Models configurations 43 | # ---------------------------------------------------------------------------- # 44 | __C.MODEL = AttrDict() 45 | __C.MODEL.INIT_TYPE = 'xavier' 46 | # Configure the model type for the encoder, e.g.ResNeXt50_32x4d_body_stride16 47 | __C.MODEL.ENCODER = train_args.encoder 48 | __C.MODEL.MODEL_REPOSITORY = 'pretrained_model' 49 | __C.MODEL.PRETRAINED_WEIGHTS = train_args.pretrained_model 50 | __C.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = True 51 | 52 | # Configure resnet and resnext 53 | __C.MODEL.RESNET_BOTTLENECK_DIM = [64, 256, 512, 1024, 2048] if 'ResNeXt' in train_args.encoder else [32, 24, 32, 96, 320] 54 | __C.MODEL.RESNET_BLOCK_DIM = [64, 64, 128, 256, 512] 55 | # Place the stride 2 conv on the 1x1 filter 56 | # Use True only 
for the original MSRA ResNet; use False for C2 and Torch models 57 | __C.MODEL.RESNET_STRIDE_1X1 = True 58 | # Set bn type of resnet, bn->batch normalization, affine->affine transformation 59 | __C.MODEL.RESNET_BN_TYPE = 'bn' 60 | 61 | # Freeze the batch normalization layer of pretrained model 62 | __C.MODEL.FREEZE_BACKBONE_BN = False 63 | # Configure the decoder 64 | __C.MODEL.FCN_DIM_IN = [512, 256, 256, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [128, 64, 64, 64, 64, 64] 65 | __C.MODEL.FCN_DIM_OUT = [256, 256, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [64, 64, 64, 64, 64] 66 | __C.MODEL.LATERAL_OUT = [512, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [128, 64, 64, 64] 67 | 68 | 69 | # Configure input and output channel of the model 70 | __C.MODEL.ENCODRE_INPUT_C = 3 71 | __C.MODEL.DECODER_OUTPUT_C = train_args.decoder_out_c 72 | 73 | # Configure weight for different losses 74 | __C.MODEL.DIFF_LOSS_WEIGHT = 6 75 | 76 | # ---------------------------------------------------------------------------- # 77 | # Data configurations 78 | # ---------------------------------------------------------------------------- # 79 | __C.DATA = AttrDict() 80 | __C.DATA.DATA_SET = train_args.dataset 81 | # Minimum depth 82 | __C.DATA.DATA_MIN = 0.01 if 'nyu' in train_args.dataset else 0.015 83 | # Maximum depth 84 | __C.DATA.DATA_MAX = 1.7 if 'nyu' in train_args.dataset else 1.0 85 | # Minimum depth in log space 86 | __C.DATA.DATA_MIN_LOG = np.log10(__C.DATA.DATA_MIN) 87 | # Interval of each range 88 | __C.DATA.DEPTH_RANGE_INTERVAL = (np.log10(__C.DATA.DATA_MAX) - np.log10(__C.DATA.DATA_MIN)) / __C.MODEL.DECODER_OUTPUT_C 89 | # Depth class 90 | __C.DATA.DEPTH_CLASSES = np.array([__C.DATA.DATA_MIN_LOG + __C.DATA.DEPTH_RANGE_INTERVAL * (i + 0.5) for i in range(__C.MODEL.DECODER_OUTPUT_C)]) 91 | __C.DATA.WCE_LOSS_WEIGHT = [[np.exp(-0.2 * (i - j) ** 2) for i in range(__C.MODEL.DECODER_OUTPUT_C)] 92 | for j in np.arange(__C.MODEL.DECODER_OUTPUT_C)] 93 | __C.DATA.LOAD_MODEL_NAME = train_args.load_ckpt 94 | # ---------------------------------------------------------------------------- # 95 | # Training configurations 96 | # ---------------------------------------------------------------------------- # 97 | __C.TRAIN = AttrDict() 98 | # Load run name, which is the combination of running time and host name 99 | __C.TRAIN.RUN_NAME = get_run_name() 100 | __C.TRAIN.OUTPUT_ROOT_DIR = './outputs' 101 | #__C.TRAIN.OUTPUT_ROOT_DIR = '/mnt/cephfs_hl/vc/wxl/depth/' + __C.EXP_NAME 102 | # Dir for checkpoint and logs 103 | __C.TRAIN.LOG_DIR = os.path.join(__C.TRAIN.OUTPUT_ROOT_DIR, train_args.dataset + '_' + cfg.TRAIN.RUN_NAME) 104 | # Differ the learning rate between encoder and decoder 105 | __C.TRAIN.DIFF_LR = train_args.scale_decoder_lr 106 | __C.TRAIN.BASE_LR = train_args.lr 107 | __C.TRAIN.MAX_ITER = 0 108 | # Set training epoches, end at the last epoch of list 109 | __C.TRAIN.EPOCH = train_args.epoch 110 | # Snapshot (model checkpoint) period 111 | __C.TRAIN.SNAPSHOT_ITERS = 6000 112 | __C.TRAIN.VAL_STEP = 6000 113 | __C.TRAIN.BATCH_SIZE = train_args.batchsize 114 | __C.TRAIN.GPU_NUM = 1 115 | # Steps for LOG interval 116 | __C.TRAIN.LOG_INTERVAL = 20 117 | __C.TRAIN.LR_DECAY_MILESTONES = __C.TRAIN.EPOCH[1:-1] 118 | 119 | 120 | 121 | def merge_cfg_from_file(datasize, gpu_num): 122 | __C.TRAIN.MAX_ITER = round(datasize / __C.TRAIN.BATCH_SIZE + 0.5) * __C.TRAIN.EPOCH[-1] 123 | __C.TRAIN.GPU_NUM = gpu_num 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 
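
The log-space depth discretization defined above (DATA_MIN, DATA_MAX, DEPTH_RANGE_INTERVAL, DEPTH_CLASSES and the soft WCE_LOSS_WEIGHT matrix) is what lets the decoder treat depth estimation as classification; lib/models/image_transfer.py converts between the two views with depth_class() and class_depth(). The following is a minimal standalone sketch of that round trip for a single value, assuming the KITTI settings used elsewhere in the repository (decoder_out_c = 100 as in experiments/foresee/train.sh, DATA_MIN = 0.015, DATA_MAX = 1.0, and an 80 m metric scale); it is an illustration only, not code from the source tree.

```python
import numpy as np

# Assumed KITTI configuration: 100 log-spaced depth bins over [0.015, 1.0],
# with normalized depth = metric depth / 80 m.
DATA_MIN, DATA_MAX, NUM_BINS, SCALE = 0.015, 1.0, 100, 80.0

data_min_log = np.log10(DATA_MIN)
interval = (np.log10(DATA_MAX) - data_min_log) / NUM_BINS
bin_centres = data_min_log + interval * (np.arange(NUM_BINS) + 0.5)  # cfg.DATA.DEPTH_CLASSES

depth_m = 40.0                          # ground-truth depth in metres
normalized = depth_m / SCALE            # network-space depth in [DATA_MIN, DATA_MAX]

# depth -> class index, mirroring depth_class() in lib/models/image_transfer.py
cls = int(round((np.log10(normalized) - data_min_log) / interval))
cls = min(cls, NUM_BINS - 1)

# class index -> depth, mirroring class_depth() with a one-hot (hard) prediction
recovered_m = (10.0 ** bin_centres[cls]) * SCALE

print(cls, recovered_m)                 # 83, ~40.0 m: quantization error stays below one bin width
```

The WCE_LOSS_WEIGHT table defined above uses the same bin indexing, weight[j][i] = exp(-0.2 * (i - j)**2), so the weighted cross-entropy in lib/models/loss.py effectively spreads the ground-truth one-hot label across neighbouring bins: probability mass placed near the true class is penalized less than mass placed far from it.
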
-------------------------------------------------------------------------------- /lib/models/MobileNetV2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from lib.core.config import cfg 4 | # ---------------------------------------------------------------------------- # 5 | # Bits for specific architectures (ResNeXt50, ResNeXt101, ...) 6 | # ---------------------------------------------------------------------------- # 7 | def MobileNetV2_body(): 8 | return MobileNetV2() 9 | 10 | def MobileNetV2_body_stride16(): 11 | return MobileNetV2(output_stride=16) 12 | 13 | def MobileNetV2_body_stride8(): 14 | return MobileNetV2(output_stride=8) 15 | 16 | 17 | def conv_bn(inp, oup, stride): 18 | return nn.Sequential( 19 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 20 | nn.BatchNorm2d(oup), 21 | nn.ReLU6(inplace=True) 22 | ) 23 | 24 | 25 | def conv_1x1_bn(inp, oup): 26 | return nn.Sequential( 27 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 28 | nn.BatchNorm2d(oup), 29 | nn.ReLU6(inplace=True) 30 | ) 31 | 32 | 33 | class InvertedResidual(nn.Module): 34 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 35 | super(InvertedResidual, self).__init__() 36 | self.stride = stride 37 | assert stride in [1, 2] 38 | 39 | hidden_dim = round(inp * expand_ratio) 40 | self.use_res_connect = self.stride == 1 and inp == oup 41 | 42 | if expand_ratio == 1: 43 | self.conv = nn.Sequential( 44 | # dw 45 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, groups=hidden_dim, bias=False, padding=dilation, dilation=dilation), 46 | nn.BatchNorm2d(hidden_dim), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | else: 53 | self.conv = nn.Sequential( 54 | # pw 55 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 56 | nn.BatchNorm2d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # dw 59 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, groups=hidden_dim, bias=False, padding=dilation, dilation=dilation), 60 | nn.BatchNorm2d(hidden_dim), 61 | nn.ReLU6(inplace=True), 62 | # pw-linear 63 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 64 | nn.BatchNorm2d(oup), 65 | ) 66 | 67 | def forward(self, x): 68 | if self.use_res_connect: 69 | out = self.conv(x) 70 | out += x 71 | return out 72 | else: 73 | return self.conv(x) 74 | 75 | def add_block(res_setting, input_channel, width_mult=1, dilation=1): 76 | # building inverted residual blocks 77 | block = [] 78 | for t, c, n, s in res_setting: 79 | output_channel = int(c * width_mult) 80 | for i in range(n): 81 | if i == 0: 82 | block.append(InvertedResidual(input_channel, output_channel, s, expand_ratio=t, dilation=dilation)) 83 | else: 84 | block.append(InvertedResidual(input_channel, output_channel, 1, expand_ratio=t, dilation=dilation)) 85 | input_channel = output_channel 86 | return nn.Sequential(*block), output_channel 87 | 88 | 89 | class MobileNetV2(nn.Module): 90 | def __init__(self, width_mult=1., output_stride=32): 91 | super(MobileNetV2, self).__init__() 92 | input_channel = 32 93 | last_channel = 320 94 | self.convX = 5 95 | stride1 = 1 if 32 / output_stride == 4 else 2 96 | stride2 = 1 if 32 / output_stride > 1 else 2 97 | dilation1 = 1 if stride1 == 2 else 2 98 | dilation2 = 1 if stride2 == 2 else (2 if stride1 == 2 else 4) 99 | 100 | interverted_residual_setting_block2 = [ 101 | #t, c, n, s 102 | [1, 16, 1, 1], 103 | [6, 24, 2, 2], 104 | ] 105 | interverted_residual_setting_block3 = [ 106 | # t, c, 
n, s 107 | [6, 32, 3, 2], 108 | ] 109 | interverted_residual_setting_block4 = [ 110 | # t, c, n, s 111 | [6, 64, 4, stride1], 112 | [6, 96, 3, 1], 113 | ] 114 | interverted_residual_setting_block5 = [ 115 | # t, c, n, s 116 | [6, 160, 3, stride2], 117 | [6, 320, 1, 1], 118 | ] 119 | 120 | 121 | # building first layer 122 | #assert cfg.CROP_SIZE[0] % 32 == 0 and cfg.CROP_SIZE[1] % 32 == 0 123 | input_channel = int(input_channel * width_mult) 124 | self.last_channel = last_channel 125 | self.res1 = nn.Sequential(conv_bn(3, input_channel, 2)) 126 | 127 | self.res2, output_channel = add_block(interverted_residual_setting_block2, input_channel, width_mult) 128 | 129 | self.res3, output_channel = add_block(interverted_residual_setting_block3, output_channel, width_mult) 130 | 131 | self.res4, output_channel = add_block(interverted_residual_setting_block4, output_channel, width_mult, dilation1) 132 | 133 | self.res5, output_channel = add_block(interverted_residual_setting_block5, output_channel, width_mult, dilation2) 134 | 135 | self._initialize_weights() 136 | 137 | def forward(self, x): 138 | for i in range(self.convX): 139 | x = getattr(self, 'res%d' % (i + 1))(x) 140 | return x 141 | 142 | def _initialize_weights(self): 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d): 145 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 146 | m.weight.data.normal_(0, math.sqrt(2. / n)) 147 | if m.bias is not None: 148 | m.bias.data.zero_() 149 | elif isinstance(m, nn.BatchNorm2d): 150 | m.weight.data.fill_(1) 151 | m.bias.data.zero_() 152 | elif isinstance(m, nn.Linear): 153 | n = m.weight.size(1) 154 | m.weight.data.normal_(0, 0.01) 155 | m.bias.data.zero_() -------------------------------------------------------------------------------- /lib/models/ResNeXt.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | from lib.core.config import cfg 4 | 5 | # ---------------------------------------------------------------------------- # 6 | # Bits for specific architectures (ResNeXt50, ResNeXt101, ...) 
7 | # ---------------------------------------------------------------------------- # 8 | 9 | def ResNeXt50_32x4d_body_stride16(): 10 | return ResNeXt_body((3, 4, 6, 3), 32, 4, 16) 11 | 12 | 13 | def ResNeXt101_32x4d_body_stride16(): 14 | return ResNeXt_body((3, 4, 23, 3), 32, 4, 16) 15 | 16 | 17 | class ResNeXt_body(nn.Module): 18 | def __init__(self, block_counts, cardinality, base_width, output_stride): 19 | super().__init__() 20 | self.block_counts = block_counts 21 | self.convX = len(block_counts) + 1 22 | self.num_layers = (sum(block_counts) + 3 * (self.convX == 4)) * 3 + 2 23 | 24 | self.res1 = basic_bn_stem() 25 | dim_in = 64 26 | res5_dilate = int(32 / output_stride) 27 | res5_stride = 2 if res5_dilate == 1 else 1 28 | res4_dilate = 1 if res5_dilate <= 2 else 2 29 | res4_stride = 2 if res4_dilate == 1 else 1 30 | 31 | self.res2, dim_in = add_stage(dim_in, 256, block_counts[0], cardinality, base_width, 32 | dilation=1, stride_init=1) 33 | self.res3, dim_in = add_stage(dim_in, 512, block_counts[1], cardinality, base_width, 34 | dilation=1, stride_init=2) 35 | self.res4, dim_in = add_stage(dim_in, 1024, block_counts[2], cardinality, base_width, 36 | dilation=res4_dilate, stride_init=res4_stride) 37 | self.res5, dim_in = add_stage(dim_in, 2048, block_counts[3], cardinality, base_width, 38 | dilation=res5_dilate, stride_init=res5_stride) 39 | self.spatial_scale = 1 / output_stride 40 | self.dim_out = dim_in 41 | self._init_modle() 42 | 43 | def forward(self, x): 44 | for i in range(self.convX): 45 | x = getattr(self, 'res%d' % (i + 1))(x) 46 | return x 47 | 48 | 49 | def train(self, mode=True): 50 | # Override 51 | self.training = mode 52 | 53 | for i in range(1, self.convX + 1): 54 | getattr(self, 'res%d' % i).train(mode) 55 | def _init_modle(self): 56 | def freeze_params(m): 57 | for p in m.parameters(): 58 | p.requires_grad = False 59 | if cfg.MODEL.FREEZE_BACKBONE_BN: 60 | self.apply(lambda m: freeze_params(m) if isinstance(m, nn.BatchNorm2d) else None) 61 | 62 | def basic_bn_stem(): 63 | conv1 = nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False) 64 | return nn.Sequential(OrderedDict([ 65 | ('conv1', conv1), 66 | ('bn1', nn.BatchNorm2d(64)), 67 | ('relu', nn.ReLU(inplace=True)), 68 | ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))])) 69 | 70 | def add_stage(inplanes, outplanes, nblocks, cardinality, base_width, dilation=1, stride_init=2): 71 | """Make a stage consist of `nblocks` residual blocks. 72 | Returns: 73 | - stage module: an nn.Sequentail module of residual blocks 74 | - final output dimension 75 | """ 76 | res_blocks = [] 77 | stride = stride_init 78 | for _ in range(nblocks): 79 | res_blocks.append(ResNeXtBottleneck( 80 | inplanes, outplanes, stride, dilation, cardinality, base_width 81 | )) 82 | inplanes = outplanes 83 | stride = 1 84 | return nn.Sequential(*res_blocks), outplanes 85 | 86 | 87 | class ResNeXtBottleneck(nn.Module): 88 | """ 89 | RexNeXt bottleneck type C (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua) 90 | """ 91 | 92 | def __init__(self, in_channels, out_channels, stride, dilate, cardinality=32, base_width=4): 93 | """ Constructor 94 | Args: 95 | in_channels: input channel dimensionality 96 | out_channels: output channel dimensionality 97 | stride: conv stride. Replaces pooling layer. 98 | cardinality: num of convolution groups. 99 | base_width: base number of channels in each group. 100 | widen_factor: factor to reduce the input dimensionality before convolution. 
101 | """ 102 | super(ResNeXtBottleneck, self).__init__() 103 | width_ratio = out_channels / 256. 104 | D = cardinality * base_width * int(width_ratio) 105 | self.conv1 = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False) 106 | self.bn1 = nn.BatchNorm2d(D) 107 | self.conv2 = nn.Conv2d(D, D, kernel_size=3, stride=stride, padding=dilate, dilation=dilate, groups=cardinality, bias=False) 108 | self.bn2 = nn.BatchNorm2d(D) 109 | self.conv3 = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False) 110 | self.bn3 = nn.BatchNorm2d(out_channels) 111 | self.relu = nn.ReLU(inplace=True) 112 | 113 | 114 | if in_channels != out_channels: 115 | self.shortcut = nn.Sequential() 116 | self.shortcut.add_module('conv', 117 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, 118 | bias=False)) 119 | self.shortcut.add_module('bn', nn.BatchNorm2d(out_channels)) 120 | else: 121 | self.shortcut = None 122 | 123 | def forward(self, x): 124 | residual = x 125 | out = self.conv1(x) 126 | out = self.bn1(out) 127 | out = self.relu(out) 128 | out = self.conv2(out) 129 | out = self.bn2(out) 130 | out = self.relu(out) 131 | out = self.conv3(out) 132 | out = self.bn3(out) 133 | 134 | if self.shortcut is not None: 135 | residual = self.shortcut(x) 136 | 137 | out += residual 138 | out = self.relu(out) 139 | return out 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/models/__init__.py -------------------------------------------------------------------------------- /lib/models/image_transfer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | from lib.core.config import cfg 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | 9 | def fg_bg_maxpooling(clses_bg, clses_fg): 10 | "[b,c,h,w]" 11 | B,C,H,W = clses_bg.shape 12 | clses_bg = clses_bg.permute(0, 2, 3, 1) #[b, h, w, c] 13 | clses_bg = clses_bg.reshape((B, -1, C)).unsqueeze(-1) 14 | 15 | clses_fg = clses_fg.permute(0, 2, 3, 1) #[b, h, w, c] 16 | clses_fg = clses_fg.reshape((B, -1, C)).unsqueeze(-1) 17 | 18 | clses_cat = torch.cat((clses_fg, clses_bg), -1) # [b, hxw, c, 2] 19 | 20 | clses_final = F.max_pool2d(clses_cat, kernel_size=(1, 2)) #[b, hxw, c, 1] 21 | clses_final = clses_final.squeeze(-1).reshape((B, H, W, C)) 22 | 23 | clses_final = clses_final.permute(0, 3, 1, 2) # [b, c, h, w] 24 | 25 | clses_final = F.softmax(clses_final, dim=1) 26 | 27 | return clses_final 28 | 29 | 30 | def class_depth(classes): 31 | """ 32 | Transfer n-channel output of the network in classes to 1-channel depth 33 | @classes: n-channel output of the network, [b, c, h, w] 34 | :return: 1-channel depth, [b, 1, h, w] 35 | """ 36 | if type(classes).__module__ != torch.__name__: 37 | classes = torch.tensor(classes, dtype=torch.float32).cuda() 38 | classes = classes.permute(0, 2, 3, 1) #[b, h, w, c] 39 | if type(cfg.DATA.DEPTH_CLASSES).__module__ != torch.__name__: 40 | cfg.DATA.DEPTH_CLASSES = torch.tensor(cfg.DATA.DEPTH_CLASSES, dtype=torch.float32).cuda() 41 | depth = classes * cfg.DATA.DEPTH_CLASSES 42 | depth = torch.sum(depth, dim=3, dtype=torch.float32, keepdim=True) 43 | depth = 10 ** depth 44 | depth = depth.permute(0, 3, 1, 2) #[b, 1, h, w] 45 | return depth 46 | 47 | def 
class_depth_hard(classes): 48 | """ 49 | Transfer n-channel output of the network in classes to 1-channel depth 50 | @classes: n-channel output of the network, [b, c, h, w] 51 | :return: 1-channel depth, [b, 1, h, w] 52 | """ 53 | if type(classes).__module__ != torch.__name__: 54 | classes = torch.tensor(classes, dtype=torch.float32).cuda() 55 | classes = classes.permute(0, 2, 3, 1) #[b, h, w, c] 56 | if type(cfg.DATA.DEPTH_CLASSES).__module__ != torch.__name__: 57 | cfg.DATA.DEPTH_CLASSES = torch.tensor(cfg.DATA.DEPTH_CLASSES, dtype=torch.float32).cuda() 58 | 59 | # softmax to one-hot 60 | max_idx = torch.argmax(classes, -1, keepdim=True) 61 | one_hot = torch.FloatTensor(classes.shape).zero_().to(device=max_idx.device) 62 | one_hot.scatter_(-1, max_idx, 1) 63 | classes = one_hot 64 | 65 | depth = classes * cfg.DATA.DEPTH_CLASSES 66 | depth = torch.sum(depth, dim=3, dtype=torch.float32, keepdim=True) 67 | depth = 10 ** depth 68 | depth = depth.permute(0, 3, 1, 2) #[b, 1, h, w] 69 | return depth 70 | 71 | def depth_class(depth): 72 | """ 73 | Transfer 1-channel depth to 1-channel depth in n depth ranges 74 | :param depth: 1-channel depth, [b, 1, h, w] 75 | :return: classes [b, 1, h, w] 76 | """ 77 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 78 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 79 | classes = torch.round((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL) 80 | classes = classes.to(torch.long) 81 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 82 | return classes 83 | 84 | 85 | def resize_image(img, size): 86 | if type(img).__module__ != np.__name__: 87 | img = img.cpu().numpy() 88 | img = cv2.resize(img, (size[1], size[0])) 89 | return img 90 | 91 | 92 | def kitti_merge_imgs(left, middle, right, img_shape, crops): 93 | left = torch.squeeze(left) 94 | right = torch.squeeze(right) 95 | middle = torch.squeeze(middle) 96 | out = torch.zeros(img_shape, dtype=left.dtype, device=left.device) 97 | crops = torch.squeeze(crops) 98 | band = 5 99 | 100 | out[:, crops[0][0]:crops[0][0] + crops[0][2] - band] = left[:, 0:left.size(1)-band] 101 | out[:, crops[1][0]+band:crops[1][0] + crops[1][2] - band] += middle[:, band:middle.size(1)-band] 102 | out[:, crops[1][0] + crops[1][2] - 2*band:crops[2][0] + crops[2][2]] += right[:, crops[1][0] + crops[1][2] - 2*band-crops[2][0]:] 103 | 104 | out[:, crops[1][0]+band:crops[0][0] + crops[0][2] - band] /= 2.0 105 | out[:, crops[1][0] + crops[1][2] - 2*band:crops[1][0] + crops[1][2] - band] /= 2.0 106 | out = out.cpu().numpy() 107 | 108 | return out 109 | -------------------------------------------------------------------------------- /lib/models/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.core.config import cfg 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def cross_entropy_loss(pred_nosoftmax, gt_class): 8 | """ 9 | Standard cross-entropy loss 10 | :param pred_nosoftmax: predicted label 11 | :param gt_class: target label 12 | :return: 13 | """ 14 | gt_class = torch.squeeze(gt_class) 15 | gt_class = gt_class.to(device=pred_nosoftmax.device, dtype=torch.int64) 16 | entropy = torch.nn.CrossEntropyLoss(ignore_index=cfg.MODEL.DECODER_OUTPUT_C+1) 17 | loss = entropy(pred_nosoftmax, gt_class) 18 | return loss 19 | 20 | 21 | def weight_crossentropy_loss(pred_nosoftmax, gt, data): 22 | """ 23 | Weighted Cross-entropy Loss 24 | :param pred_nosoftmax: predicted label 25 | :param gt: 
target label 26 | """ 27 | invalid_side = data['invalid_side'] 28 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 29 | weight = cfg.DATA.WCE_LOSS_WEIGHT 30 | weight /= torch.sum(weight, 1, keepdim=True) 31 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 32 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 33 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 34 | 35 | gt_reshape = gt.reshape(-1, 1) 36 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 37 | weight = torch.matmul(one_hot, weight) 38 | weight_log_pred = weight * log_pred 39 | 40 | valid_pixes = torch.tensor([0], device=pred_nosoftmax.device, dtype=torch.float) 41 | for i in range(gt.size(0)): 42 | valid_gt = gt[i, :, int(invalid_side[i][0]):gt.size(2)-int(invalid_side[i][1]), :] 43 | valid_pixes += valid_gt.size(1) * valid_gt.size(2) 44 | loss = -1 * torch.sum(weight_log_pred) / valid_pixes 45 | return loss 46 | 47 | def rois_weight_crossentropy_loss(pred_nosoftmax, gt, data): 48 | """ 49 | Weighted Cross-entropy Loss 50 | :param pred_nosoftmax: predicted label 51 | :param gt: target label 52 | """ 53 | invalid_side = data['invalid_side'] 54 | rois_mask = data['rois_mask'] 55 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 56 | weight = cfg.DATA.WCE_LOSS_WEIGHT 57 | weight /= torch.sum(weight, 1, keepdim=True) 58 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 59 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 60 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 61 | 62 | gt_reshape = gt.reshape(-1, 1) 63 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 64 | weight = torch.matmul(one_hot, weight) 65 | weight_log_pred = weight * log_pred 66 | 67 | valid_pixels = max(rois_mask.sum(), 1) 68 | loss = -1 * torch.sum(weight_log_pred) / valid_pixels 69 | return loss 70 | 71 | def rois_scale_invariant_loss(pred_depth, data): 72 | """ 73 | Follow Eigen paper, add silog loss, for KITTI benchmark 74 | :param pred_depth: 75 | :param data: 76 | :return: 77 | """ 78 | invalid_side = data['invalid_side'] 79 | gt_depth = data['B'].cuda() 80 | 81 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 82 | 83 | loss_mean = torch.tensor([0.]).cuda() 84 | for j in range(pred_depth.size(0)): 85 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 86 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 87 | valid_rois_mask = rois_mask[j, :, int(invalid_side[j][0]): rois_mask.size(2) - int(invalid_side[j][1]), :] 88 | 89 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 90 | diff_log = diff_log * valid_rois_mask.to(dtype=diff_log.dtype) 91 | 92 | #size = torch.numel(diff_log) 93 | size = torch.sum(valid_rois_mask) 94 | if size == 0: 95 | continue 96 | 97 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 98 | loss = loss_mean / pred_depth.size(0) 99 | return loss 100 | 101 | 102 | def scale_invariant_loss(pred_depth, data): 103 | """ 104 | Follow Eigen paper, add silog loss, for KITTI benchmark 105 | :param pred_depth: 106 | :param data: 107 | :return: 108 | """ 109 | invalid_side = 
data['invalid_side'] 110 | gt_depth = data['B'].cuda() 111 | 112 | 113 | loss_mean = torch.tensor([0.]).cuda() 114 | for j in range(pred_depth.size(0)): 115 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 116 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 117 | 118 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 119 | 120 | size = torch.numel(diff_log) 121 | #size = torch.sum(valid_rois_mask) 122 | #if size == 0: 123 | # continue 124 | 125 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 126 | loss = loss_mean / pred_depth.size(0) 127 | return loss 128 | 129 | 130 | def berhu_loss(pred_depth, data, scale=80.): 131 | """ 132 | :param pred_depth: 133 | :param data: 134 | :return: 135 | """ 136 | huber_threshold = 0.2 137 | 138 | invalid_side = data['invalid_side'] 139 | gt_depth = data['B'].cuda() 140 | 141 | mask = gt_depth > 0 142 | 143 | pred_depth = pred_depth * mask.to(dtype=pred_depth.dtype) 144 | gt_depth = gt_depth * mask.to(dtype=gt_depth.dtype) 145 | 146 | diff = torch.abs(gt_depth - pred_depth) 147 | delta = huber_threshold * torch.max(diff).data.cpu() 148 | 149 | part1 = -F.threshold(-diff, -delta, 0.) 150 | part2 = F.threshold(diff**2 + delta**2, 2*delta**2, 0.) 151 | part2 = part2 / (2.*delta) 152 | 153 | loss = part1 + part2 154 | 155 | loss = loss[mask] 156 | loss = torch.mean(loss) 157 | 158 | return loss 159 | 160 | 161 | def rmse_log_loss(pred_depth, data, scale=80.): 162 | """ 163 | :param pred_depth: 164 | :param data: 165 | :return: 166 | """ 167 | 168 | gt_depth = data['B'].cuda() 169 | mask = gt_depth > 0 170 | 171 | pred_depth = pred_depth * scale 172 | gt_depth = gt_depth * scale 173 | 174 | diff = torch.log(gt_depth) - torch.log(pred_depth) 175 | diff = diff[mask] 176 | 177 | loss = torch.sqrt(torch.mean(diff**2)) 178 | return loss 179 | 180 | 181 | def rmse_loss(pred_depth, data, scale=80.): 182 | """ 183 | :param pred_depth: 184 | :param data: 185 | :return: 186 | """ 187 | 188 | gt_depth = data['B'].cuda() 189 | mask = gt_depth > 0 190 | 191 | pred_depth = pred_depth 192 | gt_depth = gt_depth 193 | 194 | diff = gt_depth - pred_depth 195 | diff = diff[mask] 196 | 197 | loss = torch.sqrt(torch.mean(diff**2)) 198 | return loss 199 | 200 | def mse_loss(pred_depth, data, scale=80.): 201 | """ 202 | :param pred_depth: 203 | :param data: 204 | :return: 205 | """ 206 | 207 | gt_depth = data['B'].cuda() 208 | mask = gt_depth > 0 209 | 210 | pred_depth = pred_depth 211 | gt_depth = gt_depth 212 | 213 | diff = gt_depth - pred_depth 214 | diff = diff[mask] 215 | 216 | loss = torch.mean(diff**2) 217 | return loss 218 | 219 | 220 | def rois_rmse_log_loss(pred_depth, data, scale=80.): 221 | """ 222 | :param pred_depth: 223 | :param data: 224 | :return: 225 | """ 226 | 227 | gt_depth = data['B'].cuda() 228 | 229 | mask = gt_depth > 0 230 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 231 | mask = mask & rois_mask 232 | 233 | pred_depth = pred_depth 234 | gt_depth = gt_depth 235 | 236 | diff = torch.log(gt_depth) - torch.log(pred_depth) 237 | diff = diff[mask] 238 | 239 | loss = torch.sqrt(torch.mean(diff**2)) 240 | return loss 241 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/bounding_box.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | # transpose 5 | FLIP_LEFT_RIGHT = 0 6 | FLIP_TOP_BOTTOM = 1 7 | 8 | 9 | class BoxList(object): 10 | """ 11 | This class represents a set of bounding boxes. 12 | The bounding boxes are represented as a Nx4 Tensor. 13 | In order to uniquely determine the bounding boxes with respect 14 | to an image, we also store the corresponding image dimensions. 15 | They can contain extra information that is specific to each bounding box, such as 16 | labels. 17 | """ 18 | 19 | def __init__(self, bbox, image_size, mode="xyxy"): 20 | device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu") 21 | bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device) 22 | if bbox.ndimension() != 2: 23 | raise ValueError( 24 | "bbox should have 2 dimensions, got {}".format(bbox.ndimension()) 25 | ) 26 | if bbox.size(-1) != 4: 27 | raise ValueError( 28 | "last dimension of bbox should have a " 29 | "size of 4, got {}".format(bbox.size(-1)) 30 | ) 31 | if mode not in ("xyxy", "xywh"): 32 | raise ValueError("mode should be 'xyxy' or 'xywh'") 33 | 34 | self.bbox = bbox 35 | self.size = image_size # (image_width, image_height) 36 | self.mode = mode 37 | self.extra_fields = {} 38 | 39 | def add_field(self, field, field_data): 40 | self.extra_fields[field] = field_data 41 | 42 | def get_field(self, field): 43 | return self.extra_fields[field] 44 | 45 | def has_field(self, field): 46 | return field in self.extra_fields 47 | 48 | def fields(self): 49 | return list(self.extra_fields.keys()) 50 | 51 | def _copy_extra_fields(self, bbox): 52 | for k, v in bbox.extra_fields.items(): 53 | self.extra_fields[k] = v 54 | 55 | def convert(self, mode): 56 | if mode not in ("xyxy", "xywh"): 57 | raise ValueError("mode should be 'xyxy' or 'xywh'") 58 | if mode == self.mode: 59 | return self 60 | # we only have two modes, so don't need to check 61 | # self.mode 62 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 63 | if mode == "xyxy": 64 | bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1) 65 | bbox = BoxList(bbox, self.size, mode=mode) 66 | else: 67 | TO_REMOVE = 1 68 | bbox = torch.cat( 69 | (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1 70 | ) 71 | bbox = BoxList(bbox, self.size, mode=mode) 72 | bbox._copy_extra_fields(self) 73 | return bbox 74 | 75 | def _split_into_xyxy(self): 76 | if self.mode == "xyxy": 77 | xmin, ymin, xmax, ymax = self.bbox.split(1, dim=-1) 78 | return xmin, ymin, xmax, ymax 79 | elif self.mode == "xywh": 80 | TO_REMOVE = 1 81 | xmin, ymin, w, h = self.bbox.split(1, dim=-1) 82 | return ( 83 | xmin, 84 | ymin, 85 | xmin + (w - TO_REMOVE).clamp(min=0), 86 | ymin + (h - TO_REMOVE).clamp(min=0), 87 | ) 88 | else: 89 | raise RuntimeError("Should not be here") 90 | 91 | def resize(self, size, *args, **kwargs): 92 | """ 93 | Returns a resized copy of this bounding box 94 | :param size: The requested size in pixels, as a 2-tuple: 95 | (width, height). 
96 | """ 97 | 98 | ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size)) 99 | if ratios[0] == ratios[1]: 100 | ratio = ratios[0] 101 | scaled_box = self.bbox * ratio 102 | bbox = BoxList(scaled_box, size, mode=self.mode) 103 | # bbox._copy_extra_fields(self) 104 | for k, v in self.extra_fields.items(): 105 | if not isinstance(v, torch.Tensor): 106 | v = v.resize(size, *args, **kwargs) 107 | bbox.add_field(k, v) 108 | return bbox 109 | 110 | ratio_width, ratio_height = ratios 111 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 112 | scaled_xmin = xmin * ratio_width 113 | scaled_xmax = xmax * ratio_width 114 | scaled_ymin = ymin * ratio_height 115 | scaled_ymax = ymax * ratio_height 116 | scaled_box = torch.cat( 117 | (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1 118 | ) 119 | bbox = BoxList(scaled_box, size, mode="xyxy") 120 | # bbox._copy_extra_fields(self) 121 | for k, v in self.extra_fields.items(): 122 | if not isinstance(v, torch.Tensor): 123 | v = v.resize(size, *args, **kwargs) 124 | bbox.add_field(k, v) 125 | 126 | return bbox.convert(self.mode) 127 | 128 | def transpose(self, method): 129 | """ 130 | Transpose bounding box (flip or rotate in 90 degree steps) 131 | :param method: One of :py:attr:`PIL.Image.FLIP_LEFT_RIGHT`, 132 | :py:attr:`PIL.Image.FLIP_TOP_BOTTOM`, :py:attr:`PIL.Image.ROTATE_90`, 133 | :py:attr:`PIL.Image.ROTATE_180`, :py:attr:`PIL.Image.ROTATE_270`, 134 | :py:attr:`PIL.Image.TRANSPOSE` or :py:attr:`PIL.Image.TRANSVERSE`. 135 | """ 136 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 137 | raise NotImplementedError( 138 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 139 | ) 140 | 141 | image_width, image_height = self.size 142 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 143 | if method == FLIP_LEFT_RIGHT: 144 | TO_REMOVE = 1 145 | transposed_xmin = image_width - xmax - TO_REMOVE 146 | transposed_xmax = image_width - xmin - TO_REMOVE 147 | transposed_ymin = ymin 148 | transposed_ymax = ymax 149 | elif method == FLIP_TOP_BOTTOM: 150 | transposed_xmin = xmin 151 | transposed_xmax = xmax 152 | transposed_ymin = image_height - ymax 153 | transposed_ymax = image_height - ymin 154 | 155 | transposed_boxes = torch.cat( 156 | (transposed_xmin, transposed_ymin, transposed_xmax, transposed_ymax), dim=-1 157 | ) 158 | bbox = BoxList(transposed_boxes, self.size, mode="xyxy") 159 | # bbox._copy_extra_fields(self) 160 | for k, v in self.extra_fields.items(): 161 | if not isinstance(v, torch.Tensor): 162 | v = v.transpose(method) 163 | bbox.add_field(k, v) 164 | return bbox.convert(self.mode) 165 | 166 | def crop(self, box): 167 | """ 168 | Cropss a rectangular region from this bounding box. The box is a 169 | 4-tuple defining the left, upper, right, and lower pixel 170 | coordinate. 171 | """ 172 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 173 | w, h = box[2] - box[0], box[3] - box[1] 174 | cropped_xmin = (xmin - box[0]).clamp(min=0, max=w) 175 | cropped_ymin = (ymin - box[1]).clamp(min=0, max=h) 176 | cropped_xmax = (xmax - box[0]).clamp(min=0, max=w) 177 | cropped_ymax = (ymax - box[1]).clamp(min=0, max=h) 178 | 179 | # TODO should I filter empty boxes here? 
180 | if False: 181 | is_empty = (cropped_xmin == cropped_xmax) | (cropped_ymin == cropped_ymax) 182 | 183 | cropped_box = torch.cat( 184 | (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1 185 | ) 186 | bbox = BoxList(cropped_box, (w, h), mode="xyxy") 187 | # bbox._copy_extra_fields(self) 188 | for k, v in self.extra_fields.items(): 189 | if not isinstance(v, torch.Tensor): 190 | v = v.crop(box) 191 | bbox.add_field(k, v) 192 | return bbox.convert(self.mode) 193 | 194 | # Tensor-like methods 195 | 196 | def to(self, device): 197 | bbox = BoxList(self.bbox.to(device), self.size, self.mode) 198 | for k, v in self.extra_fields.items(): 199 | if hasattr(v, "to"): 200 | v = v.to(device) 201 | bbox.add_field(k, v) 202 | return bbox 203 | 204 | def __getitem__(self, item): 205 | bbox = BoxList(self.bbox[item], self.size, self.mode) 206 | for k, v in self.extra_fields.items(): 207 | bbox.add_field(k, v[item]) 208 | return bbox 209 | 210 | def __len__(self): 211 | return self.bbox.shape[0] 212 | 213 | def clip_to_image(self, remove_empty=True): 214 | TO_REMOVE = 1 215 | self.bbox[:, 0].clamp_(min=0, max=self.size[0] - TO_REMOVE) 216 | self.bbox[:, 1].clamp_(min=0, max=self.size[1] - TO_REMOVE) 217 | self.bbox[:, 2].clamp_(min=0, max=self.size[0] - TO_REMOVE) 218 | self.bbox[:, 3].clamp_(min=0, max=self.size[1] - TO_REMOVE) 219 | if remove_empty: 220 | box = self.bbox 221 | keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]) 222 | return self[keep] 223 | return self 224 | 225 | def area(self): 226 | box = self.bbox 227 | if self.mode == "xyxy": 228 | TO_REMOVE = 1 229 | area = (box[:, 2] - box[:, 0] + TO_REMOVE) * (box[:, 3] - box[:, 1] + TO_REMOVE) 230 | elif self.mode == "xywh": 231 | area = box[:, 2] * box[:, 3] 232 | else: 233 | raise RuntimeError("Should not be here") 234 | 235 | return area 236 | 237 | def copy_with_fields(self, fields, skip_missing=False): 238 | bbox = BoxList(self.bbox, self.size, self.mode) 239 | if not isinstance(fields, (list, tuple)): 240 | fields = [fields] 241 | for field in fields: 242 | if self.has_field(field): 243 | bbox.add_field(field, self.get_field(field)) 244 | elif not skip_missing: 245 | raise KeyError("Field '{}' not found in {}".format(field, self)) 246 | return bbox 247 | 248 | def __repr__(self): 249 | s = self.__class__.__name__ + "(" 250 | s += "num_boxes={}, ".format(len(self)) 251 | s += "image_width={}, ".format(self.size[0]) 252 | s += "image_height={}, ".format(self.size[1]) 253 | s += "mode={})".format(self.mode) 254 | return s 255 | 256 | 257 | if __name__ == "__main__": 258 | bbox = BoxList([[0, 0, 10, 10], [0, 0, 5, 5]], (10, 10)) 259 | s_bbox = bbox.resize((5, 5)) 260 | print(s_bbox) 261 | print(s_bbox.bbox) 262 | 263 | t_bbox = bbox.transpose(0) 264 | print(t_bbox) 265 | print(t_bbox.bbox) 266 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/__init__.py: -------------------------------------------------------------------------------- 1 | from .chamfer_distance import ChamferDistance 2 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // CUDA forward declarations 4 | int ChamferDistanceKernelLauncher( 5 | const int b, const int n, 6 | const float* xyz, 7 | const int m, 8 | const float* xyz2, 9 | float* result, 10 | int* result_i, 11 | float* result2, 12 | int* result2_i); 13 | 14 
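// Backward launcher (declared below): given the upstream gradients of the two
// distance maps and the nearest-neighbour index maps from the forward pass, it
// accumulates gradients with respect to both point sets (see chamfer_distance.cu).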
| int ChamferDistanceGradKernelLauncher( 15 | const int b, const int n, 16 | const float* xyz1, 17 | const int m, 18 | const float* xyz2, 19 | const float* grad_dist1, 20 | const int* idx1, 21 | const float* grad_dist2, 22 | const int* idx2, 23 | float* grad_xyz1, 24 | float* grad_xyz2); 25 | 26 | 27 | void chamfer_distance_forward_cuda( 28 | const at::Tensor xyz1, 29 | const at::Tensor xyz2, 30 | const at::Tensor dist1, 31 | const at::Tensor dist2, 32 | const at::Tensor idx1, 33 | const at::Tensor idx2) 34 | { 35 | ChamferDistanceKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 36 | xyz2.size(1), xyz2.data(), 37 | dist1.data(), idx1.data(), 38 | dist2.data(), idx2.data()); 39 | } 40 | 41 | void chamfer_distance_backward_cuda( 42 | const at::Tensor xyz1, 43 | const at::Tensor xyz2, 44 | at::Tensor gradxyz1, 45 | at::Tensor gradxyz2, 46 | at::Tensor graddist1, 47 | at::Tensor graddist2, 48 | at::Tensor idx1, 49 | at::Tensor idx2) 50 | { 51 | ChamferDistanceGradKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 52 | xyz2.size(1), xyz2.data(), 53 | graddist1.data(), idx1.data(), 54 | graddist2.data(), idx2.data(), 55 | gradxyz1.data(), gradxyz2.data()); 56 | } 57 | 58 | 59 | void nnsearch( 60 | const int b, const int n, const int m, 61 | const float* xyz1, 62 | const float* xyz2, 63 | float* dist, 64 | int* idx) 65 | { 66 | for (int i = 0; i < b; i++) { 67 | for (int j = 0; j < n; j++) { 68 | const float x1 = xyz1[(i*n+j)*3+0]; 69 | const float y1 = xyz1[(i*n+j)*3+1]; 70 | const float z1 = xyz1[(i*n+j)*3+2]; 71 | double best = 0; 72 | int besti = 0; 73 | for (int k = 0; k < m; k++) { 74 | const float x2 = xyz2[(i*m+k)*3+0] - x1; 75 | const float y2 = xyz2[(i*m+k)*3+1] - y1; 76 | const float z2 = xyz2[(i*m+k)*3+2] - z1; 77 | const double d=x2*x2+y2*y2+z2*z2; 78 | if (k==0 || d < best){ 79 | best = d; 80 | besti = k; 81 | } 82 | } 83 | dist[i*n+j] = best; 84 | idx[i*n+j] = besti; 85 | } 86 | } 87 | } 88 | 89 | 90 | void chamfer_distance_forward( 91 | const at::Tensor xyz1, 92 | const at::Tensor xyz2, 93 | const at::Tensor dist1, 94 | const at::Tensor dist2, 95 | const at::Tensor idx1, 96 | const at::Tensor idx2) 97 | { 98 | const int batchsize = xyz1.size(0); 99 | const int n = xyz1.size(1); 100 | const int m = xyz2.size(1); 101 | 102 | const float* xyz1_data = xyz1.data(); 103 | const float* xyz2_data = xyz2.data(); 104 | float* dist1_data = dist1.data(); 105 | float* dist2_data = dist2.data(); 106 | int* idx1_data = idx1.data(); 107 | int* idx2_data = idx2.data(); 108 | 109 | nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data); 110 | nnsearch(batchsize, m, n, xyz2_data, xyz1_data, dist2_data, idx2_data); 111 | } 112 | 113 | 114 | void chamfer_distance_backward( 115 | const at::Tensor xyz1, 116 | const at::Tensor xyz2, 117 | at::Tensor gradxyz1, 118 | at::Tensor gradxyz2, 119 | at::Tensor graddist1, 120 | at::Tensor graddist2, 121 | at::Tensor idx1, 122 | at::Tensor idx2) 123 | { 124 | const int b = xyz1.size(0); 125 | const int n = xyz1.size(1); 126 | const int m = xyz2.size(1); 127 | 128 | const float* xyz1_data = xyz1.data(); 129 | const float* xyz2_data = xyz2.data(); 130 | float* gradxyz1_data = gradxyz1.data(); 131 | float* gradxyz2_data = gradxyz2.data(); 132 | float* graddist1_data = graddist1.data(); 133 | float* graddist2_data = graddist2.data(); 134 | const int* idx1_data = idx1.data(); 135 | const int* idx2_data = idx2.data(); 136 | 137 | for (int i = 0; i < b*n*3; i++) 138 | gradxyz1_data[i] = 0; 139 | for (int i = 0; i < b*m*3; i++) 140 | 
gradxyz2_data[i] = 0; 141 | for (int i = 0;i < b; i++) { 142 | for (int j = 0; j < n; j++) { 143 | const float x1 = xyz1_data[(i*n+j)*3+0]; 144 | const float y1 = xyz1_data[(i*n+j)*3+1]; 145 | const float z1 = xyz1_data[(i*n+j)*3+2]; 146 | const int j2 = idx1_data[i*n+j]; 147 | 148 | const float x2 = xyz2_data[(i*m+j2)*3+0]; 149 | const float y2 = xyz2_data[(i*m+j2)*3+1]; 150 | const float z2 = xyz2_data[(i*m+j2)*3+2]; 151 | const float g = graddist1_data[i*n+j]*2; 152 | 153 | gradxyz1_data[(i*n+j)*3+0] += g*(x1-x2); 154 | gradxyz1_data[(i*n+j)*3+1] += g*(y1-y2); 155 | gradxyz1_data[(i*n+j)*3+2] += g*(z1-z2); 156 | gradxyz2_data[(i*m+j2)*3+0] -= (g*(x1-x2)); 157 | gradxyz2_data[(i*m+j2)*3+1] -= (g*(y1-y2)); 158 | gradxyz2_data[(i*m+j2)*3+2] -= (g*(z1-z2)); 159 | } 160 | for (int j = 0; j < m; j++) { 161 | const float x1 = xyz2_data[(i*m+j)*3+0]; 162 | const float y1 = xyz2_data[(i*m+j)*3+1]; 163 | const float z1 = xyz2_data[(i*m+j)*3+2]; 164 | const int j2 = idx2_data[i*m+j]; 165 | const float x2 = xyz1_data[(i*n+j2)*3+0]; 166 | const float y2 = xyz1_data[(i*n+j2)*3+1]; 167 | const float z2 = xyz1_data[(i*n+j2)*3+2]; 168 | const float g = graddist2_data[i*m+j]*2; 169 | gradxyz2_data[(i*m+j)*3+0] += g*(x1-x2); 170 | gradxyz2_data[(i*m+j)*3+1] += g*(y1-y2); 171 | gradxyz2_data[(i*m+j)*3+2] += g*(z1-z2); 172 | gradxyz1_data[(i*n+j2)*3+0] -= (g*(x1-x2)); 173 | gradxyz1_data[(i*n+j2)*3+1] -= (g*(y1-y2)); 174 | gradxyz1_data[(i*n+j2)*3+2] -= (g*(z1-z2)); 175 | } 176 | } 177 | } 178 | 179 | 180 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 181 | m.def("forward", &chamfer_distance_forward, "ChamferDistance forward"); 182 | m.def("forward_cuda", &chamfer_distance_forward_cuda, "ChamferDistance forward (CUDA)"); 183 | m.def("backward", &chamfer_distance_backward, "ChamferDistance backward"); 184 | m.def("backward_cuda", &chamfer_distance_backward_cuda, "ChamferDistance backward (CUDA)"); 185 | } 186 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | __global__ 7 | void ChamferDistanceKernel( 8 | int b, 9 | int n, 10 | const float* xyz, 11 | int m, 12 | const float* xyz2, 13 | float* result, 14 | int* result_i) 15 | { 16 | const int batch=512; 17 | __shared__ float buf[batch*3]; 18 | for (int i=blockIdx.x;ibest){ 130 | result[(i*n+j)]=best; 131 | result_i[(i*n+j)]=best_i; 132 | } 133 | } 134 | __syncthreads(); 135 | } 136 | } 137 | } 138 | 139 | void ChamferDistanceKernelLauncher( 140 | const int b, const int n, 141 | const float* xyz, 142 | const int m, 143 | const float* xyz2, 144 | float* result, 145 | int* result_i, 146 | float* result2, 147 | int* result2_i) 148 | { 149 | ChamferDistanceKernel<<>>(b, n, xyz, m, xyz2, result, result_i); 150 | ChamferDistanceKernel<<>>(b, m, xyz2, n, xyz, result2, result2_i); 151 | 152 | cudaError_t err = cudaGetLastError(); 153 | if (err != cudaSuccess) 154 | printf("error in chamfer distance updateOutput: %s\n", cudaGetErrorString(err)); 155 | } 156 | 157 | 158 | __global__ 159 | void ChamferDistanceGradKernel( 160 | int b, int n, 161 | const float* xyz1, 162 | int m, 163 | const float* xyz2, 164 | const float* grad_dist1, 165 | const int* idx1, 166 | float* grad_xyz1, 167 | float* grad_xyz2) 168 | { 169 | for (int i = blockIdx.x; i>>(b, n, xyz1, m, xyz2, grad_dist1, idx1, grad_xyz1, grad_xyz2); 204 | ChamferDistanceGradKernel<<>>(b, m, xyz2, n, xyz1, 
grad_dist2, idx2, grad_xyz2, grad_xyz1); 205 | 206 | cudaError_t err = cudaGetLastError(); 207 | if (err != cudaSuccess) 208 | printf("error in chamfer distance get grad: %s\n", cudaGetErrorString(err)); 209 | } 210 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from torch.utils.cpp_extension import load 5 | cd = load(name="cd", 6 | sources=["../../lib/utils/chamfer_distance/chamfer_distance.cpp", 7 | "../../lib/utils/chamfer_distance/chamfer_distance.cu"]) 8 | 9 | class ChamferDistanceFunction(torch.autograd.Function): 10 | @staticmethod 11 | def forward(ctx, xyz1, xyz2): 12 | batchsize, n, _ = xyz1.size() 13 | _, m, _ = xyz2.size() 14 | xyz1 = xyz1.contiguous() 15 | xyz2 = xyz2.contiguous() 16 | dist1 = torch.zeros(batchsize, n) 17 | dist2 = torch.zeros(batchsize, m) 18 | 19 | idx1 = torch.zeros(batchsize, n, dtype=torch.int) 20 | idx2 = torch.zeros(batchsize, m, dtype=torch.int) 21 | 22 | if not xyz1.is_cuda: 23 | cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 24 | else: 25 | dist1 = dist1.cuda() 26 | dist2 = dist2.cuda() 27 | idx1 = idx1.cuda() 28 | idx2 = idx2.cuda() 29 | cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2) 30 | 31 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 32 | 33 | return dist1, dist2 34 | 35 | @staticmethod 36 | def backward(ctx, graddist1, graddist2): 37 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 38 | 39 | graddist1 = graddist1.contiguous() 40 | graddist2 = graddist2.contiguous() 41 | 42 | gradxyz1 = torch.zeros(xyz1.size()) 43 | gradxyz2 = torch.zeros(xyz2.size()) 44 | 45 | if not graddist1.is_cuda: 46 | cd.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 47 | else: 48 | gradxyz1 = gradxyz1.cuda() 49 | gradxyz2 = gradxyz2.cuda() 50 | cd.backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 51 | 52 | return gradxyz1, gradxyz2 53 | 54 | 55 | class ChamferDistance(torch.nn.Module): 56 | def forward(self, xyz1, xyz2): 57 | return ChamferDistanceFunction.apply(xyz1, xyz2) 58 | -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/evaluate_depth_error.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | import numpy as np 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def validate_err(pred, gt, smoothed_criteria, mask=None, scale=10.): 8 | if type(pred).__module__ == torch.__name__: 9 | pred = pred.cpu().numpy() 10 | if type(gt).__module__ == torch.__name__: 11 | gt = gt.cpu().numpy() 12 | gt = np.squeeze(gt) 13 | pred = np.squeeze(pred) 14 | if mask is not None: 15 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 16 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 17 | if pred.shape != gt.shape: 18 | logger.info('The shapes of dt and gt are not same!') 19 | return -1 20 | mask2 = gt > 0 21 | gt = gt[mask2] 22 | pred = pred[mask2] 23 | n_pxl = gt.size 24 | gt_scale = gt * scale 25 | pred_scale = pred * scale 26 | 27 | # Mean Absolute Relative Error 28 | rel = np.abs(gt_scale - pred_scale) / gt_scale # compute errors 29 | abs_rel_sum = np.sum(rel) 30 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 31 | return smoothed_criteria 32 | 33 | 34 | def validate_err_kitti(pred, gt, smoothed_criteria, mask=None, scale=256.*80.): 35 | if type(pred).__module__ == torch.__name__: 36 | pred = pred.cpu().numpy() 37 | if type(gt).__module__ == torch.__name__: 38 | gt = gt.cpu().numpy() 39 | gt = np.squeeze(gt) 40 | pred = np.squeeze(pred) 41 | if mask is not None: 42 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 43 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 44 | if pred.shape != gt.shape: 45 | logger.info('The shapes of dt and gt are not same!') 46 | return -1 47 | 48 | mask2 = gt > 0 49 | gt = gt[mask2] 50 | pred = pred[mask2] 51 | n_pxl = gt.size 52 | gt_scale = gt * scale 53 | pred_scale = pred * scale 54 | 55 | # Mean Absolute Relative Error 56 | 
rel = np.abs(gt_scale - pred_scale) / gt_scale # compute errors 57 | abs_rel_sum = np.sum(rel) 58 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 59 | 60 | # Scale invariant error, silog is an evaluation metric of KITTI benchmark 61 | diff_log = np.log(pred_scale) - np.log(gt_scale) 62 | diff_log_sum = np.sum(diff_log) 63 | smoothed_criteria['err_silog'].AddValue(np.float64(diff_log_sum), n_pxl) 64 | diff_log_2 = diff_log ** 2 65 | diff_log_2_sum = np.sum(diff_log_2) 66 | smoothed_criteria['err_silog2'].AddValue(np.float64(diff_log_2_sum), n_pxl) 67 | return smoothed_criteria 68 | 69 | 70 | def evaluate_err(pred, gt, smoothed_criteria, mask = None, scale=10.0 ): 71 | if type(pred).__module__ != np.__name__: 72 | pred = pred.cpu().numpy() 73 | if type(gt).__module__ != np.__name__: 74 | gt = gt.cpu().numpy() 75 | 76 | pred = np.squeeze(pred) 77 | gt = np.squeeze(gt) 78 | if mask is not None: 79 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 80 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 81 | if pred.shape != gt.shape: 82 | logger.info('The shapes of dt and gt are not same!') 83 | return -1 84 | 85 | mask2 = gt > 0 86 | gt = gt[mask2] 87 | pred = pred[mask2] 88 | n_pxl = gt.size 89 | gt_scale = gt * scale 90 | pred_scale = pred * scale 91 | 92 | #Mean Absolute Relative Error 93 | rel = np.abs(gt - pred) / gt# compute errors 94 | abs_rel_sum = np.sum(rel) 95 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 96 | 97 | #Square Mean Relative Error 98 | s_rel = ((gt_scale - pred_scale) * (gt_scale - pred_scale)) / (gt_scale * gt_scale)# compute errors 99 | squa_rel_sum = np.sum(s_rel) 100 | smoothed_criteria['err_squaRel'].AddValue(np.float64(squa_rel_sum), n_pxl) 101 | 102 | #Root Mean Square error 103 | square = (gt_scale - pred_scale) ** 2 104 | rms_squa_sum = np.sum(square) 105 | smoothed_criteria['err_rms'].AddValue(np.float64(rms_squa_sum), n_pxl) 106 | 107 | #Log Root Mean Square error 108 | log_square = (np.log(gt_scale) - np.log(pred_scale)) **2 109 | log_rms_sum = np.sum(log_square) 110 | smoothed_criteria['err_logRms'].AddValue(np.float64(log_rms_sum), n_pxl) 111 | 112 | # Scale invariant error 113 | diff_log = np.log(pred_scale) - np.log(gt_scale) 114 | diff_log_sum = np.sum(diff_log) 115 | smoothed_criteria['err_silog'].AddValue(np.float64(diff_log_sum), n_pxl) 116 | diff_log_2 = diff_log ** 2 117 | diff_log_2_sum = np.sum(diff_log_2) 118 | smoothed_criteria['err_silog2'].AddValue(np.float64(diff_log_2_sum), n_pxl) 119 | 120 | # Mean log10 error 121 | log10_sum = np.sum(np.abs(np.log10(gt) - np.log10(pred))) 122 | smoothed_criteria['err_log10'].AddValue(np.float64(log10_sum), n_pxl) 123 | 124 | #Delta 125 | gt_pred = gt_scale / pred_scale 126 | pred_gt = pred_scale / gt_scale 127 | gt_pred = np.reshape(gt_pred, (1, -1)) 128 | pred_gt = np.reshape(pred_gt, (1, -1)) 129 | gt_pred_gt = np.concatenate((gt_pred, pred_gt), axis=0) 130 | ratio_max = np.amax(gt_pred_gt, axis=0) 131 | 132 | delta_1_sum = np.sum(ratio_max < 1.25) 133 | smoothed_criteria['err_delta1'].AddValue(np.float64(delta_1_sum), n_pxl) 134 | delta_2_sum = np.sum(ratio_max < 1.25**2) 135 | smoothed_criteria['err_delta2'].AddValue(np.float64(delta_2_sum), n_pxl) 136 | delta_3_sum = np.sum(ratio_max < 1.25**3) 137 | smoothed_criteria['err_delta3'].AddValue(np.float64(delta_3_sum), n_pxl) 138 | 139 | return smoothed_criteria 140 | 141 | -------------------------------------------------------------------------------- /lib/utils/logging.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from collections import deque 24 | import logging 25 | import numpy as np 26 | import sys 27 | from lib.core.config import cfg 28 | def log_stats(stats, args): 29 | """Log training statistics to terminal""" 30 | lines = "[Step %d/%d] [Epoch %d/%d] [%s]\n" % ( 31 | stats['iter'], cfg.TRAIN.MAX_ITER, stats['epoch'], args.epoch[-1], args.dataset) 32 | 33 | lines += "\t\tloss: %.3f, time: %.6f, eta: %s\n" % ( 34 | stats['total_loss'], stats['time'], stats['eta'] ) 35 | 36 | for k in stats: 37 | if 'loss' in k and 'total_loss' not in k: 38 | lines += "\t\t" + ", ".join("%s: %.3f" % (k, v) for k, v in stats[k].items()) + ", " 39 | 40 | # validate criteria 41 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['val_err'].items()) + ", " 42 | lines += '\n' 43 | 44 | # lr in different groups 45 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['lr'].items()) + ", " 46 | lines += '\n' 47 | print(lines[:-1]) # remove last new line 48 | 49 | class SmoothedValue(object): 50 | """Track a series of values and provide access to smoothed values over a 51 | window or the global series average. 
52 | """ 53 | def __init__(self, window_size): 54 | self.deque = deque(maxlen=window_size) 55 | self.series = [] 56 | self.total = 0.0 57 | self.count = 0 58 | 59 | def AddValue(self, value, size=1): 60 | self.deque.append(value) 61 | self.series.append(value) 62 | self.count += size 63 | self.total += value 64 | 65 | def GetMedianValue(self): 66 | return np.median(self.deque) 67 | 68 | def GetAverageValue(self): 69 | return np.mean(self.deque) 70 | 71 | def GetGlobalAverageValue(self): 72 | return self.total / self.count 73 | 74 | 75 | def setup_logging(name): 76 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 77 | # Manually clear root loggers to prevent any module that may have called 78 | # logging.basicConfig() from blocking our logging setup 79 | logging.root.handlers = [] 80 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 81 | logger = logging.getLogger(name) 82 | return logger 83 | 84 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from datetime import datetime 4 | 5 | 6 | def get_run_name(): 7 | """ A unique name for each run """ 8 | return datetime.now().strftime( 9 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 10 | 11 | 12 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 13 | 14 | 15 | def is_image_file(filename): 16 | """Checks if a file is an image. 17 | Args: 18 | filename (string): path to a file 19 | Returns: 20 | bool: True if the filename ends with a known image extension 21 | """ 22 | filename_lower = filename.lower() 23 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 24 | 25 | 26 | def get_imagelist_from_dir(dirpath): 27 | images = [] 28 | for f in os.listdir(dirpath): 29 | if is_image_file(f): 30 | images.append(os.path.join(dirpath, f)) 31 | return images 32 | -------------------------------------------------------------------------------- /lib/utils/mobilenetv2_weight_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from lib.core.config import cfg 4 | import numpy as np 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | def load_pretrained_imagenet_resnext_weights(model): 9 | """Load pretrained weights 10 | Args: 11 | num_layers: 50 for res50 and so on. 12 | model: the generalized rcnnn module 13 | """ 14 | model_state_dict = model.state_dict() 15 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.MODEL.MODEL_REPOSITORY, 'MobileNetV2_ImageNet', cfg.MODEL.PRETRAINED_WEIGHTS) 16 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file), model_state_dict) 17 | 18 | for k, v in pretrianed_state_dict.items(): 19 | if k in model_state_dict.keys(): 20 | model_state_dict[k].copy_(pretrianed_state_dict[k]) 21 | else: 22 | logger.info('Weight %s is not in MobileNetV2 model.' % k) 23 | logger.info('Pretrained MobileNetV2 weight has been loaded') 24 | 25 | def convert_state_dict(src_dict, model_dict): 26 | """Return the correct mapping of tensor name and value 27 | 28 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 
29 | """ 30 | dst_dict = {} 31 | res_block_n = np.array([1, 4, 7, 14, 18]) 32 | for k, v in src_dict.items(): 33 | toks = k.split('.') 34 | id_n = int(toks[1]) 35 | if id_n < 18 and '17.conv.7' not in k and 'classifier' not in k: 36 | res_n = np.where(res_block_n > id_n)[0][0] + 1 37 | n = res_n - 2 if res_n >= 2 else 0 38 | res_n_m = 0 if id_n - res_block_n[n] < 0 else id_n - res_block_n[n] 39 | toks[0] = 'res%s' % res_n 40 | toks[1] = '%s' % res_n_m 41 | name = '.'.join(toks) 42 | dst_dict[name] = v 43 | return dst_dict 44 | -------------------------------------------------------------------------------- /lib/utils/net_tools.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import torch 3 | import os 4 | from lib.core.config import cfg 5 | import torch.nn as nn 6 | from lib.utils.logging import setup_logging 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import dill 10 | logger = setup_logging(__name__) 11 | import cv2 12 | 13 | 14 | def get_func(func_name): 15 | """Helper to return a function object by name. func_name must identify a 16 | function in this module or the path to a function relative to the base 17 | 'modeling' module. 18 | """ 19 | if func_name == '': 20 | return None 21 | try: 22 | parts = func_name.split('.') 23 | # Refers to a function in this module 24 | if len(parts) == 1: 25 | return globals()[parts[0]] 26 | # Otherwise, assume we're referencing a module under modeling 27 | module_name = 'lib.models.' + '.'.join(parts[:-1]) 28 | module = importlib.import_module(module_name) 29 | return getattr(module, parts[-1]) 30 | except Exception: 31 | logger.error('Failed to f1ind function: %s', func_name) 32 | raise 33 | 34 | def load_ckpt(args, model, optimizer=None, scheduler=None, val_err=[]): 35 | """ 36 | Load checkpoint. 
37 | """ 38 | if os.path.isfile(args.load_ckpt): 39 | logger.info("loading checkpoint %s", args.load_ckpt) 40 | checkpoint = torch.load(args.load_ckpt, map_location=lambda storage, loc: storage, pickle_module=dill) 41 | model_dict = model.state_dict() 42 | #model_dict = {k: v for k,v in model_dict if k in checkpoint.items()} 43 | model_dict.update(checkpoint['model_state_dict']) 44 | 45 | model.load_state_dict(model_dict) 46 | if args.resume: 47 | args.batchsize = checkpoint['batch_size'] 48 | args.start_step = checkpoint['step'] 49 | args.start_epoch = checkpoint['epoch'] 50 | optimizer.load_state_dict(checkpoint['optimizer']) 51 | scheduler.load_state_dict(checkpoint['scheduler']) 52 | if 'val_err' in checkpoint: # For backward compatibility 53 | val_err[0] = checkpoint['val_err'] 54 | del checkpoint 55 | torch.cuda.empty_cache() 56 | 57 | 58 | def save_ckpt(args, step, epoch, model, optimizer, scheduler, val_err={}): 59 | """Save checkpoint""" 60 | ckpt_dir = os.path.join(cfg.TRAIN.LOG_DIR, 'ckpt') 61 | if not os.path.exists(ckpt_dir): 62 | os.makedirs(ckpt_dir) 63 | save_name = os.path.join(ckpt_dir, 'epoch%d_step%d.pth' %(epoch, step)) 64 | if isinstance(model, nn.DataParallel): 65 | model = model.module 66 | torch.save({ 67 | 'step': step, 68 | 'epoch': epoch, 69 | 'batch_size': args.batchsize, 70 | 'scheduler': scheduler.state_dict(), 71 | 'val_err': val_err, 72 | 'model_state_dict': model.state_dict(), 73 | 'optimizer': optimizer.state_dict()}, 74 | save_name, pickle_module=dill) 75 | logger.info('save model: %s', save_name) 76 | 77 | 78 | # save image to the disk 79 | def save_images(data, pred, scale=60000.): 80 | rgb = data['A_raw'] 81 | gt = data['B_raw'] 82 | if type(rgb).__module__ != np.__name__: 83 | rgb = rgb.cpu().numpy() 84 | rgb = np.squeeze(rgb) 85 | rgb = rgb[:, :, ::-1] 86 | if type(gt).__module__ != np.__name__: 87 | gt = gt.cpu().numpy() 88 | gt = np.squeeze(gt) 89 | if type(pred).__module__ != np.__name__: 90 | pred = pred.cpu().numpy() 91 | pred = np.squeeze(pred) 92 | model_name = (cfg.DATA.LOAD_MODEL_NAME.split('/')[-1]).split('.')[0] 93 | image_dir = os.path.join(cfg.TRAIN.OUTPUT_ROOT_DIR, '../evaluation', model_name) 94 | if not os.path.exists(image_dir): 95 | os.makedirs(image_dir) 96 | 97 | 98 | if 'kitti' in cfg.DATASET: 99 | name = data['A_paths'][0].split('/')[-4] + '-' + data['A_paths'][0].split('/')[-1].split('.')[0] 100 | else: 101 | name = data['A_paths'][0].split('/')[-1].split('.')[0] 102 | rgb_name = '%s_%s.png' % (name, 'rgb') 103 | gt_name = '%s_%s.png' % (name, 'gt') 104 | gt_raw_name = '%s_%s.png' % (name, 'gt-raw') 105 | pred_name = '%s_%s.png' % (name, 'pred') 106 | pred_raw_name = '%s_%s.png' % (name, 'pred-raw') 107 | 108 | plt.imsave(os.path.join(image_dir, rgb_name), rgb) 109 | if len(data['B_raw'].shape) != 2: 110 | plt.imsave(os.path.join(image_dir, gt_name), gt, cmap='rainbow') 111 | gt_scale = gt * scale 112 | gt_scale = gt_scale.astype('uint16') 113 | cv2.imwrite(os.path.join(image_dir, gt_raw_name), gt_scale) 114 | plt.imsave(os.path.join(image_dir, pred_name), pred, cmap='rainbow') 115 | pred_raw = pred * scale 116 | pred_raw = pred_raw.astype('uint16') 117 | cv2.imwrite(os.path.join(image_dir, pred_raw_name), pred_raw) 118 | 119 | -------------------------------------------------------------------------------- /lib/utils/obj_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from lib.utils.bounding_box import BoxList 4 | 5 | class ObjectLabel: 6 | 
"""Object Label Class 7 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 8 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 9 | 'Misc' or 'DontCare' 10 | 11 | 1 truncated Float from 0 (non-truncated) to 1 (truncated), where 12 | truncated refers to the object leaving image boundaries 13 | 14 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 15 | 0 = fully visible, 1 = partly occluded 16 | 2 = largely occluded, 3 = unknown 17 | 18 | 1 alpha Observation angle of object, ranging [-pi..pi] 19 | 20 | 4 bbox 2D bounding box of object in the image (0-based index): 21 | contains left, top, right, bottom pixel coordinates 22 | 23 | 3 dimensions 3D object dimensions: height, width, length (in meters) 24 | 25 | 3 location 3D object location x,y,z in camera coordinates (in meters) 26 | 27 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 28 | 29 | 1 score Only for results: Float, indicating confidence in 30 | detection, needed for p/r curves, higher is better. 31 | """ 32 | 33 | def __init__(self): 34 | self.type = "" # Type of object 35 | self.truncation = 0. 36 | self.occlusion = 0. 37 | self.alpha = 0. 38 | self.x1 = 0. 39 | self.y1 = 0. 40 | self.x2 = 0. 41 | self.y2 = 0. 42 | self.h = 0. 43 | self.w = 0. 44 | self.l = 0. 45 | self.t = (0., 0., 0.) 46 | self.ry = 0. 47 | self.score = 0. 48 | 49 | def __eq__(self, other): 50 | """Compares the given object to the current ObjectLabel instance. 51 | 52 | :param other: object to compare to this instance against 53 | :return: True, if other and current instance is the same 54 | """ 55 | if not isinstance(other, ObjectLabel): 56 | return False 57 | 58 | if self.__dict__ != other.__dict__: 59 | return False 60 | else: 61 | return True 62 | 63 | 64 | def read_labels(label_path, results=False, ignore_cate=None): 65 | """Reads in label data file from Kitti Dataset. 66 | 67 | Returns: 68 | obj_list -- List of instances of class ObjectLabel. 
69 | 70 | Keyword arguments: 71 | label_dir -- directory of the label files 72 | img_idx -- index of the image 73 | """ 74 | 75 | # Define the object list 76 | obj_list = [] 77 | 78 | # Extract the list 79 | if os.stat(label_path).st_size == 0: 80 | return 81 | 82 | if results: 83 | p = np.loadtxt(label_path, delimiter=' ', 84 | dtype=str, 85 | usecols=np.arange(start=0, step=1, stop=16)) 86 | else: 87 | p = np.loadtxt(label_path, delimiter=' ', 88 | dtype=str, 89 | usecols=np.arange(start=0, step=1, stop=15)) 90 | 91 | # Check if the output is single dimensional or multi dimensional 92 | if len(p.shape) > 1: 93 | label_num = p.shape[0] 94 | else: 95 | label_num = 1 96 | 97 | for idx in np.arange(label_num): 98 | obj = ObjectLabel() 99 | 100 | if label_num > 1: 101 | # Fill in the object list 102 | obj.type = p[idx, 0] 103 | obj.truncation = float(p[idx, 1]) 104 | obj.occlusion = float(p[idx, 2]) 105 | obj.alpha = float(p[idx, 3]) 106 | obj.x1 = float(p[idx, 4]) 107 | obj.y1 = float(p[idx, 5]) 108 | obj.x2 = float(p[idx, 6]) 109 | obj.y2 = float(p[idx, 7]) 110 | obj.h = float(p[idx, 8]) 111 | obj.w = float(p[idx, 9]) 112 | obj.l = float(p[idx, 10]) 113 | obj.t = (float(p[idx, 11]), float(p[idx, 12]), float(p[idx, 13])) 114 | obj.ry = float(p[idx, 14]) 115 | if results: 116 | obj.score = float(p[idx, 15]) 117 | else: 118 | obj.score = 0.0 119 | else: 120 | # Fill in the object list 121 | obj.type = p[0] 122 | obj.truncation = float(p[1]) 123 | obj.occlusion = float(p[2]) 124 | obj.alpha = float(p[3]) 125 | obj.x1 = float(p[4]) 126 | obj.y1 = float(p[5]) 127 | obj.x2 = float(p[6]) 128 | obj.y2 = float(p[7]) 129 | obj.h = float(p[8]) 130 | obj.w = float(p[9]) 131 | obj.l = float(p[10]) 132 | obj.t = (float(p[11]), float(p[12]), float(p[13])) 133 | obj.ry = float(p[14]) 134 | if results: 135 | obj.score = float(p[15]) 136 | else: 137 | obj.score = 0.0 138 | 139 | if ignore_cate is not None and obj.type in ignore_cate: 140 | continue 141 | else: 142 | obj_list.append(obj) 143 | 144 | return obj_list 145 | 146 | 147 | def rois2mask(rois, shape): 148 | mask = np.zeros(shape) 149 | for roi in rois: 150 | mask[int(roi.y1):int(roi.y2)+1, int(roi.x1):int(roi.x2)+1] = 1. 151 | 152 | return mask 153 | 154 | 155 | def rois2mask_shrink(rois, shape): 156 | mask = np.zeros(shape) 157 | for roi in rois: 158 | h = int(roi.y2) - int(roi.y1) + 1 159 | w = int(roi.x2) - int(roi.x1) + 1 160 | 161 | mask[int(roi.y1)+int(h/4.):int(roi.y2)+1-int(h/4.), int(roi.x1)+int(w/4):int(roi.x2)+1-int(w/4)] = 1. 162 | 163 | return mask 164 | 165 | 166 | def rois2mask_ins(rois, shape): 167 | np.random.shuffle(rois) 168 | mask = np.zeros(shape) 169 | for idx, roi in enumerate(rois): 170 | mask[int(roi.y1):int(roi.y2)+1, int(roi.x1):int(roi.x2)+1] = 1. 
+ idx 171 | 172 | return mask 173 | 174 | 175 | def rois2boxlist(rois, image_size, mode="xyxy"): 176 | boxes = [] 177 | np.random.shuffle(rois) 178 | for roi in rois: 179 | xyxy = [int(roi.x1), int(roi.y1), int(roi.x2), int(roi.y2)] 180 | boxes.append(xyxy) 181 | 182 | boxlist = BoxList(boxes, image_size, mode=mode) 183 | return boxlist 184 | 185 | -------------------------------------------------------------------------------- /lib/utils/resnext_weights_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from lib.core.config import cfg 4 | import logging 5 | logger = logging.getLogger(__name__) 6 | 7 | def load_pretrained_imagenet_resnext_weights(model): 8 | """Load pretrained weights 9 | Args: 10 | num_layers: 50 for res50 and so on. 11 | model: the generalized rcnnn module 12 | """ 13 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.MODEL.MODEL_REPOSITORY, 'ResNeXt_ImageNet', cfg.MODEL.PRETRAINED_WEIGHTS) 14 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 15 | 16 | model_state_dict = model.state_dict() 17 | 18 | for k, v in pretrianed_state_dict.items(): 19 | if k in model_state_dict.keys(): 20 | model_state_dict[k].copy_(pretrianed_state_dict[k]) 21 | else: 22 | print('Weight %s is not in ResNeXt model.' % k) 23 | logger.info('Pretrained ResNeXt weight has been loaded') 24 | 25 | def convert_state_dict(src_dict): 26 | """Return the correct mapping of tensor name and value 27 | 28 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 29 | """ 30 | dst_dict = {} 31 | res_id = 1 32 | map1 = ['conv1.', 'bn1.', ' ', 'conv2.', 'bn2.'] 33 | map2 = [[' ', 'conv3.', 'bn3.'], ['shortcut.conv.', 'shortcut.bn.']] 34 | for k, v in src_dict.items(): 35 | toks = k.split('.') 36 | if int(toks[0]) == 0: 37 | name = 'res%d.' % res_id + 'conv1.' + toks[-1] 38 | elif int(toks[0]) == 1: 39 | name = 'res%d.' % res_id + 'bn1.' + toks[-1] 40 | elif int(toks[0]) >=4 and int(toks[0]) <= 7: 41 | name_res = 'res%d.%d.' % (int(toks[0])-2, int(toks[1])) 42 | if len(toks) == 7: 43 | name = name_res + map1[int(toks[-2])] + toks[-1] 44 | elif len(toks) == 6: 45 | name = name_res + map2[int(toks[-3])][int(toks[-2])] + toks[-1] 46 | else: 47 | continue 48 | dst_dict[name] = v 49 | 50 | return dst_dict 51 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 
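

# Minimal, illustrative usage sketch: tic()/toc() are called once per iteration;
# with average=True toc() returns the running mean, with average=False the last
# interval. The sleep below is only a stand-in for one training iteration.
if __name__ == '__main__':
    timer = Timer()
    for _ in range(3):
        timer.tic()
        time.sleep(0.01)  # stand-in for one training iteration
        print('last: %.4fs  avg: %.4fs' % (timer.toc(average=False), timer.average_time))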
36 | -------------------------------------------------------------------------------- /lib/utils/training_stats.py: -------------------------------------------------------------------------------- 1 | 2 | """Utilities for training.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from __future__ import unicode_literals 8 | 9 | from collections import defaultdict, OrderedDict 10 | import datetime 11 | from lib.core.config import cfg 12 | 13 | 14 | from lib.utils.logging import log_stats 15 | from lib.utils.logging import SmoothedValue 16 | from lib.utils.timer import Timer 17 | 18 | 19 | 20 | class TrainingStats(object): 21 | """Track vital training statistics.""" 22 | def __init__(self, args, log_period=20, tensorboard_logger=None): 23 | # Output logging period in SGD iterations 24 | self.args = args 25 | self.log_period = log_period 26 | self.tblogger = tensorboard_logger 27 | self.tb_ignored_keys = ['iter', 'eta', 'epoch', 'time'] 28 | self.iter_timer = Timer() 29 | # Window size for smoothing tracked values (with median filtering) 30 | self.filter_size = 20 31 | def create_smoothed_value(): 32 | return SmoothedValue(self.filter_size) 33 | self.smoothed_losses = defaultdict(create_smoothed_value) 34 | self.smoothed_metrics = defaultdict(create_smoothed_value) 35 | self.smoothed_total_loss = SmoothedValue(self.filter_size) 36 | 37 | 38 | def IterTic(self): 39 | self.iter_timer.tic() 40 | 41 | def IterToc(self): 42 | return self.iter_timer.toc(average=False) 43 | 44 | def ResetIterTimer(self): 45 | self.iter_timer.reset() 46 | 47 | def UpdateIterStats(self, loss): 48 | """Update tracked iteration statistics.""" 49 | total_loss = 0 50 | for k in loss: 51 | # all losses except the total loss: loss['all'] 52 | if k != 'total_loss': 53 | self.smoothed_losses[k].AddValue(float(loss[k])) 54 | 55 | total_loss += loss['total_loss'] 56 | self.smoothed_total_loss.AddValue(float(total_loss)) 57 | 58 | def LogIterStats(self, cur_iter, cur_epoch, optimizer, val_err={}): 59 | """Log the tracked statistics.""" 60 | if (cur_iter % self.log_period == 0): 61 | stats = self.GetStats(cur_iter, cur_epoch, optimizer, val_err) 62 | log_stats(stats, self.args) 63 | if self.tblogger: 64 | self.tb_log_stats(stats, cur_iter) 65 | 66 | def tb_log_stats(self, stats, cur_iter): 67 | """Log the tracked statistics to tensorboard""" 68 | for k in stats: 69 | if k not in self.tb_ignored_keys: 70 | v = stats[k] 71 | if isinstance(v, dict): 72 | self.tb_log_stats(v, cur_iter) 73 | else: 74 | self.tblogger.add_scalar(k, v, cur_iter) 75 | 76 | 77 | def GetStats(self, cur_iter, cur_epoch, optimizer, val_err = {}): 78 | eta_seconds = self.iter_timer.average_time * ( 79 | cfg.TRAIN.MAX_ITER - cur_iter 80 | ) 81 | eta = str(datetime.timedelta(seconds=int(eta_seconds))) 82 | stats = OrderedDict( 83 | iter=cur_iter, # 1-indexed 84 | time=self.iter_timer.average_time, 85 | eta=eta, 86 | total_loss=self.smoothed_total_loss.GetMedianValue(), 87 | epoch=cur_epoch, 88 | ) 89 | optimizer_state_dict = optimizer.state_dict() 90 | lr = {} 91 | for i in range(len(optimizer_state_dict['param_groups'])): 92 | lr_name = 'group%d_lr' % i 93 | lr[lr_name] = optimizer_state_dict['param_groups'][i]['lr'] 94 | 95 | stats['lr'] = OrderedDict(lr) 96 | for k, v in self.smoothed_losses.items(): 97 | stats[k] = OrderedDict([(k, v.GetMedianValue())]) 98 | 99 | stats['val_err'] = OrderedDict(val_err) 100 | return stats 101 | 
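

# Minimal sketch of how TrainingStats is typically driven from a training loop
# (kept as a comment; `loader`, `model`, `optimizer`, `epoch` and `val_err` are
# illustrative names, not taken from the actual trainers):
#
#   stats = TrainingStats(args, log_period=20)
#   for step, data in enumerate(loader):
#       stats.IterTic()
#       loss_dict = model(data)            # expected to contain a 'total_loss' entry
#       optimizer.zero_grad()
#       loss_dict['total_loss'].backward()
#       optimizer.step()
#       stats.UpdateIterStats(loss_dict)
#       stats.IterToc()
#       stats.LogIterStats(step, epoch, optimizer, val_err=val_err)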
-------------------------------------------------------------------------------- /prepocessing/gen_depth.sh: -------------------------------------------------------------------------------- 1 | python generate_depth.py --data_path ~/wangxinlong/data/KITTI/Kitti/object/training/ --split_file ~/wangxinlong/data/KITTI/Kitti/object/trainval.txt 2 | -------------------------------------------------------------------------------- /prepocessing/generate_depth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def generate_depth_from_velo(pc_velo, height, width, calib): 11 | pts_2d = calib.project_velo_to_image(pc_velo) 12 | fov_inds = (pts_2d[:, 0] < width - 1) & (pts_2d[:, 0] >= 0) & \ 13 | (pts_2d[:, 1] < height - 1) & (pts_2d[:, 1] >= 0) 14 | fov_inds = fov_inds & (pc_velo[:, 0] > 2) 15 | imgfov_pc_velo = pc_velo[fov_inds, :] 16 | imgfov_pts_2d = pts_2d[fov_inds, :] 17 | imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo) 18 | depth_map = np.zeros((height, width)) - 1 19 | imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) 20 | for i in range(imgfov_pts_2d.shape[0]): 21 | depth = imgfov_pc_rect[i, 2] 22 | depth_map[int(imgfov_pts_2d[i, 1]), int(imgfov_pts_2d[i, 0])] = depth 23 | 24 | return depth_map 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser(description='Generate Depth') 29 | parser.add_argument('--data_path', type=str, default='~/Kitti/object/training/') 30 | parser.add_argument('--split_file', type=str, default='~/Kitti/object/train.txt') 31 | args = parser.parse_args() 32 | 33 | assert os.path.isdir(args.data_path) 34 | lidar_dir = args.data_path + '/velodyne/' 35 | calib_dir = args.data_path + '/calib/' 36 | image_dir = args.data_path + '/image_2/' 37 | lidepth_dir = args.data_path + '/lidepth/' 38 | 39 | assert os.path.isdir(lidar_dir) 40 | assert os.path.isdir(calib_dir) 41 | assert os.path.isdir(image_dir) 42 | 43 | if not os.path.isdir(lidepth_dir): 44 | os.makedirs(lidepth_dir) 45 | 46 | lidar_files = [x for x in os.listdir(lidar_dir) if x[-3:] == 'bin'] 47 | lidar_files = sorted(lidar_files) 48 | 49 | assert os.path.isfile(args.split_file) 50 | with open(args.split_file, 'r') as f: 51 | file_names = [x.strip() for x in f.readlines()] 52 | 53 | for fn in lidar_files: 54 | predix = fn[:-4] 55 | if predix not in file_names: 56 | continue 57 | calib_file = '{}/{}.txt'.format(calib_dir, predix) 58 | calib = kitti_util.Calibration(calib_file) 59 | # load point cloud 60 | lidar = np.fromfile(lidar_dir + '/' + fn, dtype=np.float32).reshape((-1, 4))[:, :3] 61 | image_file = '{}/{}.png'.format(image_dir, predix) 62 | image = ssc.imread(image_file) 63 | height, width = image.shape[:2] 64 | lidepth = generate_depth_from_velo(lidar, height, width, calib) 65 | np.save(lidepth_dir + '/' + predix, lidepth) 66 | print('Finish Depth {}'.format(predix)) 67 | -------------------------------------------------------------------------------- /prepocessing/generate_disp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def generate_dispariy_from_velo(pc_velo, height, width, calib): 11 | pts_2d = calib.project_velo_to_image(pc_velo) 12 | fov_inds = (pts_2d[:, 0] < width - 1) & (pts_2d[:, 0] >= 0) & \ 13 | (pts_2d[:, 1] < height - 1) & 
(pts_2d[:, 1] >= 0) 14 | fov_inds = fov_inds & (pc_velo[:, 0] > 2) 15 | imgfov_pc_velo = pc_velo[fov_inds, :] 16 | imgfov_pts_2d = pts_2d[fov_inds, :] 17 | imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo) 18 | depth_map = np.zeros((height, width)) - 1 19 | imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) 20 | for i in range(imgfov_pts_2d.shape[0]): 21 | depth = imgfov_pc_rect[i, 2] 22 | depth_map[int(imgfov_pts_2d[i, 1]), int(imgfov_pts_2d[i, 0])] = depth 23 | baseline = 0.54 24 | 25 | disp_map = (calib.f_u * baseline) / depth_map 26 | return disp_map 27 | 28 | 29 | if __name__ == '__main__': 30 | parser = argparse.ArgumentParser(description='Generate Disparity') 31 | parser.add_argument('--data_path', type=str, default='~/Kitti/object/training/') 32 | parser.add_argument('--split_file', type=str, default='~/Kitti/object/train.txt') 33 | args = parser.parse_args() 34 | 35 | assert os.path.isdir(args.data_path) 36 | lidar_dir = args.data_path + '/velodyne/' 37 | calib_dir = args.data_path + '/calib/' 38 | image_dir = args.data_path + '/image_2/' 39 | disparity_dir = args.data_path + '/disparity/' 40 | 41 | assert os.path.isdir(lidar_dir) 42 | assert os.path.isdir(calib_dir) 43 | assert os.path.isdir(image_dir) 44 | 45 | if not os.path.isdir(disparity_dir): 46 | os.makedirs(disparity_dir) 47 | 48 | lidar_files = [x for x in os.listdir(lidar_dir) if x[-3:] == 'bin'] 49 | lidar_files = sorted(lidar_files) 50 | 51 | assert os.path.isfile(args.split_file) 52 | with open(args.split_file, 'r') as f: 53 | file_names = [x.strip() for x in f.readlines()] 54 | 55 | for fn in lidar_files: 56 | predix = fn[:-4] 57 | if predix not in file_names: 58 | continue 59 | calib_file = '{}/{}.txt'.format(calib_dir, predix) 60 | calib = kitti_util.Calibration(calib_file) 61 | # load point cloud 62 | lidar = np.fromfile(lidar_dir + '/' + fn, dtype=np.float32).reshape((-1, 4))[:, :3] 63 | image_file = '{}/{}.png'.format(image_dir, predix) 64 | image = ssc.imread(image_file) 65 | height, width = image.shape[:2] 66 | disp = generate_dispariy_from_velo(lidar, height, width, calib) 67 | np.save(disparity_dir + '/' + predix, disp) 68 | print('Finish Disparity {}'.format(predix)) 69 | -------------------------------------------------------------------------------- /prepocessing/generate_lidar.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def project_disp_to_depth(calib, disp, max_high): 11 | disp[disp < 0] = 0 12 | baseline = 0.54 13 | mask = disp > 0 14 | depth = calib.f_u * baseline / (disp + 1. 
- mask) 15 | rows, cols = depth.shape 16 | c, r = np.meshgrid(np.arange(cols), np.arange(rows)) 17 | points = np.stack([c, r, depth]) 18 | points = points.reshape((3, -1)) 19 | points = points.T 20 | points = points[mask.reshape(-1)] 21 | cloud = calib.project_image_to_velo(points) 22 | valid = (cloud[:, 0] >= 0) & (cloud[:, 2] < max_high) 23 | return cloud[valid] 24 | 25 | 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser(description='Generate Libar') 28 | parser.add_argument('--calib_dir', type=str, 29 | default='~/Kitti/object/training/calib') 30 | parser.add_argument('--disparity_dir', type=str, 31 | default='~/Kitti/object/training/predicted_disparity') 32 | parser.add_argument('--save_dir', type=str, 33 | default='~/Kitti/object/training/predicted_velodyne') 34 | parser.add_argument('--max_high', type=int, default=1) 35 | args = parser.parse_args() 36 | 37 | assert os.path.isdir(args.disparity_dir) 38 | assert os.path.isdir(args.calib_dir) 39 | 40 | if not os.path.isdir(args.save_dir): 41 | os.makedirs(args.save_dir) 42 | 43 | disps = [x for x in os.listdir(args.disparity_dir) if x[-3:] == 'png'] 44 | disps = sorted(disps) 45 | 46 | for fn in disps: 47 | predix = fn[:-4] 48 | calib_file = '{}/{}.txt'.format(args.calib_dir, predix) 49 | calib = kitti_util.Calibration(calib_file) 50 | disp_map = ssc.imread(args.disparity_dir + '/' + fn) / 256. 51 | lidar = project_disp_to_depth(calib, disp_map, args.max_high) 52 | # pad 1 in the indensity dimension 53 | lidar = np.concatenate([lidar, np.ones((lidar.shape[0], 1))], 1) 54 | lidar = lidar.astype(np.float32) 55 | lidar.tofile('{}/{}.bin'.format(args.save_dir, predix)) 56 | print('Finish Depth {}'.format(predix)) 57 | -------------------------------------------------------------------------------- /prepocessing/kitti_process_RANSAC.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.linear_model import RANSACRegressor 6 | 7 | import kitti_util as utils 8 | 9 | 10 | def extract_ransac(calib_dir, lidar_dir, planes_dir): 11 | data_idx_list = [x[:-4] for x in os.listdir(lidar_dir) if x[-4:] == '.bin'] 12 | 13 | if not os.path.isdir(planes_dir): 14 | os.makedirs(planes_dir) 15 | 16 | for data_idx in data_idx_list: 17 | 18 | print('------------- ', data_idx) 19 | calib = calib_dir + '/' + data_idx + '.txt' 20 | calib = utils.Calibration(calib) 21 | pc_velo = lidar_dir + '/' + data_idx + '.bin' 22 | pc_velo = np.fromfile(pc_velo, dtype=np.float32).reshape(-1, 4) 23 | pc_rect = calib.project_velo_to_rect(pc_velo[:, :3]) 24 | valid_loc = (pc_rect[:, 1] > 1.5) & \ 25 | (pc_rect[:, 1] < 1.86) & \ 26 | (pc_rect[:, 2] > 0) & \ 27 | (pc_rect[:, 2] < 40) & \ 28 | (pc_rect[:, 0] > -15) & \ 29 | (pc_rect[:, 0] < 15) 30 | pc_rect = pc_rect[valid_loc] 31 | if len(pc_rect) < 1: 32 | w = [0, -1, 0] 33 | h = 1.65 34 | else: 35 | reg = RANSACRegressor().fit(pc_rect[:, [0, 2]], pc_rect[:, 1]) 36 | w = np.zeros(3) 37 | w[0] = reg.estimator_.coef_[0] 38 | w[2] = reg.estimator_.coef_[1] 39 | w[1] = -1.0 40 | h = reg.estimator_.intercept_ 41 | w = w / np.linalg.norm(w) 42 | print(w) 43 | print(h) 44 | 45 | lines = ['# Plane', 'Width 4', 'Height 1'] 46 | 47 | plane_file = os.path.join(planes_dir, data_idx + '.txt') 48 | result_lines = lines[:3] 49 | result_lines.append("{:e} {:e} {:e} {:e}".format(w[0], w[1], w[2], h)) 50 | result_str = '\n'.join(result_lines) 51 | with open(plane_file, 'w') as f: 52 | f.write(result_str) 53 | 54 | 55 | if 
__name__ == '__main__': 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--calib_dir', default='KITTI/object/training/calib') 58 | parser.add_argument('--lidar_dir', default='KITTI/object/training/velodyne') 59 | parser.add_argument('--planes_dir', default='KITTI/object/training/velodyne_planes') 60 | args = parser.parse_args() 61 | 62 | extract_ransac(args.calib_dir, args.lidar_dir, args.planes_dir) 63 | -------------------------------------------------------------------------------- /prepocessing/kitti_util.py: -------------------------------------------------------------------------------- 1 | """ Helper methods for loading and parsing KITTI data. 2 | 3 | Author: Charles R. Qi 4 | Date: September 2017 5 | """ 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | 11 | class Calibration(object): 12 | ''' Calibration matrices and utils 13 | 3d XYZ in