├── .gitignore
├── INSTALL.md
├── README.md
├── data
│   ├── __init__.py
│   ├── kitti_dataset.py
│   ├── kitti_gt_dataset.py
│   ├── kitti_object_dataset.py
│   ├── kitti_object_roi_dataset.py
│   ├── kitti_prediction_dataset.py
│   └── load_dataset.py
├── datasets
│   └── KITTI_object
│       ├── annotations
│       │   ├── data_object_training_annotations.json
│       │   ├── inference_annotations.json
│       │   ├── object_inference_annotations.json
│       │   ├── train_annotations.json
│       │   └── val_annotations.json
│       ├── test.txt
│       ├── train.txt
│       ├── training
│       │   ├── depth
│       │   ├── image_2
│       │   ├── label_2
│       │   └── planes
│       ├── trainval.txt
│       └── val.txt
├── experiments
│   └── foresee
│       ├── depth_normal_model.py
│       ├── lateral_net.py
│       ├── loss.py
│       ├── pc
│       │   ├── bin2obj.py
│       │   ├── gen.sh
│       │   ├── gen_colorps.sh
│       │   ├── kitti_prediction.py
│       │   └── kitti_prediction_colorps.py
│       ├── test.sh
│       ├── train.sh
│       ├── train_kitti.py
│       └── val_kitti.py
├── lib
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   └── config.py
│   ├── models
│   │   ├── MobileNetV2.py
│   │   ├── ResNeXt.py
│   │   ├── __init__.py
│   │   ├── image_transfer.py
│   │   ├── lateral_net.py
│   │   └── loss.py
│   └── utils
│       ├── __init__.py
│       ├── bounding_box.py
│       ├── chamfer_distance
│       │   ├── __init__.py
│       │   ├── chamfer_distance.cpp
│       │   ├── chamfer_distance.cu
│       │   └── chamfer_distance.py
│       ├── collections.py
│       ├── evaluate_depth_error.py
│       ├── logging.py
│       ├── misc.py
│       ├── mobilenetv2_weight_helper.py
│       ├── net_tools.py
│       ├── obj_utils.py
│       ├── resnext_weights_helper.py
│       ├── timer.py
│       └── training_stats.py
├── prepocessing
│   ├── gen_depth.sh
│   ├── generate_depth.py
│   ├── generate_disp.py
│   ├── generate_lidar.py
│   ├── kitti_process_RANSAC.py
│   └── kitti_util.py
├── pretrained_model
│   └── ResNeXt_ImageNet
└── tools
    ├── __init__.py
    ├── kitti_prediction.py
    ├── kitti_prediction_sample.py
    ├── kitti_prediction_sample_diff.py
    ├── parse_arg_base.py
    ├── parse_arg_test.py
    ├── parse_arg_train.py
    ├── parse_arg_val.py
    ├── test_kitti.py
    ├── train_kitti.py
    └── val_kitti.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pth
2 | *.pyc
3 | __pycache__/
4 | 
5 | 
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Requirements
4 | - PyTorch >= 0.4.1
5 | - torchvision == 0.2.1
6 | - matplotlib
7 | - opencv-python
8 | - dill
9 | - scipy
10 | - pyyaml
11 | 
12 | ### Step-by-step installation
13 | ```bash
14 | # First, make sure conda is set up properly and create a dedicated environment
15 | 
16 | conda create -n foresee python=3.6
17 | conda activate foresee
18 | 
19 | 
20 | # basic packages
21 | conda install matplotlib dill pyyaml opencv scipy
22 | 
23 | # follow the PyTorch installation guide at https://pytorch.org/get-started/locally/
24 | # the instructions below are for CUDA 9.0
25 | conda install -c pytorch torchvision=0.2.1 cudatoolkit=9.0
26 | 
27 | ```
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Task-Aware Monocular Depth Estimation for 3D Object Detection
2 | 
3 | This project hosts the code for implementing the ForeSeE algorithm for depth estimation.
4 | 
5 | 
6 | > [**Task-Aware Monocular Depth Estimation for 3D Object Detection**](https://arxiv.org/abs/1909.07701),
7 | > Xinlong Wang, Wei Yin, Tao Kong, Yuning Jiang, Lei Li, Chunhua Shen
8 | > *AAAI, 2020*
9 | 
10 | 
11 | ## Installation
12 | 
13 | This implementation is based on [VNL](https://github.com/YvanYin/VNL_Monocular_Depth_Prediction). Please refer to [INSTALL.md](INSTALL.md) for installation.
14 | 
15 | ## Dataset
16 | 
17 | Please refer to the [KITTI dataset](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_prediction) for details.
18 | The annotation files of the [KITTI Object subset](https://github.com/WXinlong/ForeSeE/tree/master/datasets/KITTI_object/annotations) used in our work are provided.
19 | 
20 | ## Models
21 | Download the trained model from this [link](https://cloudstor.aarnet.edu.au/plus/s/M3LFxiDPZkMKrtw) and put it under experiments/foresee/.
22 | 
23 | ## Testing
24 | 
25 |     cd experiments/foresee
26 |     sh test.sh
27 | 
28 | ## Training
29 | 
30 |     cd experiments/foresee
31 |     sh train.sh
32 | 
33 | ## Citations
34 | 
35 | Please consider citing our paper in your publications if the project helps your research. The BibTeX reference is as follows.
36 | 
37 | ```
38 | @InProceedings{wang2020foresee,
39 |   title = {Task-Aware Monocular Depth Estimation for 3D Object Detection},
40 |   author = {Wang, Xinlong and Yin, Wei and Kong, Tao and Jiang, Yuning and Li, Lei and Shen, Chunhua},
41 |   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
42 |   year = {2020}
43 | }
44 | ```
45 | 
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/data/__init__.py
--------------------------------------------------------------------------------
/data/kitti_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | import torch
4 | import numpy as np
5 | from lib.core.config import cfg
6 | import cv2
7 | import json
8 | from lib.utils.logging import setup_logging
9 | logger = setup_logging(__name__)
10 | 
11 | class KITTIDataset():
12 |     def initialize(self, opt):
13 |         self.opt = opt
14 |         self.root = opt.dataroot
15 |         self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json')
16 |         self.A_paths, self.B_paths = self.getData()
17 |         self.data_size = len(self.A_paths)
18 |         self.depth_normalize = 255. * 80.
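# The depth .png files are read raw below and divided by depth_normalize = 255 * 80.
# Assuming they follow the usual KITTI export convention (uint16 value ~ meters * 256),
# this maps depth into roughly [0, 1], with 1.0 corresponding to the 80 m range used
# elsewhere in this repo (predictions are later multiplied by 80 to recover meters).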
19 | self.uniform_size = (385, 1243) 20 | 21 | def getData(self): 22 | with open(self.dir_anno, 'r') as load_f: 23 | AB_anno = json.load(load_f) 24 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 25 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 26 | logger.info('Loaded Kitti data!') 27 | return A_list, B_list 28 | 29 | def __getitem__(self, anno_index): 30 | if 'train' in self.opt.phase: 31 | try: 32 | data = self.online_aug_train(anno_index) 33 | except: 34 | print('Error: {}'.format(self.B_paths[anno_index])) 35 | data = self.online_aug_train(anno_index - 1) 36 | else: 37 | data = self.online_aug_val_test(anno_index) 38 | return data 39 | 40 | def online_aug_train(self, idx): 41 | A_path = self.A_paths[idx] 42 | B_path = self.B_paths[idx] 43 | 44 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 45 | B = cv2.imread(B_path, -1) / self.depth_normalize #[0.0, 1.0] 46 | 47 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 48 | 49 | A_crop = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size, pad, 128) 50 | B_crop = self.flip_pad_reshape_crop(B, flip_flg, resize_size, crop_size, pad, -1) 51 | 52 | A_crop = A_crop.transpose((2, 0, 1)) 53 | B_crop = B_crop[np.newaxis, :, :] 54 | 55 | # change the color channel, bgr->rgb 56 | A_crop = A_crop[::-1, :, :] 57 | 58 | # to torch, normalize 59 | A_crop = self.scale_torch(A_crop, 255.) 60 | B_crop = self.scale_torch(B_crop, resize_ratio) 61 | 62 | B_classes = self.depth_to_class(B_crop) 63 | 64 | invalid_side = [0, 0, 0, 0] if crop_size[1] != 0 else [int((pad[0] + 50)*resize_ratio), 0, 0, 0] 65 | 66 | A = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 67 | B = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 68 | 69 | data = {'A': A_crop, 'B': B_crop, 'A_raw': A, 'B_raw': B, 'B_classes': B_classes, 'A_paths': A_path, 70 | 'B_paths': B_path, 'invalid_side': np.array(invalid_side), 'pad_raw': np.array(pad)} 71 | return data 72 | 73 | def online_aug_val_test(self, idx): 74 | A_path = self.A_paths[idx] 75 | B_path = self.B_paths[idx] 76 | 77 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 78 | 79 | B = cv2.imread(B_path, -1) / self.depth_normalize # [0.0, 1.0] 80 | 81 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 82 | 83 | crop_size_l = [pad[2], 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 84 | crop_size_m = [cfg.CROP_SIZE[1] + pad[2] - 20, 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 85 | crop_size_r = [self.uniform_size[1] - cfg.CROP_SIZE[1], 0, cfg.CROP_SIZE[1], cfg.CROP_SIZE[0]] 86 | 87 | A_crop_l = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_l, pad, 128) 88 | A_crop_l = A_crop_l.transpose((2, 0, 1)) 89 | A_crop_l = A_crop_l[::-1, :, :] 90 | 91 | A_crop_m = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_m, pad, 128) 92 | A_crop_m = A_crop_m.transpose((2, 0, 1)) 93 | A_crop_m = A_crop_m[::-1, :, :] 94 | 95 | A_crop_r = self.flip_pad_reshape_crop(A, flip_flg, resize_size, crop_size_r, pad, 128) 96 | A_crop_r = A_crop_r.transpose((2, 0, 1)) 97 | A_crop_r = A_crop_r[::-1, :, :] 98 | 99 | A_crop_l = self.scale_torch(A_crop_l, 255.) 100 | A_crop_m = self.scale_torch(A_crop_m, 255.) 101 | A_crop_r = self.scale_torch(A_crop_r, 255.) 
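# The three crops built above cover the left / middle / right of the padded
# 385 x 1243 KITTI image, since a single CROP_SIZE window cannot span the full width;
# crop_lmr below records the crop windows so that inference_kitti() in
# depth_normal_model.py can stitch the per-crop predictions back together with
# kitti_merge_imgs (imported from lib.models.image_transfer).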
102 | A = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 103 | B = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 104 | crop_lmr = np.array((crop_size_l, crop_size_m, crop_size_r)) 105 | 106 | A_crop = A.transpose((2, 0, 1)) 107 | B_crop = B[np.newaxis, :, :] 108 | # change the color channel, bgr->rgb 109 | A_crop = A_crop[::-1, :, :] 110 | # to torch, normalize 111 | A_crop = self.scale_torch(A_crop, 255.) 112 | B_crop = self.scale_torch(B_crop, 1.0) 113 | 114 | data = {'A': A_crop, 'B': B_crop,'A_l': A_crop_l, 'A_m': A_crop_m, 'A_r': A_crop_r, 115 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad), 'crop_lmr': crop_lmr} 116 | return data 117 | 118 | def set_flip_pad_reshape_crop(self, A): 119 | """ 120 | Set flip, padding, reshaping and cropping flags. 121 | :param A: Input image, [H, W, C] 122 | :return: Data augamentation parameters 123 | """ 124 | # flip 125 | flip_prob = np.random.uniform(0.0, 1.0) 126 | flip_flg = True if flip_prob > 0.5 and 'train' in self.opt.phase else False 127 | 128 | # pad 129 | pad_height = self.uniform_size[0] - A.shape[0] 130 | pad_width = self.uniform_size[1] - A.shape[1] 131 | pad = [pad_height, 0, pad_width, 0] #[up, down, left, right] 132 | 133 | # reshape 134 | ratio_list = [1.0, 1.2, 1.5, 1.8, 2.0]# 135 | resize_ratio = ratio_list[np.random.randint(len(ratio_list))] if 'train' in self.opt.phase else 1.0 136 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 137 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 138 | 139 | # crop 140 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 141 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 142 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 143 | crop_height = cfg.CROP_SIZE[0] 144 | crop_width = cfg.CROP_SIZE[1] 145 | crop_size = [start_x, start_y, crop_width, crop_height] 146 | return flip_flg, resize_size, crop_size, pad, resize_ratio 147 | 148 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 149 | """ 150 | Preprocessing input image or ground truth depth. 
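The transform is applied in the order: flip -> pad -> resize -> crop.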
151 | :param img: RGB image or depth image 152 | :param flip: Flipping flag, True or False 153 | :param resize_size: Resizing size 154 | :param crop_size: Cropping size 155 | :param pad: Padding region 156 | :param pad_value: Padding value 157 | :return: Processed image 158 | """ 159 | if len(img.shape) == 1: 160 | return img 161 | # Flip 162 | if flip: 163 | img = np.flip(img, axis=1) 164 | 165 | # Pad the raw image 166 | if len(img.shape) == 3: 167 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 168 | constant_values=(pad_value, pad_value)) 169 | else: 170 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 171 | constant_values=(pad_value, pad_value)) 172 | # Resize the raw image 173 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 174 | # Crop the resized image 175 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 176 | 177 | return img_crop 178 | 179 | def depth_to_class(self, depth): 180 | """ 181 | Discretize depth into depth bins 182 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 183 | :param depth: 1-channel depth, [1, h, w] 184 | :return: depth bins [1, h, w] 185 | """ 186 | invalid_mask = depth < 0. 187 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 188 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 189 | classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int) 190 | classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1 191 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 192 | return classes 193 | 194 | def scale_torch(self, img, scale): 195 | # scale image 196 | img = img.astype(np.float32) 197 | img /= scale 198 | img = torch.from_numpy(img.copy()) 199 | if img.size(0) == 3: 200 | img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img) 201 | else: 202 | img = transforms.Normalize((0,), (1,))(img) 203 | return img 204 | 205 | 206 | def __len__(self): 207 | return self.data_size 208 | 209 | def name(self): 210 | return 'KITTI' 211 | -------------------------------------------------------------------------------- /data/kitti_gt_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torchvision.transforms as transforms 5 | import torch 6 | import numpy as np 7 | import cv2 8 | import json 9 | 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(ROOT_DIR) 13 | 14 | from lib.core.config import cfg 15 | from lib.utils.logging import setup_logging 16 | from lib.utils.obj_utils import read_labels, rois2mask, rois2mask_shrink, rois2boxlist 17 | logger = setup_logging(__name__) 18 | 19 | from IPython import embed 20 | 21 | class KITTIGtDataset(): 22 | def initialize(self, opt): 23 | self.opt = opt 24 | self.root = opt.dataroot 25 | self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json') 26 | self.A_paths, self.B_paths, self.AB_anno, self.rois_paths = self.getData() 27 | self.data_size = len(self.AB_anno) 28 | self.depth_normalize = 255. * 80. 
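# Unlike KITTIDataset, this loader also reads the per-image KITTI object labels
# ('rois_path' in the annotation json) via read_labels/rois2boxlist, so each sample
# additionally carries the 2D boxes of the foreground objects ('bbox' in the output dict).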
29 | self.ignore_cate_list = ['Person_sitting', 'Misc', 'DontCare'] 30 | self.uniform_size = (385, 1243) 31 | 32 | def getData(self): 33 | with open(self.dir_anno, 'r') as load_f: 34 | AB_anno = json.load(load_f) 35 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 36 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 37 | rois_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rois_path']) for i in range(len(AB_anno))] 38 | logger.info('Loaded Kitti data!') 39 | return A_list, B_list, AB_anno, rois_list 40 | 41 | def __getitem__(self, anno_index): 42 | 43 | data = self.online_aug_val_test(anno_index) 44 | return data 45 | 46 | def online_aug_val_test(self, idx): 47 | A_path = self.A_paths[idx] 48 | B_path = self.B_paths[idx] 49 | rois_path = self.rois_paths[idx] 50 | 51 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 52 | 53 | #B = np.zeros((A.shape[0], A.shape[1]), dtype=np.float32 ) 54 | B = cv2.imread(B_path, -1) / self.depth_normalize 55 | 56 | rois = read_labels(rois_path, ignore_cate=self.ignore_cate_list) # list of instances of class ObjectLabel, see obj_utils.py 57 | raw_boxlist = rois2boxlist(rois, (A.shape[1], A.shape[0])) 58 | 59 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 60 | 61 | A_crop = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 62 | B_crop = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 63 | 64 | raw_boxlist.bbox[:, 0::2] += pad[2] 65 | raw_boxlist.bbox[:, 1::2] += pad[0] 66 | boxes = raw_boxlist.bbox 67 | 68 | A_crop = A_crop.transpose((2, 0, 1)) 69 | B_crop = B_crop[np.newaxis, :, :] 70 | # change the color channel, bgr->rgb 71 | A_crop = A_crop[::-1, :, :] 72 | # to torch, normalize 73 | A_crop = self.scale_torch(A_crop, 255.) 
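# scale_torch divides the RGB crop by 255 and applies the ImageNet mean/std
# normalization; the depth map below is passed with scale 1.0, i.e. it is only
# converted to a tensor (Normalize((0,), (1,)) is an identity transform).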
74 | B_crop = self.scale_torch(B_crop, 1.0) 75 | 76 | data = {'A': A_crop, 'B': B_crop, 'bbox': boxes, 77 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad)} 78 | 79 | return data 80 | 81 | def set_flip_pad_reshape_crop(self, A): 82 | flip_flg = False 83 | 84 | # pad 85 | pad_height = self.uniform_size[0] - A.shape[0] 86 | pad_width = self.uniform_size[1] - A.shape[1] 87 | pad = [pad_height, 0, pad_width, 0] # [up, down, left, right] 88 | 89 | # reshape 90 | resize_ratio = 1.0 91 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 92 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 93 | 94 | # crop 95 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 96 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 97 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 98 | crop_height = cfg.CROP_SIZE[0] 99 | crop_width = cfg.CROP_SIZE[1] 100 | crop_size = [start_x, start_y, crop_width, crop_height] 101 | return flip_flg, resize_size, crop_size, pad, resize_ratio 102 | 103 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 104 | if len(img.shape) == 1: 105 | return img 106 | # Flip 107 | if flip: 108 | img = np.flip(img, axis=1) 109 | 110 | # Pad the raw image 111 | if len(img.shape) == 3: 112 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 113 | constant_values=(pad_value, pad_value)) 114 | else: 115 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 116 | constant_values=(pad_value, pad_value)) 117 | # Resize the raw image 118 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 119 | # Crop the resized image 120 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 121 | return img_crop 122 | 123 | def depth_to_class(self, depth): 124 | """ 125 | Transfer 1-channel depth to 1-channel depth in n depth ranges 126 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 127 | :param depth: 1-channel depth, [1, h, w] 128 | :return: classes [1, h, w] 129 | """ 130 | invalid_mask = depth < 0. 
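# Depth is discretized in log10 space:
#   class = int((log10(d) - DATA_MIN_LOG) / DEPTH_RANGE_INTERVAL)
# As an illustration only (the real constants live in lib/core/config.py): with
# DATA_MIN = 1 m, DATA_MAX = 80 m and DECODER_OUTPUT_C = 100 bins, the interval is
# (log10(80) - log10(1)) / 100 ~= 0.019, so a pixel at 10 m falls into bin 52.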
131 |         depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN
132 |         depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX
133 |         classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int)
134 |         classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1
135 |         classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1
136 |         return classes
137 | 
138 |     def scale_torch(self, img, scale):
139 |         # scale image
140 |         img = img.astype(np.float32)
141 |         img /= scale
142 |         img = torch.from_numpy(img.copy())
143 |         if img.size(0) == 3:
144 |             img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img)
145 |         else:
146 |             img = transforms.Normalize((0,), (1,))(img)
147 |         return img
148 | 
149 | 
150 |     def __len__(self):
151 |         return self.data_size
152 | 
153 |     def name(self):
154 |         return 'KITTIGtDataset'
155 | 
156 | if __name__ == "__main__":
157 |     class test_opt:
158 |         def __init__(self):
159 |             self.phase = "tongji"
160 |             self.dataroot = "../datasets/KITTI_object"
161 | 
162 |     opt = test_opt()
163 |     dataset = KITTIGtDataset()
164 |     dataset.initialize(opt)
165 | 
166 |     #idx = 0
167 |     #data = dataset.__getitem__(idx)
168 | 
169 |     #embed()
170 | 
171 |     # statistics ("tongji"): foreground vs. background depth distribution
172 |     num_fg = 0
173 |     num_bg = 0
174 |     num_all = 0
175 |     num_iter = len(dataset)
176 |     #num_iter = 20
177 | 
178 |     fg_list = []
179 |     bg_list = []
180 | 
181 |     fg_hist_cnt = 0
182 |     bg_hist_cnt = 0
183 | 
184 |     def cal_grad(B):
185 |         B = B[0, ...]
186 |         H, W = B.shape
187 |         # Laplacian of the depth map as a simple gradient measure (helper, not called below);
188 |         # cv2 works directly on numpy arrays, so no conversion is needed
189 | 
190 |         B_lap = cv2.Laplacian(B, cv2.CV_64F)
191 | 
192 | 
193 |         return np.array(B_lap)
194 | 
195 |     for i in range(num_iter):
196 |         print(i)
197 |         #idx = np.random.randint(0, len(dataset))
198 |         data = dataset.__getitem__(i)
199 | 
200 |         bbox = data['bbox']
201 |         B = data['B'] * 80
202 | 
203 |         num_box = bbox.shape[0]
204 |         rois_mask = np.zeros_like(B)
205 |         for j in range(num_box):
206 | 
207 |             box = bbox[j]
208 |             x1, y1, x2, y2 = map(int, box)
209 |             rois_mask[0, y1:y2, x1:x2] = 1
210 |         rois_mask = torch.from_numpy(rois_mask.astype(np.uint8))
211 | 
212 |         mask_0 = B != 0
213 |         cur_fg_list = torch.masked_select(B, mask_0 & rois_mask)
214 |         cur_bg_list = torch.masked_select(B, mask_0 & (1 - rois_mask))
215 | 
216 |         cur_fg_list = list(cur_fg_list.numpy())
217 |         cur_bg_list = list(cur_bg_list.numpy())
218 | 
219 |         cur_fg_hist_cnt, bins = np.histogram(cur_fg_list, bins=10, range=(0,80))
220 |         cur_bg_hist_cnt, bins = np.histogram(cur_bg_list, bins=10, range=(0,80))
221 | 
222 |         fg_hist_cnt += cur_fg_hist_cnt
223 |         bg_hist_cnt += cur_bg_hist_cnt
224 | 
225 |         #fg_list.extend(list(cur_fg_list.numpy()))
226 |         #bg_list.extend(list(cur_bg_list.numpy()))
227 | 
228 |     print(fg_hist_cnt)
229 |     print(bg_hist_cnt)
230 | 
231 |     import matplotlib
232 |     import matplotlib.pyplot as plt
233 |     import seaborn as sns
234 |     sns.set(color_codes=True)
235 | 
236 |     center = (bins[:-1] + bins[1:]) / 2
237 |     width = 0.3 * (bins[1] - bins[0])
238 | 
239 |     fg_hist_frq = 1. * fg_hist_cnt / fg_hist_cnt.sum()
240 |     bg_hist_frq = 1.
* bg_hist_cnt / bg_hist_cnt.sum() 241 | 242 | print("fg_hist_frq: {}".format(fg_hist_frq)) 243 | print("bg_hist_frq: {}".format(bg_hist_frq)) 244 | 245 | labels = [str(a) for a in range(8, 88, 8)] 246 | x = np.arange(len(labels)) 247 | width = 0.35 248 | 249 | fig, ax = plt.subplots() 250 | rect1 = ax.bar(x-width/2, fg_hist_frq, color='salmon', width=width, label="Foreground") 251 | rect2 = ax.bar(x+width/2, bg_hist_frq, color="darkseagreen", width=width, label="Background") 252 | 253 | ax.set_xticks(x) 254 | ax.set_xticklabels(labels) 255 | ax.legend() 256 | 257 | fig.tight_layout() 258 | plt.show() 259 | 260 | embed() 261 | 262 | """ 263 | plt.subplot(1, 2, 1) 264 | plt.bar(center, fg_hist_frq, align='center', color='salmon') 265 | plt.xlim((0,80)) 266 | plt.ylim((0,0.2)) 267 | 268 | plt.subplot(1, 2, 2) 269 | plt.bar(center, bg_hist_frq, align='center', color='salmon') 270 | plt.xlim((0,80)) 271 | plt.ylim((0,0.2)) 272 | 273 | plt.show() 274 | 275 | embed() 276 | """ 277 | -------------------------------------------------------------------------------- /data/kitti_prediction_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | import torch 4 | import numpy as np 5 | from lib.core.config import cfg 6 | import cv2 7 | import json 8 | from lib.utils.logging import setup_logging 9 | from lib.utils.obj_utils import read_labels, rois2mask, rois2mask_shrink, rois2boxlist 10 | logger = setup_logging(__name__) 11 | 12 | 13 | class KITTIPredictionDataset(): 14 | def initialize(self, opt): 15 | self.opt = opt 16 | self.root = opt.dataroot 17 | self.dir_anno = os.path.join(opt.dataroot, 'annotations', opt.phase + '_annotations.json') 18 | self.A_paths, self.B_paths, self.AB_anno, self.rois_paths = self.getData() 19 | self.data_size = len(self.AB_anno) 20 | self.depth_normalize = 255. * 80. 
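# This loader is meant for inference on images without ground-truth depth:
# online_aug_val_test below fills B with a dummy all-zero map of the image size,
# and the 2D boxes read from the label files are shifted by the padding offsets
# (pad[2] in x, pad[0] in y) so that they stay aligned with the padded image.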
21 | self.ignore_cate_list = ['Person_sitting', 'Misc', 'DontCare'] 22 | self.uniform_size = (385, 1243) 23 | 24 | def getData(self): 25 | with open(self.dir_anno, 'r') as load_f: 26 | AB_anno = json.load(load_f) 27 | A_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rgb_path']) for i in range(len(AB_anno))] 28 | B_list = [os.path.join(self.opt.dataroot, AB_anno[i]['depth_path']) for i in range(len(AB_anno))] 29 | rois_list = [os.path.join(self.opt.dataroot, AB_anno[i]['rois_path']) for i in range(len(AB_anno))] 30 | logger.info('Loaded Kitti data!') 31 | return A_list, B_list, AB_anno, rois_list 32 | 33 | def __getitem__(self, anno_index): 34 | 35 | data = self.online_aug_val_test(anno_index) 36 | return data 37 | 38 | def online_aug_val_test(self, idx): 39 | A_path = self.A_paths[idx] 40 | B_path = self.B_paths[idx] 41 | rois_path = self.rois_paths[idx] 42 | 43 | A = cv2.imread(A_path, -1) # [H, W, C] C:bgr 44 | 45 | B = np.zeros((A.shape[0], A.shape[1]), dtype=np.float32 ) 46 | 47 | rois = read_labels(rois_path, ignore_cate=self.ignore_cate_list) # list of instances of class ObjectLabel, see obj_utils.py 48 | raw_boxlist = rois2boxlist(rois, (A.shape[1], A.shape[0])) 49 | 50 | flip_flg, resize_size, crop_size, pad, resize_ratio = self.set_flip_pad_reshape_crop(A) 51 | 52 | A_crop = np.pad(A, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', constant_values=(0, 0)) 53 | B_crop = np.pad(B, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', constant_values=(0, 0)) 54 | 55 | raw_boxlist.bbox[:, 0::2] += pad[2] 56 | raw_boxlist.bbox[:, 1::2] += pad[0] 57 | boxes = raw_boxlist.bbox 58 | 59 | A_crop = A_crop.transpose((2, 0, 1)) 60 | B_crop = B_crop[np.newaxis, :, :] 61 | # change the color channel, bgr->rgb 62 | A_crop = A_crop[::-1, :, :] 63 | # to torch, normalize 64 | A_crop = self.scale_torch(A_crop, 255.) 
65 | B_crop = self.scale_torch(B_crop, 1.0) 66 | 67 | data = {'A': A_crop, 'B': B_crop, 'bbox': boxes, 68 | 'A_raw': A, 'B_raw': B, 'A_paths': A_path, 'B_paths': B_path, 'pad_raw': np.array(pad)} 69 | return data 70 | 71 | def set_flip_pad_reshape_crop(self, A): 72 | flip_flg = False 73 | 74 | # pad 75 | pad_height = self.uniform_size[0] - A.shape[0] 76 | pad_width = self.uniform_size[1] - A.shape[1] 77 | pad = [pad_height, 0, pad_width, 0] # [up, down, left, right] 78 | 79 | # reshape 80 | resize_ratio = 1.0 81 | resize_size = [int((A.shape[0]+pad[0]+pad[1]) * resize_ratio + 0.5), 82 | int((A.shape[1]+pad[2]+pad[3]) * resize_ratio + 0.5)] 83 | 84 | # crop 85 | start_y = 0 if resize_size[0] < (50 + pad[0] + pad[1]) * resize_ratio + cfg.CROP_SIZE[0]\ 86 | else np.random.randint(int((50 + pad[0]) * resize_ratio), resize_size[0] - cfg.CROP_SIZE[0] - pad[1] * resize_ratio) 87 | start_x = np.random.randint(pad[2] * resize_ratio, resize_size[1] - cfg.CROP_SIZE[1] - pad[3] * resize_ratio) 88 | crop_height = cfg.CROP_SIZE[0] 89 | crop_width = cfg.CROP_SIZE[1] 90 | crop_size = [start_x, start_y, crop_width, crop_height] 91 | return flip_flg, resize_size, crop_size, pad, resize_ratio 92 | 93 | def flip_pad_reshape_crop(self, img, flip, resize_size, crop_size, pad, pad_value=0): 94 | if len(img.shape) == 1: 95 | return img 96 | # Flip 97 | if flip: 98 | img = np.flip(img, axis=1) 99 | 100 | # Pad the raw image 101 | if len(img.shape) == 3: 102 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0)), 'constant', 103 | constant_values=(pad_value, pad_value)) 104 | else: 105 | img_pad = np.pad(img, ((pad[0], pad[1]), (pad[2], pad[3])), 'constant', 106 | constant_values=(pad_value, pad_value)) 107 | # Resize the raw image 108 | img_resize = cv2.resize(img_pad, (resize_size[1], resize_size[0]), interpolation=cv2.INTER_LINEAR) 109 | # Crop the resized image 110 | img_crop = img_resize[crop_size[1]:crop_size[1] + crop_size[3], crop_size[0]:crop_size[0] + crop_size[2]] 111 | return img_crop 112 | 113 | def depth_to_class(self, depth): 114 | """ 115 | Transfer 1-channel depth to 1-channel depth in n depth ranges 116 | Mark invalid padding area as cfg.MODEL.DECODER_OUTPUT_C + 1 117 | :param depth: 1-channel depth, [1, h, w] 118 | :return: classes [1, h, w] 119 | """ 120 | invalid_mask = depth < 0. 
121 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 122 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 123 | classes = ((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL).to(torch.int) 124 | classes[invalid_mask] = cfg.MODEL.DECODER_OUTPUT_C + 1 125 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 126 | return classes 127 | 128 | def scale_torch(self, img, scale): 129 | # scale image 130 | img = img.astype(np.float32) 131 | img /= scale 132 | img = torch.from_numpy(img.copy()) 133 | if img.size(0) == 3: 134 | img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img) 135 | else: 136 | img = transforms.Normalize((0,), (1,))(img) 137 | return img 138 | 139 | 140 | def __len__(self): 141 | return self.data_size 142 | 143 | def name(self): 144 | return 'NYUDepthV2Dataset' 145 | -------------------------------------------------------------------------------- /data/load_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | import importlib 3 | from lib.utils.logging import setup_logging 4 | logger = setup_logging(__name__) 5 | 6 | class CustomerDataLoader(): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.dataset = create_dataset(opt) 10 | self.dataloader = torch.utils.data.DataLoader( 11 | self.dataset, 12 | batch_size=opt.batchsize, 13 | shuffle= True if 'train' in opt.phase else False, 14 | num_workers=opt.thread) 15 | 16 | def load_data(self): 17 | return self 18 | 19 | def __len__(self): 20 | return len(self.dataset) 21 | 22 | def __iter__(self): 23 | for i, data in enumerate(self.dataloader): 24 | if i * self.opt.batchsize >= float("inf"): 25 | break 26 | yield data 27 | 28 | def create_dataset(opt): 29 | dataset = find_dataset_lib(opt.dataset)() 30 | dataset.initialize(opt) 31 | logger.info("%s is created." % opt.dataset) 32 | return dataset 33 | 34 | 35 | def find_dataset_lib(dataset_name): 36 | """ 37 | Give the option --dataset [datasetname], import "data/datasetname_dataset.py" 38 | :param dataset_name: --dataset 39 | :return: "data/datasetname_dataset.py" 40 | """ 41 | dataset_filename = "data." + dataset_name + "_dataset" 42 | datasetlib = importlib.import_module(dataset_filename) 43 | 44 | dataset = None 45 | target_dataset_name = dataset_name.replace('_', '') + 'dataset' 46 | for name, cls in datasetlib.__dict__.items(): 47 | if name.lower() == target_dataset_name.lower(): 48 | dataset = cls 49 | if dataset is None: 50 | logger.info("In %s.py, there should be a class name that matches %s in lowercase." 
% ( 51 | dataset_filename, target_dataset_name)) 52 | exit(0) 53 | return dataset -------------------------------------------------------------------------------- /datasets/KITTI_object/annotations/data_object_training_annotations.json: -------------------------------------------------------------------------------- 1 | [{"rgb_path": "training/image_2/000048.png", "depth_path": "training/image_2/000048.png"}] 2 | -------------------------------------------------------------------------------- /datasets/KITTI_object/training/depth: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/Kitti/object/training/depth -------------------------------------------------------------------------------- /datasets/KITTI_object/training/image_2: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/data_object_image_2/training/image_2 -------------------------------------------------------------------------------- /datasets/KITTI_object/training/label_2: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/data_object_label_2/training/label_2 -------------------------------------------------------------------------------- /datasets/KITTI_object/training/planes: -------------------------------------------------------------------------------- 1 | /mnt/cephfs/common/lab/wangxinlong/data/KITTI/planes -------------------------------------------------------------------------------- /experiments/foresee/depth_normal_model.py: -------------------------------------------------------------------------------- 1 | import lateral_net 2 | from lib.utils.net_tools import * 3 | from lib.models.image_transfer import * 4 | from loss import weight_crossentropy_loss, rois_weight_crossentropy_loss 5 | from lib.core.config import cfg 6 | 7 | 8 | 9 | class DepthNormal(nn.Module): 10 | def __init__(self): 11 | super(DepthNormal, self).__init__() 12 | self.loss_names = ['Weighted_Cross_Entropy', 'Global_Normal'] 13 | self.depth_normal_model = DepthModel() 14 | 15 | def forward(self, data): 16 | # Input data is a_real, predicted data is b_fake, groundtruth is b_real 17 | self.a_real = data['A'].cuda() 18 | self.boxes = data['bbox'].to(device=self.a_real.device) 19 | self.b_fake, self.b_roi_fake = self.depth_normal_model(self.a_real, self.boxes) 20 | return {'b_fake': self.b_fake[1], 'b_fake_nosoftmax': self.b_fake[0], 'b_fake_roi': self.b_roi_fake[1], 'b_fake_roi_nosoftmax': self.b_roi_fake[0]} 21 | 22 | def inference(self, data): 23 | with torch.no_grad(): 24 | out = self.forward(data) 25 | 26 | class_conf_final = fg_bg_maxpooling(out['b_fake_nosoftmax'], out['b_fake_roi_nosoftmax']) 27 | out_depth_final = class_depth(class_conf_final) 28 | 29 | class_conf = out['b_fake'] 30 | return {'b_fake': out_depth_final, 'b_fake_conf': class_conf} 31 | 32 | def inference_kitti(self, data): 33 | #crop kitti images into 3 parts 34 | with torch.no_grad(): 35 | self.a_l_real = data['A_l'].cuda() 36 | self.boxes_l = data['bbox_l'].to(device=self.a_l_real.device) 37 | [b_l_classes_nosoftmax, b_l_classes], [b_l_roi_classes_nosoftmax, b_l_roi_classes] = self.depth_normal_model(self.a_l_real, self.boxes_l) 38 | b_l_classes_final = fg_bg_maxpooling(b_l_classes_nosoftmax, b_l_roi_classes_nosoftmax) 39 | self.b_l_fake_final = class_depth(b_l_classes_final) 40 | 41 | self.a_m_real = data['A_m'].cuda() 42 | self.boxes_m = 
data['bbox_m'].to(device=self.a_m_real.device) 43 | [b_m_classes_nosoftmax, b_m_classes], [b_m_roi_classes_nosoftmax, b_m_roi_classes] = self.depth_normal_model(self.a_m_real, self.boxes_m) 44 | b_m_classes_final = fg_bg_maxpooling(b_m_classes_nosoftmax, b_m_roi_classes_nosoftmax) 45 | self.b_m_fake_final = class_depth(b_m_classes_final) 46 | 47 | self.a_r_real = data['A_r'].cuda() 48 | self.boxes_r = data['bbox_r'].to(device=self.a_r_real.device) 49 | [b_r_classes_nosoftmax, b_r_classes], [b_r_roi_classes_nosoftmax, b_r_roi_classes] = self.depth_normal_model(self.a_r_real, self.boxes_r) 50 | b_r_classes_final = fg_bg_maxpooling(b_r_classes_nosoftmax, b_r_roi_classes_nosoftmax) 51 | self.b_r_fake_final = class_depth(b_r_classes_final) 52 | 53 | out = kitti_merge_imgs(self.b_l_fake_final, self.b_m_fake_final, self.b_r_fake_final, torch.squeeze(data['B_raw']).shape, data['crop_lmr']) 54 | return {'b_fake': out} 55 | 56 | 57 | class ModelLoss(object): 58 | def __init__(self): 59 | super(ModelLoss, self).__init__() 60 | self.weight_cross_entropy_loss =weight_crossentropy_loss 61 | self.rois_weight_cross_entropy_loss =rois_weight_crossentropy_loss 62 | 63 | 64 | def criterion(self, pred_softmax, pred_nosoftmax, pred_softmax_roi, pred_nosoftmax_roi, data, epoch): 65 | loss = {} 66 | # transfer output and gt 67 | pred_depth = class_depth(pred_softmax) 68 | 69 | #alpha = 0.99 70 | add_alpha = 0.2 71 | add_beta = 0.2 72 | 73 | # bg 74 | loss_entropy, valid_num = self.weight_cross_entropy_loss(pred_nosoftmax, data['B_classes'], data) 75 | loss_entropy_rois, valid_num_roi = self.rois_weight_cross_entropy_loss(pred_nosoftmax, data['B_rois_classes'], data) 76 | 77 | loss['bg_wcel_loss_fg'] = loss_entropy_rois / valid_num_roi 78 | loss['bg_wcel_loss_bg'] = (loss_entropy - loss_entropy_rois) / (valid_num - valid_num_roi) 79 | loss['bg_wcel_loss'] = (1 - add_beta) * loss['bg_wcel_loss_bg'] + add_beta * loss['bg_wcel_loss_fg'] 80 | 81 | # fg 82 | fg_loss_entropy, fg_valid_num = self.weight_cross_entropy_loss(pred_nosoftmax_roi, data['B_classes'], data) 83 | fg_loss_entropy_rois, fg_valid_num_roi = self.rois_weight_cross_entropy_loss(pred_nosoftmax_roi, data['B_rois_classes'], data) 84 | 85 | loss['fg_wcel_loss_bg'] = (fg_loss_entropy - fg_loss_entropy_rois) / (fg_valid_num - fg_valid_num_roi) 86 | loss['fg_wcel_loss_fg'] = fg_loss_entropy_rois / fg_valid_num_roi 87 | loss['fg_wcel_loss'] = (1 - add_alpha) * loss['fg_wcel_loss_fg'] + add_alpha * loss['fg_wcel_loss_bg'] 88 | 89 | loss['total_loss'] = loss['bg_wcel_loss'] + loss['fg_wcel_loss'] 90 | return loss 91 | 92 | 93 | class ModelOptimizer(object): 94 | def __init__(self, model): 95 | super(ModelOptimizer, self).__init__() 96 | backbone_params = [] 97 | backbone_params_names = [] 98 | nonbackbone_others_params = [] 99 | nonbackbone_others_params_names = [] 100 | nograd_param_names = [] 101 | 102 | for key, value in dict(model.named_parameters()).items(): 103 | if value.requires_grad: 104 | if 'res' in key: 105 | backbone_params.append(value) 106 | backbone_params_names.append(key) 107 | else: 108 | nonbackbone_others_params.append(value) 109 | nonbackbone_others_params_names.append(key) 110 | else: 111 | nograd_param_names.append(key) 112 | 113 | lr_resnet = cfg.TRAIN.BASE_LR 114 | lr_fcn = cfg.TRAIN.BASE_LR * cfg.TRAIN.DIFF_LR 115 | weight_decay = 0.0005 116 | 117 | net_params = [ 118 | {'params': backbone_params, 119 | 'lr': lr_resnet, 120 | 'weight_decay': weight_decay}, 121 | {'params': nonbackbone_others_params, 122 | 'lr': lr_fcn, 123 | 
'weight_decay': weight_decay}, 124 | ] 125 | self.optimizer = torch.optim.SGD(net_params, momentum=0.9) 126 | def optim(self, loss): 127 | self.optimizer.zero_grad() 128 | loss_all = loss['total_loss'] 129 | loss_all.backward() 130 | self.optimizer.step() 131 | 132 | 133 | class DepthModel(nn.Module): 134 | def __init__(self): 135 | super(DepthModel, self).__init__() 136 | bottom_up_model = 'lateral_net.lateral_' + cfg.MODEL.ENCODER 137 | self.lateral_modules = get_func(bottom_up_model)() 138 | self.topdown_modules = lateral_net.fcn_topdown(cfg.MODEL.ENCODER) 139 | 140 | def forward(self, x, boxlist): 141 | lateral_out, backbone_stage_size = self.lateral_modules(x) 142 | # out: [nosoftmax, softmax] 143 | out, out_roi = self.topdown_modules(lateral_out, backbone_stage_size, boxlist) 144 | return out, out_roi 145 | 146 | def cal_params(model): 147 | model_dict = model.state_dict() 148 | paras = np.sum(p.numel() for p in model.parameters() if p.requires_grad) 149 | sum = 0 150 | 151 | for key in model_dict.keys(): 152 | print(key) 153 | if 'layer5' not in key: 154 | if 'running' not in key: 155 | print(key) 156 | ss = model_dict[key].size() 157 | temp = 1 158 | for s in ss: 159 | temp = temp * s 160 | print(temp) 161 | sum = sum + temp 162 | print(sum) 163 | print(paras) 164 | -------------------------------------------------------------------------------- /experiments/foresee/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.core.config import cfg 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def cross_entropy_loss(pred_nosoftmax, gt_class): 8 | """ 9 | Standard cross-entropy loss 10 | :param pred_nosoftmax: predicted label 11 | :param gt_class: target label 12 | :return: 13 | """ 14 | gt_class = torch.squeeze(gt_class) 15 | gt_class = gt_class.to(device=pred_nosoftmax.device, dtype=torch.int64) 16 | entropy = torch.nn.CrossEntropyLoss(ignore_index=cfg.MODEL.DECODER_OUTPUT_C+1) 17 | loss = entropy(pred_nosoftmax, gt_class) 18 | return loss 19 | 20 | 21 | def weight_crossentropy_loss(pred_nosoftmax, gt, data): 22 | """ 23 | Weighted Cross-entropy Loss 24 | :param pred_nosoftmax: predicted label 25 | :param gt: target label 26 | """ 27 | invalid_side = data['invalid_side'] 28 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 29 | weight = cfg.DATA.WCE_LOSS_WEIGHT 30 | weight /= torch.sum(weight, 1, keepdim=True) 31 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 32 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 33 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 34 | 35 | gt_reshape = gt.reshape(-1, 1) 36 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 37 | weight = torch.matmul(one_hot, weight) 38 | weight_log_pred = weight * log_pred 39 | 40 | valid_pixes = torch.tensor([0], device=pred_nosoftmax.device, dtype=torch.float) 41 | for i in range(gt.size(0)): 42 | valid_gt = gt[i, :, int(invalid_side[i][0]):gt.size(2)-int(invalid_side[i][1]), :] 43 | valid_pixes += valid_gt.size(1) * valid_gt.size(2) 44 | loss_sum = -1 * torch.sum(weight_log_pred) 45 | return loss_sum, valid_pixes 46 | 47 | def rois_weight_crossentropy_loss(pred_nosoftmax, gt, data): 48 | """ 49 | Weighted Cross-entropy Loss 50 | :param pred_nosoftmax: predicted label 51 | :param gt: target label 52 | """ 53 | invalid_side = 
data['invalid_side'] 54 | rois_mask = data['rois_mask'] 55 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 56 | weight = cfg.DATA.WCE_LOSS_WEIGHT 57 | weight /= torch.sum(weight, 1, keepdim=True) 58 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 59 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 60 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 61 | 62 | gt_reshape = gt.reshape(-1, 1) 63 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 64 | weight = torch.matmul(one_hot, weight) 65 | weight_log_pred = weight * log_pred 66 | 67 | valid_pixels = max(rois_mask.sum(), 1) 68 | loss_sum = -1 * torch.sum(weight_log_pred) 69 | return loss_sum, valid_pixels 70 | 71 | def rois_scale_invariant_loss(pred_depth, data): 72 | """ 73 | Follow Eigen paper, add silog loss, for KITTI benchmark 74 | :param pred_depth: 75 | :param data: 76 | :return: 77 | """ 78 | invalid_side = data['invalid_side'] 79 | gt_depth = data['B'].cuda() 80 | 81 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 82 | 83 | loss_mean = torch.tensor([0.]).cuda() 84 | for j in range(pred_depth.size(0)): 85 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 86 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 87 | valid_rois_mask = rois_mask[j, :, int(invalid_side[j][0]): rois_mask.size(2) - int(invalid_side[j][1]), :] 88 | 89 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 90 | diff_log = diff_log * valid_rois_mask.to(dtype=diff_log.dtype) 91 | 92 | #size = torch.numel(diff_log) 93 | size = torch.sum(valid_rois_mask) 94 | if size == 0: 95 | continue 96 | 97 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 98 | loss = loss_mean / pred_depth.size(0) 99 | return loss 100 | 101 | 102 | def scale_invariant_loss(pred_depth, data): 103 | """ 104 | Follow Eigen paper, add silog loss, for KITTI benchmark 105 | :param pred_depth: 106 | :param data: 107 | :return: 108 | """ 109 | invalid_side = data['invalid_side'] 110 | gt_depth = data['B'].cuda() 111 | 112 | 113 | loss_mean = torch.tensor([0.]).cuda() 114 | for j in range(pred_depth.size(0)): 115 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 116 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 117 | 118 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 119 | 120 | size = torch.numel(diff_log) 121 | #size = torch.sum(valid_rois_mask) 122 | #if size == 0: 123 | # continue 124 | 125 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 126 | loss = loss_mean / pred_depth.size(0) 127 | return loss 128 | 129 | 130 | def berhu_loss(pred_depth, data, scale=80.): 131 | """ 132 | :param pred_depth: 133 | :param data: 134 | :return: 135 | """ 136 | huber_threshold = 0.2 137 | 138 | invalid_side = data['invalid_side'] 139 | gt_depth = data['B'].cuda() 140 | 141 | mask = gt_depth > 0 142 | 143 | pred_depth = pred_depth * mask.to(dtype=pred_depth.dtype) 144 | gt_depth = gt_depth * mask.to(dtype=gt_depth.dtype) 145 | 146 | diff = torch.abs(gt_depth - pred_depth) 147 | delta = huber_threshold * torch.max(diff).data.cpu() 148 | 149 | part1 = -F.threshold(-diff, -delta, 0.) 
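# berHu (reverse Huber): |x| for |x| <= delta, (x^2 + delta^2) / (2 * delta) otherwise.
# part1 above keeps the linear branch (clipped to 0 beyond delta); part2 below
# reconstructs the quadratic branch via F.threshold and rescales it by 2 * delta.
# For example, with delta = 2 an error of 1 m contributes 1, while an error of
# 4 m contributes (16 + 4) / 4 = 5.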
150 | part2 = F.threshold(diff**2 + delta**2, 2*delta**2, 0.) 151 | part2 = part2 / (2.*delta) 152 | 153 | loss = part1 + part2 154 | 155 | loss = loss[mask] 156 | loss = torch.mean(loss) 157 | 158 | return loss 159 | 160 | 161 | def rmse_log_loss(pred_depth, data, scale=80.): 162 | """ 163 | :param pred_depth: 164 | :param data: 165 | :return: 166 | """ 167 | 168 | gt_depth = data['B'].cuda() 169 | mask = gt_depth > 0 170 | 171 | pred_depth = pred_depth * scale 172 | gt_depth = gt_depth * scale 173 | 174 | diff = torch.log(gt_depth) - torch.log(pred_depth) 175 | diff = diff[mask] 176 | 177 | loss = torch.sqrt(torch.mean(diff**2)) 178 | return loss 179 | 180 | 181 | def rmse_loss(pred_depth, data, scale=80.): 182 | """ 183 | :param pred_depth: 184 | :param data: 185 | :return: 186 | """ 187 | 188 | gt_depth = data['B'].cuda() 189 | mask = gt_depth > 0 190 | 191 | pred_depth = pred_depth 192 | gt_depth = gt_depth 193 | 194 | diff = gt_depth - pred_depth 195 | diff = diff[mask] 196 | 197 | loss = torch.sqrt(torch.mean(diff**2)) 198 | return loss 199 | 200 | def mse_loss(pred_depth, data, scale=80.): 201 | """ 202 | :param pred_depth: 203 | :param data: 204 | :return: 205 | """ 206 | 207 | gt_depth = data['B'].cuda() 208 | mask = gt_depth > 0 209 | 210 | pred_depth = pred_depth 211 | gt_depth = gt_depth 212 | 213 | diff = gt_depth - pred_depth 214 | diff = diff[mask] 215 | 216 | loss = torch.mean(diff**2) 217 | return loss 218 | 219 | 220 | def rois_rmse_log_loss(pred_depth, data, scale=80.): 221 | """ 222 | :param pred_depth: 223 | :param data: 224 | :return: 225 | """ 226 | 227 | gt_depth = data['B'].cuda() 228 | 229 | mask = gt_depth > 0 230 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 231 | mask = mask & rois_mask 232 | 233 | pred_depth = pred_depth 234 | gt_depth = gt_depth 235 | 236 | diff = torch.log(gt_depth) - torch.log(pred_depth) 237 | diff = diff[mask] 238 | 239 | loss = torch.sqrt(torch.mean(diff**2)) 240 | return loss 241 | -------------------------------------------------------------------------------- /experiments/foresee/pc/bin2obj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def bin2obj(path, output_path): 6 | 7 | pts = np.fromfile(open(path, 'rb'), np.single).reshape([-1, 4]) 8 | 9 | f = open(output_path, 'w') 10 | 11 | for i in range(pts.shape[0]): 12 | f.write('v %f %f %f %f %f %f\n' % (pts[i][0], pts[i][1], pts[i][2], pts[i][3], pts[i][3], pts[i][3])) 13 | 14 | f.close 15 | 16 | 17 | if __name__ == "__main__": 18 | src_path = "pseudo-lidar/foresee/training/000039.bin" 19 | output_path = './' + os.path.basename(src_path).split('.')[0] + '.obj' 20 | bin2obj(src_path, output_path) 21 | -------------------------------------------------------------------------------- /experiments/foresee/pc/gen.sh: -------------------------------------------------------------------------------- 1 | python kitti_prediction.py \ 2 | --dataroot ../../../datasets/KITTI_object \ 3 | --dataset kitti_prediction \ 4 | --load_ckpt ../epoch19_step18000.pth \ 5 | --pcd_dir pseudo-lidar/foresee/training \ 6 | --encoder ResNeXt101_32x4d_body_stride16 \ 7 | --decoder_out_c 100 \ 8 | --phase inference 9 | 10 | -------------------------------------------------------------------------------- /experiments/foresee/pc/gen_colorps.sh: -------------------------------------------------------------------------------- 1 | python kitti_prediction_colorps.py \ 2 | --dataroot ../../../datasets/KITTI_object \ 3 | 
--dataset kitti_prediction \ 4 | --load_ckpt ../epoch19_step18000.pth \ 5 | --pcd_dir pseudo-lidar/ForSeE.colorps/training \ 6 | --encoder ResNeXt101_32x4d_body_stride16 \ 7 | --decoder_out_c 100 \ 8 | --phase val 9 | 10 | -------------------------------------------------------------------------------- /experiments/foresee/pc/kitti_prediction.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(BASE_DIR))) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | sys.path.append(os.path.dirname(BASE_DIR)) 11 | 12 | from tools.parse_arg_test import TestOptions 13 | from data.load_dataset import CustomerDataLoader 14 | from lib.utils.net_tools import load_ckpt 15 | from lib.utils.logging import setup_logging, SmoothedValue 16 | 17 | from depth_normal_model_maxp_bfsoftmax import DepthNormal 18 | 19 | logger = setup_logging(__name__) 20 | 21 | from IPython import embed 22 | 23 | # Add by users 24 | #pcd_folder = os.path.join(BASE_DIR, 'output') 25 | calib_fold = os.path.join(ROOT_DIR, 'datasets/KITTI_object/training/calib') 26 | 27 | def main(): 28 | test_args = TestOptions().parse() 29 | test_args.thread = 1 # test code only supports thread = 1 30 | test_args.batchsize = 1 # test code only supports batchSize = 1 31 | 32 | pcd_folder = test_args.pcd_dir 33 | if not os.path.exists(pcd_folder): 34 | os.makedirs(pcd_folder) 35 | 36 | data_loader = CustomerDataLoader(test_args) 37 | test_datasize = len(data_loader) 38 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 39 | # load model 40 | model = DepthNormal() 41 | # evaluate mode 42 | model.eval() 43 | 44 | # load checkpoint 45 | if test_args.load_ckpt: 46 | load_ckpt(test_args, model) 47 | model.cuda() 48 | model = torch.nn.DataParallel(model) 49 | 50 | for i, data in enumerate(data_loader): 51 | out = model.module.inference(data) 52 | pred_depth = np.squeeze(out['b_fake']) * 80. 
# [h, w] 53 | pred_conf = np.squeeze(out['b_fake_conf']) # [c, h, w] 54 | 55 | # the image size has been padded to the size (385, 1243) 56 | pred_depth_crop = pred_depth[data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 57 | pred_conf_crop = pred_conf[:, data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 58 | 59 | #sample_th = 0.15 60 | #sample_mask = get_sample_mask(pred_conf_crop.cpu().numpy(), threshold=sample_th) # [h, w] 61 | 62 | ####################################################################################### 63 | # add by users 64 | img_name = data['A_paths'][0].split('/')[-1][:-4] 65 | calib_name = img_name + '.txt' 66 | calib_dir = os.path.join(calib_fold, calib_name) 67 | camera_para = np.genfromtxt(calib_dir, delimiter=' ', skip_footer= 3, dtype=None) 68 | P3_0 = camera_para[3] 69 | P2_0 = camera_para[2] 70 | P3_2 = P3_0 71 | P3_2[4] -= P2_0[4] 72 | R0_rect = np.genfromtxt(calib_dir, delimiter=' ', skip_header=4, skip_footer=2) 73 | Tr_velo_to_cam0 = np.genfromtxt(calib_dir, delimiter=' ', skip_header=5, skip_footer=1) 74 | 75 | pcd_cam2 = reconstruct_3D(pred_depth_crop.cpu().numpy(), P3_2[3], P3_2[7], P3_2[1], P3_2[6]) 76 | # Transfer points in cam2 coordinate to cam0 coordinate 77 | pcd_cam0 = pcd_cam2 - np.array([[[P2_0[4] / P2_0[1]]], [[P2_0[8] / P2_0[1]]], [[P2_0[12] / P2_0[1]]]]) 78 | 79 | # Transfer points in cam0 coordinate to velo coordinate 80 | pcd_velo = transfer_points_in_cam0_to_velo(pcd_cam0, R0_rect, Tr_velo_to_cam0) 81 | 82 | rgb = data['A_raw'][0].cpu().numpy() 83 | 84 | save_bin(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.bin', sample_mask=None) 85 | #save_ply(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.ply', sample_mask=None) 86 | #save_ply(pcd_cam2, rgb, os.path.join(pcd_folder, img_name) + '.ply') 87 | print('saved', img_name) 88 | ####################################################################################### 89 | 90 | 91 | ########################################################################## 92 | # others 93 | def get_sample_mask(conf, threshold): 94 | max_conf = np.amax(conf, axis=0) # [h, w] 95 | print(max_conf.shape) 96 | return max_conf >= threshold 97 | 98 | def transfer_points_in_cam0_to_velo(pcd_cam0, R_rect0, T_velo_cam0): 99 | pcd_cam0_3n = pcd_cam0.reshape((3, -1)) 100 | R_rect0 = np.array(R_rect0[1:], dtype=np.float64).reshape((3, 3)) 101 | R_rect0_inv = np.linalg.inv(R_rect0) 102 | 103 | # X_cam0_raw = (R_rect0)^-1 * X_cam0 104 | pcd_cam0_raw = np.matmul(R_rect0_inv, pcd_cam0_3n) 105 | 106 | T_velo_cam0 = np.array(T_velo_cam0[1:], dtype=np.float64).reshape((3, 4)) 107 | R_velo_cam0 = T_velo_cam0[:, 0:3] 108 | T_velo_cam0 = T_velo_cam0[:, 3] 109 | R_cam0_velo = np.linalg.inv(R_velo_cam0) 110 | T_cam0_velo = -np.matmul(R_cam0_velo, T_velo_cam0) 111 | 112 | # X_velo = R*X_cam0 + T 113 | T_cam0_velo = T_cam0_velo[:, np.newaxis] 114 | pcd_velo_3n = np.matmul(R_cam0_velo, pcd_cam0_raw) + T_cam0_velo 115 | pcd_velo = pcd_velo_3n.reshape(3, pcd_cam0.shape[1], pcd_cam0.shape[2]) 116 | return pcd_velo 117 | 118 | 119 | def reconstruct_3D(depth, cu, cv, fx, fy): 120 | width = depth.shape[1] 121 | height = depth.shape[0] 122 | row = np.arange(0, width, 1) 123 | u = np.array([row for _ in np.arange(height)]) 124 | col = np.arange(0, height, 1) 125 | v = np.array([col for _ in np.arange(width)]) 126 | v = v.transpose(1, 0) 127 | 128 | x = (u - cu) * depth / fx 129 | y = (v - cv) * depth / fy 130 | z = depth 131 | 132 | x = x[np.newaxis, :, :] 133 | y = y[np.newaxis, :, :] 134 | z = z[np.newaxis, :, :] 135 | return 
np.concatenate([x, y, z], axis=0) 136 | 137 | def save_ply(pcd, rgb, path, sample_mask=None): 138 | width = rgb.shape[1] 139 | height = rgb.shape[0] 140 | x = np.reshape(pcd[0], width * height) 141 | y = np.reshape(pcd[1], width * height) 142 | z = np.reshape(pcd[2], width * height) 143 | 144 | rgb = np.reshape(rgb, (width * height, 3)) 145 | 146 | if sample_mask is not None: 147 | sample_mask = np.reshape(sample_mask, width * height) 148 | x = x[sample_mask] 149 | y = y[sample_mask] 150 | z = z[sample_mask] 151 | 152 | rgb = rgb[sample_mask, :] 153 | 154 | r = rgb[:, 2] 155 | g = rgb[:, 1] 156 | b = rgb[:, 0] 157 | r = np.squeeze(r) 158 | g = np.squeeze(g) 159 | b = np.squeeze(b) 160 | 161 | ply_head = 'ply\n' \ 162 | 'format ascii 1.0\n' \ 163 | 'element vertex %d\n' \ 164 | 'property float x\n' \ 165 | 'property float y\n' \ 166 | 'property float z\n' \ 167 | 'property uchar red\n' \ 168 | 'property uchar green\n' \ 169 | 'property uchar blue\n' \ 170 | 'end_header' % r.shape[0] 171 | # ---- Save ply data to disk 172 | np.savetxt(path, np.column_stack((x, y, z, r, g, b)), fmt="%f %f %f %d %d %d", header=ply_head, comments='') 173 | ########################################################################## 174 | 175 | 176 | def save_bin(pcd, rgb, path, sample_mask=None): 177 | width = rgb.shape[1] 178 | height = rgb.shape[0] 179 | xyz = np.reshape(pcd, [3, width * height]) 180 | xyz = np.swapaxes(xyz, 0, 1) 181 | 182 | #rgb = np.reshape(rgb, (width * height, 3)) 183 | 184 | if sample_mask is not None: 185 | sample_mask = np.reshape(sample_mask, width * height) 186 | xyz = xyz[sample_mask, :] 187 | #rgb = rgb[sample_mask, :] 188 | 189 | # remove points higher than 1m. 190 | height_filter = xyz[:, 2] <= 1. 191 | xyz = xyz[height_filter, :] 192 | 193 | # add reflectance 194 | r = np.ones([xyz.shape[0], 1]) 195 | xyzr = np.hstack([xyz, r]).astype(np.single) 196 | 197 | f = open(path, 'wb') 198 | f.write(xyzr) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /experiments/foresee/pc/kitti_prediction_colorps.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(BASE_DIR))) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | sys.path.append(os.path.dirname(BASE_DIR)) 11 | 12 | from tools.parse_arg_test import TestOptions 13 | from data.load_dataset import CustomerDataLoader 14 | from lib.utils.net_tools import load_ckpt 15 | from lib.utils.logging import setup_logging, SmoothedValue 16 | 17 | from depth_normal_model_maxp_bfsoftmax import DepthNormal 18 | 19 | logger = setup_logging(__name__) 20 | 21 | from IPython import embed 22 | 23 | # Add by users 24 | #pcd_folder = os.path.join(BASE_DIR, 'output') 25 | calib_fold = os.path.join(ROOT_DIR, 'datasets/KITTI_object/training/calib') 26 | 27 | def main(): 28 | test_args = TestOptions().parse() 29 | test_args.thread = 1 # test code only supports thread = 1 30 | test_args.batchsize = 1 # test code only supports batchSize = 1 31 | 32 | pcd_folder = test_args.pcd_dir 33 | if not os.path.exists(pcd_folder): 34 | os.makedirs(pcd_folder) 35 | 36 | data_loader = CustomerDataLoader(test_args) 37 | test_datasize = len(data_loader) 38 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 39 | # load model 40 | 
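# Overall flow of this script: predict a dense depth map with DepthNormal, strip the
# padding added by the dataloader, back-project pixels with the intrinsics parsed from
# the calib file (reconstruct_3D), and save a colored .ply point cloud. Unlike
# kitti_prediction.py, the points here are kept in the camera frame (pcd_cam2) and
# written with save_ply rather than converted to velodyne coordinates and saved as .bin.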
model = DepthNormal() 41 | # evaluate mode 42 | model.eval() 43 | 44 | # load checkpoint 45 | if test_args.load_ckpt: 46 | load_ckpt(test_args, model) 47 | model.cuda() 48 | model = torch.nn.DataParallel(model) 49 | 50 | for i, data in enumerate(data_loader): 51 | out = model.module.inference(data) 52 | pred_depth = np.squeeze(out['b_fake']) * 80. # [h, w] 53 | pred_conf = np.squeeze(out['b_fake_conf']) # [c, h, w] 54 | 55 | # the image size has been padded to the size (385, 1243) 56 | pred_depth_crop = pred_depth[data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 57 | pred_conf_crop = pred_conf[:, data['pad_raw'][0][0]:, data['pad_raw'][0][2]:] 58 | 59 | #sample_th = 0.15 60 | #sample_mask = get_sample_mask(pred_conf_crop.cpu().numpy(), threshold=sample_th) # [h, w] 61 | 62 | ####################################################################################### 63 | # add by users 64 | img_name = data['A_paths'][0].split('/')[-1][:-4] 65 | calib_name = img_name + '.txt' 66 | calib_dir = os.path.join(calib_fold, calib_name) 67 | camera_para = np.genfromtxt(calib_dir, delimiter=' ', skip_footer= 3, dtype=None) 68 | P3_0 = camera_para[3] 69 | P2_0 = camera_para[2] 70 | P3_2 = P3_0 71 | P3_2[4] -= P2_0[4] 72 | R0_rect = np.genfromtxt(calib_dir, delimiter=' ', skip_header=4, skip_footer=2) 73 | Tr_velo_to_cam0 = np.genfromtxt(calib_dir, delimiter=' ', skip_header=5, skip_footer=1) 74 | 75 | pcd_cam2 = reconstruct_3D(pred_depth_crop.cpu().numpy(), P3_2[3], P3_2[7], P3_2[1], P3_2[6]) 76 | # Transfer points in cam2 coordinate to cam0 coordinate 77 | pcd_cam0 = pcd_cam2 - np.array([[[P2_0[4] / P2_0[1]]], [[P2_0[8] / P2_0[1]]], [[P2_0[12] / P2_0[1]]]]) 78 | 79 | # Transfer points in cam0 coordinate to velo coordinate 80 | pcd_velo = transfer_points_in_cam0_to_velo(pcd_cam0, R0_rect, Tr_velo_to_cam0) 81 | 82 | rgb = data['A_raw'][0].cpu().numpy() 83 | 84 | #save_bin(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.bin', sample_mask=None) 85 | #save_ply(pcd_velo, rgb, os.path.join(pcd_folder, img_name) + '.ply', sample_mask=None) 86 | save_ply(pcd_cam2, rgb, os.path.join(pcd_folder, img_name) + '.ply') 87 | print('saved', img_name) 88 | ####################################################################################### 89 | 90 | 91 | ########################################################################## 92 | # others 93 | def get_sample_mask(conf, threshold): 94 | max_conf = np.amax(conf, axis=0) # [h, w] 95 | print(max_conf.shape) 96 | return max_conf >= threshold 97 | 98 | def transfer_points_in_cam0_to_velo(pcd_cam0, R_rect0, T_velo_cam0): 99 | pcd_cam0_3n = pcd_cam0.reshape((3, -1)) 100 | R_rect0 = np.array(R_rect0[1:], dtype=np.float64).reshape((3, 3)) 101 | R_rect0_inv = np.linalg.inv(R_rect0) 102 | 103 | # X_cam0_raw = (R_rect0)^-1 * X_cam0 104 | pcd_cam0_raw = np.matmul(R_rect0_inv, pcd_cam0_3n) 105 | 106 | T_velo_cam0 = np.array(T_velo_cam0[1:], dtype=np.float64).reshape((3, 4)) 107 | R_velo_cam0 = T_velo_cam0[:, 0:3] 108 | T_velo_cam0 = T_velo_cam0[:, 3] 109 | R_cam0_velo = np.linalg.inv(R_velo_cam0) 110 | T_cam0_velo = -np.matmul(R_cam0_velo, T_velo_cam0) 111 | 112 | # X_velo = R*X_cam0 + T 113 | T_cam0_velo = T_cam0_velo[:, np.newaxis] 114 | pcd_velo_3n = np.matmul(R_cam0_velo, pcd_cam0_raw) + T_cam0_velo 115 | pcd_velo = pcd_velo_3n.reshape(3, pcd_cam0.shape[1], pcd_cam0.shape[2]) 116 | return pcd_velo 117 | 118 | 119 | def reconstruct_3D(depth, cu, cv, fx, fy): 120 | width = depth.shape[1] 121 | height = depth.shape[0] 122 | row = np.arange(0, width, 1) 123 | u = 
np.array([row for _ in np.arange(height)]) 124 | col = np.arange(0, height, 1) 125 | v = np.array([col for _ in np.arange(width)]) 126 | v = v.transpose(1, 0) 127 | 128 | x = (u - cu) * depth / fx 129 | y = (v - cv) * depth / fy 130 | z = depth 131 | 132 | x = x[np.newaxis, :, :] 133 | y = y[np.newaxis, :, :] 134 | z = z[np.newaxis, :, :] 135 | return np.concatenate([x, y, z], axis=0) 136 | 137 | def save_ply(pcd, rgb, path, sample_mask=None): 138 | width = rgb.shape[1] 139 | height = rgb.shape[0] 140 | x = np.reshape(pcd[0], width * height) 141 | y = np.reshape(pcd[1], width * height) 142 | z = np.reshape(pcd[2], width * height) 143 | 144 | rgb = np.reshape(rgb, (width * height, 3)) 145 | 146 | if sample_mask is not None: 147 | sample_mask = np.reshape(sample_mask, width * height) 148 | x = x[sample_mask] 149 | y = y[sample_mask] 150 | z = z[sample_mask] 151 | 152 | rgb = rgb[sample_mask, :] 153 | 154 | r = rgb[:, 2] 155 | g = rgb[:, 1] 156 | b = rgb[:, 0] 157 | r = np.squeeze(r) 158 | g = np.squeeze(g) 159 | b = np.squeeze(b) 160 | 161 | ply_head = 'ply\n' \ 162 | 'format ascii 1.0\n' \ 163 | 'element vertex %d\n' \ 164 | 'property float x\n' \ 165 | 'property float y\n' \ 166 | 'property float z\n' \ 167 | 'property uchar red\n' \ 168 | 'property uchar green\n' \ 169 | 'property uchar blue\n' \ 170 | 'end_header' % r.shape[0] 171 | # ---- Save ply data to disk 172 | np.savetxt(path, np.column_stack((x, y, z, r, g, b)), fmt="%f %f %f %d %d %d", header=ply_head, comments='') 173 | ########################################################################## 174 | 175 | 176 | def save_bin(pcd, rgb, path, sample_mask=None): 177 | width = rgb.shape[1] 178 | height = rgb.shape[0] 179 | xyz = np.reshape(pcd, [3, width * height]) 180 | xyz = np.swapaxes(xyz, 0, 1) 181 | 182 | #rgb = np.reshape(rgb, (width * height, 3)) 183 | 184 | if sample_mask is not None: 185 | sample_mask = np.reshape(sample_mask, width * height) 186 | xyz = xyz[sample_mask, :] 187 | #rgb = rgb[sample_mask, :] 188 | 189 | # remove points higher than 1m. 190 | height_filter = xyz[:, 2] <= 1. 
191 | xyz = xyz[height_filter, :] 192 | 193 | # add reflectance 194 | r = np.ones([xyz.shape[0], 1]) 195 | xyzr = np.hstack([xyz, r]).astype(np.single) 196 | 197 | f = open(path, 'wb') 198 | f.write(xyzr) 199 | 200 | 201 | if __name__ == '__main__': 202 | main() 203 | -------------------------------------------------------------------------------- /experiments/foresee/test.sh: -------------------------------------------------------------------------------- 1 | python val_kitti.py \ 2 | --dataroot ../../datasets/KITTI_object \ 3 | --dataset kitti_object_roi \ 4 | --load_ckpt epoch19_step18000.pth \ 5 | --encoder ResNeXt101_32x4d_body_stride16 \ 6 | --decoder_out_c 100 7 | -------------------------------------------------------------------------------- /experiments/foresee/train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python train_kitti.py \ 2 | --dataroot ../../datasets/KITTI_object \ 3 | --dataset kitti_object_roi \ 4 | --encoder ResNeXt101_32x4d_body_stride16 \ 5 | --decoder_out_c 100 \ 6 | --lr 0.001 \ 7 | --batchsize 4 \ 8 | --epoch 0 20 \ 9 | --use_tfboard 10 | 11 | -------------------------------------------------------------------------------- /experiments/foresee/train_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import traceback 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | 11 | from data.load_dataset import CustomerDataLoader 12 | from lib.utils.training_stats import TrainingStats 13 | from lib.utils.evaluate_depth_error import validate_err_kitti 14 | from lib.core.config import cfg, train_args, val_args, merge_cfg_from_file 15 | from lib.utils.net_tools import save_ckpt, load_ckpt 16 | from lib.utils.logging import setup_logging, SmoothedValue 17 | 18 | from depth_normal_model import * 19 | logger = setup_logging(__name__) 20 | 21 | 22 | def train(train_dataloader, model, epoch, loss_func, 23 | optimizer, scheduler, training_stats, val_dataloader=None, val_err=[], ignore_step=-1): 24 | model.train() 25 | epoch_steps = math.ceil(len(train_dataloader) / cfg.TRAIN.BATCH_SIZE) 26 | base_steps = epoch_steps * epoch + ignore_step if ignore_step != -1 else epoch_steps * epoch 27 | for i, data in enumerate(train_dataloader): 28 | if ignore_step != -1 and i > epoch_steps - ignore_step: 29 | return 30 | scheduler.step() # decay lr every iteration 31 | training_stats.IterTic() 32 | out = model(data) 33 | losses = loss_func.criterion(out['b_fake'], out['b_fake_nosoftmax'], out['b_fake_roi'], out['b_fake_roi_nosoftmax'], data, epoch) 34 | optimizer.optim(losses) 35 | 36 | step = base_steps + i + 1 37 | training_stats.UpdateIterStats(losses) 38 | training_stats.IterToc() 39 | training_stats.LogIterStats(step, epoch, optimizer.optimizer, val_err[0]) 40 | 41 | # validate the model 42 | if step % cfg.TRAIN.VAL_STEP == 0 and step != 0 and val_dataloader is not None:# 43 | model.eval() 44 | val_err[0] = val_kitti(val_dataloader, model) 45 | # training mode 46 | model.train() 47 | # save checkpoint 48 | if step % cfg.TRAIN.SNAPSHOT_ITERS == 0 and step != 0: 49 | save_ckpt(train_args, step, epoch, model, optimizer.optimizer, scheduler, val_err[0]) 50 | 51 | 52 | def val_kitti(val_dataloader, model): 53 | """ 54 | Validate the model. 
55 | """ 56 | smoothed_absRel = SmoothedValue(len(val_dataloader)) 57 | smoothed_silog = SmoothedValue(len(val_dataloader)) 58 | smoothed_silog2 = SmoothedValue(len(val_dataloader)) 59 | smoothed_criteria = {'err_absRel': smoothed_absRel, 'err_silog': smoothed_silog, 'err_silog2': smoothed_silog2} 60 | # rois-level 61 | rois_smoothed_absRel = SmoothedValue(len(val_dataloader)) 62 | rois_smoothed_silog = SmoothedValue(len(val_dataloader)) 63 | rois_smoothed_silog2 = SmoothedValue(len(val_dataloader)) 64 | rois_smoothed_criteria = {'err_absRel': rois_smoothed_absRel, 'err_silog': rois_smoothed_silog, 'err_silog2': rois_smoothed_silog2} 65 | 66 | # bg-level 67 | bg_smoothed_absRel = SmoothedValue(len(val_dataloader)) 68 | bg_smoothed_silog = SmoothedValue(len(val_dataloader)) 69 | bg_smoothed_silog2 = SmoothedValue(len(val_dataloader)) 70 | bg_smoothed_criteria = {'err_absRel': bg_smoothed_absRel, 'err_silog': bg_smoothed_silog, 'err_silog2': bg_smoothed_silog2} 71 | for i, data in enumerate(val_dataloader): 72 | pred_depth = model.module.inference_kitti(data) 73 | smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw'], smoothed_criteria) 74 | rois_smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw_rois'], rois_smoothed_criteria) 75 | bg_smoothed_criteria = validate_err_kitti(pred_depth['b_fake'], data['B_raw_bg'], bg_smoothed_criteria) 76 | #print(np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)) 77 | val_metrics = {'abs_rel': smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 78 | 'silog': np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 79 | rois_val_metrics = {'abs_rel': rois_smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 80 | 'silog': np.sqrt(rois_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (rois_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 81 | bg_val_metrics = {'abs_rel': bg_smoothed_criteria['err_absRel'].GetGlobalAverageValue(), 82 | 'silog': np.sqrt(bg_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (bg_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2)} 83 | print("global: ", val_metrics) 84 | print("roi: ", rois_val_metrics) 85 | print("bg: ", bg_val_metrics) 86 | return val_metrics 87 | 88 | if __name__=='__main__': 89 | train_dataloader = CustomerDataLoader(train_args) 90 | train_datasize = len(train_dataloader) 91 | gpu_num = torch.cuda.device_count() 92 | merge_cfg_from_file(train_datasize, gpu_num) 93 | 94 | val_dataloader = CustomerDataLoader(val_args) 95 | val_datasize = len(val_dataloader) 96 | 97 | # tensorboard logger 98 | if train_args.use_tfboard: 99 | from tensorboardX import SummaryWriter 100 | tblogger = SummaryWriter(cfg.TRAIN.LOG_DIR) 101 | 102 | # training status for logging 103 | training_stats = TrainingStats(train_args, cfg.TRAIN.LOG_INTERVAL, 104 | tblogger if train_args.use_tfboard else None) 105 | 106 | # total iterations 107 | total_iters = math.ceil(train_datasize / train_args.batchsize) * train_args.epoch[-1] 108 | 109 | # load model 110 | model = DepthNormal() 111 | 112 | if gpu_num != -1: 113 | logger.info('{:>15}: {:<30}'.format('GPU_num', gpu_num)) 114 | logger.info('{:>15}: {:<30}'.format('train_data_size', train_datasize)) 115 | logger.info('{:>15}: {:<30}'.format('val_data_size', val_datasize)) 116 | logger.info('{:>15}: {:<30}'.format('total_iterations', total_iters)) 117 | model.cuda() 
118 | #optimizer 119 | optimizer = ModelOptimizer(model) 120 | #loss function 121 | loss_func = ModelLoss() 122 | 123 | val_err = [{'abs_rel': 0, 'silog': 0}] 124 | 125 | ignore_step = -1 126 | 127 | # Lerning strategy 128 | lr_optim_lambda = lambda iter: (1.0 - iter / (float(total_iters))) ** 0.9 129 | scheduler = torch.optim.lr_scheduler.LambdaLR( 130 | optimizer.optimizer, lr_lambda=lr_optim_lambda) 131 | 132 | #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer.optimizer, milestones=train_args.epoch, gamma=0.1) 133 | 134 | # load checkpoint 135 | if train_args.load_ckpt: 136 | load_ckpt(train_args, model, optimizer.optimizer, scheduler, val_err) 137 | ignore_step = train_args.start_step - train_args.start_epoch * math.ceil(train_datasize / train_args.batchsize) 138 | 139 | if gpu_num != -1: 140 | model = torch.nn.DataParallel(model) 141 | try: 142 | for epoch in range(train_args.start_epoch, cfg.TRAIN.EPOCH[-1]): 143 | # training 144 | train(train_dataloader, model, epoch, loss_func, optimizer, scheduler, training_stats, 145 | val_dataloader, val_err, ignore_step) 146 | ignore_step = -1 147 | 148 | except (RuntimeError, KeyboardInterrupt): 149 | logger.info('Save ckpt on exception ...') 150 | stack_trace = traceback.format_exc() 151 | print(stack_trace) 152 | 153 | finally: 154 | if train_args.use_tfboard: 155 | tblogger.close() 156 | -------------------------------------------------------------------------------- /experiments/foresee/val_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) 8 | sys.path.append(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | 11 | from tools.parse_arg_val import ValOptions 12 | from data.load_dataset import CustomerDataLoader 13 | from lib.utils.net_tools import load_ckpt 14 | from lib.utils.evaluate_depth_error import evaluate_err 15 | from lib.utils.net_tools import save_images 16 | from lib.utils.logging import setup_logging, SmoothedValue 17 | 18 | from depth_normal_model import DepthNormal 19 | logger = setup_logging(__name__) 20 | 21 | 22 | if __name__ == '__main__': 23 | test_args = ValOptions().parse() 24 | test_args.thread = 1 # test code only supports thread = 1 25 | test_args.batchsize = 1 # test code only supports batchSize = 1 26 | 27 | data_loader = CustomerDataLoader(test_args) 28 | test_datasize = len(data_loader) 29 | logger.info('{:>15}: {:<30}'.format('test_data_size', test_datasize)) 30 | # load model 31 | model = DepthNormal() 32 | # evaluate mode 33 | model.eval() 34 | 35 | # load checkpoint 36 | if test_args.load_ckpt: 37 | load_ckpt(test_args, model) 38 | model.cuda() 39 | model = torch.nn.DataParallel(model) 40 | 41 | # global 42 | smoothed_absRel = SmoothedValue(test_datasize) 43 | smoothed_rms = SmoothedValue(test_datasize) 44 | smoothed_logRms = SmoothedValue(test_datasize) 45 | smoothed_squaRel = SmoothedValue(test_datasize) 46 | smoothed_silog = SmoothedValue(test_datasize) 47 | smoothed_silog2 = SmoothedValue(test_datasize) 48 | smoothed_log10 = SmoothedValue(test_datasize) 49 | smoothed_delta1 = SmoothedValue(test_datasize) 50 | smoothed_delta2 = SmoothedValue(test_datasize) 51 | smoothed_delta3 = SmoothedValue(test_datasize) 52 | smoothed_criteria = {'err_absRel':smoothed_absRel, 'err_squaRel': smoothed_squaRel, 'err_rms': smoothed_rms, 53 | 'err_silog': smoothed_silog, 'err_logRms': 
smoothed_logRms, 'err_silog2': smoothed_silog2, 54 | 'err_delta1': smoothed_delta1, 'err_delta2': smoothed_delta2, 'err_delta3': smoothed_delta3, 55 | 'err_log10': smoothed_log10} 56 | 57 | # rois 58 | rois_smoothed_absRel = SmoothedValue(test_datasize) 59 | rois_smoothed_rms = SmoothedValue(test_datasize) 60 | rois_smoothed_logRms = SmoothedValue(test_datasize) 61 | rois_smoothed_squaRel = SmoothedValue(test_datasize) 62 | rois_smoothed_silog = SmoothedValue(test_datasize) 63 | rois_smoothed_silog2 = SmoothedValue(test_datasize) 64 | rois_smoothed_log10 = SmoothedValue(test_datasize) 65 | rois_smoothed_delta1 = SmoothedValue(test_datasize) 66 | rois_smoothed_delta2 = SmoothedValue(test_datasize) 67 | rois_smoothed_delta3 = SmoothedValue(test_datasize) 68 | rois_smoothed_criteria = {'err_absRel':rois_smoothed_absRel, 'err_squaRel': rois_smoothed_squaRel, 'err_rms': rois_smoothed_rms, 69 | 'err_silog': rois_smoothed_silog, 'err_logRms': rois_smoothed_logRms, 'err_silog2': rois_smoothed_silog2, 70 | 'err_delta1': rois_smoothed_delta1, 'err_delta2': rois_smoothed_delta2, 'err_delta3': rois_smoothed_delta3, 71 | 'err_log10': rois_smoothed_log10} 72 | 73 | # bg 74 | bg_smoothed_absRel = SmoothedValue(test_datasize) 75 | bg_smoothed_rms = SmoothedValue(test_datasize) 76 | bg_smoothed_logRms = SmoothedValue(test_datasize) 77 | bg_smoothed_squaRel = SmoothedValue(test_datasize) 78 | bg_smoothed_silog = SmoothedValue(test_datasize) 79 | bg_smoothed_silog2 = SmoothedValue(test_datasize) 80 | bg_smoothed_log10 = SmoothedValue(test_datasize) 81 | bg_smoothed_delta1 = SmoothedValue(test_datasize) 82 | bg_smoothed_delta2 = SmoothedValue(test_datasize) 83 | bg_smoothed_delta3 = SmoothedValue(test_datasize) 84 | bg_smoothed_criteria = {'err_absRel':bg_smoothed_absRel, 'err_squaRel': bg_smoothed_squaRel, 'err_rms': bg_smoothed_rms, 85 | 'err_silog': bg_smoothed_silog, 'err_logRms': bg_smoothed_logRms, 'err_silog2': bg_smoothed_silog2, 86 | 'err_delta1': bg_smoothed_delta1, 'err_delta2': bg_smoothed_delta2, 'err_delta3': bg_smoothed_delta3, 87 | 'err_log10': bg_smoothed_log10} 88 | for i, data in enumerate(data_loader): 89 | out = model.module.inference_kitti(data) 90 | pred_depth = np.squeeze(out['b_fake']) 91 | img_path = data['A_paths'] 92 | 93 | if len(data['B_raw'].shape) != 2: 94 | smoothed_criteria = evaluate_err(pred_depth, data['B_raw'], smoothed_criteria, scale=80.) 95 | rois_smoothed_criteria = evaluate_err(pred_depth, data['B_raw_rois'], rois_smoothed_criteria, scale=80.) 96 | bg_smoothed_criteria = evaluate_err(pred_depth, data['B_raw_bg'], bg_smoothed_criteria, scale=80.) 97 | print('processing (%04d)-th image... %s' % (i, img_path)) 98 | print(smoothed_criteria['err_absRel'].GetGlobalAverageValue()) 99 | save_images(data, pred_depth, scale=256.*80.) 
100 | 101 | 102 | LOG_FOUT = open(os.path.join('object_val_results.txt'), 'w') 103 | def log_string(out_str): 104 | LOG_FOUT.write(out_str+'\n') 105 | LOG_FOUT.flush() 106 | print(out_str) 107 | 108 | 109 | if len(data['B_raw'].shape) != 2: 110 | log_string("---image-level----") 111 | log_string("###############absREL ERROR: {}".format(smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 112 | log_string("###############silog ERROR: {}".format(np.sqrt(smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 113 | log_string("###############log10 ERROR: {}".format(smoothed_criteria['err_log10'].GetGlobalAverageValue())) 114 | log_string("###############RMS ERROR: {}".format(np.sqrt(smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 115 | log_string("###############delta_1 ERROR: {}".format(smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 116 | log_string("###############delta_2 ERROR: {}".format(smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 117 | log_string("###############delta_3 ERROR: {}".format(smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 118 | log_string("###############squaRel ERROR: {}".format(smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 119 | log_string("###############logRms ERROR: {}".format(np.sqrt(smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 120 | 121 | 122 | log_string("---rois-level----") 123 | log_string("###############absREL ERROR: {}".format(rois_smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 124 | log_string("###############silog ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (rois_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 125 | log_string("###############log10 ERROR: {}".format(rois_smoothed_criteria['err_log10'].GetGlobalAverageValue())) 126 | log_string("###############RMS ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 127 | log_string("###############delta_1 ERROR: {}".format(rois_smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 128 | log_string("###############delta_2 ERROR: {}".format(rois_smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 129 | log_string("###############delta_3 ERROR: {}".format(rois_smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 130 | log_string("###############squaRel ERROR: {}".format(rois_smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 131 | log_string("###############logRms ERROR: {}".format(np.sqrt(rois_smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 132 | 133 | log_string("---bg-level----") 134 | log_string("###############absREL ERROR: {}".format(bg_smoothed_criteria['err_absRel'].GetGlobalAverageValue())) 135 | log_string("###############silog ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_silog2'].GetGlobalAverageValue() - (bg_smoothed_criteria['err_silog'].GetGlobalAverageValue())**2))) 136 | log_string("###############log10 ERROR: {}".format(bg_smoothed_criteria['err_log10'].GetGlobalAverageValue())) 137 | log_string("###############RMS ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_rms'].GetGlobalAverageValue()))) 138 | log_string("###############delta_1 ERROR: {}".format(bg_smoothed_criteria['err_delta1'].GetGlobalAverageValue())) 139 | log_string("###############delta_2 ERROR: {}".format(bg_smoothed_criteria['err_delta2'].GetGlobalAverageValue())) 140 | log_string("###############delta_3 ERROR: 
{}".format(bg_smoothed_criteria['err_delta3'].GetGlobalAverageValue())) 141 | log_string("###############squaRel ERROR: {}".format(bg_smoothed_criteria['err_squaRel'].GetGlobalAverageValue())) 142 | log_string("###############logRms ERROR: {}".format(np.sqrt(bg_smoothed_criteria['err_logRms'].GetGlobalAverageValue()))) 143 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/__init__.py -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/core/__init__.py -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import numpy as np 8 | from lib.utils.collections import AttrDict 9 | from lib.utils.misc import get_run_name 10 | from tools.parse_arg_train import TrainOptions 11 | from tools.parse_arg_val import ValOptions 12 | 13 | # ---------------------------------------------------------------------------- # 14 | # Load parse for training, val, and test 15 | # ---------------------------------------------------------------------------- # 16 | train_opt = TrainOptions() 17 | train_args = train_opt.parse() 18 | train_opt.print_options(train_args) 19 | 20 | val_opt = ValOptions() 21 | val_args = val_opt.parse() 22 | val_args.batchsize = 1 23 | val_args.thread = 0 24 | val_opt.print_options(val_args) 25 | 26 | __C = AttrDict() 27 | # Consumers can get config by: 28 | cfg = __C 29 | 30 | # Random note: avoid using '.ON' as a config key since yaml converts it to True; 31 | # prefer 'ENABLED' instead 32 | # Root directory of project 33 | __C.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) 34 | __C.EXP_NAME = os.path.dirname(__file__).split('/')[-1] 35 | __C.DATASET = train_args.dataset 36 | # "Fun" fact: the history of where these values comes from is lost (From Detectron lol) 37 | __C.RGB_PIXEL_MEANS = (102.9801, 115.9465, 122.7717) 38 | __C.RGB_PIXEL_VARS = (1, 1, 1) 39 | __C.CROP_SIZE = (385, 513) if 'kitti' in train_args.dataset else (385, 385) #height * width 40 | 41 | # ---------------------------------------------------------------------------- # 42 | # Models configurations 43 | # ---------------------------------------------------------------------------- # 44 | __C.MODEL = AttrDict() 45 | __C.MODEL.INIT_TYPE = 'xavier' 46 | # Configure the model type for the encoder, e.g.ResNeXt50_32x4d_body_stride16 47 | __C.MODEL.ENCODER = train_args.encoder 48 | __C.MODEL.MODEL_REPOSITORY = 'pretrained_model' 49 | __C.MODEL.PRETRAINED_WEIGHTS = train_args.pretrained_model 50 | __C.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = True 51 | 52 | # Configure resnet and resnext 53 | __C.MODEL.RESNET_BOTTLENECK_DIM = [64, 256, 512, 1024, 2048] if 'ResNeXt' in train_args.encoder else [32, 24, 32, 96, 320] 54 | __C.MODEL.RESNET_BLOCK_DIM = [64, 64, 128, 256, 512] 55 | # Place the stride 2 conv on the 1x1 filter 56 | # Use True only 
for the original MSRA ResNet; use False for C2 and Torch models 57 | __C.MODEL.RESNET_STRIDE_1X1 = True 58 | # Set bn type of resnet, bn->batch normalization, affine->affine transformation 59 | __C.MODEL.RESNET_BN_TYPE = 'bn' 60 | 61 | # Freeze the batch normalization layer of pretrained model 62 | __C.MODEL.FREEZE_BACKBONE_BN = False 63 | # Configure the decoder 64 | __C.MODEL.FCN_DIM_IN = [512, 256, 256, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [128, 64, 64, 64, 64, 64] 65 | __C.MODEL.FCN_DIM_OUT = [256, 256, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [64, 64, 64, 64, 64] 66 | __C.MODEL.LATERAL_OUT = [512, 256, 256, 256] if 'ResNeXt' in train_args.encoder else [128, 64, 64, 64] 67 | 68 | 69 | # Configure input and output channel of the model 70 | __C.MODEL.ENCODRE_INPUT_C = 3 71 | __C.MODEL.DECODER_OUTPUT_C = train_args.decoder_out_c 72 | 73 | # Configure weight for different losses 74 | __C.MODEL.DIFF_LOSS_WEIGHT = 6 75 | 76 | # ---------------------------------------------------------------------------- # 77 | # Data configurations 78 | # ---------------------------------------------------------------------------- # 79 | __C.DATA = AttrDict() 80 | __C.DATA.DATA_SET = train_args.dataset 81 | # Minimum depth 82 | __C.DATA.DATA_MIN = 0.01 if 'nyu' in train_args.dataset else 0.015 83 | # Maximum depth 84 | __C.DATA.DATA_MAX = 1.7 if 'nyu' in train_args.dataset else 1.0 85 | # Minimum depth in log space 86 | __C.DATA.DATA_MIN_LOG = np.log10(__C.DATA.DATA_MIN) 87 | # Interval of each range 88 | __C.DATA.DEPTH_RANGE_INTERVAL = (np.log10(__C.DATA.DATA_MAX) - np.log10(__C.DATA.DATA_MIN)) / __C.MODEL.DECODER_OUTPUT_C 89 | # Depth class 90 | __C.DATA.DEPTH_CLASSES = np.array([__C.DATA.DATA_MIN_LOG + __C.DATA.DEPTH_RANGE_INTERVAL * (i + 0.5) for i in range(__C.MODEL.DECODER_OUTPUT_C)]) 91 | __C.DATA.WCE_LOSS_WEIGHT = [[np.exp(-0.2 * (i - j) ** 2) for i in range(__C.MODEL.DECODER_OUTPUT_C)] 92 | for j in np.arange(__C.MODEL.DECODER_OUTPUT_C)] 93 | __C.DATA.LOAD_MODEL_NAME = train_args.load_ckpt 94 | # ---------------------------------------------------------------------------- # 95 | # Training configurations 96 | # ---------------------------------------------------------------------------- # 97 | __C.TRAIN = AttrDict() 98 | # Load run name, which is the combination of running time and host name 99 | __C.TRAIN.RUN_NAME = get_run_name() 100 | __C.TRAIN.OUTPUT_ROOT_DIR = './outputs' 101 | #__C.TRAIN.OUTPUT_ROOT_DIR = '/mnt/cephfs_hl/vc/wxl/depth/' + __C.EXP_NAME 102 | # Dir for checkpoint and logs 103 | __C.TRAIN.LOG_DIR = os.path.join(__C.TRAIN.OUTPUT_ROOT_DIR, train_args.dataset + '_' + cfg.TRAIN.RUN_NAME) 104 | # Differ the learning rate between encoder and decoder 105 | __C.TRAIN.DIFF_LR = train_args.scale_decoder_lr 106 | __C.TRAIN.BASE_LR = train_args.lr 107 | __C.TRAIN.MAX_ITER = 0 108 | # Set training epoches, end at the last epoch of list 109 | __C.TRAIN.EPOCH = train_args.epoch 110 | # Snapshot (model checkpoint) period 111 | __C.TRAIN.SNAPSHOT_ITERS = 6000 112 | __C.TRAIN.VAL_STEP = 6000 113 | __C.TRAIN.BATCH_SIZE = train_args.batchsize 114 | __C.TRAIN.GPU_NUM = 1 115 | # Steps for LOG interval 116 | __C.TRAIN.LOG_INTERVAL = 20 117 | __C.TRAIN.LR_DECAY_MILESTONES = __C.TRAIN.EPOCH[1:-1] 118 | 119 | 120 | 121 | def merge_cfg_from_file(datasize, gpu_num): 122 | __C.TRAIN.MAX_ITER = round(datasize / __C.TRAIN.BATCH_SIZE + 0.5) * __C.TRAIN.EPOCH[-1] 123 | __C.TRAIN.GPU_NUM = gpu_num 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 
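
The log-space depth discretization defined above (DATA_MIN, DATA_MAX, DEPTH_RANGE_INTERVAL, DEPTH_CLASSES and the soft WCE_LOSS_WEIGHT matrix) is what lets the decoder treat depth estimation as classification; lib/models/image_transfer.py converts between the two views with depth_class() and class_depth(). The following is a minimal standalone sketch of that round trip for a single value, assuming the KITTI settings used elsewhere in the repository (decoder_out_c = 100 as in experiments/foresee/train.sh, DATA_MIN = 0.015, DATA_MAX = 1.0, and an 80 m metric scale); it is an illustration only, not code from the source tree.

```python
import numpy as np

# Assumed KITTI configuration: 100 log-spaced depth bins over [0.015, 1.0],
# with normalized depth = metric depth / 80 m.
DATA_MIN, DATA_MAX, NUM_BINS, SCALE = 0.015, 1.0, 100, 80.0

data_min_log = np.log10(DATA_MIN)
interval = (np.log10(DATA_MAX) - data_min_log) / NUM_BINS
bin_centres = data_min_log + interval * (np.arange(NUM_BINS) + 0.5)  # cfg.DATA.DEPTH_CLASSES

depth_m = 40.0                          # ground-truth depth in metres
normalized = depth_m / SCALE            # network-space depth in [DATA_MIN, DATA_MAX]

# depth -> class index, mirroring depth_class() in lib/models/image_transfer.py
cls = int(round((np.log10(normalized) - data_min_log) / interval))
cls = min(cls, NUM_BINS - 1)

# class index -> depth, mirroring class_depth() with a one-hot (hard) prediction
recovered_m = (10.0 ** bin_centres[cls]) * SCALE

print(cls, recovered_m)                 # 83, ~40.0 m: quantization error stays below one bin width
```

The WCE_LOSS_WEIGHT table defined above uses the same bin indexing, weight[j][i] = exp(-0.2 * (i - j)**2), so the weighted cross-entropy in lib/models/loss.py effectively spreads the ground-truth one-hot label across neighbouring bins: probability mass placed near the true class is penalized less than mass placed far from it.
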
-------------------------------------------------------------------------------- /lib/models/MobileNetV2.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from lib.core.config import cfg 4 | # ---------------------------------------------------------------------------- # 5 | # Bits for specific architectures (ResNeXt50, ResNeXt101, ...) 6 | # ---------------------------------------------------------------------------- # 7 | def MobileNetV2_body(): 8 | return MobileNetV2() 9 | 10 | def MobileNetV2_body_stride16(): 11 | return MobileNetV2(output_stride=16) 12 | 13 | def MobileNetV2_body_stride8(): 14 | return MobileNetV2(output_stride=8) 15 | 16 | 17 | def conv_bn(inp, oup, stride): 18 | return nn.Sequential( 19 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 20 | nn.BatchNorm2d(oup), 21 | nn.ReLU6(inplace=True) 22 | ) 23 | 24 | 25 | def conv_1x1_bn(inp, oup): 26 | return nn.Sequential( 27 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 28 | nn.BatchNorm2d(oup), 29 | nn.ReLU6(inplace=True) 30 | ) 31 | 32 | 33 | class InvertedResidual(nn.Module): 34 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 35 | super(InvertedResidual, self).__init__() 36 | self.stride = stride 37 | assert stride in [1, 2] 38 | 39 | hidden_dim = round(inp * expand_ratio) 40 | self.use_res_connect = self.stride == 1 and inp == oup 41 | 42 | if expand_ratio == 1: 43 | self.conv = nn.Sequential( 44 | # dw 45 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, groups=hidden_dim, bias=False, padding=dilation, dilation=dilation), 46 | nn.BatchNorm2d(hidden_dim), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | else: 53 | self.conv = nn.Sequential( 54 | # pw 55 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 56 | nn.BatchNorm2d(hidden_dim), 57 | nn.ReLU6(inplace=True), 58 | # dw 59 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, groups=hidden_dim, bias=False, padding=dilation, dilation=dilation), 60 | nn.BatchNorm2d(hidden_dim), 61 | nn.ReLU6(inplace=True), 62 | # pw-linear 63 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 64 | nn.BatchNorm2d(oup), 65 | ) 66 | 67 | def forward(self, x): 68 | if self.use_res_connect: 69 | out = self.conv(x) 70 | out += x 71 | return out 72 | else: 73 | return self.conv(x) 74 | 75 | def add_block(res_setting, input_channel, width_mult=1, dilation=1): 76 | # building inverted residual blocks 77 | block = [] 78 | for t, c, n, s in res_setting: 79 | output_channel = int(c * width_mult) 80 | for i in range(n): 81 | if i == 0: 82 | block.append(InvertedResidual(input_channel, output_channel, s, expand_ratio=t, dilation=dilation)) 83 | else: 84 | block.append(InvertedResidual(input_channel, output_channel, 1, expand_ratio=t, dilation=dilation)) 85 | input_channel = output_channel 86 | return nn.Sequential(*block), output_channel 87 | 88 | 89 | class MobileNetV2(nn.Module): 90 | def __init__(self, width_mult=1., output_stride=32): 91 | super(MobileNetV2, self).__init__() 92 | input_channel = 32 93 | last_channel = 320 94 | self.convX = 5 95 | stride1 = 1 if 32 / output_stride == 4 else 2 96 | stride2 = 1 if 32 / output_stride > 1 else 2 97 | dilation1 = 1 if stride1 == 2 else 2 98 | dilation2 = 1 if stride2 == 2 else (2 if stride1 == 2 else 4) 99 | 100 | interverted_residual_setting_block2 = [ 101 | #t, c, n, s 102 | [1, 16, 1, 1], 103 | [6, 24, 2, 2], 104 | ] 105 | interverted_residual_setting_block3 = [ 106 | # t, c, 
n, s 107 | [6, 32, 3, 2], 108 | ] 109 | interverted_residual_setting_block4 = [ 110 | # t, c, n, s 111 | [6, 64, 4, stride1], 112 | [6, 96, 3, 1], 113 | ] 114 | interverted_residual_setting_block5 = [ 115 | # t, c, n, s 116 | [6, 160, 3, stride2], 117 | [6, 320, 1, 1], 118 | ] 119 | 120 | 121 | # building first layer 122 | #assert cfg.CROP_SIZE[0] % 32 == 0 and cfg.CROP_SIZE[1] % 32 == 0 123 | input_channel = int(input_channel * width_mult) 124 | self.last_channel = last_channel 125 | self.res1 = nn.Sequential(conv_bn(3, input_channel, 2)) 126 | 127 | self.res2, output_channel = add_block(interverted_residual_setting_block2, input_channel, width_mult) 128 | 129 | self.res3, output_channel = add_block(interverted_residual_setting_block3, output_channel, width_mult) 130 | 131 | self.res4, output_channel = add_block(interverted_residual_setting_block4, output_channel, width_mult, dilation1) 132 | 133 | self.res5, output_channel = add_block(interverted_residual_setting_block5, output_channel, width_mult, dilation2) 134 | 135 | self._initialize_weights() 136 | 137 | def forward(self, x): 138 | for i in range(self.convX): 139 | x = getattr(self, 'res%d' % (i + 1))(x) 140 | return x 141 | 142 | def _initialize_weights(self): 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d): 145 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 146 | m.weight.data.normal_(0, math.sqrt(2. / n)) 147 | if m.bias is not None: 148 | m.bias.data.zero_() 149 | elif isinstance(m, nn.BatchNorm2d): 150 | m.weight.data.fill_(1) 151 | m.bias.data.zero_() 152 | elif isinstance(m, nn.Linear): 153 | n = m.weight.size(1) 154 | m.weight.data.normal_(0, 0.01) 155 | m.bias.data.zero_() -------------------------------------------------------------------------------- /lib/models/ResNeXt.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | from lib.core.config import cfg 4 | 5 | # ---------------------------------------------------------------------------- # 6 | # Bits for specific architectures (ResNeXt50, ResNeXt101, ...) 
7 | # ---------------------------------------------------------------------------- # 8 | 9 | def ResNeXt50_32x4d_body_stride16(): 10 | return ResNeXt_body((3, 4, 6, 3), 32, 4, 16) 11 | 12 | 13 | def ResNeXt101_32x4d_body_stride16(): 14 | return ResNeXt_body((3, 4, 23, 3), 32, 4, 16) 15 | 16 | 17 | class ResNeXt_body(nn.Module): 18 | def __init__(self, block_counts, cardinality, base_width, output_stride): 19 | super().__init__() 20 | self.block_counts = block_counts 21 | self.convX = len(block_counts) + 1 22 | self.num_layers = (sum(block_counts) + 3 * (self.convX == 4)) * 3 + 2 23 | 24 | self.res1 = basic_bn_stem() 25 | dim_in = 64 26 | res5_dilate = int(32 / output_stride) 27 | res5_stride = 2 if res5_dilate == 1 else 1 28 | res4_dilate = 1 if res5_dilate <= 2 else 2 29 | res4_stride = 2 if res4_dilate == 1 else 1 30 | 31 | self.res2, dim_in = add_stage(dim_in, 256, block_counts[0], cardinality, base_width, 32 | dilation=1, stride_init=1) 33 | self.res3, dim_in = add_stage(dim_in, 512, block_counts[1], cardinality, base_width, 34 | dilation=1, stride_init=2) 35 | self.res4, dim_in = add_stage(dim_in, 1024, block_counts[2], cardinality, base_width, 36 | dilation=res4_dilate, stride_init=res4_stride) 37 | self.res5, dim_in = add_stage(dim_in, 2048, block_counts[3], cardinality, base_width, 38 | dilation=res5_dilate, stride_init=res5_stride) 39 | self.spatial_scale = 1 / output_stride 40 | self.dim_out = dim_in 41 | self._init_modle() 42 | 43 | def forward(self, x): 44 | for i in range(self.convX): 45 | x = getattr(self, 'res%d' % (i + 1))(x) 46 | return x 47 | 48 | 49 | def train(self, mode=True): 50 | # Override 51 | self.training = mode 52 | 53 | for i in range(1, self.convX + 1): 54 | getattr(self, 'res%d' % i).train(mode) 55 | def _init_modle(self): 56 | def freeze_params(m): 57 | for p in m.parameters(): 58 | p.requires_grad = False 59 | if cfg.MODEL.FREEZE_BACKBONE_BN: 60 | self.apply(lambda m: freeze_params(m) if isinstance(m, nn.BatchNorm2d) else None) 61 | 62 | def basic_bn_stem(): 63 | conv1 = nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=False) 64 | return nn.Sequential(OrderedDict([ 65 | ('conv1', conv1), 66 | ('bn1', nn.BatchNorm2d(64)), 67 | ('relu', nn.ReLU(inplace=True)), 68 | ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))])) 69 | 70 | def add_stage(inplanes, outplanes, nblocks, cardinality, base_width, dilation=1, stride_init=2): 71 | """Make a stage consist of `nblocks` residual blocks. 72 | Returns: 73 | - stage module: an nn.Sequentail module of residual blocks 74 | - final output dimension 75 | """ 76 | res_blocks = [] 77 | stride = stride_init 78 | for _ in range(nblocks): 79 | res_blocks.append(ResNeXtBottleneck( 80 | inplanes, outplanes, stride, dilation, cardinality, base_width 81 | )) 82 | inplanes = outplanes 83 | stride = 1 84 | return nn.Sequential(*res_blocks), outplanes 85 | 86 | 87 | class ResNeXtBottleneck(nn.Module): 88 | """ 89 | RexNeXt bottleneck type C (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua) 90 | """ 91 | 92 | def __init__(self, in_channels, out_channels, stride, dilate, cardinality=32, base_width=4): 93 | """ Constructor 94 | Args: 95 | in_channels: input channel dimensionality 96 | out_channels: output channel dimensionality 97 | stride: conv stride. Replaces pooling layer. 98 | cardinality: num of convolution groups. 99 | base_width: base number of channels in each group. 100 | widen_factor: factor to reduce the input dimensionality before convolution. 
101 | """ 102 | super(ResNeXtBottleneck, self).__init__() 103 | width_ratio = out_channels / 256. 104 | D = cardinality * base_width * int(width_ratio) 105 | self.conv1 = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False) 106 | self.bn1 = nn.BatchNorm2d(D) 107 | self.conv2 = nn.Conv2d(D, D, kernel_size=3, stride=stride, padding=dilate, dilation=dilate, groups=cardinality, bias=False) 108 | self.bn2 = nn.BatchNorm2d(D) 109 | self.conv3 = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False) 110 | self.bn3 = nn.BatchNorm2d(out_channels) 111 | self.relu = nn.ReLU(inplace=True) 112 | 113 | 114 | if in_channels != out_channels: 115 | self.shortcut = nn.Sequential() 116 | self.shortcut.add_module('conv', 117 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, 118 | bias=False)) 119 | self.shortcut.add_module('bn', nn.BatchNorm2d(out_channels)) 120 | else: 121 | self.shortcut = None 122 | 123 | def forward(self, x): 124 | residual = x 125 | out = self.conv1(x) 126 | out = self.bn1(out) 127 | out = self.relu(out) 128 | out = self.conv2(out) 129 | out = self.bn2(out) 130 | out = self.relu(out) 131 | out = self.conv3(out) 132 | out = self.bn3(out) 133 | 134 | if self.shortcut is not None: 135 | residual = self.shortcut(x) 136 | 137 | out += residual 138 | out = self.relu(out) 139 | return out 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/models/__init__.py -------------------------------------------------------------------------------- /lib/models/image_transfer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | from lib.core.config import cfg 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | 9 | def fg_bg_maxpooling(clses_bg, clses_fg): 10 | "[b,c,h,w]" 11 | B,C,H,W = clses_bg.shape 12 | clses_bg = clses_bg.permute(0, 2, 3, 1) #[b, h, w, c] 13 | clses_bg = clses_bg.reshape((B, -1, C)).unsqueeze(-1) 14 | 15 | clses_fg = clses_fg.permute(0, 2, 3, 1) #[b, h, w, c] 16 | clses_fg = clses_fg.reshape((B, -1, C)).unsqueeze(-1) 17 | 18 | clses_cat = torch.cat((clses_fg, clses_bg), -1) # [b, hxw, c, 2] 19 | 20 | clses_final = F.max_pool2d(clses_cat, kernel_size=(1, 2)) #[b, hxw, c, 1] 21 | clses_final = clses_final.squeeze(-1).reshape((B, H, W, C)) 22 | 23 | clses_final = clses_final.permute(0, 3, 1, 2) # [b, c, h, w] 24 | 25 | clses_final = F.softmax(clses_final, dim=1) 26 | 27 | return clses_final 28 | 29 | 30 | def class_depth(classes): 31 | """ 32 | Transfer n-channel output of the network in classes to 1-channel depth 33 | @classes: n-channel output of the network, [b, c, h, w] 34 | :return: 1-channel depth, [b, 1, h, w] 35 | """ 36 | if type(classes).__module__ != torch.__name__: 37 | classes = torch.tensor(classes, dtype=torch.float32).cuda() 38 | classes = classes.permute(0, 2, 3, 1) #[b, h, w, c] 39 | if type(cfg.DATA.DEPTH_CLASSES).__module__ != torch.__name__: 40 | cfg.DATA.DEPTH_CLASSES = torch.tensor(cfg.DATA.DEPTH_CLASSES, dtype=torch.float32).cuda() 41 | depth = classes * cfg.DATA.DEPTH_CLASSES 42 | depth = torch.sum(depth, dim=3, dtype=torch.float32, keepdim=True) 43 | depth = 10 ** depth 44 | depth = depth.permute(0, 3, 1, 2) #[b, 1, h, w] 45 | return depth 46 | 47 | def 
class_depth_hard(classes): 48 | """ 49 | Transfer n-channel output of the network in classes to 1-channel depth 50 | @classes: n-channel output of the network, [b, c, h, w] 51 | :return: 1-channel depth, [b, 1, h, w] 52 | """ 53 | if type(classes).__module__ != torch.__name__: 54 | classes = torch.tensor(classes, dtype=torch.float32).cuda() 55 | classes = classes.permute(0, 2, 3, 1) #[b, h, w, c] 56 | if type(cfg.DATA.DEPTH_CLASSES).__module__ != torch.__name__: 57 | cfg.DATA.DEPTH_CLASSES = torch.tensor(cfg.DATA.DEPTH_CLASSES, dtype=torch.float32).cuda() 58 | 59 | # softmax to one-hot 60 | max_idx = torch.argmax(classes, -1, keepdim=True) 61 | one_hot = torch.FloatTensor(classes.shape).zero_().to(device=max_idx.device) 62 | one_hot.scatter_(-1, max_idx, 1) 63 | classes = one_hot 64 | 65 | depth = classes * cfg.DATA.DEPTH_CLASSES 66 | depth = torch.sum(depth, dim=3, dtype=torch.float32, keepdim=True) 67 | depth = 10 ** depth 68 | depth = depth.permute(0, 3, 1, 2) #[b, 1, h, w] 69 | return depth 70 | 71 | def depth_class(depth): 72 | """ 73 | Transfer 1-channel depth to 1-channel depth in n depth ranges 74 | :param depth: 1-channel depth, [b, 1, h, w] 75 | :return: classes [b, 1, h, w] 76 | """ 77 | depth[depth < cfg.DATA.DATA_MIN] = cfg.DATA.DATA_MIN 78 | depth[depth > cfg.DATA.DATA_MAX] = cfg.DATA.DATA_MAX 79 | classes = torch.round((torch.log10(depth) - cfg.DATA.DATA_MIN_LOG) / cfg.DATA.DEPTH_RANGE_INTERVAL) 80 | classes = classes.to(torch.long) 81 | classes[classes == cfg.MODEL.DECODER_OUTPUT_C] = cfg.MODEL.DECODER_OUTPUT_C - 1 82 | return classes 83 | 84 | 85 | def resize_image(img, size): 86 | if type(img).__module__ != np.__name__: 87 | img = img.cpu().numpy() 88 | img = cv2.resize(img, (size[1], size[0])) 89 | return img 90 | 91 | 92 | def kitti_merge_imgs(left, middle, right, img_shape, crops): 93 | left = torch.squeeze(left) 94 | right = torch.squeeze(right) 95 | middle = torch.squeeze(middle) 96 | out = torch.zeros(img_shape, dtype=left.dtype, device=left.device) 97 | crops = torch.squeeze(crops) 98 | band = 5 99 | 100 | out[:, crops[0][0]:crops[0][0] + crops[0][2] - band] = left[:, 0:left.size(1)-band] 101 | out[:, crops[1][0]+band:crops[1][0] + crops[1][2] - band] += middle[:, band:middle.size(1)-band] 102 | out[:, crops[1][0] + crops[1][2] - 2*band:crops[2][0] + crops[2][2]] += right[:, crops[1][0] + crops[1][2] - 2*band-crops[2][0]:] 103 | 104 | out[:, crops[1][0]+band:crops[0][0] + crops[0][2] - band] /= 2.0 105 | out[:, crops[1][0] + crops[1][2] - 2*band:crops[1][0] + crops[1][2] - band] /= 2.0 106 | out = out.cpu().numpy() 107 | 108 | return out 109 | -------------------------------------------------------------------------------- /lib/models/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.core.config import cfg 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | 7 | def cross_entropy_loss(pred_nosoftmax, gt_class): 8 | """ 9 | Standard cross-entropy loss 10 | :param pred_nosoftmax: predicted label 11 | :param gt_class: target label 12 | :return: 13 | """ 14 | gt_class = torch.squeeze(gt_class) 15 | gt_class = gt_class.to(device=pred_nosoftmax.device, dtype=torch.int64) 16 | entropy = torch.nn.CrossEntropyLoss(ignore_index=cfg.MODEL.DECODER_OUTPUT_C+1) 17 | loss = entropy(pred_nosoftmax, gt_class) 18 | return loss 19 | 20 | 21 | def weight_crossentropy_loss(pred_nosoftmax, gt, data): 22 | """ 23 | Weighted Cross-entropy Loss 24 | :param pred_nosoftmax: predicted label 25 | :param gt: 
target label 26 | """ 27 | invalid_side = data['invalid_side'] 28 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 29 | weight = cfg.DATA.WCE_LOSS_WEIGHT 30 | weight /= torch.sum(weight, 1, keepdim=True) 31 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 32 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 33 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 34 | 35 | gt_reshape = gt.reshape(-1, 1) 36 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 37 | weight = torch.matmul(one_hot, weight) 38 | weight_log_pred = weight * log_pred 39 | 40 | valid_pixes = torch.tensor([0], device=pred_nosoftmax.device, dtype=torch.float) 41 | for i in range(gt.size(0)): 42 | valid_gt = gt[i, :, int(invalid_side[i][0]):gt.size(2)-int(invalid_side[i][1]), :] 43 | valid_pixes += valid_gt.size(1) * valid_gt.size(2) 44 | loss = -1 * torch.sum(weight_log_pred) / valid_pixes 45 | return loss 46 | 47 | def rois_weight_crossentropy_loss(pred_nosoftmax, gt, data): 48 | """ 49 | Weighted Cross-entropy Loss 50 | :param pred_nosoftmax: predicted label 51 | :param gt: target label 52 | """ 53 | invalid_side = data['invalid_side'] 54 | rois_mask = data['rois_mask'] 55 | cfg.DATA.WCE_LOSS_WEIGHT = torch.tensor(cfg.DATA.WCE_LOSS_WEIGHT, dtype=torch.float32, device=pred_nosoftmax.device) 56 | weight = cfg.DATA.WCE_LOSS_WEIGHT 57 | weight /= torch.sum(weight, 1, keepdim=True) 58 | classes_range = torch.arange(cfg.MODEL.DECODER_OUTPUT_C, device=gt.device, dtype=gt.dtype) 59 | log_pred = torch.nn.functional.log_softmax(pred_nosoftmax, 1) 60 | log_pred = torch.t(torch.transpose(log_pred, 0, 1).reshape(log_pred.size(1), -1)) 61 | 62 | gt_reshape = gt.reshape(-1, 1) 63 | one_hot = (gt_reshape == classes_range).to(dtype=torch.float, device=pred_nosoftmax.device) 64 | weight = torch.matmul(one_hot, weight) 65 | weight_log_pred = weight * log_pred 66 | 67 | valid_pixels = max(rois_mask.sum(), 1) 68 | loss = -1 * torch.sum(weight_log_pred) / valid_pixels 69 | return loss 70 | 71 | def rois_scale_invariant_loss(pred_depth, data): 72 | """ 73 | Follow Eigen paper, add silog loss, for KITTI benchmark 74 | :param pred_depth: 75 | :param data: 76 | :return: 77 | """ 78 | invalid_side = data['invalid_side'] 79 | gt_depth = data['B'].cuda() 80 | 81 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 82 | 83 | loss_mean = torch.tensor([0.]).cuda() 84 | for j in range(pred_depth.size(0)): 85 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 86 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 87 | valid_rois_mask = rois_mask[j, :, int(invalid_side[j][0]): rois_mask.size(2) - int(invalid_side[j][1]), :] 88 | 89 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 90 | diff_log = diff_log * valid_rois_mask.to(dtype=diff_log.dtype) 91 | 92 | #size = torch.numel(diff_log) 93 | size = torch.sum(valid_rois_mask) 94 | if size == 0: 95 | continue 96 | 97 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 98 | loss = loss_mean / pred_depth.size(0) 99 | return loss 100 | 101 | 102 | def scale_invariant_loss(pred_depth, data): 103 | """ 104 | Follow Eigen paper, add silog loss, for KITTI benchmark 105 | :param pred_depth: 106 | :param data: 107 | :return: 108 | """ 109 | invalid_side = 
data['invalid_side'] 110 | gt_depth = data['B'].cuda() 111 | 112 | 113 | loss_mean = torch.tensor([0.]).cuda() 114 | for j in range(pred_depth.size(0)): 115 | valid_pred = pred_depth[j, :, int(invalid_side[j][0]): pred_depth.size(2) - int(invalid_side[j][1]), :] 116 | valid_gt = gt_depth[j, :, int(invalid_side[j][0]): gt_depth.size(2) - int(invalid_side[j][1]), :] 117 | 118 | diff_log = torch.log(valid_pred) - torch.log(valid_gt) 119 | 120 | size = torch.numel(diff_log) 121 | #size = torch.sum(valid_rois_mask) 122 | #if size == 0: 123 | # continue 124 | 125 | loss_mean += torch.sum(diff_log ** 2) / size - 0.5 * torch.sum(diff_log) ** 2 / (size ** 2) 126 | loss = loss_mean / pred_depth.size(0) 127 | return loss 128 | 129 | 130 | def berhu_loss(pred_depth, data, scale=80.): 131 | """ 132 | :param pred_depth: 133 | :param data: 134 | :return: 135 | """ 136 | huber_threshold = 0.2 137 | 138 | invalid_side = data['invalid_side'] 139 | gt_depth = data['B'].cuda() 140 | 141 | mask = gt_depth > 0 142 | 143 | pred_depth = pred_depth * mask.to(dtype=pred_depth.dtype) 144 | gt_depth = gt_depth * mask.to(dtype=gt_depth.dtype) 145 | 146 | diff = torch.abs(gt_depth - pred_depth) 147 | delta = huber_threshold * torch.max(diff).data.cpu() 148 | 149 | part1 = -F.threshold(-diff, -delta, 0.) 150 | part2 = F.threshold(diff**2 + delta**2, 2*delta**2, 0.) 151 | part2 = part2 / (2.*delta) 152 | 153 | loss = part1 + part2 154 | 155 | loss = loss[mask] 156 | loss = torch.mean(loss) 157 | 158 | return loss 159 | 160 | 161 | def rmse_log_loss(pred_depth, data, scale=80.): 162 | """ 163 | :param pred_depth: 164 | :param data: 165 | :return: 166 | """ 167 | 168 | gt_depth = data['B'].cuda() 169 | mask = gt_depth > 0 170 | 171 | pred_depth = pred_depth * scale 172 | gt_depth = gt_depth * scale 173 | 174 | diff = torch.log(gt_depth) - torch.log(pred_depth) 175 | diff = diff[mask] 176 | 177 | loss = torch.sqrt(torch.mean(diff**2)) 178 | return loss 179 | 180 | 181 | def rmse_loss(pred_depth, data, scale=80.): 182 | """ 183 | :param pred_depth: 184 | :param data: 185 | :return: 186 | """ 187 | 188 | gt_depth = data['B'].cuda() 189 | mask = gt_depth > 0 190 | 191 | pred_depth = pred_depth 192 | gt_depth = gt_depth 193 | 194 | diff = gt_depth - pred_depth 195 | diff = diff[mask] 196 | 197 | loss = torch.sqrt(torch.mean(diff**2)) 198 | return loss 199 | 200 | def mse_loss(pred_depth, data, scale=80.): 201 | """ 202 | :param pred_depth: 203 | :param data: 204 | :return: 205 | """ 206 | 207 | gt_depth = data['B'].cuda() 208 | mask = gt_depth > 0 209 | 210 | pred_depth = pred_depth 211 | gt_depth = gt_depth 212 | 213 | diff = gt_depth - pred_depth 214 | diff = diff[mask] 215 | 216 | loss = torch.mean(diff**2) 217 | return loss 218 | 219 | 220 | def rois_rmse_log_loss(pred_depth, data, scale=80.): 221 | """ 222 | :param pred_depth: 223 | :param data: 224 | :return: 225 | """ 226 | 227 | gt_depth = data['B'].cuda() 228 | 229 | mask = gt_depth > 0 230 | rois_mask = data['rois_mask'].to(device=gt_depth.device) 231 | mask = mask & rois_mask 232 | 233 | pred_depth = pred_depth 234 | gt_depth = gt_depth 235 | 236 | diff = torch.log(gt_depth) - torch.log(pred_depth) 237 | diff = diff[mask] 238 | 239 | loss = torch.sqrt(torch.mean(diff**2)) 240 | return loss 241 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WXinlong/ForeSeE/5f87a1d51b9a16d848d1adb8e7563024cd616674/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/bounding_box.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | # transpose 5 | FLIP_LEFT_RIGHT = 0 6 | FLIP_TOP_BOTTOM = 1 7 | 8 | 9 | class BoxList(object): 10 | """ 11 | This class represents a set of bounding boxes. 12 | The bounding boxes are represented as a Nx4 Tensor. 13 | In order to uniquely determine the bounding boxes with respect 14 | to an image, we also store the corresponding image dimensions. 15 | They can contain extra information that is specific to each bounding box, such as 16 | labels. 17 | """ 18 | 19 | def __init__(self, bbox, image_size, mode="xyxy"): 20 | device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu") 21 | bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device) 22 | if bbox.ndimension() != 2: 23 | raise ValueError( 24 | "bbox should have 2 dimensions, got {}".format(bbox.ndimension()) 25 | ) 26 | if bbox.size(-1) != 4: 27 | raise ValueError( 28 | "last dimension of bbox should have a " 29 | "size of 4, got {}".format(bbox.size(-1)) 30 | ) 31 | if mode not in ("xyxy", "xywh"): 32 | raise ValueError("mode should be 'xyxy' or 'xywh'") 33 | 34 | self.bbox = bbox 35 | self.size = image_size # (image_width, image_height) 36 | self.mode = mode 37 | self.extra_fields = {} 38 | 39 | def add_field(self, field, field_data): 40 | self.extra_fields[field] = field_data 41 | 42 | def get_field(self, field): 43 | return self.extra_fields[field] 44 | 45 | def has_field(self, field): 46 | return field in self.extra_fields 47 | 48 | def fields(self): 49 | return list(self.extra_fields.keys()) 50 | 51 | def _copy_extra_fields(self, bbox): 52 | for k, v in bbox.extra_fields.items(): 53 | self.extra_fields[k] = v 54 | 55 | def convert(self, mode): 56 | if mode not in ("xyxy", "xywh"): 57 | raise ValueError("mode should be 'xyxy' or 'xywh'") 58 | if mode == self.mode: 59 | return self 60 | # we only have two modes, so don't need to check 61 | # self.mode 62 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 63 | if mode == "xyxy": 64 | bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1) 65 | bbox = BoxList(bbox, self.size, mode=mode) 66 | else: 67 | TO_REMOVE = 1 68 | bbox = torch.cat( 69 | (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1 70 | ) 71 | bbox = BoxList(bbox, self.size, mode=mode) 72 | bbox._copy_extra_fields(self) 73 | return bbox 74 | 75 | def _split_into_xyxy(self): 76 | if self.mode == "xyxy": 77 | xmin, ymin, xmax, ymax = self.bbox.split(1, dim=-1) 78 | return xmin, ymin, xmax, ymax 79 | elif self.mode == "xywh": 80 | TO_REMOVE = 1 81 | xmin, ymin, w, h = self.bbox.split(1, dim=-1) 82 | return ( 83 | xmin, 84 | ymin, 85 | xmin + (w - TO_REMOVE).clamp(min=0), 86 | ymin + (h - TO_REMOVE).clamp(min=0), 87 | ) 88 | else: 89 | raise RuntimeError("Should not be here") 90 | 91 | def resize(self, size, *args, **kwargs): 92 | """ 93 | Returns a resized copy of this bounding box 94 | :param size: The requested size in pixels, as a 2-tuple: 95 | (width, height). 
96 | """ 97 | 98 | ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size)) 99 | if ratios[0] == ratios[1]: 100 | ratio = ratios[0] 101 | scaled_box = self.bbox * ratio 102 | bbox = BoxList(scaled_box, size, mode=self.mode) 103 | # bbox._copy_extra_fields(self) 104 | for k, v in self.extra_fields.items(): 105 | if not isinstance(v, torch.Tensor): 106 | v = v.resize(size, *args, **kwargs) 107 | bbox.add_field(k, v) 108 | return bbox 109 | 110 | ratio_width, ratio_height = ratios 111 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 112 | scaled_xmin = xmin * ratio_width 113 | scaled_xmax = xmax * ratio_width 114 | scaled_ymin = ymin * ratio_height 115 | scaled_ymax = ymax * ratio_height 116 | scaled_box = torch.cat( 117 | (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1 118 | ) 119 | bbox = BoxList(scaled_box, size, mode="xyxy") 120 | # bbox._copy_extra_fields(self) 121 | for k, v in self.extra_fields.items(): 122 | if not isinstance(v, torch.Tensor): 123 | v = v.resize(size, *args, **kwargs) 124 | bbox.add_field(k, v) 125 | 126 | return bbox.convert(self.mode) 127 | 128 | def transpose(self, method): 129 | """ 130 | Transpose bounding box (flip or rotate in 90 degree steps) 131 | :param method: One of :py:attr:`PIL.Image.FLIP_LEFT_RIGHT`, 132 | :py:attr:`PIL.Image.FLIP_TOP_BOTTOM`, :py:attr:`PIL.Image.ROTATE_90`, 133 | :py:attr:`PIL.Image.ROTATE_180`, :py:attr:`PIL.Image.ROTATE_270`, 134 | :py:attr:`PIL.Image.TRANSPOSE` or :py:attr:`PIL.Image.TRANSVERSE`. 135 | """ 136 | if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM): 137 | raise NotImplementedError( 138 | "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented" 139 | ) 140 | 141 | image_width, image_height = self.size 142 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 143 | if method == FLIP_LEFT_RIGHT: 144 | TO_REMOVE = 1 145 | transposed_xmin = image_width - xmax - TO_REMOVE 146 | transposed_xmax = image_width - xmin - TO_REMOVE 147 | transposed_ymin = ymin 148 | transposed_ymax = ymax 149 | elif method == FLIP_TOP_BOTTOM: 150 | transposed_xmin = xmin 151 | transposed_xmax = xmax 152 | transposed_ymin = image_height - ymax 153 | transposed_ymax = image_height - ymin 154 | 155 | transposed_boxes = torch.cat( 156 | (transposed_xmin, transposed_ymin, transposed_xmax, transposed_ymax), dim=-1 157 | ) 158 | bbox = BoxList(transposed_boxes, self.size, mode="xyxy") 159 | # bbox._copy_extra_fields(self) 160 | for k, v in self.extra_fields.items(): 161 | if not isinstance(v, torch.Tensor): 162 | v = v.transpose(method) 163 | bbox.add_field(k, v) 164 | return bbox.convert(self.mode) 165 | 166 | def crop(self, box): 167 | """ 168 | Cropss a rectangular region from this bounding box. The box is a 169 | 4-tuple defining the left, upper, right, and lower pixel 170 | coordinate. 171 | """ 172 | xmin, ymin, xmax, ymax = self._split_into_xyxy() 173 | w, h = box[2] - box[0], box[3] - box[1] 174 | cropped_xmin = (xmin - box[0]).clamp(min=0, max=w) 175 | cropped_ymin = (ymin - box[1]).clamp(min=0, max=h) 176 | cropped_xmax = (xmax - box[0]).clamp(min=0, max=w) 177 | cropped_ymax = (ymax - box[1]).clamp(min=0, max=h) 178 | 179 | # TODO should I filter empty boxes here? 
180 | if False: 181 | is_empty = (cropped_xmin == cropped_xmax) | (cropped_ymin == cropped_ymax) 182 | 183 | cropped_box = torch.cat( 184 | (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1 185 | ) 186 | bbox = BoxList(cropped_box, (w, h), mode="xyxy") 187 | # bbox._copy_extra_fields(self) 188 | for k, v in self.extra_fields.items(): 189 | if not isinstance(v, torch.Tensor): 190 | v = v.crop(box) 191 | bbox.add_field(k, v) 192 | return bbox.convert(self.mode) 193 | 194 | # Tensor-like methods 195 | 196 | def to(self, device): 197 | bbox = BoxList(self.bbox.to(device), self.size, self.mode) 198 | for k, v in self.extra_fields.items(): 199 | if hasattr(v, "to"): 200 | v = v.to(device) 201 | bbox.add_field(k, v) 202 | return bbox 203 | 204 | def __getitem__(self, item): 205 | bbox = BoxList(self.bbox[item], self.size, self.mode) 206 | for k, v in self.extra_fields.items(): 207 | bbox.add_field(k, v[item]) 208 | return bbox 209 | 210 | def __len__(self): 211 | return self.bbox.shape[0] 212 | 213 | def clip_to_image(self, remove_empty=True): 214 | TO_REMOVE = 1 215 | self.bbox[:, 0].clamp_(min=0, max=self.size[0] - TO_REMOVE) 216 | self.bbox[:, 1].clamp_(min=0, max=self.size[1] - TO_REMOVE) 217 | self.bbox[:, 2].clamp_(min=0, max=self.size[0] - TO_REMOVE) 218 | self.bbox[:, 3].clamp_(min=0, max=self.size[1] - TO_REMOVE) 219 | if remove_empty: 220 | box = self.bbox 221 | keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]) 222 | return self[keep] 223 | return self 224 | 225 | def area(self): 226 | box = self.bbox 227 | if self.mode == "xyxy": 228 | TO_REMOVE = 1 229 | area = (box[:, 2] - box[:, 0] + TO_REMOVE) * (box[:, 3] - box[:, 1] + TO_REMOVE) 230 | elif self.mode == "xywh": 231 | area = box[:, 2] * box[:, 3] 232 | else: 233 | raise RuntimeError("Should not be here") 234 | 235 | return area 236 | 237 | def copy_with_fields(self, fields, skip_missing=False): 238 | bbox = BoxList(self.bbox, self.size, self.mode) 239 | if not isinstance(fields, (list, tuple)): 240 | fields = [fields] 241 | for field in fields: 242 | if self.has_field(field): 243 | bbox.add_field(field, self.get_field(field)) 244 | elif not skip_missing: 245 | raise KeyError("Field '{}' not found in {}".format(field, self)) 246 | return bbox 247 | 248 | def __repr__(self): 249 | s = self.__class__.__name__ + "(" 250 | s += "num_boxes={}, ".format(len(self)) 251 | s += "image_width={}, ".format(self.size[0]) 252 | s += "image_height={}, ".format(self.size[1]) 253 | s += "mode={})".format(self.mode) 254 | return s 255 | 256 | 257 | if __name__ == "__main__": 258 | bbox = BoxList([[0, 0, 10, 10], [0, 0, 5, 5]], (10, 10)) 259 | s_bbox = bbox.resize((5, 5)) 260 | print(s_bbox) 261 | print(s_bbox.bbox) 262 | 263 | t_bbox = bbox.transpose(0) 264 | print(t_bbox) 265 | print(t_bbox.bbox) 266 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/__init__.py: -------------------------------------------------------------------------------- 1 | from .chamfer_distance import ChamferDistance 2 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // CUDA forward declarations 4 | int ChamferDistanceKernelLauncher( 5 | const int b, const int n, 6 | const float* xyz, 7 | const int m, 8 | const float* xyz2, 9 | float* result, 10 | int* result_i, 11 | float* result2, 12 | int* result2_i); 13 | 14 
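// Backward launcher (declared below): given the upstream gradients of the two
// distance maps and the nearest-neighbour index maps from the forward pass, it
// accumulates gradients with respect to both point sets (see chamfer_distance.cu).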
| int ChamferDistanceGradKernelLauncher( 15 | const int b, const int n, 16 | const float* xyz1, 17 | const int m, 18 | const float* xyz2, 19 | const float* grad_dist1, 20 | const int* idx1, 21 | const float* grad_dist2, 22 | const int* idx2, 23 | float* grad_xyz1, 24 | float* grad_xyz2); 25 | 26 | 27 | void chamfer_distance_forward_cuda( 28 | const at::Tensor xyz1, 29 | const at::Tensor xyz2, 30 | const at::Tensor dist1, 31 | const at::Tensor dist2, 32 | const at::Tensor idx1, 33 | const at::Tensor idx2) 34 | { 35 | ChamferDistanceKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 36 | xyz2.size(1), xyz2.data(), 37 | dist1.data(), idx1.data(), 38 | dist2.data(), idx2.data()); 39 | } 40 | 41 | void chamfer_distance_backward_cuda( 42 | const at::Tensor xyz1, 43 | const at::Tensor xyz2, 44 | at::Tensor gradxyz1, 45 | at::Tensor gradxyz2, 46 | at::Tensor graddist1, 47 | at::Tensor graddist2, 48 | at::Tensor idx1, 49 | at::Tensor idx2) 50 | { 51 | ChamferDistanceGradKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data(), 52 | xyz2.size(1), xyz2.data(), 53 | graddist1.data(), idx1.data(), 54 | graddist2.data(), idx2.data(), 55 | gradxyz1.data(), gradxyz2.data()); 56 | } 57 | 58 | 59 | void nnsearch( 60 | const int b, const int n, const int m, 61 | const float* xyz1, 62 | const float* xyz2, 63 | float* dist, 64 | int* idx) 65 | { 66 | for (int i = 0; i < b; i++) { 67 | for (int j = 0; j < n; j++) { 68 | const float x1 = xyz1[(i*n+j)*3+0]; 69 | const float y1 = xyz1[(i*n+j)*3+1]; 70 | const float z1 = xyz1[(i*n+j)*3+2]; 71 | double best = 0; 72 | int besti = 0; 73 | for (int k = 0; k < m; k++) { 74 | const float x2 = xyz2[(i*m+k)*3+0] - x1; 75 | const float y2 = xyz2[(i*m+k)*3+1] - y1; 76 | const float z2 = xyz2[(i*m+k)*3+2] - z1; 77 | const double d=x2*x2+y2*y2+z2*z2; 78 | if (k==0 || d < best){ 79 | best = d; 80 | besti = k; 81 | } 82 | } 83 | dist[i*n+j] = best; 84 | idx[i*n+j] = besti; 85 | } 86 | } 87 | } 88 | 89 | 90 | void chamfer_distance_forward( 91 | const at::Tensor xyz1, 92 | const at::Tensor xyz2, 93 | const at::Tensor dist1, 94 | const at::Tensor dist2, 95 | const at::Tensor idx1, 96 | const at::Tensor idx2) 97 | { 98 | const int batchsize = xyz1.size(0); 99 | const int n = xyz1.size(1); 100 | const int m = xyz2.size(1); 101 | 102 | const float* xyz1_data = xyz1.data(); 103 | const float* xyz2_data = xyz2.data(); 104 | float* dist1_data = dist1.data(); 105 | float* dist2_data = dist2.data(); 106 | int* idx1_data = idx1.data(); 107 | int* idx2_data = idx2.data(); 108 | 109 | nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data); 110 | nnsearch(batchsize, m, n, xyz2_data, xyz1_data, dist2_data, idx2_data); 111 | } 112 | 113 | 114 | void chamfer_distance_backward( 115 | const at::Tensor xyz1, 116 | const at::Tensor xyz2, 117 | at::Tensor gradxyz1, 118 | at::Tensor gradxyz2, 119 | at::Tensor graddist1, 120 | at::Tensor graddist2, 121 | at::Tensor idx1, 122 | at::Tensor idx2) 123 | { 124 | const int b = xyz1.size(0); 125 | const int n = xyz1.size(1); 126 | const int m = xyz2.size(1); 127 | 128 | const float* xyz1_data = xyz1.data(); 129 | const float* xyz2_data = xyz2.data(); 130 | float* gradxyz1_data = gradxyz1.data(); 131 | float* gradxyz2_data = gradxyz2.data(); 132 | float* graddist1_data = graddist1.data(); 133 | float* graddist2_data = graddist2.data(); 134 | const int* idx1_data = idx1.data(); 135 | const int* idx2_data = idx2.data(); 136 | 137 | for (int i = 0; i < b*n*3; i++) 138 | gradxyz1_data[i] = 0; 139 | for (int i = 0; i < b*m*3; i++) 140 | 
gradxyz2_data[i] = 0; 141 | for (int i = 0;i < b; i++) { 142 | for (int j = 0; j < n; j++) { 143 | const float x1 = xyz1_data[(i*n+j)*3+0]; 144 | const float y1 = xyz1_data[(i*n+j)*3+1]; 145 | const float z1 = xyz1_data[(i*n+j)*3+2]; 146 | const int j2 = idx1_data[i*n+j]; 147 | 148 | const float x2 = xyz2_data[(i*m+j2)*3+0]; 149 | const float y2 = xyz2_data[(i*m+j2)*3+1]; 150 | const float z2 = xyz2_data[(i*m+j2)*3+2]; 151 | const float g = graddist1_data[i*n+j]*2; 152 | 153 | gradxyz1_data[(i*n+j)*3+0] += g*(x1-x2); 154 | gradxyz1_data[(i*n+j)*3+1] += g*(y1-y2); 155 | gradxyz1_data[(i*n+j)*3+2] += g*(z1-z2); 156 | gradxyz2_data[(i*m+j2)*3+0] -= (g*(x1-x2)); 157 | gradxyz2_data[(i*m+j2)*3+1] -= (g*(y1-y2)); 158 | gradxyz2_data[(i*m+j2)*3+2] -= (g*(z1-z2)); 159 | } 160 | for (int j = 0; j < m; j++) { 161 | const float x1 = xyz2_data[(i*m+j)*3+0]; 162 | const float y1 = xyz2_data[(i*m+j)*3+1]; 163 | const float z1 = xyz2_data[(i*m+j)*3+2]; 164 | const int j2 = idx2_data[i*m+j]; 165 | const float x2 = xyz1_data[(i*n+j2)*3+0]; 166 | const float y2 = xyz1_data[(i*n+j2)*3+1]; 167 | const float z2 = xyz1_data[(i*n+j2)*3+2]; 168 | const float g = graddist2_data[i*m+j]*2; 169 | gradxyz2_data[(i*m+j)*3+0] += g*(x1-x2); 170 | gradxyz2_data[(i*m+j)*3+1] += g*(y1-y2); 171 | gradxyz2_data[(i*m+j)*3+2] += g*(z1-z2); 172 | gradxyz1_data[(i*n+j2)*3+0] -= (g*(x1-x2)); 173 | gradxyz1_data[(i*n+j2)*3+1] -= (g*(y1-y2)); 174 | gradxyz1_data[(i*n+j2)*3+2] -= (g*(z1-z2)); 175 | } 176 | } 177 | } 178 | 179 | 180 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 181 | m.def("forward", &chamfer_distance_forward, "ChamferDistance forward"); 182 | m.def("forward_cuda", &chamfer_distance_forward_cuda, "ChamferDistance forward (CUDA)"); 183 | m.def("backward", &chamfer_distance_backward, "ChamferDistance backward"); 184 | m.def("backward_cuda", &chamfer_distance_backward_cuda, "ChamferDistance backward (CUDA)"); 185 | } 186 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | __global__ 7 | void ChamferDistanceKernel( 8 | int b, 9 | int n, 10 | const float* xyz, 11 | int m, 12 | const float* xyz2, 13 | float* result, 14 | int* result_i) 15 | { 16 | const int batch=512; 17 | __shared__ float buf[batch*3]; 18 | for (int i=blockIdx.x;ibest){ 130 | result[(i*n+j)]=best; 131 | result_i[(i*n+j)]=best_i; 132 | } 133 | } 134 | __syncthreads(); 135 | } 136 | } 137 | } 138 | 139 | void ChamferDistanceKernelLauncher( 140 | const int b, const int n, 141 | const float* xyz, 142 | const int m, 143 | const float* xyz2, 144 | float* result, 145 | int* result_i, 146 | float* result2, 147 | int* result2_i) 148 | { 149 | ChamferDistanceKernel<<>>(b, n, xyz, m, xyz2, result, result_i); 150 | ChamferDistanceKernel<<>>(b, m, xyz2, n, xyz, result2, result2_i); 151 | 152 | cudaError_t err = cudaGetLastError(); 153 | if (err != cudaSuccess) 154 | printf("error in chamfer distance updateOutput: %s\n", cudaGetErrorString(err)); 155 | } 156 | 157 | 158 | __global__ 159 | void ChamferDistanceGradKernel( 160 | int b, int n, 161 | const float* xyz1, 162 | int m, 163 | const float* xyz2, 164 | const float* grad_dist1, 165 | const int* idx1, 166 | float* grad_xyz1, 167 | float* grad_xyz2) 168 | { 169 | for (int i = blockIdx.x; i>>(b, n, xyz1, m, xyz2, grad_dist1, idx1, grad_xyz1, grad_xyz2); 204 | ChamferDistanceGradKernel<<>>(b, m, xyz2, n, xyz1, 
grad_dist2, idx2, grad_xyz2, grad_xyz1); 205 | 206 | cudaError_t err = cudaGetLastError(); 207 | if (err != cudaSuccess) 208 | printf("error in chamfer distance get grad: %s\n", cudaGetErrorString(err)); 209 | } 210 | -------------------------------------------------------------------------------- /lib/utils/chamfer_distance/chamfer_distance.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from torch.utils.cpp_extension import load 5 | cd = load(name="cd", 6 | sources=["../../lib/utils/chamfer_distance/chamfer_distance.cpp", 7 | "../../lib/utils/chamfer_distance/chamfer_distance.cu"]) 8 | 9 | class ChamferDistanceFunction(torch.autograd.Function): 10 | @staticmethod 11 | def forward(ctx, xyz1, xyz2): 12 | batchsize, n, _ = xyz1.size() 13 | _, m, _ = xyz2.size() 14 | xyz1 = xyz1.contiguous() 15 | xyz2 = xyz2.contiguous() 16 | dist1 = torch.zeros(batchsize, n) 17 | dist2 = torch.zeros(batchsize, m) 18 | 19 | idx1 = torch.zeros(batchsize, n, dtype=torch.int) 20 | idx2 = torch.zeros(batchsize, m, dtype=torch.int) 21 | 22 | if not xyz1.is_cuda: 23 | cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 24 | else: 25 | dist1 = dist1.cuda() 26 | dist2 = dist2.cuda() 27 | idx1 = idx1.cuda() 28 | idx2 = idx2.cuda() 29 | cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2) 30 | 31 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 32 | 33 | return dist1, dist2 34 | 35 | @staticmethod 36 | def backward(ctx, graddist1, graddist2): 37 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 38 | 39 | graddist1 = graddist1.contiguous() 40 | graddist2 = graddist2.contiguous() 41 | 42 | gradxyz1 = torch.zeros(xyz1.size()) 43 | gradxyz2 = torch.zeros(xyz2.size()) 44 | 45 | if not graddist1.is_cuda: 46 | cd.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 47 | else: 48 | gradxyz1 = gradxyz1.cuda() 49 | gradxyz2 = gradxyz2.cuda() 50 | cd.backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 51 | 52 | return gradxyz1, gradxyz2 53 | 54 | 55 | class ChamferDistance(torch.nn.Module): 56 | def forward(self, xyz1, xyz2): 57 | return ChamferDistanceFunction.apply(xyz1, xyz2) 58 | -------------------------------------------------------------------------------- /lib/utils/collections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | ############################################################################## 15 | 16 | """A simple attribute dictionary used for representing configuration options.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | 24 | class AttrDict(dict): 25 | 26 | IMMUTABLE = '__immutable__' 27 | 28 | def __init__(self, *args, **kwargs): 29 | super(AttrDict, self).__init__(*args, **kwargs) 30 | self.__dict__[AttrDict.IMMUTABLE] = False 31 | 32 | def __getattr__(self, name): 33 | if name in self.__dict__: 34 | return self.__dict__[name] 35 | elif name in self: 36 | return self[name] 37 | else: 38 | raise AttributeError(name) 39 | 40 | def __setattr__(self, name, value): 41 | if not self.__dict__[AttrDict.IMMUTABLE]: 42 | if name in self.__dict__: 43 | self.__dict__[name] = value 44 | else: 45 | self[name] = value 46 | else: 47 | raise AttributeError( 48 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'. 49 | format(name, value) 50 | ) 51 | 52 | def immutable(self, is_immutable): 53 | """Set immutability to is_immutable and recursively apply the setting 54 | to all nested AttrDicts. 55 | """ 56 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable 57 | # Recursively set immutable state 58 | for v in self.__dict__.values(): 59 | if isinstance(v, AttrDict): 60 | v.immutable(is_immutable) 61 | for v in self.values(): 62 | if isinstance(v, AttrDict): 63 | v.immutable(is_immutable) 64 | 65 | def is_immutable(self): 66 | return self.__dict__[AttrDict.IMMUTABLE] 67 | -------------------------------------------------------------------------------- /lib/utils/evaluate_depth_error.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | import numpy as np 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def validate_err(pred, gt, smoothed_criteria, mask=None, scale=10.): 8 | if type(pred).__module__ == torch.__name__: 9 | pred = pred.cpu().numpy() 10 | if type(gt).__module__ == torch.__name__: 11 | gt = gt.cpu().numpy() 12 | gt = np.squeeze(gt) 13 | pred = np.squeeze(pred) 14 | if mask is not None: 15 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 16 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 17 | if pred.shape != gt.shape: 18 | logger.info('The shapes of dt and gt are not same!') 19 | return -1 20 | mask2 = gt > 0 21 | gt = gt[mask2] 22 | pred = pred[mask2] 23 | n_pxl = gt.size 24 | gt_scale = gt * scale 25 | pred_scale = pred * scale 26 | 27 | # Mean Absolute Relative Error 28 | rel = np.abs(gt_scale - pred_scale) / gt_scale # compute errors 29 | abs_rel_sum = np.sum(rel) 30 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 31 | return smoothed_criteria 32 | 33 | 34 | def validate_err_kitti(pred, gt, smoothed_criteria, mask=None, scale=256.*80.): 35 | if type(pred).__module__ == torch.__name__: 36 | pred = pred.cpu().numpy() 37 | if type(gt).__module__ == torch.__name__: 38 | gt = gt.cpu().numpy() 39 | gt = np.squeeze(gt) 40 | pred = np.squeeze(pred) 41 | if mask is not None: 42 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 43 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 44 | if pred.shape != gt.shape: 45 | logger.info('The shapes of dt and gt are not same!') 46 | return -1 47 | 48 | mask2 = gt > 0 49 | gt = gt[mask2] 50 | pred = pred[mask2] 51 | n_pxl = gt.size 52 | gt_scale = gt * scale 53 | pred_scale = pred * scale 54 | 55 | # Mean Absolute Relative Error 56 | 
rel = np.abs(gt_scale - pred_scale) / gt_scale # compute errors 57 | abs_rel_sum = np.sum(rel) 58 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 59 | 60 | # Scale invariant error, silog is an evaluation metric of KITTI benchmark 61 | diff_log = np.log(pred_scale) - np.log(gt_scale) 62 | diff_log_sum = np.sum(diff_log) 63 | smoothed_criteria['err_silog'].AddValue(np.float64(diff_log_sum), n_pxl) 64 | diff_log_2 = diff_log ** 2 65 | diff_log_2_sum = np.sum(diff_log_2) 66 | smoothed_criteria['err_silog2'].AddValue(np.float64(diff_log_2_sum), n_pxl) 67 | return smoothed_criteria 68 | 69 | 70 | def evaluate_err(pred, gt, smoothed_criteria, mask = None, scale=10.0 ): 71 | if type(pred).__module__ != np.__name__: 72 | pred = pred.cpu().numpy() 73 | if type(gt).__module__ != np.__name__: 74 | gt = gt.cpu().numpy() 75 | 76 | pred = np.squeeze(pred) 77 | gt = np.squeeze(gt) 78 | if mask is not None: 79 | gt = gt[mask[0]:mask[1], mask[2]:mask[3]] 80 | pred = pred[mask[0]:mask[1], mask[2]:mask[3]] 81 | if pred.shape != gt.shape: 82 | logger.info('The shapes of dt and gt are not same!') 83 | return -1 84 | 85 | mask2 = gt > 0 86 | gt = gt[mask2] 87 | pred = pred[mask2] 88 | n_pxl = gt.size 89 | gt_scale = gt * scale 90 | pred_scale = pred * scale 91 | 92 | #Mean Absolute Relative Error 93 | rel = np.abs(gt - pred) / gt# compute errors 94 | abs_rel_sum = np.sum(rel) 95 | smoothed_criteria['err_absRel'].AddValue(np.float64(abs_rel_sum), n_pxl) 96 | 97 | #Square Mean Relative Error 98 | s_rel = ((gt_scale - pred_scale) * (gt_scale - pred_scale)) / (gt_scale * gt_scale)# compute errors 99 | squa_rel_sum = np.sum(s_rel) 100 | smoothed_criteria['err_squaRel'].AddValue(np.float64(squa_rel_sum), n_pxl) 101 | 102 | #Root Mean Square error 103 | square = (gt_scale - pred_scale) ** 2 104 | rms_squa_sum = np.sum(square) 105 | smoothed_criteria['err_rms'].AddValue(np.float64(rms_squa_sum), n_pxl) 106 | 107 | #Log Root Mean Square error 108 | log_square = (np.log(gt_scale) - np.log(pred_scale)) **2 109 | log_rms_sum = np.sum(log_square) 110 | smoothed_criteria['err_logRms'].AddValue(np.float64(log_rms_sum), n_pxl) 111 | 112 | # Scale invariant error 113 | diff_log = np.log(pred_scale) - np.log(gt_scale) 114 | diff_log_sum = np.sum(diff_log) 115 | smoothed_criteria['err_silog'].AddValue(np.float64(diff_log_sum), n_pxl) 116 | diff_log_2 = diff_log ** 2 117 | diff_log_2_sum = np.sum(diff_log_2) 118 | smoothed_criteria['err_silog2'].AddValue(np.float64(diff_log_2_sum), n_pxl) 119 | 120 | # Mean log10 error 121 | log10_sum = np.sum(np.abs(np.log10(gt) - np.log10(pred))) 122 | smoothed_criteria['err_log10'].AddValue(np.float64(log10_sum), n_pxl) 123 | 124 | #Delta 125 | gt_pred = gt_scale / pred_scale 126 | pred_gt = pred_scale / gt_scale 127 | gt_pred = np.reshape(gt_pred, (1, -1)) 128 | pred_gt = np.reshape(pred_gt, (1, -1)) 129 | gt_pred_gt = np.concatenate((gt_pred, pred_gt), axis=0) 130 | ratio_max = np.amax(gt_pred_gt, axis=0) 131 | 132 | delta_1_sum = np.sum(ratio_max < 1.25) 133 | smoothed_criteria['err_delta1'].AddValue(np.float64(delta_1_sum), n_pxl) 134 | delta_2_sum = np.sum(ratio_max < 1.25**2) 135 | smoothed_criteria['err_delta2'].AddValue(np.float64(delta_2_sum), n_pxl) 136 | delta_3_sum = np.sum(ratio_max < 1.25**3) 137 | smoothed_criteria['err_delta3'].AddValue(np.float64(delta_3_sum), n_pxl) 138 | 139 | return smoothed_criteria 140 | 141 | -------------------------------------------------------------------------------- /lib/utils/logging.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-present, Facebook, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ############################################################################## 15 | 16 | """Utilities for logging.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | from collections import deque 24 | import logging 25 | import numpy as np 26 | import sys 27 | from lib.core.config import cfg 28 | def log_stats(stats, args): 29 | """Log training statistics to terminal""" 30 | lines = "[Step %d/%d] [Epoch %d/%d] [%s]\n" % ( 31 | stats['iter'], cfg.TRAIN.MAX_ITER, stats['epoch'], args.epoch[-1], args.dataset) 32 | 33 | lines += "\t\tloss: %.3f, time: %.6f, eta: %s\n" % ( 34 | stats['total_loss'], stats['time'], stats['eta'] ) 35 | 36 | for k in stats: 37 | if 'loss' in k and 'total_loss' not in k: 38 | lines += "\t\t" + ", ".join("%s: %.3f" % (k, v) for k, v in stats[k].items()) + ", " 39 | 40 | # validate criteria 41 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['val_err'].items()) + ", " 42 | lines += '\n' 43 | 44 | # lr in different groups 45 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['lr'].items()) + ", " 46 | lines += '\n' 47 | print(lines[:-1]) # remove last new line 48 | 49 | class SmoothedValue(object): 50 | """Track a series of values and provide access to smoothed values over a 51 | window or the global series average. 
52 | """ 53 | def __init__(self, window_size): 54 | self.deque = deque(maxlen=window_size) 55 | self.series = [] 56 | self.total = 0.0 57 | self.count = 0 58 | 59 | def AddValue(self, value, size=1): 60 | self.deque.append(value) 61 | self.series.append(value) 62 | self.count += size 63 | self.total += value 64 | 65 | def GetMedianValue(self): 66 | return np.median(self.deque) 67 | 68 | def GetAverageValue(self): 69 | return np.mean(self.deque) 70 | 71 | def GetGlobalAverageValue(self): 72 | return self.total / self.count 73 | 74 | 75 | def setup_logging(name): 76 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 77 | # Manually clear root loggers to prevent any module that may have called 78 | # logging.basicConfig() from blocking our logging setup 79 | logging.root.handlers = [] 80 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 81 | logger = logging.getLogger(name) 82 | return logger 83 | 84 | -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | from datetime import datetime 4 | 5 | 6 | def get_run_name(): 7 | """ A unique name for each run """ 8 | return datetime.now().strftime( 9 | '%b%d-%H-%M-%S') + '_' + socket.gethostname() 10 | 11 | 12 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 13 | 14 | 15 | def is_image_file(filename): 16 | """Checks if a file is an image. 17 | Args: 18 | filename (string): path to a file 19 | Returns: 20 | bool: True if the filename ends with a known image extension 21 | """ 22 | filename_lower = filename.lower() 23 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 24 | 25 | 26 | def get_imagelist_from_dir(dirpath): 27 | images = [] 28 | for f in os.listdir(dirpath): 29 | if is_image_file(f): 30 | images.append(os.path.join(dirpath, f)) 31 | return images 32 | -------------------------------------------------------------------------------- /lib/utils/mobilenetv2_weight_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from lib.core.config import cfg 4 | import numpy as np 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | def load_pretrained_imagenet_resnext_weights(model): 9 | """Load pretrained weights 10 | Args: 11 | num_layers: 50 for res50 and so on. 12 | model: the generalized rcnnn module 13 | """ 14 | model_state_dict = model.state_dict() 15 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.MODEL.MODEL_REPOSITORY, 'MobileNetV2_ImageNet', cfg.MODEL.PRETRAINED_WEIGHTS) 16 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file), model_state_dict) 17 | 18 | for k, v in pretrianed_state_dict.items(): 19 | if k in model_state_dict.keys(): 20 | model_state_dict[k].copy_(pretrianed_state_dict[k]) 21 | else: 22 | logger.info('Weight %s is not in MobileNetV2 model.' % k) 23 | logger.info('Pretrained MobileNetV2 weight has been loaded') 24 | 25 | def convert_state_dict(src_dict, model_dict): 26 | """Return the correct mapping of tensor name and value 27 | 28 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 
29 | """ 30 | dst_dict = {} 31 | res_block_n = np.array([1, 4, 7, 14, 18]) 32 | for k, v in src_dict.items(): 33 | toks = k.split('.') 34 | id_n = int(toks[1]) 35 | if id_n < 18 and '17.conv.7' not in k and 'classifier' not in k: 36 | res_n = np.where(res_block_n > id_n)[0][0] + 1 37 | n = res_n - 2 if res_n >= 2 else 0 38 | res_n_m = 0 if id_n - res_block_n[n] < 0 else id_n - res_block_n[n] 39 | toks[0] = 'res%s' % res_n 40 | toks[1] = '%s' % res_n_m 41 | name = '.'.join(toks) 42 | dst_dict[name] = v 43 | return dst_dict 44 | -------------------------------------------------------------------------------- /lib/utils/net_tools.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import torch 3 | import os 4 | from lib.core.config import cfg 5 | import torch.nn as nn 6 | from lib.utils.logging import setup_logging 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import dill 10 | logger = setup_logging(__name__) 11 | import cv2 12 | 13 | 14 | def get_func(func_name): 15 | """Helper to return a function object by name. func_name must identify a 16 | function in this module or the path to a function relative to the base 17 | 'modeling' module. 18 | """ 19 | if func_name == '': 20 | return None 21 | try: 22 | parts = func_name.split('.') 23 | # Refers to a function in this module 24 | if len(parts) == 1: 25 | return globals()[parts[0]] 26 | # Otherwise, assume we're referencing a module under modeling 27 | module_name = 'lib.models.' + '.'.join(parts[:-1]) 28 | module = importlib.import_module(module_name) 29 | return getattr(module, parts[-1]) 30 | except Exception: 31 | logger.error('Failed to f1ind function: %s', func_name) 32 | raise 33 | 34 | def load_ckpt(args, model, optimizer=None, scheduler=None, val_err=[]): 35 | """ 36 | Load checkpoint. 
37 | """ 38 | if os.path.isfile(args.load_ckpt): 39 | logger.info("loading checkpoint %s", args.load_ckpt) 40 | checkpoint = torch.load(args.load_ckpt, map_location=lambda storage, loc: storage, pickle_module=dill) 41 | model_dict = model.state_dict() 42 | #model_dict = {k: v for k,v in model_dict if k in checkpoint.items()} 43 | model_dict.update(checkpoint['model_state_dict']) 44 | 45 | model.load_state_dict(model_dict) 46 | if args.resume: 47 | args.batchsize = checkpoint['batch_size'] 48 | args.start_step = checkpoint['step'] 49 | args.start_epoch = checkpoint['epoch'] 50 | optimizer.load_state_dict(checkpoint['optimizer']) 51 | scheduler.load_state_dict(checkpoint['scheduler']) 52 | if 'val_err' in checkpoint: # For backward compatibility 53 | val_err[0] = checkpoint['val_err'] 54 | del checkpoint 55 | torch.cuda.empty_cache() 56 | 57 | 58 | def save_ckpt(args, step, epoch, model, optimizer, scheduler, val_err={}): 59 | """Save checkpoint""" 60 | ckpt_dir = os.path.join(cfg.TRAIN.LOG_DIR, 'ckpt') 61 | if not os.path.exists(ckpt_dir): 62 | os.makedirs(ckpt_dir) 63 | save_name = os.path.join(ckpt_dir, 'epoch%d_step%d.pth' %(epoch, step)) 64 | if isinstance(model, nn.DataParallel): 65 | model = model.module 66 | torch.save({ 67 | 'step': step, 68 | 'epoch': epoch, 69 | 'batch_size': args.batchsize, 70 | 'scheduler': scheduler.state_dict(), 71 | 'val_err': val_err, 72 | 'model_state_dict': model.state_dict(), 73 | 'optimizer': optimizer.state_dict()}, 74 | save_name, pickle_module=dill) 75 | logger.info('save model: %s', save_name) 76 | 77 | 78 | # save image to the disk 79 | def save_images(data, pred, scale=60000.): 80 | rgb = data['A_raw'] 81 | gt = data['B_raw'] 82 | if type(rgb).__module__ != np.__name__: 83 | rgb = rgb.cpu().numpy() 84 | rgb = np.squeeze(rgb) 85 | rgb = rgb[:, :, ::-1] 86 | if type(gt).__module__ != np.__name__: 87 | gt = gt.cpu().numpy() 88 | gt = np.squeeze(gt) 89 | if type(pred).__module__ != np.__name__: 90 | pred = pred.cpu().numpy() 91 | pred = np.squeeze(pred) 92 | model_name = (cfg.DATA.LOAD_MODEL_NAME.split('/')[-1]).split('.')[0] 93 | image_dir = os.path.join(cfg.TRAIN.OUTPUT_ROOT_DIR, '../evaluation', model_name) 94 | if not os.path.exists(image_dir): 95 | os.makedirs(image_dir) 96 | 97 | 98 | if 'kitti' in cfg.DATASET: 99 | name = data['A_paths'][0].split('/')[-4] + '-' + data['A_paths'][0].split('/')[-1].split('.')[0] 100 | else: 101 | name = data['A_paths'][0].split('/')[-1].split('.')[0] 102 | rgb_name = '%s_%s.png' % (name, 'rgb') 103 | gt_name = '%s_%s.png' % (name, 'gt') 104 | gt_raw_name = '%s_%s.png' % (name, 'gt-raw') 105 | pred_name = '%s_%s.png' % (name, 'pred') 106 | pred_raw_name = '%s_%s.png' % (name, 'pred-raw') 107 | 108 | plt.imsave(os.path.join(image_dir, rgb_name), rgb) 109 | if len(data['B_raw'].shape) != 2: 110 | plt.imsave(os.path.join(image_dir, gt_name), gt, cmap='rainbow') 111 | gt_scale = gt * scale 112 | gt_scale = gt_scale.astype('uint16') 113 | cv2.imwrite(os.path.join(image_dir, gt_raw_name), gt_scale) 114 | plt.imsave(os.path.join(image_dir, pred_name), pred, cmap='rainbow') 115 | pred_raw = pred * scale 116 | pred_raw = pred_raw.astype('uint16') 117 | cv2.imwrite(os.path.join(image_dir, pred_raw_name), pred_raw) 118 | 119 | -------------------------------------------------------------------------------- /lib/utils/obj_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from lib.utils.bounding_box import BoxList 4 | 5 | class ObjectLabel: 6 | 
"""Object Label Class 7 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 8 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 9 | 'Misc' or 'DontCare' 10 | 11 | 1 truncated Float from 0 (non-truncated) to 1 (truncated), where 12 | truncated refers to the object leaving image boundaries 13 | 14 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 15 | 0 = fully visible, 1 = partly occluded 16 | 2 = largely occluded, 3 = unknown 17 | 18 | 1 alpha Observation angle of object, ranging [-pi..pi] 19 | 20 | 4 bbox 2D bounding box of object in the image (0-based index): 21 | contains left, top, right, bottom pixel coordinates 22 | 23 | 3 dimensions 3D object dimensions: height, width, length (in meters) 24 | 25 | 3 location 3D object location x,y,z in camera coordinates (in meters) 26 | 27 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 28 | 29 | 1 score Only for results: Float, indicating confidence in 30 | detection, needed for p/r curves, higher is better. 31 | """ 32 | 33 | def __init__(self): 34 | self.type = "" # Type of object 35 | self.truncation = 0. 36 | self.occlusion = 0. 37 | self.alpha = 0. 38 | self.x1 = 0. 39 | self.y1 = 0. 40 | self.x2 = 0. 41 | self.y2 = 0. 42 | self.h = 0. 43 | self.w = 0. 44 | self.l = 0. 45 | self.t = (0., 0., 0.) 46 | self.ry = 0. 47 | self.score = 0. 48 | 49 | def __eq__(self, other): 50 | """Compares the given object to the current ObjectLabel instance. 51 | 52 | :param other: object to compare to this instance against 53 | :return: True, if other and current instance is the same 54 | """ 55 | if not isinstance(other, ObjectLabel): 56 | return False 57 | 58 | if self.__dict__ != other.__dict__: 59 | return False 60 | else: 61 | return True 62 | 63 | 64 | def read_labels(label_path, results=False, ignore_cate=None): 65 | """Reads in label data file from Kitti Dataset. 66 | 67 | Returns: 68 | obj_list -- List of instances of class ObjectLabel. 
69 | 70 | Keyword arguments: 71 | label_dir -- directory of the label files 72 | img_idx -- index of the image 73 | """ 74 | 75 | # Define the object list 76 | obj_list = [] 77 | 78 | # Extract the list 79 | if os.stat(label_path).st_size == 0: 80 | return 81 | 82 | if results: 83 | p = np.loadtxt(label_path, delimiter=' ', 84 | dtype=str, 85 | usecols=np.arange(start=0, step=1, stop=16)) 86 | else: 87 | p = np.loadtxt(label_path, delimiter=' ', 88 | dtype=str, 89 | usecols=np.arange(start=0, step=1, stop=15)) 90 | 91 | # Check if the output is single dimensional or multi dimensional 92 | if len(p.shape) > 1: 93 | label_num = p.shape[0] 94 | else: 95 | label_num = 1 96 | 97 | for idx in np.arange(label_num): 98 | obj = ObjectLabel() 99 | 100 | if label_num > 1: 101 | # Fill in the object list 102 | obj.type = p[idx, 0] 103 | obj.truncation = float(p[idx, 1]) 104 | obj.occlusion = float(p[idx, 2]) 105 | obj.alpha = float(p[idx, 3]) 106 | obj.x1 = float(p[idx, 4]) 107 | obj.y1 = float(p[idx, 5]) 108 | obj.x2 = float(p[idx, 6]) 109 | obj.y2 = float(p[idx, 7]) 110 | obj.h = float(p[idx, 8]) 111 | obj.w = float(p[idx, 9]) 112 | obj.l = float(p[idx, 10]) 113 | obj.t = (float(p[idx, 11]), float(p[idx, 12]), float(p[idx, 13])) 114 | obj.ry = float(p[idx, 14]) 115 | if results: 116 | obj.score = float(p[idx, 15]) 117 | else: 118 | obj.score = 0.0 119 | else: 120 | # Fill in the object list 121 | obj.type = p[0] 122 | obj.truncation = float(p[1]) 123 | obj.occlusion = float(p[2]) 124 | obj.alpha = float(p[3]) 125 | obj.x1 = float(p[4]) 126 | obj.y1 = float(p[5]) 127 | obj.x2 = float(p[6]) 128 | obj.y2 = float(p[7]) 129 | obj.h = float(p[8]) 130 | obj.w = float(p[9]) 131 | obj.l = float(p[10]) 132 | obj.t = (float(p[11]), float(p[12]), float(p[13])) 133 | obj.ry = float(p[14]) 134 | if results: 135 | obj.score = float(p[15]) 136 | else: 137 | obj.score = 0.0 138 | 139 | if ignore_cate is not None and obj.type in ignore_cate: 140 | continue 141 | else: 142 | obj_list.append(obj) 143 | 144 | return obj_list 145 | 146 | 147 | def rois2mask(rois, shape): 148 | mask = np.zeros(shape) 149 | for roi in rois: 150 | mask[int(roi.y1):int(roi.y2)+1, int(roi.x1):int(roi.x2)+1] = 1. 151 | 152 | return mask 153 | 154 | 155 | def rois2mask_shrink(rois, shape): 156 | mask = np.zeros(shape) 157 | for roi in rois: 158 | h = int(roi.y2) - int(roi.y1) + 1 159 | w = int(roi.x2) - int(roi.x1) + 1 160 | 161 | mask[int(roi.y1)+int(h/4.):int(roi.y2)+1-int(h/4.), int(roi.x1)+int(w/4):int(roi.x2)+1-int(w/4)] = 1. 162 | 163 | return mask 164 | 165 | 166 | def rois2mask_ins(rois, shape): 167 | np.random.shuffle(rois) 168 | mask = np.zeros(shape) 169 | for idx, roi in enumerate(rois): 170 | mask[int(roi.y1):int(roi.y2)+1, int(roi.x1):int(roi.x2)+1] = 1. 
+ idx 171 | 172 | return mask 173 | 174 | 175 | def rois2boxlist(rois, image_size, mode="xyxy"): 176 | boxes = [] 177 | np.random.shuffle(rois) 178 | for roi in rois: 179 | xyxy = [int(roi.x1), int(roi.y1), int(roi.x2), int(roi.y2)] 180 | boxes.append(xyxy) 181 | 182 | boxlist = BoxList(boxes, image_size, mode=mode) 183 | return boxlist 184 | 185 | -------------------------------------------------------------------------------- /lib/utils/resnext_weights_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from lib.core.config import cfg 4 | import logging 5 | logger = logging.getLogger(__name__) 6 | 7 | def load_pretrained_imagenet_resnext_weights(model): 8 | """Load pretrained weights 9 | Args: 10 | num_layers: 50 for res50 and so on. 11 | model: the generalized rcnnn module 12 | """ 13 | weights_file = os.path.join(cfg.ROOT_DIR, cfg.MODEL.MODEL_REPOSITORY, 'ResNeXt_ImageNet', cfg.MODEL.PRETRAINED_WEIGHTS) 14 | pretrianed_state_dict = convert_state_dict(torch.load(weights_file)) 15 | 16 | model_state_dict = model.state_dict() 17 | 18 | for k, v in pretrianed_state_dict.items(): 19 | if k in model_state_dict.keys(): 20 | model_state_dict[k].copy_(pretrianed_state_dict[k]) 21 | else: 22 | print('Weight %s is not in ResNeXt model.' % k) 23 | logger.info('Pretrained ResNeXt weight has been loaded') 24 | 25 | def convert_state_dict(src_dict): 26 | """Return the correct mapping of tensor name and value 27 | 28 | Mapping from the names of torchvision model to our resnet conv_body and box_head. 29 | """ 30 | dst_dict = {} 31 | res_id = 1 32 | map1 = ['conv1.', 'bn1.', ' ', 'conv2.', 'bn2.'] 33 | map2 = [[' ', 'conv3.', 'bn3.'], ['shortcut.conv.', 'shortcut.bn.']] 34 | for k, v in src_dict.items(): 35 | toks = k.split('.') 36 | if int(toks[0]) == 0: 37 | name = 'res%d.' % res_id + 'conv1.' + toks[-1] 38 | elif int(toks[0]) == 1: 39 | name = 'res%d.' % res_id + 'bn1.' + toks[-1] 40 | elif int(toks[0]) >=4 and int(toks[0]) <= 7: 41 | name_res = 'res%d.%d.' % (int(toks[0])-2, int(toks[1])) 42 | if len(toks) == 7: 43 | name = name_res + map1[int(toks[-2])] + toks[-1] 44 | elif len(toks) == 6: 45 | name = name_res + map2[int(toks[-3])][int(toks[-2])] + toks[-1] 46 | else: 47 | continue 48 | dst_dict[name] = v 49 | 50 | return dst_dict 51 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import time 7 | 8 | 9 | class Timer(object): 10 | """A simple timer.""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | return self.average_time 27 | else: 28 | return self.diff 29 | 30 | def reset(self): 31 | self.total_time = 0. 32 | self.calls = 0 33 | self.start_time = 0. 34 | self.diff = 0. 35 | self.average_time = 0. 
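

# Minimal, illustrative usage sketch: tic()/toc() are called once per iteration;
# with average=True toc() returns the running mean, with average=False the last
# interval. The sleep below is only a stand-in for one training iteration.
if __name__ == '__main__':
    timer = Timer()
    for _ in range(3):
        timer.tic()
        time.sleep(0.01)  # stand-in for one training iteration
        print('last: %.4fs  avg: %.4fs' % (timer.toc(average=False), timer.average_time))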
36 | -------------------------------------------------------------------------------- /lib/utils/training_stats.py: -------------------------------------------------------------------------------- 1 | 2 | """Utilities for training.""" 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from __future__ import unicode_literals 8 | 9 | from collections import defaultdict, OrderedDict 10 | import datetime 11 | from lib.core.config import cfg 12 | 13 | 14 | from lib.utils.logging import log_stats 15 | from lib.utils.logging import SmoothedValue 16 | from lib.utils.timer import Timer 17 | 18 | 19 | 20 | class TrainingStats(object): 21 | """Track vital training statistics.""" 22 | def __init__(self, args, log_period=20, tensorboard_logger=None): 23 | # Output logging period in SGD iterations 24 | self.args = args 25 | self.log_period = log_period 26 | self.tblogger = tensorboard_logger 27 | self.tb_ignored_keys = ['iter', 'eta', 'epoch', 'time'] 28 | self.iter_timer = Timer() 29 | # Window size for smoothing tracked values (with median filtering) 30 | self.filter_size = 20 31 | def create_smoothed_value(): 32 | return SmoothedValue(self.filter_size) 33 | self.smoothed_losses = defaultdict(create_smoothed_value) 34 | self.smoothed_metrics = defaultdict(create_smoothed_value) 35 | self.smoothed_total_loss = SmoothedValue(self.filter_size) 36 | 37 | 38 | def IterTic(self): 39 | self.iter_timer.tic() 40 | 41 | def IterToc(self): 42 | return self.iter_timer.toc(average=False) 43 | 44 | def ResetIterTimer(self): 45 | self.iter_timer.reset() 46 | 47 | def UpdateIterStats(self, loss): 48 | """Update tracked iteration statistics.""" 49 | total_loss = 0 50 | for k in loss: 51 | # all losses except the total loss: loss['all'] 52 | if k != 'total_loss': 53 | self.smoothed_losses[k].AddValue(float(loss[k])) 54 | 55 | total_loss += loss['total_loss'] 56 | self.smoothed_total_loss.AddValue(float(total_loss)) 57 | 58 | def LogIterStats(self, cur_iter, cur_epoch, optimizer, val_err={}): 59 | """Log the tracked statistics.""" 60 | if (cur_iter % self.log_period == 0): 61 | stats = self.GetStats(cur_iter, cur_epoch, optimizer, val_err) 62 | log_stats(stats, self.args) 63 | if self.tblogger: 64 | self.tb_log_stats(stats, cur_iter) 65 | 66 | def tb_log_stats(self, stats, cur_iter): 67 | """Log the tracked statistics to tensorboard""" 68 | for k in stats: 69 | if k not in self.tb_ignored_keys: 70 | v = stats[k] 71 | if isinstance(v, dict): 72 | self.tb_log_stats(v, cur_iter) 73 | else: 74 | self.tblogger.add_scalar(k, v, cur_iter) 75 | 76 | 77 | def GetStats(self, cur_iter, cur_epoch, optimizer, val_err = {}): 78 | eta_seconds = self.iter_timer.average_time * ( 79 | cfg.TRAIN.MAX_ITER - cur_iter 80 | ) 81 | eta = str(datetime.timedelta(seconds=int(eta_seconds))) 82 | stats = OrderedDict( 83 | iter=cur_iter, # 1-indexed 84 | time=self.iter_timer.average_time, 85 | eta=eta, 86 | total_loss=self.smoothed_total_loss.GetMedianValue(), 87 | epoch=cur_epoch, 88 | ) 89 | optimizer_state_dict = optimizer.state_dict() 90 | lr = {} 91 | for i in range(len(optimizer_state_dict['param_groups'])): 92 | lr_name = 'group%d_lr' % i 93 | lr[lr_name] = optimizer_state_dict['param_groups'][i]['lr'] 94 | 95 | stats['lr'] = OrderedDict(lr) 96 | for k, v in self.smoothed_losses.items(): 97 | stats[k] = OrderedDict([(k, v.GetMedianValue())]) 98 | 99 | stats['val_err'] = OrderedDict(val_err) 100 | return stats 101 | 
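

# Minimal sketch of how TrainingStats is typically driven from a training loop
# (kept as a comment; `loader`, `model`, `optimizer`, `epoch` and `val_err` are
# illustrative names, not taken from the actual trainers):
#
#   stats = TrainingStats(args, log_period=20)
#   for step, data in enumerate(loader):
#       stats.IterTic()
#       loss_dict = model(data)            # expected to contain a 'total_loss' entry
#       optimizer.zero_grad()
#       loss_dict['total_loss'].backward()
#       optimizer.step()
#       stats.UpdateIterStats(loss_dict)
#       stats.IterToc()
#       stats.LogIterStats(step, epoch, optimizer, val_err=val_err)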
-------------------------------------------------------------------------------- /prepocessing/gen_depth.sh: -------------------------------------------------------------------------------- 1 | python generate_depth.py --data_path ~/wangxinlong/data/KITTI/Kitti/object/training/ --split_file ~/wangxinlong/data/KITTI/Kitti/object/trainval.txt 2 | -------------------------------------------------------------------------------- /prepocessing/generate_depth.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def generate_depth_from_velo(pc_velo, height, width, calib): 11 | pts_2d = calib.project_velo_to_image(pc_velo) 12 | fov_inds = (pts_2d[:, 0] < width - 1) & (pts_2d[:, 0] >= 0) & \ 13 | (pts_2d[:, 1] < height - 1) & (pts_2d[:, 1] >= 0) 14 | fov_inds = fov_inds & (pc_velo[:, 0] > 2) 15 | imgfov_pc_velo = pc_velo[fov_inds, :] 16 | imgfov_pts_2d = pts_2d[fov_inds, :] 17 | imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo) 18 | depth_map = np.zeros((height, width)) - 1 19 | imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) 20 | for i in range(imgfov_pts_2d.shape[0]): 21 | depth = imgfov_pc_rect[i, 2] 22 | depth_map[int(imgfov_pts_2d[i, 1]), int(imgfov_pts_2d[i, 0])] = depth 23 | 24 | return depth_map 25 | 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser(description='Generate Depth') 29 | parser.add_argument('--data_path', type=str, default='~/Kitti/object/training/') 30 | parser.add_argument('--split_file', type=str, default='~/Kitti/object/train.txt') 31 | args = parser.parse_args() 32 | 33 | assert os.path.isdir(args.data_path) 34 | lidar_dir = args.data_path + '/velodyne/' 35 | calib_dir = args.data_path + '/calib/' 36 | image_dir = args.data_path + '/image_2/' 37 | lidepth_dir = args.data_path + '/lidepth/' 38 | 39 | assert os.path.isdir(lidar_dir) 40 | assert os.path.isdir(calib_dir) 41 | assert os.path.isdir(image_dir) 42 | 43 | if not os.path.isdir(lidepth_dir): 44 | os.makedirs(lidepth_dir) 45 | 46 | lidar_files = [x for x in os.listdir(lidar_dir) if x[-3:] == 'bin'] 47 | lidar_files = sorted(lidar_files) 48 | 49 | assert os.path.isfile(args.split_file) 50 | with open(args.split_file, 'r') as f: 51 | file_names = [x.strip() for x in f.readlines()] 52 | 53 | for fn in lidar_files: 54 | predix = fn[:-4] 55 | if predix not in file_names: 56 | continue 57 | calib_file = '{}/{}.txt'.format(calib_dir, predix) 58 | calib = kitti_util.Calibration(calib_file) 59 | # load point cloud 60 | lidar = np.fromfile(lidar_dir + '/' + fn, dtype=np.float32).reshape((-1, 4))[:, :3] 61 | image_file = '{}/{}.png'.format(image_dir, predix) 62 | image = ssc.imread(image_file) 63 | height, width = image.shape[:2] 64 | lidepth = generate_depth_from_velo(lidar, height, width, calib) 65 | np.save(lidepth_dir + '/' + predix, lidepth) 66 | print('Finish Depth {}'.format(predix)) 67 | -------------------------------------------------------------------------------- /prepocessing/generate_disp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def generate_dispariy_from_velo(pc_velo, height, width, calib): 11 | pts_2d = calib.project_velo_to_image(pc_velo) 12 | fov_inds = (pts_2d[:, 0] < width - 1) & (pts_2d[:, 0] >= 0) & \ 13 | (pts_2d[:, 1] < height - 1) & 
(pts_2d[:, 1] >= 0) 14 | fov_inds = fov_inds & (pc_velo[:, 0] > 2) 15 | imgfov_pc_velo = pc_velo[fov_inds, :] 16 | imgfov_pts_2d = pts_2d[fov_inds, :] 17 | imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo) 18 | depth_map = np.zeros((height, width)) - 1 19 | imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) 20 | for i in range(imgfov_pts_2d.shape[0]): 21 | depth = imgfov_pc_rect[i, 2] 22 | depth_map[int(imgfov_pts_2d[i, 1]), int(imgfov_pts_2d[i, 0])] = depth 23 | baseline = 0.54 24 | 25 | disp_map = (calib.f_u * baseline) / depth_map 26 | return disp_map 27 | 28 | 29 | if __name__ == '__main__': 30 | parser = argparse.ArgumentParser(description='Generate Disparity') 31 | parser.add_argument('--data_path', type=str, default='~/Kitti/object/training/') 32 | parser.add_argument('--split_file', type=str, default='~/Kitti/object/train.txt') 33 | args = parser.parse_args() 34 | 35 | assert os.path.isdir(args.data_path) 36 | lidar_dir = args.data_path + '/velodyne/' 37 | calib_dir = args.data_path + '/calib/' 38 | image_dir = args.data_path + '/image_2/' 39 | disparity_dir = args.data_path + '/disparity/' 40 | 41 | assert os.path.isdir(lidar_dir) 42 | assert os.path.isdir(calib_dir) 43 | assert os.path.isdir(image_dir) 44 | 45 | if not os.path.isdir(disparity_dir): 46 | os.makedirs(disparity_dir) 47 | 48 | lidar_files = [x for x in os.listdir(lidar_dir) if x[-3:] == 'bin'] 49 | lidar_files = sorted(lidar_files) 50 | 51 | assert os.path.isfile(args.split_file) 52 | with open(args.split_file, 'r') as f: 53 | file_names = [x.strip() for x in f.readlines()] 54 | 55 | for fn in lidar_files: 56 | predix = fn[:-4] 57 | if predix not in file_names: 58 | continue 59 | calib_file = '{}/{}.txt'.format(calib_dir, predix) 60 | calib = kitti_util.Calibration(calib_file) 61 | # load point cloud 62 | lidar = np.fromfile(lidar_dir + '/' + fn, dtype=np.float32).reshape((-1, 4))[:, :3] 63 | image_file = '{}/{}.png'.format(image_dir, predix) 64 | image = ssc.imread(image_file) 65 | height, width = image.shape[:2] 66 | disp = generate_dispariy_from_velo(lidar, height, width, calib) 67 | np.save(disparity_dir + '/' + predix, disp) 68 | print('Finish Disparity {}'.format(predix)) 69 | -------------------------------------------------------------------------------- /prepocessing/generate_lidar.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def project_disp_to_depth(calib, disp, max_high): 11 | disp[disp < 0] = 0 12 | baseline = 0.54 13 | mask = disp > 0 14 | depth = calib.f_u * baseline / (disp + 1. 
- mask) 15 | rows, cols = depth.shape 16 | c, r = np.meshgrid(np.arange(cols), np.arange(rows)) 17 | points = np.stack([c, r, depth]) 18 | points = points.reshape((3, -1)) 19 | points = points.T 20 | points = points[mask.reshape(-1)] 21 | cloud = calib.project_image_to_velo(points) 22 | valid = (cloud[:, 0] >= 0) & (cloud[:, 2] < max_high) 23 | return cloud[valid] 24 | 25 | 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser(description='Generate Libar') 28 | parser.add_argument('--calib_dir', type=str, 29 | default='~/Kitti/object/training/calib') 30 | parser.add_argument('--disparity_dir', type=str, 31 | default='~/Kitti/object/training/predicted_disparity') 32 | parser.add_argument('--save_dir', type=str, 33 | default='~/Kitti/object/training/predicted_velodyne') 34 | parser.add_argument('--max_high', type=int, default=1) 35 | args = parser.parse_args() 36 | 37 | assert os.path.isdir(args.disparity_dir) 38 | assert os.path.isdir(args.calib_dir) 39 | 40 | if not os.path.isdir(args.save_dir): 41 | os.makedirs(args.save_dir) 42 | 43 | disps = [x for x in os.listdir(args.disparity_dir) if x[-3:] == 'png'] 44 | disps = sorted(disps) 45 | 46 | for fn in disps: 47 | predix = fn[:-4] 48 | calib_file = '{}/{}.txt'.format(args.calib_dir, predix) 49 | calib = kitti_util.Calibration(calib_file) 50 | disp_map = ssc.imread(args.disparity_dir + '/' + fn) / 256. 51 | lidar = project_disp_to_depth(calib, disp_map, args.max_high) 52 | # pad 1 in the indensity dimension 53 | lidar = np.concatenate([lidar, np.ones((lidar.shape[0], 1))], 1) 54 | lidar = lidar.astype(np.float32) 55 | lidar.tofile('{}/{}.bin'.format(args.save_dir, predix)) 56 | print('Finish Depth {}'.format(predix)) 57 | -------------------------------------------------------------------------------- /prepocessing/kitti_process_RANSAC.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.linear_model import RANSACRegressor 6 | 7 | import kitti_util as utils 8 | 9 | 10 | def extract_ransac(calib_dir, lidar_dir, planes_dir): 11 | data_idx_list = [x[:-4] for x in os.listdir(lidar_dir) if x[-4:] == '.bin'] 12 | 13 | if not os.path.isdir(planes_dir): 14 | os.makedirs(planes_dir) 15 | 16 | for data_idx in data_idx_list: 17 | 18 | print('------------- ', data_idx) 19 | calib = calib_dir + '/' + data_idx + '.txt' 20 | calib = utils.Calibration(calib) 21 | pc_velo = lidar_dir + '/' + data_idx + '.bin' 22 | pc_velo = np.fromfile(pc_velo, dtype=np.float32).reshape(-1, 4) 23 | pc_rect = calib.project_velo_to_rect(pc_velo[:, :3]) 24 | valid_loc = (pc_rect[:, 1] > 1.5) & \ 25 | (pc_rect[:, 1] < 1.86) & \ 26 | (pc_rect[:, 2] > 0) & \ 27 | (pc_rect[:, 2] < 40) & \ 28 | (pc_rect[:, 0] > -15) & \ 29 | (pc_rect[:, 0] < 15) 30 | pc_rect = pc_rect[valid_loc] 31 | if len(pc_rect) < 1: 32 | w = [0, -1, 0] 33 | h = 1.65 34 | else: 35 | reg = RANSACRegressor().fit(pc_rect[:, [0, 2]], pc_rect[:, 1]) 36 | w = np.zeros(3) 37 | w[0] = reg.estimator_.coef_[0] 38 | w[2] = reg.estimator_.coef_[1] 39 | w[1] = -1.0 40 | h = reg.estimator_.intercept_ 41 | w = w / np.linalg.norm(w) 42 | print(w) 43 | print(h) 44 | 45 | lines = ['# Plane', 'Width 4', 'Height 1'] 46 | 47 | plane_file = os.path.join(planes_dir, data_idx + '.txt') 48 | result_lines = lines[:3] 49 | result_lines.append("{:e} {:e} {:e} {:e}".format(w[0], w[1], w[2], h)) 50 | result_str = '\n'.join(result_lines) 51 | with open(plane_file, 'w') as f: 52 | f.write(result_str) 53 | 54 | 55 | if 
__name__ == '__main__': 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--calib_dir', default='KITTI/object/training/calib') 58 | parser.add_argument('--lidar_dir', default='KITTI/object/training/velodyne') 59 | parser.add_argument('--planes_dir', default='KITTI/object/training/velodyne_planes') 60 | args = parser.parse_args() 61 | 62 | extract_ransac(args.calib_dir, args.lidar_dir, args.planes_dir) 63 | -------------------------------------------------------------------------------- /prepocessing/kitti_util.py: -------------------------------------------------------------------------------- 1 | """ Helper methods for loading and parsing KITTI data. 2 | 3 | Author: Charles R. Qi 4 | Date: September 2017 5 | """ 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | 11 | class Calibration(object): 12 | ''' Calibration matrices and utils 13 | 3d XYZ in