├── Tracking ├── __init__.py ├── utils │ ├── __init__.py │ ├── eval_utils.py │ ├── train_utils.py │ └── vis_utils.py ├── datasets │ ├── __init__.py │ ├── front_dataset.py │ └── consec_graph_dataset.py ├── networks │ ├── __init__.py │ ├── edge_classifier.py │ ├── mlp.py │ ├── voxel_encoder.py │ └── mpn.py ├── inference.py ├── train.py ├── graph_cfg.py └── options.py ├── BlenderProc └── utils │ ├── __init__.py │ ├── libvoxelize │ ├── __init__.py │ ├── voxelize.pyx │ └── tribox2.h │ ├── libmesh │ ├── __init__.py │ ├── triangle_hash.pyx │ └── inside_mesh.py │ ├── voxels.py │ └── binvox_rw.py ├── Detection ├── roi_heads │ ├── __init__.py │ ├── voxel_head.py │ ├── roi_heads.py │ └── nocs_head.py ├── evaluator │ ├── mask.py │ └── _mask.pyx ├── inference │ ├── inference_metrics.py │ └── inference_utils.py ├── data │ ├── office_dataset.py │ └── mapper_heads.py ├── cfg_setup.py ├── train_net.py └── register_dataset.py ├── demo └── Teaser.png ├── .gitignore ├── baseconfig.py ├── Utility └── analyse_datset.py ├── README.md ├── PoseEst └── pose_utils.py └── environment.yml /Tracking/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BlenderProc/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Tracking/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import * 2 | -------------------------------------------------------------------------------- /Tracking/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .front_dataset import * 2 | from .graph_dataset import * 3 | -------------------------------------------------------------------------------- /Detection/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_heads import * 2 | from .roi_heads import VoxelNocsHeads 3 | -------------------------------------------------------------------------------- /demo/Teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DomiSchmauser/3D_MOT_Differentiable_Pose_Estimation/HEAD/demo/Teaser.png -------------------------------------------------------------------------------- /Tracking/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import * 2 | from .voxel_encoder import * 3 | from .edge_classifier import * 4 | from .mpn import * 5 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/__init__.py: -------------------------------------------------------------------------------- 1 | from .inside_mesh import ( 2 | check_mesh_contains, MeshIntersector, TriangleIntersector2d 3 | ) 4 | 5 | 6 | __all__ = [ 7 | check_mesh_contains, MeshIntersector, TriangleIntersector2d 8 | ] 9 | -------------------------------------------------------------------------------- /Tracking/inference.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import, division, print_function 2 | 3 | from options import Options 4 | import os 5 | import argparse 6 | 7 | 8 | # the directory that options.py resides in 9 | file_dir = os.path.dirname(__file__) 10 | 11 | options = Options() 12 | opts = options.parse() 13 | 14 | if opts.use_graph: 15 | from mpn_trainer import Trainer 16 | else: 17 | from trainer import Trainer 18 | 19 | if __name__ == "__main__": 20 | trainer = Trainer(opts) 21 | trainer.inference(vis_pose=False, classwise=True) 22 | -------------------------------------------------------------------------------- /Tracking/networks/edge_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | ''' 6 | Binary Classifier for classifying active/ non-active edges 7 | ''' 8 | 9 | class EdgeClassifier(nn.Module): 10 | 11 | def __init__(self, input_dim=32, intermed_dim=None): 12 | super(EdgeClassifier, self).__init__() 13 | if intermed_dim is None: 14 | self.fc1 = nn.Linear(input_dim, 16) 15 | self.fc2 = nn.Linear(16, 1) 16 | else: 17 | self.fc1 = nn.Linear(input_dim, intermed_dim) 18 | self.fc2 = nn.Linear(intermed_dim, 1) 19 | 20 | def forward(self, x): 21 | 22 | x = F.relu(self.fc1(x)) 23 | x = self.fc2(x) 24 | 25 | return x -------------------------------------------------------------------------------- /Detection/evaluator/mask.py: -------------------------------------------------------------------------------- 1 | import pycocotools._mask as _mask 2 | 3 | iou = _mask.iou 4 | merge = _mask.merge 5 | frPyObjects = _mask.frPyObjects 6 | 7 | def encode(bimask): 8 | if len(bimask.shape) == 3: 9 | return _mask.encode(bimask) 10 | elif len(bimask.shape) == 2: 11 | h, w = bimask.shape 12 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 13 | 14 | def decode(rleObjs): 15 | if type(rleObjs) == list: 16 | return _mask.decode(rleObjs) 17 | else: 18 | return _mask.decode([rleObjs])[:,:,0] 19 | 20 | def area(rleObjs): 21 | if type(rleObjs) == list: 22 | return _mask.area(rleObjs) 23 | else: 24 | return _mask.area([rleObjs])[0] 25 | 26 | def toBbox(rleObjs): 27 | if type(rleObjs) == list: 28 | return _mask.toBbox(rleObjs) 29 | else: 30 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /Tracking/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from options import Options 4 | import os, shutil,sys 5 | import argparse 6 | 7 | # the directory that options.py resides in 8 | file_dir = os.path.dirname(__file__) 9 | 10 | options = Options() 11 | opts = options.parse() 12 | 13 | if opts.use_graph: 14 | from mpn_trainer import Trainer 15 | else: 16 | from trainer import Trainer 17 | 18 | sys.path.append('..') #Hack add ROOT DIR 19 | from baseconfig import CONF 20 | 21 | if __name__ == "__main__": 22 | 23 | # Remove old files 24 | if os.path.exists(CONF.PATH.TRACKOUTPUT): 25 | print('Removing old outputs ...') 26 | shutil.rmtree(CONF.PATH.TRACKOUTPUT) 27 | os.mkdir(CONF.PATH.TRACKOUTPUT) 28 | 29 | trainer = Trainer(opts) 30 | if opts.precompute_feats: 31 | trainer.precompute() 32 | else: 33 | trainer.train() 34 | -------------------------------------------------------------------------------- /Tracking/graph_cfg.py: -------------------------------------------------------------------------------- 1 | # Graph Setup 2 | 
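# A minimal usage sketch (hypothetical names, not taken from this file): the dict built
# below only bundles hyperparameters and is handed to the message-passing tracker, e.g.
#   cfg = init_graph_cfg(node_in_size=16)
#   net = MPN(cfg)  # hypothetical constructor; the consuming class lives in Tracking/networks/mpn.py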
3 | def init_graph_cfg(node_in_size=16): 4 | ''' 5 | Graph Neural Network setup 6 | ''' 7 | graph_cfg = { 8 | 'undirected_graph': True, 9 | 'use_time_aware_mp': False, 10 | 'use_leaky_relu': True, 11 | 'max_frame_dist': 5, 12 | 'num_mp_steps': 4, 13 | 'node_agg_fn': 'mean', 14 | 'reattach_initial_nodes': False, 15 | 'reattach_initial_edges': True, 16 | 'encoder_feats_dict': { 17 | 'edge_in_dim': 8, 18 | 'edge_fc_dims': [12], 19 | 'edge_out_dim': 12, 20 | 'node_out_dim': node_in_size, 21 | 'dropout_p': None, 22 | 'use_batchnorm': False, 23 | }, 24 | 'edge_model_feats_dict': { 25 | 'fc_dims': [32, 12], 26 | 'dropout_p': None, 27 | 'use_batchnorm': False, 28 | }, 29 | 'node_model_feats_dict': { 30 | 'fc_dims': [20, node_in_size], 31 | 'dropout_p': None, 32 | 'use_batchnorm': False, 33 | }, 34 | } 35 | return graph_cfg 36 | -------------------------------------------------------------------------------- /Tracking/networks/mlp.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class MLP(nn.Module): 5 | def __init__(self, input_dim, fc_dims, dropout_p=0.4, use_batchnorm=False, use_leaky_relu=True): 6 | super(MLP, self).__init__() 7 | 8 | if use_leaky_relu: 9 | self.activation = nn.LeakyReLU(inplace=True) 10 | else: 11 | self.activation = nn.ReLU(inplace=True) 12 | 13 | assert isinstance(fc_dims, (list, tuple)), 'fc_dims must be either a list or a tuple, but got {}'.format( 14 | type(fc_dims)) 15 | 16 | layers = [] 17 | for idx, dim in enumerate(fc_dims): 18 | layers.append(nn.Linear(input_dim, dim)) 19 | if use_batchnorm and dim != 1: 20 | layers.append(nn.BatchNorm1d(dim)) 21 | 22 | if dim != 1: 23 | layers.append(self.activation) 24 | 25 | if dropout_p is not None and dim != 1: 26 | layers.append(nn.Dropout(p=dropout_p)) 27 | 28 | input_dim = dim 29 | 30 | self.layers = nn.Sequential(*layers) 31 | 32 | def forward(self, input): 33 | output = self.layers(input) 34 | return output -------------------------------------------------------------------------------- /Tracking/networks/voxel_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class VoxelEncoder(nn.Module): 6 | 7 | ''' 3D-convolutional encoder network for voxel input. 
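    Three stride-2 3D convolutions reduce the 32 x 32 x 32 grid to a 32-channel 4 x 4 x 4 volume,
    which two fully connected layers (through a 256-dim hidden layer) map to the
    `output_channel`-dimensional code.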
8 | Args: 9 | dim (int): input dimension 32 x 32 x 32 10 | c_dim (int): output dimension 9 11 | ''' 12 | 13 | def __init__(self, input_channel=1, output_channel=9): 14 | super().__init__() 15 | self.relu = F.relu 16 | self.leaky_relu = F.leaky_relu 17 | 18 | self.conv_in = nn.Conv3d(input_channel, 8, 3, padding=1) # 1 x 32 x 32 x 32 19 | self.conv_0 = nn.Conv3d(8, 16, 3, padding=1, stride=2) 20 | self.conv_1 = nn.Conv3d(16, 32, 3, padding=1, stride=2) 21 | self.conv_2 = nn.Conv3d(32, 32, 3, padding=1, stride=2) 22 | self.fc = nn.Linear(32 * 4 * 4 * 4, 256) 23 | self.fc2 = nn.Linear(256, output_channel) 24 | 25 | def forward(self, x): 26 | batch_size = x.size(0) # x_shape = BS x in_channels x 32 x 32 x 32 27 | 28 | net = self.conv_in(x) 29 | #print('l1',net.shape) 30 | net = self.conv_0(self.relu(net)) 31 | #print('l2', net.shape) 32 | net = self.conv_1(self.relu(net)) 33 | #print('l3', net.shape) 34 | net = self.conv_2(self.relu(net)) 35 | #print('l4', net.shape) 36 | 37 | hidden = net.view(batch_size, 32 * 4 * 4 * 4) 38 | output = self.fc(self.leaky_relu(hidden)) 39 | output = self.fc2(self.leaky_relu(output)) # BS x out_dim 40 | 41 | 42 | return output -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !BlenderProc/resources/front_3D/3D-FUTURE-model/ 2 | 3 | 4 | # Detection 5 | Detection/outputs 6 | Detection/outputs_v1 7 | Detection/outputs_v2 8 | Detection/outputs_v3 9 | Detection/outputs_v4 10 | Detection/predicted_data 11 | Detection/model 12 | Detection/front_dataset 13 | Detection/office_dataset 14 | Detection/office_dataset_all 15 | Detection/scannet 16 | Detection/__pycache__ 17 | 18 | # Junk 19 | .idea 20 | *__pychache__ 21 | 22 | 23 | # Blenderproc 24 | BlenderProc/blender 25 | BlenderProc/.idea/ 26 | BlenderProc/.vscode/ 27 | BlenderProc/output/ 28 | BlenderProc/output_good/ 29 | BlenderProc/debug/ 30 | BlenderProc/resources/cctextures 31 | BlenderProc/resources/scenenet/*zip 32 | BlenderProc/resources/scenenet/SceneNetData 33 | BlenderProc/resources/scenenet/texture* 34 | BlenderProc/resources/blenderkit 35 | BlenderProc/resources/IKEA 36 | BlenderProc/resources/pix3d 37 | BlenderProc/resources/front_3D/3D-FRONT 38 | BlenderProc/resources/front_3D/3D-FRONT-texture 39 | BlenderProc/resources/front_3D/3D-FUTURE-model 40 | BlenderProc/examples/front_3d_with_improved_mat/output 41 | BlenderProc/examples/front_3d/output 42 | BlenderProc/output 43 | *.blend1 44 | .vscode 45 | *.pyc 46 | *.swp 47 | *.so 48 | BlenderProc/docs/build/ 49 | BlenderProc/!docs/source/index.rst 50 | BlenderProc/docs/source/*rst 51 | BlenderProc/utils/libmesh/*.cpp 52 | BlenderProc/utils/libvoxelize/*.c 53 | BlenderProc/build/ 54 | 55 | Detection/outputs_1/ 56 | 57 | # Pix2Vox 58 | Pix2Vox 59 | Pix2Vox/instance/ 60 | Pix2Vox/target/ 61 | Pix2Vox/datasets/ 62 | Pix2Vox/output/ 63 | Pix2Vox/pretrained/ 64 | Pix2Vox/runs/ 65 | 66 | # Pose 67 | PoseEst/data/ 68 | 69 | # Tracking 70 | Tracking/output/ 71 | Tracking/output_v1/ 72 | Tracking/model/ 73 | 74 | # Utils 75 | Utility/buggy_scenes.txt 76 | Utility/voxel_scenes.txt 77 | 78 | # Backup 79 | Backup 80 | 81 | !BlenderProc/resources/front_3D/3D-FUTURE-model/*.json 82 | -------------------------------------------------------------------------------- /baseconfig.py: -------------------------------------------------------------------------------- 1 | import os 2 | from easydict import EasyDict 3 | 4 | CONF = EasyDict() 5 | CONF.PATH = 
EasyDict() 6 | 7 | # Base Folder 8 | CONF.PATH.BASE = os.path.abspath(os.path.dirname(__file__)) #Base Graph3DMOT path 9 | CONF.PATH.BPROC = os.path.join(CONF.PATH.BASE, "BlenderProc") 10 | CONF.PATH.DETECT = os.path.join(CONF.PATH.BASE, "Detection") 11 | CONF.PATH.PROJ = os.path.join(CONF.PATH.BASE, "PoseEst") 12 | CONF.PATH.TRACK = os.path.join(CONF.PATH.BASE, 'Tracking') 13 | 14 | # Front Data Generation 15 | CONF.PATH.FRONTDATA = os.path.join(CONF.PATH.BPROC, "resources/front_3D") 16 | CONF.PATH.FRONT3D = os.path.join(CONF.PATH.FRONTDATA, "3D-FRONT") 17 | CONF.PATH.FUTURE3D = os.path.join(CONF.PATH.FRONTDATA, "3D-FUTURE-model") 18 | CONF.PATH.FRONTTEXT = os.path.join(CONF.PATH.FRONTDATA, "3D-FRONT-texture") 19 | 20 | # Detection 21 | 22 | # MOTFront storage folder 23 | CONF.PATH.DETECTDATA = os.path.join(CONF.PATH.DETECT, 'front_dataset/') 24 | CONF.PATH.DETECTTRAIN = os.path.join(CONF.PATH.DETECTDATA, 'train') 25 | CONF.PATH.DETECTVAL = os.path.join(CONF.PATH.DETECTDATA, 'val') 26 | CONF.PATH.DETECTTEST = os.path.join(CONF.PATH.DETECTDATA, 'test') 27 | CONF.PATH.DETECTVIS = os.path.join(CONF.PATH.DETECTDATA, 'vis') 28 | CONF.PATH.VOXELDATA = os.path.join(CONF.PATH.DETECTDATA, 'voxel') # storage for binvox model folder 29 | # Pretrained Detection network folder 30 | CONF.PATH.DETECTMODEL = os.path.join(CONF.PATH.DETECT, 'model/') 31 | 32 | # Projection (for debugging) 33 | CONF.PATH.PROJDATA = os.path.join(CONF.PATH.PROJ, 'data') 34 | 35 | # Tracking (Data folder for seperate Tracking pipeline training) 36 | CONF.PATH.TRACKDATA = os.path.join(CONF.PATH.DETECT, 'predicted_data') 37 | 38 | # Outputs/ Logging 39 | CONF.PATH.DETECTOUTPUT = os.path.join(CONF.PATH.DETECT, 'outputs') 40 | CONF.PATH.BPROCOUTPUT = os.path.join(CONF.PATH.BPROC, 'output') 41 | CONF.PATH.TRACKOUTPUT = os.path.join(CONF.PATH.TRACK, 'output') 42 | 43 | -------------------------------------------------------------------------------- /Utility/analyse_datset.py: -------------------------------------------------------------------------------- 1 | import os, json, cv2, csv, sys 2 | import shutil 3 | sys.path.append('..') #Hack add ROOT DIR 4 | from baseconfig import CONF 5 | 6 | def get_dataset_info(img_path, combined=False): 7 | 8 | mapping_file = os.path.join(img_path[:-6], "3D_front_mapping.csv") 9 | _, csv_dict = read_csv_mapping(mapping_file) 10 | mapping_list, name_list = [], [] 11 | 12 | folders = os.listdir(img_path) 13 | bad_folder = [] 14 | img_count = 0 15 | 16 | for folder in folders: 17 | 18 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 19 | 20 | with open(json_file) as f: 21 | imgs_anns = json.load(f) 22 | 23 | for idx, v in enumerate(imgs_anns['images']): 24 | img_count += 1 25 | for anno in imgs_anns['annotations']: 26 | if anno['image_id'] == v['id']: 27 | cat_id = anno['category_id'] 28 | try: 29 | name = csv_dict[cat_id] 30 | except: 31 | bad_folder.append(folder) 32 | if not name in name_list: 33 | name_list.append(name) 34 | 35 | if cat_id in mapping_list: 36 | pass 37 | else: 38 | mapping_list.append(cat_id) 39 | for l in list(set(bad_folder)): 40 | print("remove folder", os.path.join(CONF.PATH.DETECTTRAIN, l)) 41 | shutil.rmtree(os.path.join(CONF.PATH.DETECTTRAIN, l), ignore_errors=True) 42 | 43 | if combined: 44 | return mapping_list, name_list, img_count 45 | else: 46 | return mapping_list, name_list 47 | 48 | def read_csv_mapping(path): 49 | """ Loads an idset mapping from a csv file, assuming the rows are sorted by their ids. 
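    Returns a list of the category names (in row order) and a dict mapping category id to name.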
50 | :param path: Path to csv file 51 | """ 52 | 53 | with open(path, 'r') as csvfile: 54 | reader = csv.DictReader(csvfile) 55 | new_id_label_map = [] 56 | new_label_id_map = {} 57 | 58 | for row in reader: 59 | new_id_label_map.append(row["name"]) 60 | new_label_id_map[int(row["id"])] = row["name"] 61 | 62 | return new_id_label_map, new_label_id_map 63 | -------------------------------------------------------------------------------- /Tracking/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import open3d as o3d 5 | import mathutils 6 | 7 | from sklearn.metrics import recall_score 8 | from sklearn.metrics import precision_score 9 | from sklearn.metrics import f1_score 10 | 11 | from Tracking.utils.train_utils import convert_voxel_to_pc 12 | 13 | 14 | def get_precision(predictions, targets): 15 | 16 | # Binarize predictions 17 | predictions[predictions >= 0.5] = 1 18 | predictions[predictions < 0.5] = 0 19 | 20 | precision = precision_score(targets, predictions, zero_division=0) 21 | return precision 22 | 23 | def get_recall(predictions, targets): 24 | 25 | # Binarize predictions 26 | predictions[predictions >= 0.5] = 1 27 | predictions[predictions < 0.5] = 0 28 | 29 | recall = recall_score(targets, predictions, zero_division=0) 30 | return recall 31 | 32 | def get_f1(predictions, targets): 33 | 34 | # Binarize predictions 35 | predictions[predictions >= 0.5] = 1 36 | predictions[predictions < 0.5] = 0 37 | 38 | f1 = f1_score(targets, predictions, zero_division='warn') # warn only once 39 | return f1 40 | 41 | def get_MOTA(predictions, targets, gt_objects, misses, fps): 42 | ''' 43 | Full val/test set MOTA calculations 44 | MOTA score: 1 - num_misses + false positives + id_switches / total num_objects in all frames 45 | false_positives: Predicted 3D BBOX does not overlap with any GT 3D BBOX more than a threshold e.g. 0.2 IoU 46 | num_misses: For a GT 3D BBOX there exist no predicted 3D BBOX which overlaps more than min threshold, or less pred than gt objects 47 | id_switches: GT trajectory and predicted trajectory have do not match in object identities, predicted active/nonactive edge incorrect 48 | ''' 49 | 50 | # Binarize predictions 51 | predictions[predictions >= 0.5] = 1 52 | predictions[predictions < 0.5] = 0 53 | 54 | id_switches = np.count_nonzero(targets - predictions) 55 | MOTA = 1.0 - (float(misses + fps + id_switches) / float(gt_objects)) 56 | 57 | return MOTA, id_switches 58 | 59 | def get_mota_df(num_gt_objs, num_misses, num_fps, num_switches): 60 | ''' 61 | Calculates a mota score over all frames seen 62 | ''' 63 | mota = 1.0 - (float(num_misses + num_fps + num_switches) / float(num_gt_objs)) 64 | return mota 65 | 66 | 67 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/voxelize.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | from libc.math cimport floor, ceil 3 | from cython.view cimport array as cvarray 4 | 5 | cdef extern from "tribox2.h": 6 | int triBoxOverlap(float boxcenter[3], float boxhalfsize[3], 7 | float tri0[3], float tri1[3], float tri2[3]) 8 | 9 | 10 | @cython.boundscheck(False) # Deactivate bounds checking 11 | @cython.wraparound(False) # Deactivate negative indexing. 
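# Rasterizes every triangle of `faces` (given in voxel coordinates, shape (n_faces, 3, 3)) into the
# boolean occupancy grid `occ` in place, testing triangle/box overlap cell by cell.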
12 | cpdef int voxelize_mesh_(bint[:, :, :] occ, float[:, :, ::1] faces): 13 | assert(faces.shape[1] == 3) 14 | assert(faces.shape[2] == 3) 15 | 16 | n_faces = faces.shape[0] 17 | cdef int i 18 | for i in range(n_faces): 19 | voxelize_triangle_(occ, faces[i]) 20 | 21 | 22 | @cython.boundscheck(False) # Deactivate bounds checking 23 | @cython.wraparound(False) # Deactivate negative indexing. 24 | cpdef int voxelize_triangle_(bint[:, :, :] occupancies, float[:, ::1] triverts): 25 | cdef int bbox_min[3] 26 | cdef int bbox_max[3] 27 | cdef int i, j, k 28 | cdef float boxhalfsize[3] 29 | cdef float boxcenter[3] 30 | cdef bint intersection 31 | 32 | boxhalfsize[:] = (0.5, 0.5, 0.5) 33 | 34 | for i in range(3): 35 | bbox_min[i] = ( 36 | min(triverts[0, i], triverts[1, i], triverts[2, i]) 37 | ) 38 | bbox_min[i] = min(max(bbox_min[i], 0), occupancies.shape[i] - 1) 39 | 40 | for i in range(3): 41 | bbox_max[i] = ( 42 | max(triverts[0, i], triverts[1, i], triverts[2, i]) 43 | ) 44 | bbox_max[i] = min(max(bbox_max[i], 0), occupancies.shape[i] - 1) 45 | 46 | for i in range(bbox_min[0], bbox_max[0] + 1): 47 | for j in range(bbox_min[1], bbox_max[1] + 1): 48 | for k in range(bbox_min[2], bbox_max[2] + 1): 49 | boxcenter[:] = (i + 0.5, j + 0.5, k + 0.5) 50 | intersection = triBoxOverlap(&boxcenter[0], &boxhalfsize[0], 51 | &triverts[0, 0], &triverts[1, 0], &triverts[2, 0]) 52 | occupancies[i, j, k] |= intersection 53 | 54 | 55 | @cython.boundscheck(False) # Deactivate bounds checking 56 | @cython.wraparound(False) # Deactivate negative indexing. 57 | cdef int test_triangle_aabb(float[::1] boxcenter, float[::1] boxhalfsize, float[:, ::1] triverts): 58 | assert(boxcenter.shape[0] == 3) 59 | assert(boxhalfsize.shape[0] == 3) 60 | assert(triverts.shape[0] == triverts.shape[1] == 3) 61 | 62 | # print(triverts) 63 | # Call functions 64 | cdef int result = triBoxOverlap(&boxcenter[0], &boxhalfsize[0], 65 | &triverts[0, 0], &triverts[1, 0], &triverts[2, 0]) 66 | return result 67 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/triangle_hash.pyx: -------------------------------------------------------------------------------- 1 | 2 | # distutils: language=c++ 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | from libcpp.vector cimport vector 7 | from libc.math cimport floor, ceil 8 | 9 | cdef class TriangleHash: 10 | cdef vector[vector[int]] spatial_hash 11 | cdef int resolution 12 | 13 | def __cinit__(self, double[:, :, :] triangles, int resolution): 14 | self.spatial_hash.resize(resolution * resolution) 15 | self.resolution = resolution 16 | self._build_hash(triangles) 17 | 18 | @cython.boundscheck(False) # Deactivate bounds checking 19 | @cython.wraparound(False) # Deactivate negative indexing. 
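    # Registers each (already 2D-projected) triangle in every spatial-hash cell covered by its
    # clamped bounding box, so query() only has to test the triangles stored in a point's cell.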
20 | cdef int _build_hash(self, double[:, :, :] triangles): 21 | assert(triangles.shape[1] == 3) 22 | assert(triangles.shape[2] == 2) 23 | 24 | cdef int n_tri = triangles.shape[0] 25 | cdef int bbox_min[2] 26 | cdef int bbox_max[2] 27 | 28 | cdef int i_tri, j, x, y 29 | cdef int spatial_idx 30 | 31 | for i_tri in range(n_tri): 32 | # Compute bounding box 33 | for j in range(2): 34 | bbox_min[j] = min( 35 | triangles[i_tri, 0, j], triangles[i_tri, 1, j], triangles[i_tri, 2, j] 36 | ) 37 | bbox_max[j] = max( 38 | triangles[i_tri, 0, j], triangles[i_tri, 1, j], triangles[i_tri, 2, j] 39 | ) 40 | bbox_min[j] = min(max(bbox_min[j], 0), self.resolution - 1) 41 | bbox_max[j] = min(max(bbox_max[j], 0), self.resolution - 1) 42 | 43 | # Find all voxels where bounding box intersects 44 | for x in range(bbox_min[0], bbox_max[0] + 1): 45 | for y in range(bbox_min[1], bbox_max[1] + 1): 46 | spatial_idx = self.resolution * x + y 47 | self.spatial_hash[spatial_idx].push_back(i_tri) 48 | 49 | @cython.boundscheck(False) # Deactivate bounds checking 50 | @cython.wraparound(False) # Deactivate negative indexing. 51 | cpdef query(self, double[:, :] points): 52 | assert(points.shape[1] == 2) 53 | cdef int n_points = points.shape[0] 54 | 55 | cdef vector[int] points_indices 56 | cdef vector[int] tri_indices 57 | # cdef int[:] points_indices_np 58 | # cdef int[:] tri_indices_np 59 | 60 | cdef int i_point, k, x, y 61 | cdef int spatial_idx 62 | 63 | for i_point in range(n_points): 64 | x = int(points[i_point, 0]) 65 | y = int(points[i_point, 1]) 66 | if not (0 <= x < self.resolution and 0 <= y < self.resolution): 67 | continue 68 | 69 | spatial_idx = self.resolution * x + y 70 | for i_tri in self.spatial_hash[spatial_idx]: 71 | points_indices.push_back(i_point) 72 | tri_indices.push_back(i_tri) 73 | 74 | points_indices_np = np.zeros(points_indices.size(), dtype=np.int32) 75 | tri_indices_np = np.zeros(tri_indices.size(), dtype=np.int32) 76 | 77 | cdef int[:] points_indices_view = points_indices_np 78 | cdef int[:] tri_indices_view = tri_indices_np 79 | 80 | for k in range(points_indices.size()): 81 | points_indices_view[k] = points_indices[k] 82 | 83 | for k in range(tri_indices.size()): 84 | tri_indices_view[k] = tri_indices[k] 85 | 86 | return points_indices_np, tri_indices_np 87 | -------------------------------------------------------------------------------- /Detection/inference/inference_metrics.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import torch 5 | import mathutils 6 | import math 7 | import open3d as o3d 8 | from eulerangles import euler2matrix 9 | 10 | 11 | def compute_voxel_iou(generated_volume, ground_truth_volume): 12 | ''' 13 | 3D voxel IoU between two voxel grids 14 | ''' 15 | 16 | _volume = torch.ge(generated_volume, 0.5).float() 17 | intersection = torch.sum(_volume.mul(ground_truth_volume)).float() 18 | union = torch.sum(torch.ge(_volume.add(ground_truth_volume), 1)).float() # if _volume+ gt_volume >= 1 is union 19 | voxel_iou = (intersection / union).detach().cpu().item() 20 | 21 | return voxel_iou 22 | 23 | def get_rotation_diff(gt_rotation, pred_rotation): 24 | ''' 25 | gt_rotation: as euler coordinates xyz in radiants 26 | pred_rotation: as rotation matrix 27 | cls_name: indicating rotation symmetry 28 | calculate rotations difference between gt and predicted rotation matrix, min for two poses in y rotated by 180 degree, distinc 29 | ''' 30 | 31 | euler = mathutils.Euler(gt_rotation) 32 | gt_rotation = 
np.array(euler.to_matrix()) 33 | 34 | R1 = pred_rotation / np.cbrt(np.linalg.det(pred_rotation)) # R1 = pred 35 | R2 = gt_rotation / np.cbrt(np.linalg.det(gt_rotation)) # R2 = GT 36 | 37 | y_180_RT = np.diag([-1.0, 1.0, -1.0]) 38 | R = R1 @ R2.transpose() 39 | 40 | R_rot = R1 @ y_180_RT @ R2.transpose() 41 | theta = min(np.arccos((np.trace(R) - 1) / 2), 42 | np.arccos((np.trace(R_rot) - 1) / 2)) 43 | 44 | theta_deg = theta * 180 / np.pi 45 | 46 | return theta_deg 47 | 48 | def get_location_diff(gt_location, pred_location): 49 | ''' 50 | Calculate location difference of predicted pose in meter 51 | gt_location: xyz location in world coords 52 | pred_location: pred xyz location 53 | ''' 54 | 55 | dist = np.linalg.norm(gt_location - pred_location) 56 | 57 | return dist 58 | 59 | 60 | def get_location_diff_boxcenter(gt_3dbox, pred_3dbox): 61 | ''' 62 | Calculate location difference of predicted pose in meter based on bounding box centers 63 | gt_3dbox: 8x3 array 64 | pred_3dbox: 8x3 array 65 | ''' 66 | 67 | gtloc_box = o3d.geometry.OrientedBoundingBox() 68 | 69 | if gt_3dbox.sum() == 0: 70 | return None 71 | try: 72 | gtloc_box = gtloc_box.create_from_points(o3d.utility.Vector3dVector(gt_3dbox)) 73 | except: 74 | return None 75 | 76 | center_gtbox = gtloc_box.get_center() 77 | 78 | predloc_box = o3d.geometry.OrientedBoundingBox() 79 | try: 80 | predloc_box = predloc_box.create_from_points(o3d.utility.Vector3dVector(pred_3dbox)) 81 | except: 82 | return None 83 | center_predbox = predloc_box.get_center() 84 | 85 | dist = np.linalg.norm(center_gtbox - center_predbox) 86 | #print('Box location center', center_gtbox, center_predbox) 87 | 88 | return dist 89 | 90 | def get_mean_iou(voxel_list): 91 | 92 | if voxel_list: 93 | voxel_arr = np.array(voxel_list) 94 | voxel_arr = voxel_arr[~np.isnan(voxel_arr)] 95 | voxel_arr = voxel_arr.mean() 96 | else: 97 | voxel_arr = 'No Data' 98 | 99 | return voxel_arr 100 | 101 | def get_median_iou(voxel_list): 102 | 103 | if voxel_list: 104 | voxel_arr = np.array(voxel_list) 105 | voxel_arr = voxel_arr[~np.isnan(voxel_arr)] 106 | voxel_arr = np.median(voxel_arr) 107 | else: 108 | voxel_arr = 'No Data' 109 | 110 | return voxel_arr -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # 3D Multi-Object Tracking with Differentiable Pose Estimation 3 | 4 |

5 | Our network leverages a 2D detection backbone with additional NOC prediction and 3D reconstruction heads to predict per-object dense correspondences maps and 7-DoF pose parameters. We leverage those correspondences in our neural message passing based, fully end-to-end learnable network to model dependencies between objects over time for consistent multi-object tracking 6 | 7 | 8 | ## Introduction 9 | This a PyTorch implementation of our work "3D Multi-Object Tracking with Differentiable Pose Estimation". 10 | In this project, we present a novel framework for 3D Multi-Object Tracking in indoor scenes. 11 | 12 | Please check out the project website [3D_MOT](https://domischmauser.github.io/3D_MOT/). 13 | 14 | ## Dataset 15 | To download our novel 3D MOT dataset **MOTFront** consisting of 2381 unique indoor sequences, check out [Dataset](https://domischmauser.github.io/3D_MOT/). 16 | Or use the direct download link [MOTFront](http://kaldir.vc.in.tum.de/dominik/MOTFront.zip). 17 | 18 | ## Setup 19 | To install network dependencies refer to **environment.yaml**. 20 | We tested our code on a Linux / Ubuntu distribution. 21 | 22 | ## Paths 23 | Refer to the **baseconfig.py** file for a general path setup. 24 | Store pre-trained Detection networks in the **Detection/model** directory and pre-trained Tracking networks in the **Tracking/model** directory. 25 | Store MOTFront data in the **Detection/front_dataset** directory with subdirectories **train**, **val**, **test**. 26 | Store the **3D_front_mapping.csv** file in the **Detection/front_dataset** directory. 27 | 28 | 29 | ## Directories 30 | We split the code into two main blocks, represented with two folders: **Detection** and **Tracking**. 31 | 32 | Configurations for training the detection or end-to-end network can be set in the **cfg_setup.py** file. 33 | Configurations for training the tracking network can be set in the **options.py** and **graph_cfg.py** file. 34 | 35 | ## Basic Usage 36 | For training the 3D reconstruction and pose estimation pipeline independently, run the command: 37 | ``` 38 | python train_net.py 39 | ``` 40 | 41 | For training the tracking pipeline independently, which: 42 | - Requires inference results from the 3D reconstruction and pose estimation pipeline in a hdf5 format 43 | 44 | Run the command: 45 | ``` 46 | python train.py 47 | ``` 48 | 49 | For training our end-to-end network, set **eval_only = False** and run the command: 50 | ``` 51 | python train_combined.py 52 | ``` 53 | 54 | For inference on the 3D reconstruction and pose estimation pipeline, which: 55 | - Loads a pretrained network (best_model.pth) from the model folder 56 | - Stores inference results in the predicted_data folder in a hdf5 format 57 | 58 | Run the command: 59 | ``` 60 | python inference_detector.py 61 | ``` 62 | 63 | For inference on our tracking pipeline, which: 64 | - Loads a pretrained network (pretrained/edge_classifier.pth etc...) 
from the model folder 65 | - Ensure to set --use_graph if you are using a pretrained network with graph 66 | 67 | Run the command: 68 | ``` 69 | python inference.py 70 | ``` 71 | 72 | For inference on our end-to-end network, set the variables **eval_first = True** and **eval_only = True**: 73 | ``` 74 | python train_combined.py 75 | ``` 76 | 77 | 78 | 79 | ## Citation 80 | 81 | If you use the MOTFront data or code in your work, please kindly cite our work and our paper: 82 | 83 | ```bibtex 84 | @misc{https://doi.org/10.48550/arxiv.2206.13785, 85 | doi = {10.48550/ARXIV.2206.13785}, 86 | url = {https://arxiv.org/abs/2206.13785}, 87 | author = {Schmauser, Dominik and Qiu, Zeju and Müller, Norman and Nießner, Matthias}, 88 | keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, 89 | title = {3D Multi-Object Tracking with Differentiable Pose Estimation}, 90 | publisher = {arXiv}, 91 | year = {2022}, 92 | copyright = {Creative Commons Attribution 4.0 International} 93 | } 94 | ``` 95 | 96 | 97 | -------------------------------------------------------------------------------- /Detection/data/office_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | 8 | sys.path.append('..') #Hack add ROOT DIR 9 | from baseconfig import CONF 10 | 11 | class Office_dataset(Dataset): 12 | ''' 13 | Office Real-World dataset data loader class 14 | ''' 15 | def __init__(self, base_dir, split='infer'): 16 | self.split = split 17 | self.data_dir = base_dir 18 | self.scenes = [f for f in os.listdir(os.path.abspath(self.data_dir))] 19 | self.scenes.sort() 20 | self.imgs = [] 21 | for scene in self.scenes: 22 | scene_path = os.path.join(self.data_dir, scene, 'rgb') 23 | scene_imgs = [os.path.join(scene_path, img) for img in os.listdir(scene_path)] 24 | scene_imgs.sort() 25 | self.imgs += scene_imgs 26 | self.mask_person = False 27 | self.resize_img = True 28 | 29 | def __len__(self): 30 | return len(self.imgs) 31 | 32 | def __getitem__(self, idx): 33 | 34 | img_dict = dict() 35 | 36 | img_path = self.imgs[idx] 37 | seq_path = img_path[:img_path.find('rgb')] 38 | img_name = img_path[img_path.find('rgb')+4:] 39 | depth_path = os.path.join(seq_path, 'depth', img_name) 40 | densepose_path = os.path.join(seq_path, 'denseposes', img_name) 41 | 42 | # RGB 43 | rgb_img = self.load_rgb(img_path, fmt='bgr') #todo needs loading as bgr 44 | rgb_img_fs = rgb_img 45 | 46 | # Depth 47 | depth_img = self.load_depth(depth_path) 48 | depth_img_fs = depth_img 49 | 50 | # Densepose 51 | densepose_mask = self.load_depth(densepose_path) 52 | bin_mask = (densepose_mask == 0.0).astype(int) # BG 1, Person 0 53 | bin_mask = np.expand_dims(bin_mask, axis=-1).repeat(3, axis=-1) 54 | 55 | if self.mask_person: 56 | rgb_img *= bin_mask 57 | #rgb_img[rgb_img == 0] = 255 58 | 59 | if self.resize_img: 60 | rgb_img = cv2.resize(rgb_img, dsize=(320, 240), interpolation=cv2.INTER_LINEAR) 61 | depth_img = cv2.resize(depth_img, dsize=(320, 240), interpolation=cv2.INTER_LINEAR) 62 | 63 | # Camera calibration 64 | calibration = os.path.join(seq_path, 'calibration.txt') 65 | with open(calibration) as f: 66 | tmp = f.readlines() 67 | 68 | calibration_list = tmp[0].split() 69 | fx, fy = float(calibration_list[0]), float(calibration_list[1]) 70 | cx, cy = float(calibration_list[2]), float(calibration_list[3]) 71 | 
camera_intrinsics_fs = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) #fullsized 72 | if self.resize_img: 73 | fx *= 0.5 74 | fy *= 0.5 75 | cx *= 0.5 76 | cy *= 0.5 77 | camera_intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 78 | 79 | # Output 80 | img_dict['seq_id'] = seq_path 81 | img_dict['img_id'] = img_name 82 | img_dict['rgb'] = rgb_img 83 | img_dict['rgb_fs'] = rgb_img_fs 84 | img_dict['depth'] = depth_img 85 | img_dict['depth_fs'] = depth_img_fs 86 | img_dict['densepose'] = densepose_mask 87 | img_dict['camera_intrinsics'] = camera_intrinsics 88 | img_dict['camera_intrinsics_fs'] = camera_intrinsics_fs 89 | 90 | return img_dict 91 | 92 | def load_rgb(self, rgb_path, fmt='bgr'): 93 | ''' 94 | Loads a rgb image from a png file 95 | Detectron uses BGR! 96 | ''' 97 | bgr_img = cv2.imread(rgb_path) 98 | if fmt == 'rgb': 99 | rgb_img = bgr_img[:, :, ::-1] 100 | elif fmt == 'bgr': 101 | rgb_img = bgr_img 102 | rgb_img = np.array(rgb_img, dtype=np.float32) 103 | 104 | return rgb_img 105 | 106 | def load_depth(self, depth_path): 107 | ''' 108 | Loads a depth image or a densepose image from a png file 109 | ''' 110 | depth_img = cv2.imread(depth_path) 111 | depth_img = np.array(depth_img[:,:,0], dtype=np.float32) #all channels equal only use first 112 | 113 | return depth_img -------------------------------------------------------------------------------- /PoseEst/pose_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | 5 | def evaluateModel(OutTransform, SourceHom, TargetHom, PassThreshold): 6 | 7 | Diff = TargetHom - np.matmul(OutTransform, SourceHom) 8 | ResidualVec = np.linalg.norm(Diff[:3, :], axis=0) 9 | Residual = np.linalg.norm(ResidualVec) 10 | InlierIdx = np.where(ResidualVec < PassThreshold) 11 | nInliers = np.count_nonzero(InlierIdx) 12 | InlierRatio = nInliers / SourceHom.shape[1] 13 | 14 | return Residual, InlierRatio, InlierIdx[0] 15 | 16 | def estimateSimilarityUmeyama(SourceHom, TargetHom): 17 | ''' 18 | Procrustes analysis for pose fitting 19 | SourceHom: Pointcloud from NOCS map 20 | TargetHom: Depth pointcloud equals GT 21 | ''' 22 | 23 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 24 | TargetCentroid = np.mean(TargetHom[:3, :], axis=1) 25 | nPoints = SourceHom.shape[1] 26 | 27 | CenteredSource = SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 28 | CenteredTarget = TargetHom[:3, :] - np.tile(TargetCentroid, (nPoints, 1)).transpose() 29 | 30 | CovMatrix = np.matmul(CenteredTarget, np.transpose(CenteredSource)) / nPoints 31 | 32 | if np.isnan(CovMatrix).any(): 33 | print('nPoints:', nPoints) 34 | print(SourceHom.shape) 35 | print(TargetHom.shape) 36 | raise RuntimeError('There are NANs in the input.') 37 | 38 | U, D, Vh = np.linalg.svd(CovMatrix, full_matrices=True) 39 | d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0 40 | if d: 41 | D[-1] = -D[-1] 42 | U[:, -1] = -U[:, -1] 43 | 44 | Rotation = np.matmul(U, Vh).T 45 | 46 | varP = np.var(SourceHom[:3, :], axis=1).sum() 47 | if varP * np.sum(D) != 0: 48 | ScaleFact = 1/varP * np.sum(D) # scale factor 49 | else: 50 | ScaleFact = 1 # scale factor set to 1 since otherwise division by 0 51 | 52 | Scales = np.array([ScaleFact, ScaleFact, ScaleFact]) 53 | ScaleMatrix = np.diag(Scales) 54 | 55 | Translation = TargetHom[:3, :].mean(axis=1) - SourceHom[:3, :].mean(axis=1).dot(ScaleFact*Rotation) 56 | 57 | OutTransform = np.identity(4) 58 | OutTransform[:3, :3] = ScaleMatrix @ Rotation # todo check if T is 
correct 59 | OutTransform[:3, 3] = Translation 60 | 61 | return Scales, Rotation, Translation, OutTransform 62 | 63 | def getRANSACInliers(SourceHom, TargetHom, MaxIterations=100, PassThreshold=200, StopThreshold=1): 64 | ''' 65 | RANSAC Outlier Removal 66 | ''' 67 | 68 | BestResidual = 1e10 69 | BestInlierRatio = 0 70 | BestInlierIdx = np.arange(SourceHom.shape[1]) 71 | for i in range(0, MaxIterations): 72 | # Pick 10 random (but corresponding) points from source and target 73 | RandIdx = np.random.randint(SourceHom.shape[1], size=10) 74 | _, _, _, OutTransform = estimateSimilarityUmeyama(SourceHom[:, RandIdx], TargetHom[:, RandIdx]) 75 | Residual, InlierRatio, InlierIdx = evaluateModel(OutTransform, SourceHom, TargetHom, PassThreshold) 76 | if Residual < BestResidual: 77 | BestResidual = Residual 78 | BestInlierRatio = InlierRatio 79 | BestInlierIdx = InlierIdx 80 | if BestResidual < StopThreshold: 81 | break 82 | 83 | return SourceHom[:, BestInlierIdx], TargetHom[:, BestInlierIdx], BestInlierRatio 84 | 85 | 86 | def estimateSimilarityTransform(source: np.array, target: np.array, verbose=False, ratio_adapt = 1): 87 | SourceHom = np.transpose(np.hstack([source, np.ones([source.shape[0], 1])])) 88 | TargetHom = np.transpose(np.hstack([target, np.ones([source.shape[0], 1])])) 89 | 90 | # Auto-parameter selection based on source-target heuristics 91 | TargetNorm = np.mean(np.linalg.norm(target, axis=1)) 92 | SourceNorm = np.mean(np.linalg.norm(source, axis=1)) 93 | RatioTS = (TargetNorm / SourceNorm) 94 | RatioST = (SourceNorm / TargetNorm) 95 | PassT = RatioST*ratio_adapt if(RatioST>RatioTS) else RatioTS*ratio_adapt 96 | StopT = PassT / 100 97 | nIter = 100 98 | if verbose: 99 | print('Pass threshold: ', PassT) 100 | print('Stop threshold: ', StopT) 101 | print('Number of iterations: ', nIter) 102 | 103 | SourceInliersHom, TargetInliersHom, BestInlierRatio = getRANSACInliers(SourceHom, TargetHom, MaxIterations=nIter, PassThreshold=PassT, StopThreshold=StopT) 104 | 105 | if(BestInlierRatio < 0.1): 106 | print('[ WARN ] - Something is wrong. 
Small BestInlierRatio: ', BestInlierRatio) 107 | return None, None, None, None 108 | 109 | Scales, Rotation, Translation, OutTransform = estimateSimilarityUmeyama(SourceInliersHom, TargetInliersHom) 110 | 111 | if verbose: 112 | print('BestInlierRatio:', BestInlierRatio) 113 | print('Rotation:\n', Rotation) 114 | print('Translation:\n', Translation) 115 | print('Scales:', Scales) 116 | 117 | return Scales, Rotation, Translation, OutTransform 118 | -------------------------------------------------------------------------------- /BlenderProc/utils/voxels.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy import ndimage 4 | # from skimage.measure import block_reduce 5 | from utils.libvoxelize.voxelize import voxelize_mesh_ 6 | from utils.libmesh import check_mesh_contains 7 | 8 | 9 | class VoxelGrid: 10 | def __init__(self, data, loc=(0., 0., 0.), scale=1): 11 | assert(data.shape[0] == data.shape[1] == data.shape[2]) 12 | data = np.asarray(data, dtype=np.bool) 13 | loc = np.asarray(loc) 14 | self.data = data 15 | self.loc = loc 16 | self.scale = scale 17 | 18 | @property 19 | def resolution(self): 20 | assert(self.data.shape[0] == self.data.shape[1] == self.data.shape[2]) 21 | return self.data.shape[0] 22 | 23 | def contains(self, points): 24 | nx = self.resolution 25 | 26 | # Rescale bounding box to [-0.5, 0.5]^3 27 | points = (points - self.loc) / self.scale 28 | # Discretize points to [0, nx-1]^3 29 | points_i = ((points + 0.5) * nx).astype(np.int32) 30 | # i1, i2, i3 have sizes (batch_size, T) 31 | i1, i2, i3 = points_i[..., 0], points_i[..., 1], points_i[..., 2] 32 | # Only use indices inside bounding box 33 | mask = ( 34 | (i1 >= 0) & (i2 >= 0) & (i3 >= 0) 35 | & (nx > i1) & (nx > i2) & (nx > i3) 36 | ) 37 | # Prevent out of bounds error 38 | i1 = i1[mask] 39 | i2 = i2[mask] 40 | i3 = i3[mask] 41 | 42 | # Compute values, default value outside box is 0 43 | occ = np.zeros(points.shape[:-1], dtype=np.bool) 44 | occ[mask] = self.data[i1, i2, i3] 45 | 46 | return occ 47 | 48 | 49 | def voxelize_ray(mesh, resolution): 50 | occ_surface = voxelize_surface(mesh, resolution) 51 | # TODO: use surface voxels here? 52 | occ_interior = voxelize_interior(mesh, resolution) 53 | occ = (occ_interior | occ_surface) 54 | return occ 55 | 56 | 57 | def voxelize_fill(mesh, resolution): 58 | bounds = mesh.bounds 59 | if (np.abs(bounds) >= 0.5).any(): 60 | raise ValueError('voxelize fill is only supported if mesh is inside [-0.5, 0.5]^3/') 61 | 62 | occ = voxelize_surface(mesh, resolution) 63 | occ = ndimage.morphology.binary_fill_holes(occ) 64 | return occ 65 | 66 | 67 | def voxelize_surface(mesh, resolution): 68 | vertices = mesh.vertices 69 | faces = mesh.faces 70 | 71 | vertices = (vertices + 0.5) * resolution # in range[0,32] 72 | face_loc = vertices[faces] 73 | occ = np.full((resolution,) * 3, 0, dtype=np.int32) 74 | face_loc = face_loc.astype(np.float32) 75 | 76 | voxelize_mesh_(occ, face_loc) 77 | occ = (occ != 0) 78 | 79 | return occ 80 | 81 | 82 | def voxelize_interior(mesh, resolution): 83 | shape = (resolution,) * 3 84 | bb_min = (0.5,) * 3 85 | bb_max = (resolution - 0.5,) * 3 86 | # Create points. 
Add noise to break symmetry 87 | points = make_3d_grid(bb_min, bb_max, shape=shape).numpy() 88 | points = points + 0.1 * (np.random.rand(*points.shape) - 0.5) 89 | points = (points / resolution - 0.5) 90 | occ = check_mesh_contains(mesh, points) 91 | occ = occ.reshape(shape) 92 | 93 | return occ 94 | 95 | 96 | def check_voxel_occupied(occupancy_grid): 97 | occ = occupancy_grid 98 | 99 | occupied = ( 100 | occ[..., :-1, :-1, :-1] 101 | & occ[..., :-1, :-1, 1:] 102 | & occ[..., :-1, 1:, :-1] 103 | & occ[..., :-1, 1:, 1:] 104 | & occ[..., 1:, :-1, :-1] 105 | & occ[..., 1:, :-1, 1:] 106 | & occ[..., 1:, 1:, :-1] 107 | & occ[..., 1:, 1:, 1:] 108 | ) 109 | return occupied 110 | 111 | 112 | def check_voxel_unoccupied(occupancy_grid): 113 | occ = occupancy_grid 114 | 115 | unoccupied = ~( 116 | occ[..., :-1, :-1, :-1] 117 | | occ[..., :-1, :-1, 1:] 118 | | occ[..., :-1, 1:, :-1] 119 | | occ[..., :-1, 1:, 1:] 120 | | occ[..., 1:, :-1, :-1] 121 | | occ[..., 1:, :-1, 1:] 122 | | occ[..., 1:, 1:, :-1] 123 | | occ[..., 1:, 1:, 1:] 124 | ) 125 | return unoccupied 126 | 127 | 128 | def check_voxel_boundary(occupancy_grid): 129 | occupied = check_voxel_occupied(occupancy_grid) 130 | unoccupied = check_voxel_unoccupied(occupancy_grid) 131 | return ~occupied & ~unoccupied 132 | 133 | 134 | def make_3d_grid(bb_min, bb_max, shape): 135 | ''' Makes a 3D grid. 136 | 137 | Args: 138 | bb_min (tuple): bounding box minimum 139 | bb_max (tuple): bounding box maximum 140 | shape (tuple): output shape 141 | ''' 142 | size = shape[0] * shape[1] * shape[2] 143 | 144 | pxs = torch.linspace(bb_min[0], bb_max[0], shape[0]) 145 | pys = torch.linspace(bb_min[1], bb_max[1], shape[1]) 146 | pzs = torch.linspace(bb_min[2], bb_max[2], shape[2]) 147 | 148 | pxs = pxs.view(-1, 1, 1).expand(*shape).contiguous().view(size) 149 | pys = pys.view(1, -1, 1).expand(*shape).contiguous().view(size) 150 | pzs = pzs.view(1, 1, -1).expand(*shape).contiguous().view(size) 151 | p = torch.stack([pxs, pys, pzs], dim=1) 152 | 153 | return p 154 | -------------------------------------------------------------------------------- /Detection/cfg_setup.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from detectron2.config import get_cfg 3 | from detectron2.config import CfgNode as CN 4 | from detectron2 import model_zoo 5 | 6 | sys.path.append('..') #Hack add ROOT DIR 7 | from baseconfig import CONF 8 | 9 | # Initialize training config 10 | def init_cfg(num_classes, combined=False, run_test=False, office=False, office_train=False): 11 | ''' 12 | Set parameters: 13 | run_test: for final test run 14 | eval_period: num iterations between each evaluation run 15 | ims_per_batch: batch size 16 | checkpoint period: save model after n iterations 17 | base_lr & weight_decay: training setup 18 | ''' 19 | 20 | cfg = get_cfg() 21 | cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # Loads only backbone weights 22 | 23 | # MOTFront Dataset 24 | if not office: 25 | cfg.DATASETS.TRAIN = ("front_train",) 26 | if not run_test: 27 | cfg.DATASETS.TEST = ("front_val",) 28 | else: 29 | cfg.DATASETS.TEST = ("front_test",) 30 | 31 | # Office Dataset 32 | else: 33 | cfg.DATASETS.TRAIN = ("office_train",) 34 | if office_train: 35 | cfg.DATASETS.TEST = ("office_train",) 36 | else: 37 | cfg.DATASETS.TEST = ("office_inference",) 38 | 39 | cfg.TEST.EVAL_PERIOD = 1000 40 | cfg.TEST.IMG_SAVE_FREQ = 4 # Every 4th evaluation run save pred images to tensorboard 41 | 
cfg.TEST.START_EVAL = 1 # Start evaluation after n iterations 42 | cfg.DATALOADER.ASPECT_RATIO_GROUPING = False 43 | 44 | # Dataloader 45 | cfg.DATALOADER.NUM_WORKERS = 0 46 | 47 | # Input 48 | cfg.INPUT.MIN_SIZE_TRAIN = (240,) 49 | # Sample size of smallest side by choice or random selection from range give by 50 | # INPUT.MIN_SIZE_TRAIN 51 | cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" 52 | # Maximum size of the side of the image during training 53 | cfg.INPUT.MAX_SIZE_TRAIN = 320 54 | # Size of the smallest side of the image during testing. Set to zero to disable resize in testing. 55 | cfg.INPUT.MIN_SIZE_TEST = 240 56 | # Maximum size of the side of the image during testing 57 | cfg.INPUT.MAX_SIZE_TEST = 320 58 | cfg.INPUT.RANDOM_FLIP = 'none' 59 | cfg.INPUT.FORMAT = "BGR" # Image input format -> will be transformed to rgb in mapper heads 60 | 61 | # ROI HEADS 62 | cfg.MODEL.ROI_HEADS.NAME = "VoxelNocsHeads" 63 | cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 64 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes 65 | cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.75] 66 | cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.20 67 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1 68 | 69 | if not office: 70 | cfg.MODEL.PIXEL_MEAN = [59.64, 61.96, 64.02] # MOTFront 71 | else: 72 | cfg.MODEL.PIXEL_MEAN = [92.0080866, 98.01352945, 121.7431208] # office 73 | 74 | cfg.MODEL.PIXEL_STD = [1, 1, 1] 75 | cfg.MODEL.MASK_ON = True 76 | 77 | # Voxel Head 78 | cfg.MODEL.VOXEL_ON = True 79 | cfg.MODEL.ROI_VOXEL_HEAD = CN() 80 | cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT = 0.75 81 | if office_train: 82 | cfg.MODEL.VOXEL_ON = True 83 | cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT = 0.015 84 | 85 | cfg.MODEL.ROI_VOXEL_HEAD.NAME = 'Pix2VoxDecoder' 86 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_RESOLUTION = 14 87 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_TYPE = "ROIAlign" 88 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_SAMPLING_RATIO = 0 89 | 90 | # Nocs Head 91 | cfg.MODEL.NOCS_ON = True 92 | if office_train: 93 | cfg.MODEL.NOCS_ON = False 94 | cfg.MODEL.ROI_NOCS_HEAD = CN() 95 | cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS = False # True for classification loss, False for smooth l1 loss 96 | cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS = 32 97 | if cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS: 98 | cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT = 0.2 99 | else: 100 | cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT = 3 101 | cfg.MODEL.ROI_NOCS_HEAD.IOU_THRES = 0.5 102 | cfg.MODEL.ROI_NOCS_HEAD.NAME = 'NocsDecoder' 103 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_RESOLUTION = 14 104 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_TYPE = "ROIAlign" 105 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_SAMPLING_RATIO = 0 106 | 107 | # Solver Options 108 | cfg.SOLVER.CHECKPOINT_PERIOD = 3000 #save model each n iterations 109 | cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" 110 | cfg.SOLVER.STEPS = [] # decay learning rate 111 | cfg.SOLVER.WARMUP_FACTOR = 1 112 | cfg.SOLVER.WARMUP_ITERS = 0 113 | cfg.SOLVER.WARMUP_METHOD = "linear" 114 | cfg.SOLVER.GAMMA = 1 115 | cfg.SOLVER.WEIGHT_DECAY = 0.0005 # L2-Regularization 116 | cfg.SOLVER.IMS_PER_BATCH = 2 # Batch size 117 | cfg.SOLVER.BASE_LR = 0.0008 118 | cfg.SOLVER.MAX_ITER = 240000 119 | 120 | # Combined settings 121 | if combined: 122 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 123 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.4 # Overlap threshold used for non-maximum suppression (suppress boxes with IoU >= this threshold) 124 | elif office: 125 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 # higher more suppression 126 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2 # lower more suppression, overlap threshold used for 
non-maximum suppression (suppress boxes with IoU >= this threshold) 127 | 128 | 129 | cfg.OUTPUT_DIR = CONF.PATH.DETECTOUTPUT 130 | 131 | return cfg 132 | 133 | 134 | def inference_cfg(num_classes): 135 | 136 | train_cfg = init_cfg(num_classes) 137 | train_cfg.MODEL.WEIGHTS = os.path.join(CONF.PATH.DETECTMODEL, "best_model.pth") # path to the model we just trained 138 | train_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 139 | train_cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.4 # Overlap threshold used for non-maximum suppression (suppress boxes with IoU >= this threshold) 140 | 141 | return train_cfg 142 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/inside_mesh.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .triangle_hash import TriangleHash as _TriangleHash 3 | 4 | 5 | def check_mesh_contains(mesh, points, hash_resolution=512): 6 | intersector = MeshIntersector(mesh, hash_resolution) 7 | contains = intersector.query(points) 8 | return contains 9 | 10 | 11 | class MeshIntersector: 12 | def __init__(self, mesh, resolution=512): 13 | triangles = mesh.vertices[mesh.faces].astype(np.float64) 14 | n_tri = triangles.shape[0] 15 | 16 | self.resolution = resolution 17 | self.bbox_min = triangles.reshape(3 * n_tri, 3).min(axis=0) 18 | self.bbox_max = triangles.reshape(3 * n_tri, 3).max(axis=0) 19 | # Tranlate and scale it to [0.5, self.resolution - 0.5]^3 20 | self.scale = (resolution - 1) / (self.bbox_max - self.bbox_min) 21 | self.translate = 0.5 - self.scale * self.bbox_min 22 | 23 | self._triangles = triangles = self.rescale(triangles) 24 | # assert(np.allclose(triangles.reshape(-1, 3).min(0), 0.5)) 25 | # assert(np.allclose(triangles.reshape(-1, 3).max(0), resolution - 0.5)) 26 | 27 | triangles2d = triangles[:, :, :2] 28 | self._tri_intersector2d = TriangleIntersector2d( 29 | triangles2d, resolution) 30 | 31 | def query(self, points): 32 | # Rescale points 33 | points = self.rescale(points) 34 | 35 | # placeholder result with no hits we'll fill in later 36 | contains = np.zeros(len(points), dtype=np.bool) 37 | 38 | # cull points outside of the axis aligned bounding box 39 | # this avoids running ray tests unless points are close 40 | inside_aabb = np.all( 41 | (0 <= points) & (points <= self.resolution), axis=1) 42 | if not inside_aabb.any(): 43 | return contains 44 | 45 | # Only consider points inside bounding box 46 | mask = inside_aabb 47 | points = points[mask] 48 | 49 | # Compute intersection depth and check order 50 | points_indices, tri_indices = self._tri_intersector2d.query(points[:, :2]) 51 | 52 | triangles_intersect = self._triangles[tri_indices] 53 | points_intersect = points[points_indices] 54 | 55 | depth_intersect, abs_n_2 = self.compute_intersection_depth( 56 | points_intersect, triangles_intersect) 57 | 58 | # Count number of intersections in both directions 59 | smaller_depth = depth_intersect >= points_intersect[:, 2] * abs_n_2 60 | bigger_depth = depth_intersect < points_intersect[:, 2] * abs_n_2 61 | points_indices_0 = points_indices[smaller_depth] 62 | points_indices_1 = points_indices[bigger_depth] 63 | 64 | nintersect0 = np.bincount(points_indices_0, minlength=points.shape[0]) 65 | nintersect1 = np.bincount(points_indices_1, minlength=points.shape[0]) 66 | 67 | # Check if point contained in mesh 68 | contains1 = (np.mod(nintersect0, 2) == 1) 69 | contains2 = (np.mod(nintersect1, 2) == 1) 70 | if (contains1 != contains2).any(): 71 | 
print('Warning: contains1 != contains2 for some points.') 72 | contains[mask] = (contains1 & contains2) 73 | return contains 74 | 75 | def compute_intersection_depth(self, points, triangles): 76 | t1 = triangles[:, 0, :] 77 | t2 = triangles[:, 1, :] 78 | t3 = triangles[:, 2, :] 79 | 80 | v1 = t3 - t1 81 | v2 = t2 - t1 82 | # v1 = v1 / np.linalg.norm(v1, axis=-1, keepdims=True) 83 | # v2 = v2 / np.linalg.norm(v2, axis=-1, keepdims=True) 84 | 85 | normals = np.cross(v1, v2) 86 | alpha = np.sum(normals[:, :2] * (t1[:, :2] - points[:, :2]), axis=1) 87 | 88 | n_2 = normals[:, 2] 89 | t1_2 = t1[:, 2] 90 | s_n_2 = np.sign(n_2) 91 | abs_n_2 = np.abs(n_2) 92 | 93 | mask = (abs_n_2 != 0) 94 | 95 | depth_intersect = np.full(points.shape[0], np.nan) 96 | depth_intersect[mask] = \ 97 | t1_2[mask] * abs_n_2[mask] + alpha[mask] * s_n_2[mask] 98 | 99 | # Test the depth: 100 | # TODO: remove and put into tests 101 | # points_new = np.concatenate([points[:, :2], depth_intersect[:, None]], axis=1) 102 | # alpha = (normals * t1).sum(-1) 103 | # mask = (depth_intersect == depth_intersect) 104 | # assert(np.allclose((points_new[mask] * normals[mask]).sum(-1), 105 | # alpha[mask])) 106 | return depth_intersect, abs_n_2 107 | 108 | def rescale(self, array): 109 | array = self.scale * array + self.translate 110 | return array 111 | 112 | 113 | class TriangleIntersector2d: 114 | def __init__(self, triangles, resolution=128): 115 | self.triangles = triangles 116 | self.tri_hash = _TriangleHash(triangles, resolution) 117 | 118 | def query(self, points): 119 | point_indices, tri_indices = self.tri_hash.query(points) 120 | point_indices = np.array(point_indices, dtype=np.int64) 121 | tri_indices = np.array(tri_indices, dtype=np.int64) 122 | points = points[point_indices] 123 | triangles = self.triangles[tri_indices] 124 | mask = self.check_triangles(points, triangles) 125 | point_indices = point_indices[mask] 126 | tri_indices = tri_indices[mask] 127 | return point_indices, tri_indices 128 | 129 | def check_triangles(self, points, triangles): 130 | contains = np.zeros(points.shape[0], dtype=np.bool) 131 | A = triangles[:, :2] - triangles[:, 2:] 132 | A = A.transpose([0, 2, 1]) 133 | y = points - triangles[:, 2] 134 | 135 | detA = A[:, 0, 0] * A[:, 1, 1] - A[:, 0, 1] * A[:, 1, 0] 136 | 137 | mask = (np.abs(detA) != 0.) 
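        # Solve only where the 2x2 edge matrix is invertible; degenerate (zero-area)
        # triangles in the projection are skipped.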
138 | A = A[mask] 139 | y = y[mask] 140 | detA = detA[mask] 141 | 142 | s_detA = np.sign(detA) 143 | abs_detA = np.abs(detA) 144 | 145 | u = (A[:, 1, 1] * y[:, 0] - A[:, 0, 1] * y[:, 1]) * s_detA 146 | v = (-A[:, 1, 0] * y[:, 0] + A[:, 0, 0] * y[:, 1]) * s_detA 147 | 148 | sum_uv = u + v 149 | contains[mask] = ( 150 | (0 < u) & (u < abs_detA) & (0 < v) & (v < abs_detA) 151 | & (0 < sum_uv) & (sum_uv < abs_detA) 152 | ) 153 | return contains 154 | 155 | -------------------------------------------------------------------------------- /Tracking/options.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys 3 | import argparse 4 | 5 | # the directory that options.py resides in 6 | file_dir = os.path.dirname(__file__) 7 | 8 | sys.path.append('..') #Hack add ROOT DIR 9 | from baseconfig import CONF 10 | 11 | 12 | class Options: 13 | def __init__(self): 14 | self.parser = argparse.ArgumentParser( 15 | description="Tracking options") 16 | 17 | # PATHS 18 | self.parser.add_argument("--base_dir", 19 | type=str, 20 | help="path to the training data", 21 | default=CONF.PATH.TRACKDATA) 22 | self.parser.add_argument("--log_dir", 23 | type=str, 24 | help="log directory", 25 | default=CONF.PATH.TRACKOUTPUT) 26 | 27 | # Network 28 | self.parser.add_argument("--use_graph", 29 | type=bool, 30 | help="Use Graph Neural Network for edge classification", 31 | default=True) 32 | self.parser.add_argument("--seq_len", 33 | type=int, 34 | help="Length of the input sequence", 35 | default=25) 36 | self.parser.add_argument("--no_pose", 37 | type=bool, 38 | help="Exclude pose for edge classification", 39 | default=False) 40 | self.parser.add_argument("--no_geo", 41 | type=bool, 42 | help="Exclude pose for edge classification", 43 | default=False) 44 | self.parser.add_argument("--rel_app", 45 | type=bool, 46 | help="Use a relative appearance feature for graph edges", 47 | default=False) 48 | self.parser.add_argument("--as_quaternion", 49 | type=bool, 50 | help="Use quaternion angles for rotation", 51 | default=False) 52 | self.parser.add_argument("--precompute_feats", 53 | type=bool, 54 | help="Precompute Siamese features and store as hdf5", 55 | default=False) 56 | 57 | 58 | # Model Parameters 59 | self.parser.add_argument("--learning_rate", 60 | type=float, 61 | help="learning rate", 62 | default=1e-3) 63 | self.parser.add_argument("--weight_decay", # L2 Regularization 64 | type=float, 65 | help="weight decay", 66 | default=1e-4) # 1e-4 67 | self.parser.add_argument("--num_epochs", 68 | type=int, 69 | help="number of epochs", 70 | default=100) 71 | self.parser.add_argument("--batch_size", 72 | type=int, 73 | help="batch size", 74 | default=2) 75 | self.parser.add_argument("--use_augmentation", 76 | type=bool, 77 | help="use data augmentation", 78 | default=False) 79 | self.parser.add_argument("--num_workers", 80 | type=int, 81 | help="number of dataloader workers", 82 | default=0) 83 | self.parser.add_argument("--use_triplet", 84 | type=bool, 85 | help="Use triplet loss for edge classification", 86 | default=False) 87 | self.parser.add_argument("--use_l1", 88 | type=bool, 89 | help="Use l1 loss for edge classification", 90 | default=False) 91 | 92 | # Logging 93 | self.parser.add_argument("--save_frequency", 94 | type=int, 95 | help="number of epochs between each save", 96 | default=15) 97 | 98 | self.parser.add_argument("--start_saving", 99 | type=int, 100 | help="epoch start to save weights", 101 | default=15) 102 | 103 | 
self.parser.add_argument("--start_saving_optimizer", 104 | type=int, 105 | help="epoch start to save weights", 106 | default=14) 107 | 108 | self.parser.add_argument("--log_frequency", 109 | type=int, 110 | help="number of batches between each tensorboard log", 111 | default=10) 112 | 113 | self.parser.add_argument("--save_model", 114 | type=bool, 115 | help="save model", 116 | default=True) 117 | 118 | self.parser.add_argument("--resume", 119 | type=bool, 120 | help="resume training", 121 | default=False) 122 | 123 | self.parser.add_argument("--load_weights_folder", 124 | type=str, 125 | help="folder of pretrain model", 126 | default=os.path.join(file_dir, "model/pretrained")) 127 | 128 | self.parser.add_argument("--models_to_load", 129 | type=list, 130 | help="pretrained model to load", 131 | default=['edge_classifier', 'edge_encoder', 'voxel_encoder', 'graph_net']) 132 | 133 | def parse(self): 134 | self.options = self.parser.parse_args() 135 | return self.options 136 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/tribox2.h: -------------------------------------------------------------------------------- 1 | /********************************************************/ 2 | /* AABB-triangle overlap test code */ 3 | /* by Tomas Akenine-M�ller */ 4 | /* Function: int triBoxOverlap(float boxcenter[3], */ 5 | /* float boxhalfsize[3],float triverts[3][3]); */ 6 | /* History: */ 7 | /* 2001-03-05: released the code in its first version */ 8 | /* 2001-06-18: changed the order of the tests, faster */ 9 | /* */ 10 | /* Acknowledgement: Many thanks to Pierre Terdiman for */ 11 | /* suggestions and discussions on how to optimize code. */ 12 | /* Thanks to David Hunt for finding a ">="-bug! */ 13 | /********************************************************/ 14 | #include 15 | #include 16 | 17 | #define X 0 18 | #define Y 1 19 | #define Z 2 20 | 21 | #define CROSS(dest,v1,v2) \ 22 | dest[0]=v1[1]*v2[2]-v1[2]*v2[1]; \ 23 | dest[1]=v1[2]*v2[0]-v1[0]*v2[2]; \ 24 | dest[2]=v1[0]*v2[1]-v1[1]*v2[0]; 25 | 26 | #define DOT(v1,v2) (v1[0]*v2[0]+v1[1]*v2[1]+v1[2]*v2[2]) 27 | 28 | #define SUB(dest,v1,v2) \ 29 | dest[0]=v1[0]-v2[0]; \ 30 | dest[1]=v1[1]-v2[1]; \ 31 | dest[2]=v1[2]-v2[2]; 32 | 33 | #define FINDMINMAX(x0,x1,x2,min,max) \ 34 | min = max = x0; \ 35 | if(x1max) max=x1;\ 37 | if(x2max) max=x2; 39 | 40 | int planeBoxOverlap(float normal[3],float d, float maxbox[3]) 41 | { 42 | int q; 43 | float vmin[3],vmax[3]; 44 | for(q=X;q<=Z;q++) 45 | { 46 | if(normal[q]>0.0f) 47 | { 48 | vmin[q]=-maxbox[q]; 49 | vmax[q]=maxbox[q]; 50 | } 51 | else 52 | { 53 | vmin[q]=maxbox[q]; 54 | vmax[q]=-maxbox[q]; 55 | } 56 | } 57 | if(DOT(normal,vmin)+d>0.0f) return 0; 58 | if(DOT(normal,vmax)+d>=0.0f) return 1; 59 | 60 | return 0; 61 | } 62 | 63 | 64 | /*======================== X-tests ========================*/ 65 | #define AXISTEST_X01(a, b, fa, fb) \ 66 | p0 = a*v0[Y] - b*v0[Z]; \ 67 | p2 = a*v2[Y] - b*v2[Z]; \ 68 | if(p0rad || max<-rad) return 0; 71 | 72 | #define AXISTEST_X2(a, b, fa, fb) \ 73 | p0 = a*v0[Y] - b*v0[Z]; \ 74 | p1 = a*v1[Y] - b*v1[Z]; \ 75 | if(p0rad || max<-rad) return 0; 78 | 79 | /*======================== Y-tests ========================*/ 80 | #define AXISTEST_Y02(a, b, fa, fb) \ 81 | p0 = -a*v0[X] + b*v0[Z]; \ 82 | p2 = -a*v2[X] + b*v2[Z]; \ 83 | if(p0rad || max<-rad) return 0; 86 | 87 | #define AXISTEST_Y1(a, b, fa, fb) \ 88 | p0 = -a*v0[X] + b*v0[Z]; \ 89 | p1 = -a*v1[X] + b*v1[Z]; \ 90 | if(p0rad || max<-rad) return 0; 93 | 94 | 
/*======================== Z-tests ========================*/ 95 | 96 | #define AXISTEST_Z12(a, b, fa, fb) \ 97 | p1 = a*v1[X] - b*v1[Y]; \ 98 | p2 = a*v2[X] - b*v2[Y]; \ 99 | if(p2rad || max<-rad) return 0; 102 | 103 | #define AXISTEST_Z0(a, b, fa, fb) \ 104 | p0 = a*v0[X] - b*v0[Y]; \ 105 | p1 = a*v1[X] - b*v1[Y]; \ 106 | if(p0rad || max<-rad) return 0; 109 | 110 | int triBoxOverlap(float boxcenter[3],float boxhalfsize[3],float tri0[3], float tri1[3], float tri2[3]) 111 | { 112 | 113 | /* use separating axis theorem to test overlap between triangle and box */ 114 | /* need to test for overlap in these directions: */ 115 | /* 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */ 116 | /* we do not even need to test these) */ 117 | /* 2) normal of the triangle */ 118 | /* 3) crossproduct(edge from tri, {x,y,z}-directin) */ 119 | /* this gives 3x3=9 more tests */ 120 | float v0[3],v1[3],v2[3]; 121 | float min,max,d,p0,p1,p2,rad,fex,fey,fez; 122 | float normal[3],e0[3],e1[3],e2[3]; 123 | 124 | /* This is the fastest branch on Sun */ 125 | /* move everything so that the boxcenter is in (0,0,0) */ 126 | SUB(v0, tri0, boxcenter); 127 | SUB(v1, tri1, boxcenter); 128 | SUB(v2, tri2, boxcenter); 129 | 130 | /* compute triangle edges */ 131 | SUB(e0,v1,v0); /* tri edge 0 */ 132 | SUB(e1,v2,v1); /* tri edge 1 */ 133 | SUB(e2,v0,v2); /* tri edge 2 */ 134 | 135 | /* Bullet 3: */ 136 | /* test the 9 tests first (this was faster) */ 137 | fex = fabs(e0[X]); 138 | fey = fabs(e0[Y]); 139 | fez = fabs(e0[Z]); 140 | AXISTEST_X01(e0[Z], e0[Y], fez, fey); 141 | AXISTEST_Y02(e0[Z], e0[X], fez, fex); 142 | AXISTEST_Z12(e0[Y], e0[X], fey, fex); 143 | 144 | fex = fabs(e1[X]); 145 | fey = fabs(e1[Y]); 146 | fez = fabs(e1[Z]); 147 | AXISTEST_X01(e1[Z], e1[Y], fez, fey); 148 | AXISTEST_Y02(e1[Z], e1[X], fez, fex); 149 | AXISTEST_Z0(e1[Y], e1[X], fey, fex); 150 | 151 | fex = fabs(e2[X]); 152 | fey = fabs(e2[Y]); 153 | fez = fabs(e2[Z]); 154 | AXISTEST_X2(e2[Z], e2[Y], fez, fey); 155 | AXISTEST_Y1(e2[Z], e2[X], fez, fex); 156 | AXISTEST_Z12(e2[Y], e2[X], fey, fex); 157 | 158 | /* Bullet 1: */ 159 | /* first test overlap in the {x,y,z}-directions */ 160 | /* find min, max of the triangle each direction, and test for overlap in */ 161 | /* that direction -- this is equivalent to testing a minimal AABB around */ 162 | /* the triangle against the AABB */ 163 | 164 | /* test in X-direction */ 165 | FINDMINMAX(v0[X],v1[X],v2[X],min,max); 166 | if(min>boxhalfsize[X] || max<-boxhalfsize[X]) return 0; 167 | 168 | /* test in Y-direction */ 169 | FINDMINMAX(v0[Y],v1[Y],v2[Y],min,max); 170 | if(min>boxhalfsize[Y] || max<-boxhalfsize[Y]) return 0; 171 | 172 | /* test in Z-direction */ 173 | FINDMINMAX(v0[Z],v1[Z],v2[Z],min,max); 174 | if(min>boxhalfsize[Z] || max<-boxhalfsize[Z]) return 0; 175 | 176 | /* Bullet 2: */ 177 | /* test if the box intersects the plane of the triangle */ 178 | /* compute plane equation of triangle: normal*x+d=0 */ 179 | CROSS(normal,e0,e1); 180 | d=-DOT(normal,v0); /* plane eq: normal.x+d=0 */ 181 | if(!planeBoxOverlap(normal,d,boxhalfsize)) return 0; 182 | 183 | return 1; /* box and triangle overlaps */ 184 | } 185 | -------------------------------------------------------------------------------- /Detection/roi_heads/voxel_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | import fvcore.nn.weight_init as weight_init 4 | import sys 5 | import torch 6 | import numpy as np 7 | #import matplotlib.pyplot as plt 8 | 9 | from detectron2.layers import ShapeSpec, cat 10 | from detectron2.utils.events import get_event_storage 11 | from detectron2.utils.registry import Registry 12 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 13 | from torch import nn 14 | from torch.nn import functional as F 15 | from typing import Dict 16 | 17 | sys.path.append('..') #Hack add ROOT DIR 18 | from Detection.inference.inference_metrics import compute_voxel_iou 19 | from Detection.utils.train_utils import init_weights, balanced_BCE_loss 20 | 21 | 22 | 23 | ROI_VOXEL_HEAD_REGISTRY = Registry("ROI_VOXEL_HEAD") 24 | 25 | 26 | def voxel_loss(pred_voxel_logits, instances, pred_boxes, loss_weight=1, iou_thres=0.5): 27 | ''' 28 | Calculate BCE loss between predicted 32³ voxel grid and GT voxel grid if IoU larger threshold 29 | ''' 30 | 31 | start_instance = 0 32 | pred_voxel_logits = torch.squeeze(pred_voxel_logits, dim=1) # Num obj x 32x32x32 33 | mean_voxel_iou = [] 34 | loss_gt_voxels = [] 35 | loss_pred_voxels = [] 36 | 37 | 38 | for instances_per_image in instances: 39 | if len(instances_per_image) == 0: 40 | continue 41 | 42 | end_instance = start_instance + len(instances_per_image) 43 | 44 | gt_voxel_logits = instances_per_image.gt_voxels.to(dtype=torch.float) 45 | gt_boxes_per_image = instances_per_image.gt_boxes 46 | 47 | for i in range(start_instance, end_instance): 48 | 49 | abs_pred_box = pred_boxes[i, :].to(dtype=torch.int64) 50 | pred_box = Boxes(torch.unsqueeze(abs_pred_box, dim=0)) # XYXY 51 | 52 | pred_voxel = pred_voxel_logits[i,:,:,:] 53 | 54 | if torch.sum(pred_voxel) == 0: # empty detections 55 | continue 56 | 57 | ious = pairwise_iou(gt_boxes_per_image, pred_box) 58 | idx_max_iou = int(torch.argmax(ious)) 59 | max_iou = ious[idx_max_iou] 60 | 61 | if max_iou >= iou_thres: 62 | 63 | gt_voxel = gt_voxel_logits[idx_max_iou,:,:,:] 64 | voxel_iou = compute_voxel_iou(pred_voxel, gt_voxel) 65 | mean_voxel_iou.append(voxel_iou) 66 | loss_gt_voxels.append(torch.unsqueeze(gt_voxel, dim=0)) 67 | loss_pred_voxels.append(torch.unsqueeze(pred_voxel, dim=0)) 68 | 69 | start_instance = end_instance 70 | 71 | if mean_voxel_iou: 72 | get_event_storage().put_scalar("training/voxel_iou", np.array(mean_voxel_iou).mean()) 73 | 74 | gt_voxels = cat(loss_gt_voxels, dim=0) 75 | pred_voxels = cat(loss_pred_voxels, dim=0) 76 | 77 | assert pred_voxels.shape == gt_voxels.shape 78 | 79 | voxel_loss = balanced_BCE_loss(gt_voxels, pred_voxels) 80 | voxel_loss = voxel_loss * loss_weight 81 | 82 | return voxel_loss, gt_voxels 83 | 84 | 85 | def voxel_inference(pred_voxel_logits, pred_instances): # shape Num obj x 1 x D x H x W, Num img x Instance class 86 | 87 | voxel_probs_pred = pred_voxel_logits 88 | num_boxes_per_image = [len(i) for i in pred_instances] 89 | 90 | if np.array(num_boxes_per_image).sum() == 0: 91 | print('No predicted instances found for batch...') 92 | return 93 | 94 | voxel_probs_pred = voxel_probs_pred.split(num_boxes_per_image, dim=0) 95 | 96 | # Assign predicted voxels # instances and predictions different len -> moving idx 97 | for inst, prob in zip(pred_instances, voxel_probs_pred): 98 | 99 | if len(inst) == 0: 100 | print('No predicted instances found ...') 101 | continue 102 | 103 | if prob.sum() == 0: # sigmoid of 0 = 0.5 -< (prob.numel() * 0.5) 104 | inst.pred_voxels = torch.tensor([]).cuda() 105 | else: 106 | inst.pred_voxels = 
torch.squeeze(prob, dim=1) # (Num inst in 1 img, D, H, W) 107 | 108 | 109 | class Decoder(torch.nn.Module): 110 | """ 111 | Decoder Module from Pix2Vox++ Implementation 112 | """ 113 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 114 | super(Decoder, self).__init__() 115 | 116 | self.input_shape = input_shape 117 | 118 | # Layer Definition 119 | self.layer1 = torch.nn.Sequential( 120 | torch.nn.ConvTranspose3d(784, 512, kernel_size=3, stride=1, bias=False, padding=1), 121 | torch.nn.BatchNorm3d(512), 122 | torch.nn.ReLU() 123 | ) 124 | self.layer2 = torch.nn.Sequential( 125 | torch.nn.ConvTranspose3d(512, 128, kernel_size=4, stride=2, bias=False, padding=1), 126 | torch.nn.BatchNorm3d(128), 127 | torch.nn.ReLU() 128 | ) 129 | self.layer3 = torch.nn.Sequential( 130 | torch.nn.ConvTranspose3d(128, 32, kernel_size=4, stride=2, bias=False, padding=1), 131 | torch.nn.BatchNorm3d(32), 132 | torch.nn.ReLU() 133 | ) 134 | self.layer4 = torch.nn.Sequential( 135 | torch.nn.ConvTranspose3d(32, 8, kernel_size=4, stride=2, bias=False, padding=1), 136 | torch.nn.BatchNorm3d(8), 137 | torch.nn.ReLU() 138 | ) 139 | self.layer5 = torch.nn.Sequential( 140 | torch.nn.ConvTranspose3d(8, 1, kernel_size=1, bias=False), 141 | ) 142 | 143 | def forward(self, features): 144 | """ 145 | """ 146 | num_obj = features.shape[0] 147 | if num_obj != 0: 148 | gen_volume = features.view(num_obj, -1, 4, 4, 4) 149 | #print(gen_volume.size()) # torch.Size([num_obj, 784, 4, 4, 4]) 150 | gen_volume = self.layer1(gen_volume) 151 | #print(gen_volume.size()) # torch.Size([num_obj, 512, 4, 4, 4]) 152 | gen_volume = self.layer2(gen_volume) 153 | #print(gen_volume.size()) # torch.Size([num_obj, 128, 8, 8, 8]) 154 | gen_volume = self.layer3(gen_volume) 155 | #print(gen_volume.size()) # torch.Size([num_obj, 32, 16, 16, 16]) 156 | gen_volume = self.layer4(gen_volume) 157 | #print(gen_volume.size()) # torch.Size([num_obj, 8, 32, 32, 32]) 158 | gen_volume = self.layer5(gen_volume) 159 | #print(gen_volume.size()) # torch.Size([num_obj, 1, 32, 32, 32]) 160 | else: 161 | gen_volume = torch.zeros([1, 1, 32, 32, 32]) 162 | 163 | return gen_volume 164 | 165 | 166 | @ROI_VOXEL_HEAD_REGISTRY.register() 167 | class Pix2VoxDecoder(nn.Module): 168 | """ 169 | A voxel head with several conv layers, plus an upsample layer. 
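    Implemented here as a Pix2Vox++-style decoder of 3D transposed convolutions (the Decoder module above),
    upsampling pooled ROI features to a 32x32x32 occupancy grid.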
170 | """ 171 | 172 | def __init__(self, cfg, input_shape): 173 | super(Pix2VoxDecoder, self).__init__() 174 | 175 | # Model 176 | self.decoder = Decoder(cfg, input_shape) 177 | #init_weights(self.decoder, init_type='kaiming', init_gain=0.02) 178 | 179 | 180 | def forward(self, x): 181 | 182 | x = self.decoder(x) #Batchsize x channels x H x W 183 | 184 | return x 185 | 186 | 187 | def build_voxel_head(cfg, input_shape): 188 | name = cfg.MODEL.ROI_VOXEL_HEAD.NAME 189 | return ROI_VOXEL_HEAD_REGISTRY.get(name)(cfg, input_shape) 190 | -------------------------------------------------------------------------------- /Tracking/datasets/front_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | import numpy as np 5 | import json 6 | import h5py 7 | import torch 8 | import open3d as o3d 9 | 10 | from torchvision import transforms 11 | from torch.utils.data import Dataset 12 | 13 | sys.path.append('..') #Hack add ROOT DIR 14 | from baseconfig import CONF 15 | 16 | class Front_dataset(Dataset): 17 | def __init__(self, base_dir, split='train', transform=None, with_scene_pc=False): 18 | self.transform = transform # using transform in torch 19 | self.split = split 20 | self.scenes_skip = [] 21 | self.data_dir = os.path.join(base_dir, self.split) 22 | self.hdf5_dir = os.path.join(CONF.PATH.DETECTDATA, self.split) 23 | self.scenes = [f for f in os.listdir(os.path.abspath(self.data_dir)) if f not in self.scenes_skip] 24 | self.json_dir = os.path.join(CONF.PATH.DETECTDATA, self.split) 25 | self.camera_intrinsics = np.array([[292.87803547399, 0, 0], [0, 292.87803547399, 0], [0, 0, 1]]) 26 | self.with_scene_pc = with_scene_pc 27 | 28 | def __len__(self): 29 | return len(self.scenes) 30 | 31 | def __getitem__(self, idx): 32 | 33 | scene = self.scenes[idx] 34 | 35 | data_path = os.path.join(self.data_dir, scene) 36 | #json_path = os.path.join(self.json_dir, scene) 37 | 38 | unsorted_imgs = [f for f in os.listdir(os.path.abspath(data_path)) if 'feat' not in f] 39 | img_ints = [int(img[:-3]) for img in unsorted_imgs] 40 | imgs = [im for _, im in sorted(zip(img_ints, unsorted_imgs))] 41 | 42 | output = [] 43 | for idx_, img in enumerate(imgs): 44 | 45 | # Load scan pointcloud 46 | if self.with_scene_pc: 47 | hdf5_path = os.path.join(self.hdf5_dir, scene, str(idx_) + '.hdf5') 48 | rgb_path = os.path.join(self.hdf5_dir, scene, 'coco_data', 'rgb_' + str(idx_).zfill(4) + '.png') 49 | depth_map, campose, cx, cy = self.load_hdf5(hdf5_path) 50 | self.camera_intrinsics[0, 2] = cx 51 | self.camera_intrinsics[1, 2] = cy 52 | rgb_img = self.load_rgb(rgb_path) 53 | cam_rgb_pc = self.backproject_rgb(rgb_img, depth_map, self.camera_intrinsics) 54 | world_pc = self.cam2world(cam_rgb_pc, campose) 55 | 56 | 57 | img_path = os.path.join(data_path, img) 58 | hf = h5py.File(img_path, 'r') 59 | 60 | # Unpack GT data 61 | gt_object_id = np.array(hf.get("gt_objid")) 62 | gt_voxels = np.array(hf.get("gt_voxels")) 63 | gt_3Dbbox = np.array(hf.get("gt_3Dbbox")) 64 | gt_locations = np.array(hf.get("gt_locations")) 65 | gt_rotations = np.array(hf.get("gt_rotations")) 66 | gt_compl_box = np.array(hf.get("gt_compl_box")) 67 | gt_scales = np.array(hf.get("gt_scales")) 68 | gt_classes = np.array(hf.get("gt_cls")) - 1 # -1 because predicted starts at 0 and gt at 1 69 | 70 | # Unpack predicted data 71 | classes = np.array(hf.get("classes")) #from 0 to 6 72 | objectness_scores = np.array(hf.get("objectness_scores")) 73 | rotations = 
np.array(hf.get("rotations")) 74 | translations = np.array(hf.get("translations")) 75 | scales = np.array(hf.get("scales")) 76 | voxels = np.array(hf.get("voxels")) 77 | pred_3Dbbox = np.array(hf.get("pred_3Dbbox")) 78 | 79 | img_dict = {'classes': torch.tensor(classes, dtype=torch.int), 80 | 'objectness_scores': objectness_scores, 81 | 'rotations': torch.tensor(rotations), 82 | 'translations': torch.tensor(translations), 83 | 'scales': torch.tensor(scales), 84 | 'voxels': torch.tensor(voxels), 85 | 'pred_3Dbbox': torch.tensor(pred_3Dbbox), 86 | 'gt_object_id': torch.tensor(gt_object_id), 87 | 'gt_locations': torch.tensor(gt_locations), 88 | 'gt_rotations': torch.tensor(gt_rotations), 89 | 'gt_3Dbbox': torch.tensor(gt_3Dbbox), 90 | 'gt_compl_box': torch.tensor(gt_compl_box), 91 | 'gt_scales': torch.tensor(gt_scales), 92 | 'gt_classes': torch.tensor(gt_classes), 93 | 'gt_voxels': gt_voxels, 94 | 'image': img, 95 | 'scene': scene 96 | } 97 | if self.with_scene_pc: 98 | img_dict['world_pc'] = world_pc 99 | output.append(img_dict) 100 | 101 | return output # list of parameters of n images 102 | 103 | def load_hdf5(self, hdf5_path): 104 | ''' 105 | Loads campose and depth map from an hdf5 file 106 | returns additional camera intrinsics cx, cy 107 | ''' 108 | 109 | with h5py.File(hdf5_path, 'r') as data: 110 | for key in data.keys(): 111 | if key == 'depth': 112 | depth = np.array(data[key]) 113 | elif key == 'campose': 114 | campose = np.array(data[key]) 115 | 116 | img_width = depth.shape[1] 117 | img_height = depth.shape[0] 118 | 119 | cx = (img_width / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 120 | cy = (img_height / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 121 | 122 | return depth, campose, cx, cy 123 | 124 | def load_rgb(self, rgb_path): 125 | ''' 126 | Loads a rgb image from a png file 127 | ''' 128 | bgr_img = cv2.imread(rgb_path) 129 | rgb_img = bgr_img[:, :, ::-1] 130 | rgb_img = np.array(rgb_img, dtype=np.float32) 131 | 132 | return rgb_img 133 | 134 | def backproject_rgb(self, rgb, depth, intrinsics, debug_mode=False): 135 | ''' 136 | Backproject depth map to camera space, with additional rgb values 137 | Returns: Depth PC with according RGB values in camspace, used idxs in pixel space 138 | ''' 139 | 140 | intrinsics_inv = np.linalg.inv(intrinsics) 141 | non_zero_mask = (depth > 0) 142 | 143 | idxs = np.where(non_zero_mask) 144 | grid = np.array([idxs[1], idxs[0]]) 145 | 146 | length = grid.shape[1] 147 | ones = np.ones([1, length]) 148 | uv_grid = np.concatenate((grid, ones), axis=0) # [3, num_pixel] 149 | 150 | xyz = intrinsics_inv @ uv_grid # [3, num_pixel] 151 | xyz = np.transpose(xyz) # [num_pixel, 3] 152 | 153 | z = depth[idxs[0], idxs[1]] 154 | 155 | pts = xyz * z[:, np.newaxis] / xyz[:, -1:] 156 | pts[:, 1] = -pts[:, 1] 157 | pts[:, 2] = -pts[:, 2] 158 | 159 | rgb_vals = rgb[idxs[0], idxs[1]] 160 | 161 | rgb_pts = np.concatenate((pts, rgb_vals), axis=-1) 162 | 163 | if debug_mode: 164 | depth_pc_obj = o3d.geometry.PointCloud() 165 | nocs_origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1, origin=[0, 0, 0]) 166 | depth_pc_obj.points = o3d.utility.Vector3dVector(pts) 167 | o3d.visualization.draw_geometries([depth_pc_obj, nocs_origin]) 168 | 169 | return rgb_pts 170 | 171 | def cam2world(self, rgb_pts, campose): 172 | ''' 173 | transform camera space pc to world space pc 174 | ''' 175 | trans = campose[:3, 3:] 176 | rot = campose[:3, :3] 177 | 178 | cam_pts = rgb_pts[:, :3] 179 | world_pc = np.dot(rot, cam_pts.transpose()) + trans 180 | world_pc = 
world_pc.transpose() 181 | 182 | rgb_world = np.concatenate((world_pc, rgb_pts[:, 3:]), axis=-1) 183 | 184 | return rgb_world -------------------------------------------------------------------------------- /Tracking/utils/train_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import numpy as np 5 | from scipy.spatial import ConvexHull 6 | from torch.nn import init 7 | 8 | import sys 9 | 10 | def init_weights(net, init_type='normal', init_gain=0.02): 11 | """Initialize network weights. 12 | Parameters: 13 | net (network) -- network to be initialized 14 | init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal 15 | init_gain (float) -- scaling factor for normal, xavier and orthogonal. 16 | """ 17 | 18 | def init_func(m): # define the initialization function 19 | classname = m.__class__.__name__ 20 | if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): 21 | if init_type == 'normal': 22 | init.normal_(m.weight.data, 0.0, init_gain) 23 | 24 | elif init_type == 'xavier': 25 | init.xavier_normal_(m.weight.data, gain=init_gain) 26 | 27 | elif init_type == 'kaiming': 28 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 29 | 30 | elif init_type == 'orthogonal': 31 | init.orthogonal_(m.weight.data, gain=init_gain) 32 | 33 | else: 34 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 35 | 36 | if hasattr(m, 'bias') and m.bias is not None: 37 | init.constant_(m.bias.data, 0.0) 38 | elif classname.find( 39 | 'BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. 40 | init.normal_(m.weight.data, 1.0, init_gain) 41 | init.constant_(m.bias.data, 0.0) 42 | 43 | print('initialize network with %s' % init_type) 44 | net.apply(init_func) # apply the initialization function 45 | 46 | 47 | def get_quaternion_from_euler(roll, pitch, yaw): 48 | """ 49 | Convert an Euler angle to a quaternion. 50 | 51 | Input 52 | :param roll: The roll (rotation around x-axis) angle in radians. 53 | :param pitch: The pitch (rotation around y-axis) angle in radians. 54 | :param yaw: The yaw (rotation around z-axis) angle in radians. 
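    All three angles are expected as torch tensors of matching shape.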
55 | 56 | Output 57 | :return qx, qy, qz, qw: The orientation in quaternion [x,y,z,w] format 58 | """ 59 | qx = torch.sin(roll / 2) * torch.cos(pitch / 2) * torch.cos(yaw / 2) - torch.cos(roll / 2) * torch.sin(pitch / 2) * torch.sin(yaw / 2) 60 | qy = torch.cos(roll / 2) * torch.sin(pitch / 2) * torch.cos(yaw / 2) + torch.sin(roll / 2) * torch.cos(pitch / 2) * torch.sin(yaw / 2) 61 | qz = torch.cos(roll / 2) * torch.cos(pitch / 2) * torch.sin(yaw / 2) - torch.sin(roll / 2) * torch.sin(pitch / 2) * torch.cos(yaw / 2) 62 | qw = torch.cos(roll / 2) * torch.cos(pitch / 2) * torch.cos(yaw / 2) + torch.sin(roll / 2) * torch.sin(pitch / 2) * torch.sin(yaw / 2) 63 | quat_angles = torch.cat([torch.unsqueeze(qx, dim=-1), torch.unsqueeze(qy, dim=-1), torch.unsqueeze(qz, dim=-1), torch.unsqueeze(qw, dim=-1)], dim=-1) 64 | 65 | return quat_angles 66 | 67 | def check_pair(pred_bbox, gt_bboxes, gt_ids, thres=0.01): 68 | 69 | ious = [] 70 | for i in range(gt_bboxes.shape[0]): 71 | iou, _ = compute_3d_iou(pred_bbox, gt_bboxes[i,:,:]) 72 | ious.append(iou) 73 | 74 | max_iou = np.array(ious).max() 75 | max_iou_idx = np.argmax(np.array(ious)) 76 | if max_iou >= thres: 77 | obj_id = gt_ids[max_iou_idx] 78 | else: 79 | obj_id = None 80 | 81 | return obj_id 82 | 83 | def compute_3d_iou(corners1, corners2): 84 | 85 | # corner points are in counter clockwise order 86 | rect1 = [(corners1[i, 0], corners1[i, 2]) for i in range(3, -1, -1)] 87 | rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)] 88 | 89 | area1 = poly_area(np.array(rect1)[:, 0], np.array(rect1)[:, 1]) 90 | area2 = poly_area(np.array(rect2)[:, 0], np.array(rect2)[:, 1]) 91 | 92 | inter, inter_area = convex_hull_intersection(rect1, rect2) 93 | iou_2d = inter_area / (area1 + area2 - inter_area) 94 | ymax = min(corners1[0, 1], corners2[0, 1]) 95 | ymin = max(corners1[4, 1], corners2[4, 1]) 96 | 97 | inter_vol = inter_area * max(0.0, ymax - ymin) 98 | 99 | vol1 = box3d_vol(corners1) 100 | vol2 = box3d_vol(corners2) 101 | iou = inter_vol / (vol1 + vol2 - inter_vol) 102 | 103 | return iou, iou_2d 104 | 105 | # Helper functions -------------------------------------------- 106 | 107 | def poly_area(x,y): 108 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 109 | 110 | def convex_hull_intersection(p1, p2): 111 | """ Compute area of two convex hull's intersection area. 112 | p1,p2 are a list of (x,y) tuples of hull vertices. 113 | return a list of (x,y) for the intersection and its volume 114 | """ 115 | inter_p = polygon_clip(p1, p2) 116 | if inter_p is not None: 117 | hull_inter = ConvexHull(inter_p) 118 | return inter_p, hull_inter.volume 119 | else: 120 | return None, 0.0 121 | 122 | def box3d_vol(corners): 123 | 124 | ''' corners: (8,3) no assumption on axis direction ''' 125 | 126 | a = torch.sqrt(torch.sum((corners[0,:] - corners[1,:])**2)) 127 | b = torch.sqrt(torch.sum((corners[1,:] - corners[2,:])**2)) 128 | c = torch.sqrt(torch.sum((corners[0,:] - corners[4,:])**2)) 129 | return a*b*c 130 | 131 | 132 | def polygon_clip(subjectPolygon, clipPolygon): 133 | """ Clip a polygon with another polygon. 134 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 135 | Args: 136 | subjectPolygon: a list of (x,y) 2d points, any polygon. 137 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 138 | Note: 139 | **points have to be counter-clockwise ordered** 140 | Return: 141 | a list of (x,y) vertex point for the intersection polygon. 
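      Returns None when the polygons do not overlap (empty intersection).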
142 | """ 143 | 144 | def inside(p): 145 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 146 | 147 | def computeIntersection(): 148 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 149 | dp = [s[0] - e[0], s[1] - e[1]] 150 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 151 | n2 = s[0] * e[1] - s[1] * e[0] 152 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 153 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 154 | 155 | outputList = subjectPolygon 156 | cp1 = clipPolygon[-1] 157 | 158 | for clipVertex in clipPolygon: 159 | cp2 = clipVertex 160 | inputList = outputList 161 | outputList = [] 162 | s = inputList[-1] 163 | 164 | for subjectVertex in inputList: 165 | e = subjectVertex 166 | if inside(e): 167 | if not inside(s): 168 | outputList.append(computeIntersection()) 169 | outputList.append(e) 170 | elif inside(s): 171 | outputList.append(computeIntersection()) 172 | s = e 173 | cp1 = cp2 174 | if len(outputList) == 0: 175 | return None 176 | return (outputList) 177 | 178 | def sec_to_hm_str(t): 179 | """Convert time in seconds to a nice string 180 | e.g. 10239 -> '02h50m39s' 181 | """ 182 | h, m, s = sec_to_hm(t) 183 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 184 | 185 | 186 | def sec_to_hm(t): 187 | """Convert time in seconds to time in hours, minutes and seconds 188 | e.g. 10239 -> (2, 50, 39) 189 | """ 190 | t = int(t) 191 | s = t % 60 192 | t //= 60 193 | m = t % 60 194 | t //= 60 195 | return t, m, s 196 | 197 | def convert_voxel_to_pc(voxel_grid, rot, trans, scale): 198 | ''' 199 | Converts a voxel grid to a point cloud with according pose 200 | voxel_grid: 32x32x32 tensor binary 201 | rot, trans, scale: output from run pose function 202 | returns pc: n x 3 array 203 | ''' 204 | 205 | nonzero_inds = np.nonzero(voxel_grid)[:-1] 206 | points = nonzero_inds / 32 - 0.5 207 | points = points.detach().cpu().numpy() 208 | 209 | global_scalerot = (np.identity(3) * scale.copy()) @ rot 210 | world_pc = global_scalerot @ points.transpose() + np.expand_dims(trans.copy(), axis=-1) 211 | world_pc = world_pc.transpose() 212 | 213 | return world_pc 214 | -------------------------------------------------------------------------------- /Detection/train_net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os, sys, shutil 3 | import torch 4 | import roi_heads #Required for call register() 5 | from collections import OrderedDict 6 | import detectron2.utils.comm as comm 7 | from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer 8 | 9 | from detectron2.engine import default_argument_parser, default_writers, launch 10 | from detectron2.evaluation import print_csv_format 11 | 12 | from detectron2.engine import DefaultTrainer 13 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 14 | 15 | from detectron2.modeling import build_model 16 | from detectron2.solver import build_lr_scheduler, build_optimizer 17 | from detectron2.utils.events import EventStorage 18 | 19 | from register_dataset import RegisterDataset 20 | from data.mapper_heads import VoxNocsMapper 21 | from evaluator.FrontEvaluator import FrontEvaluator 22 | from evaluator.CocoEvaluator import COCOEvaluator 23 | from evaluator.EvaluatorUtils import inference_on_dataset_voxnocs, inference_on_dataset_coco 24 | from Utility.analyse_datset import get_dataset_info 25 | from cfg_setup import init_cfg 26 | 27 | sys.path.append('..') #Hack add ROOT DIR 28 | from baseconfig import 
CONF 29 | 30 | 31 | logger = logging.getLogger("front_logger") 32 | 33 | 34 | class FrontTrainer(DefaultTrainer): 35 | ''' 36 | Main Detectron2 MOTFront network training class 37 | ''' 38 | 39 | @classmethod 40 | def build_evaluator_coco(cls, cfg, dataset_name, output_folder=None): 41 | if output_folder is None: 42 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 43 | return COCOEvaluator(dataset_name, ('bbox', 'segm'), True, output_folder) 44 | 45 | @classmethod 46 | def build_evaluator_voxnocs(cls, cfg, dataset_name, output_folder=None): 47 | if output_folder is None: 48 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 49 | return FrontEvaluator(dataset_name, ('vox', 'nocs'), True, output_folder) 50 | 51 | @classmethod 52 | def build_fronttest_loader(cls, cfg): 53 | dataset_names = cfg.DATASETS.TEST[0] 54 | return build_detection_test_loader( 55 | cfg, dataset_names, mapper=VoxNocsMapper(cfg, is_train=False, dataset_names=dataset_names) 56 | ) 57 | 58 | @classmethod 59 | def build_train_loader(cls, cfg): 60 | dataset_names = cfg.DATASETS.TRAIN[0] 61 | return build_detection_train_loader( 62 | cfg, mapper=VoxNocsMapper(cfg, is_train=True, dataset_names=dataset_names) 63 | ) 64 | 65 | @classmethod 66 | def do_test(cls, cfg, model, save_img_pred=False): 67 | print('Evaluation starts...') 68 | results = OrderedDict() 69 | 70 | for dataset_name in cfg.DATASETS.TEST: 71 | 72 | data_loader = cls.build_fronttest_loader(cfg) 73 | evaluator_voxnocs = cls.build_evaluator_voxnocs(cfg, dataset_name) 74 | results_voxnocs = inference_on_dataset_voxnocs(model, data_loader, evaluator_voxnocs, logger, cfg, save_img_pred) 75 | 76 | evaluator_coco = cls.build_evaluator_coco(cfg, dataset_name) 77 | results_coco = inference_on_dataset_coco(model, data_loader, evaluator_coco, logger) 78 | 79 | results_coco['voxel'] = results_voxnocs['voxel'] 80 | results_coco['nocs'] = results_voxnocs['nocs'] 81 | 82 | results[dataset_name] = results_coco 83 | if comm.is_main_process(): 84 | assert isinstance(results_coco, dict), "Evaluator must return a dict on the main process. 
Got {} instead.".format(results_coco) 85 | logger.info("Evaluation results for {} in csv format:".format(dataset_name)) 86 | print_csv_format(results_coco) 87 | 88 | if len(results) == 1: 89 | results = list(results.values())[0] 90 | return results 91 | 92 | @classmethod 93 | def do_train(cls, cfg, model, resume=False): 94 | print('Training starts...') 95 | model.train() 96 | optimizer = build_optimizer(cfg, model) 97 | scheduler = build_lr_scheduler(cfg, optimizer) 98 | 99 | checkpointer = DetectionCheckpointer( 100 | model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler 101 | ) 102 | start_iter = ( 103 | checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 104 | ) 105 | max_iter = cfg.SOLVER.MAX_ITER 106 | 107 | periodic_checkpointer = PeriodicCheckpointer( 108 | checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter 109 | ) 110 | 111 | writers = default_writers(cfg.OUTPUT_DIR, max_iter) if comm.is_main_process() else [] 112 | 113 | data_loader = cls.build_train_loader(cfg) 114 | logger.info("Starting training from iteration {}".format(start_iter)) 115 | with EventStorage(start_iter) as storage: 116 | for data, iteration in zip(data_loader, range(start_iter, max_iter)): 117 | storage.iter = iteration 118 | 119 | loss_dict = model(data) 120 | 121 | losses = sum(loss_dict.values()) 122 | 123 | if (iteration + 1) % 100 == 0: 124 | print('Iteration ', iteration+1,' of ', max_iter, ' , Training Loss: ', losses.detach().cpu().item()) 125 | 126 | assert torch.isfinite(losses).all(), loss_dict 127 | 128 | loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} 129 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 130 | if comm.is_main_process(): 131 | storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) 132 | 133 | optimizer.zero_grad() 134 | losses.backward() 135 | optimizer.step() 136 | storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) 137 | scheduler.step() 138 | 139 | if (cfg.TEST.EVAL_PERIOD > 0 and (iteration + 1) % (cfg.TEST.IMG_SAVE_FREQ * cfg.TEST.EVAL_PERIOD) == 0 and iteration != max_iter - 1 and (iteration+1) >= cfg.TEST.START_EVAL): 140 | cls.do_test(cfg, model, save_img_pred=True) 141 | comm.synchronize() 142 | elif (cfg.TEST.EVAL_PERIOD > 0 and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0 and iteration != max_iter - 1 and (iteration+1) >= cfg.TEST.START_EVAL): 143 | cls.do_test(cfg, model, save_img_pred=False) 144 | comm.synchronize() 145 | 146 | if iteration - start_iter > 5 and ((iteration + 1) % 20 == 0 or iteration == max_iter - 1): 147 | for writer in writers: 148 | writer.write() 149 | periodic_checkpointer.step(iteration) 150 | 151 | ## ------------------------------ Static Functions -------------------------------------------------------------------- 152 | def setup(): 153 | TRAIN_IMG_DIR = CONF.PATH.DETECTTRAIN 154 | mapping_list, name_list = get_dataset_info(TRAIN_IMG_DIR) 155 | mapping_list, name_list = zip(*sorted(zip(mapping_list, name_list))) 156 | 157 | num_classes = len(mapping_list) 158 | cfg = init_cfg(num_classes) 159 | return cfg, mapping_list, name_list 160 | 161 | 162 | def main(args): 163 | cfg, mapping_list, name_list = setup() 164 | print('Existing Classes :', name_list) 165 | 166 | register_cls = RegisterDataset(mapping_list, name_list) 167 | register_cls.reg_dset() 168 | 169 | # Visualise annotations for debugging 170 | # register_cls.eval_annotation() 171 | 172 | # Remove old files 173 | if 
os.path.exists(CONF.PATH.DETECTOUTPUT): 174 | print('Removing old outputs ...') 175 | shutil.rmtree(CONF.PATH.DETECTOUTPUT) 176 | 177 | os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) 178 | 179 | model = build_model(cfg) 180 | logger.info("Model:\n{}".format(model)) 181 | if args.eval_only: 182 | print('ONLY EVALUATION') 183 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 184 | cfg.MODEL.WEIGHTS, resume=args.resume 185 | ) 186 | return FrontTrainer.do_test(cfg, model, False) 187 | 188 | FrontTrainer.do_train(cfg, model, resume=args.resume) 189 | 190 | 191 | if __name__ == "__main__": 192 | args = default_argument_parser().parse_args() 193 | print("Command Line Args:", args) 194 | launch( 195 | main, 196 | args.num_gpus, 197 | num_machines=args.num_machines, 198 | machine_rank=args.machine_rank, 199 | dist_url=args.dist_url, 200 | args=(args,), 201 | ) 202 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: 3dmot 2 | channels: 3 | - pyg 4 | - anaconda 5 | - pytorch 6 | - conda-forge 7 | - defaults 8 | dependencies: 9 | - _libgcc_mutex=0.1=main 10 | - _openmp_mutex=4.5=1_gnu 11 | - binutils_impl_linux-64=2.31.1=h6176602_1 12 | - binutils_linux-64=2.31.1=h6176602_9 13 | - blas=1.0=mkl 14 | - blosc=1.21.0=h8c45485_0 15 | - brotli=1.0.9=he6710b0_2 16 | - brotlipy=0.7.0=py38h497a2fe_1001 17 | - brunsli=0.1=h2531618_0 18 | - bzip2=1.0.8=h7b6447c_0 19 | - c-ares=1.18.1=h7f8727e_0 20 | - ca-certificates=2021.10.8=ha878542_0 21 | - certifi=2021.10.8=py38h578d9bd_1 22 | - cfitsio=3.470=hf0d0db6_6 23 | - charls=2.2.0=h2531618_0 24 | - charset-normalizer=2.0.10=pyhd8ed1ab_0 25 | - cloudpickle=2.0.0=pyhd3eb1b0_0 26 | - colorama=0.4.4=pyh9f0ad1d_0 27 | - cryptography=35.0.0=py38ha5dfef3_0 28 | - cudatoolkit=10.2.89=hfd86e86_1 29 | - cytoolz=0.11.0=py38h7b6447c_0 30 | - dask-core=2021.10.0=pyhd3eb1b0_0 31 | - ffmpeg=4.3=hf484d3e_0 32 | - fonttools=4.25.0=pyhd3eb1b0_0 33 | - freetype=2.11.0=h70c0345_0 34 | - fsspec=2022.1.0=pyhd3eb1b0_0 35 | - gcc_impl_linux-64=7.3.0=habb00fd_1 36 | - gcc_linux-64=7.3.0=h553295d_9 37 | - giflib=5.2.1=h7b6447c_0 38 | - gmp=6.2.1=h2531618_2 39 | - gnutls=3.6.15=he1e5248_0 40 | - googledrivedownloader=0.4=pyhd3deb0d_1 41 | - gxx_impl_linux-64=7.3.0=hdf63c60_1 42 | - gxx_linux-64=7.3.0=h553295d_9 43 | - imagecodecs=2021.8.26=py38h4cda21f_0 44 | - imageio=2.9.0=pyhd3eb1b0_0 45 | - intel-openmp=2021.4.0=h06a4308_3561 46 | - jpeg=9d=h7f8727e_0 47 | - jxrlib=1.1=h7b6447c_2 48 | - kiwisolver=1.3.1=py38h2531618_0 49 | - krb5=1.19.2=hac12032_0 50 | - lame=3.100=h7b6447c_0 51 | - lcms2=2.12=h3be6417_0 52 | - ld_impl_linux-64=2.33.1=h53a641e_7 53 | - lerc=3.0=h295c915_0 54 | - libaec=1.0.4=he6710b0_1 55 | - libcurl=7.80.0=h0b77cf5_0 56 | - libdeflate=1.8=h7f8727e_5 57 | - libedit=3.1.20210910=h7f8727e_0 58 | - libev=4.33=h7f8727e_1 59 | - libffi=3.3=he6710b0_2 60 | - libgcc=7.2.0=h69d50b8_2 61 | - libgcc-ng=9.3.0=h5101ec6_17 62 | - libgfortran-ng=7.5.0=ha8ba4b0_17 63 | - libgfortran4=7.5.0=ha8ba4b0_17 64 | - libgomp=9.3.0=h5101ec6_17 65 | - libiconv=1.15=h63c8f33_5 66 | - libidn2=2.3.2=h7f8727e_0 67 | - libnghttp2=1.46.0=hce63b2e_0 68 | - libopenblas=0.3.2=h5a2b251_1 69 | - libpng=1.6.37=hbc83047_0 70 | - libssh2=1.9.0=h1ba5d50_1 71 | - libstdcxx-ng=9.3.0=hd4cf53a_17 72 | - libtasn1=4.16.0=h27cfd23_0 73 | - libtiff=4.2.0=h85742a9_0 74 | - libunistring=0.9.10=h27cfd23_0 75 | - libuv=1.40.0=h7b6447c_0 76 | - 
libwebp=1.2.0=h89dd481_0 77 | - libwebp-base=1.2.0=h27cfd23_0 78 | - libzopfli=1.0.3=he6710b0_0 79 | - locket=0.2.1=py38h06a4308_1 80 | - lz4-c=1.9.3=h295c915_1 81 | - matplotlib-base=3.5.0=py38h3ed280b_0 82 | - mkl=2021.4.0=h06a4308_640 83 | - mkl-service=2.4.0=py38h7f8727e_0 84 | - mkl_fft=1.3.1=py38hd3c417c_0 85 | - mkl_random=1.2.2=py38h51133e4_0 86 | - munkres=1.1.4=py_0 87 | - ncurses=6.3=h7f8727e_2 88 | - nettle=3.7.3=hbbd107a_1 89 | - networkx=2.6.3=pyhd3eb1b0_0 90 | - numpy=1.21.2=py38h20f2e39_0 91 | - numpy-base=1.21.2=py38h79a1101_0 92 | - olefile=0.46=pyhd3eb1b0_0 93 | - openblas-devel=0.3.2=0 94 | - openh264=2.1.1=h4ff587b_0 95 | - openjpeg=2.4.0=h3ad879b_0 96 | - openssl=1.1.1m=h7f8727e_0 97 | - partd=1.2.0=pyhd3eb1b0_0 98 | - pillow=8.4.0=py38h5aabda8_0 99 | - pip=21.2.4=py38h06a4308_0 100 | - pycparser=2.21=pyhd8ed1ab_0 101 | - pyg=2.0.3=py38_torch_1.10.0_cu102 102 | - pyopenssl=21.0.0=pyhd8ed1ab_0 103 | - pyparsing=3.0.4=pyhd3eb1b0_0 104 | - pysocks=1.7.1=py38h578d9bd_4 105 | - python=3.8.12=h12debd9_0 106 | - python-dateutil=2.8.2=pyhd3eb1b0_0 107 | - python-louvain=0.15=pyhd3deb0d_0 108 | - python_abi=3.8=2_cp38 109 | - pytorch=1.10.1=py3.8_cuda10.2_cudnn7.6.5_0 110 | - pytorch-cluster=1.5.9=py38_torch_1.10.0_cu102 111 | - pytorch-mutex=1.0=cuda 112 | - pytorch-scatter=2.0.9=py38_torch_1.10.0_cu102 113 | - pytorch-sparse=0.6.12=py38_torch_1.10.0_cu102 114 | - pytorch-spline-conv=1.2.1=py38_torch_1.10.0_cu102 115 | - pywavelets=1.1.1=py38h7b6447c_2 116 | - readline=8.1.2=h7f8727e_1 117 | - scikit-image=0.18.1=py38ha9443f7_0 118 | - scikit-learn=1.0.2=py38h51133e4_1 119 | - setuptools=58.0.4=py38h06a4308_0 120 | - six=1.16.0=pyhd3eb1b0_0 121 | - snappy=1.1.8=he6710b0_0 122 | - sqlite=3.37.0=hc218d9a_0 123 | - tk=8.6.11=h1ccaba5_0 124 | - toolz=0.11.2=pyhd3eb1b0_0 125 | - torchaudio=0.10.1=py38_cu102 126 | - torchvision=0.11.2=py38_cu102 127 | - typing_extensions=3.10.0.2=pyh06a4308_0 128 | - wheel=0.37.1=pyhd3eb1b0_0 129 | - xz=5.2.5=h7b6447c_0 130 | - yaml=0.2.5=h7b6447c_0 131 | - zfp=0.5.5=h295c915_6 132 | - zlib=1.2.11=h7f8727e_4 133 | - zstd=1.4.9=haebb681_0 134 | - pip: 135 | - absl-py==0.12.0 136 | - addict==2.4.0 137 | - antlr4-python3-runtime==4.8 138 | - anyio==3.2.1 139 | - appdirs==1.4.4 140 | - argon2-cffi==20.1.0 141 | - async-generator==1.10 142 | - attrs==20.3.0 143 | - babel==2.9.1 144 | - backcall==0.2.0 145 | - black==21.4b2 146 | - bleach==3.3.0 147 | - block-timer==0.2.0 148 | - cachetools==4.2.2 149 | - cffi==1.14.5 150 | - chamferdist==1.0.0 151 | - click==8.0.3 152 | - cycler==0.10.0 153 | - cython==0.29.23 154 | - defusedxml==0.7.1 155 | - deprecation==2.1.0 156 | - detectron2==0.6+cu102 157 | - dnspython==2.2.0 158 | - dvis==0.8.2.2 159 | - easydict==1.9 160 | - entrypoints==0.3 161 | - eulerangles==1.0.2 162 | - eventlet==0.33.0 163 | - flask==1.1.2 164 | - flask-socketio==4.3.0 165 | - future==0.18.2 166 | - fvcore==0.1.5.post20220212 167 | - google-auth==1.30.0 168 | - google-auth-oauthlib==0.4.4 169 | - greenlet==1.1.2 170 | - grpcio==1.37.0 171 | - h5py==3.2.1 172 | - hydra-core==1.1.1 173 | - idna==2.10 174 | - importlib-resources==5.4.0 175 | - iopath==0.1.8 176 | - ipykernel==5.5.3 177 | - ipython==7.22.0 178 | - ipython-genutils==0.2.0 179 | - ipywidgets==7.6.3 180 | - jedi==0.18.0 181 | - jinja2==2.11.3 182 | - joblib==1.0.1 183 | - json5==0.9.6 184 | - jsonpatch==1.32 185 | - jsonpointer==2.2 186 | - jsonschema==3.2.0 187 | - jupyter-client==6.1.12 188 | - jupyter-core==4.7.1 189 | - jupyter-packaging==0.10.3 190 | - 
jupyter-server==1.9.0 191 | - jupyterlab==3.0.16 192 | - jupyterlab-pygments==0.1.2 193 | - jupyterlab-server==2.6.0 194 | - jupyterlab-widgets==1.0.0 195 | - kornia==0.5.8 196 | - markdown==3.3.4 197 | - markupsafe==1.1.1 198 | - mathutils==2.81.2 199 | - matplotlib==3.4.1 200 | - minkowskiengine==0.5.4 201 | - mistune==0.8.4 202 | - motmetrics==1.2.0 203 | - mypy-extensions==0.4.3 204 | - natsort==7.1.1 205 | - nbclassic==0.3.1 206 | - nbclient==0.5.3 207 | - nbconvert==6.0.7 208 | - nbformat==5.1.3 209 | - nest-asyncio==1.5.1 210 | - ninja==1.10.2.3 211 | - notebook==6.3.0 212 | - oauthlib==3.1.0 213 | - omegaconf==2.1.1 214 | - open3d==0.10.0.0 215 | - opencv-python==4.5.3.56 216 | - packaging==20.9 217 | - pandas==1.3.0 218 | - pandocfilters==1.4.3 219 | - parso==0.8.2 220 | - pathspec==0.9.0 221 | - pexpect==4.8.0 222 | - pickleshare==0.7.5 223 | - plotly==5.2.1 224 | - portalocker==2.3.0 225 | - prometheus-client==0.10.1 226 | - prompt-toolkit==3.0.18 227 | - protobuf==3.15.8 228 | - ptyprocess==0.7.0 229 | - pyasn1==0.4.8 230 | - pyasn1-modules==0.2.8 231 | - pycocotools==2.0.2 232 | - pydot==1.4.2 233 | - pygments==2.8.1 234 | - pyrsistent==0.17.3 235 | - python-engineio==3.14.2 236 | - python-socketio==4.6.1 237 | - pytz==2021.1 238 | - pyyaml==5.4.1 239 | - pyzmq==22.0.3 240 | - regex==2022.1.18 241 | - requests==2.25.1 242 | - requests-oauthlib==1.3.0 243 | - requests-unixsocket==0.2.0 244 | - rsa==4.7.2 245 | - scipy==1.6.3 246 | - send2trash==1.5.0 247 | - simple-websocket==0.2.0 248 | - sniffio==1.2.0 249 | - tabulate==0.8.9 250 | - tenacity==8.0.1 251 | - tensorboard==2.5.0 252 | - tensorboard-data-server==0.6.0 253 | - tensorboard-plugin-wit==1.8.0 254 | - tensorboardx==2.2 255 | - termcolor==1.1.0 256 | - terminado==0.9.4 257 | - testpath==0.4.4 258 | - threadpoolctl==2.1.0 259 | - tifffile==2021.4.8 260 | - toml==0.10.2 261 | - tomlkit==0.7.2 262 | - torchfile==0.1.0 263 | - tornado==6.1 264 | - tqdm==4.60.0 265 | - traitlets==5.0.5 266 | - trescope==0.0.1 267 | - trimesh==3.9.20 268 | - urllib3==1.26.4 269 | - visdom==0.1.8.9 270 | - vision3d==0.5 271 | - wcwidth==0.2.5 272 | - webencodings==0.5.1 273 | - websocket-client==1.1.0 274 | - werkzeug==1.0.1 275 | - widgetsnbextension==3.5.1 276 | - wsproto==1.0.0 277 | - xmltodict==0.12.0 278 | - yacs==0.1.8 279 | - zipp==3.7.0 280 | prefix: /home/dominik/miniconda3/envs/3dmot 281 | -------------------------------------------------------------------------------- /Tracking/datasets/consec_graph_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from torch_geometric.data import Data 4 | from torch_geometric.utils.undirected import to_undirected 5 | 6 | sys.path.append('..') #Hack add ROOT DIR 7 | from Tracking.utils.train_utils import check_pair 8 | 9 | 10 | class GraphDataset(): 11 | ''' 12 | Graph dataset class enables data handling for pytorch geometric graphs 13 | init_node_emb: voxel features, shape: num nodes x feature dim 14 | rotations, translations, scales, -> edge features, shape: num nodes x (3 or 1) 15 | instances_count: per image instances 16 | ''' 17 | 18 | def __init__(self, init_node_emb, rotations, translations, scales, input, instances_count, num_images=25): 19 | 20 | self.init_node_emb = init_node_emb 21 | self.rotations = rotations 22 | self.translations = translations 23 | self.scales = scales 24 | self.input = input 25 | self.instances_count = instances_count 26 | self.num_images = num_images 27 | self.box_iou_thres = 0.01 # Min 
IoU threshold GT and predicted 3D box 28 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 29 | 30 | 31 | def get_edge_data(self, is_undirected=True): 32 | ''' 33 | Get edge attributes and according edge indicies 34 | Currently directed graph and only consecutive frames connected 35 | is_undirected: Graph nodes connected both ways 0-1, 1-0, duplicate idxs, edge features and targets 36 | ''' 37 | 38 | relative_scales = [] 39 | relative_rotations = [] 40 | relative_positions = [] 41 | relative_times = [] 42 | 43 | img_inst_count = 0 44 | edge_idxs = [] 45 | 46 | # Validation data 47 | vis_idxs = [] 48 | false_positives = 0 49 | targets = [] 50 | node_color = [] 51 | 52 | for t in range(self.num_images - 1): 53 | 54 | gt_bbox_1 = self.input[t]['gt_3Dbbox'] # num inst x 8 pts x xyz 55 | gt_bbox_2 = self.input[t+1]['gt_3Dbbox'] 56 | 57 | gt_id_1 = self.input[t]['gt_object_id'] # num inst 58 | gt_id_2 = self.input[t+1]['gt_object_id'] 59 | 60 | pred_bbox_1 = self.input[t]['pred_3Dbbox'] # num inst x 8pts x xyz 61 | pred_bbox_2 = self.input[t+1]['pred_3Dbbox'] 62 | 63 | start_inst_count = img_inst_count # start count instances frame t 64 | img_inst_count += self.instances_count[t] # start count instances frame t+1 65 | consecutive_inst_count = img_inst_count + self.instances_count[t+1] 66 | 67 | for n in range(start_inst_count, img_inst_count): 68 | 69 | # Object Matching frame t 70 | try: 71 | obj_id_1 = check_pair(pred_bbox_1[n-start_inst_count, :, :], gt_bbox_1, gt_id_1, 72 | thres=self.box_iou_thres) 73 | except: 74 | obj_id_1 = None 75 | print('Issue with convex hull', ', Bad scene:', input[0]['scene']) 76 | 77 | if obj_id_1 is None: 78 | false_positives += 1 # No overlapping GT bounding box found 79 | node_color.append(1) 80 | continue # SKIP THIS INSTANCE FOR GRAPH CONSTRUCTION 81 | else: 82 | node_color.append(0) 83 | 84 | for m in range(img_inst_count, consecutive_inst_count): # n0-m0 n0-m1 n1-m0 n1-m1 .... 85 | 86 | # Object Matching frame t+1 87 | try: 88 | obj_id_2 = check_pair(pred_bbox_2[m-img_inst_count, :, :], gt_bbox_2, gt_id_2, thres=self.box_iou_thres) 89 | except: 90 | obj_id_2 = None 91 | print('Issue with convex hull', ', Bad scene:', input[0]['scene']) 92 | 93 | # ONLY FOR LAST FRAME WHICH ISNT COVERED IN OUTER LOOP ADD FP 94 | if t == self.num_images - 2 and n == img_inst_count - 1: 95 | if obj_id_2 is None: 96 | false_positives += 1 97 | node_color.append(1) 98 | else: 99 | node_color.append(0) 100 | 101 | # GT targets: active (1) and non-active (0) connections 102 | if obj_id_1 == obj_id_2 and obj_id_1 is not None and obj_id_2 is not None: # both objects exist and same id 103 | target = 1 104 | elif obj_id_1 != obj_id_2 and obj_id_1 is not None and obj_id_2 is not None: 105 | target = 0 106 | elif obj_id_2 is None: # false prediction for any object in consecutive frame -> exclude 107 | continue 108 | 109 | vis_idxs.append({'image': t, 'obj_1': n, 'obj_2': m, 'obj_id_1': int(obj_id_1), 110 | 'obj_id_2': int(obj_id_2)}) 111 | 112 | if is_undirected: 113 | targets.append(target) # obj1 img1 with all obj img2, obj2 img1 with all obj img2 ... 
per sequence 114 | targets.append(target) # twice for undirected 0-1 and 1-0 115 | else: 116 | targets.append(target) 117 | # Edge feature construction 118 | edge_idxs.append([n, m]) # 0 - 1 119 | 120 | relative_scale = torch.unsqueeze(torch.log(self.scales[m, :] / self.scales[n, :]), 121 | dim=0) # feat t+1 / feat t 122 | relative_scales.append(relative_scale) 123 | relative_position = torch.unsqueeze(self.translations[m, :] - self.translations[n, :], dim=0) 124 | relative_positions.append(relative_position) 125 | relative_rot = torch.unsqueeze(self.rotations[m, :] - self.rotations[n, :], dim=0) 126 | relative_rotations.append(relative_rot) 127 | relative_time = torch.unsqueeze(torch.tensor([t + 1 - t], dtype=torch.int64), 128 | dim=0) # always 1 for consecutive frames 129 | relative_times.append(relative_time) 130 | # relative_appearance -> could be also an edge feature but is already encoded in the node 131 | 132 | if is_undirected: 133 | edge_idxs.append([m, n]) # 1 - 0 134 | 135 | relative_scale = torch.unsqueeze(torch.log(self.scales[m, :] / self.scales[n, :]), 136 | dim=0) # feat t / feat t+1 137 | relative_scales.append(relative_scale) 138 | relative_position = torch.unsqueeze(self.translations[m, :] - self.translations[n, :], dim=0) 139 | relative_positions.append(relative_position) 140 | relative_rot = torch.unsqueeze(self.rotations[m, :] - self.rotations[n, :], dim=0) 141 | relative_rotations.append(relative_rot) 142 | relative_time = torch.unsqueeze(torch.tensor([t + 1 - t], dtype=torch.int64), 143 | dim=0) # always 1 for consecutive frames 144 | relative_times.append(relative_time) 145 | # relative_appearance -> could be also an edge feature but is already encoded in the node 146 | 147 | 148 | relative_scales = torch.cat(relative_scales, dim=0) # num_edges x 1 149 | relative_positions = torch.cat(relative_positions, dim=0) # num_edges x 3 150 | relative_rotations = torch.cat(relative_rotations, dim=0) # num_edges x 3 151 | relative_times = torch.cat(relative_times, dim=0) # num_edges x 1 152 | 153 | edge_attr = torch.cat((relative_positions, relative_rotations, relative_scales, relative_times), 154 | dim=-1).to(dtype=torch.float32, device=self.device) # Num edges x feat_dim 155 | edge_index = torch.tensor(edge_idxs, dtype=torch.long).t().contiguous().to(self.device) 156 | 157 | return edge_index, edge_attr, torch.tensor(targets, dtype=torch.float32, device=self.device), vis_idxs, false_positives, torch.tensor(node_color) 158 | 159 | def construct_batch_graph(self, is_undirected=True): 160 | ''' 161 | Returns batch graph data: x: Node Embeddings, shape: Num nodes x feature dim(9) 162 | edge_idx: Edge indicies, shape: 2 x Num edges 163 | edge_attr: Edge features, shape: Num edges x feature dim(8) 164 | y: targets, shape: Num edges 165 | ''' 166 | 167 | edge_idx, edge_attr, targets, vis_idxs, false_positives, node_color = self.get_edge_data(is_undirected=is_undirected) 168 | batch_graph = Data(x=self.init_node_emb, edge_index=edge_idx, edge_attr=edge_attr, y=targets) 169 | 170 | return batch_graph, vis_idxs, false_positives, node_color -------------------------------------------------------------------------------- /Detection/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from typing import Dict 3 | import numpy as np 4 | import sys 5 | from detectron2.layers import ShapeSpec, cat 6 | from detectron2.modeling import ROI_HEADS_REGISTRY 7 | from detectron2.modeling.poolers import ROIPooler 8 | from detectron2.modeling.roi_heads.roi_heads import select_foreground_proposals, StandardROIHeads 9 | from detectron2.data import MetadataCatalog 10 | from detectron2.utils.registry import Registry 11 | from roi_heads.voxel_head import ( 12 | build_voxel_head, 13 | voxel_inference, 14 | voxel_loss, 15 | ) 16 | 17 | from roi_heads.nocs_head import ( 18 | build_nocs_head, 19 | nocs_inference, 20 | nocs_loss, 21 | ) 22 | 23 | import torch 24 | 25 | @ROI_HEADS_REGISTRY.register() 26 | class VoxelNocsHeads(StandardROIHeads): 27 | """ 28 | The ROI specific heads for Voxel and Nocs branch 29 | """ 30 | 31 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 32 | super().__init__(cfg, input_shape) 33 | self._init_voxel_head(cfg, input_shape) 34 | self._init_nocs_head(cfg, input_shape) 35 | self._misc = {} 36 | 37 | self.train_dataset_names = cfg.DATASETS.TRAIN[0] 38 | self.test_dataset_names = cfg.DATASETS.TEST[0] 39 | self.metadata = MetadataCatalog.get(self.train_dataset_names) 40 | if 'thing_classes' in self.metadata.as_dict(): 41 | self.class_mapping = self.metadata.thing_classes 42 | 43 | 44 | def _init_voxel_head(self, cfg, input_shape): 45 | 46 | self.voxel_on = cfg.MODEL.VOXEL_ON 47 | self.voxel_loss_weight = cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT 48 | 49 | if not self.voxel_on: 50 | return 51 | 52 | voxel_pooler_resolution = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_RESOLUTION 53 | voxel_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) 54 | voxel_sampling_ratio = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_SAMPLING_RATIO 55 | voxel_pooler_type = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_TYPE 56 | 57 | in_channels = [input_shape[f].channels for f in self.in_features][0] 58 | 59 | self.voxel_pooler = ROIPooler( 60 | output_size=voxel_pooler_resolution, 61 | scales=voxel_pooler_scales, 62 | sampling_ratio=voxel_sampling_ratio, 63 | pooler_type=voxel_pooler_type, 64 | ) 65 | shape = ShapeSpec( 66 | channels=in_channels, width=voxel_pooler_resolution, height=voxel_pooler_resolution 67 | ) 68 | self.voxel_head = build_voxel_head(cfg, shape) 69 | 70 | def _init_nocs_head(self, cfg, input_shape): 71 | 72 | self.nocs_on = cfg.MODEL.NOCS_ON 73 | self.nocs_loss_weight = cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT 74 | self.iou_threshold = cfg.MODEL.ROI_NOCS_HEAD.IOU_THRES 75 | self.use_bin_loss = cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS 76 | self.num_bins = cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS 77 | 78 | if not self.nocs_on: 79 | return 80 | nocs_pooler_resolution = cfg.MODEL.ROI_NOCS_HEAD.POOLER_RESOLUTION 81 | nocs_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) 82 | nocs_sampling_ratio = cfg.MODEL.ROI_NOCS_HEAD.POOLER_SAMPLING_RATIO 83 | nocs_pooler_type = cfg.MODEL.ROI_NOCS_HEAD.POOLER_TYPE 84 | 85 | 86 | in_channels = [input_shape[f].channels for f in self.in_features][0] 87 | 88 | self.nocs_pooler = ROIPooler( 89 | output_size=nocs_pooler_resolution, 90 | scales=nocs_pooler_scales, 91 | sampling_ratio=nocs_sampling_ratio, 92 | pooler_type=nocs_pooler_type, 93 | ) 94 | shape = ShapeSpec( 95 | channels=in_channels, width=nocs_pooler_resolution, height=nocs_pooler_resolution 96 | ) 97 | self.nocs_head = build_nocs_head(cfg, shape) 98 | 99 | def forward(self, images, features, proposals, targets=None): # targets imgs x instances 100 | """ 
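        Runs the standard box/mask heads first, then the voxel and NOCS heads: during training this
        returns the combined loss dict, during inference it returns per-image instances with
        pred_voxels and pred_nocs attached.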
101 | See :class:`ROIHeads.forward`. 102 | """ 103 | 104 | instances, losses = super().forward(images, features, proposals, targets) # forward method for default heads (BBOX, MASK) #proposals N = batchsize 105 | del images, targets 106 | if self.training: 107 | losses.update(self._forward_voxel(features, instances)) # features input data mapping feature map name to tensor, axis 0 = N num images 108 | losses.update(self._forward_nocs(features, instances)) # features input data mapping feature map name to tensor, axis 0 = N num images 109 | return [], losses 110 | 111 | else: 112 | pred_instances = self.forward_with_given_boxes_voxnocs(features, instances) 113 | return pred_instances, {} 114 | 115 | 116 | def forward_with_given_boxes_voxnocs(self, features, instances): 117 | """ 118 | Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. 119 | Args: 120 | features: same as in `forward()` 121 | instances (list[Instances]): instances to predict other outputs. Expect the keys 122 | "pred_boxes" and "pred_classes" to exist. 123 | Returns: 124 | instances (Instances): the same `Instances` object, with extra 125 | fields such as `pred_masks`, `pred_voxels` or `pred_nocs`. 126 | """ 127 | assert not self.training 128 | #instances = super().forward_with_given_boxes(features, instances) 129 | 130 | assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") and instances[0].has("pred_masks") 131 | 132 | instances = self._forward_voxel(features, instances) 133 | instances = self._forward_nocs(features, instances) 134 | 135 | return instances 136 | 137 | def _forward_voxel(self, features, instances): 138 | """ 139 | Forward logic for the voxel branch. 140 | Args: 141 | features (list[Tensor]): #level input features for voxel prediction 142 | instances (list[Instances]): the per-image instances to train/predict voxels. 143 | In training, they can be the proposals. 144 | In inference, they can be the predicted boxes. 145 | Returns: 146 | In training, a dict of losses. 147 | In inference, update `instances` with new fields "pred_voxels" and return it. 148 | """ 149 | if not self.voxel_on: 150 | return {} if self.training else instances 151 | 152 | features = [features[f] for f in self.in_features] 153 | 154 | if self.training: 155 | # The loss is only defined on positive proposals. 156 | proposals, _ = select_foreground_proposals(instances, self.num_classes) 157 | proposal_boxes = [x.proposal_boxes for x in proposals] 158 | 159 | losses = {} 160 | if self.voxel_on: 161 | voxel_features = self.voxel_pooler(features, proposal_boxes) #M total number of boxes aggregated over all N batch images x 256 x 14 x 14 162 | voxel_logits = self.voxel_head(voxel_features) #Num objs x 1 x 32 x 32 x 32, zeros for empty detection 163 | src_boxes = cat([p.tensor for p in proposal_boxes]) # num obj x 4 format XYXY 164 | loss_voxel, _ = voxel_loss( 165 | voxel_logits, proposals, src_boxes, loss_weight=self.voxel_loss_weight, iou_thres=self.iou_threshold 166 | ) 167 | losses.update({"loss_voxel": loss_voxel}) 168 | 169 | return losses 170 | else: 171 | pred_boxes = [x.pred_boxes for x in instances] 172 | 173 | if self.voxel_on: 174 | 175 | voxel_features = self.voxel_pooler(features, pred_boxes) # BS x 256 x 14 x 14 176 | voxel_logits = self.voxel_head(voxel_features) 177 | voxel_inference(voxel_logits, instances) 178 | 179 | return instances 180 | 181 | def _forward_nocs(self, features, instances): 182 | """ 183 | Forward logic for the nocs branch.
184 | Args: 185 | features (list[Tensor]): #level input features for nocs prediction 186 | instances (list[Instances]): the per-image instances to train/predict nocs. 187 | In training, they can be the proposals. 188 | In inference, they can be the predicted boxes. 189 | Returns: 190 | In training, a dict of losses. 191 | In inference, update `instances` with new fields "pred_nocs" and return it. 192 | """ 193 | if not self.nocs_on: 194 | return {} if self.training else instances 195 | 196 | features = [features[f] for f in self.in_features] 197 | 198 | if self.training: 199 | # The loss is only defined on positive proposals. 200 | proposals, _ = select_foreground_proposals(instances, self.num_classes) 201 | proposal_boxes = [x.proposal_boxes for x in proposals] 202 | 203 | losses = {} 204 | if self.nocs_on: 205 | nocs_features = self.nocs_pooler(features, proposal_boxes) #M total number of boxes aggregated over all N batch images 206 | nocs_map_rgb = self.nocs_head(nocs_features) # num obj x 3 x 28 x 28 (l1), num obj x num bins x 3 x 28 x 28 (bin) 207 | src_boxes = cat([p.tensor for p in proposal_boxes]) #num obj x 4 format XYXY 208 | loss_nocs, _ = nocs_loss( 209 | nocs_map_rgb, proposals, src_boxes, loss_weight=self.nocs_loss_weight, iou_thres=self.iou_threshold, 210 | cls_mapping=self.class_mapping, use_bin_loss=self.use_bin_loss, num_bins=self.num_bins 211 | ) 212 | losses.update({"loss_nocs": loss_nocs}) 213 | 214 | return losses 215 | else: 216 | pred_boxes = [x.pred_boxes for x in instances] 217 | 218 | if self.nocs_on: 219 | 220 | nocs_features = self.nocs_pooler(features, pred_boxes) # BS x 256 x 14 x 14 221 | nocs_map_rgb = self.nocs_head(nocs_features) # BS x 3 x 28 x 28 (RGB) 222 | nocs_inference(nocs_map_rgb, instances, use_bin_loss=self.use_bin_loss, num_bins=self.num_bins) 223 | 224 | return instances 225 | -------------------------------------------------------------------------------- /Detection/inference/inference_utils.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import torch 3 | import numpy as np 4 | import os, sys, cv2 5 | import open3d as o3d 6 | import copy 7 | 8 | from sklearn.metrics import recall_score 9 | from sklearn.metrics import precision_score 10 | from sklearn.metrics import f1_score 11 | 12 | sys.path.append('..') #Hack add ROOT DIR 13 | 14 | from BlenderProc.utils import binvox_rw 15 | from baseconfig import CONF 16 | from Detection.inference.inference_metrics import get_mean_iou, get_median_iou 17 | from PoseEst.pose_estimation import backproject, cam2world, sort_bbox 18 | 19 | 20 | def get_scale(m): 21 | if type(m) == torch.Tensor: 22 | return m.norm(dim=0) 23 | return np.linalg.norm(m, axis=0) 24 | 25 | def transform_icp_points(source, transformation): 26 | ''' 27 | transforms source pc to align with target point cloud based on a learned icp transformation 28 | ''' 29 | source_temp = copy.deepcopy(source) 30 | return source_temp.transform(transformation) 31 | 32 | def draw_registration_result(source, target, transformation): 33 | ''' 34 | Visualise ICP Matching results 35 | ''' 36 | source_temp = copy.deepcopy(source) 37 | target_temp = copy.deepcopy(target) 38 | source_temp.paint_uniform_color([1, 0.706, 0]) 39 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 40 | source_temp.transform(transformation) 41 | o3d.visualization.draw_geometries([source_temp, target_temp]) 42 | 43 | def construct_box(segpc, ax_aligned=False): 44 | ''' 45 | calculates 3D bounding box around segmentation 
pointcloud 46 | ''' 47 | if ax_aligned: 48 | bbox3d_obj = o3d.geometry.AxisAlignedBoundingBox() 49 | else: 50 | bbox3d_obj = o3d.geometry.OrientedBoundingBox() 51 | bbox_3d = bbox3d_obj.create_from_points(o3d.utility.Vector3dVector(segpc)) 52 | center_3d = bbox_3d.get_center() 53 | 54 | pred_box = sort_bbox(np.array(bbox_3d.get_box_points())) 55 | 56 | if not ax_aligned: 57 | scale = bbox_3d.extent 58 | rotation = bbox_3d.R 59 | cad2world = np.diag([0, 0, 0, 1]).astype(np.float32) 60 | cad2world[:3, :3] = np.diag(scale) @ rotation 61 | cad2world[:3, 3] = center_3d 62 | 63 | return torch.tensor(pred_box), center_3d, cad2world 64 | 65 | return torch.tensor(pred_box), center_3d 66 | 67 | def project_segmask_F2F(pred_bin_mask, abs_bbox, depth, intrinsics): 68 | ''' 69 | Projection segmask to pointcloud for F2F - MaskRCNN baseline 70 | ''' 71 | 72 | depth = np.array(depth, dtype=np.float32) # HxW 73 | 74 | # Zero pad depth image 75 | depth_pad = np.zeros((240, 320)) 76 | depth_pad[int(abs_bbox[1]):int(abs_bbox[3]), int(abs_bbox[0]):int(abs_bbox[2])] = depth[int(abs_bbox[1]):int(abs_bbox[3]), 77 | int(abs_bbox[0]):int(abs_bbox[2])] 78 | depth = depth_pad 79 | 80 | depth_pts, _ = backproject(depth, intrinsics, np.array(pred_bin_mask.cpu())) # depth in camera space 81 | 82 | return depth_pts 83 | 84 | def project_segmask(pred_bin_mask, abs_bbox, depth, campose): 85 | ''' 86 | Projection segmask to pointcloud for F2F - MaskRCNN baseline 87 | ''' 88 | 89 | depth = np.array(depth, dtype=np.float32) # HxW 90 | 91 | # Zero pad depth image 92 | depth_pad = np.zeros((240, 320)) 93 | depth_pad[int(abs_bbox[1]):int(abs_bbox[3]), int(abs_bbox[0]):int(abs_bbox[2])] = depth[int(abs_bbox[1]):int(abs_bbox[3]), 94 | int(abs_bbox[0]):int(abs_bbox[2])] 95 | depth = depth_pad 96 | 97 | img_width = depth.shape[1] 98 | img_height = depth.shape[0] 99 | cx = (img_width / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 100 | cy = (img_height / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 101 | fx = 292.87803547399 102 | fy = 292.87803547399 103 | intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 104 | 105 | depth_pts, _ = backproject(depth, intrinsics, np.array(pred_bin_mask.cpu())) # depth in camera space 106 | depth_world = cam2world(depth_pts, campose) 107 | 108 | return depth_world 109 | 110 | def convert_voxel_to_pc(voxel_grid, rot, trans, scale): 111 | ''' 112 | Converts a voxel grid to a point cloud with according pose 113 | voxel_grid: 32x32x32 tensor binary 114 | rot, trans, scale: output from run pose function 115 | scale already encoded in rotation 116 | returns pc: n x 3 array 117 | ''' 118 | 119 | nonzero_inds = np.nonzero(voxel_grid)[:-1] 120 | 121 | points = nonzero_inds / 32 - 0.5 122 | points = points.detach().cpu().numpy() 123 | 124 | #global_scalerot = (np.identity(3) * scale.copy()) @ rot 125 | world_pc = rot @ points.transpose() + np.expand_dims(trans.copy(), axis=-1) 126 | world_pc = world_pc.transpose() 127 | 128 | return world_pc 129 | 130 | def add_halfheight(location, box): 131 | ''' 132 | Object location z-center is at bottom, calculate half height of the object 133 | and add to shift z-center to correct location 134 | ''' 135 | z_coords = [] 136 | for pt in box: 137 | z = pt[-1] 138 | z_coords.append(z) 139 | z_coords = np.array(z_coords) 140 | half_height = np.abs(z_coords.max() - z_coords.min()) / 2 141 | location[-1] = half_height # Center location is at bottom object 142 | 143 | return location 144 | 145 | def load_hdf5(path): 146 | with h5py.File(path, 'r') as data: 147 | 
for key in data.keys(): 148 | if key == 'depth': 149 | depth = np.array(data[key]) 150 | elif key == 'campose': 151 | campose = np.array(data[key]) 152 | 153 | return depth, campose 154 | 155 | def get_nocs(nocs_path): 156 | ''' 157 | loads GT nocs image 158 | cv2.imread -1 for using all color depth values 159 | ''' 160 | 161 | nocs = cv2.imread(nocs_path, -1) #BGRA 162 | nocs = nocs[:,:,:3] 163 | nocs = np.array(nocs[:, :, ::-1], dtype=np.float32) # RGB 164 | 165 | return nocs 166 | 167 | def log_results(metrics): 168 | 169 | voxel_iou = [] 170 | chair_iou = [] 171 | table_iou = [] 172 | sofa_iou = [] 173 | bed_iou = [] 174 | tv_stand_iou = [] 175 | cooler_iou = [] 176 | night_stand_iou = [] 177 | distances = [] 178 | thetas = [] 179 | for seq in metrics: 180 | for img in seq: 181 | for key, value in img.items(): 182 | if key == 'voxel_ious': 183 | voxel_iou.append(value) 184 | elif key == 'chair_ious': 185 | chair_iou.append(value) 186 | elif key == 'table_ious': 187 | table_iou.append(value) 188 | elif key == 'sofa_ious': 189 | sofa_iou.append(value) 190 | elif key == 'bed_ious': 191 | bed_iou.append(value) 192 | elif key == 'tv_stand_ious': 193 | tv_stand_iou.append(value) 194 | elif key == 'cooler_ious': 195 | cooler_iou.append(value) 196 | elif key == 'night_stand_ious': 197 | night_stand_iou.append(value) 198 | elif key == 'pose_distance': 199 | for entity in value: 200 | distances.append(entity) 201 | elif key == 'pose_rotationdiff': 202 | for entity in value: 203 | thetas.append(entity) 204 | 205 | 206 | mean_voxel_iou = get_mean_iou(voxel_iou) 207 | mean_chair_iou = get_mean_iou(chair_iou) 208 | mean_table_iou = get_mean_iou(table_iou) 209 | mean_sofa_iou = get_mean_iou(sofa_iou) 210 | mean_bed_iou = get_mean_iou(bed_iou) 211 | mean_tv_iou = get_mean_iou(tv_stand_iou) 212 | mean_cooler_iou = get_mean_iou(cooler_iou) 213 | mean_night_iou = get_mean_iou(night_stand_iou) 214 | 215 | mean_rotation_diff = get_median_iou(thetas) 216 | mean_distance = get_median_iou(distances) 217 | 218 | print('Voxel_IoU :', mean_voxel_iou, ', Voxel_Chair_IoU :', mean_chair_iou, ', Voxel_Table_IoU :', mean_table_iou, 219 | ', Voxel_Sofa_IoU :', mean_sofa_iou, ', Voxel_Bed_IoU :', mean_bed_iou, 220 | ', Voxel_TVstand_IoU :', mean_tv_iou, ', Voxel_WineCooler_IoU :', mean_cooler_iou, 221 | ', Voxel_NightStand_IoU :', mean_night_iou, 222 | ', Rotation Difference [°] :', mean_rotation_diff, ', Location Difference [m] :', mean_distance ) 223 | 224 | def calculate_F2F_metrics(outputs): 225 | 226 | overall_gt_objects = 0 227 | overall_misses = 0 228 | overall_fps = 0 229 | overall_predictions = [] 230 | overall_targets = [] 231 | 232 | for seq in outputs: 233 | 234 | overall_gt_objects += seq['total_gt_objs'] 235 | overall_misses += seq['misses'] 236 | overall_fps += seq['false_positives'] 237 | overall_predictions.append(seq['prediction']) 238 | overall_targets.append(seq['target']) 239 | 240 | predictions = np.concatenate(overall_predictions) 241 | targets = np.concatenate(overall_targets) 242 | 243 | F1 = f1_score(targets, predictions, zero_division='warn') # warn only once 244 | Prec = precision_score(targets, predictions, zero_division=0) 245 | Rec = recall_score(targets, predictions, zero_division=0) 246 | 247 | id_switches = np.count_nonzero(targets - predictions) 248 | MOTA = 1.0 - (float(overall_misses + overall_fps + id_switches) / float(overall_gt_objects)) 249 | 250 | print('MOTA score :', MOTA, ', F1 score :', F1, ', Precision :', Prec, 251 | ', Recall :', Rec) 252 | 253 | def 
log_F2F_results(metrics): 254 | ''' 255 | F2F-MaskRCNN result logging 256 | metrics: list of sequences (tuple(MOTA,F1,Precision,Recall)) 257 | ''' 258 | 259 | overall_mota = [] 260 | overall_F1 = [] 261 | overall_precision = [] 262 | overall_recall = [] 263 | 264 | for seq in metrics: 265 | 266 | mota, f1, precision, recall = seq[0], seq[1], seq[2], seq[3] 267 | 268 | overall_mota.append(mota) 269 | overall_F1.append(f1) 270 | overall_precision.append(precision) 271 | overall_recall.append(recall) 272 | 273 | mean_mota = np.array(overall_mota).mean() 274 | mean_f1 = np.array(overall_F1).mean() 275 | mean_precision = np.array(overall_precision).mean() 276 | mean_recall = np.array(overall_recall).mean() 277 | 278 | print('MOTA score :', mean_mota, ', F1 score :', mean_f1, ', Precision :', mean_precision, 279 | ', Recall :', mean_recall) 280 | -------------------------------------------------------------------------------- /BlenderProc/utils/binvox_rw.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Daniel Maturana 2 | # This file is part of binvox-rw-py. 3 | # 4 | # binvox-rw-py is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # binvox-rw-py is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with binvox-rw-py. If not, see . 16 | # 17 | # Modified by Christopher B. Choy 18 | # for python 3 support 19 | 20 | """ 21 | Binvox to Numpy and back. 22 | 23 | 24 | >>> import numpy as np 25 | >>> import binvox_rw 26 | >>> with open('chair.binvox', 'rb') as f: 27 | ... m1 = binvox_rw.read_as_3d_array(f) 28 | ... 29 | >>> m1.dims 30 | [32, 32, 32] 31 | >>> m1.scale 32 | 41.133000000000003 33 | >>> m1.translate 34 | [0.0, 0.0, 0.0] 35 | >>> with open('chair_out.binvox', 'wb') as f: 36 | ... m1.write(f) 37 | ... 38 | >>> with open('chair_out.binvox', 'rb') as f: 39 | ... m2 = binvox_rw.read_as_3d_array(f) 40 | ... 41 | >>> m1.dims==m2.dims 42 | True 43 | >>> m1.scale==m2.scale 44 | True 45 | >>> m1.translate==m2.translate 46 | True 47 | >>> np.all(m1.data==m2.data) 48 | True 49 | 50 | >>> with open('chair.binvox', 'rb') as f: 51 | ... md = binvox_rw.read_as_3d_array(f) 52 | ... 53 | >>> with open('chair.binvox', 'rb') as f: 54 | ... ms = binvox_rw.read_as_coord_array(f) 55 | ... 56 | >>> data_ds = binvox_rw.dense_to_sparse(md.data) 57 | >>> data_sd = binvox_rw.sparse_to_dense(ms.data, 32) 58 | >>> np.all(data_sd==md.data) 59 | True 60 | >>> # the ordering of elements returned by numpy.nonzero changes with axis 61 | >>> # ordering, so to compare for equality we first lexically sort the voxels. 62 | >>> np.all(ms.data[:, np.lexsort(ms.data)] == data_ds[:, np.lexsort(data_ds)]) 63 | True 64 | """ 65 | 66 | import numpy as np 67 | 68 | class Voxels(object): 69 | """ Holds a binvox model. 70 | data is either a three-dimensional numpy boolean array (dense representation) 71 | or a two-dimensional numpy float array (coordinate representation). 72 | 73 | dims, translate and scale are the model metadata. 74 | 75 | dims are the voxel dimensions, e.g. 
[32, 32, 32] for a 32x32x32 model. 76 | 77 | scale and translate relate the voxels to the original model coordinates. 78 | 79 | To translate voxel coordinates i, j, k to original coordinates x, y, z: 80 | 81 | x_n = (i+.5)/dims[0] 82 | y_n = (j+.5)/dims[1] 83 | z_n = (k+.5)/dims[2] 84 | x = scale*x_n + translate[0] 85 | y = scale*y_n + translate[1] 86 | z = scale*z_n + translate[2] 87 | 88 | """ 89 | 90 | def __init__(self, data, dims, translate, scale, axis_order): 91 | self.data = data 92 | self.dims = dims 93 | self.translate = translate 94 | self.scale = scale 95 | assert (axis_order in ('xzy', 'xyz')) 96 | self.axis_order = axis_order 97 | 98 | def clone(self): 99 | data = self.data.copy() 100 | dims = self.dims[:] 101 | translate = self.translate[:] 102 | return Voxels(data, dims, translate, self.scale, self.axis_order) 103 | 104 | def write(self, fp): 105 | write(self, fp) 106 | 107 | def read_header(fp): 108 | """ Read binvox header. Mostly meant for internal use. 109 | """ 110 | line = fp.readline().strip() 111 | if not line.startswith(b'#binvox'): 112 | raise IOError('Not a binvox file') 113 | dims = [int(i) for i in fp.readline().strip().split(b' ')[1:]] 114 | translate = [float(i) for i in fp.readline().strip().split(b' ')[1:]] 115 | scale = [float(i) for i in fp.readline().strip().split(b' ')[1:]][0] 116 | line = fp.readline() 117 | return dims, translate, scale 118 | 119 | def read_as_3d_array(fp, fix_coords=True): 120 | """ Read binary binvox format as array. 121 | 122 | Returns the model with accompanying metadata. 123 | 124 | Voxels are stored in a three-dimensional numpy array, which is simple and 125 | direct, but may use a lot of memory for large models. (Storage requirements 126 | are 8*(d^3) bytes, where d is the dimensions of the binvox model. Numpy 127 | boolean arrays use a byte per element). 128 | 129 | Doesn't do any checks on input except for the '#binvox' line. 130 | """ 131 | dims, translate, scale = read_header(fp) 132 | raw_data = np.frombuffer(fp.read(), dtype=np.uint8) 133 | # if just using reshape() on the raw data: 134 | # indexing the array as array[i,j,k], the indices map into the 135 | # coords as: 136 | # i -> x 137 | # j -> z 138 | # k -> y 139 | # if fix_coords is true, then data is rearranged so that 140 | # mapping is 141 | # i -> x 142 | # j -> y 143 | # k -> z 144 | values, counts = raw_data[::2], raw_data[1::2] 145 | data = np.repeat(values, counts).astype(np.bool) 146 | data = data.reshape(dims) 147 | if fix_coords: 148 | # xzy to xyz TODO the right thing 149 | data = np.transpose(data, (0, 2, 1)) 150 | axis_order = 'xyz' 151 | else: 152 | axis_order = 'xzy' 153 | return Voxels(data, dims, translate, scale, axis_order) 154 | 155 | 156 | def read_as_coord_array(fp, fix_coords=True): 157 | """ Read binary binvox format as coordinates. 158 | 159 | Returns binvox model with voxels in a "coordinate" representation, i.e. an 160 | 3 x N array where N is the number of nonzero voxels. Each column 161 | corresponds to a nonzero voxel and the 3 rows are the (x, z, y) coordinates 162 | of the voxel. (The odd ordering is due to the way binvox format lays out 163 | data). Note that coordinates refer to the binvox voxels, without any 164 | scaling or translation. 165 | 166 | Use this to save memory if your model is very sparse (mostly empty). 167 | 168 | Doesn't do any checks on input except for the '#binvox' line. 
169 | """ 170 | dims, translate, scale = read_header(fp) 171 | raw_data = np.frombuffer(fp.read(), dtype=np.uint8) 172 | 173 | values, counts = raw_data[::2], raw_data[1::2] 174 | 175 | sz = np.prod(dims) 176 | index, end_index = 0, 0 177 | end_indices = np.cumsum(counts) 178 | indices = np.concatenate(([0], end_indices[:-1])).astype(end_indices.dtype) 179 | 180 | values = values.astype(np.bool) 181 | indices = indices[values] 182 | end_indices = end_indices[values] 183 | 184 | nz_voxels = [] 185 | for index, end_index in zip(indices, end_indices): 186 | nz_voxels.extend(range(index, end_index)) 187 | nz_voxels = np.array(nz_voxels) 188 | # TODO are these dims correct? 189 | # according to docs, 190 | # index = x * wxh + z * width + y; // wxh = width * height = d * d 191 | 192 | x = nz_voxels / (dims[0]*dims[1]) 193 | zwpy = nz_voxels % (dims[0]*dims[1]) # z*w + y 194 | z = zwpy / dims[0] 195 | y = zwpy % dims[0] 196 | if fix_coords: 197 | data = np.vstack((x, y, z)) 198 | axis_order = 'xyz' 199 | else: 200 | data = np.vstack((x, z, y)) 201 | axis_order = 'xzy' 202 | 203 | #return Voxels(data, dims, translate, scale, axis_order) 204 | return Voxels(np.ascontiguousarray(data), dims, translate, scale, axis_order) 205 | 206 | def dense_to_sparse(voxel_data, dtype=np.int): 207 | """ From dense representation to sparse (coordinate) representation. 208 | No coordinate reordering. 209 | """ 210 | if voxel_data.ndim!=3: 211 | raise ValueError('voxel_data is wrong shape; should be 3D array.') 212 | return np.asarray(np.nonzero(voxel_data), dtype) 213 | 214 | def sparse_to_dense(voxel_data, dims, dtype=np.bool): 215 | if voxel_data.ndim!=2 or voxel_data.shape[0]!=3: 216 | raise ValueError('voxel_data is wrong shape; should be 3xN array.') 217 | if np.isscalar(dims): 218 | dims = [dims]*3 219 | dims = np.atleast_2d(dims).T 220 | # truncate to integers 221 | xyz = voxel_data.astype(np.int) 222 | # discard voxels that fall outside dims 223 | valid_ix = ~np.any((xyz < 0) | (xyz >= dims), 0) 224 | xyz = xyz[:,valid_ix] 225 | out = np.zeros(dims.flatten(), dtype=dtype) 226 | out[tuple(xyz)] = True 227 | return out 228 | 229 | #def get_linear_index(x, y, z, dims): 230 | #""" Assuming xzy order. (y increasing fastest. 231 | #TODO ensure this is right when dims are not all same 232 | #""" 233 | #return x*(dims[1]*dims[2]) + z*dims[1] + y 234 | 235 | def write(voxel_model, fp): 236 | """ Write binary binvox format. 237 | 238 | Note that when saving a model in sparse (coordinate) format, it is first 239 | converted to dense format. 240 | 241 | Doesn't check if the model is 'sane'. 
242 | 243 | """ 244 | if voxel_model.data.ndim==2: 245 | # TODO avoid conversion to dense 246 | dense_voxel_data = sparse_to_dense(voxel_model.data, voxel_model.dims) 247 | else: 248 | dense_voxel_data = voxel_model.data 249 | 250 | fp.write(b'#binvox 1\n') 251 | fp.write(str.encode('dim '+' '.join(map(str, voxel_model.dims))+'\n')) 252 | fp.write(str.encode('translate '+' '.join(map(str, voxel_model.translate))+'\n')) 253 | fp.write(str.encode('scale '+str(voxel_model.scale)+'\n')) 254 | fp.write(b'data\n') 255 | if not voxel_model.axis_order in ('xzy', 'xyz'): 256 | raise ValueError('Unsupported voxel model axis order') 257 | 258 | if voxel_model.axis_order=='xzy': 259 | voxels_flat = dense_voxel_data.flatten() 260 | elif voxel_model.axis_order=='xyz': 261 | voxels_flat = np.transpose(dense_voxel_data, (0, 2, 1)).flatten() 262 | 263 | # keep a sort of state machine for writing run length encoding 264 | state = voxels_flat[0] 265 | ctr = 0 266 | for c in voxels_flat: 267 | if c==state: 268 | ctr += 1 269 | # if ctr hits max, dump 270 | if ctr==255: 271 | #fp.write(str.encode(chr(state))) 272 | #fp.write(str.encode(chr(ctr))) 273 | fp.write(str.encode(chr(state),encoding='latin-1')) 274 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 275 | ctr = 0 276 | else: 277 | # if switch state, dump 278 | #fp.write(str.encode(chr(state))) 279 | #fp.write(str.encode(chr(ctr))) 280 | fp.write(str.encode(chr(state),encoding='latin-1')) 281 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 282 | state = c 283 | ctr = 1 284 | # flush out remainders 285 | if ctr > 0: 286 | #fp.write(str.encode(chr(state))) 287 | #fp.write(str.encode(chr(ctr))) 288 | fp.write(str.encode(chr(state),encoding='latin-1')) 289 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 290 | 291 | 292 | if __name__ == '__main__': 293 | import doctest 294 | doctest.testmod() 295 | -------------------------------------------------------------------------------- /Tracking/networks/mpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_scatter import scatter_mean, scatter_max, scatter_add 4 | 5 | from Tracking.networks.mlp import MLP 6 | 7 | 8 | class MetaLayer(nn.Module): # Single Layer of Message Passing Network 9 | 10 | def __init__(self, edge_model=None, node_model=None): 11 | super(MetaLayer, self).__init__() 12 | 13 | self.edge_model = edge_model 14 | self.node_model = node_model 15 | self.reset_parameters() 16 | 17 | def reset_parameters(self): 18 | for item in [self.node_model, self.edge_model]: 19 | if hasattr(item, 'reset_parameters'): 20 | item.reset_parameters() 21 | 22 | def forward(self, x, edge_index, edge_attr): 23 | """ 24 | Does a single node and edge feature vectors update. 25 | Args: 26 | x: node features matrix with shape [num_nodes, num_node_features] 27 | edge_index: tensor with shape [2, M], with M being the number of edges, indicating nonzero entries in the graph adjacency (i.e. 
edges) 28 | edge_attr: edge features matrix (ordered by edge_index) 29 | Returns: Updated Node and Edge Feature matrices 30 | """ 31 | row, col = edge_index # row = start node, col = destination node 32 | 33 | # Edge Update 34 | edge_attr = self.edge_model(x[row], x[col], edge_attr) # edge update with nodes i,j and edge ij 35 | 36 | # Node Update 37 | x = self.node_model(x, edge_index, edge_attr) # node i update with node i at t-1 and edge ij at t 38 | 39 | return x, edge_attr 40 | 41 | def __repr__(self): 42 | return '{}(edge_model={}, node_model={})'.format(self.__class__.__name__, self.edge_model, self.node_model) 43 | 44 | class EdgeModel(nn.Module): 45 | 46 | def __init__(self, edge_mlp): 47 | super(EdgeModel, self).__init__() 48 | self.edge_mlp = edge_mlp 49 | 50 | def forward(self, source, target, edge_attr): 51 | out = torch.cat([source, target, edge_attr], dim=1) 52 | return self.edge_mlp(out) 53 | 54 | class NodeModel(nn.Module): 55 | ''' 56 | try, except to avoid cuda error 57 | ''' 58 | 59 | def __init__(self, node_mlp, node_agg_fn): 60 | super(NodeModel, self).__init__() 61 | 62 | self.node_mlp = node_mlp 63 | self.node_agg_fn = node_agg_fn 64 | 65 | def forward(self, x, edge_index, edge_attr): 66 | 67 | row, col = edge_index 68 | 69 | message = self.node_agg_fn(edge_attr, row, x.size(0)) # node_i x edge_dim 70 | 71 | node_message = torch.cat([x, message], dim=1) 72 | return self.node_mlp(node_message) 73 | 74 | class TimeAwareNodeModel(nn.Module): 75 | """ 76 | Class used to perform the node update during neural message passing 77 | """ 78 | def __init__(self, flow_in_mlp, flow_out_mlp, node_mlp, node_agg_fn): 79 | super(TimeAwareNodeModel, self).__init__() 80 | 81 | self.flow_in_mlp = flow_in_mlp 82 | self.flow_out_mlp = flow_out_mlp 83 | self.node_mlp = node_mlp 84 | self.node_agg_fn = node_agg_fn 85 | 86 | def forward(self, x, edge_index, edge_attr): 87 | row, col = edge_index 88 | flow_out_mask = row < col 89 | flow_out_row, flow_out_col = row[flow_out_mask], col[flow_out_mask] 90 | flow_out_input = torch.cat([x[flow_out_col], edge_attr[flow_out_mask]], dim=1) 91 | flow_out = self.flow_out_mlp(flow_out_input) 92 | flow_out = self.node_agg_fn(flow_out, flow_out_row, x.size(0)) 93 | 94 | flow_in_mask = row > col 95 | flow_in_row, flow_in_col = row[flow_in_mask], col[flow_in_mask] 96 | flow_in_input = torch.cat([x[flow_in_col], edge_attr[flow_in_mask]], dim=1) 97 | flow_in = self.flow_in_mlp(flow_in_input) 98 | 99 | flow_in = self.node_agg_fn(flow_in, flow_in_row, x.size(0)) 100 | flow = torch.cat((flow_in, flow_out), dim=1) 101 | 102 | return self.node_mlp(flow) 103 | 104 | class MLPGraphIndependent(nn.Module): 105 | 106 | def __init__(self, edge_in_dim = None, edge_out_dim = None, edge_fc_dims = None, 107 | dropout_p = None, use_batchnorm = None, use_leaky_relu=False): 108 | super(MLPGraphIndependent, self).__init__() 109 | 110 | self.edge_mlp = MLP(input_dim=edge_in_dim, fc_dims=list(edge_fc_dims) + [edge_out_dim], 111 | dropout_p=dropout_p, use_batchnorm=use_batchnorm, use_leaky_relu=use_leaky_relu) 112 | 113 | def forward(self, edge_feats = None): 114 | 115 | out_edge_feats = self.edge_mlp(edge_feats) 116 | 117 | return out_edge_feats 118 | 119 | class MPGraph(nn.Module): 120 | """ 121 | Main Model Class. Contains all the components of the model.
It consists of several networks: 122 | Edge Encoder: MLP encodes initial edge embedding 123 | Edge MLP: Updates edge embedding with Nodes i, j and Edge ij 124 | Node MLP: Updates node embedding with Node i and Edge ij 125 | """ 126 | 127 | def __init__(self, model_params, time_aware_mp=False, use_leaky_relu=True): 128 | super(MPGraph, self).__init__() 129 | 130 | self.model_params = model_params 131 | if use_leaky_relu: 132 | self.relu = nn.LeakyReLU() 133 | else: 134 | self.relu = nn.ReLU() 135 | 136 | # Define Encoder Network 137 | encoder_feats_dict = model_params['encoder_feats_dict'] 138 | self.encoder = MLPGraphIndependent(edge_in_dim=encoder_feats_dict['edge_in_dim'], 139 | edge_fc_dims=encoder_feats_dict['edge_fc_dims'], 140 | edge_out_dim=encoder_feats_dict['edge_out_dim'], 141 | use_leaky_relu=use_leaky_relu) 142 | 143 | # Define the 'Core' message passing network (i.e. node and edge update models) 144 | self.MPNet = self._build_core_MPNet(model_params=model_params, encoder_feats_dict=encoder_feats_dict, time_aware_mp=time_aware_mp, use_leaky_relu=use_leaky_relu) 145 | self.num_mp_steps = model_params['num_mp_steps'] 146 | 147 | def _build_core_MPNet(self, model_params, encoder_feats_dict, time_aware_mp, use_leaky_relu=None): 148 | # Define an aggregation operator for nodes to 'gather' messages from incident edges 149 | node_agg_fn = model_params['node_agg_fn'] 150 | assert node_agg_fn.lower() in ('mean', 'max', 'sum'), "node_agg_fn can only be 'max', 'mean' or 'sum'." 151 | 152 | if node_agg_fn == 'mean': 153 | node_agg_fn = lambda out, row, x_size: scatter_mean(out, row, dim=0, dim_size=x_size) # out=source tensor, row=index to scatter, dim_size=same size as num nodes = x.0 154 | 155 | elif node_agg_fn == 'max': 156 | node_agg_fn = lambda out, row, x_size: scatter_max(out, row, dim=0, dim_size=x_size)[0] 157 | 158 | elif node_agg_fn == 'sum': 159 | node_agg_fn = lambda out, row, x_size: scatter_add(out, row, dim=0, dim_size=x_size) 160 | 161 | # Define all MLPs involved in the graph network 162 | self.reattach_initial_nodes = model_params['reattach_initial_nodes'] 163 | self.reattach_initial_edges = model_params['reattach_initial_edges'] 164 | 165 | edge_factor = 2 if self.reattach_initial_edges else 1 166 | node_factor = 2 if self.reattach_initial_nodes else 1 167 | 168 | edge_model_in_dim = node_factor * 2 * encoder_feats_dict['node_out_dim'] + edge_factor * encoder_feats_dict[ 169 | 'edge_out_dim'] # h_i, h_j, h_ij 170 | node_model_in_dim = node_factor * encoder_feats_dict['node_out_dim'] + encoder_feats_dict['edge_out_dim'] 171 | 172 | # Define all MLPs used within the MPN 173 | edge_model_feats_dict = model_params['edge_model_feats_dict'] 174 | node_model_feats_dict = model_params['node_model_feats_dict'] 175 | 176 | edge_mlp = MLP(input_dim=edge_model_in_dim, 177 | fc_dims=edge_model_feats_dict['fc_dims'], 178 | dropout_p=edge_model_feats_dict['dropout_p'], 179 | use_batchnorm=edge_model_feats_dict['use_batchnorm'], 180 | use_leaky_relu=use_leaky_relu) 181 | 182 | if time_aware_mp: 183 | 184 | node_mlp = MLP(input_dim=2 * encoder_feats_dict['node_out_dim'], 185 | fc_dims=node_model_feats_dict['fc_dims'], 186 | dropout_p=node_model_feats_dict['dropout_p'], 187 | use_batchnorm=node_model_feats_dict['use_batchnorm'], 188 | use_leaky_relu=use_leaky_relu) 189 | 190 | flow_in_mlp = MLP(input_dim=node_model_in_dim, 191 | fc_dims=node_model_feats_dict['fc_dims'], 192 | dropout_p=None, 193 | use_batchnorm=False, 194 | use_leaky_relu=use_leaky_relu) 195 | 196 | flow_out_mlp = 
MLP(input_dim=node_model_in_dim, 197 | fc_dims=node_model_feats_dict['fc_dims'], 198 | dropout_p=None, 199 | use_batchnorm=False, 200 | use_leaky_relu=use_leaky_relu) 201 | 202 | # Define all MLPs used within the MPN 203 | return MetaLayer(edge_model=EdgeModel(edge_mlp=edge_mlp), 204 | node_model=TimeAwareNodeModel(flow_in_mlp=flow_in_mlp, flow_out_mlp=flow_out_mlp, 205 | node_mlp=node_mlp, node_agg_fn=node_agg_fn)) 206 | 207 | else: 208 | 209 | node_mlp = MLP(input_dim=node_model_in_dim, 210 | fc_dims=node_model_feats_dict['fc_dims'], 211 | dropout_p=node_model_feats_dict['dropout_p'], 212 | use_batchnorm=node_model_feats_dict['use_batchnorm']) 213 | 214 | # Define all MLPs used within the MPN 215 | return MetaLayer(edge_model=EdgeModel(edge_mlp=edge_mlp), 216 | node_model=NodeModel(node_mlp=node_mlp, node_agg_fn=node_agg_fn)) 217 | 218 | 219 | 220 | def forward(self, data): 221 | """ 222 | Provides a fractional solution to the data association problem. 223 | First, node and edge features are independently encoded by the encoder network. Then, they are iteratively 224 | 'combined' for a fixed number of steps via the Message Passing Network (self.MPNet). 225 | """ 226 | 227 | x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr 228 | 229 | # Encoding features step 230 | latent_edge_feats = self.encoder(edge_feats=edge_attr) 231 | latent_node_feats = self.relu(x) 232 | #latent_node_feats = x 233 | initial_edge_feats = latent_edge_feats 234 | initial_node_feats = latent_node_feats 235 | 236 | outputs = [] 237 | 238 | # During training, the feature vectors that the MPNetwork outputs for the last self.num_class_steps message 239 | # passing steps are classified in order to compute the loss. 240 | for step in range(1, self.num_mp_steps + 1): 241 | 242 | # Reattach the initially encoded embeddings before the update 243 | if self.reattach_initial_edges: 244 | latent_edge_feats = torch.cat((initial_edge_feats, latent_edge_feats), dim=1) 245 | if self.reattach_initial_nodes: 246 | latent_node_feats = torch.cat((initial_node_feats, latent_node_feats), dim=1) 247 | 248 | # Message Passing Step 249 | latent_node_feats, latent_edge_feats = self.MPNet(latent_node_feats, edge_index, latent_edge_feats) 250 | 251 | if step > 1: # For classifying edges at multiple message passing step times 252 | outputs.append(latent_edge_feats) 253 | 254 | return outputs#[latent_edge_feats] 255 | -------------------------------------------------------------------------------- /Detection/data/mapper_heads.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | 6 | import copy 7 | import logging 8 | import numpy as np 9 | from typing import List, Optional, Union 10 | import torch 11 | import cv2 12 | import h5py 13 | 14 | from detectron2.config import configurable 15 | from detectron2.structures import BoxMode 16 | from detectron2.structures import polygons_to_bitmask 17 | from detectron2.utils.visualizer import GenericMask 18 | from detectron2.data import DatasetMapper 19 | 20 | from BlenderProc.utils import binvox_rw 21 | 22 | import detectron2.data.detection_utils as utils 23 | import detectron2.data.transforms as T 24 | 25 | 26 | sys.path.append('..') #Hack add ROOT DIR 27 | from Detection.utils.train_utils import crop_segmask, get_voxel 28 | 29 | __all__ = ["VoxNocsMapper", "VoxMapper"] 30 | 31 | class VoxNocsMapper: 32 | ''' 33 | Dataset mapper class to handle 
MOTFront data with a Detectron2 network training pipeline with Voxel and NOCs head 34 | ''' 35 | 36 | def __init__(self, cfg, use_instance_mask: bool = False, instance_mask_format: str = "polygon", 37 | recompute_boxes: bool = False, is_train=True, dataset_names=None,): 38 | 39 | if recompute_boxes: 40 | assert use_instance_mask, "recompute_boxes requires instance masks" 41 | self.is_train = is_train 42 | self.augmentations = None # list with augmentations NOT IMPLEMENTED YET 43 | self.cfg = cfg 44 | self.image_format = cfg.INPUT.FORMAT 45 | self.use_instance_mask = use_instance_mask 46 | self.instance_mask_format = instance_mask_format 47 | self.recompute_boxes = recompute_boxes 48 | # fmt: on 49 | logger = logging.getLogger(__name__) 50 | mode = "training" if is_train else "inference" 51 | logger.info(f"[DatasetMapper] Augmentations used in {mode}: {self.augmentations}") 52 | 53 | self.dataset_names = dataset_names 54 | self.voxel_on = cfg.MODEL.VOXEL_ON 55 | self.nocs_on = cfg.MODEL.NOCS_ON 56 | 57 | def _transform_annotations(self, dataset_dict, transforms, image_shape): 58 | annos = [ 59 | utils.transform_instance_annotations( 60 | obj, transforms, image_shape, keypoint_hflip_indices=None 61 | ) 62 | for obj in dataset_dict['annotations'] 63 | ] 64 | instances = utils.annotations_to_instances( 65 | annos, image_shape, mask_format=self.instance_mask_format 66 | ) 67 | 68 | if self.recompute_boxes: 69 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 70 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 71 | 72 | def __call__(self, dataset_dict): 73 | dataset_dict = copy.deepcopy(dataset_dict) 74 | 75 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) # H x W x C 76 | utils.check_image_size(dataset_dict, image) 77 | 78 | sem_seg_gt = None 79 | self.augmentations = T.AugmentationList(utils.build_augmentation(self.cfg, self.is_train)) 80 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 81 | transforms = self.augmentations(aug_input) 82 | image, sem_seg_gt = aug_input.image, aug_input.sem_seg 83 | image_shape = image.shape[:2] # h, w 84 | 85 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) # C x H x W 86 | 87 | if not self.is_train: 88 | pass 89 | #dataset_dict.pop("annotations", None) 90 | #return dataset_dict 91 | 92 | nocs_map = self.get_nocs(dataset_dict["nocs_map"]) 93 | depth_map = self.load_hdf5(dataset_dict["depth_map"]) 94 | 95 | dataset_dict["depth_map"] = depth_map 96 | dataset_dict["nocs_map"] = nocs_map 97 | 98 | for anno in dataset_dict['annotations']: 99 | voxel = get_voxel(anno["voxel"], anno["scale"]) 100 | nocs_obj = crop_segmask(nocs_map, anno['bbox'], anno['segmentation']) 101 | depth_obj = self.crop_depth(depth_map, anno['bbox'], anno['segmentation']) 102 | 103 | anno["voxel"] = voxel 104 | anno["nocs"] = nocs_obj 105 | anno["depth"] = depth_obj 106 | 107 | if "annotations" in dataset_dict: 108 | self._transform_annotations(dataset_dict, transforms, image_shape) 109 | 110 | if self.voxel_on: 111 | count = 0 112 | for anno in dataset_dict['annotations']: 113 | if count == 0: 114 | gt_voxels = torch.unsqueeze(anno['voxel'], 0) 115 | else: 116 | gt_voxel = torch.unsqueeze(anno['voxel'], 0) 117 | gt_voxels = torch.cat((gt_voxels, gt_voxel), 0) 118 | count += 1 119 | 120 | dataset_dict['instances'].set('gt_voxels', gt_voxels) 121 | 122 | if self.nocs_on: 123 | max_height, max_width = self.get_max_dims(dataset_dict['annotations']) 124 | count = 0 125 | for anno in 
dataset_dict['annotations']: 126 | width = anno['nocs'].shape[1] 127 | height = anno['nocs'].shape[0] 128 | p2d = (0, 0, 0, max_width - width, 0, max_height - height) # pad image to right 129 | if count == 0: 130 | gt_nocs = torch.unsqueeze(anno['nocs'], 0) # 1 x H x W x 3 131 | gt_nocs = F.pad(gt_nocs, p2d, "constant", 300) # 300 not a pixel value # 1 x maxH x maxW x 3 132 | else: 133 | gt_noc = torch.unsqueeze(anno['nocs'], 0) 134 | gt_noc = F.pad(gt_noc, p2d, "constant", 300) # 300 not a pixel value 135 | gt_nocs = torch.cat((gt_nocs, gt_noc), 0) 136 | count += 1 137 | 138 | dataset_dict['instances'].set('gt_nocs', gt_nocs) 139 | 140 | return dataset_dict 141 | 142 | 143 | @staticmethod 144 | def get_max_dims(dset): 145 | ''' 146 | padding image crops 147 | ''' 148 | 149 | max_height = 0 150 | max_width = 0 151 | 152 | for anno in dset: 153 | height, width = anno['nocs'].shape[0], anno['nocs'].shape[1] 154 | 155 | if height >= max_height: 156 | max_height = height 157 | 158 | if width >= max_width: 159 | max_width = width 160 | 161 | return max_height, max_width 162 | 163 | @staticmethod 164 | def get_nocs(nocs_path): 165 | 166 | nocs = cv2.imread(nocs_path, -1) #BGRA 167 | nocs = nocs[:,:,:3] 168 | nocs = np.array(nocs[:, :, ::-1], dtype=np.float32) # RGB 169 | 170 | return nocs 171 | 172 | @staticmethod 173 | def crop_depth(depth_img, bbox, segmap): 174 | 175 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 176 | 177 | gm = GenericMask(segmap, 240, 320) 178 | bin_mask = gm.polygons_to_mask(segmap) 179 | binary_mask = bin_mask[:, :] 180 | crop_im = np.multiply(depth_img, binary_mask) 181 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2])]) 182 | 183 | return torch.from_numpy(cropped_im).to(torch.float32) 184 | 185 | @staticmethod 186 | def load_hdf5(path): 187 | with h5py.File(path, 'r') as data: 188 | for key in data.keys(): 189 | if key == 'depth': 190 | depth = np.array(data[key]) 191 | 192 | return depth 193 | 194 | 195 | class VoxMapper: 196 | ''' 197 | Dataset mapper class to handle MOTFront data with a Detectron2 network training pipeline with Voxel head 198 | ''' 199 | 200 | def __init__( 201 | self, 202 | cfg, 203 | use_instance_mask: bool = False, 204 | instance_mask_format: str = "polygon", 205 | recompute_boxes: bool = False, 206 | is_train=True, 207 | dataset_names=None, 208 | ): 209 | if recompute_boxes: 210 | assert use_instance_mask, "recompute_boxes requires instance masks" 211 | # fmt: off 212 | self.is_train = is_train 213 | self.augmentations = None # list with augmentations NOT IMPLEMENTED YET 214 | self.cfg = cfg 215 | self.image_format = cfg.INPUT.FORMAT 216 | self.use_instance_mask = use_instance_mask 217 | self.instance_mask_format = instance_mask_format 218 | self.recompute_boxes = recompute_boxes 219 | # fmt: on 220 | logger = logging.getLogger(__name__) 221 | mode = "training" if is_train else "inference" 222 | logger.info(f"[DatasetMapper] Augmentations used in {mode}: {self.augmentations}") 223 | 224 | self.dataset_names = dataset_names 225 | self.voxel_on = cfg.MODEL.VOXEL_ON 226 | self.nocs_on = cfg.MODEL.NOCS_ON 227 | 228 | 229 | def _transform_annotations(self, dataset_dict, transforms, image_shape): 230 | 231 | 232 | annos = [ 233 | utils.transform_instance_annotations( 234 | obj, transforms, image_shape, keypoint_hflip_indices=None 235 | ) 236 | for obj in dataset_dict['annotations'] 237 | ] 238 | instances = utils.annotations_to_instances( 239 | 
annos, image_shape, mask_format=self.instance_mask_format 240 | ) 241 | 242 | if self.recompute_boxes: 243 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 244 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 245 | 246 | def __call__(self, dataset_dict): 247 | dataset_dict = copy.deepcopy(dataset_dict) 248 | 249 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) # H x W x C 250 | utils.check_image_size(dataset_dict, image) 251 | 252 | sem_seg_gt = None 253 | self.augmentations = T.AugmentationList(utils.build_augmentation(self.cfg, self.is_train)) 254 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 255 | transforms = self.augmentations(aug_input) 256 | image, sem_seg_gt = aug_input.image, aug_input.sem_seg 257 | 258 | image_shape = image.shape[:2] # h, w 259 | 260 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) # C x H x W 261 | 262 | if not self.is_train: 263 | pass 264 | #dataset_dict.pop("annotations", None) 265 | #return dataset_dict 266 | 267 | for anno in dataset_dict['annotations']: 268 | voxel = anno["voxel"] 269 | anno["voxel"] = voxel 270 | 271 | 272 | if "annotations" in dataset_dict: 273 | self._transform_annotations(dataset_dict, transforms, image_shape) 274 | 275 | if self.voxel_on: 276 | count = 0 277 | for anno in dataset_dict['annotations']: 278 | if count == 0: 279 | gt_voxels = torch.unsqueeze(anno['voxel'], 0) 280 | else: 281 | gt_voxel = torch.unsqueeze(anno['voxel'], 0) 282 | gt_voxels = torch.cat((gt_voxels, gt_voxel), 0) 283 | count += 1 284 | 285 | dataset_dict['instances'].set('gt_voxels', gt_voxels) 286 | 287 | return dataset_dict 288 | 289 | 290 | @staticmethod 291 | def get_max_dims(dset): 292 | ''' 293 | padding image crops 294 | ''' 295 | 296 | max_height = 0 297 | max_width = 0 298 | 299 | for anno in dset: 300 | height, width = anno['nocs'].shape[0], anno['nocs'].shape[1] 301 | 302 | if height >= max_height: 303 | max_height = height 304 | 305 | if width >= max_width: 306 | max_width = width 307 | 308 | return max_height, max_width -------------------------------------------------------------------------------- /Detection/evaluator/_mask.pyx: -------------------------------------------------------------------------------- 1 | import sys 2 | PYTHON_VERSION = sys.version_info[0] 3 | 4 | # import both Python-level and C-level symbols of Numpy 5 | # the API uses Numpy to interface C and Python 6 | import numpy as np 7 | cimport numpy as np 8 | from libc.stdlib cimport malloc, free 9 | 10 | # intialized Numpy. must do. 
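# np.import_array() loads NumPy's C-API function table; it must run before any PyArray_* call in this module, otherwise those calls would crash at runtime.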
11 | np.import_array() 12 | 13 | # import numpy C function 14 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 15 | cdef extern from "numpy/arrayobject.h": 16 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 17 | 18 | # Declare the prototype of the C functions in MaskApi.h 19 | cdef extern from "maskApi.h": 20 | ctypedef unsigned int uint 21 | ctypedef unsigned long siz 22 | ctypedef unsigned char byte 23 | ctypedef double* BB 24 | ctypedef struct RLE: 25 | siz h, 26 | siz w, 27 | siz m, 28 | uint* cnts, 29 | void rlesInit( RLE **R, siz n ) 30 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 31 | void rleDecode( const RLE *R, byte *mask, siz n ) 32 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 33 | void rleArea( const RLE *R, siz n, uint *a ) 34 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 35 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 36 | void rleToBbox( const RLE *R, BB bb, siz n ) 37 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 38 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 39 | char* rleToString( const RLE *R ) 40 | void rleFrString( RLE *R, char *s, siz h, siz w ) 41 | 42 | # python class to wrap RLE array in C 43 | # the class handles the memory allocation and deallocation 44 | cdef class RLEs: 45 | cdef RLE *_R 46 | cdef siz _n 47 | 48 | def __cinit__(self, siz n =0): 49 | rlesInit(&self._R, n) 50 | self._n = n 51 | 52 | # free the RLE array here 53 | def __dealloc__(self): 54 | if self._R is not NULL: 55 | for i in range(self._n): 56 | free(self._R[i].cnts) 57 | free(self._R) 58 | def __getattr__(self, key): 59 | if key == 'n': 60 | return self._n 61 | raise AttributeError(key) 62 | 63 | # python class to wrap Mask array in C 64 | # the class handles the memory allocation and deallocation 65 | cdef class Masks: 66 | cdef byte *_mask 67 | cdef siz _h 68 | cdef siz _w 69 | cdef siz _n 70 | 71 | def __cinit__(self, h, w, n): 72 | self._mask = malloc(h*w*n* sizeof(byte)) 73 | self._h = h 74 | self._w = w 75 | self._n = n 76 | # def __dealloc__(self): 77 | # the memory management of _mask has been passed to np.ndarray 78 | # it doesn't need to be freed here 79 | 80 | # called when passing into np.array() and return an np.ndarray in column-major order 81 | def __array__(self): 82 | cdef np.npy_intp shape[1] 83 | shape[0] = self._h*self._w*self._n 84 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 85 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 86 | # The _mask allocated by Masks is now handled by ndarray 87 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 88 | return ndarray 89 | 90 | # internal conversion from Python RLEs object to compressed RLE format 91 | def _toString(RLEs Rs): 92 | cdef siz n = Rs.n 93 | cdef bytes py_string 94 | cdef char* c_string 95 | objs = [] 96 | for i in range(n): 97 | c_string = rleToString( &Rs._R[i] ) 98 | py_string = c_string 99 | objs.append({ 100 | 'size': [Rs._R[i].h, Rs._R[i].w], 101 | 'counts': py_string 102 | }) 103 | free(c_string) 104 | return objs 105 | 106 | # internal conversion from compressed RLE format to Python RLEs object 107 | def _frString(rleObjs): 108 | cdef siz n = len(rleObjs) 109 | Rs = RLEs(n) 110 | cdef bytes py_string 111 | cdef char* c_string 112 | for i, obj in enumerate(rleObjs): 113 | if PYTHON_VERSION == 2: 114 | py_string = str(obj['counts']).encode('utf8') 
115 | elif PYTHON_VERSION == 3: 116 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 117 | else: 118 | raise Exception('Python version must be 2 or 3') 119 | c_string = py_string 120 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 121 | return Rs 122 | 123 | # encode mask to RLEs objects 124 | # list of RLE string can be generated by RLEs member function 125 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 126 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 127 | cdef RLEs Rs = RLEs(n) 128 | rleEncode(Rs._R,mask.data,h,w,n) 129 | objs = _toString(Rs) 130 | return objs 131 | 132 | # decode mask from compressed list of RLE string or RLEs object 133 | def decode(rleObjs): 134 | cdef RLEs Rs = _frString(rleObjs) 135 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 136 | masks = Masks(h, w, n) 137 | rleDecode(Rs._R, masks._mask, n); 138 | return np.array(masks) 139 | 140 | def merge(rleObjs, intersect=0): 141 | cdef RLEs Rs = _frString(rleObjs) 142 | cdef RLEs R = RLEs(1) 143 | rleMerge(Rs._R, R._R, Rs._n, intersect) 144 | obj = _toString(R)[0] 145 | return obj 146 | 147 | def area(rleObjs): 148 | cdef RLEs Rs = _frString(rleObjs) 149 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 150 | rleArea(Rs._R, Rs._n, _a) 151 | cdef np.npy_intp shape[1] 152 | shape[0] = Rs._n 153 | a = np.array((Rs._n, ), dtype=np.uint8) 154 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 155 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 156 | return a 157 | 158 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 159 | def iou( dt, gt, pyiscrowd ): 160 | def _preproc(objs): 161 | if len(objs) == 0: 162 | return objs 163 | if type(objs) == np.ndarray: 164 | if len(objs.shape) == 1: 165 | objs = objs.reshape((objs[0], 1)) 166 | # check if it's Nx4 bbox 167 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 168 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 169 | objs = objs.astype(np.double) 170 | elif type(objs) == list: 171 | # check if list is in box format and convert it to np.ndarray 172 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 173 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 174 | if isbox: 175 | objs = np.array(objs, dtype=np.double) 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((1,objs.shape[0])) 178 | elif isrle: 179 | objs = _frString(objs) 180 | else: 181 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 182 | else: 183 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 184 | return objs 185 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 186 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 187 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 188 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 189 | def _len(obj): 190 | cdef siz N = 0 191 | if type(obj) == RLEs: 192 | N = obj.n 193 | elif len(obj)==0: 194 | pass 195 | elif type(obj) == np.ndarray: 196 | N = obj.shape[0] 197 | return N 198 | # convert iscrowd to numpy array 199 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 200 | # simple type checking 201 | cdef siz m, n 202 | dt = _preproc(dt) 203 | gt = _preproc(gt) 204 | m = _len(dt) 205 | n = _len(gt) 206 | if m == 0 or n == 0: 207 | return [] 208 | if not type(dt) == type(gt): 209 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 210 | 211 | # define local variables 212 | cdef double* _iou = 0 213 | cdef np.npy_intp shape[1] 214 | # check type and assign iou function 215 | if type(dt) == RLEs: 216 | _iouFun = _rleIou 217 | elif type(dt) == np.ndarray: 218 | _iouFun = _bbIou 219 | else: 220 | raise Exception('input data type not allowed.') 221 | _iou = malloc(m*n* sizeof(double)) 222 | iou = np.zeros((m*n, ), dtype=np.double) 223 | shape[0] = m*n 224 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 225 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 226 | _iouFun(dt, gt, iscrowd, m, n, iou) 227 | return iou.reshape((m,n), order='F') 228 | 229 | def toBbox( rleObjs ): 230 | cdef RLEs Rs = _frString(rleObjs) 231 | cdef siz n = Rs.n 232 | cdef BB _bb = malloc(4*n* sizeof(double)) 233 | rleToBbox( Rs._R, _bb, n ) 234 | cdef np.npy_intp shape[1] 235 | shape[0] = 4*n 236 | bb = np.array((1,4*n), dtype=np.double) 237 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 238 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 239 | return bb 240 | 241 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 242 | cdef siz n = bb.shape[0] 243 | Rs = RLEs(n) 244 | rleFrBbox( Rs._R, bb.data, h, w, n ) 245 | objs = _toString(Rs) 246 | return objs 247 | 248 | def frPoly( poly, siz h, siz w ): 249 | cdef np.ndarray[np.double_t, ndim=1] np_poly 250 | n = len(poly) 251 | Rs = RLEs(n) 252 | for i, p in enumerate(poly): 253 | np_poly = np.array(p, dtype=np.double, order='F') 254 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 255 | objs = _toString(Rs) 256 | return objs 257 | 258 | def frUncompressedRLE(ucRles, siz h, siz w): 259 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 260 | cdef RLE R 261 | cdef uint *data 262 | n = len(ucRles) 263 | objs = [] 264 | for i in range(n): 265 | Rs = RLEs(1) 266 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 267 | # time for malloc can be saved here but it's fine 268 | data = malloc(len(cnts)* sizeof(uint)) 269 | for j in range(len(cnts)): 270 | data[j] = cnts[j] 271 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 272 | Rs._R[0] = R 273 | objs.append(_toString(Rs)[0]) 274 | return objs 275 | 276 | def frPyObjects(pyobj, h, w): 277 | # encode rle from a list of python objects 278 | if type(pyobj) == np.ndarray: 279 | objs = frBbox(pyobj, h, w) 280 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 281 | objs = frBbox(pyobj, h, w) 282 | elif type(pyobj) == list and len(pyobj[0]) > 4: 283 | objs = frPoly(pyobj, h, w) 284 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 285 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 286 | objs = frUncompressedRLE(pyobj, h, w) 287 | # encode rle from single python object 288 | elif type(pyobj) == list and len(pyobj) == 4: 289 | objs = frBbox([pyobj], h, w)[0] 290 | elif type(pyobj) == list and len(pyobj) > 4: 291 | objs = frPoly([pyobj], h, w)[0] 292 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 293 | objs = frUncompressedRLE([pyobj], h, w)[0] 294 | else: 295 | raise Exception('input type is not supported.') 296 | return objs -------------------------------------------------------------------------------- /Tracking/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import mathutils 6 | 7 | from Tracking.utils.train_utils import convert_voxel_to_pc 8 | import open3d as o3d 9 | from scipy.spatial.transform import Rotation as R 10 | from scipy.spatial.transform import Slerp 11 | from scipy.ndimage import gaussian_filter1d 12 | from scipy import interpolate 13 | 14 | 15 | def box2minmax(corner_pt_box): 16 | ''' 17 | Box from 8x3 to minmax format 18 | Only works properly for axis aligned boxes 19 | ''' 20 | xyz_min = torch.min(corner_pt_box, dim=0).values 21 | xyz_max = torch.max(corner_pt_box, dim=0).values 22 | box = np.concatenate((xyz_min.numpy(), xyz_max.numpy())) 23 | return box 24 | 25 | def box2minmax_axaligned(corner_pt_box): 26 | ''' 27 | Box from 8x3 to minmax format 28 | For non-axis aligned boxes, first enclose with axis-aligned box, then calc minmax 29 | ''' 30 | 31 | bbox3d_obj = o3d.geometry.AxisAlignedBoundingBox() 32 | bbox_3d = bbox3d_obj.create_from_points(o3d.utility.Vector3dVector(corner_pt_box)) 33 | corner_pt_box = torch.from_numpy(np.array(bbox_3d.get_box_points())) 34 | xyz_min = torch.min(corner_pt_box, dim=0).values 35 | xyz_max = torch.max(corner_pt_box, dim=0).values 36 | box = np.concatenate((xyz_min.numpy(), xyz_max.numpy())) 37 | return box 38 | 39 | def cad2world_mat(rot, loc, scale, with_scale=True): 40 | ''' 41 | Return cad2world matrix from annotations 42 | ''' 43 | cad2world = torch.eye(4) 44 | scale_mat = torch.diag(torch.tensor([scale, scale, scale])) 45 | if with_scale: 46 | cad2world[:3, :3] = scale_mat @ euler_to_rot(rot, fmt='torch') 47 | else: 48 | cad2world[:3, :3] = euler_to_rot(rot, fmt='torch') 49 | 50 | cad2world[:3, 3] = loc 51 | return cad2world 52 | 53 | def euler_to_rot(euler_rot, fmt='torch', constraint=False): 54 | ''' 55 | Euler to 3x3 Rotation Matrix transform 56 | ''' 57 | 58 | if constraint: 59 | euler_rot = torch.tensor([0, 0, euler_rot[2]]) 60 | euler = mathutils.Euler(euler_rot) 61 | rot = np.array(euler.to_matrix()) 62 | 63 | if fmt == 'torch': 64 | return torch.from_numpy(rot) 65 | else: 66 | return rot 67 | 68 | 69 | def visualize_graph(G, color): 70 | ''' 71 | Visualise Graph data connectivity 72 | ''' 73 | plt.figure(figsize=(7,7)) 74 | plt.xticks([]) 75 | plt.yticks([]) 76 | nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False, 77 | node_color=color, cmap="Set2") 78 | plt.show() 79 | 80 | 81 | 82 | def fuse_pose(trajectories, seq_len=None): 83 | ''' 84 | Pose fusion via slurp and spline interpolation 85 | ''' 86 | 87 | def get_scale(m): 88 | if type(m) 
== torch.Tensor: 89 | return m.norm(dim=0) 90 | return np.linalg.norm(m, axis=0) 91 | 92 | def fill_last(fill_list, exp_dim=False): 93 | # fill with last value 94 | for t_idx, tt in enumerate(fill_list): 95 | if tt is None: 96 | for i in range(t_idx - 1, -1, -1): 97 | if fill_list[i] is not None: 98 | if exp_dim: 99 | fill_list[t_idx] = np.expand_dims(fill_list[i], axis=0) 100 | else: 101 | fill_list[t_idx] = fill_list[i] 102 | break 103 | return fill_list 104 | 105 | def fill_last_t(fill_list, exp_dim=True): 106 | # fill with last value 107 | for t_idx, tt in enumerate(fill_list): 108 | if tt.sum() == 0: 109 | for i in range(t_idx - 1, -1, -1): 110 | if fill_list[i].sum() != 0: 111 | if exp_dim: 112 | fill_list[t_idx] = np.expand_dims(np.squeeze(fill_list[i]), axis=0) 113 | else: 114 | fill_list[t_idx] = fill_list[i] 115 | break 116 | else: 117 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 118 | 119 | for t_idx, tt in enumerate(fill_list): 120 | if len(tt.shape) == 1: 121 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 122 | 123 | return fill_list 124 | 125 | def unscale_mat(cad2world): 126 | 127 | c2w_cpy = torch.clone(cad2world) 128 | rot = cad2world[:3, :3] 129 | scale = get_scale(rot) 130 | unscaled_rot = rot / scale 131 | c2w_cpy[:3, :3] = unscaled_rot 132 | return c2w_cpy 133 | 134 | new_trajectories = [] 135 | 136 | times = np.arange(seq_len) 137 | for traj in trajectories: 138 | key_times = [] 139 | key_trans = [] 140 | key_rots = [] 141 | t_trans = [np.zeros(3) for i in range(seq_len)] 142 | t_vox = [None for i in range(seq_len)] 143 | t_box = [None for i in range(seq_len)] 144 | t_id = [None for i in range(seq_len)] 145 | t_scale = [None for i in range(seq_len)] 146 | for pred in traj: 147 | key_rots.append(torch.unsqueeze(unscale_mat(pred['obj']['cad2world'][:3, :3]), dim=0)) 148 | key_trans.append(torch.unsqueeze(pred['obj']['cad2world'][:3, 3], dim=0)) 149 | key_times.append(pred['scan_idx']) 150 | t_trans[pred['scan_idx']] = pred['obj']['cad2world'][:3, 3].numpy() 151 | t_vox[pred['scan_idx']] = pred['obj']['voxel'] 152 | t_id[pred['scan_idx']] = pred['obj']['obj_idx'] 153 | t_box[pred['scan_idx']] = pred['obj']['compl_box'] 154 | t_scale[pred['scan_idx']] = get_scale(pred['obj']['cad2world'][:3, :3]) 155 | 156 | times = np.linspace(key_times[0], key_times[-1], num=key_times[-1]-key_times[0]+1).astype(np.int) 157 | traj_rots = torch.cat(key_rots, dim=0).numpy() 158 | key_trans = torch.cat(key_trans, dim=0).numpy() 159 | 160 | t_trans = np.concatenate(fill_last_t(t_trans, exp_dim=True), axis=0) 161 | t_trans[:, 0] = gaussian_filter1d(t_trans[:, 0], 3) 162 | t_trans[:, 1] = gaussian_filter1d(t_trans[:, 1], 3) 163 | t_trans[:, 2] = gaussian_filter1d(t_trans[:, 2], 3) 164 | t_vox = fill_last(t_vox) 165 | t_id = fill_last(t_id) 166 | t_box = fill_last(t_box) 167 | t_scale = fill_last(t_scale) 168 | 169 | r = R.from_matrix(traj_rots) 170 | slerp = Slerp(key_times, r) 171 | interp_rots = slerp(times) 172 | interp_rotmat = interp_rots.as_matrix() 173 | euler_rots = interp_rots.as_euler('xyz') 174 | euler_rots[:,-1] = gaussian_filter1d(euler_rots[:,-1], 3) #3 = sigma = standard deviation 175 | euler_rots[:, -1] = np.clip(euler_rots[:,-1], euler_rots[0,-1] - (0.2 * euler_rots[0,-1]), euler_rots[0,-1] + (0.2 * euler_rots[0,-1])) 176 | r_e = R.from_euler('xyz', euler_rots, degrees=False) 177 | interp_rotmat = r_e.as_matrix() 178 | 179 | 180 | 181 | #test = np.diag(t_scale[0]) @ interp_rotmat[0,:,:] 182 | constraint_flip = np.array([[1, 0, 0], [0, 0, -1], 
[0, 1, 0]]) 183 | new_traj = [] 184 | for t in times: 185 | t_dict = dict() 186 | t_dict['scan_idx'] = t 187 | t_dict['obj'] = dict() 188 | t_dict['obj']['cad2world'] = np.identity(4) 189 | t_dict['obj']['cad2world'][:3, :3] = (np.diag(t_scale[t]) @ interp_rotmat[t-key_times[0],:,:]) 190 | t_dict['obj']['cad2world'][:3, 3] = t_trans[t] 191 | t_dict['obj']['voxel'] = t_vox[t] 192 | t_dict['obj']['obj_idx'] = t_id[t] 193 | t_dict['obj']['compl_box'] = t_box[t] 194 | new_traj.append(t_dict) 195 | 196 | new_trajectories.append(new_traj) 197 | 198 | return new_trajectories 199 | 200 | 201 | def fuse_pose_F2F(trajectories, seq_len=125, constraint=True): 202 | ''' 203 | Pose fusion via slurp and spline interpolation 204 | ''' 205 | 206 | def get_scale(m): 207 | if type(m) == torch.Tensor: 208 | return m.norm(dim=0) 209 | return np.linalg.norm(m, axis=0) 210 | 211 | def fill_last(fill_list, exp_dim=False): 212 | # fill with last value 213 | for t_idx, tt in enumerate(fill_list): 214 | if tt is None: 215 | for i in range(t_idx - 1, -1, -1): 216 | if fill_list[i] is not None: 217 | if exp_dim: 218 | fill_list[t_idx] = np.expand_dims(fill_list[i], axis=0) 219 | else: 220 | fill_list[t_idx] = fill_list[i] 221 | break 222 | return fill_list 223 | 224 | def fill_last_t(fill_list, exp_dim=True): 225 | # fill with last value 226 | for t_idx, tt in enumerate(fill_list): 227 | if tt.sum() == 0: 228 | for i in range(t_idx - 1, -1, -1): 229 | if fill_list[i].sum() != 0: 230 | if exp_dim: 231 | fill_list[t_idx] = np.expand_dims(np.squeeze(fill_list[i]), axis=0) 232 | else: 233 | fill_list[t_idx] = fill_list[i] 234 | break 235 | else: 236 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 237 | 238 | for t_idx, tt in enumerate(fill_list): 239 | if len(tt.shape) == 1: 240 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 241 | 242 | return fill_list 243 | 244 | def unscale_mat(cad2world): 245 | 246 | c2w_cpy = np.copy(cad2world) 247 | rot = cad2world[:3, :3] 248 | scale = get_scale(rot) 249 | unscaled_rot = rot / scale 250 | c2w_cpy[:3, :3] = unscaled_rot 251 | return c2w_cpy 252 | 253 | new_trajectories = [] 254 | 255 | times = np.arange(seq_len) 256 | for traj in trajectories: 257 | key_times = [] 258 | key_trans = [] 259 | key_rots = [] 260 | t_trans = [np.zeros(3) for i in range(seq_len)] 261 | t_vox = [None for i in range(seq_len)] 262 | t_box = [None for i in range(seq_len)] 263 | t_id = [None for i in range(seq_len)] 264 | t_scale = [None for i in range(seq_len)] 265 | for pred in traj: 266 | key_rots.append(np.expand_dims(unscale_mat(pred['obj']['cad2world'][:3, :3]), axis=0)) 267 | key_trans.append(np.expand_dims(pred['obj']['cad2world'][:3, 3], axis=0)) 268 | key_times.append(pred['scan_idx']) 269 | t_trans[pred['scan_idx']] = pred['obj']['cad2world'][:3, 3]#.numpy() 270 | t_vox[pred['scan_idx']] = pred['obj']['obj_pc'] 271 | t_id[pred['scan_idx']] = pred['obj']['obj_idx'] 272 | t_box[pred['scan_idx']] = pred['obj']['obj_box'] 273 | t_scale[pred['scan_idx']] = get_scale(pred['obj']['cad2world'][:3, :3]) 274 | 275 | times = np.linspace(key_times[0], key_times[-1], num=key_times[-1]-key_times[0]+1).astype(np.int) 276 | traj_rots = np.concatenate(key_rots, axis=0)#.numpy() 277 | #key_trans = torch.cat(key_trans, dim=0)#.numpy() 278 | 279 | t_trans = np.concatenate(fill_last_t(t_trans, exp_dim=True), axis=0) 280 | t_trans[:, 0] = gaussian_filter1d(t_trans[:, 0], 3) 281 | t_trans[:, 1] = gaussian_filter1d(t_trans[:, 1], 3) 282 | t_trans[:, 2] = gaussian_filter1d(t_trans[:, 2], 3) 283 
| t_vox = fill_last(t_vox) 284 | t_id = fill_last(t_id) 285 | t_box = fill_last(t_box) 286 | t_scale = fill_last(t_scale) 287 | 288 | r = R.from_matrix(traj_rots) 289 | slerp = Slerp(key_times, r) 290 | interp_rots = slerp(times) 291 | interp_rotmat = interp_rots.as_matrix() 292 | euler_rots = interp_rots.as_euler('xyz') 293 | euler_rots[:,-1] = gaussian_filter1d(euler_rots[:,-1], 3) #3 = sigma = standard deviation 294 | euler_rots[:, -1] = np.clip(euler_rots[:,-1], euler_rots[0,-1] - (0.2 * euler_rots[0,-1]), euler_rots[0,-1] + (0.2 * euler_rots[0,-1])) 295 | if constraint: 296 | euler_rots[:,0] = 0 297 | euler_rots[:,1] = 0 298 | r_e = R.from_euler('xyz', euler_rots, degrees=False) 299 | interp_rotmat = r_e.as_matrix() 300 | 301 | 302 | #test = np.diag(t_scale[0]) @ interp_rotmat[0,:,:] 303 | constraint_flip = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) 304 | new_traj = [] 305 | for t in times: 306 | t_dict = dict() 307 | t_dict['scan_idx'] = t 308 | t_dict['obj'] = dict() 309 | t_dict['obj']['cad2world'] = np.identity(4) 310 | t_dict['obj']['cad2world'][:3, :3] = (np.diag(t_scale[t]) @ interp_rotmat[t-key_times[0],:,:]) 311 | t_dict['obj']['cad2world'][:3, 3] = t_trans[t] 312 | t_dict['obj']['obj_pc'] = t_vox[t] 313 | t_dict['obj']['obj_idx'] = t_id[t] 314 | t_dict['obj']['obj_box'] = t_box[t] 315 | new_traj.append(t_dict) 316 | 317 | new_trajectories.append(new_traj) 318 | 319 | return new_trajectories -------------------------------------------------------------------------------- /Detection/roi_heads/nocs_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import fvcore.nn.weight_init as weight_init 3 | import torch 4 | import numpy as np 5 | from detectron2.layers import ShapeSpec, cat, roi_align 6 | from detectron2.utils.events import get_event_storage 7 | from detectron2.utils.registry import Registry 8 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 9 | from torch import nn 10 | from typing import Dict 11 | import sys 12 | sys.path.append('..') #Hack add ROOT DIR 13 | from Detection.utils.train_utils import init_weights, symmetry_smooth_l1_loss, symmetry_bin_loss, crop_nocs, nocs_prob_to_value 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | ROI_NOCS_HEAD_REGISTRY = Registry("ROI_NOCS_HEAD") 18 | 19 | 20 | def nocs_loss(pred_nocsmap, instances, pred_boxes, 21 | loss_weight=3, iou_thres=0.5, cls_mapping=None, use_bin_loss=False, num_bins=32): 22 | ''' 23 | Calculate loss between predicted and gt nocs map if same category id and max IoU box > threshold 24 | per batch 25 | iou_thres: IoU threshold used for positive samples for loss calculation 26 | cls_mapping: class id to name mapping used for symmetry in loss 27 | use_bin_loss: if true use classification loss else use smooth l1 loss 28 | ''' 29 | 30 | l1_loss = 0 31 | device = torch.device("cuda") 32 | #batch_size = len(instances) 33 | start_instance = 0 34 | num_instances_overlap = 0 35 | 36 | 37 | for instances_per_image in instances: 38 | if len(instances_per_image) == 0: 39 | continue 40 | 41 | end_instance = start_instance + len(instances_per_image) 42 | 43 | gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) 44 | gt_boxes_per_image = instances_per_image.gt_boxes 45 | gt_nocs_per_image = instances_per_image.gt_nocs 46 | 47 | for i in range(start_instance, end_instance): 48 | 49 | abs_pred_box = pred_boxes[i,:].to(dtype=torch.int64) 50 | pred_box = 
Boxes(torch.unsqueeze(abs_pred_box, dim=0)) # XYXY 51 | patch_heigth = int(abs_pred_box[3] - abs_pred_box[1]) # Y 52 | patch_width = int(abs_pred_box[2] - abs_pred_box[0]) # X 53 | 54 | pred_nocs = pred_nocsmap[i] # (32) x C x 28 x 28 (bin) 55 | 56 | ious = pairwise_iou(gt_boxes_per_image, pred_box) 57 | idx_max_iou = int(torch.argmax(ious)) 58 | max_iou = ious[idx_max_iou] 59 | 60 | if max_iou >= iou_thres: 61 | 62 | num_instances_overlap += 1 63 | 64 | gt_box = gt_boxes_per_image.tensor[idx_max_iou,:].to(dtype=torch.int64) 65 | 66 | gt_nocs = gt_nocs_per_image[idx_max_iou, :, :, :] # H x W x C 67 | gt_nocs = torch.squeeze(crop_nocs(gt_nocs), dim=0).to(device=device) # C x H x W 68 | 69 | gt_cls = cls_mapping[gt_classes_per_image[idx_max_iou]] 70 | 71 | # Get overlapping pixels for loss computation -> Positive ROIs 72 | x_min = int(torch.max(torch.tensor([gt_box[0], abs_pred_box[0]]))) 73 | x_max = int(torch.min(torch.tensor([gt_box[2], abs_pred_box[2]]))) 74 | y_min = int(torch.max(torch.tensor([gt_box[1], abs_pred_box[1]]))) 75 | y_max = int(torch.min(torch.tensor([gt_box[3], abs_pred_box[3]]))) 76 | 77 | # Symmetry Loss: Rotate gt_overlap 90,180,270 degree around y_axis and take min 78 | if use_bin_loss: 79 | # Roi Align pred nocs to pred box shape 80 | tmp_box = [torch.unsqueeze( 81 | torch.tensor([0, 0, pred_nocs.shape[3], pred_nocs.shape[2]], dtype=torch.float32, 82 | device=device), dim=0)] * num_bins 83 | pred_patch = roi_align(pred_nocs.to(device=device), tmp_box, 84 | output_size=(patch_heigth, patch_width), aligned=True) # num_bins x 3 x H x W 85 | 86 | # Full image patches 87 | full_patch = torch.zeros(num_bins, 3, 240, 320) 88 | full_patch[:, :, abs_pred_box[1]:abs_pred_box[3], abs_pred_box[0]:abs_pred_box[2]] = pred_patch 89 | 90 | gt_patch = torch.zeros(3, 240, 320) 91 | gt_patch[:, gt_box[1]:gt_box[3], gt_box[0]:gt_box[2]] = gt_nocs 92 | 93 | # Loss only on overlap ROI with GT 94 | pred_overlap = full_patch[:, :, y_min:y_max, x_min:x_max] # binsxCxHxW 95 | gt_overlap = gt_patch[:, y_min:y_max, x_min:x_max] # CxHxW 96 | # print(pred_overlap.shape, max_iou, pred_patch.shape) 97 | 98 | obj_loss = symmetry_bin_loss(gt_overlap, pred_overlap, gt_cls=gt_cls, num_bins=num_bins) 99 | 100 | else: 101 | # Roi Align pred nocs to pred box shape 102 | tmp_box = [torch.unsqueeze( 103 | torch.tensor([0, 0, pred_nocs.shape[2], pred_nocs.shape[1]], dtype=torch.float32, 104 | device=device), dim=0)] 105 | pred_patch = roi_align(torch.unsqueeze(pred_nocs.to(device=device), dim=0), tmp_box, 106 | output_size=(patch_heigth, patch_width), aligned=True) 107 | pred_patch = torch.squeeze(pred_patch, dim=0) # C x H x W of predicted box 108 | 109 | # Full image patches 110 | full_patch = torch.zeros(3, 240, 320) 111 | full_patch[:, abs_pred_box[1]:abs_pred_box[3], abs_pred_box[0]:abs_pred_box[2]] = pred_patch 112 | 113 | gt_patch = torch.zeros(3, 240, 320) 114 | gt_patch[:, gt_box[1]:gt_box[3], gt_box[0]:gt_box[2]] = gt_nocs 115 | 116 | # Loss only on overlap ROI with GT 117 | pred_overlap = full_patch[:, y_min:y_max, x_min:x_max] # CxHxW 118 | gt_overlap = gt_patch[:, y_min:y_max, x_min:x_max] # CxHxW 119 | # print(pred_overlap.shape, max_iou, pred_patch.shape) 120 | 121 | obj_loss = symmetry_smooth_l1_loss(gt_overlap, pred_overlap, gt_cls=gt_cls) 122 | 123 | l1_loss += obj_loss 124 | 125 | start_instance = end_instance 126 | 127 | l1_loss = l1_loss * loss_weight / num_instances_overlap 128 | 129 | return l1_loss, None 130 | 131 | def nocs_inference(pred_nocsmap, pred_instances, use_bin_loss=False, 
num_bins=32): # shape num obj 3x 28 x 28 (RGB), Num img x num obj ... 132 | 133 | num_boxes_per_image = [len(i) for i in pred_instances] 134 | nocs_pred = pred_nocsmap.split(num_boxes_per_image, dim=0) 135 | 136 | if np.array(num_boxes_per_image).sum() == 0: 137 | return 138 | 139 | # instances and predictions always same len just empty 140 | for prob, instances in zip(nocs_pred, pred_instances): 141 | 142 | if len(instances) == 0: 143 | print('No predicted instances found ...') 144 | continue 145 | 146 | num_pred_instances = prob.shape[0] 147 | num_dims = len(prob.shape) 148 | 149 | if use_bin_loss and num_pred_instances != 0 and num_dims == 5: 150 | 151 | x_prob = nocs_prob_to_value(prob, channel=0, num_bins=num_bins) 152 | y_prob = nocs_prob_to_value(prob, channel=1, num_bins=num_bins) 153 | z_prob = nocs_prob_to_value(prob, channel=2, num_bins=num_bins) 154 | prob = torch.cat((x_prob, y_prob, z_prob), dim=1) 155 | 156 | instances.pred_nocs = prob 157 | 158 | 159 | class NocsModel(torch.nn.Module): 160 | """ 161 | Decoder Module NOCS 162 | """ 163 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 164 | super(NocsModel, self).__init__() 165 | 166 | self.input_shape = input_shape 167 | self.use_bin_loss = cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS 168 | self.num_bins = cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS 169 | 170 | # Layer Definition 171 | if self.use_bin_loss: 172 | self.layer1_R = torch.nn.Sequential( 173 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 174 | torch.nn.ReLU(), 175 | torch.nn.BatchNorm2d(128) 176 | ) 177 | self.layer1_G = torch.nn.Sequential( 178 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 179 | torch.nn.ReLU(), 180 | torch.nn.BatchNorm2d(128) 181 | ) 182 | self.layer1_B = torch.nn.Sequential( 183 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 184 | torch.nn.ReLU(), 185 | torch.nn.BatchNorm2d(128) 186 | ) 187 | # Layer 2 188 | self.layer2_R = torch.nn.Sequential( 189 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 190 | torch.nn.ReLU(), 191 | torch.nn.BatchNorm2d(64) 192 | ) 193 | self.layer2_G = torch.nn.Sequential( 194 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 195 | torch.nn.ReLU(), 196 | torch.nn.BatchNorm2d(64) 197 | ) 198 | self.layer2_B = torch.nn.Sequential( 199 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 200 | torch.nn.ReLU(), 201 | torch.nn.BatchNorm2d(64) 202 | ) 203 | # Layer 3 204 | self.layer3_R = torch.nn.Sequential( 205 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins R/x - head 206 | #torch.nn.LogSoftmax(dim=1) 207 | ) 208 | self.layer3_G = torch.nn.Sequential( 209 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins G/y - head 210 | #torch.nn.LogSoftmax(dim=1) 211 | ) 212 | self.layer3_B = torch.nn.Sequential( 213 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins B/z - head 214 | #torch.nn.LogSoftmax(dim=1) 215 | ) 216 | else: 217 | self.layer0 = torch.nn.Sequential( 218 | torch.nn.ConvTranspose2d(256, 256, kernel_size=3, stride=1, bias=True, padding=1), # 14 219 | torch.nn.ReLU(), 220 | torch.nn.BatchNorm2d(256) 221 | ) 222 | self.layer1 = torch.nn.Sequential( 223 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, 
bias=True, padding=1), # 14 224 | torch.nn.ReLU(), 225 | torch.nn.BatchNorm2d(128) 226 | ) 227 | self.layer2 = torch.nn.Sequential( 228 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1),# 28 # use kernel size divisible by stride 229 | torch.nn.ReLU(), 230 | torch.nn.BatchNorm2d(64) 231 | ) 232 | self.layer3 = torch.nn.Sequential( 233 | torch.nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, bias=True, padding=1), # 28 x RGB 234 | torch.nn.Sigmoid() 235 | ) 236 | 237 | def forward(self, features): 238 | """ 239 | input features from ROI Pool, dim num instances, 256 x 14 x 14 240 | """ 241 | 242 | if self.use_bin_loss: 243 | R_features = self.layer1_R(features) 244 | R_features = self.layer2_R(R_features) 245 | R_features = torch.unsqueeze(self.layer3_R(R_features), dim=1) # num obj x 1 x num_bins x 28 x 28 246 | 247 | G_features = self.layer1_G(features) 248 | G_features = self.layer2_G(G_features) 249 | G_features = torch.unsqueeze(self.layer3_G(G_features), dim=1) # num obj x 1 x num_bins x 28 x 28 250 | 251 | B_features = self.layer1_B(features) 252 | B_features = self.layer2_B(B_features) 253 | B_features = torch.unsqueeze(self.layer3_B(B_features), dim=1) # num obj x 1 x num_bins x 28 x 28 254 | 255 | features = torch.cat((R_features, G_features, B_features), dim=1).permute(0, 2, 1, 3, 4).contiguous() # num obj x num_bins x 3 x 28 x 28 256 | 257 | else: 258 | features = self.layer0(features) 259 | features = self.layer1(features) 260 | features = self.layer2(features) 261 | features = self.layer3(features) # num obj x 3 x 28 x 28 262 | 263 | return features 264 | 265 | 266 | @ROI_NOCS_HEAD_REGISTRY.register() 267 | class NocsDecoder(nn.Module): 268 | """ 269 | A Nocs head with upsample layer (with `ConvTranspose2d`). 
270 | """ 271 | 272 | def __init__(self, cfg, input_shape): 273 | super(NocsDecoder, self).__init__() 274 | 275 | ### Model 276 | self.nocs_layers = NocsModel(cfg, input_shape) 277 | init_weights(self.nocs_layers, init_type='kaiming', init_gain=0.02) 278 | 279 | 280 | def forward(self, x): 281 | 282 | x = self.nocs_layers(x) #BS x C x H x W 283 | 284 | return x 285 | 286 | 287 | def build_nocs_head(cfg, input_shape): 288 | name = cfg.MODEL.ROI_NOCS_HEAD.NAME 289 | return ROI_NOCS_HEAD_REGISTRY.get(name)(cfg, input_shape) 290 | -------------------------------------------------------------------------------- /Detection/register_dataset.py: -------------------------------------------------------------------------------- 1 | # import some common libraries 2 | import torch 3 | import numpy as np 4 | import os, json, cv2, random, csv, pickle, sys 5 | import h5py 6 | from pycocotools.coco import COCO 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | # import some common detectron2 utilities 11 | from detectron2.utils.visualizer import Visualizer 12 | from detectron2.data import MetadataCatalog, DatasetCatalog 13 | from detectron2.structures import BoxMode 14 | from detectron2.structures import polygons_to_bitmask 15 | from detectron2.utils.visualizer import GenericMask 16 | 17 | from PIL import Image 18 | 19 | sys.path.append('..') #Hack add ROOT DIR 20 | from baseconfig import CONF 21 | 22 | from BlenderProc.utils import binvox_rw 23 | from Detection.utils.train_utils import get_voxel 24 | 25 | 26 | # Define directory to images 27 | IMG_DIR = CONF.PATH.DETECTDATA 28 | 29 | # custom dataset registration 30 | class RegisterDataset: 31 | 32 | def __init__(self, mapping_list, name_list, img_dir=IMG_DIR): 33 | self.img_dir = img_dir 34 | self.mapping_list = list(mapping_list) 35 | self.name_list = list(name_list) 36 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 37 | 38 | def get_front_dicts(self, img_path): 39 | 40 | mapping_file = os.path.join(self.img_dir, "3D_front_mapping.csv") 41 | _, csv_dict = self.read_csv_mapping(mapping_file) 42 | 43 | folders = os.listdir(img_path) 44 | 45 | dataset_dicts = [] 46 | for folder in folders: 47 | 48 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 49 | 50 | with open(json_file) as f: 51 | imgs_anns = json.load(f) 52 | 53 | camposes = [] 54 | all_objs = [] 55 | for idx, v in enumerate(imgs_anns['images']): 56 | 57 | record = {} 58 | 59 | filename = os.path.join(img_path, folder, 'coco_data', v["file_name"]) 60 | depth_name = os.path.join(img_path, folder, str(idx) + '.hdf5') 61 | 62 | record["file_name"] = filename 63 | record["image_id"] = str(v['id']) + '_' + folder[:8] 64 | record["height"] = v['height'] 65 | record["width"] = v['width'] 66 | # record["nocs_map"] = self.get_nocs(v["file_name"], img_path, folder) 67 | # record["depth_map"], record['campose'] = self.load_hdf5(depth_name) 68 | record["nocs_map"] = filename.replace('rgb', 'nocs') 69 | record["depth_map"] = depth_name 70 | record["campose"] = self.load_campose(depth_name) 71 | 72 | depth = [] 73 | objs = [] 74 | voxels = [] 75 | boxes = [] 76 | segmap_store = [] 77 | category = [] 78 | object_ids = [] 79 | gt_rotations = [] 80 | gt_locations = [] 81 | gt_3dbox = [] 82 | gt_scales = [] 83 | 84 | for anno in imgs_anns['annotations']: 85 | if anno['image_id'] == v['id']: 86 | cat_id = anno['category_id'] 87 | object_id = anno['id'] 88 | jid = anno['jid'] 89 | scale = np.array(anno['3Dscale']) 90 | 91 | #voxel = 
os.path.join(CONF.PATH.FUTURE3D, jid, 'model.binvox') 92 | voxel = os.path.join(CONF.PATH.VOXELDATA, jid, 'model.binvox') 93 | name = csv_dict[cat_id] 94 | 95 | #nocs_obj = self.crop_segmask(record["nocs_map"], anno['bbox'], anno['segmentation']) 96 | #depth_obj = self.crop_depth(record["depth_map"], anno['bbox'], anno['segmentation']) 97 | 98 | if not name in self.name_list: 99 | self.name_list.append(name) 100 | 101 | if cat_id in self.mapping_list: 102 | id = self.mapping_list.index(cat_id) 103 | else: 104 | self.mapping_list.append(cat_id) 105 | id = self.mapping_list.index(cat_id) 106 | 107 | obj = { 108 | "bbox": anno['bbox'], 109 | "bbox_mode": BoxMode.XYWH_ABS, 110 | "segmentation": anno['segmentation'], 111 | "category_id": id, 112 | "voxel": voxel, 113 | "scale": scale, 114 | "jid": jid, 115 | "id": object_id, 116 | } 117 | objs.append(obj) 118 | segmap_store.append(anno['segmentation']) 119 | voxels.append(voxel) 120 | category.append(id) 121 | boxes.append(anno['bbox']) 122 | #depth.append(depth_obj) 123 | object_ids.append(object_id) 124 | gt_rotations.append(anno['3Drot']) 125 | anno_3dloc = self.add_halfheight(anno['3Dloc'].copy(), anno['3Dbbox']) 126 | gt_locations.append(anno_3dloc) 127 | gt_3dbox.append(np.array(anno['3Dbbox'])) 128 | gt_scales.append(scale) 129 | 130 | record['cat_id'] = category # starts at 0 131 | record['vox'] = voxels 132 | record['segmap'] = segmap_store 133 | record['boxes'] = boxes 134 | record["annotations"] = objs 135 | record['object_id'] = object_ids 136 | record['rotations'] = gt_rotations 137 | record['locations'] = gt_locations 138 | record['3dboxes'] = gt_3dbox 139 | record['3dscales'] = gt_scales 140 | #all_objs.append(objs) 141 | #camposes.append(record['campose']) 142 | dataset_dicts.append(record) 143 | 144 | ''' 145 | with open('optimization.pickle', 'wb') as handle: 146 | all_objs.append(camposes) 147 | pickle.dump(all_objs, handle, protocol=pickle.HIGHEST_PROTOCOL) 148 | sys.exit() 149 | ''' 150 | 151 | return dataset_dicts 152 | 153 | def get_eval_dicts(self, img_path): 154 | 155 | mapping_file = os.path.join(self.img_dir, "3D_front_mapping.csv") 156 | _, csv_dict = self.read_csv_mapping(mapping_file) 157 | 158 | folders = os.listdir(img_path) 159 | 160 | dataset_dicts = [] 161 | for folder in folders: 162 | 163 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 164 | 165 | with open(json_file) as f: 166 | imgs_anns = json.load(f) 167 | 168 | for idx, v in enumerate(imgs_anns['images']): 169 | if idx == 0: 170 | record = {} 171 | 172 | filename = os.path.join(img_path, folder, 'coco_data', v["file_name"]) 173 | 174 | record["file_name"] = filename 175 | record["image_id"] = str(v['id']) + '_' + folder[:8] 176 | record["height"] = v['height'] 177 | record["width"] = v['width'] 178 | record["nocs_map"] = self.get_nocs(v["file_name"], img_path, folder) 179 | 180 | objs = [] 181 | for anno in imgs_anns['annotations']: 182 | if anno['image_id'] == v['id']: 183 | jid = anno['jid'] 184 | voxel = get_voxel(os.path.join(CONF.PATH.VOXELDATA, jid, 'model.binvox'), np.array(anno['3Dscale'])) 185 | cat_id = anno['category_id'] 186 | name = csv_dict[cat_id] 187 | nocs_obj = self.crop_segmask(record["nocs_map"], anno['bbox'], anno['segmentation']) 188 | if not name in self.name_list: 189 | self.name_list.append(name) 190 | 191 | if cat_id in self.mapping_list: 192 | id = self.mapping_list.index(cat_id) 193 | else: 194 | self.mapping_list.append(cat_id) 195 | id = self.mapping_list.index(cat_id) 196 | 197 | obj = { 198 
| "bbox": anno['bbox'], 199 | "bbox_mode": BoxMode.XYWH_ABS, 200 | "segmentation": anno['segmentation'], 201 | "category_id": id, 202 | "voxel": voxel, 203 | "nocs": nocs_obj, 204 | } 205 | objs.append(obj) 206 | 207 | record["annotations"] = objs 208 | dataset_dicts.append(record) 209 | 210 | return dataset_dicts 211 | 212 | # register train and val dataset 213 | def reg_dset(self): 214 | for d in ["train", "val", "test"]: 215 | DatasetCatalog.register("front_" + d, lambda d=d: self.get_front_dicts(self.img_dir + d)) 216 | MetadataCatalog.get("front_" + d).set(thing_classes=self.name_list) 217 | print("Registered Dataset") 218 | 219 | # data mean, std 220 | def calculate_mean_std(self): 221 | dataset_dicts = self.get_front_dicts(os.path.join(self.img_dir, 'train')) 222 | 223 | data_mean = np.zeros((1, 3)) 224 | data_std = np.zeros((1, 3)) 225 | data_len = len(dataset_dicts) 226 | 227 | for idx, d in enumerate(dataset_dicts): 228 | img = cv2.imread(d["file_name"]) 229 | data_mean = data_mean + np.mean(img, axis=(0, 1)) / data_len 230 | data_std = data_std + np.std(img, axis=(0, 1)) / data_len 231 | print("data mean", data_mean) 232 | return data_mean, data_std 233 | 234 | # visualize annotations 235 | def vis_annotation(self, num_imgs=1): 236 | front_metadata = MetadataCatalog.get("front_train") 237 | dataset_dicts = self.get_front_dicts(os.path.join(self.img_dir, 'train')) 238 | 239 | for d in random.sample(dataset_dicts, num_imgs): 240 | img = cv2.imread(d["file_name"]) 241 | visualizer = Visualizer(img[:, :, ::-1], metadata=front_metadata, scale=1) 242 | out = visualizer.draw_dataset_dict(d) 243 | cv2.imshow('image', out.get_image()[:, :, ::-1]) 244 | cv2.waitKey(500) 245 | 246 | # evaluate annotations 247 | def eval_annotation(self): 248 | front_metadata = MetadataCatalog.get("front_train") 249 | dataset_dicts = self.get_eval_dicts(os.path.join(self.img_dir, 'train')) 250 | 251 | for idx, d in enumerate(dataset_dicts): 252 | img = cv2.imread(d["file_name"]) 253 | visualizer = Visualizer(img[:, :, ::-1], metadata=front_metadata, scale=1) 254 | out = visualizer.draw_dataset_dict(d) 255 | cv2.imshow('image', out.get_image()[:, :, ::-1]) 256 | print("image id: ", idx, " image name: ", d["file_name"]) 257 | cv2.waitKey(0) 258 | 259 | @staticmethod 260 | def read_csv_mapping(path): 261 | """ Loads an idset mapping from a csv file, assuming the rows are sorted by their ids. 
262 | :param path: Path to csv file 263 | """ 264 | 265 | with open(path, 'r') as csvfile: 266 | reader = csv.DictReader(csvfile) 267 | new_id_label_map = [] 268 | new_label_id_map = {} 269 | 270 | for row in reader: 271 | new_id_label_map.append(row["name"]) 272 | new_label_id_map[int(row["id"])] = row["name"] 273 | 274 | return new_id_label_map, new_label_id_map 275 | 276 | @staticmethod 277 | def write_pickle(img_dir, filename, pickle_data): 278 | 279 | filepath = os.path.join(img_dir, filename + ".pkl") 280 | print("PATH",filepath) 281 | if 'train' in img_dir: 282 | print('intrain') 283 | with open(filepath, 'wb') as f: 284 | pickle.dump(pickle_data, f) 285 | 286 | @staticmethod 287 | def load_pickle(self,img_dir, filename): 288 | 289 | if 'val' in img_dir: 290 | filepath = os.path.join(img_dir[:-4],'train', filename + ".pkl") 291 | with open(filepath, 'rb') as f: 292 | data = pickle.load(f) 293 | return data[0], data[1] 294 | else: 295 | return [],[] 296 | 297 | @staticmethod 298 | def get_nocs(filename, img_path, folder): 299 | nocs_name = filename.replace('rgb', 'nocs') 300 | nocs_path = os.path.join(img_path, folder, 'coco_data', nocs_name) 301 | nocs = cv2.imread(nocs_path) #BGRA 302 | nocs = nocs[:,:,:3] 303 | nocs = nocs[:, :, ::-1] # RGB 304 | 305 | nocs = np.array(nocs, dtype=np.float32) / 255 306 | 307 | return nocs 308 | 309 | @staticmethod 310 | def crop_segmask(nocs_img, bbox, segmap): 311 | 312 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 313 | # width = torch.abs(abs_bbox[2] - abs_bbox[0]) 314 | # height = torch.abs(abs_bbox[3] - abs_bbox[1]) 315 | 316 | gm = GenericMask(segmap, 240, 320) 317 | bin_mask = gm.polygons_to_mask(segmap) 318 | binary_mask = bin_mask[:,:, None] 319 | crop_im = np.multiply(nocs_img,binary_mask) 320 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2]),:]) 321 | # cropped_im = np.clip(cropped_im, 0, 1) 322 | 323 | cropped_im[cropped_im == 0] = 1 324 | 325 | return torch.from_numpy(cropped_im).to(torch.float32) 326 | 327 | @staticmethod 328 | def load_campose(path): 329 | 330 | with h5py.File(path, 'r') as data: 331 | for key in data.keys(): 332 | if key == 'campose': 333 | campose = np.array(data[key]) 334 | 335 | return campose 336 | 337 | @staticmethod 338 | def load_hdf5(path): 339 | 340 | with h5py.File(path, 'r') as data: 341 | for key in data.keys(): 342 | if key == 'depth': 343 | depth = np.array(data[key]) 344 | elif key == 'campose': 345 | campose = np.array(data[key]) 346 | 347 | return depth, campose 348 | 349 | @staticmethod 350 | def crop_depth(depth_img, bbox, segmap): 351 | 352 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 353 | 354 | gm = GenericMask(segmap, 240, 320) 355 | bin_mask = gm.polygons_to_mask(segmap) 356 | binary_mask = bin_mask[:, :] 357 | crop_im = np.multiply(depth_img, binary_mask) 358 | #crop_im[crop_im == 0] = 255 359 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2])]) 360 | 361 | return torch.from_numpy(cropped_im).to(torch.float32) 362 | 363 | @staticmethod 364 | def add_halfheight(location, box): 365 | ''' 366 | Object location z-center is at bottom, calculate half height of the object 367 | and add to shift z-center to correct location 368 | ''' 369 | z_coords = [] 370 | for pt in box: 371 | z = pt[-1] 372 | z_coords.append(z) 373 | z_coords = np.array(z_coords) 374 | half_height = 
np.abs(z_coords.max() - z_coords.min()) / 2 375 | location[-1] = half_height # annotated location sits at the object bottom; set z to half the height to reach the center 376 | 377 | return location --------------------------------------------------------------------------------
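The frPyObjects dispatch at the top of this section routes a Python object to frBbox, frPoly or frUncompressedRLE depending on its shape. Below is a small sketch of the same dispatch exercised through the standard pycocotools front-end; the image size, box and polygon values are made up for illustration.

from pycocotools import mask as maskUtils

h, w = 240, 320
bbox = [10.0, 20.0, 50.0, 60.0]              # one box [x, y, w, h] -> len(pyobj[0]) == 4 -> frBbox branch
poly = [[10, 20, 60, 20, 60, 80, 10, 80]]    # list of flat polygons -> len(pyobj[0]) > 4 -> frPoly branch

rle_box = maskUtils.frPyObjects([bbox], h, w)[0]
rle_poly = maskUtils.frPyObjects(poly, h, w)[0]
print(maskUtils.area(rle_poly), maskUtils.toBbox(rle_box))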
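
cad2world_mat in Tracking/utils/vis_utils.py bakes a uniform scale into the rotation block before writing the translation column, and get_scale/unscale_mat later recover that scale as the column norms of the upper-left 3x3. A minimal NumPy sketch of the same round trip; compose_cad2world and the example values are illustrative stand-ins, not part of the repository.

import numpy as np

def compose_cad2world(rot3x3, loc, scale):
    # illustrative stand-in for cad2world_mat: scale * rotation, then translation
    m = np.eye(4)
    m[:3, :3] = np.diag([scale] * 3) @ rot3x3
    m[:3, 3] = loc
    return m

yaw = np.pi / 4
rot = np.array([[np.cos(yaw), -np.sin(yaw), 0.0],
                [np.sin(yaw),  np.cos(yaw), 0.0],
                [0.0,          0.0,         1.0]])
m = compose_cad2world(rot, loc=[1.0, 2.0, 0.5], scale=0.8)

# recover the scale as column norms, as get_scale/unscale_mat do
recovered_scale = np.linalg.norm(m[:3, :3], axis=0)   # approx. [0.8, 0.8, 0.8]
unscaled_rot = m[:3, :3] / recovered_scale            # pure rotation again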
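
fuse_pose and fuse_pose_F2F interpolate keyframe rotations with scipy's Slerp and smooth per-axis translations with gaussian_filter1d. The sketch below shows that core idea under simplifying assumptions: interpolate_track is a hypothetical helper, and it linearly interpolates translations between keyframes instead of carrying the last observed value forward and clipping the smoothed yaw as the repository code does.

import numpy as np
from scipy.spatial.transform import Rotation as R, Slerp
from scipy.ndimage import gaussian_filter1d

def interpolate_track(key_times, key_rots, key_trans, sigma=3):
    # hypothetical helper: key_times sorted ints, key_rots (K,3,3), key_trans (K,3)
    times = np.arange(key_times[0], key_times[-1] + 1)
    # rotations: spherical linear interpolation between keyframes
    slerp = Slerp(key_times, R.from_matrix(key_rots))
    rots = slerp(times).as_matrix()                       # (T, 3, 3)
    # translations: per-axis interpolation followed by Gaussian smoothing
    trans = np.stack([np.interp(times, key_times, key_trans[:, d]) for d in range(3)], axis=1)
    trans = gaussian_filter1d(trans, sigma=sigma, axis=0)
    return times, rots, trans

# example: two keyframes, 90 degrees of yaw apart
kt = np.array([0, 10])
kr = R.from_euler('z', [0.0, np.pi / 2]).as_matrix()
ktr = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
t, rot, tr = interpolate_track(kt, kr, ktr)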
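
When USE_BIN_LOSS is enabled, NocsModel predicts per-bin logits for each colour channel and nocs_inference converts them back to coordinate values via nocs_prob_to_value (defined in Detection/utils/train_utils.py, not shown here). bins_to_coords below is only a guess at a typical argmax-style decoding and may differ from the actual implementation.

import torch

def bins_to_coords(logits: torch.Tensor, num_bins: int = 32) -> torch.Tensor:
    # hypothetical decoder: logits (N, num_bins, H, W) for one channel -> values in [0, 1]
    bin_idx = logits.argmax(dim=1, keepdim=True)        # most likely bin per pixel
    return (bin_idx.float() + 0.5) / num_bins           # map bin index to its centre value

logits = torch.randn(2, 32, 28, 28)
coords = bins_to_coords(logits)                          # (2, 1, 28, 28)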
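
Finally, a hedged usage sketch for RegisterDataset. It assumes the rendered 3D-FRONT data and the CONF paths referenced above are already in place and that the repository root is importable (the repository itself relies on sys.path hacks); the actual training entry point is not reproduced here.

from detectron2.data import DatasetCatalog, MetadataCatalog
from Detection.register_dataset import RegisterDataset   # import path assumes the repo root is on sys.path

reg = RegisterDataset(mapping_list=[], name_list=[])      # lists are filled while the annotations are parsed
reg.reg_dset()                                            # registers "front_train", "front_val", "front_test"

# building the dicts is lazy: the first get() walks the coco_annotations.json files under IMG_DIR
train_dicts = DatasetCatalog.get("front_train")
print(len(train_dicts), MetadataCatalog.get("front_train").thing_classes)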