├── Tracking ├── __init__.py ├── utils │ ├── __init__.py │ ├── eval_utils.py │ ├── train_utils.py │ └── vis_utils.py ├── datasets │ ├── __init__.py │ ├── front_dataset.py │ └── consec_graph_dataset.py ├── networks │ ├── __init__.py │ ├── edge_classifier.py │ ├── mlp.py │ ├── voxel_encoder.py │ └── mpn.py ├── inference.py ├── train.py ├── graph_cfg.py └── options.py ├── BlenderProc └── utils │ ├── __init__.py │ ├── libvoxelize │ ├── __init__.py │ ├── voxelize.pyx │ └── tribox2.h │ ├── libmesh │ ├── __init__.py │ ├── triangle_hash.pyx │ └── inside_mesh.py │ ├── voxels.py │ └── binvox_rw.py ├── Detection ├── roi_heads │ ├── __init__.py │ ├── voxel_head.py │ ├── roi_heads.py │ └── nocs_head.py ├── evaluator │ ├── mask.py │ └── _mask.pyx ├── inference │ ├── inference_metrics.py │ └── inference_utils.py ├── data │ ├── office_dataset.py │ └── mapper_heads.py ├── cfg_setup.py ├── train_net.py └── register_dataset.py ├── demo └── Teaser.png ├── .gitignore ├── baseconfig.py ├── Utility └── analyse_datset.py ├── README.md ├── PoseEst └── pose_utils.py └── environment.yml /Tracking/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BlenderProc/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Tracking/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import * 2 | -------------------------------------------------------------------------------- /Tracking/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .front_dataset import * 2 | from .graph_dataset import * 3 | -------------------------------------------------------------------------------- /Detection/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_heads import * 2 | from .roi_heads import VoxelNocsHeads 3 | -------------------------------------------------------------------------------- /demo/Teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DomiSchmauser/3D_MOT_Differentiable_Pose_Estimation/HEAD/demo/Teaser.png -------------------------------------------------------------------------------- /Tracking/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import * 2 | from .voxel_encoder import * 3 | from .edge_classifier import * 4 | from .mpn import * 5 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/__init__.py: -------------------------------------------------------------------------------- 1 | from .inside_mesh import ( 2 | check_mesh_contains, MeshIntersector, TriangleIntersector2d 3 | ) 4 | 5 | 6 | __all__ = [ 7 | check_mesh_contains, MeshIntersector, TriangleIntersector2d 8 | ] 9 | -------------------------------------------------------------------------------- /Tracking/inference.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import, division, print_function 2 | 3 | from options import Options 4 | import os 5 | import argparse 6 | 7 | 8 | # the directory that options.py resides in 9 | file_dir = os.path.dirname(__file__) 10 | 11 | options = Options() 12 | opts = options.parse() 13 | 14 | if opts.use_graph: 15 | from mpn_trainer import Trainer 16 | else: 17 | from trainer import Trainer 18 | 19 | if __name__ == "__main__": 20 | trainer = Trainer(opts) 21 | trainer.inference(vis_pose=False, classwise=True) 22 | -------------------------------------------------------------------------------- /Tracking/networks/edge_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | ''' 6 | Binary Classifier for classifying active/ non-active edges 7 | ''' 8 | 9 | class EdgeClassifier(nn.Module): 10 | 11 | def __init__(self, input_dim=32, intermed_dim=None): 12 | super(EdgeClassifier, self).__init__() 13 | if intermed_dim is None: 14 | self.fc1 = nn.Linear(input_dim, 16) 15 | self.fc2 = nn.Linear(16, 1) 16 | else: 17 | self.fc1 = nn.Linear(input_dim, intermed_dim) 18 | self.fc2 = nn.Linear(intermed_dim, 1) 19 | 20 | def forward(self, x): 21 | 22 | x = F.relu(self.fc1(x)) 23 | x = self.fc2(x) 24 | 25 | return x -------------------------------------------------------------------------------- /Detection/evaluator/mask.py: -------------------------------------------------------------------------------- 1 | import pycocotools._mask as _mask 2 | 3 | iou = _mask.iou 4 | merge = _mask.merge 5 | frPyObjects = _mask.frPyObjects 6 | 7 | def encode(bimask): 8 | if len(bimask.shape) == 3: 9 | return _mask.encode(bimask) 10 | elif len(bimask.shape) == 2: 11 | h, w = bimask.shape 12 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 13 | 14 | def decode(rleObjs): 15 | if type(rleObjs) == list: 16 | return _mask.decode(rleObjs) 17 | else: 18 | return _mask.decode([rleObjs])[:,:,0] 19 | 20 | def area(rleObjs): 21 | if type(rleObjs) == list: 22 | return _mask.area(rleObjs) 23 | else: 24 | return _mask.area([rleObjs])[0] 25 | 26 | def toBbox(rleObjs): 27 | if type(rleObjs) == list: 28 | return _mask.toBbox(rleObjs) 29 | else: 30 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /Tracking/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from options import Options 4 | import os, shutil,sys 5 | import argparse 6 | 7 | # the directory that options.py resides in 8 | file_dir = os.path.dirname(__file__) 9 | 10 | options = Options() 11 | opts = options.parse() 12 | 13 | if opts.use_graph: 14 | from mpn_trainer import Trainer 15 | else: 16 | from trainer import Trainer 17 | 18 | sys.path.append('..') #Hack add ROOT DIR 19 | from baseconfig import CONF 20 | 21 | if __name__ == "__main__": 22 | 23 | # Remove old files 24 | if os.path.exists(CONF.PATH.TRACKOUTPUT): 25 | print('Removing old outputs ...') 26 | shutil.rmtree(CONF.PATH.TRACKOUTPUT) 27 | os.mkdir(CONF.PATH.TRACKOUTPUT) 28 | 29 | trainer = Trainer(opts) 30 | if opts.precompute_feats: 31 | trainer.precompute() 32 | else: 33 | trainer.train() 34 | -------------------------------------------------------------------------------- /Tracking/graph_cfg.py: -------------------------------------------------------------------------------- 1 | # Graph Setup 2 | 
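# A minimal usage sketch (hypothetical names, not taken from this file): the dict built
# below only bundles hyperparameters and is handed to the message-passing tracker, e.g.
#   cfg = init_graph_cfg(node_in_size=16)
#   net = MPN(cfg)  # hypothetical constructor; the consuming class lives in Tracking/networks/mpn.py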
3 | def init_graph_cfg(node_in_size=16): 4 | ''' 5 | Graph Neural Network setup 6 | ''' 7 | graph_cfg = { 8 | 'undirected_graph': True, 9 | 'use_time_aware_mp': False, 10 | 'use_leaky_relu': True, 11 | 'max_frame_dist': 5, 12 | 'num_mp_steps': 4, 13 | 'node_agg_fn': 'mean', 14 | 'reattach_initial_nodes': False, 15 | 'reattach_initial_edges': True, 16 | 'encoder_feats_dict': { 17 | 'edge_in_dim': 8, 18 | 'edge_fc_dims': [12], 19 | 'edge_out_dim': 12, 20 | 'node_out_dim': node_in_size, 21 | 'dropout_p': None, 22 | 'use_batchnorm': False, 23 | }, 24 | 'edge_model_feats_dict': { 25 | 'fc_dims': [32, 12], 26 | 'dropout_p': None, 27 | 'use_batchnorm': False, 28 | }, 29 | 'node_model_feats_dict': { 30 | 'fc_dims': [20, node_in_size], 31 | 'dropout_p': None, 32 | 'use_batchnorm': False, 33 | }, 34 | } 35 | return graph_cfg 36 | -------------------------------------------------------------------------------- /Tracking/networks/mlp.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class MLP(nn.Module): 5 | def __init__(self, input_dim, fc_dims, dropout_p=0.4, use_batchnorm=False, use_leaky_relu=True): 6 | super(MLP, self).__init__() 7 | 8 | if use_leaky_relu: 9 | self.activation = nn.LeakyReLU(inplace=True) 10 | else: 11 | self.activation = nn.ReLU(inplace=True) 12 | 13 | assert isinstance(fc_dims, (list, tuple)), 'fc_dims must be either a list or a tuple, but got {}'.format( 14 | type(fc_dims)) 15 | 16 | layers = [] 17 | for idx, dim in enumerate(fc_dims): 18 | layers.append(nn.Linear(input_dim, dim)) 19 | if use_batchnorm and dim != 1: 20 | layers.append(nn.BatchNorm1d(dim)) 21 | 22 | if dim != 1: 23 | layers.append(self.activation) 24 | 25 | if dropout_p is not None and dim != 1: 26 | layers.append(nn.Dropout(p=dropout_p)) 27 | 28 | input_dim = dim 29 | 30 | self.layers = nn.Sequential(*layers) 31 | 32 | def forward(self, input): 33 | output = self.layers(input) 34 | return output -------------------------------------------------------------------------------- /Tracking/networks/voxel_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class VoxelEncoder(nn.Module): 6 | 7 | ''' 3D-convolutional encoder network for voxel input. 
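    Three stride-2 3D convolutions reduce the 32 x 32 x 32 grid to a 32-channel 4 x 4 x 4 volume,
    which two fully connected layers (through a 256-dim hidden layer) map to the
    `output_channel`-dimensional code.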
8 | Args: 9 | dim (int): input dimension 32 x 32 x 32 10 | c_dim (int): output dimension 9 11 | ''' 12 | 13 | def __init__(self, input_channel=1, output_channel=9): 14 | super().__init__() 15 | self.relu = F.relu 16 | self.leaky_relu = F.leaky_relu 17 | 18 | self.conv_in = nn.Conv3d(input_channel, 8, 3, padding=1) # 1 x 32 x 32 x 32 19 | self.conv_0 = nn.Conv3d(8, 16, 3, padding=1, stride=2) 20 | self.conv_1 = nn.Conv3d(16, 32, 3, padding=1, stride=2) 21 | self.conv_2 = nn.Conv3d(32, 32, 3, padding=1, stride=2) 22 | self.fc = nn.Linear(32 * 4 * 4 * 4, 256) 23 | self.fc2 = nn.Linear(256, output_channel) 24 | 25 | def forward(self, x): 26 | batch_size = x.size(0) # x_shape = BS x in_channels x 32 x 32 x 32 27 | 28 | net = self.conv_in(x) 29 | #print('l1',net.shape) 30 | net = self.conv_0(self.relu(net)) 31 | #print('l2', net.shape) 32 | net = self.conv_1(self.relu(net)) 33 | #print('l3', net.shape) 34 | net = self.conv_2(self.relu(net)) 35 | #print('l4', net.shape) 36 | 37 | hidden = net.view(batch_size, 32 * 4 * 4 * 4) 38 | output = self.fc(self.leaky_relu(hidden)) 39 | output = self.fc2(self.leaky_relu(output)) # BS x out_dim 40 | 41 | 42 | return output -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !BlenderProc/resources/front_3D/3D-FUTURE-model/ 2 | 3 | 4 | # Detection 5 | Detection/outputs 6 | Detection/outputs_v1 7 | Detection/outputs_v2 8 | Detection/outputs_v3 9 | Detection/outputs_v4 10 | Detection/predicted_data 11 | Detection/model 12 | Detection/front_dataset 13 | Detection/office_dataset 14 | Detection/office_dataset_all 15 | Detection/scannet 16 | Detection/__pycache__ 17 | 18 | # Junk 19 | .idea 20 | *__pychache__ 21 | 22 | 23 | # Blenderproc 24 | BlenderProc/blender 25 | BlenderProc/.idea/ 26 | BlenderProc/.vscode/ 27 | BlenderProc/output/ 28 | BlenderProc/output_good/ 29 | BlenderProc/debug/ 30 | BlenderProc/resources/cctextures 31 | BlenderProc/resources/scenenet/*zip 32 | BlenderProc/resources/scenenet/SceneNetData 33 | BlenderProc/resources/scenenet/texture* 34 | BlenderProc/resources/blenderkit 35 | BlenderProc/resources/IKEA 36 | BlenderProc/resources/pix3d 37 | BlenderProc/resources/front_3D/3D-FRONT 38 | BlenderProc/resources/front_3D/3D-FRONT-texture 39 | BlenderProc/resources/front_3D/3D-FUTURE-model 40 | BlenderProc/examples/front_3d_with_improved_mat/output 41 | BlenderProc/examples/front_3d/output 42 | BlenderProc/output 43 | *.blend1 44 | .vscode 45 | *.pyc 46 | *.swp 47 | *.so 48 | BlenderProc/docs/build/ 49 | BlenderProc/!docs/source/index.rst 50 | BlenderProc/docs/source/*rst 51 | BlenderProc/utils/libmesh/*.cpp 52 | BlenderProc/utils/libvoxelize/*.c 53 | BlenderProc/build/ 54 | 55 | Detection/outputs_1/ 56 | 57 | # Pix2Vox 58 | Pix2Vox 59 | Pix2Vox/instance/ 60 | Pix2Vox/target/ 61 | Pix2Vox/datasets/ 62 | Pix2Vox/output/ 63 | Pix2Vox/pretrained/ 64 | Pix2Vox/runs/ 65 | 66 | # Pose 67 | PoseEst/data/ 68 | 69 | # Tracking 70 | Tracking/output/ 71 | Tracking/output_v1/ 72 | Tracking/model/ 73 | 74 | # Utils 75 | Utility/buggy_scenes.txt 76 | Utility/voxel_scenes.txt 77 | 78 | # Backup 79 | Backup 80 | 81 | !BlenderProc/resources/front_3D/3D-FUTURE-model/*.json 82 | -------------------------------------------------------------------------------- /baseconfig.py: -------------------------------------------------------------------------------- 1 | import os 2 | from easydict import EasyDict 3 | 4 | CONF = EasyDict() 5 | CONF.PATH = 
EasyDict() 6 | 7 | # Base Folder 8 | CONF.PATH.BASE = os.path.abspath(os.path.dirname(__file__)) #Base Graph3DMOT path 9 | CONF.PATH.BPROC = os.path.join(CONF.PATH.BASE, "BlenderProc") 10 | CONF.PATH.DETECT = os.path.join(CONF.PATH.BASE, "Detection") 11 | CONF.PATH.PROJ = os.path.join(CONF.PATH.BASE, "PoseEst") 12 | CONF.PATH.TRACK = os.path.join(CONF.PATH.BASE, 'Tracking') 13 | 14 | # Front Data Generation 15 | CONF.PATH.FRONTDATA = os.path.join(CONF.PATH.BPROC, "resources/front_3D") 16 | CONF.PATH.FRONT3D = os.path.join(CONF.PATH.FRONTDATA, "3D-FRONT") 17 | CONF.PATH.FUTURE3D = os.path.join(CONF.PATH.FRONTDATA, "3D-FUTURE-model") 18 | CONF.PATH.FRONTTEXT = os.path.join(CONF.PATH.FRONTDATA, "3D-FRONT-texture") 19 | 20 | # Detection 21 | 22 | # MOTFront storage folder 23 | CONF.PATH.DETECTDATA = os.path.join(CONF.PATH.DETECT, 'front_dataset/') 24 | CONF.PATH.DETECTTRAIN = os.path.join(CONF.PATH.DETECTDATA, 'train') 25 | CONF.PATH.DETECTVAL = os.path.join(CONF.PATH.DETECTDATA, 'val') 26 | CONF.PATH.DETECTTEST = os.path.join(CONF.PATH.DETECTDATA, 'test') 27 | CONF.PATH.DETECTVIS = os.path.join(CONF.PATH.DETECTDATA, 'vis') 28 | CONF.PATH.VOXELDATA = os.path.join(CONF.PATH.DETECTDATA, 'voxel') # storage for binvox model folder 29 | # Pretrained Detection network folder 30 | CONF.PATH.DETECTMODEL = os.path.join(CONF.PATH.DETECT, 'model/') 31 | 32 | # Projection (for debugging) 33 | CONF.PATH.PROJDATA = os.path.join(CONF.PATH.PROJ, 'data') 34 | 35 | # Tracking (Data folder for seperate Tracking pipeline training) 36 | CONF.PATH.TRACKDATA = os.path.join(CONF.PATH.DETECT, 'predicted_data') 37 | 38 | # Outputs/ Logging 39 | CONF.PATH.DETECTOUTPUT = os.path.join(CONF.PATH.DETECT, 'outputs') 40 | CONF.PATH.BPROCOUTPUT = os.path.join(CONF.PATH.BPROC, 'output') 41 | CONF.PATH.TRACKOUTPUT = os.path.join(CONF.PATH.TRACK, 'output') 42 | 43 | -------------------------------------------------------------------------------- /Utility/analyse_datset.py: -------------------------------------------------------------------------------- 1 | import os, json, cv2, csv, sys 2 | import shutil 3 | sys.path.append('..') #Hack add ROOT DIR 4 | from baseconfig import CONF 5 | 6 | def get_dataset_info(img_path, combined=False): 7 | 8 | mapping_file = os.path.join(img_path[:-6], "3D_front_mapping.csv") 9 | _, csv_dict = read_csv_mapping(mapping_file) 10 | mapping_list, name_list = [], [] 11 | 12 | folders = os.listdir(img_path) 13 | bad_folder = [] 14 | img_count = 0 15 | 16 | for folder in folders: 17 | 18 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 19 | 20 | with open(json_file) as f: 21 | imgs_anns = json.load(f) 22 | 23 | for idx, v in enumerate(imgs_anns['images']): 24 | img_count += 1 25 | for anno in imgs_anns['annotations']: 26 | if anno['image_id'] == v['id']: 27 | cat_id = anno['category_id'] 28 | try: 29 | name = csv_dict[cat_id] 30 | except: 31 | bad_folder.append(folder) 32 | if not name in name_list: 33 | name_list.append(name) 34 | 35 | if cat_id in mapping_list: 36 | pass 37 | else: 38 | mapping_list.append(cat_id) 39 | for l in list(set(bad_folder)): 40 | print("remove folder", os.path.join(CONF.PATH.DETECTTRAIN, l)) 41 | shutil.rmtree(os.path.join(CONF.PATH.DETECTTRAIN, l), ignore_errors=True) 42 | 43 | if combined: 44 | return mapping_list, name_list, img_count 45 | else: 46 | return mapping_list, name_list 47 | 48 | def read_csv_mapping(path): 49 | """ Loads an idset mapping from a csv file, assuming the rows are sorted by their ids. 
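    Returns a list of the category names (in row order) and a dict mapping category id to name.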
50 | :param path: Path to csv file 51 | """ 52 | 53 | with open(path, 'r') as csvfile: 54 | reader = csv.DictReader(csvfile) 55 | new_id_label_map = [] 56 | new_label_id_map = {} 57 | 58 | for row in reader: 59 | new_id_label_map.append(row["name"]) 60 | new_label_id_map[int(row["id"])] = row["name"] 61 | 62 | return new_id_label_map, new_label_id_map 63 | -------------------------------------------------------------------------------- /Tracking/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | import open3d as o3d 5 | import mathutils 6 | 7 | from sklearn.metrics import recall_score 8 | from sklearn.metrics import precision_score 9 | from sklearn.metrics import f1_score 10 | 11 | from Tracking.utils.train_utils import convert_voxel_to_pc 12 | 13 | 14 | def get_precision(predictions, targets): 15 | 16 | # Binarize predictions 17 | predictions[predictions >= 0.5] = 1 18 | predictions[predictions < 0.5] = 0 19 | 20 | precision = precision_score(targets, predictions, zero_division=0) 21 | return precision 22 | 23 | def get_recall(predictions, targets): 24 | 25 | # Binarize predictions 26 | predictions[predictions >= 0.5] = 1 27 | predictions[predictions < 0.5] = 0 28 | 29 | recall = recall_score(targets, predictions, zero_division=0) 30 | return recall 31 | 32 | def get_f1(predictions, targets): 33 | 34 | # Binarize predictions 35 | predictions[predictions >= 0.5] = 1 36 | predictions[predictions < 0.5] = 0 37 | 38 | f1 = f1_score(targets, predictions, zero_division='warn') # warn only once 39 | return f1 40 | 41 | def get_MOTA(predictions, targets, gt_objects, misses, fps): 42 | ''' 43 | Full val/test set MOTA calculations 44 | MOTA score: 1 - num_misses + false positives + id_switches / total num_objects in all frames 45 | false_positives: Predicted 3D BBOX does not overlap with any GT 3D BBOX more than a threshold e.g. 0.2 IoU 46 | num_misses: For a GT 3D BBOX there exist no predicted 3D BBOX which overlaps more than min threshold, or less pred than gt objects 47 | id_switches: GT trajectory and predicted trajectory have do not match in object identities, predicted active/nonactive edge incorrect 48 | ''' 49 | 50 | # Binarize predictions 51 | predictions[predictions >= 0.5] = 1 52 | predictions[predictions < 0.5] = 0 53 | 54 | id_switches = np.count_nonzero(targets - predictions) 55 | MOTA = 1.0 - (float(misses + fps + id_switches) / float(gt_objects)) 56 | 57 | return MOTA, id_switches 58 | 59 | def get_mota_df(num_gt_objs, num_misses, num_fps, num_switches): 60 | ''' 61 | Calculates a mota score over all frames seen 62 | ''' 63 | mota = 1.0 - (float(num_misses + num_fps + num_switches) / float(num_gt_objs)) 64 | return mota 65 | 66 | 67 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/voxelize.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | from libc.math cimport floor, ceil 3 | from cython.view cimport array as cvarray 4 | 5 | cdef extern from "tribox2.h": 6 | int triBoxOverlap(float boxcenter[3], float boxhalfsize[3], 7 | float tri0[3], float tri1[3], float tri2[3]) 8 | 9 | 10 | @cython.boundscheck(False) # Deactivate bounds checking 11 | @cython.wraparound(False) # Deactivate negative indexing. 
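# Rasterizes every triangle of `faces` (given in voxel coordinates, shape (n_faces, 3, 3)) into the
# boolean occupancy grid `occ` in place, testing triangle/box overlap cell by cell.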
12 | cpdef int voxelize_mesh_(bint[:, :, :] occ, float[:, :, ::1] faces): 13 | assert(faces.shape[1] == 3) 14 | assert(faces.shape[2] == 3) 15 | 16 | n_faces = faces.shape[0] 17 | cdef int i 18 | for i in range(n_faces): 19 | voxelize_triangle_(occ, faces[i]) 20 | 21 | 22 | @cython.boundscheck(False) # Deactivate bounds checking 23 | @cython.wraparound(False) # Deactivate negative indexing. 24 | cpdef int voxelize_triangle_(bint[:, :, :] occupancies, float[:, ::1] triverts): 25 | cdef int bbox_min[3] 26 | cdef int bbox_max[3] 27 | cdef int i, j, k 28 | cdef float boxhalfsize[3] 29 | cdef float boxcenter[3] 30 | cdef bint intersection 31 | 32 | boxhalfsize[:] = (0.5, 0.5, 0.5) 33 | 34 | for i in range(3): 35 | bbox_min[i] = ( 36 | min(triverts[0, i], triverts[1, i], triverts[2, i]) 37 | ) 38 | bbox_min[i] = min(max(bbox_min[i], 0), occupancies.shape[i] - 1) 39 | 40 | for i in range(3): 41 | bbox_max[i] = ( 42 | max(triverts[0, i], triverts[1, i], triverts[2, i]) 43 | ) 44 | bbox_max[i] = min(max(bbox_max[i], 0), occupancies.shape[i] - 1) 45 | 46 | for i in range(bbox_min[0], bbox_max[0] + 1): 47 | for j in range(bbox_min[1], bbox_max[1] + 1): 48 | for k in range(bbox_min[2], bbox_max[2] + 1): 49 | boxcenter[:] = (i + 0.5, j + 0.5, k + 0.5) 50 | intersection = triBoxOverlap(&boxcenter[0], &boxhalfsize[0], 51 | &triverts[0, 0], &triverts[1, 0], &triverts[2, 0]) 52 | occupancies[i, j, k] |= intersection 53 | 54 | 55 | @cython.boundscheck(False) # Deactivate bounds checking 56 | @cython.wraparound(False) # Deactivate negative indexing. 57 | cdef int test_triangle_aabb(float[::1] boxcenter, float[::1] boxhalfsize, float[:, ::1] triverts): 58 | assert(boxcenter.shape[0] == 3) 59 | assert(boxhalfsize.shape[0] == 3) 60 | assert(triverts.shape[0] == triverts.shape[1] == 3) 61 | 62 | # print(triverts) 63 | # Call functions 64 | cdef int result = triBoxOverlap(&boxcenter[0], &boxhalfsize[0], 65 | &triverts[0, 0], &triverts[1, 0], &triverts[2, 0]) 66 | return result 67 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/triangle_hash.pyx: -------------------------------------------------------------------------------- 1 | 2 | # distutils: language=c++ 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | from libcpp.vector cimport vector 7 | from libc.math cimport floor, ceil 8 | 9 | cdef class TriangleHash: 10 | cdef vector[vector[int]] spatial_hash 11 | cdef int resolution 12 | 13 | def __cinit__(self, double[:, :, :] triangles, int resolution): 14 | self.spatial_hash.resize(resolution * resolution) 15 | self.resolution = resolution 16 | self._build_hash(triangles) 17 | 18 | @cython.boundscheck(False) # Deactivate bounds checking 19 | @cython.wraparound(False) # Deactivate negative indexing. 
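    # Registers each (already 2D-projected) triangle in every spatial-hash cell covered by its
    # clamped bounding box, so query() only has to test the triangles stored in a point's cell.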
20 | cdef int _build_hash(self, double[:, :, :] triangles): 21 | assert(triangles.shape[1] == 3) 22 | assert(triangles.shape[2] == 2) 23 | 24 | cdef int n_tri = triangles.shape[0] 25 | cdef int bbox_min[2] 26 | cdef int bbox_max[2] 27 | 28 | cdef int i_tri, j, x, y 29 | cdef int spatial_idx 30 | 31 | for i_tri in range(n_tri): 32 | # Compute bounding box 33 | for j in range(2): 34 | bbox_min[j] = min( 35 | triangles[i_tri, 0, j], triangles[i_tri, 1, j], triangles[i_tri, 2, j] 36 | ) 37 | bbox_max[j] = max( 38 | triangles[i_tri, 0, j], triangles[i_tri, 1, j], triangles[i_tri, 2, j] 39 | ) 40 | bbox_min[j] = min(max(bbox_min[j], 0), self.resolution - 1) 41 | bbox_max[j] = min(max(bbox_max[j], 0), self.resolution - 1) 42 | 43 | # Find all voxels where bounding box intersects 44 | for x in range(bbox_min[0], bbox_max[0] + 1): 45 | for y in range(bbox_min[1], bbox_max[1] + 1): 46 | spatial_idx = self.resolution * x + y 47 | self.spatial_hash[spatial_idx].push_back(i_tri) 48 | 49 | @cython.boundscheck(False) # Deactivate bounds checking 50 | @cython.wraparound(False) # Deactivate negative indexing. 51 | cpdef query(self, double[:, :] points): 52 | assert(points.shape[1] == 2) 53 | cdef int n_points = points.shape[0] 54 | 55 | cdef vector[int] points_indices 56 | cdef vector[int] tri_indices 57 | # cdef int[:] points_indices_np 58 | # cdef int[:] tri_indices_np 59 | 60 | cdef int i_point, k, x, y 61 | cdef int spatial_idx 62 | 63 | for i_point in range(n_points): 64 | x = int(points[i_point, 0]) 65 | y = int(points[i_point, 1]) 66 | if not (0 <= x < self.resolution and 0 <= y < self.resolution): 67 | continue 68 | 69 | spatial_idx = self.resolution * x + y 70 | for i_tri in self.spatial_hash[spatial_idx]: 71 | points_indices.push_back(i_point) 72 | tri_indices.push_back(i_tri) 73 | 74 | points_indices_np = np.zeros(points_indices.size(), dtype=np.int32) 75 | tri_indices_np = np.zeros(tri_indices.size(), dtype=np.int32) 76 | 77 | cdef int[:] points_indices_view = points_indices_np 78 | cdef int[:] tri_indices_view = tri_indices_np 79 | 80 | for k in range(points_indices.size()): 81 | points_indices_view[k] = points_indices[k] 82 | 83 | for k in range(tri_indices.size()): 84 | tri_indices_view[k] = tri_indices[k] 85 | 86 | return points_indices_np, tri_indices_np 87 | -------------------------------------------------------------------------------- /Detection/inference/inference_metrics.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import torch 5 | import mathutils 6 | import math 7 | import open3d as o3d 8 | from eulerangles import euler2matrix 9 | 10 | 11 | def compute_voxel_iou(generated_volume, ground_truth_volume): 12 | ''' 13 | 3D voxel IoU between two voxel grids 14 | ''' 15 | 16 | _volume = torch.ge(generated_volume, 0.5).float() 17 | intersection = torch.sum(_volume.mul(ground_truth_volume)).float() 18 | union = torch.sum(torch.ge(_volume.add(ground_truth_volume), 1)).float() # if _volume+ gt_volume >= 1 is union 19 | voxel_iou = (intersection / union).detach().cpu().item() 20 | 21 | return voxel_iou 22 | 23 | def get_rotation_diff(gt_rotation, pred_rotation): 24 | ''' 25 | gt_rotation: as euler coordinates xyz in radiants 26 | pred_rotation: as rotation matrix 27 | cls_name: indicating rotation symmetry 28 | calculate rotations difference between gt and predicted rotation matrix, min for two poses in y rotated by 180 degree, distinc 29 | ''' 30 | 31 | euler = mathutils.Euler(gt_rotation) 32 | gt_rotation = 
np.array(euler.to_matrix()) 33 | 34 | R1 = pred_rotation / np.cbrt(np.linalg.det(pred_rotation)) # R1 = pred 35 | R2 = gt_rotation / np.cbrt(np.linalg.det(gt_rotation)) # R2 = GT 36 | 37 | y_180_RT = np.diag([-1.0, 1.0, -1.0]) 38 | R = R1 @ R2.transpose() 39 | 40 | R_rot = R1 @ y_180_RT @ R2.transpose() 41 | theta = min(np.arccos((np.trace(R) - 1) / 2), 42 | np.arccos((np.trace(R_rot) - 1) / 2)) 43 | 44 | theta_deg = theta * 180 / np.pi 45 | 46 | return theta_deg 47 | 48 | def get_location_diff(gt_location, pred_location): 49 | ''' 50 | Calculate location difference of predicted pose in meter 51 | gt_location: xyz location in world coords 52 | pred_location: pred xyz location 53 | ''' 54 | 55 | dist = np.linalg.norm(gt_location - pred_location) 56 | 57 | return dist 58 | 59 | 60 | def get_location_diff_boxcenter(gt_3dbox, pred_3dbox): 61 | ''' 62 | Calculate location difference of predicted pose in meter based on bounding box centers 63 | gt_3dbox: 8x3 array 64 | pred_3dbox: 8x3 array 65 | ''' 66 | 67 | gtloc_box = o3d.geometry.OrientedBoundingBox() 68 | 69 | if gt_3dbox.sum() == 0: 70 | return None 71 | try: 72 | gtloc_box = gtloc_box.create_from_points(o3d.utility.Vector3dVector(gt_3dbox)) 73 | except: 74 | return None 75 | 76 | center_gtbox = gtloc_box.get_center() 77 | 78 | predloc_box = o3d.geometry.OrientedBoundingBox() 79 | try: 80 | predloc_box = predloc_box.create_from_points(o3d.utility.Vector3dVector(pred_3dbox)) 81 | except: 82 | return None 83 | center_predbox = predloc_box.get_center() 84 | 85 | dist = np.linalg.norm(center_gtbox - center_predbox) 86 | #print('Box location center', center_gtbox, center_predbox) 87 | 88 | return dist 89 | 90 | def get_mean_iou(voxel_list): 91 | 92 | if voxel_list: 93 | voxel_arr = np.array(voxel_list) 94 | voxel_arr = voxel_arr[~np.isnan(voxel_arr)] 95 | voxel_arr = voxel_arr.mean() 96 | else: 97 | voxel_arr = 'No Data' 98 | 99 | return voxel_arr 100 | 101 | def get_median_iou(voxel_list): 102 | 103 | if voxel_list: 104 | voxel_arr = np.array(voxel_list) 105 | voxel_arr = voxel_arr[~np.isnan(voxel_arr)] 106 | voxel_arr = np.median(voxel_arr) 107 | else: 108 | voxel_arr = 'No Data' 109 | 110 | return voxel_arr -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # 3D Multi-Object Tracking with Differentiable Pose Estimation 3 | 4 |

5 | Our network leverages a 2D detection backbone with additional NOC prediction and 3D reconstruction heads to predict per-object dense correspondences maps and 7-DoF pose parameters. We leverage those correspondences in our neural message passing based, fully end-to-end learnable network to model dependencies between objects over time for consistent multi-object tracking 6 | 7 | 8 | ## Introduction 9 | This a PyTorch implementation of our work "3D Multi-Object Tracking with Differentiable Pose Estimation". 10 | In this project, we present a novel framework for 3D Multi-Object Tracking in indoor scenes. 11 | 12 | Please check out the project website [3D_MOT](https://domischmauser.github.io/3D_MOT/). 13 | 14 | ## Dataset 15 | To download our novel 3D MOT dataset **MOTFront** consisting of 2381 unique indoor sequences, check out [Dataset](https://domischmauser.github.io/3D_MOT/). 16 | Or use the direct download link [MOTFront](http://kaldir.vc.in.tum.de/dominik/MOTFront.zip). 17 | 18 | ## Setup 19 | To install network dependencies refer to **environment.yaml**. 20 | We tested our code on a Linux / Ubuntu distribution. 21 | 22 | ## Paths 23 | Refer to the **baseconfig.py** file for a general path setup. 24 | Store pre-trained Detection networks in the **Detection/model** directory and pre-trained Tracking networks in the **Tracking/model** directory. 25 | Store MOTFront data in the **Detection/front_dataset** directory with subdirectories **train**, **val**, **test**. 26 | Store the **3D_front_mapping.csv** file in the **Detection/front_dataset** directory. 27 | 28 | 29 | ## Directories 30 | We split the code into two main blocks, represented with two folders: **Detection** and **Tracking**. 31 | 32 | Configurations for training the detection or end-to-end network can be set in the **cfg_setup.py** file. 33 | Configurations for training the tracking network can be set in the **options.py** and **graph_cfg.py** file. 34 | 35 | ## Basic Usage 36 | For training the 3D reconstruction and pose estimation pipeline independently, run the command: 37 | ``` 38 | python train_net.py 39 | ``` 40 | 41 | For training the tracking pipeline independently, which: 42 | - Requires inference results from the 3D reconstruction and pose estimation pipeline in a hdf5 format 43 | 44 | Run the command: 45 | ``` 46 | python train.py 47 | ``` 48 | 49 | For training our end-to-end network, set **eval_only = False** and run the command: 50 | ``` 51 | python train_combined.py 52 | ``` 53 | 54 | For inference on the 3D reconstruction and pose estimation pipeline, which: 55 | - Loads a pretrained network (best_model.pth) from the model folder 56 | - Stores inference results in the predicted_data folder in a hdf5 format 57 | 58 | Run the command: 59 | ``` 60 | python inference_detector.py 61 | ``` 62 | 63 | For inference on our tracking pipeline, which: 64 | - Loads a pretrained network (pretrained/edge_classifier.pth etc...) 
from the model folder 65 | - Ensure to set --use_graph if you are using a pretrained network with graph 66 | 67 | Run the command: 68 | ``` 69 | python inference.py 70 | ``` 71 | 72 | For inference on our end-to-end network, set the variables **eval_first = True** and **eval_only = True**: 73 | ``` 74 | python train_combined.py 75 | ``` 76 | 77 | 78 | 79 | ## Citation 80 | 81 | If you use the MOTFront data or code in your work, please kindly cite our work and our paper: 82 | 83 | ```bibtex 84 | @misc{https://doi.org/10.48550/arxiv.2206.13785, 85 | doi = {10.48550/ARXIV.2206.13785}, 86 | url = {https://arxiv.org/abs/2206.13785}, 87 | author = {Schmauser, Dominik and Qiu, Zeju and Müller, Norman and Nießner, Matthias}, 88 | keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, 89 | title = {3D Multi-Object Tracking with Differentiable Pose Estimation}, 90 | publisher = {arXiv}, 91 | year = {2022}, 92 | copyright = {Creative Commons Attribution 4.0 International} 93 | } 94 | ``` 95 | 96 | 97 | -------------------------------------------------------------------------------- /Detection/data/office_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | import numpy as np 5 | 6 | from torch.utils.data import Dataset 7 | 8 | sys.path.append('..') #Hack add ROOT DIR 9 | from baseconfig import CONF 10 | 11 | class Office_dataset(Dataset): 12 | ''' 13 | Office Real-World dataset data loader class 14 | ''' 15 | def __init__(self, base_dir, split='infer'): 16 | self.split = split 17 | self.data_dir = base_dir 18 | self.scenes = [f for f in os.listdir(os.path.abspath(self.data_dir))] 19 | self.scenes.sort() 20 | self.imgs = [] 21 | for scene in self.scenes: 22 | scene_path = os.path.join(self.data_dir, scene, 'rgb') 23 | scene_imgs = [os.path.join(scene_path, img) for img in os.listdir(scene_path)] 24 | scene_imgs.sort() 25 | self.imgs += scene_imgs 26 | self.mask_person = False 27 | self.resize_img = True 28 | 29 | def __len__(self): 30 | return len(self.imgs) 31 | 32 | def __getitem__(self, idx): 33 | 34 | img_dict = dict() 35 | 36 | img_path = self.imgs[idx] 37 | seq_path = img_path[:img_path.find('rgb')] 38 | img_name = img_path[img_path.find('rgb')+4:] 39 | depth_path = os.path.join(seq_path, 'depth', img_name) 40 | densepose_path = os.path.join(seq_path, 'denseposes', img_name) 41 | 42 | # RGB 43 | rgb_img = self.load_rgb(img_path, fmt='bgr') #todo needs loading as bgr 44 | rgb_img_fs = rgb_img 45 | 46 | # Depth 47 | depth_img = self.load_depth(depth_path) 48 | depth_img_fs = depth_img 49 | 50 | # Densepose 51 | densepose_mask = self.load_depth(densepose_path) 52 | bin_mask = (densepose_mask == 0.0).astype(int) # BG 1, Person 0 53 | bin_mask = np.expand_dims(bin_mask, axis=-1).repeat(3, axis=-1) 54 | 55 | if self.mask_person: 56 | rgb_img *= bin_mask 57 | #rgb_img[rgb_img == 0] = 255 58 | 59 | if self.resize_img: 60 | rgb_img = cv2.resize(rgb_img, dsize=(320, 240), interpolation=cv2.INTER_LINEAR) 61 | depth_img = cv2.resize(depth_img, dsize=(320, 240), interpolation=cv2.INTER_LINEAR) 62 | 63 | # Camera calibration 64 | calibration = os.path.join(seq_path, 'calibration.txt') 65 | with open(calibration) as f: 66 | tmp = f.readlines() 67 | 68 | calibration_list = tmp[0].split() 69 | fx, fy = float(calibration_list[0]), float(calibration_list[1]) 70 | cx, cy = float(calibration_list[2]), float(calibration_list[3]) 71 | 
camera_intrinsics_fs = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) #fullsized 72 | if self.resize_img: 73 | fx *= 0.5 74 | fy *= 0.5 75 | cx *= 0.5 76 | cy *= 0.5 77 | camera_intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 78 | 79 | # Output 80 | img_dict['seq_id'] = seq_path 81 | img_dict['img_id'] = img_name 82 | img_dict['rgb'] = rgb_img 83 | img_dict['rgb_fs'] = rgb_img_fs 84 | img_dict['depth'] = depth_img 85 | img_dict['depth_fs'] = depth_img_fs 86 | img_dict['densepose'] = densepose_mask 87 | img_dict['camera_intrinsics'] = camera_intrinsics 88 | img_dict['camera_intrinsics_fs'] = camera_intrinsics_fs 89 | 90 | return img_dict 91 | 92 | def load_rgb(self, rgb_path, fmt='bgr'): 93 | ''' 94 | Loads a rgb image from a png file 95 | Detectron uses BGR! 96 | ''' 97 | bgr_img = cv2.imread(rgb_path) 98 | if fmt == 'rgb': 99 | rgb_img = bgr_img[:, :, ::-1] 100 | elif fmt == 'bgr': 101 | rgb_img = bgr_img 102 | rgb_img = np.array(rgb_img, dtype=np.float32) 103 | 104 | return rgb_img 105 | 106 | def load_depth(self, depth_path): 107 | ''' 108 | Loads a depth image or a densepose image from a png file 109 | ''' 110 | depth_img = cv2.imread(depth_path) 111 | depth_img = np.array(depth_img[:,:,0], dtype=np.float32) #all channels equal only use first 112 | 113 | return depth_img -------------------------------------------------------------------------------- /PoseEst/pose_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | 5 | def evaluateModel(OutTransform, SourceHom, TargetHom, PassThreshold): 6 | 7 | Diff = TargetHom - np.matmul(OutTransform, SourceHom) 8 | ResidualVec = np.linalg.norm(Diff[:3, :], axis=0) 9 | Residual = np.linalg.norm(ResidualVec) 10 | InlierIdx = np.where(ResidualVec < PassThreshold) 11 | nInliers = np.count_nonzero(InlierIdx) 12 | InlierRatio = nInliers / SourceHom.shape[1] 13 | 14 | return Residual, InlierRatio, InlierIdx[0] 15 | 16 | def estimateSimilarityUmeyama(SourceHom, TargetHom): 17 | ''' 18 | Procrustes analysis for pose fitting 19 | SourceHom: Pointcloud from NOCS map 20 | TargetHom: Depth pointcloud equals GT 21 | ''' 22 | 23 | SourceCentroid = np.mean(SourceHom[:3, :], axis=1) 24 | TargetCentroid = np.mean(TargetHom[:3, :], axis=1) 25 | nPoints = SourceHom.shape[1] 26 | 27 | CenteredSource = SourceHom[:3, :] - np.tile(SourceCentroid, (nPoints, 1)).transpose() 28 | CenteredTarget = TargetHom[:3, :] - np.tile(TargetCentroid, (nPoints, 1)).transpose() 29 | 30 | CovMatrix = np.matmul(CenteredTarget, np.transpose(CenteredSource)) / nPoints 31 | 32 | if np.isnan(CovMatrix).any(): 33 | print('nPoints:', nPoints) 34 | print(SourceHom.shape) 35 | print(TargetHom.shape) 36 | raise RuntimeError('There are NANs in the input.') 37 | 38 | U, D, Vh = np.linalg.svd(CovMatrix, full_matrices=True) 39 | d = (np.linalg.det(U) * np.linalg.det(Vh)) < 0.0 40 | if d: 41 | D[-1] = -D[-1] 42 | U[:, -1] = -U[:, -1] 43 | 44 | Rotation = np.matmul(U, Vh).T 45 | 46 | varP = np.var(SourceHom[:3, :], axis=1).sum() 47 | if varP * np.sum(D) != 0: 48 | ScaleFact = 1/varP * np.sum(D) # scale factor 49 | else: 50 | ScaleFact = 1 # scale factor set to 1 since otherwise division by 0 51 | 52 | Scales = np.array([ScaleFact, ScaleFact, ScaleFact]) 53 | ScaleMatrix = np.diag(Scales) 54 | 55 | Translation = TargetHom[:3, :].mean(axis=1) - SourceHom[:3, :].mean(axis=1).dot(ScaleFact*Rotation) 56 | 57 | OutTransform = np.identity(4) 58 | OutTransform[:3, :3] = ScaleMatrix @ Rotation # todo check if T is 
correct 59 | OutTransform[:3, 3] = Translation 60 | 61 | return Scales, Rotation, Translation, OutTransform 62 | 63 | def getRANSACInliers(SourceHom, TargetHom, MaxIterations=100, PassThreshold=200, StopThreshold=1): 64 | ''' 65 | RANSAC Outlier Removal 66 | ''' 67 | 68 | BestResidual = 1e10 69 | BestInlierRatio = 0 70 | BestInlierIdx = np.arange(SourceHom.shape[1]) 71 | for i in range(0, MaxIterations): 72 | # Pick 10 random (but corresponding) points from source and target 73 | RandIdx = np.random.randint(SourceHom.shape[1], size=10) 74 | _, _, _, OutTransform = estimateSimilarityUmeyama(SourceHom[:, RandIdx], TargetHom[:, RandIdx]) 75 | Residual, InlierRatio, InlierIdx = evaluateModel(OutTransform, SourceHom, TargetHom, PassThreshold) 76 | if Residual < BestResidual: 77 | BestResidual = Residual 78 | BestInlierRatio = InlierRatio 79 | BestInlierIdx = InlierIdx 80 | if BestResidual < StopThreshold: 81 | break 82 | 83 | return SourceHom[:, BestInlierIdx], TargetHom[:, BestInlierIdx], BestInlierRatio 84 | 85 | 86 | def estimateSimilarityTransform(source: np.array, target: np.array, verbose=False, ratio_adapt = 1): 87 | SourceHom = np.transpose(np.hstack([source, np.ones([source.shape[0], 1])])) 88 | TargetHom = np.transpose(np.hstack([target, np.ones([source.shape[0], 1])])) 89 | 90 | # Auto-parameter selection based on source-target heuristics 91 | TargetNorm = np.mean(np.linalg.norm(target, axis=1)) 92 | SourceNorm = np.mean(np.linalg.norm(source, axis=1)) 93 | RatioTS = (TargetNorm / SourceNorm) 94 | RatioST = (SourceNorm / TargetNorm) 95 | PassT = RatioST*ratio_adapt if(RatioST>RatioTS) else RatioTS*ratio_adapt 96 | StopT = PassT / 100 97 | nIter = 100 98 | if verbose: 99 | print('Pass threshold: ', PassT) 100 | print('Stop threshold: ', StopT) 101 | print('Number of iterations: ', nIter) 102 | 103 | SourceInliersHom, TargetInliersHom, BestInlierRatio = getRANSACInliers(SourceHom, TargetHom, MaxIterations=nIter, PassThreshold=PassT, StopThreshold=StopT) 104 | 105 | if(BestInlierRatio < 0.1): 106 | print('[ WARN ] - Something is wrong. 
Small BestInlierRatio: ', BestInlierRatio) 107 | return None, None, None, None 108 | 109 | Scales, Rotation, Translation, OutTransform = estimateSimilarityUmeyama(SourceInliersHom, TargetInliersHom) 110 | 111 | if verbose: 112 | print('BestInlierRatio:', BestInlierRatio) 113 | print('Rotation:\n', Rotation) 114 | print('Translation:\n', Translation) 115 | print('Scales:', Scales) 116 | 117 | return Scales, Rotation, Translation, OutTransform 118 | -------------------------------------------------------------------------------- /BlenderProc/utils/voxels.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy import ndimage 4 | # from skimage.measure import block_reduce 5 | from utils.libvoxelize.voxelize import voxelize_mesh_ 6 | from utils.libmesh import check_mesh_contains 7 | 8 | 9 | class VoxelGrid: 10 | def __init__(self, data, loc=(0., 0., 0.), scale=1): 11 | assert(data.shape[0] == data.shape[1] == data.shape[2]) 12 | data = np.asarray(data, dtype=np.bool) 13 | loc = np.asarray(loc) 14 | self.data = data 15 | self.loc = loc 16 | self.scale = scale 17 | 18 | @property 19 | def resolution(self): 20 | assert(self.data.shape[0] == self.data.shape[1] == self.data.shape[2]) 21 | return self.data.shape[0] 22 | 23 | def contains(self, points): 24 | nx = self.resolution 25 | 26 | # Rescale bounding box to [-0.5, 0.5]^3 27 | points = (points - self.loc) / self.scale 28 | # Discretize points to [0, nx-1]^3 29 | points_i = ((points + 0.5) * nx).astype(np.int32) 30 | # i1, i2, i3 have sizes (batch_size, T) 31 | i1, i2, i3 = points_i[..., 0], points_i[..., 1], points_i[..., 2] 32 | # Only use indices inside bounding box 33 | mask = ( 34 | (i1 >= 0) & (i2 >= 0) & (i3 >= 0) 35 | & (nx > i1) & (nx > i2) & (nx > i3) 36 | ) 37 | # Prevent out of bounds error 38 | i1 = i1[mask] 39 | i2 = i2[mask] 40 | i3 = i3[mask] 41 | 42 | # Compute values, default value outside box is 0 43 | occ = np.zeros(points.shape[:-1], dtype=np.bool) 44 | occ[mask] = self.data[i1, i2, i3] 45 | 46 | return occ 47 | 48 | 49 | def voxelize_ray(mesh, resolution): 50 | occ_surface = voxelize_surface(mesh, resolution) 51 | # TODO: use surface voxels here? 52 | occ_interior = voxelize_interior(mesh, resolution) 53 | occ = (occ_interior | occ_surface) 54 | return occ 55 | 56 | 57 | def voxelize_fill(mesh, resolution): 58 | bounds = mesh.bounds 59 | if (np.abs(bounds) >= 0.5).any(): 60 | raise ValueError('voxelize fill is only supported if mesh is inside [-0.5, 0.5]^3/') 61 | 62 | occ = voxelize_surface(mesh, resolution) 63 | occ = ndimage.morphology.binary_fill_holes(occ) 64 | return occ 65 | 66 | 67 | def voxelize_surface(mesh, resolution): 68 | vertices = mesh.vertices 69 | faces = mesh.faces 70 | 71 | vertices = (vertices + 0.5) * resolution # in range[0,32] 72 | face_loc = vertices[faces] 73 | occ = np.full((resolution,) * 3, 0, dtype=np.int32) 74 | face_loc = face_loc.astype(np.float32) 75 | 76 | voxelize_mesh_(occ, face_loc) 77 | occ = (occ != 0) 78 | 79 | return occ 80 | 81 | 82 | def voxelize_interior(mesh, resolution): 83 | shape = (resolution,) * 3 84 | bb_min = (0.5,) * 3 85 | bb_max = (resolution - 0.5,) * 3 86 | # Create points. 
Add noise to break symmetry 87 | points = make_3d_grid(bb_min, bb_max, shape=shape).numpy() 88 | points = points + 0.1 * (np.random.rand(*points.shape) - 0.5) 89 | points = (points / resolution - 0.5) 90 | occ = check_mesh_contains(mesh, points) 91 | occ = occ.reshape(shape) 92 | 93 | return occ 94 | 95 | 96 | def check_voxel_occupied(occupancy_grid): 97 | occ = occupancy_grid 98 | 99 | occupied = ( 100 | occ[..., :-1, :-1, :-1] 101 | & occ[..., :-1, :-1, 1:] 102 | & occ[..., :-1, 1:, :-1] 103 | & occ[..., :-1, 1:, 1:] 104 | & occ[..., 1:, :-1, :-1] 105 | & occ[..., 1:, :-1, 1:] 106 | & occ[..., 1:, 1:, :-1] 107 | & occ[..., 1:, 1:, 1:] 108 | ) 109 | return occupied 110 | 111 | 112 | def check_voxel_unoccupied(occupancy_grid): 113 | occ = occupancy_grid 114 | 115 | unoccupied = ~( 116 | occ[..., :-1, :-1, :-1] 117 | | occ[..., :-1, :-1, 1:] 118 | | occ[..., :-1, 1:, :-1] 119 | | occ[..., :-1, 1:, 1:] 120 | | occ[..., 1:, :-1, :-1] 121 | | occ[..., 1:, :-1, 1:] 122 | | occ[..., 1:, 1:, :-1] 123 | | occ[..., 1:, 1:, 1:] 124 | ) 125 | return unoccupied 126 | 127 | 128 | def check_voxel_boundary(occupancy_grid): 129 | occupied = check_voxel_occupied(occupancy_grid) 130 | unoccupied = check_voxel_unoccupied(occupancy_grid) 131 | return ~occupied & ~unoccupied 132 | 133 | 134 | def make_3d_grid(bb_min, bb_max, shape): 135 | ''' Makes a 3D grid. 136 | 137 | Args: 138 | bb_min (tuple): bounding box minimum 139 | bb_max (tuple): bounding box maximum 140 | shape (tuple): output shape 141 | ''' 142 | size = shape[0] * shape[1] * shape[2] 143 | 144 | pxs = torch.linspace(bb_min[0], bb_max[0], shape[0]) 145 | pys = torch.linspace(bb_min[1], bb_max[1], shape[1]) 146 | pzs = torch.linspace(bb_min[2], bb_max[2], shape[2]) 147 | 148 | pxs = pxs.view(-1, 1, 1).expand(*shape).contiguous().view(size) 149 | pys = pys.view(1, -1, 1).expand(*shape).contiguous().view(size) 150 | pzs = pzs.view(1, 1, -1).expand(*shape).contiguous().view(size) 151 | p = torch.stack([pxs, pys, pzs], dim=1) 152 | 153 | return p 154 | -------------------------------------------------------------------------------- /Detection/cfg_setup.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | from detectron2.config import get_cfg 3 | from detectron2.config import CfgNode as CN 4 | from detectron2 import model_zoo 5 | 6 | sys.path.append('..') #Hack add ROOT DIR 7 | from baseconfig import CONF 8 | 9 | # Initialize training config 10 | def init_cfg(num_classes, combined=False, run_test=False, office=False, office_train=False): 11 | ''' 12 | Set parameters: 13 | run_test: for final test run 14 | eval_period: num iterations between each evaluation run 15 | ims_per_batch: batch size 16 | checkpoint period: save model after n iterations 17 | base_lr & weight_decay: training setup 18 | ''' 19 | 20 | cfg = get_cfg() 21 | cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # Loads only backbone weights 22 | 23 | # MOTFront Dataset 24 | if not office: 25 | cfg.DATASETS.TRAIN = ("front_train",) 26 | if not run_test: 27 | cfg.DATASETS.TEST = ("front_val",) 28 | else: 29 | cfg.DATASETS.TEST = ("front_test",) 30 | 31 | # Office Dataset 32 | else: 33 | cfg.DATASETS.TRAIN = ("office_train",) 34 | if office_train: 35 | cfg.DATASETS.TEST = ("office_train",) 36 | else: 37 | cfg.DATASETS.TEST = ("office_inference",) 38 | 39 | cfg.TEST.EVAL_PERIOD = 1000 40 | cfg.TEST.IMG_SAVE_FREQ = 4 # Every 4th evaluation run save pred images to tensorboard 41 | 
cfg.TEST.START_EVAL = 1 # Start evaluation after n iterations 42 | cfg.DATALOADER.ASPECT_RATIO_GROUPING = False 43 | 44 | # Dataloader 45 | cfg.DATALOADER.NUM_WORKERS = 0 46 | 47 | # Input 48 | cfg.INPUT.MIN_SIZE_TRAIN = (240,) 49 | # Sample size of smallest side by choice or random selection from range give by 50 | # INPUT.MIN_SIZE_TRAIN 51 | cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" 52 | # Maximum size of the side of the image during training 53 | cfg.INPUT.MAX_SIZE_TRAIN = 320 54 | # Size of the smallest side of the image during testing. Set to zero to disable resize in testing. 55 | cfg.INPUT.MIN_SIZE_TEST = 240 56 | # Maximum size of the side of the image during testing 57 | cfg.INPUT.MAX_SIZE_TEST = 320 58 | cfg.INPUT.RANDOM_FLIP = 'none' 59 | cfg.INPUT.FORMAT = "BGR" # Image input format -> will be transformed to rgb in mapper heads 60 | 61 | # ROI HEADS 62 | cfg.MODEL.ROI_HEADS.NAME = "VoxelNocsHeads" 63 | cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 64 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes 65 | cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.75] 66 | cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.20 67 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1 68 | 69 | if not office: 70 | cfg.MODEL.PIXEL_MEAN = [59.64, 61.96, 64.02] # MOTFront 71 | else: 72 | cfg.MODEL.PIXEL_MEAN = [92.0080866, 98.01352945, 121.7431208] # office 73 | 74 | cfg.MODEL.PIXEL_STD = [1, 1, 1] 75 | cfg.MODEL.MASK_ON = True 76 | 77 | # Voxel Head 78 | cfg.MODEL.VOXEL_ON = True 79 | cfg.MODEL.ROI_VOXEL_HEAD = CN() 80 | cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT = 0.75 81 | if office_train: 82 | cfg.MODEL.VOXEL_ON = True 83 | cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT = 0.015 84 | 85 | cfg.MODEL.ROI_VOXEL_HEAD.NAME = 'Pix2VoxDecoder' 86 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_RESOLUTION = 14 87 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_TYPE = "ROIAlign" 88 | cfg.MODEL.ROI_VOXEL_HEAD.POOLER_SAMPLING_RATIO = 0 89 | 90 | # Nocs Head 91 | cfg.MODEL.NOCS_ON = True 92 | if office_train: 93 | cfg.MODEL.NOCS_ON = False 94 | cfg.MODEL.ROI_NOCS_HEAD = CN() 95 | cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS = False # True for classification loss, False for smooth l1 loss 96 | cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS = 32 97 | if cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS: 98 | cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT = 0.2 99 | else: 100 | cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT = 3 101 | cfg.MODEL.ROI_NOCS_HEAD.IOU_THRES = 0.5 102 | cfg.MODEL.ROI_NOCS_HEAD.NAME = 'NocsDecoder' 103 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_RESOLUTION = 14 104 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_TYPE = "ROIAlign" 105 | cfg.MODEL.ROI_NOCS_HEAD.POOLER_SAMPLING_RATIO = 0 106 | 107 | # Solver Options 108 | cfg.SOLVER.CHECKPOINT_PERIOD = 3000 #save model each n iterations 109 | cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" 110 | cfg.SOLVER.STEPS = [] # decay learning rate 111 | cfg.SOLVER.WARMUP_FACTOR = 1 112 | cfg.SOLVER.WARMUP_ITERS = 0 113 | cfg.SOLVER.WARMUP_METHOD = "linear" 114 | cfg.SOLVER.GAMMA = 1 115 | cfg.SOLVER.WEIGHT_DECAY = 0.0005 # L2-Regularization 116 | cfg.SOLVER.IMS_PER_BATCH = 2 # Batch size 117 | cfg.SOLVER.BASE_LR = 0.0008 118 | cfg.SOLVER.MAX_ITER = 240000 119 | 120 | # Combined settings 121 | if combined: 122 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 123 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.4 # Overlap threshold used for non-maximum suppression (suppress boxes with IoU >= this threshold) 124 | elif office: 125 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 # higher more suppression 126 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2 # lower more suppression, overlap threshold used for 
non-maximum suppression (suppress boxes with IoU >= this threshold) 127 | 128 | 129 | cfg.OUTPUT_DIR = CONF.PATH.DETECTOUTPUT 130 | 131 | return cfg 132 | 133 | 134 | def inference_cfg(num_classes): 135 | 136 | train_cfg = init_cfg(num_classes) 137 | train_cfg.MODEL.WEIGHTS = os.path.join(CONF.PATH.DETECTMODEL, "best_model.pth") # path to the model we just trained 138 | train_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 139 | train_cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.4 # Overlap threshold used for non-maximum suppression (suppress boxes with IoU >= this threshold) 140 | 141 | return train_cfg 142 | -------------------------------------------------------------------------------- /BlenderProc/utils/libmesh/inside_mesh.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .triangle_hash import TriangleHash as _TriangleHash 3 | 4 | 5 | def check_mesh_contains(mesh, points, hash_resolution=512): 6 | intersector = MeshIntersector(mesh, hash_resolution) 7 | contains = intersector.query(points) 8 | return contains 9 | 10 | 11 | class MeshIntersector: 12 | def __init__(self, mesh, resolution=512): 13 | triangles = mesh.vertices[mesh.faces].astype(np.float64) 14 | n_tri = triangles.shape[0] 15 | 16 | self.resolution = resolution 17 | self.bbox_min = triangles.reshape(3 * n_tri, 3).min(axis=0) 18 | self.bbox_max = triangles.reshape(3 * n_tri, 3).max(axis=0) 19 | # Tranlate and scale it to [0.5, self.resolution - 0.5]^3 20 | self.scale = (resolution - 1) / (self.bbox_max - self.bbox_min) 21 | self.translate = 0.5 - self.scale * self.bbox_min 22 | 23 | self._triangles = triangles = self.rescale(triangles) 24 | # assert(np.allclose(triangles.reshape(-1, 3).min(0), 0.5)) 25 | # assert(np.allclose(triangles.reshape(-1, 3).max(0), resolution - 0.5)) 26 | 27 | triangles2d = triangles[:, :, :2] 28 | self._tri_intersector2d = TriangleIntersector2d( 29 | triangles2d, resolution) 30 | 31 | def query(self, points): 32 | # Rescale points 33 | points = self.rescale(points) 34 | 35 | # placeholder result with no hits we'll fill in later 36 | contains = np.zeros(len(points), dtype=np.bool) 37 | 38 | # cull points outside of the axis aligned bounding box 39 | # this avoids running ray tests unless points are close 40 | inside_aabb = np.all( 41 | (0 <= points) & (points <= self.resolution), axis=1) 42 | if not inside_aabb.any(): 43 | return contains 44 | 45 | # Only consider points inside bounding box 46 | mask = inside_aabb 47 | points = points[mask] 48 | 49 | # Compute intersection depth and check order 50 | points_indices, tri_indices = self._tri_intersector2d.query(points[:, :2]) 51 | 52 | triangles_intersect = self._triangles[tri_indices] 53 | points_intersect = points[points_indices] 54 | 55 | depth_intersect, abs_n_2 = self.compute_intersection_depth( 56 | points_intersect, triangles_intersect) 57 | 58 | # Count number of intersections in both directions 59 | smaller_depth = depth_intersect >= points_intersect[:, 2] * abs_n_2 60 | bigger_depth = depth_intersect < points_intersect[:, 2] * abs_n_2 61 | points_indices_0 = points_indices[smaller_depth] 62 | points_indices_1 = points_indices[bigger_depth] 63 | 64 | nintersect0 = np.bincount(points_indices_0, minlength=points.shape[0]) 65 | nintersect1 = np.bincount(points_indices_1, minlength=points.shape[0]) 66 | 67 | # Check if point contained in mesh 68 | contains1 = (np.mod(nintersect0, 2) == 1) 69 | contains2 = (np.mod(nintersect1, 2) == 1) 70 | if (contains1 != contains2).any(): 71 | 
print('Warning: contains1 != contains2 for some points.') 72 | contains[mask] = (contains1 & contains2) 73 | return contains 74 | 75 | def compute_intersection_depth(self, points, triangles): 76 | t1 = triangles[:, 0, :] 77 | t2 = triangles[:, 1, :] 78 | t3 = triangles[:, 2, :] 79 | 80 | v1 = t3 - t1 81 | v2 = t2 - t1 82 | # v1 = v1 / np.linalg.norm(v1, axis=-1, keepdims=True) 83 | # v2 = v2 / np.linalg.norm(v2, axis=-1, keepdims=True) 84 | 85 | normals = np.cross(v1, v2) 86 | alpha = np.sum(normals[:, :2] * (t1[:, :2] - points[:, :2]), axis=1) 87 | 88 | n_2 = normals[:, 2] 89 | t1_2 = t1[:, 2] 90 | s_n_2 = np.sign(n_2) 91 | abs_n_2 = np.abs(n_2) 92 | 93 | mask = (abs_n_2 != 0) 94 | 95 | depth_intersect = np.full(points.shape[0], np.nan) 96 | depth_intersect[mask] = \ 97 | t1_2[mask] * abs_n_2[mask] + alpha[mask] * s_n_2[mask] 98 | 99 | # Test the depth: 100 | # TODO: remove and put into tests 101 | # points_new = np.concatenate([points[:, :2], depth_intersect[:, None]], axis=1) 102 | # alpha = (normals * t1).sum(-1) 103 | # mask = (depth_intersect == depth_intersect) 104 | # assert(np.allclose((points_new[mask] * normals[mask]).sum(-1), 105 | # alpha[mask])) 106 | return depth_intersect, abs_n_2 107 | 108 | def rescale(self, array): 109 | array = self.scale * array + self.translate 110 | return array 111 | 112 | 113 | class TriangleIntersector2d: 114 | def __init__(self, triangles, resolution=128): 115 | self.triangles = triangles 116 | self.tri_hash = _TriangleHash(triangles, resolution) 117 | 118 | def query(self, points): 119 | point_indices, tri_indices = self.tri_hash.query(points) 120 | point_indices = np.array(point_indices, dtype=np.int64) 121 | tri_indices = np.array(tri_indices, dtype=np.int64) 122 | points = points[point_indices] 123 | triangles = self.triangles[tri_indices] 124 | mask = self.check_triangles(points, triangles) 125 | point_indices = point_indices[mask] 126 | tri_indices = tri_indices[mask] 127 | return point_indices, tri_indices 128 | 129 | def check_triangles(self, points, triangles): 130 | contains = np.zeros(points.shape[0], dtype=np.bool) 131 | A = triangles[:, :2] - triangles[:, 2:] 132 | A = A.transpose([0, 2, 1]) 133 | y = points - triangles[:, 2] 134 | 135 | detA = A[:, 0, 0] * A[:, 1, 1] - A[:, 0, 1] * A[:, 1, 0] 136 | 137 | mask = (np.abs(detA) != 0.) 
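        # Solve only where the 2x2 edge matrix is invertible; degenerate (zero-area)
        # triangles in the projection are skipped.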
138 | A = A[mask] 139 | y = y[mask] 140 | detA = detA[mask] 141 | 142 | s_detA = np.sign(detA) 143 | abs_detA = np.abs(detA) 144 | 145 | u = (A[:, 1, 1] * y[:, 0] - A[:, 0, 1] * y[:, 1]) * s_detA 146 | v = (-A[:, 1, 0] * y[:, 0] + A[:, 0, 0] * y[:, 1]) * s_detA 147 | 148 | sum_uv = u + v 149 | contains[mask] = ( 150 | (0 < u) & (u < abs_detA) & (0 < v) & (v < abs_detA) 151 | & (0 < sum_uv) & (sum_uv < abs_detA) 152 | ) 153 | return contains 154 | 155 | -------------------------------------------------------------------------------- /Tracking/options.py: -------------------------------------------------------------------------------- 1 | 2 | import os, sys 3 | import argparse 4 | 5 | # the directory that options.py resides in 6 | file_dir = os.path.dirname(__file__) 7 | 8 | sys.path.append('..') #Hack add ROOT DIR 9 | from baseconfig import CONF 10 | 11 | 12 | class Options: 13 | def __init__(self): 14 | self.parser = argparse.ArgumentParser( 15 | description="Tracking options") 16 | 17 | # PATHS 18 | self.parser.add_argument("--base_dir", 19 | type=str, 20 | help="path to the training data", 21 | default=CONF.PATH.TRACKDATA) 22 | self.parser.add_argument("--log_dir", 23 | type=str, 24 | help="log directory", 25 | default=CONF.PATH.TRACKOUTPUT) 26 | 27 | # Network 28 | self.parser.add_argument("--use_graph", 29 | type=bool, 30 | help="Use Graph Neural Network for edge classification", 31 | default=True) 32 | self.parser.add_argument("--seq_len", 33 | type=int, 34 | help="Length of the input sequence", 35 | default=25) 36 | self.parser.add_argument("--no_pose", 37 | type=bool, 38 | help="Exclude pose for edge classification", 39 | default=False) 40 | self.parser.add_argument("--no_geo", 41 | type=bool, 42 | help="Exclude pose for edge classification", 43 | default=False) 44 | self.parser.add_argument("--rel_app", 45 | type=bool, 46 | help="Use a relative appearance feature for graph edges", 47 | default=False) 48 | self.parser.add_argument("--as_quaternion", 49 | type=bool, 50 | help="Use quaternion angles for rotation", 51 | default=False) 52 | self.parser.add_argument("--precompute_feats", 53 | type=bool, 54 | help="Precompute Siamese features and store as hdf5", 55 | default=False) 56 | 57 | 58 | # Model Parameters 59 | self.parser.add_argument("--learning_rate", 60 | type=float, 61 | help="learning rate", 62 | default=1e-3) 63 | self.parser.add_argument("--weight_decay", # L2 Regularization 64 | type=float, 65 | help="weight decay", 66 | default=1e-4) # 1e-4 67 | self.parser.add_argument("--num_epochs", 68 | type=int, 69 | help="number of epochs", 70 | default=100) 71 | self.parser.add_argument("--batch_size", 72 | type=int, 73 | help="batch size", 74 | default=2) 75 | self.parser.add_argument("--use_augmentation", 76 | type=bool, 77 | help="use data augmentation", 78 | default=False) 79 | self.parser.add_argument("--num_workers", 80 | type=int, 81 | help="number of dataloader workers", 82 | default=0) 83 | self.parser.add_argument("--use_triplet", 84 | type=bool, 85 | help="Use triplet loss for edge classification", 86 | default=False) 87 | self.parser.add_argument("--use_l1", 88 | type=bool, 89 | help="Use l1 loss for edge classification", 90 | default=False) 91 | 92 | # Logging 93 | self.parser.add_argument("--save_frequency", 94 | type=int, 95 | help="number of epochs between each save", 96 | default=15) 97 | 98 | self.parser.add_argument("--start_saving", 99 | type=int, 100 | help="epoch start to save weights", 101 | default=15) 102 | 103 | 
self.parser.add_argument("--start_saving_optimizer", 104 | type=int, 105 | help="epoch start to save weights", 106 | default=14) 107 | 108 | self.parser.add_argument("--log_frequency", 109 | type=int, 110 | help="number of batches between each tensorboard log", 111 | default=10) 112 | 113 | self.parser.add_argument("--save_model", 114 | type=bool, 115 | help="save model", 116 | default=True) 117 | 118 | self.parser.add_argument("--resume", 119 | type=bool, 120 | help="resume training", 121 | default=False) 122 | 123 | self.parser.add_argument("--load_weights_folder", 124 | type=str, 125 | help="folder of pretrain model", 126 | default=os.path.join(file_dir, "model/pretrained")) 127 | 128 | self.parser.add_argument("--models_to_load", 129 | type=list, 130 | help="pretrained model to load", 131 | default=['edge_classifier', 'edge_encoder', 'voxel_encoder', 'graph_net']) 132 | 133 | def parse(self): 134 | self.options = self.parser.parse_args() 135 | return self.options 136 | -------------------------------------------------------------------------------- /BlenderProc/utils/libvoxelize/tribox2.h: -------------------------------------------------------------------------------- 1 | /********************************************************/ 2 | /* AABB-triangle overlap test code */ 3 | /* by Tomas Akenine-M�ller */ 4 | /* Function: int triBoxOverlap(float boxcenter[3], */ 5 | /* float boxhalfsize[3],float triverts[3][3]); */ 6 | /* History: */ 7 | /* 2001-03-05: released the code in its first version */ 8 | /* 2001-06-18: changed the order of the tests, faster */ 9 | /* */ 10 | /* Acknowledgement: Many thanks to Pierre Terdiman for */ 11 | /* suggestions and discussions on how to optimize code. */ 12 | /* Thanks to David Hunt for finding a ">="-bug! */ 13 | /********************************************************/ 14 | #include 15 | #include 16 | 17 | #define X 0 18 | #define Y 1 19 | #define Z 2 20 | 21 | #define CROSS(dest,v1,v2) \ 22 | dest[0]=v1[1]*v2[2]-v1[2]*v2[1]; \ 23 | dest[1]=v1[2]*v2[0]-v1[0]*v2[2]; \ 24 | dest[2]=v1[0]*v2[1]-v1[1]*v2[0]; 25 | 26 | #define DOT(v1,v2) (v1[0]*v2[0]+v1[1]*v2[1]+v1[2]*v2[2]) 27 | 28 | #define SUB(dest,v1,v2) \ 29 | dest[0]=v1[0]-v2[0]; \ 30 | dest[1]=v1[1]-v2[1]; \ 31 | dest[2]=v1[2]-v2[2]; 32 | 33 | #define FINDMINMAX(x0,x1,x2,min,max) \ 34 | min = max = x0; \ 35 | if(x1max) max=x1;\ 37 | if(x2max) max=x2; 39 | 40 | int planeBoxOverlap(float normal[3],float d, float maxbox[3]) 41 | { 42 | int q; 43 | float vmin[3],vmax[3]; 44 | for(q=X;q<=Z;q++) 45 | { 46 | if(normal[q]>0.0f) 47 | { 48 | vmin[q]=-maxbox[q]; 49 | vmax[q]=maxbox[q]; 50 | } 51 | else 52 | { 53 | vmin[q]=maxbox[q]; 54 | vmax[q]=-maxbox[q]; 55 | } 56 | } 57 | if(DOT(normal,vmin)+d>0.0f) return 0; 58 | if(DOT(normal,vmax)+d>=0.0f) return 1; 59 | 60 | return 0; 61 | } 62 | 63 | 64 | /*======================== X-tests ========================*/ 65 | #define AXISTEST_X01(a, b, fa, fb) \ 66 | p0 = a*v0[Y] - b*v0[Z]; \ 67 | p2 = a*v2[Y] - b*v2[Z]; \ 68 | if(p0rad || max<-rad) return 0; 71 | 72 | #define AXISTEST_X2(a, b, fa, fb) \ 73 | p0 = a*v0[Y] - b*v0[Z]; \ 74 | p1 = a*v1[Y] - b*v1[Z]; \ 75 | if(p0rad || max<-rad) return 0; 78 | 79 | /*======================== Y-tests ========================*/ 80 | #define AXISTEST_Y02(a, b, fa, fb) \ 81 | p0 = -a*v0[X] + b*v0[Z]; \ 82 | p2 = -a*v2[X] + b*v2[Z]; \ 83 | if(p0rad || max<-rad) return 0; 86 | 87 | #define AXISTEST_Y1(a, b, fa, fb) \ 88 | p0 = -a*v0[X] + b*v0[Z]; \ 89 | p1 = -a*v1[X] + b*v1[Z]; \ 90 | if(p0rad || max<-rad) return 0; 93 | 94 | 
/*======================== Z-tests ========================*/ 95 | 96 | #define AXISTEST_Z12(a, b, fa, fb) \ 97 | p1 = a*v1[X] - b*v1[Y]; \ 98 | p2 = a*v2[X] - b*v2[Y]; \ 99 | if(p2rad || max<-rad) return 0; 102 | 103 | #define AXISTEST_Z0(a, b, fa, fb) \ 104 | p0 = a*v0[X] - b*v0[Y]; \ 105 | p1 = a*v1[X] - b*v1[Y]; \ 106 | if(p0rad || max<-rad) return 0; 109 | 110 | int triBoxOverlap(float boxcenter[3],float boxhalfsize[3],float tri0[3], float tri1[3], float tri2[3]) 111 | { 112 | 113 | /* use separating axis theorem to test overlap between triangle and box */ 114 | /* need to test for overlap in these directions: */ 115 | /* 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */ 116 | /* we do not even need to test these) */ 117 | /* 2) normal of the triangle */ 118 | /* 3) crossproduct(edge from tri, {x,y,z}-directin) */ 119 | /* this gives 3x3=9 more tests */ 120 | float v0[3],v1[3],v2[3]; 121 | float min,max,d,p0,p1,p2,rad,fex,fey,fez; 122 | float normal[3],e0[3],e1[3],e2[3]; 123 | 124 | /* This is the fastest branch on Sun */ 125 | /* move everything so that the boxcenter is in (0,0,0) */ 126 | SUB(v0, tri0, boxcenter); 127 | SUB(v1, tri1, boxcenter); 128 | SUB(v2, tri2, boxcenter); 129 | 130 | /* compute triangle edges */ 131 | SUB(e0,v1,v0); /* tri edge 0 */ 132 | SUB(e1,v2,v1); /* tri edge 1 */ 133 | SUB(e2,v0,v2); /* tri edge 2 */ 134 | 135 | /* Bullet 3: */ 136 | /* test the 9 tests first (this was faster) */ 137 | fex = fabs(e0[X]); 138 | fey = fabs(e0[Y]); 139 | fez = fabs(e0[Z]); 140 | AXISTEST_X01(e0[Z], e0[Y], fez, fey); 141 | AXISTEST_Y02(e0[Z], e0[X], fez, fex); 142 | AXISTEST_Z12(e0[Y], e0[X], fey, fex); 143 | 144 | fex = fabs(e1[X]); 145 | fey = fabs(e1[Y]); 146 | fez = fabs(e1[Z]); 147 | AXISTEST_X01(e1[Z], e1[Y], fez, fey); 148 | AXISTEST_Y02(e1[Z], e1[X], fez, fex); 149 | AXISTEST_Z0(e1[Y], e1[X], fey, fex); 150 | 151 | fex = fabs(e2[X]); 152 | fey = fabs(e2[Y]); 153 | fez = fabs(e2[Z]); 154 | AXISTEST_X2(e2[Z], e2[Y], fez, fey); 155 | AXISTEST_Y1(e2[Z], e2[X], fez, fex); 156 | AXISTEST_Z12(e2[Y], e2[X], fey, fex); 157 | 158 | /* Bullet 1: */ 159 | /* first test overlap in the {x,y,z}-directions */ 160 | /* find min, max of the triangle each direction, and test for overlap in */ 161 | /* that direction -- this is equivalent to testing a minimal AABB around */ 162 | /* the triangle against the AABB */ 163 | 164 | /* test in X-direction */ 165 | FINDMINMAX(v0[X],v1[X],v2[X],min,max); 166 | if(min>boxhalfsize[X] || max<-boxhalfsize[X]) return 0; 167 | 168 | /* test in Y-direction */ 169 | FINDMINMAX(v0[Y],v1[Y],v2[Y],min,max); 170 | if(min>boxhalfsize[Y] || max<-boxhalfsize[Y]) return 0; 171 | 172 | /* test in Z-direction */ 173 | FINDMINMAX(v0[Z],v1[Z],v2[Z],min,max); 174 | if(min>boxhalfsize[Z] || max<-boxhalfsize[Z]) return 0; 175 | 176 | /* Bullet 2: */ 177 | /* test if the box intersects the plane of the triangle */ 178 | /* compute plane equation of triangle: normal*x+d=0 */ 179 | CROSS(normal,e0,e1); 180 | d=-DOT(normal,v0); /* plane eq: normal.x+d=0 */ 181 | if(!planeBoxOverlap(normal,d,boxhalfsize)) return 0; 182 | 183 | return 1; /* box and triangle overlaps */ 184 | } 185 | -------------------------------------------------------------------------------- /Detection/roi_heads/voxel_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | import fvcore.nn.weight_init as weight_init 4 | import sys 5 | import torch 6 | import numpy as np 7 | #import matplotlib.pyplot as plt 8 | 9 | from detectron2.layers import ShapeSpec, cat 10 | from detectron2.utils.events import get_event_storage 11 | from detectron2.utils.registry import Registry 12 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 13 | from torch import nn 14 | from torch.nn import functional as F 15 | from typing import Dict 16 | 17 | sys.path.append('..') #Hack add ROOT DIR 18 | from Detection.inference.inference_metrics import compute_voxel_iou 19 | from Detection.utils.train_utils import init_weights, balanced_BCE_loss 20 | 21 | 22 | 23 | ROI_VOXEL_HEAD_REGISTRY = Registry("ROI_VOXEL_HEAD") 24 | 25 | 26 | def voxel_loss(pred_voxel_logits, instances, pred_boxes, loss_weight=1, iou_thres=0.5): 27 | ''' 28 | Calculate BCE loss between predicted 32³ voxel grid and GT voxel grid if IoU larger threshold 29 | ''' 30 | 31 | start_instance = 0 32 | pred_voxel_logits = torch.squeeze(pred_voxel_logits, dim=1) # Num obj x 32x32x32 33 | mean_voxel_iou = [] 34 | loss_gt_voxels = [] 35 | loss_pred_voxels = [] 36 | 37 | 38 | for instances_per_image in instances: 39 | if len(instances_per_image) == 0: 40 | continue 41 | 42 | end_instance = start_instance + len(instances_per_image) 43 | 44 | gt_voxel_logits = instances_per_image.gt_voxels.to(dtype=torch.float) 45 | gt_boxes_per_image = instances_per_image.gt_boxes 46 | 47 | for i in range(start_instance, end_instance): 48 | 49 | abs_pred_box = pred_boxes[i, :].to(dtype=torch.int64) 50 | pred_box = Boxes(torch.unsqueeze(abs_pred_box, dim=0)) # XYXY 51 | 52 | pred_voxel = pred_voxel_logits[i,:,:,:] 53 | 54 | if torch.sum(pred_voxel) == 0: # empty detections 55 | continue 56 | 57 | ious = pairwise_iou(gt_boxes_per_image, pred_box) 58 | idx_max_iou = int(torch.argmax(ious)) 59 | max_iou = ious[idx_max_iou] 60 | 61 | if max_iou >= iou_thres: 62 | 63 | gt_voxel = gt_voxel_logits[idx_max_iou,:,:,:] 64 | voxel_iou = compute_voxel_iou(pred_voxel, gt_voxel) 65 | mean_voxel_iou.append(voxel_iou) 66 | loss_gt_voxels.append(torch.unsqueeze(gt_voxel, dim=0)) 67 | loss_pred_voxels.append(torch.unsqueeze(pred_voxel, dim=0)) 68 | 69 | start_instance = end_instance 70 | 71 | if mean_voxel_iou: 72 | get_event_storage().put_scalar("training/voxel_iou", np.array(mean_voxel_iou).mean()) 73 | 74 | gt_voxels = cat(loss_gt_voxels, dim=0) 75 | pred_voxels = cat(loss_pred_voxels, dim=0) 76 | 77 | assert pred_voxels.shape == gt_voxels.shape 78 | 79 | voxel_loss = balanced_BCE_loss(gt_voxels, pred_voxels) 80 | voxel_loss = voxel_loss * loss_weight 81 | 82 | return voxel_loss, gt_voxels 83 | 84 | 85 | def voxel_inference(pred_voxel_logits, pred_instances): # shape Num obj x 1 x D x H x W, Num img x Instance class 86 | 87 | voxel_probs_pred = pred_voxel_logits 88 | num_boxes_per_image = [len(i) for i in pred_instances] 89 | 90 | if np.array(num_boxes_per_image).sum() == 0: 91 | print('No predicted instances found for batch...') 92 | return 93 | 94 | voxel_probs_pred = voxel_probs_pred.split(num_boxes_per_image, dim=0) 95 | 96 | # Assign predicted voxels # instances and predictions different len -> moving idx 97 | for inst, prob in zip(pred_instances, voxel_probs_pred): 98 | 99 | if len(inst) == 0: 100 | print('No predicted instances found ...') 101 | continue 102 | 103 | if prob.sum() == 0: # sigmoid of 0 = 0.5 -< (prob.numel() * 0.5) 104 | inst.pred_voxels = torch.tensor([]).cuda() 105 | else: 106 | inst.pred_voxels = 
torch.squeeze(prob, dim=1) # (Num inst in 1 img, D, H, W) 107 | 108 | 109 | class Decoder(torch.nn.Module): 110 | """ 111 | Decoder Module from Pix2Vox++ Implementation 112 | """ 113 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 114 | super(Decoder, self).__init__() 115 | 116 | self.input_shape = input_shape 117 | 118 | # Layer Definition 119 | self.layer1 = torch.nn.Sequential( 120 | torch.nn.ConvTranspose3d(784, 512, kernel_size=3, stride=1, bias=False, padding=1), 121 | torch.nn.BatchNorm3d(512), 122 | torch.nn.ReLU() 123 | ) 124 | self.layer2 = torch.nn.Sequential( 125 | torch.nn.ConvTranspose3d(512, 128, kernel_size=4, stride=2, bias=False, padding=1), 126 | torch.nn.BatchNorm3d(128), 127 | torch.nn.ReLU() 128 | ) 129 | self.layer3 = torch.nn.Sequential( 130 | torch.nn.ConvTranspose3d(128, 32, kernel_size=4, stride=2, bias=False, padding=1), 131 | torch.nn.BatchNorm3d(32), 132 | torch.nn.ReLU() 133 | ) 134 | self.layer4 = torch.nn.Sequential( 135 | torch.nn.ConvTranspose3d(32, 8, kernel_size=4, stride=2, bias=False, padding=1), 136 | torch.nn.BatchNorm3d(8), 137 | torch.nn.ReLU() 138 | ) 139 | self.layer5 = torch.nn.Sequential( 140 | torch.nn.ConvTranspose3d(8, 1, kernel_size=1, bias=False), 141 | ) 142 | 143 | def forward(self, features): 144 | """ 145 | """ 146 | num_obj = features.shape[0] 147 | if num_obj != 0: 148 | gen_volume = features.view(num_obj, -1, 4, 4, 4) 149 | #print(gen_volume.size()) # torch.Size([num_obj, 784, 4, 4, 4]) 150 | gen_volume = self.layer1(gen_volume) 151 | #print(gen_volume.size()) # torch.Size([num_obj, 512, 4, 4, 4]) 152 | gen_volume = self.layer2(gen_volume) 153 | #print(gen_volume.size()) # torch.Size([num_obj, 128, 8, 8, 8]) 154 | gen_volume = self.layer3(gen_volume) 155 | #print(gen_volume.size()) # torch.Size([num_obj, 32, 16, 16, 16]) 156 | gen_volume = self.layer4(gen_volume) 157 | #print(gen_volume.size()) # torch.Size([num_obj, 8, 32, 32, 32]) 158 | gen_volume = self.layer5(gen_volume) 159 | #print(gen_volume.size()) # torch.Size([num_obj, 1, 32, 32, 32]) 160 | else: 161 | gen_volume = torch.zeros([1, 1, 32, 32, 32]) 162 | 163 | return gen_volume 164 | 165 | 166 | @ROI_VOXEL_HEAD_REGISTRY.register() 167 | class Pix2VoxDecoder(nn.Module): 168 | """ 169 | A voxel head with several conv layers, plus an upsample layer. 
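    Implemented here as a Pix2Vox++-style decoder of 3D transposed convolutions (the Decoder module above),
    upsampling pooled ROI features to a 32x32x32 occupancy grid.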
170 | """ 171 | 172 | def __init__(self, cfg, input_shape): 173 | super(Pix2VoxDecoder, self).__init__() 174 | 175 | # Model 176 | self.decoder = Decoder(cfg, input_shape) 177 | #init_weights(self.decoder, init_type='kaiming', init_gain=0.02) 178 | 179 | 180 | def forward(self, x): 181 | 182 | x = self.decoder(x) #Batchsize x channels x H x W 183 | 184 | return x 185 | 186 | 187 | def build_voxel_head(cfg, input_shape): 188 | name = cfg.MODEL.ROI_VOXEL_HEAD.NAME 189 | return ROI_VOXEL_HEAD_REGISTRY.get(name)(cfg, input_shape) 190 | -------------------------------------------------------------------------------- /Tracking/datasets/front_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import cv2 4 | import numpy as np 5 | import json 6 | import h5py 7 | import torch 8 | import open3d as o3d 9 | 10 | from torchvision import transforms 11 | from torch.utils.data import Dataset 12 | 13 | sys.path.append('..') #Hack add ROOT DIR 14 | from baseconfig import CONF 15 | 16 | class Front_dataset(Dataset): 17 | def __init__(self, base_dir, split='train', transform=None, with_scene_pc=False): 18 | self.transform = transform # using transform in torch 19 | self.split = split 20 | self.scenes_skip = [] 21 | self.data_dir = os.path.join(base_dir, self.split) 22 | self.hdf5_dir = os.path.join(CONF.PATH.DETECTDATA, self.split) 23 | self.scenes = [f for f in os.listdir(os.path.abspath(self.data_dir)) if f not in self.scenes_skip] 24 | self.json_dir = os.path.join(CONF.PATH.DETECTDATA, self.split) 25 | self.camera_intrinsics = np.array([[292.87803547399, 0, 0], [0, 292.87803547399, 0], [0, 0, 1]]) 26 | self.with_scene_pc = with_scene_pc 27 | 28 | def __len__(self): 29 | return len(self.scenes) 30 | 31 | def __getitem__(self, idx): 32 | 33 | scene = self.scenes[idx] 34 | 35 | data_path = os.path.join(self.data_dir, scene) 36 | #json_path = os.path.join(self.json_dir, scene) 37 | 38 | unsorted_imgs = [f for f in os.listdir(os.path.abspath(data_path)) if 'feat' not in f] 39 | img_ints = [int(img[:-3]) for img in unsorted_imgs] 40 | imgs = [im for _, im in sorted(zip(img_ints, unsorted_imgs))] 41 | 42 | output = [] 43 | for idx_, img in enumerate(imgs): 44 | 45 | # Load scan pointcloud 46 | if self.with_scene_pc: 47 | hdf5_path = os.path.join(self.hdf5_dir, scene, str(idx_) + '.hdf5') 48 | rgb_path = os.path.join(self.hdf5_dir, scene, 'coco_data', 'rgb_' + str(idx_).zfill(4) + '.png') 49 | depth_map, campose, cx, cy = self.load_hdf5(hdf5_path) 50 | self.camera_intrinsics[0, 2] = cx 51 | self.camera_intrinsics[1, 2] = cy 52 | rgb_img = self.load_rgb(rgb_path) 53 | cam_rgb_pc = self.backproject_rgb(rgb_img, depth_map, self.camera_intrinsics) 54 | world_pc = self.cam2world(cam_rgb_pc, campose) 55 | 56 | 57 | img_path = os.path.join(data_path, img) 58 | hf = h5py.File(img_path, 'r') 59 | 60 | # Unpack GT data 61 | gt_object_id = np.array(hf.get("gt_objid")) 62 | gt_voxels = np.array(hf.get("gt_voxels")) 63 | gt_3Dbbox = np.array(hf.get("gt_3Dbbox")) 64 | gt_locations = np.array(hf.get("gt_locations")) 65 | gt_rotations = np.array(hf.get("gt_rotations")) 66 | gt_compl_box = np.array(hf.get("gt_compl_box")) 67 | gt_scales = np.array(hf.get("gt_scales")) 68 | gt_classes = np.array(hf.get("gt_cls")) - 1 # -1 because predicted starts at 0 and gt at 1 69 | 70 | # Unpack predicted data 71 | classes = np.array(hf.get("classes")) #from 0 to 6 72 | objectness_scores = np.array(hf.get("objectness_scores")) 73 | rotations = 
np.array(hf.get("rotations")) 74 | translations = np.array(hf.get("translations")) 75 | scales = np.array(hf.get("scales")) 76 | voxels = np.array(hf.get("voxels")) 77 | pred_3Dbbox = np.array(hf.get("pred_3Dbbox")) 78 | 79 | img_dict = {'classes': torch.tensor(classes, dtype=torch.int), 80 | 'objectness_scores': objectness_scores, 81 | 'rotations': torch.tensor(rotations), 82 | 'translations': torch.tensor(translations), 83 | 'scales': torch.tensor(scales), 84 | 'voxels': torch.tensor(voxels), 85 | 'pred_3Dbbox': torch.tensor(pred_3Dbbox), 86 | 'gt_object_id': torch.tensor(gt_object_id), 87 | 'gt_locations': torch.tensor(gt_locations), 88 | 'gt_rotations': torch.tensor(gt_rotations), 89 | 'gt_3Dbbox': torch.tensor(gt_3Dbbox), 90 | 'gt_compl_box': torch.tensor(gt_compl_box), 91 | 'gt_scales': torch.tensor(gt_scales), 92 | 'gt_classes': torch.tensor(gt_classes), 93 | 'gt_voxels': gt_voxels, 94 | 'image': img, 95 | 'scene': scene 96 | } 97 | if self.with_scene_pc: 98 | img_dict['world_pc'] = world_pc 99 | output.append(img_dict) 100 | 101 | return output # list of parameters of n images 102 | 103 | def load_hdf5(self, hdf5_path): 104 | ''' 105 | Loads campose and depth map from an hdf5 file 106 | returns additional camera intrinsics cx, cy 107 | ''' 108 | 109 | with h5py.File(hdf5_path, 'r') as data: 110 | for key in data.keys(): 111 | if key == 'depth': 112 | depth = np.array(data[key]) 113 | elif key == 'campose': 114 | campose = np.array(data[key]) 115 | 116 | img_width = depth.shape[1] 117 | img_height = depth.shape[0] 118 | 119 | cx = (img_width / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 120 | cy = (img_height / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 121 | 122 | return depth, campose, cx, cy 123 | 124 | def load_rgb(self, rgb_path): 125 | ''' 126 | Loads a rgb image from a png file 127 | ''' 128 | bgr_img = cv2.imread(rgb_path) 129 | rgb_img = bgr_img[:, :, ::-1] 130 | rgb_img = np.array(rgb_img, dtype=np.float32) 131 | 132 | return rgb_img 133 | 134 | def backproject_rgb(self, rgb, depth, intrinsics, debug_mode=False): 135 | ''' 136 | Backproject depth map to camera space, with additional rgb values 137 | Returns: Depth PC with according RGB values in camspace, used idxs in pixel space 138 | ''' 139 | 140 | intrinsics_inv = np.linalg.inv(intrinsics) 141 | non_zero_mask = (depth > 0) 142 | 143 | idxs = np.where(non_zero_mask) 144 | grid = np.array([idxs[1], idxs[0]]) 145 | 146 | length = grid.shape[1] 147 | ones = np.ones([1, length]) 148 | uv_grid = np.concatenate((grid, ones), axis=0) # [3, num_pixel] 149 | 150 | xyz = intrinsics_inv @ uv_grid # [3, num_pixel] 151 | xyz = np.transpose(xyz) # [num_pixel, 3] 152 | 153 | z = depth[idxs[0], idxs[1]] 154 | 155 | pts = xyz * z[:, np.newaxis] / xyz[:, -1:] 156 | pts[:, 1] = -pts[:, 1] 157 | pts[:, 2] = -pts[:, 2] 158 | 159 | rgb_vals = rgb[idxs[0], idxs[1]] 160 | 161 | rgb_pts = np.concatenate((pts, rgb_vals), axis=-1) 162 | 163 | if debug_mode: 164 | depth_pc_obj = o3d.geometry.PointCloud() 165 | nocs_origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1, origin=[0, 0, 0]) 166 | depth_pc_obj.points = o3d.utility.Vector3dVector(pts) 167 | o3d.visualization.draw_geometries([depth_pc_obj, nocs_origin]) 168 | 169 | return rgb_pts 170 | 171 | def cam2world(self, rgb_pts, campose): 172 | ''' 173 | transform camera space pc to world space pc 174 | ''' 175 | trans = campose[:3, 3:] 176 | rot = campose[:3, :3] 177 | 178 | cam_pts = rgb_pts[:, :3] 179 | world_pc = np.dot(rot, cam_pts.transpose()) + trans 180 | world_pc = 
world_pc.transpose() 181 | 182 | rgb_world = np.concatenate((world_pc, rgb_pts[:, 3:]), axis=-1) 183 | 184 | return rgb_world -------------------------------------------------------------------------------- /Tracking/utils/train_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import numpy as np 5 | from scipy.spatial import ConvexHull 6 | from torch.nn import init 7 | 8 | import sys 9 | 10 | def init_weights(net, init_type='normal', init_gain=0.02): 11 | """Initialize network weights. 12 | Parameters: 13 | net (network) -- network to be initialized 14 | init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal 15 | init_gain (float) -- scaling factor for normal, xavier and orthogonal. 16 | """ 17 | 18 | def init_func(m): # define the initialization function 19 | classname = m.__class__.__name__ 20 | if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): 21 | if init_type == 'normal': 22 | init.normal_(m.weight.data, 0.0, init_gain) 23 | 24 | elif init_type == 'xavier': 25 | init.xavier_normal_(m.weight.data, gain=init_gain) 26 | 27 | elif init_type == 'kaiming': 28 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 29 | 30 | elif init_type == 'orthogonal': 31 | init.orthogonal_(m.weight.data, gain=init_gain) 32 | 33 | else: 34 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 35 | 36 | if hasattr(m, 'bias') and m.bias is not None: 37 | init.constant_(m.bias.data, 0.0) 38 | elif classname.find( 39 | 'BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. 40 | init.normal_(m.weight.data, 1.0, init_gain) 41 | init.constant_(m.bias.data, 0.0) 42 | 43 | print('initialize network with %s' % init_type) 44 | net.apply(init_func) # apply the initialization function 45 | 46 | 47 | def get_quaternion_from_euler(roll, pitch, yaw): 48 | """ 49 | Convert an Euler angle to a quaternion. 50 | 51 | Input 52 | :param roll: The roll (rotation around x-axis) angle in radians. 53 | :param pitch: The pitch (rotation around y-axis) angle in radians. 54 | :param yaw: The yaw (rotation around z-axis) angle in radians. 
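    All three angles are expected as torch tensors of matching shape.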
55 | 56 | Output 57 | :return qx, qy, qz, qw: The orientation in quaternion [x,y,z,w] format 58 | """ 59 | qx = torch.sin(roll / 2) * torch.cos(pitch / 2) * torch.cos(yaw / 2) - torch.cos(roll / 2) * torch.sin(pitch / 2) * torch.sin(yaw / 2) 60 | qy = torch.cos(roll / 2) * torch.sin(pitch / 2) * torch.cos(yaw / 2) + torch.sin(roll / 2) * torch.cos(pitch / 2) * torch.sin(yaw / 2) 61 | qz = torch.cos(roll / 2) * torch.cos(pitch / 2) * torch.sin(yaw / 2) - torch.sin(roll / 2) * torch.sin(pitch / 2) * torch.cos(yaw / 2) 62 | qw = torch.cos(roll / 2) * torch.cos(pitch / 2) * torch.cos(yaw / 2) + torch.sin(roll / 2) * torch.sin(pitch / 2) * torch.sin(yaw / 2) 63 | quat_angles = torch.cat([torch.unsqueeze(qx, dim=-1), torch.unsqueeze(qy, dim=-1), torch.unsqueeze(qz, dim=-1), torch.unsqueeze(qw, dim=-1)], dim=-1) 64 | 65 | return quat_angles 66 | 67 | def check_pair(pred_bbox, gt_bboxes, gt_ids, thres=0.01): 68 | 69 | ious = [] 70 | for i in range(gt_bboxes.shape[0]): 71 | iou, _ = compute_3d_iou(pred_bbox, gt_bboxes[i,:,:]) 72 | ious.append(iou) 73 | 74 | max_iou = np.array(ious).max() 75 | max_iou_idx = np.argmax(np.array(ious)) 76 | if max_iou >= thres: 77 | obj_id = gt_ids[max_iou_idx] 78 | else: 79 | obj_id = None 80 | 81 | return obj_id 82 | 83 | def compute_3d_iou(corners1, corners2): 84 | 85 | # corner points are in counter clockwise order 86 | rect1 = [(corners1[i, 0], corners1[i, 2]) for i in range(3, -1, -1)] 87 | rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)] 88 | 89 | area1 = poly_area(np.array(rect1)[:, 0], np.array(rect1)[:, 1]) 90 | area2 = poly_area(np.array(rect2)[:, 0], np.array(rect2)[:, 1]) 91 | 92 | inter, inter_area = convex_hull_intersection(rect1, rect2) 93 | iou_2d = inter_area / (area1 + area2 - inter_area) 94 | ymax = min(corners1[0, 1], corners2[0, 1]) 95 | ymin = max(corners1[4, 1], corners2[4, 1]) 96 | 97 | inter_vol = inter_area * max(0.0, ymax - ymin) 98 | 99 | vol1 = box3d_vol(corners1) 100 | vol2 = box3d_vol(corners2) 101 | iou = inter_vol / (vol1 + vol2 - inter_vol) 102 | 103 | return iou, iou_2d 104 | 105 | # Helper functions -------------------------------------------- 106 | 107 | def poly_area(x,y): 108 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 109 | 110 | def convex_hull_intersection(p1, p2): 111 | """ Compute area of two convex hull's intersection area. 112 | p1,p2 are a list of (x,y) tuples of hull vertices. 113 | return a list of (x,y) for the intersection and its volume 114 | """ 115 | inter_p = polygon_clip(p1, p2) 116 | if inter_p is not None: 117 | hull_inter = ConvexHull(inter_p) 118 | return inter_p, hull_inter.volume 119 | else: 120 | return None, 0.0 121 | 122 | def box3d_vol(corners): 123 | 124 | ''' corners: (8,3) no assumption on axis direction ''' 125 | 126 | a = torch.sqrt(torch.sum((corners[0,:] - corners[1,:])**2)) 127 | b = torch.sqrt(torch.sum((corners[1,:] - corners[2,:])**2)) 128 | c = torch.sqrt(torch.sum((corners[0,:] - corners[4,:])**2)) 129 | return a*b*c 130 | 131 | 132 | def polygon_clip(subjectPolygon, clipPolygon): 133 | """ Clip a polygon with another polygon. 134 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 135 | Args: 136 | subjectPolygon: a list of (x,y) 2d points, any polygon. 137 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 138 | Note: 139 | **points have to be counter-clockwise ordered** 140 | Return: 141 | a list of (x,y) vertex point for the intersection polygon. 
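      Returns None when the polygons do not overlap (empty intersection).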
142 | """ 143 | 144 | def inside(p): 145 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 146 | 147 | def computeIntersection(): 148 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 149 | dp = [s[0] - e[0], s[1] - e[1]] 150 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 151 | n2 = s[0] * e[1] - s[1] * e[0] 152 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 153 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 154 | 155 | outputList = subjectPolygon 156 | cp1 = clipPolygon[-1] 157 | 158 | for clipVertex in clipPolygon: 159 | cp2 = clipVertex 160 | inputList = outputList 161 | outputList = [] 162 | s = inputList[-1] 163 | 164 | for subjectVertex in inputList: 165 | e = subjectVertex 166 | if inside(e): 167 | if not inside(s): 168 | outputList.append(computeIntersection()) 169 | outputList.append(e) 170 | elif inside(s): 171 | outputList.append(computeIntersection()) 172 | s = e 173 | cp1 = cp2 174 | if len(outputList) == 0: 175 | return None 176 | return (outputList) 177 | 178 | def sec_to_hm_str(t): 179 | """Convert time in seconds to a nice string 180 | e.g. 10239 -> '02h50m39s' 181 | """ 182 | h, m, s = sec_to_hm(t) 183 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 184 | 185 | 186 | def sec_to_hm(t): 187 | """Convert time in seconds to time in hours, minutes and seconds 188 | e.g. 10239 -> (2, 50, 39) 189 | """ 190 | t = int(t) 191 | s = t % 60 192 | t //= 60 193 | m = t % 60 194 | t //= 60 195 | return t, m, s 196 | 197 | def convert_voxel_to_pc(voxel_grid, rot, trans, scale): 198 | ''' 199 | Converts a voxel grid to a point cloud with according pose 200 | voxel_grid: 32x32x32 tensor binary 201 | rot, trans, scale: output from run pose function 202 | returns pc: n x 3 array 203 | ''' 204 | 205 | nonzero_inds = np.nonzero(voxel_grid)[:-1] 206 | points = nonzero_inds / 32 - 0.5 207 | points = points.detach().cpu().numpy() 208 | 209 | global_scalerot = (np.identity(3) * scale.copy()) @ rot 210 | world_pc = global_scalerot @ points.transpose() + np.expand_dims(trans.copy(), axis=-1) 211 | world_pc = world_pc.transpose() 212 | 213 | return world_pc 214 | -------------------------------------------------------------------------------- /Detection/train_net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os, sys, shutil 3 | import torch 4 | import roi_heads #Required for call register() 5 | from collections import OrderedDict 6 | import detectron2.utils.comm as comm 7 | from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer 8 | 9 | from detectron2.engine import default_argument_parser, default_writers, launch 10 | from detectron2.evaluation import print_csv_format 11 | 12 | from detectron2.engine import DefaultTrainer 13 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 14 | 15 | from detectron2.modeling import build_model 16 | from detectron2.solver import build_lr_scheduler, build_optimizer 17 | from detectron2.utils.events import EventStorage 18 | 19 | from register_dataset import RegisterDataset 20 | from data.mapper_heads import VoxNocsMapper 21 | from evaluator.FrontEvaluator import FrontEvaluator 22 | from evaluator.CocoEvaluator import COCOEvaluator 23 | from evaluator.EvaluatorUtils import inference_on_dataset_voxnocs, inference_on_dataset_coco 24 | from Utility.analyse_datset import get_dataset_info 25 | from cfg_setup import init_cfg 26 | 27 | sys.path.append('..') #Hack add ROOT DIR 28 | from baseconfig import 
CONF 29 | 30 | 31 | logger = logging.getLogger("front_logger") 32 | 33 | 34 | class FrontTrainer(DefaultTrainer): 35 | ''' 36 | Main Detectron2 MOTFront network training class 37 | ''' 38 | 39 | @classmethod 40 | def build_evaluator_coco(cls, cfg, dataset_name, output_folder=None): 41 | if output_folder is None: 42 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 43 | return COCOEvaluator(dataset_name, ('bbox', 'segm'), True, output_folder) 44 | 45 | @classmethod 46 | def build_evaluator_voxnocs(cls, cfg, dataset_name, output_folder=None): 47 | if output_folder is None: 48 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 49 | return FrontEvaluator(dataset_name, ('vox', 'nocs'), True, output_folder) 50 | 51 | @classmethod 52 | def build_fronttest_loader(cls, cfg): 53 | dataset_names = cfg.DATASETS.TEST[0] 54 | return build_detection_test_loader( 55 | cfg, dataset_names, mapper=VoxNocsMapper(cfg, is_train=False, dataset_names=dataset_names) 56 | ) 57 | 58 | @classmethod 59 | def build_train_loader(cls, cfg): 60 | dataset_names = cfg.DATASETS.TRAIN[0] 61 | return build_detection_train_loader( 62 | cfg, mapper=VoxNocsMapper(cfg, is_train=True, dataset_names=dataset_names) 63 | ) 64 | 65 | @classmethod 66 | def do_test(cls, cfg, model, save_img_pred=False): 67 | print('Evaluation starts...') 68 | results = OrderedDict() 69 | 70 | for dataset_name in cfg.DATASETS.TEST: 71 | 72 | data_loader = cls.build_fronttest_loader(cfg) 73 | evaluator_voxnocs = cls.build_evaluator_voxnocs(cfg, dataset_name) 74 | results_voxnocs = inference_on_dataset_voxnocs(model, data_loader, evaluator_voxnocs, logger, cfg, save_img_pred) 75 | 76 | evaluator_coco = cls.build_evaluator_coco(cfg, dataset_name) 77 | results_coco = inference_on_dataset_coco(model, data_loader, evaluator_coco, logger) 78 | 79 | results_coco['voxel'] = results_voxnocs['voxel'] 80 | results_coco['nocs'] = results_voxnocs['nocs'] 81 | 82 | results[dataset_name] = results_coco 83 | if comm.is_main_process(): 84 | assert isinstance(results_coco, dict), "Evaluator must return a dict on the main process. 
Got {} instead.".format(results_coco) 85 | logger.info("Evaluation results for {} in csv format:".format(dataset_name)) 86 | print_csv_format(results_coco) 87 | 88 | if len(results) == 1: 89 | results = list(results.values())[0] 90 | return results 91 | 92 | @classmethod 93 | def do_train(cls, cfg, model, resume=False): 94 | print('Training starts...') 95 | model.train() 96 | optimizer = build_optimizer(cfg, model) 97 | scheduler = build_lr_scheduler(cfg, optimizer) 98 | 99 | checkpointer = DetectionCheckpointer( 100 | model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler 101 | ) 102 | start_iter = ( 103 | checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 104 | ) 105 | max_iter = cfg.SOLVER.MAX_ITER 106 | 107 | periodic_checkpointer = PeriodicCheckpointer( 108 | checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter 109 | ) 110 | 111 | writers = default_writers(cfg.OUTPUT_DIR, max_iter) if comm.is_main_process() else [] 112 | 113 | data_loader = cls.build_train_loader(cfg) 114 | logger.info("Starting training from iteration {}".format(start_iter)) 115 | with EventStorage(start_iter) as storage: 116 | for data, iteration in zip(data_loader, range(start_iter, max_iter)): 117 | storage.iter = iteration 118 | 119 | loss_dict = model(data) 120 | 121 | losses = sum(loss_dict.values()) 122 | 123 | if (iteration + 1) % 100 == 0: 124 | print('Iteration ', iteration+1,' of ', max_iter, ' , Training Loss: ', losses.detach().cpu().item()) 125 | 126 | assert torch.isfinite(losses).all(), loss_dict 127 | 128 | loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} 129 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 130 | if comm.is_main_process(): 131 | storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) 132 | 133 | optimizer.zero_grad() 134 | losses.backward() 135 | optimizer.step() 136 | storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) 137 | scheduler.step() 138 | 139 | if (cfg.TEST.EVAL_PERIOD > 0 and (iteration + 1) % (cfg.TEST.IMG_SAVE_FREQ * cfg.TEST.EVAL_PERIOD) == 0 and iteration != max_iter - 1 and (iteration+1) >= cfg.TEST.START_EVAL): 140 | cls.do_test(cfg, model, save_img_pred=True) 141 | comm.synchronize() 142 | elif (cfg.TEST.EVAL_PERIOD > 0 and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0 and iteration != max_iter - 1 and (iteration+1) >= cfg.TEST.START_EVAL): 143 | cls.do_test(cfg, model, save_img_pred=False) 144 | comm.synchronize() 145 | 146 | if iteration - start_iter > 5 and ((iteration + 1) % 20 == 0 or iteration == max_iter - 1): 147 | for writer in writers: 148 | writer.write() 149 | periodic_checkpointer.step(iteration) 150 | 151 | ## ------------------------------ Static Functions -------------------------------------------------------------------- 152 | def setup(): 153 | TRAIN_IMG_DIR = CONF.PATH.DETECTTRAIN 154 | mapping_list, name_list = get_dataset_info(TRAIN_IMG_DIR) 155 | mapping_list, name_list = zip(*sorted(zip(mapping_list, name_list))) 156 | 157 | num_classes = len(mapping_list) 158 | cfg = init_cfg(num_classes) 159 | return cfg, mapping_list, name_list 160 | 161 | 162 | def main(args): 163 | cfg, mapping_list, name_list = setup() 164 | print('Existing Classes :', name_list) 165 | 166 | register_cls = RegisterDataset(mapping_list, name_list) 167 | register_cls.reg_dset() 168 | 169 | # Visualise annotations for debugging 170 | # register_cls.eval_annotation() 171 | 172 | # Remove old files 173 | if 
os.path.exists(CONF.PATH.DETECTOUTPUT): 174 | print('Removing old outputs ...') 175 | shutil.rmtree(CONF.PATH.DETECTOUTPUT) 176 | 177 | os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) 178 | 179 | model = build_model(cfg) 180 | logger.info("Model:\n{}".format(model)) 181 | if args.eval_only: 182 | print('ONLY EVALUATION') 183 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 184 | cfg.MODEL.WEIGHTS, resume=args.resume 185 | ) 186 | return FrontTrainer.do_test(cfg, model, False) 187 | 188 | FrontTrainer.do_train(cfg, model, resume=args.resume) 189 | 190 | 191 | if __name__ == "__main__": 192 | args = default_argument_parser().parse_args() 193 | print("Command Line Args:", args) 194 | launch( 195 | main, 196 | args.num_gpus, 197 | num_machines=args.num_machines, 198 | machine_rank=args.machine_rank, 199 | dist_url=args.dist_url, 200 | args=(args,), 201 | ) 202 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: 3dmot 2 | channels: 3 | - pyg 4 | - anaconda 5 | - pytorch 6 | - conda-forge 7 | - defaults 8 | dependencies: 9 | - _libgcc_mutex=0.1=main 10 | - _openmp_mutex=4.5=1_gnu 11 | - binutils_impl_linux-64=2.31.1=h6176602_1 12 | - binutils_linux-64=2.31.1=h6176602_9 13 | - blas=1.0=mkl 14 | - blosc=1.21.0=h8c45485_0 15 | - brotli=1.0.9=he6710b0_2 16 | - brotlipy=0.7.0=py38h497a2fe_1001 17 | - brunsli=0.1=h2531618_0 18 | - bzip2=1.0.8=h7b6447c_0 19 | - c-ares=1.18.1=h7f8727e_0 20 | - ca-certificates=2021.10.8=ha878542_0 21 | - certifi=2021.10.8=py38h578d9bd_1 22 | - cfitsio=3.470=hf0d0db6_6 23 | - charls=2.2.0=h2531618_0 24 | - charset-normalizer=2.0.10=pyhd8ed1ab_0 25 | - cloudpickle=2.0.0=pyhd3eb1b0_0 26 | - colorama=0.4.4=pyh9f0ad1d_0 27 | - cryptography=35.0.0=py38ha5dfef3_0 28 | - cudatoolkit=10.2.89=hfd86e86_1 29 | - cytoolz=0.11.0=py38h7b6447c_0 30 | - dask-core=2021.10.0=pyhd3eb1b0_0 31 | - ffmpeg=4.3=hf484d3e_0 32 | - fonttools=4.25.0=pyhd3eb1b0_0 33 | - freetype=2.11.0=h70c0345_0 34 | - fsspec=2022.1.0=pyhd3eb1b0_0 35 | - gcc_impl_linux-64=7.3.0=habb00fd_1 36 | - gcc_linux-64=7.3.0=h553295d_9 37 | - giflib=5.2.1=h7b6447c_0 38 | - gmp=6.2.1=h2531618_2 39 | - gnutls=3.6.15=he1e5248_0 40 | - googledrivedownloader=0.4=pyhd3deb0d_1 41 | - gxx_impl_linux-64=7.3.0=hdf63c60_1 42 | - gxx_linux-64=7.3.0=h553295d_9 43 | - imagecodecs=2021.8.26=py38h4cda21f_0 44 | - imageio=2.9.0=pyhd3eb1b0_0 45 | - intel-openmp=2021.4.0=h06a4308_3561 46 | - jpeg=9d=h7f8727e_0 47 | - jxrlib=1.1=h7b6447c_2 48 | - kiwisolver=1.3.1=py38h2531618_0 49 | - krb5=1.19.2=hac12032_0 50 | - lame=3.100=h7b6447c_0 51 | - lcms2=2.12=h3be6417_0 52 | - ld_impl_linux-64=2.33.1=h53a641e_7 53 | - lerc=3.0=h295c915_0 54 | - libaec=1.0.4=he6710b0_1 55 | - libcurl=7.80.0=h0b77cf5_0 56 | - libdeflate=1.8=h7f8727e_5 57 | - libedit=3.1.20210910=h7f8727e_0 58 | - libev=4.33=h7f8727e_1 59 | - libffi=3.3=he6710b0_2 60 | - libgcc=7.2.0=h69d50b8_2 61 | - libgcc-ng=9.3.0=h5101ec6_17 62 | - libgfortran-ng=7.5.0=ha8ba4b0_17 63 | - libgfortran4=7.5.0=ha8ba4b0_17 64 | - libgomp=9.3.0=h5101ec6_17 65 | - libiconv=1.15=h63c8f33_5 66 | - libidn2=2.3.2=h7f8727e_0 67 | - libnghttp2=1.46.0=hce63b2e_0 68 | - libopenblas=0.3.2=h5a2b251_1 69 | - libpng=1.6.37=hbc83047_0 70 | - libssh2=1.9.0=h1ba5d50_1 71 | - libstdcxx-ng=9.3.0=hd4cf53a_17 72 | - libtasn1=4.16.0=h27cfd23_0 73 | - libtiff=4.2.0=h85742a9_0 74 | - libunistring=0.9.10=h27cfd23_0 75 | - libuv=1.40.0=h7b6447c_0 76 | - 
libwebp=1.2.0=h89dd481_0 77 | - libwebp-base=1.2.0=h27cfd23_0 78 | - libzopfli=1.0.3=he6710b0_0 79 | - locket=0.2.1=py38h06a4308_1 80 | - lz4-c=1.9.3=h295c915_1 81 | - matplotlib-base=3.5.0=py38h3ed280b_0 82 | - mkl=2021.4.0=h06a4308_640 83 | - mkl-service=2.4.0=py38h7f8727e_0 84 | - mkl_fft=1.3.1=py38hd3c417c_0 85 | - mkl_random=1.2.2=py38h51133e4_0 86 | - munkres=1.1.4=py_0 87 | - ncurses=6.3=h7f8727e_2 88 | - nettle=3.7.3=hbbd107a_1 89 | - networkx=2.6.3=pyhd3eb1b0_0 90 | - numpy=1.21.2=py38h20f2e39_0 91 | - numpy-base=1.21.2=py38h79a1101_0 92 | - olefile=0.46=pyhd3eb1b0_0 93 | - openblas-devel=0.3.2=0 94 | - openh264=2.1.1=h4ff587b_0 95 | - openjpeg=2.4.0=h3ad879b_0 96 | - openssl=1.1.1m=h7f8727e_0 97 | - partd=1.2.0=pyhd3eb1b0_0 98 | - pillow=8.4.0=py38h5aabda8_0 99 | - pip=21.2.4=py38h06a4308_0 100 | - pycparser=2.21=pyhd8ed1ab_0 101 | - pyg=2.0.3=py38_torch_1.10.0_cu102 102 | - pyopenssl=21.0.0=pyhd8ed1ab_0 103 | - pyparsing=3.0.4=pyhd3eb1b0_0 104 | - pysocks=1.7.1=py38h578d9bd_4 105 | - python=3.8.12=h12debd9_0 106 | - python-dateutil=2.8.2=pyhd3eb1b0_0 107 | - python-louvain=0.15=pyhd3deb0d_0 108 | - python_abi=3.8=2_cp38 109 | - pytorch=1.10.1=py3.8_cuda10.2_cudnn7.6.5_0 110 | - pytorch-cluster=1.5.9=py38_torch_1.10.0_cu102 111 | - pytorch-mutex=1.0=cuda 112 | - pytorch-scatter=2.0.9=py38_torch_1.10.0_cu102 113 | - pytorch-sparse=0.6.12=py38_torch_1.10.0_cu102 114 | - pytorch-spline-conv=1.2.1=py38_torch_1.10.0_cu102 115 | - pywavelets=1.1.1=py38h7b6447c_2 116 | - readline=8.1.2=h7f8727e_1 117 | - scikit-image=0.18.1=py38ha9443f7_0 118 | - scikit-learn=1.0.2=py38h51133e4_1 119 | - setuptools=58.0.4=py38h06a4308_0 120 | - six=1.16.0=pyhd3eb1b0_0 121 | - snappy=1.1.8=he6710b0_0 122 | - sqlite=3.37.0=hc218d9a_0 123 | - tk=8.6.11=h1ccaba5_0 124 | - toolz=0.11.2=pyhd3eb1b0_0 125 | - torchaudio=0.10.1=py38_cu102 126 | - torchvision=0.11.2=py38_cu102 127 | - typing_extensions=3.10.0.2=pyh06a4308_0 128 | - wheel=0.37.1=pyhd3eb1b0_0 129 | - xz=5.2.5=h7b6447c_0 130 | - yaml=0.2.5=h7b6447c_0 131 | - zfp=0.5.5=h295c915_6 132 | - zlib=1.2.11=h7f8727e_4 133 | - zstd=1.4.9=haebb681_0 134 | - pip: 135 | - absl-py==0.12.0 136 | - addict==2.4.0 137 | - antlr4-python3-runtime==4.8 138 | - anyio==3.2.1 139 | - appdirs==1.4.4 140 | - argon2-cffi==20.1.0 141 | - async-generator==1.10 142 | - attrs==20.3.0 143 | - babel==2.9.1 144 | - backcall==0.2.0 145 | - black==21.4b2 146 | - bleach==3.3.0 147 | - block-timer==0.2.0 148 | - cachetools==4.2.2 149 | - cffi==1.14.5 150 | - chamferdist==1.0.0 151 | - click==8.0.3 152 | - cycler==0.10.0 153 | - cython==0.29.23 154 | - defusedxml==0.7.1 155 | - deprecation==2.1.0 156 | - detectron2==0.6+cu102 157 | - dnspython==2.2.0 158 | - dvis==0.8.2.2 159 | - easydict==1.9 160 | - entrypoints==0.3 161 | - eulerangles==1.0.2 162 | - eventlet==0.33.0 163 | - flask==1.1.2 164 | - flask-socketio==4.3.0 165 | - future==0.18.2 166 | - fvcore==0.1.5.post20220212 167 | - google-auth==1.30.0 168 | - google-auth-oauthlib==0.4.4 169 | - greenlet==1.1.2 170 | - grpcio==1.37.0 171 | - h5py==3.2.1 172 | - hydra-core==1.1.1 173 | - idna==2.10 174 | - importlib-resources==5.4.0 175 | - iopath==0.1.8 176 | - ipykernel==5.5.3 177 | - ipython==7.22.0 178 | - ipython-genutils==0.2.0 179 | - ipywidgets==7.6.3 180 | - jedi==0.18.0 181 | - jinja2==2.11.3 182 | - joblib==1.0.1 183 | - json5==0.9.6 184 | - jsonpatch==1.32 185 | - jsonpointer==2.2 186 | - jsonschema==3.2.0 187 | - jupyter-client==6.1.12 188 | - jupyter-core==4.7.1 189 | - jupyter-packaging==0.10.3 190 | - 
jupyter-server==1.9.0 191 | - jupyterlab==3.0.16 192 | - jupyterlab-pygments==0.1.2 193 | - jupyterlab-server==2.6.0 194 | - jupyterlab-widgets==1.0.0 195 | - kornia==0.5.8 196 | - markdown==3.3.4 197 | - markupsafe==1.1.1 198 | - mathutils==2.81.2 199 | - matplotlib==3.4.1 200 | - minkowskiengine==0.5.4 201 | - mistune==0.8.4 202 | - motmetrics==1.2.0 203 | - mypy-extensions==0.4.3 204 | - natsort==7.1.1 205 | - nbclassic==0.3.1 206 | - nbclient==0.5.3 207 | - nbconvert==6.0.7 208 | - nbformat==5.1.3 209 | - nest-asyncio==1.5.1 210 | - ninja==1.10.2.3 211 | - notebook==6.3.0 212 | - oauthlib==3.1.0 213 | - omegaconf==2.1.1 214 | - open3d==0.10.0.0 215 | - opencv-python==4.5.3.56 216 | - packaging==20.9 217 | - pandas==1.3.0 218 | - pandocfilters==1.4.3 219 | - parso==0.8.2 220 | - pathspec==0.9.0 221 | - pexpect==4.8.0 222 | - pickleshare==0.7.5 223 | - plotly==5.2.1 224 | - portalocker==2.3.0 225 | - prometheus-client==0.10.1 226 | - prompt-toolkit==3.0.18 227 | - protobuf==3.15.8 228 | - ptyprocess==0.7.0 229 | - pyasn1==0.4.8 230 | - pyasn1-modules==0.2.8 231 | - pycocotools==2.0.2 232 | - pydot==1.4.2 233 | - pygments==2.8.1 234 | - pyrsistent==0.17.3 235 | - python-engineio==3.14.2 236 | - python-socketio==4.6.1 237 | - pytz==2021.1 238 | - pyyaml==5.4.1 239 | - pyzmq==22.0.3 240 | - regex==2022.1.18 241 | - requests==2.25.1 242 | - requests-oauthlib==1.3.0 243 | - requests-unixsocket==0.2.0 244 | - rsa==4.7.2 245 | - scipy==1.6.3 246 | - send2trash==1.5.0 247 | - simple-websocket==0.2.0 248 | - sniffio==1.2.0 249 | - tabulate==0.8.9 250 | - tenacity==8.0.1 251 | - tensorboard==2.5.0 252 | - tensorboard-data-server==0.6.0 253 | - tensorboard-plugin-wit==1.8.0 254 | - tensorboardx==2.2 255 | - termcolor==1.1.0 256 | - terminado==0.9.4 257 | - testpath==0.4.4 258 | - threadpoolctl==2.1.0 259 | - tifffile==2021.4.8 260 | - toml==0.10.2 261 | - tomlkit==0.7.2 262 | - torchfile==0.1.0 263 | - tornado==6.1 264 | - tqdm==4.60.0 265 | - traitlets==5.0.5 266 | - trescope==0.0.1 267 | - trimesh==3.9.20 268 | - urllib3==1.26.4 269 | - visdom==0.1.8.9 270 | - vision3d==0.5 271 | - wcwidth==0.2.5 272 | - webencodings==0.5.1 273 | - websocket-client==1.1.0 274 | - werkzeug==1.0.1 275 | - widgetsnbextension==3.5.1 276 | - wsproto==1.0.0 277 | - xmltodict==0.12.0 278 | - yacs==0.1.8 279 | - zipp==3.7.0 280 | prefix: /home/dominik/miniconda3/envs/3dmot 281 | -------------------------------------------------------------------------------- /Tracking/datasets/consec_graph_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | from torch_geometric.data import Data 4 | from torch_geometric.utils.undirected import to_undirected 5 | 6 | sys.path.append('..') #Hack add ROOT DIR 7 | from Tracking.utils.train_utils import check_pair 8 | 9 | 10 | class GraphDataset(): 11 | ''' 12 | Graph dataset class enables data handling for pytorch geometric graphs 13 | init_node_emb: voxel features, shape: num nodes x feature dim 14 | rotations, translations, scales, -> edge features, shape: num nodes x (3 or 1) 15 | instances_count: per image instances 16 | ''' 17 | 18 | def __init__(self, init_node_emb, rotations, translations, scales, input, instances_count, num_images=25): 19 | 20 | self.init_node_emb = init_node_emb 21 | self.rotations = rotations 22 | self.translations = translations 23 | self.scales = scales 24 | self.input = input 25 | self.instances_count = instances_count 26 | self.num_images = num_images 27 | self.box_iou_thres = 0.01 # Min 
IoU threshold GT and predicted 3D box 28 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 29 | 30 | 31 | def get_edge_data(self, is_undirected=True): 32 | ''' 33 | Get edge attributes and according edge indicies 34 | Currently directed graph and only consecutive frames connected 35 | is_undirected: Graph nodes connected both ways 0-1, 1-0, duplicate idxs, edge features and targets 36 | ''' 37 | 38 | relative_scales = [] 39 | relative_rotations = [] 40 | relative_positions = [] 41 | relative_times = [] 42 | 43 | img_inst_count = 0 44 | edge_idxs = [] 45 | 46 | # Validation data 47 | vis_idxs = [] 48 | false_positives = 0 49 | targets = [] 50 | node_color = [] 51 | 52 | for t in range(self.num_images - 1): 53 | 54 | gt_bbox_1 = self.input[t]['gt_3Dbbox'] # num inst x 8 pts x xyz 55 | gt_bbox_2 = self.input[t+1]['gt_3Dbbox'] 56 | 57 | gt_id_1 = self.input[t]['gt_object_id'] # num inst 58 | gt_id_2 = self.input[t+1]['gt_object_id'] 59 | 60 | pred_bbox_1 = self.input[t]['pred_3Dbbox'] # num inst x 8pts x xyz 61 | pred_bbox_2 = self.input[t+1]['pred_3Dbbox'] 62 | 63 | start_inst_count = img_inst_count # start count instances frame t 64 | img_inst_count += self.instances_count[t] # start count instances frame t+1 65 | consecutive_inst_count = img_inst_count + self.instances_count[t+1] 66 | 67 | for n in range(start_inst_count, img_inst_count): 68 | 69 | # Object Matching frame t 70 | try: 71 | obj_id_1 = check_pair(pred_bbox_1[n-start_inst_count, :, :], gt_bbox_1, gt_id_1, 72 | thres=self.box_iou_thres) 73 | except: 74 | obj_id_1 = None 75 | print('Issue with convex hull', ', Bad scene:', input[0]['scene']) 76 | 77 | if obj_id_1 is None: 78 | false_positives += 1 # No overlapping GT bounding box found 79 | node_color.append(1) 80 | continue # SKIP THIS INSTANCE FOR GRAPH CONSTRUCTION 81 | else: 82 | node_color.append(0) 83 | 84 | for m in range(img_inst_count, consecutive_inst_count): # n0-m0 n0-m1 n1-m0 n1-m1 .... 85 | 86 | # Object Matching frame t+1 87 | try: 88 | obj_id_2 = check_pair(pred_bbox_2[m-img_inst_count, :, :], gt_bbox_2, gt_id_2, thres=self.box_iou_thres) 89 | except: 90 | obj_id_2 = None 91 | print('Issue with convex hull', ', Bad scene:', input[0]['scene']) 92 | 93 | # ONLY FOR LAST FRAME WHICH ISNT COVERED IN OUTER LOOP ADD FP 94 | if t == self.num_images - 2 and n == img_inst_count - 1: 95 | if obj_id_2 is None: 96 | false_positives += 1 97 | node_color.append(1) 98 | else: 99 | node_color.append(0) 100 | 101 | # GT targets: active (1) and non-active (0) connections 102 | if obj_id_1 == obj_id_2 and obj_id_1 is not None and obj_id_2 is not None: # both objects exist and same id 103 | target = 1 104 | elif obj_id_1 != obj_id_2 and obj_id_1 is not None and obj_id_2 is not None: 105 | target = 0 106 | elif obj_id_2 is None: # false prediction for any object in consecutive frame -> exclude 107 | continue 108 | 109 | vis_idxs.append({'image': t, 'obj_1': n, 'obj_2': m, 'obj_id_1': int(obj_id_1), 110 | 'obj_id_2': int(obj_id_2)}) 111 | 112 | if is_undirected: 113 | targets.append(target) # obj1 img1 with all obj img2, obj2 img1 with all obj img2 ... 
per sequence 114 | targets.append(target) # twice for undirected 0-1 and 1-0 115 | else: 116 | targets.append(target) 117 | # Edge feature construction 118 | edge_idxs.append([n, m]) # 0 - 1 119 | 120 | relative_scale = torch.unsqueeze(torch.log(self.scales[m, :] / self.scales[n, :]), 121 | dim=0) # feat t+1 / feat t 122 | relative_scales.append(relative_scale) 123 | relative_position = torch.unsqueeze(self.translations[m, :] - self.translations[n, :], dim=0) 124 | relative_positions.append(relative_position) 125 | relative_rot = torch.unsqueeze(self.rotations[m, :] - self.rotations[n, :], dim=0) 126 | relative_rotations.append(relative_rot) 127 | relative_time = torch.unsqueeze(torch.tensor([t + 1 - t], dtype=torch.int64), 128 | dim=0) # always 1 for consecutive frames 129 | relative_times.append(relative_time) 130 | # relative_appearance -> could be also an edge feature but is already encoded in the node 131 | 132 | if is_undirected: 133 | edge_idxs.append([m, n]) # 1 - 0 134 | 135 | relative_scale = torch.unsqueeze(torch.log(self.scales[m, :] / self.scales[n, :]), 136 | dim=0) # feat t / feat t+1 137 | relative_scales.append(relative_scale) 138 | relative_position = torch.unsqueeze(self.translations[m, :] - self.translations[n, :], dim=0) 139 | relative_positions.append(relative_position) 140 | relative_rot = torch.unsqueeze(self.rotations[m, :] - self.rotations[n, :], dim=0) 141 | relative_rotations.append(relative_rot) 142 | relative_time = torch.unsqueeze(torch.tensor([t + 1 - t], dtype=torch.int64), 143 | dim=0) # always 1 for consecutive frames 144 | relative_times.append(relative_time) 145 | # relative_appearance -> could be also an edge feature but is already encoded in the node 146 | 147 | 148 | relative_scales = torch.cat(relative_scales, dim=0) # num_edges x 1 149 | relative_positions = torch.cat(relative_positions, dim=0) # num_edges x 3 150 | relative_rotations = torch.cat(relative_rotations, dim=0) # num_edges x 3 151 | relative_times = torch.cat(relative_times, dim=0) # num_edges x 1 152 | 153 | edge_attr = torch.cat((relative_positions, relative_rotations, relative_scales, relative_times), 154 | dim=-1).to(dtype=torch.float32, device=self.device) # Num edges x feat_dim 155 | edge_index = torch.tensor(edge_idxs, dtype=torch.long).t().contiguous().to(self.device) 156 | 157 | return edge_index, edge_attr, torch.tensor(targets, dtype=torch.float32, device=self.device), vis_idxs, false_positives, torch.tensor(node_color) 158 | 159 | def construct_batch_graph(self, is_undirected=True): 160 | ''' 161 | Returns batch graph data: x: Node Embeddings, shape: Num nodes x feature dim(9) 162 | edge_idx: Edge indicies, shape: 2 x Num edges 163 | edge_attr: Edge features, shape: Num edges x feature dim(8) 164 | y: targets, shape: Num edges 165 | ''' 166 | 167 | edge_idx, edge_attr, targets, vis_idxs, false_positives, node_color = self.get_edge_data(is_undirected=is_undirected) 168 | batch_graph = Data(x=self.init_node_emb, edge_index=edge_idx, edge_attr=edge_attr, y=targets) 169 | 170 | return batch_graph, vis_idxs, false_positives, node_color -------------------------------------------------------------------------------- /Detection/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from typing import Dict 3 | import numpy as np 4 | import sys 5 | from detectron2.layers import ShapeSpec, cat 6 | from detectron2.modeling import ROI_HEADS_REGISTRY 7 | from detectron2.modeling.poolers import ROIPooler 8 | from detectron2.modeling.roi_heads.roi_heads import select_foreground_proposals, StandardROIHeads 9 | from detectron2.data import MetadataCatalog 10 | from detectron2.utils.registry import Registry 11 | from roi_heads.voxel_head import ( 12 | build_voxel_head, 13 | voxel_inference, 14 | voxel_loss, 15 | ) 16 | 17 | from roi_heads.nocs_head import ( 18 | build_nocs_head, 19 | nocs_inference, 20 | nocs_loss, 21 | ) 22 | 23 | import torch 24 | 25 | @ROI_HEADS_REGISTRY.register() 26 | class VoxelNocsHeads(StandardROIHeads): 27 | """ 28 | The ROI specific heads for Voxel and Nocs branch 29 | """ 30 | 31 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 32 | super().__init__(cfg, input_shape) 33 | self._init_voxel_head(cfg, input_shape) 34 | self._init_nocs_head(cfg, input_shape) 35 | self._misc = {} 36 | 37 | self.train_dataset_names = cfg.DATASETS.TRAIN[0] 38 | self.test_dataset_names = cfg.DATASETS.TEST[0] 39 | self.metadata = MetadataCatalog.get(self.train_dataset_names) 40 | if 'thing_classes' in self.metadata.as_dict(): 41 | self.class_mapping = self.metadata.thing_classes 42 | 43 | 44 | def _init_voxel_head(self, cfg, input_shape): 45 | 46 | self.voxel_on = cfg.MODEL.VOXEL_ON 47 | self.voxel_loss_weight = cfg.MODEL.ROI_VOXEL_HEAD.LOSS_WEIGHT 48 | 49 | if not self.voxel_on: 50 | return 51 | 52 | voxel_pooler_resolution = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_RESOLUTION 53 | voxel_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) 54 | voxel_sampling_ratio = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_SAMPLING_RATIO 55 | voxel_pooler_type = cfg.MODEL.ROI_VOXEL_HEAD.POOLER_TYPE 56 | 57 | in_channels = [input_shape[f].channels for f in self.in_features][0] 58 | 59 | self.voxel_pooler = ROIPooler( 60 | output_size=voxel_pooler_resolution, 61 | scales=voxel_pooler_scales, 62 | sampling_ratio=voxel_sampling_ratio, 63 | pooler_type=voxel_pooler_type, 64 | ) 65 | shape = ShapeSpec( 66 | channels=in_channels, width=voxel_pooler_resolution, height=voxel_pooler_resolution 67 | ) 68 | self.voxel_head = build_voxel_head(cfg, shape) 69 | 70 | def _init_nocs_head(self, cfg, input_shape): 71 | 72 | self.nocs_on = cfg.MODEL.NOCS_ON 73 | self.nocs_loss_weight = cfg.MODEL.ROI_NOCS_HEAD.LOSS_WEIGHT 74 | self.iou_threshold = cfg.MODEL.ROI_NOCS_HEAD.IOU_THRES 75 | self.use_bin_loss = cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS 76 | self.num_bins = cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS 77 | 78 | if not self.nocs_on: 79 | return 80 | nocs_pooler_resolution = cfg.MODEL.ROI_NOCS_HEAD.POOLER_RESOLUTION 81 | nocs_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) 82 | nocs_sampling_ratio = cfg.MODEL.ROI_NOCS_HEAD.POOLER_SAMPLING_RATIO 83 | nocs_pooler_type = cfg.MODEL.ROI_NOCS_HEAD.POOLER_TYPE 84 | 85 | 86 | in_channels = [input_shape[f].channels for f in self.in_features][0] 87 | 88 | self.nocs_pooler = ROIPooler( 89 | output_size=nocs_pooler_resolution, 90 | scales=nocs_pooler_scales, 91 | sampling_ratio=nocs_sampling_ratio, 92 | pooler_type=nocs_pooler_type, 93 | ) 94 | shape = ShapeSpec( 95 | channels=in_channels, width=nocs_pooler_resolution, height=nocs_pooler_resolution 96 | ) 97 | self.nocs_head = build_nocs_head(cfg, shape) 98 | 99 | def forward(self, images, features, proposals, targets=None): # targets imgs x instances 100 | """ 
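        Runs the standard box/mask heads first, then the voxel and NOCS heads: during training this
        returns the combined loss dict, during inference it returns per-image instances with
        pred_voxels and pred_nocs attached.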
101 | See :class:`ROIHeads.forward`. 102 | """ 103 | 104 | instances, losses = super().forward(images, features, proposals, targets) # forward method for default heads (BBOX, MASK) #proposals N = batchsize 105 | del images, targets 106 | if self.training: 107 | losses.update(self._forward_voxel(features, instances)) # features input data mapping feature map name to tensor, axis 0 = N num images 108 | losses.update(self._forward_nocs(features, instances)) # features input data mapping feature map name to tensor, axis 0 = N num images 109 | return [], losses 110 | 111 | else: 112 | pred_instances = self.forward_with_given_boxes_voxnocs(features, instances) 113 | return pred_instances, {} 114 | 115 | 116 | def forward_with_given_boxes_voxnocs(self, features, instances): 117 | """ 118 | Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. 119 | Args: 120 | features: same as in `forward()` 121 | instances (list[Instances]): instances to predict other outputs. Expect the keys 122 | "pred_boxes" and "pred_classes" to exist. 123 | Returns: 124 | instances (Instances): the same `Instances` object, with extra 125 | fields such as `pred_masks`, `pred_voxels` or `pred_nocs`. 126 | """ 127 | assert not self.training 128 | #instances = super().forward_with_given_boxes(features, instances) 129 | 130 | assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") and instances[0].has("pred_masks") 131 | 132 | instances = self._forward_voxel(features, instances) 133 | instances = self._forward_nocs(features, instances) 134 | 135 | return instances 136 | 137 | def _forward_voxel(self, features, instances): 138 | """ 139 | Forward logic for the voxel branch. 140 | Args: 141 | features (list[Tensor]): #level input features for voxel prediction 142 | instances (list[Instances]): the per-image instances to train/predict voxels. 143 | In training, they can be the proposals. 144 | In inference, they can be the predicted boxes. 145 | Returns: 146 | In training, a dict of losses. 147 | In inference, update `instances` with new fields "pred_voxels" and return it. 148 | """ 149 | if not self.voxel_on: 150 | return {} if self.training else instances 151 | 152 | features = [features[f] for f in self.in_features] 153 | 154 | if self.training: 155 | # The loss is only defined on positive proposals. 156 | proposals, _ = select_foreground_proposals(instances, self.num_classes) 157 | proposal_boxes = [x.proposal_boxes for x in proposals] 158 | 159 | losses = {} 160 | if self.voxel_on: 161 | voxel_features = self.voxel_pooler(features, proposal_boxes) #M total number of boxes aggregated over all N batch images x 256 x 14 x 14 162 | voxel_logits = self.voxel_head(voxel_features) #Num objs x 1 x 32 x 32 x 32, zeros for empty detection 163 | src_boxes = cat([p.tensor for p in proposal_boxes]) # num obj x 4 format XYXY 164 | loss_voxel, _ = voxel_loss( 165 | voxel_logits, proposals, src_boxes, loss_weight=self.voxel_loss_weight, iou_thres=self.iou_threshold 166 | ) 167 | losses.update({"loss_voxel": loss_voxel}) 168 | 169 | return losses 170 | else: 171 | pred_boxes = [x.pred_boxes for x in instances] 172 | 173 | if self.voxel_on: 174 | 175 | voxel_features = self.voxel_pooler(features, pred_boxes) # BS x 256 x 14 x 14 176 | voxel_logits = self.voxel_head(voxel_features) 177 | voxel_inference(voxel_logits, instances) 178 | 179 | return instances 180 | 181 | def _forward_nocs(self, features, instances): 182 | """ 183 | Forward logic for the nocs branch.
184 | Args: 185 | features (list[Tensor]): #level input features for nocs prediction 186 | instances (list[Instances]): the per-image instances to train/predict nocs. 187 | In training, they can be the proposals. 188 | In inference, they can be the predicted boxes. 189 | Returns: 190 | In training, a dict of losses. 191 | In inference, update `instances` with new fields "pred_nocs" and return it. 192 | """ 193 | if not self.nocs_on: 194 | return {} if self.training else instances 195 | 196 | features = [features[f] for f in self.in_features] 197 | 198 | if self.training: 199 | # The loss is only defined on positive proposals. 200 | proposals, _ = select_foreground_proposals(instances, self.num_classes) 201 | proposal_boxes = [x.proposal_boxes for x in proposals] 202 | 203 | losses = {} 204 | if self.nocs_on: 205 | nocs_features = self.nocs_pooler(features, proposal_boxes) #M total number of boxes aggregated over all N batch images 206 | nocs_map_rgb = self.nocs_head(nocs_features) # num obj x 3 x 28 x 28 (l1), num obj x num bins x 3 x 28 x 28 (bin) 207 | src_boxes = cat([p.tensor for p in proposal_boxes]) #num obj x 4 format XYXY 208 | loss_nocs, _ = nocs_loss( 209 | nocs_map_rgb, proposals, src_boxes, loss_weight=self.nocs_loss_weight, iou_thres=self.iou_threshold, 210 | cls_mapping=self.class_mapping, use_bin_loss=self.use_bin_loss, num_bins=self.num_bins 211 | ) 212 | losses.update({"loss_nocs": loss_nocs}) 213 | 214 | return losses 215 | else: 216 | pred_boxes = [x.pred_boxes for x in instances] 217 | 218 | if self.nocs_on: 219 | 220 | nocs_features = self.nocs_pooler(features, pred_boxes) # BS x 256 x 14 x 14 221 | nocs_map_rgb = self.nocs_head(nocs_features) # BS x 3 x 28 x 28 (RGB) 222 | nocs_inference(nocs_map_rgb, instances, use_bin_loss=self.use_bin_loss, num_bins=self.num_bins) 223 | 224 | return instances 225 | -------------------------------------------------------------------------------- /Detection/inference/inference_utils.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import torch 3 | import numpy as np 4 | import os, sys, cv2 5 | import open3d as o3d 6 | import copy 7 | 8 | from sklearn.metrics import recall_score 9 | from sklearn.metrics import precision_score 10 | from sklearn.metrics import f1_score 11 | 12 | sys.path.append('..') #Hack add ROOT DIR 13 | 14 | from BlenderProc.utils import binvox_rw 15 | from baseconfig import CONF 16 | from Detection.inference.inference_metrics import get_mean_iou, get_median_iou 17 | from PoseEst.pose_estimation import backproject, cam2world, sort_bbox 18 | 19 | 20 | def get_scale(m): 21 | if type(m) == torch.Tensor: 22 | return m.norm(dim=0) 23 | return np.linalg.norm(m, axis=0) 24 | 25 | def transform_icp_points(source, transformation): 26 | ''' 27 | transforms source pc to align with target point cloud based on a learned icp transformation 28 | ''' 29 | source_temp = copy.deepcopy(source) 30 | return source_temp.transform(transformation) 31 | 32 | def draw_registration_result(source, target, transformation): 33 | ''' 34 | Visualise ICP Matching results 35 | ''' 36 | source_temp = copy.deepcopy(source) 37 | target_temp = copy.deepcopy(target) 38 | source_temp.paint_uniform_color([1, 0.706, 0]) 39 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 40 | source_temp.transform(transformation) 41 | o3d.visualization.draw_geometries([source_temp, target_temp]) 42 | 43 | def construct_box(segpc, ax_aligned=False): 44 | ''' 45 | calculates 3D bounding box around segmentation 
pointcloud 46 | ''' 47 | if ax_aligned: 48 | bbox3d_obj = o3d.geometry.AxisAlignedBoundingBox() 49 | else: 50 | bbox3d_obj = o3d.geometry.OrientedBoundingBox() 51 | bbox_3d = bbox3d_obj.create_from_points(o3d.utility.Vector3dVector(segpc)) 52 | center_3d = bbox_3d.get_center() 53 | 54 | pred_box = sort_bbox(np.array(bbox_3d.get_box_points())) 55 | 56 | if not ax_aligned: 57 | scale = bbox_3d.extent 58 | rotation = bbox_3d.R 59 | cad2world = np.diag([0, 0, 0, 1]).astype(np.float32) 60 | cad2world[:3, :3] = np.diag(scale) @ rotation 61 | cad2world[:3, 3] = center_3d 62 | 63 | return torch.tensor(pred_box), center_3d, cad2world 64 | 65 | return torch.tensor(pred_box), center_3d 66 | 67 | def project_segmask_F2F(pred_bin_mask, abs_bbox, depth, intrinsics): 68 | ''' 69 | Projection segmask to pointcloud for F2F - MaskRCNN baseline 70 | ''' 71 | 72 | depth = np.array(depth, dtype=np.float32) # HxW 73 | 74 | # Zero pad depth image 75 | depth_pad = np.zeros((240, 320)) 76 | depth_pad[int(abs_bbox[1]):int(abs_bbox[3]), int(abs_bbox[0]):int(abs_bbox[2])] = depth[int(abs_bbox[1]):int(abs_bbox[3]), 77 | int(abs_bbox[0]):int(abs_bbox[2])] 78 | depth = depth_pad 79 | 80 | depth_pts, _ = backproject(depth, intrinsics, np.array(pred_bin_mask.cpu())) # depth in camera space 81 | 82 | return depth_pts 83 | 84 | def project_segmask(pred_bin_mask, abs_bbox, depth, campose): 85 | ''' 86 | Projection segmask to pointcloud for F2F - MaskRCNN baseline 87 | ''' 88 | 89 | depth = np.array(depth, dtype=np.float32) # HxW 90 | 91 | # Zero pad depth image 92 | depth_pad = np.zeros((240, 320)) 93 | depth_pad[int(abs_bbox[1]):int(abs_bbox[3]), int(abs_bbox[0]):int(abs_bbox[2])] = depth[int(abs_bbox[1]):int(abs_bbox[3]), 94 | int(abs_bbox[0]):int(abs_bbox[2])] 95 | depth = depth_pad 96 | 97 | img_width = depth.shape[1] 98 | img_height = depth.shape[0] 99 | cx = (img_width / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 100 | cy = (img_height / 2) - 0.5 # 0,0 is center top-left pixel -> -0,5 101 | fx = 292.87803547399 102 | fy = 292.87803547399 103 | intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 104 | 105 | depth_pts, _ = backproject(depth, intrinsics, np.array(pred_bin_mask.cpu())) # depth in camera space 106 | depth_world = cam2world(depth_pts, campose) 107 | 108 | return depth_world 109 | 110 | def convert_voxel_to_pc(voxel_grid, rot, trans, scale): 111 | ''' 112 | Converts a voxel grid to a point cloud with according pose 113 | voxel_grid: 32x32x32 tensor binary 114 | rot, trans, scale: output from run pose function 115 | scale already encoded in rotation 116 | returns pc: n x 3 array 117 | ''' 118 | 119 | nonzero_inds = np.nonzero(voxel_grid)[:-1] 120 | 121 | points = nonzero_inds / 32 - 0.5 122 | points = points.detach().cpu().numpy() 123 | 124 | #global_scalerot = (np.identity(3) * scale.copy()) @ rot 125 | world_pc = rot @ points.transpose() + np.expand_dims(trans.copy(), axis=-1) 126 | world_pc = world_pc.transpose() 127 | 128 | return world_pc 129 | 130 | def add_halfheight(location, box): 131 | ''' 132 | Object location z-center is at bottom, calculate half height of the object 133 | and add to shift z-center to correct location 134 | ''' 135 | z_coords = [] 136 | for pt in box: 137 | z = pt[-1] 138 | z_coords.append(z) 139 | z_coords = np.array(z_coords) 140 | half_height = np.abs(z_coords.max() - z_coords.min()) / 2 141 | location[-1] = half_height # Center location is at bottom object 142 | 143 | return location 144 | 145 | def load_hdf5(path): 146 | with h5py.File(path, 'r') as data: 147 | 
for key in data.keys(): 148 | if key == 'depth': 149 | depth = np.array(data[key]) 150 | elif key == 'campose': 151 | campose = np.array(data[key]) 152 | 153 | return depth, campose 154 | 155 | def get_nocs(nocs_path): 156 | ''' 157 | loads GT nocs image 158 | cv2.imread -1 for using all color depth values 159 | ''' 160 | 161 | nocs = cv2.imread(nocs_path, -1) #BGRA 162 | nocs = nocs[:,:,:3] 163 | nocs = np.array(nocs[:, :, ::-1], dtype=np.float32) # RGB 164 | 165 | return nocs 166 | 167 | def log_results(metrics): 168 | 169 | voxel_iou = [] 170 | chair_iou = [] 171 | table_iou = [] 172 | sofa_iou = [] 173 | bed_iou = [] 174 | tv_stand_iou = [] 175 | cooler_iou = [] 176 | night_stand_iou = [] 177 | distances = [] 178 | thetas = [] 179 | for seq in metrics: 180 | for img in seq: 181 | for key, value in img.items(): 182 | if key == 'voxel_ious': 183 | voxel_iou.append(value) 184 | elif key == 'chair_ious': 185 | chair_iou.append(value) 186 | elif key == 'table_ious': 187 | table_iou.append(value) 188 | elif key == 'sofa_ious': 189 | sofa_iou.append(value) 190 | elif key == 'bed_ious': 191 | bed_iou.append(value) 192 | elif key == 'tv_stand_ious': 193 | tv_stand_iou.append(value) 194 | elif key == 'cooler_ious': 195 | cooler_iou.append(value) 196 | elif key == 'night_stand_ious': 197 | night_stand_iou.append(value) 198 | elif key == 'pose_distance': 199 | for entity in value: 200 | distances.append(entity) 201 | elif key == 'pose_rotationdiff': 202 | for entity in value: 203 | thetas.append(entity) 204 | 205 | 206 | mean_voxel_iou = get_mean_iou(voxel_iou) 207 | mean_chair_iou = get_mean_iou(chair_iou) 208 | mean_table_iou = get_mean_iou(table_iou) 209 | mean_sofa_iou = get_mean_iou(sofa_iou) 210 | mean_bed_iou = get_mean_iou(bed_iou) 211 | mean_tv_iou = get_mean_iou(tv_stand_iou) 212 | mean_cooler_iou = get_mean_iou(cooler_iou) 213 | mean_night_iou = get_mean_iou(night_stand_iou) 214 | 215 | mean_rotation_diff = get_median_iou(thetas) 216 | mean_distance = get_median_iou(distances) 217 | 218 | print('Voxel_IoU :', mean_voxel_iou, ', Voxel_Chair_IoU :', mean_chair_iou, ', Voxel_Table_IoU :', mean_table_iou, 219 | ', Voxel_Sofa_IoU :', mean_sofa_iou, ', Voxel_Bed_IoU :', mean_bed_iou, 220 | ', Voxel_TVstand_IoU :', mean_tv_iou, ', Voxel_WineCooler_IoU :', mean_cooler_iou, 221 | ', Voxel_NightStand_IoU :', mean_night_iou, 222 | ', Rotation Difference [°] :', mean_rotation_diff, ', Location Difference [m] :', mean_distance ) 223 | 224 | def calculate_F2F_metrics(outputs): 225 | 226 | overall_gt_objects = 0 227 | overall_misses = 0 228 | overall_fps = 0 229 | overall_predictions = [] 230 | overall_targets = [] 231 | 232 | for seq in outputs: 233 | 234 | overall_gt_objects += seq['total_gt_objs'] 235 | overall_misses += seq['misses'] 236 | overall_fps += seq['false_positives'] 237 | overall_predictions.append(seq['prediction']) 238 | overall_targets.append(seq['target']) 239 | 240 | predictions = np.concatenate(overall_predictions) 241 | targets = np.concatenate(overall_targets) 242 | 243 | F1 = f1_score(targets, predictions, zero_division='warn') # warn only once 244 | Prec = precision_score(targets, predictions, zero_division=0) 245 | Rec = recall_score(targets, predictions, zero_division=0) 246 | 247 | id_switches = np.count_nonzero(targets - predictions) 248 | MOTA = 1.0 - (float(overall_misses + overall_fps + id_switches) / float(overall_gt_objects)) 249 | 250 | print('MOTA score :', MOTA, ', F1 score :', F1, ', Precision :', Prec, 251 | ', Recall :', Rec) 252 | 253 | def 
log_F2F_results(metrics): 254 | ''' 255 | F2F-MaskRCNN result logging 256 | metrics: list of sequences (tuple(MOTA,F1,Precision,Recall)) 257 | ''' 258 | 259 | overall_mota = [] 260 | overall_F1 = [] 261 | overall_precision = [] 262 | overall_recall = [] 263 | 264 | for seq in metrics: 265 | 266 | mota, f1, precision, recall = seq[0], seq[1], seq[2], seq[3] 267 | 268 | overall_mota.append(mota) 269 | overall_F1.append(f1) 270 | overall_precision.append(precision) 271 | overall_recall.append(recall) 272 | 273 | mean_mota = np.array(overall_mota).mean() 274 | mean_f1 = np.array(overall_F1).mean() 275 | mean_precision = np.array(overall_precision).mean() 276 | mean_recall = np.array(overall_recall).mean() 277 | 278 | print('MOTA score :', mean_mota, ', F1 score :', mean_f1, ', Precision :', mean_precision, 279 | ', Recall :', mean_recall) 280 | -------------------------------------------------------------------------------- /BlenderProc/utils/binvox_rw.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Daniel Maturana 2 | # This file is part of binvox-rw-py. 3 | # 4 | # binvox-rw-py is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # binvox-rw-py is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with binvox-rw-py. If not, see . 16 | # 17 | # Modified by Christopher B. Choy 18 | # for python 3 support 19 | 20 | """ 21 | Binvox to Numpy and back. 22 | 23 | 24 | >>> import numpy as np 25 | >>> import binvox_rw 26 | >>> with open('chair.binvox', 'rb') as f: 27 | ... m1 = binvox_rw.read_as_3d_array(f) 28 | ... 29 | >>> m1.dims 30 | [32, 32, 32] 31 | >>> m1.scale 32 | 41.133000000000003 33 | >>> m1.translate 34 | [0.0, 0.0, 0.0] 35 | >>> with open('chair_out.binvox', 'wb') as f: 36 | ... m1.write(f) 37 | ... 38 | >>> with open('chair_out.binvox', 'rb') as f: 39 | ... m2 = binvox_rw.read_as_3d_array(f) 40 | ... 41 | >>> m1.dims==m2.dims 42 | True 43 | >>> m1.scale==m2.scale 44 | True 45 | >>> m1.translate==m2.translate 46 | True 47 | >>> np.all(m1.data==m2.data) 48 | True 49 | 50 | >>> with open('chair.binvox', 'rb') as f: 51 | ... md = binvox_rw.read_as_3d_array(f) 52 | ... 53 | >>> with open('chair.binvox', 'rb') as f: 54 | ... ms = binvox_rw.read_as_coord_array(f) 55 | ... 56 | >>> data_ds = binvox_rw.dense_to_sparse(md.data) 57 | >>> data_sd = binvox_rw.sparse_to_dense(ms.data, 32) 58 | >>> np.all(data_sd==md.data) 59 | True 60 | >>> # the ordering of elements returned by numpy.nonzero changes with axis 61 | >>> # ordering, so to compare for equality we first lexically sort the voxels. 62 | >>> np.all(ms.data[:, np.lexsort(ms.data)] == data_ds[:, np.lexsort(data_ds)]) 63 | True 64 | """ 65 | 66 | import numpy as np 67 | 68 | class Voxels(object): 69 | """ Holds a binvox model. 70 | data is either a three-dimensional numpy boolean array (dense representation) 71 | or a two-dimensional numpy float array (coordinate representation). 72 | 73 | dims, translate and scale are the model metadata. 74 | 75 | dims are the voxel dimensions, e.g. 
[32, 32, 32] for a 32x32x32 model. 76 | 77 | scale and translate relate the voxels to the original model coordinates. 78 | 79 | To translate voxel coordinates i, j, k to original coordinates x, y, z: 80 | 81 | x_n = (i+.5)/dims[0] 82 | y_n = (j+.5)/dims[1] 83 | z_n = (k+.5)/dims[2] 84 | x = scale*x_n + translate[0] 85 | y = scale*y_n + translate[1] 86 | z = scale*z_n + translate[2] 87 | 88 | """ 89 | 90 | def __init__(self, data, dims, translate, scale, axis_order): 91 | self.data = data 92 | self.dims = dims 93 | self.translate = translate 94 | self.scale = scale 95 | assert (axis_order in ('xzy', 'xyz')) 96 | self.axis_order = axis_order 97 | 98 | def clone(self): 99 | data = self.data.copy() 100 | dims = self.dims[:] 101 | translate = self.translate[:] 102 | return Voxels(data, dims, translate, self.scale, self.axis_order) 103 | 104 | def write(self, fp): 105 | write(self, fp) 106 | 107 | def read_header(fp): 108 | """ Read binvox header. Mostly meant for internal use. 109 | """ 110 | line = fp.readline().strip() 111 | if not line.startswith(b'#binvox'): 112 | raise IOError('Not a binvox file') 113 | dims = [int(i) for i in fp.readline().strip().split(b' ')[1:]] 114 | translate = [float(i) for i in fp.readline().strip().split(b' ')[1:]] 115 | scale = [float(i) for i in fp.readline().strip().split(b' ')[1:]][0] 116 | line = fp.readline() 117 | return dims, translate, scale 118 | 119 | def read_as_3d_array(fp, fix_coords=True): 120 | """ Read binary binvox format as array. 121 | 122 | Returns the model with accompanying metadata. 123 | 124 | Voxels are stored in a three-dimensional numpy array, which is simple and 125 | direct, but may use a lot of memory for large models. (Storage requirements 126 | are 8*(d^3) bytes, where d is the dimensions of the binvox model. Numpy 127 | boolean arrays use a byte per element). 128 | 129 | Doesn't do any checks on input except for the '#binvox' line. 130 | """ 131 | dims, translate, scale = read_header(fp) 132 | raw_data = np.frombuffer(fp.read(), dtype=np.uint8) 133 | # if just using reshape() on the raw data: 134 | # indexing the array as array[i,j,k], the indices map into the 135 | # coords as: 136 | # i -> x 137 | # j -> z 138 | # k -> y 139 | # if fix_coords is true, then data is rearranged so that 140 | # mapping is 141 | # i -> x 142 | # j -> y 143 | # k -> z 144 | values, counts = raw_data[::2], raw_data[1::2] 145 | data = np.repeat(values, counts).astype(np.bool) 146 | data = data.reshape(dims) 147 | if fix_coords: 148 | # xzy to xyz TODO the right thing 149 | data = np.transpose(data, (0, 2, 1)) 150 | axis_order = 'xyz' 151 | else: 152 | axis_order = 'xzy' 153 | return Voxels(data, dims, translate, scale, axis_order) 154 | 155 | 156 | def read_as_coord_array(fp, fix_coords=True): 157 | """ Read binary binvox format as coordinates. 158 | 159 | Returns binvox model with voxels in a "coordinate" representation, i.e. an 160 | 3 x N array where N is the number of nonzero voxels. Each column 161 | corresponds to a nonzero voxel and the 3 rows are the (x, z, y) coordinates 162 | of the voxel. (The odd ordering is due to the way binvox format lays out 163 | data). Note that coordinates refer to the binvox voxels, without any 164 | scaling or translation. 165 | 166 | Use this to save memory if your model is very sparse (mostly empty). 167 | 168 | Doesn't do any checks on input except for the '#binvox' line. 
169 | """ 170 | dims, translate, scale = read_header(fp) 171 | raw_data = np.frombuffer(fp.read(), dtype=np.uint8) 172 | 173 | values, counts = raw_data[::2], raw_data[1::2] 174 | 175 | sz = np.prod(dims) 176 | index, end_index = 0, 0 177 | end_indices = np.cumsum(counts) 178 | indices = np.concatenate(([0], end_indices[:-1])).astype(end_indices.dtype) 179 | 180 | values = values.astype(np.bool) 181 | indices = indices[values] 182 | end_indices = end_indices[values] 183 | 184 | nz_voxels = [] 185 | for index, end_index in zip(indices, end_indices): 186 | nz_voxels.extend(range(index, end_index)) 187 | nz_voxels = np.array(nz_voxels) 188 | # TODO are these dims correct? 189 | # according to docs, 190 | # index = x * wxh + z * width + y; // wxh = width * height = d * d 191 | 192 | x = nz_voxels / (dims[0]*dims[1]) 193 | zwpy = nz_voxels % (dims[0]*dims[1]) # z*w + y 194 | z = zwpy / dims[0] 195 | y = zwpy % dims[0] 196 | if fix_coords: 197 | data = np.vstack((x, y, z)) 198 | axis_order = 'xyz' 199 | else: 200 | data = np.vstack((x, z, y)) 201 | axis_order = 'xzy' 202 | 203 | #return Voxels(data, dims, translate, scale, axis_order) 204 | return Voxels(np.ascontiguousarray(data), dims, translate, scale, axis_order) 205 | 206 | def dense_to_sparse(voxel_data, dtype=np.int): 207 | """ From dense representation to sparse (coordinate) representation. 208 | No coordinate reordering. 209 | """ 210 | if voxel_data.ndim!=3: 211 | raise ValueError('voxel_data is wrong shape; should be 3D array.') 212 | return np.asarray(np.nonzero(voxel_data), dtype) 213 | 214 | def sparse_to_dense(voxel_data, dims, dtype=np.bool): 215 | if voxel_data.ndim!=2 or voxel_data.shape[0]!=3: 216 | raise ValueError('voxel_data is wrong shape; should be 3xN array.') 217 | if np.isscalar(dims): 218 | dims = [dims]*3 219 | dims = np.atleast_2d(dims).T 220 | # truncate to integers 221 | xyz = voxel_data.astype(np.int) 222 | # discard voxels that fall outside dims 223 | valid_ix = ~np.any((xyz < 0) | (xyz >= dims), 0) 224 | xyz = xyz[:,valid_ix] 225 | out = np.zeros(dims.flatten(), dtype=dtype) 226 | out[tuple(xyz)] = True 227 | return out 228 | 229 | #def get_linear_index(x, y, z, dims): 230 | #""" Assuming xzy order. (y increasing fastest. 231 | #TODO ensure this is right when dims are not all same 232 | #""" 233 | #return x*(dims[1]*dims[2]) + z*dims[1] + y 234 | 235 | def write(voxel_model, fp): 236 | """ Write binary binvox format. 237 | 238 | Note that when saving a model in sparse (coordinate) format, it is first 239 | converted to dense format. 240 | 241 | Doesn't check if the model is 'sane'. 
242 | 243 | """ 244 | if voxel_model.data.ndim==2: 245 | # TODO avoid conversion to dense 246 | dense_voxel_data = sparse_to_dense(voxel_model.data, voxel_model.dims) 247 | else: 248 | dense_voxel_data = voxel_model.data 249 | 250 | fp.write(b'#binvox 1\n') 251 | fp.write(str.encode('dim '+' '.join(map(str, voxel_model.dims))+'\n')) 252 | fp.write(str.encode('translate '+' '.join(map(str, voxel_model.translate))+'\n')) 253 | fp.write(str.encode('scale '+str(voxel_model.scale)+'\n')) 254 | fp.write(b'data\n') 255 | if not voxel_model.axis_order in ('xzy', 'xyz'): 256 | raise ValueError('Unsupported voxel model axis order') 257 | 258 | if voxel_model.axis_order=='xzy': 259 | voxels_flat = dense_voxel_data.flatten() 260 | elif voxel_model.axis_order=='xyz': 261 | voxels_flat = np.transpose(dense_voxel_data, (0, 2, 1)).flatten() 262 | 263 | # keep a sort of state machine for writing run length encoding 264 | state = voxels_flat[0] 265 | ctr = 0 266 | for c in voxels_flat: 267 | if c==state: 268 | ctr += 1 269 | # if ctr hits max, dump 270 | if ctr==255: 271 | #fp.write(str.encode(chr(state))) 272 | #fp.write(str.encode(chr(ctr))) 273 | fp.write(str.encode(chr(state),encoding='latin-1')) 274 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 275 | ctr = 0 276 | else: 277 | # if switch state, dump 278 | #fp.write(str.encode(chr(state))) 279 | #fp.write(str.encode(chr(ctr))) 280 | fp.write(str.encode(chr(state),encoding='latin-1')) 281 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 282 | state = c 283 | ctr = 1 284 | # flush out remainders 285 | if ctr > 0: 286 | #fp.write(str.encode(chr(state))) 287 | #fp.write(str.encode(chr(ctr))) 288 | fp.write(str.encode(chr(state),encoding='latin-1')) 289 | fp.write(str.encode(chr(ctr),encoding='latin-1')) 290 | 291 | 292 | if __name__ == '__main__': 293 | import doctest 294 | doctest.testmod() 295 | -------------------------------------------------------------------------------- /Tracking/networks/mpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_scatter import scatter_mean, scatter_max, scatter_add 4 | 5 | from Tracking.networks.mlp import MLP 6 | 7 | 8 | class MetaLayer(nn.Module): # Single Layer of Message Passing Network 9 | 10 | def __init__(self, edge_model=None, node_model=None): 11 | super(MetaLayer, self).__init__() 12 | 13 | self.edge_model = edge_model 14 | self.node_model = node_model 15 | self.reset_parameters() 16 | 17 | def reset_parameters(self): 18 | for item in [self.node_model, self.edge_model]: 19 | if hasattr(item, 'reset_parameters'): 20 | item.reset_parameters() 21 | 22 | def forward(self, x, edge_index, edge_attr): 23 | """ 24 | Does a single node and edge feature vectors update. 25 | Args: 26 | x: node features matrix with shape [num_nodes, num_node_features] 27 | edge_index: tensor with shape [2, M], with M being the number of edges, indicating nonzero entries in the graph adjacency (i.e. 
edges) 28 | edge_attr: edge features matrix (ordered by edge_index) 29 | Returns: Updated Node and Edge Feature matrices 30 | """ 31 | row, col = edge_index # row = start node, col = destination node 32 | 33 | # Edge Update 34 | edge_attr = self.edge_model(x[row], x[col], edge_attr) # edge update with nodes i,j and edge ij 35 | 36 | # Node Update 37 | x = self.node_model(x, edge_index, edge_attr) # node i update with node i at t-1 and edge ij at t 38 | 39 | return x, edge_attr 40 | 41 | def __repr__(self): 42 | return '{}(edge_model={}, node_model={})'.format(self.__class__.__name__, self.edge_model, self.node_model) 43 | 44 | class EdgeModel(nn.Module): 45 | 46 | def __init__(self, edge_mlp): 47 | super(EdgeModel, self).__init__() 48 | self.edge_mlp = edge_mlp 49 | 50 | def forward(self, source, target, edge_attr): 51 | out = torch.cat([source, target, edge_attr], dim=1) 52 | return self.edge_mlp(out) 53 | 54 | class NodeModel(nn.Module): 55 | ''' 56 | try, except to avoid cuda error 57 | ''' 58 | 59 | def __init__(self, node_mlp, node_agg_fn): 60 | super(NodeModel, self).__init__() 61 | 62 | self.node_mlp = node_mlp 63 | self.node_agg_fn = node_agg_fn 64 | 65 | def forward(self, x, edge_index, edge_attr): 66 | 67 | row, col = edge_index 68 | 69 | message = self.node_agg_fn(edge_attr, row, x.size(0)) # node_i x edge_dim 70 | 71 | node_message = torch.cat([x, message], dim=1) 72 | return self.node_mlp(node_message) 73 | 74 | class TimeAwareNodeModel(nn.Module): 75 | """ 76 | Class used to perform the node update during neural message passing 77 | """ 78 | def __init__(self, flow_in_mlp, flow_out_mlp, node_mlp, node_agg_fn): 79 | super(TimeAwareNodeModel, self).__init__() 80 | 81 | self.flow_in_mlp = flow_in_mlp 82 | self.flow_out_mlp = flow_out_mlp 83 | self.node_mlp = node_mlp 84 | self.node_agg_fn = node_agg_fn 85 | 86 | def forward(self, x, edge_index, edge_attr): 87 | row, col = edge_index 88 | flow_out_mask = row < col 89 | flow_out_row, flow_out_col = row[flow_out_mask], col[flow_out_mask] 90 | flow_out_input = torch.cat([x[flow_out_col], edge_attr[flow_out_mask]], dim=1) 91 | flow_out = self.flow_out_mlp(flow_out_input) 92 | flow_out = self.node_agg_fn(flow_out, flow_out_row, x.size(0)) 93 | 94 | flow_in_mask = row > col 95 | flow_in_row, flow_in_col = row[flow_in_mask], col[flow_in_mask] 96 | flow_in_input = torch.cat([x[flow_in_col], edge_attr[flow_in_mask]], dim=1) 97 | flow_in = self.flow_in_mlp(flow_in_input) 98 | 99 | flow_in = self.node_agg_fn(flow_in, flow_in_row, x.size(0)) 100 | flow = torch.cat((flow_in, flow_out), dim=1) 101 | 102 | return self.node_mlp(flow) 103 | 104 | class MLPGraphIndependent(nn.Module): 105 | 106 | def __init__(self, edge_in_dim = None, edge_out_dim = None, edge_fc_dims = None, 107 | dropout_p = None, use_batchnorm = None, use_leaky_relu=False): 108 | super(MLPGraphIndependent, self).__init__() 109 | 110 | self.edge_mlp = MLP(input_dim=edge_in_dim, fc_dims=list(edge_fc_dims) + [edge_out_dim], 111 | dropout_p=dropout_p, use_batchnorm=use_batchnorm, use_leaky_relu=use_leaky_relu) 112 | 113 | def forward(self, edge_feats = None): 114 | 115 | out_edge_feats = self.edge_mlp(edge_feats) 116 | 117 | return out_edge_feats 118 | 119 | class MPGraph(nn.Module): 120 | """ 121 | Main Model Class. Contains all the components of the model.
It consists of several networks: 122 | Edge Encoder: MLP encodes initial edge embedding 123 | Edge MLP: Updates edge embedding with Nodes i, j and Edge ij 124 | Node MLP: Updates node embedding with Node i and Edge ij 125 | """ 126 | 127 | def __init__(self, model_params, time_aware_mp=False, use_leaky_relu=True): 128 | super(MPGraph, self).__init__() 129 | 130 | self.model_params = model_params 131 | if use_leaky_relu: 132 | self.relu = nn.LeakyReLU() 133 | else: 134 | self.relu = nn.ReLU() 135 | 136 | # Define Encoder Network 137 | encoder_feats_dict = model_params['encoder_feats_dict'] 138 | self.encoder = MLPGraphIndependent(edge_in_dim=encoder_feats_dict['edge_in_dim'], 139 | edge_fc_dims=encoder_feats_dict['edge_fc_dims'], 140 | edge_out_dim=encoder_feats_dict['edge_out_dim'], 141 | use_leaky_relu=use_leaky_relu) 142 | 143 | # Define the 'Core' message passing network (i.e. node and edge update models) 144 | self.MPNet = self._build_core_MPNet(model_params=model_params, encoder_feats_dict=encoder_feats_dict, time_aware_mp=time_aware_mp, use_leaky_relu=use_leaky_relu) 145 | self.num_mp_steps = model_params['num_mp_steps'] 146 | 147 | def _build_core_MPNet(self, model_params, encoder_feats_dict, time_aware_mp, use_leaky_relu=None): 148 | # Define an aggregation operator for nodes to 'gather' messages from incident edges 149 | node_agg_fn = model_params['node_agg_fn'] 150 | assert node_agg_fn.lower() in ('mean', 'max', 'sum'), "node_agg_fn can only be 'max', 'mean' or 'sum'." 151 | 152 | if node_agg_fn == 'mean': 153 | node_agg_fn = lambda out, row, x_size: scatter_mean(out, row, dim=0, dim_size=x_size) # out=source tensor, row=index to scatter, dim_size=same size as num nodes = x.0 154 | 155 | elif node_agg_fn == 'max': 156 | node_agg_fn = lambda out, row, x_size: scatter_max(out, row, dim=0, dim_size=x_size)[0] 157 | 158 | elif node_agg_fn == 'sum': 159 | node_agg_fn = lambda out, row, x_size: scatter_add(out, row, dim=0, dim_size=x_size) 160 | 161 | # Define all MLPs involved in the graph network 162 | self.reattach_initial_nodes = model_params['reattach_initial_nodes'] 163 | self.reattach_initial_edges = model_params['reattach_initial_edges'] 164 | 165 | edge_factor = 2 if self.reattach_initial_edges else 1 166 | node_factor = 2 if self.reattach_initial_nodes else 1 167 | 168 | edge_model_in_dim = node_factor * 2 * encoder_feats_dict['node_out_dim'] + edge_factor * encoder_feats_dict[ 169 | 'edge_out_dim'] # h_i, h_j, h_ij 170 | node_model_in_dim = node_factor * encoder_feats_dict['node_out_dim'] + encoder_feats_dict['edge_out_dim'] 171 | 172 | # Define all MLPs used within the MPN 173 | edge_model_feats_dict = model_params['edge_model_feats_dict'] 174 | node_model_feats_dict = model_params['node_model_feats_dict'] 175 | 176 | edge_mlp = MLP(input_dim=edge_model_in_dim, 177 | fc_dims=edge_model_feats_dict['fc_dims'], 178 | dropout_p=edge_model_feats_dict['dropout_p'], 179 | use_batchnorm=edge_model_feats_dict['use_batchnorm'], 180 | use_leaky_relu=use_leaky_relu) 181 | 182 | if time_aware_mp: 183 | 184 | node_mlp = MLP(input_dim=2 * encoder_feats_dict['node_out_dim'], 185 | fc_dims=node_model_feats_dict['fc_dims'], 186 | dropout_p=node_model_feats_dict['dropout_p'], 187 | use_batchnorm=node_model_feats_dict['use_batchnorm'], 188 | use_leaky_relu=use_leaky_relu) 189 | 190 | flow_in_mlp = MLP(input_dim=node_model_in_dim, 191 | fc_dims=node_model_feats_dict['fc_dims'], 192 | dropout_p=None, 193 | use_batchnorm=False, 194 | use_leaky_relu=use_leaky_relu) 195 | 196 | flow_out_mlp = 
MLP(input_dim=node_model_in_dim, 197 | fc_dims=node_model_feats_dict['fc_dims'], 198 | dropout_p=None, 199 | use_batchnorm=False, 200 | use_leaky_relu=use_leaky_relu) 201 | 202 | # Define all MLPs used within the MPN 203 | return MetaLayer(edge_model=EdgeModel(edge_mlp=edge_mlp), 204 | node_model=TimeAwareNodeModel(flow_in_mlp=flow_in_mlp, flow_out_mlp=flow_out_mlp, 205 | node_mlp=node_mlp, node_agg_fn=node_agg_fn)) 206 | 207 | else: 208 | 209 | node_mlp = MLP(input_dim=node_model_in_dim, 210 | fc_dims=node_model_feats_dict['fc_dims'], 211 | dropout_p=node_model_feats_dict['dropout_p'], 212 | use_batchnorm=node_model_feats_dict['use_batchnorm']) 213 | 214 | # Define all MLPs used within the MPN 215 | return MetaLayer(edge_model=EdgeModel(edge_mlp=edge_mlp), 216 | node_model=NodeModel(node_mlp=node_mlp, node_agg_fn=node_agg_fn)) 217 | 218 | 219 | 220 | def forward(self, data): 221 | """ 222 | Provides a fractional solution to the data association problem. 223 | First, node and edge features are independently encoded by the encoder network. Then, they are iteratively 224 | 'combined' for a fixed number of steps via the Message Passing Network (self.MPNet). 225 | """ 226 | 227 | x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr 228 | 229 | # Encoding features step 230 | latent_edge_feats = self.encoder(edge_feats=edge_attr) 231 | latent_node_feats = self.relu(x) 232 | #latent_node_feats = x 233 | initial_edge_feats = latent_edge_feats 234 | initial_node_feats = latent_node_feats 235 | 236 | outputs = [] 237 | 238 | # During training, the feature vectors that the MPNetwork outputs for the last self.num_class_steps message 239 | # passing steps are classified in order to compute the loss. 240 | for step in range(1, self.num_mp_steps + 1): 241 | 242 | # Reattach the initially encoded embeddings before the update 243 | if self.reattach_initial_edges: 244 | latent_edge_feats = torch.cat((initial_edge_feats, latent_edge_feats), dim=1) 245 | if self.reattach_initial_nodes: 246 | latent_node_feats = torch.cat((initial_node_feats, latent_node_feats), dim=1) 247 | 248 | # Message Passing Step 249 | latent_node_feats, latent_edge_feats = self.MPNet(latent_node_feats, edge_index, latent_edge_feats) 250 | 251 | if step > 1: # For classifying edges at multiple message passing step times 252 | outputs.append(latent_edge_feats) 253 | 254 | return outputs#[latent_edge_feats] 255 | -------------------------------------------------------------------------------- /Detection/data/mapper_heads.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | 6 | import copy 7 | import logging 8 | import numpy as np 9 | from typing import List, Optional, Union 10 | import torch 11 | import cv2 12 | import h5py 13 | 14 | from detectron2.config import configurable 15 | from detectron2.structures import BoxMode 16 | from detectron2.structures import polygons_to_bitmask 17 | from detectron2.utils.visualizer import GenericMask 18 | from detectron2.data import DatasetMapper 19 | 20 | from BlenderProc.utils import binvox_rw 21 | 22 | import detectron2.data.detection_utils as utils 23 | import detectron2.data.transforms as T 24 | 25 | 26 | sys.path.append('..') #Hack add ROOT DIR 27 | from Detection.utils.train_utils import crop_segmask, get_voxel 28 | 29 | __all__ = ["VoxNocsMapper", "VoxMapper"] 30 | 31 | class VoxNocsMapper: 32 | ''' 33 | Dataset mapper class to handle 
MOTFront data with a Detectron2 network training pipeline with Voxel and NOCs head 34 | ''' 35 | 36 | def __init__(self, cfg, use_instance_mask: bool = False, instance_mask_format: str = "polygon", 37 | recompute_boxes: bool = False, is_train=True, dataset_names=None,): 38 | 39 | if recompute_boxes: 40 | assert use_instance_mask, "recompute_boxes requires instance masks" 41 | self.is_train = is_train 42 | self.augmentations = None # list with augmentations NOT IMPLEMENTED YET 43 | self.cfg = cfg 44 | self.image_format = cfg.INPUT.FORMAT 45 | self.use_instance_mask = use_instance_mask 46 | self.instance_mask_format = instance_mask_format 47 | self.recompute_boxes = recompute_boxes 48 | # fmt: on 49 | logger = logging.getLogger(__name__) 50 | mode = "training" if is_train else "inference" 51 | logger.info(f"[DatasetMapper] Augmentations used in {mode}: {self.augmentations}") 52 | 53 | self.dataset_names = dataset_names 54 | self.voxel_on = cfg.MODEL.VOXEL_ON 55 | self.nocs_on = cfg.MODEL.NOCS_ON 56 | 57 | def _transform_annotations(self, dataset_dict, transforms, image_shape): 58 | annos = [ 59 | utils.transform_instance_annotations( 60 | obj, transforms, image_shape, keypoint_hflip_indices=None 61 | ) 62 | for obj in dataset_dict['annotations'] 63 | ] 64 | instances = utils.annotations_to_instances( 65 | annos, image_shape, mask_format=self.instance_mask_format 66 | ) 67 | 68 | if self.recompute_boxes: 69 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 70 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 71 | 72 | def __call__(self, dataset_dict): 73 | dataset_dict = copy.deepcopy(dataset_dict) 74 | 75 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) # H x W x C 76 | utils.check_image_size(dataset_dict, image) 77 | 78 | sem_seg_gt = None 79 | self.augmentations = T.AugmentationList(utils.build_augmentation(self.cfg, self.is_train)) 80 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 81 | transforms = self.augmentations(aug_input) 82 | image, sem_seg_gt = aug_input.image, aug_input.sem_seg 83 | image_shape = image.shape[:2] # h, w 84 | 85 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) # C x H x W 86 | 87 | if not self.is_train: 88 | pass 89 | #dataset_dict.pop("annotations", None) 90 | #return dataset_dict 91 | 92 | nocs_map = self.get_nocs(dataset_dict["nocs_map"]) 93 | depth_map = self.load_hdf5(dataset_dict["depth_map"]) 94 | 95 | dataset_dict["depth_map"] = depth_map 96 | dataset_dict["nocs_map"] = nocs_map 97 | 98 | for anno in dataset_dict['annotations']: 99 | voxel = get_voxel(anno["voxel"], anno["scale"]) 100 | nocs_obj = crop_segmask(nocs_map, anno['bbox'], anno['segmentation']) 101 | depth_obj = self.crop_depth(depth_map, anno['bbox'], anno['segmentation']) 102 | 103 | anno["voxel"] = voxel 104 | anno["nocs"] = nocs_obj 105 | anno["depth"] = depth_obj 106 | 107 | if "annotations" in dataset_dict: 108 | self._transform_annotations(dataset_dict, transforms, image_shape) 109 | 110 | if self.voxel_on: 111 | count = 0 112 | for anno in dataset_dict['annotations']: 113 | if count == 0: 114 | gt_voxels = torch.unsqueeze(anno['voxel'], 0) 115 | else: 116 | gt_voxel = torch.unsqueeze(anno['voxel'], 0) 117 | gt_voxels = torch.cat((gt_voxels, gt_voxel), 0) 118 | count += 1 119 | 120 | dataset_dict['instances'].set('gt_voxels', gt_voxels) 121 | 122 | if self.nocs_on: 123 | max_height, max_width = self.get_max_dims(dataset_dict['annotations']) 124 | count = 0 125 | for anno in 
dataset_dict['annotations']: 126 | width = anno['nocs'].shape[1] 127 | height = anno['nocs'].shape[0] 128 | p2d = (0, 0, 0, max_width - width, 0, max_height - height) # pad image to right 129 | if count == 0: 130 | gt_nocs = torch.unsqueeze(anno['nocs'], 0) # 1 x H x W x 3 131 | gt_nocs = F.pad(gt_nocs, p2d, "constant", 300) # 300 not a pixel value # 1 x maxH x maxW x 3 132 | else: 133 | gt_noc = torch.unsqueeze(anno['nocs'], 0) 134 | gt_noc = F.pad(gt_noc, p2d, "constant", 300) # 300 not a pixel value 135 | gt_nocs = torch.cat((gt_nocs, gt_noc), 0) 136 | count += 1 137 | 138 | dataset_dict['instances'].set('gt_nocs', gt_nocs) 139 | 140 | return dataset_dict 141 | 142 | 143 | @staticmethod 144 | def get_max_dims(dset): 145 | ''' 146 | padding image crops 147 | ''' 148 | 149 | max_height = 0 150 | max_width = 0 151 | 152 | for anno in dset: 153 | height, width = anno['nocs'].shape[0], anno['nocs'].shape[1] 154 | 155 | if height >= max_height: 156 | max_height = height 157 | 158 | if width >= max_width: 159 | max_width = width 160 | 161 | return max_height, max_width 162 | 163 | @staticmethod 164 | def get_nocs(nocs_path): 165 | 166 | nocs = cv2.imread(nocs_path, -1) #BGRA 167 | nocs = nocs[:,:,:3] 168 | nocs = np.array(nocs[:, :, ::-1], dtype=np.float32) # RGB 169 | 170 | return nocs 171 | 172 | @staticmethod 173 | def crop_depth(depth_img, bbox, segmap): 174 | 175 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 176 | 177 | gm = GenericMask(segmap, 240, 320) 178 | bin_mask = gm.polygons_to_mask(segmap) 179 | binary_mask = bin_mask[:, :] 180 | crop_im = np.multiply(depth_img, binary_mask) 181 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2])]) 182 | 183 | return torch.from_numpy(cropped_im).to(torch.float32) 184 | 185 | @staticmethod 186 | def load_hdf5(path): 187 | with h5py.File(path, 'r') as data: 188 | for key in data.keys(): 189 | if key == 'depth': 190 | depth = np.array(data[key]) 191 | 192 | return depth 193 | 194 | 195 | class VoxMapper: 196 | ''' 197 | Dataset mapper class to handle MOTFront data with a Detectron2 network training pipeline with Voxel head 198 | ''' 199 | 200 | def __init__( 201 | self, 202 | cfg, 203 | use_instance_mask: bool = False, 204 | instance_mask_format: str = "polygon", 205 | recompute_boxes: bool = False, 206 | is_train=True, 207 | dataset_names=None, 208 | ): 209 | if recompute_boxes: 210 | assert use_instance_mask, "recompute_boxes requires instance masks" 211 | # fmt: off 212 | self.is_train = is_train 213 | self.augmentations = None # list with augmentations NOT IMPLEMENTED YET 214 | self.cfg = cfg 215 | self.image_format = cfg.INPUT.FORMAT 216 | self.use_instance_mask = use_instance_mask 217 | self.instance_mask_format = instance_mask_format 218 | self.recompute_boxes = recompute_boxes 219 | # fmt: on 220 | logger = logging.getLogger(__name__) 221 | mode = "training" if is_train else "inference" 222 | logger.info(f"[DatasetMapper] Augmentations used in {mode}: {self.augmentations}") 223 | 224 | self.dataset_names = dataset_names 225 | self.voxel_on = cfg.MODEL.VOXEL_ON 226 | self.nocs_on = cfg.MODEL.NOCS_ON 227 | 228 | 229 | def _transform_annotations(self, dataset_dict, transforms, image_shape): 230 | 231 | 232 | annos = [ 233 | utils.transform_instance_annotations( 234 | obj, transforms, image_shape, keypoint_hflip_indices=None 235 | ) 236 | for obj in dataset_dict['annotations'] 237 | ] 238 | instances = utils.annotations_to_instances( 239 | 
annos, image_shape, mask_format=self.instance_mask_format 240 | ) 241 | 242 | if self.recompute_boxes: 243 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 244 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 245 | 246 | def __call__(self, dataset_dict): 247 | dataset_dict = copy.deepcopy(dataset_dict) 248 | 249 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) # H x W x C 250 | utils.check_image_size(dataset_dict, image) 251 | 252 | sem_seg_gt = None 253 | self.augmentations = T.AugmentationList(utils.build_augmentation(self.cfg, self.is_train)) 254 | aug_input = T.AugInput(image, sem_seg=sem_seg_gt) 255 | transforms = self.augmentations(aug_input) 256 | image, sem_seg_gt = aug_input.image, aug_input.sem_seg 257 | 258 | image_shape = image.shape[:2] # h, w 259 | 260 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) # C x H x W 261 | 262 | if not self.is_train: 263 | pass 264 | #dataset_dict.pop("annotations", None) 265 | #return dataset_dict 266 | 267 | for anno in dataset_dict['annotations']: 268 | voxel = anno["voxel"] 269 | anno["voxel"] = voxel 270 | 271 | 272 | if "annotations" in dataset_dict: 273 | self._transform_annotations(dataset_dict, transforms, image_shape) 274 | 275 | if self.voxel_on: 276 | count = 0 277 | for anno in dataset_dict['annotations']: 278 | if count == 0: 279 | gt_voxels = torch.unsqueeze(anno['voxel'], 0) 280 | else: 281 | gt_voxel = torch.unsqueeze(anno['voxel'], 0) 282 | gt_voxels = torch.cat((gt_voxels, gt_voxel), 0) 283 | count += 1 284 | 285 | dataset_dict['instances'].set('gt_voxels', gt_voxels) 286 | 287 | return dataset_dict 288 | 289 | 290 | @staticmethod 291 | def get_max_dims(dset): 292 | ''' 293 | padding image crops 294 | ''' 295 | 296 | max_height = 0 297 | max_width = 0 298 | 299 | for anno in dset: 300 | height, width = anno['nocs'].shape[0], anno['nocs'].shape[1] 301 | 302 | if height >= max_height: 303 | max_height = height 304 | 305 | if width >= max_width: 306 | max_width = width 307 | 308 | return max_height, max_width -------------------------------------------------------------------------------- /Detection/evaluator/_mask.pyx: -------------------------------------------------------------------------------- 1 | import sys 2 | PYTHON_VERSION = sys.version_info[0] 3 | 4 | # import both Python-level and C-level symbols of Numpy 5 | # the API uses Numpy to interface C and Python 6 | import numpy as np 7 | cimport numpy as np 8 | from libc.stdlib cimport malloc, free 9 | 10 | # intialized Numpy. must do. 
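# np.import_array() loads NumPy's C-API function table; it must run before any PyArray_* call in this module, otherwise those calls would crash at runtime.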
11 | np.import_array() 12 | 13 | # import numpy C function 14 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 15 | cdef extern from "numpy/arrayobject.h": 16 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 17 | 18 | # Declare the prototype of the C functions in MaskApi.h 19 | cdef extern from "maskApi.h": 20 | ctypedef unsigned int uint 21 | ctypedef unsigned long siz 22 | ctypedef unsigned char byte 23 | ctypedef double* BB 24 | ctypedef struct RLE: 25 | siz h, 26 | siz w, 27 | siz m, 28 | uint* cnts, 29 | void rlesInit( RLE **R, siz n ) 30 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 31 | void rleDecode( const RLE *R, byte *mask, siz n ) 32 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 33 | void rleArea( const RLE *R, siz n, uint *a ) 34 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 35 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 36 | void rleToBbox( const RLE *R, BB bb, siz n ) 37 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 38 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 39 | char* rleToString( const RLE *R ) 40 | void rleFrString( RLE *R, char *s, siz h, siz w ) 41 | 42 | # python class to wrap RLE array in C 43 | # the class handles the memory allocation and deallocation 44 | cdef class RLEs: 45 | cdef RLE *_R 46 | cdef siz _n 47 | 48 | def __cinit__(self, siz n =0): 49 | rlesInit(&self._R, n) 50 | self._n = n 51 | 52 | # free the RLE array here 53 | def __dealloc__(self): 54 | if self._R is not NULL: 55 | for i in range(self._n): 56 | free(self._R[i].cnts) 57 | free(self._R) 58 | def __getattr__(self, key): 59 | if key == 'n': 60 | return self._n 61 | raise AttributeError(key) 62 | 63 | # python class to wrap Mask array in C 64 | # the class handles the memory allocation and deallocation 65 | cdef class Masks: 66 | cdef byte *_mask 67 | cdef siz _h 68 | cdef siz _w 69 | cdef siz _n 70 | 71 | def __cinit__(self, h, w, n): 72 | self._mask = malloc(h*w*n* sizeof(byte)) 73 | self._h = h 74 | self._w = w 75 | self._n = n 76 | # def __dealloc__(self): 77 | # the memory management of _mask has been passed to np.ndarray 78 | # it doesn't need to be freed here 79 | 80 | # called when passing into np.array() and return an np.ndarray in column-major order 81 | def __array__(self): 82 | cdef np.npy_intp shape[1] 83 | shape[0] = self._h*self._w*self._n 84 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 85 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 86 | # The _mask allocated by Masks is now handled by ndarray 87 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 88 | return ndarray 89 | 90 | # internal conversion from Python RLEs object to compressed RLE format 91 | def _toString(RLEs Rs): 92 | cdef siz n = Rs.n 93 | cdef bytes py_string 94 | cdef char* c_string 95 | objs = [] 96 | for i in range(n): 97 | c_string = rleToString( &Rs._R[i] ) 98 | py_string = c_string 99 | objs.append({ 100 | 'size': [Rs._R[i].h, Rs._R[i].w], 101 | 'counts': py_string 102 | }) 103 | free(c_string) 104 | return objs 105 | 106 | # internal conversion from compressed RLE format to Python RLEs object 107 | def _frString(rleObjs): 108 | cdef siz n = len(rleObjs) 109 | Rs = RLEs(n) 110 | cdef bytes py_string 111 | cdef char* c_string 112 | for i, obj in enumerate(rleObjs): 113 | if PYTHON_VERSION == 2: 114 | py_string = str(obj['counts']).encode('utf8') 
115 | elif PYTHON_VERSION == 3: 116 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 117 | else: 118 | raise Exception('Python version must be 2 or 3') 119 | c_string = py_string 120 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 121 | return Rs 122 | 123 | # encode mask to RLEs objects 124 | # list of RLE string can be generated by RLEs member function 125 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 126 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 127 | cdef RLEs Rs = RLEs(n) 128 | rleEncode(Rs._R,mask.data,h,w,n) 129 | objs = _toString(Rs) 130 | return objs 131 | 132 | # decode mask from compressed list of RLE string or RLEs object 133 | def decode(rleObjs): 134 | cdef RLEs Rs = _frString(rleObjs) 135 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 136 | masks = Masks(h, w, n) 137 | rleDecode(Rs._R, masks._mask, n); 138 | return np.array(masks) 139 | 140 | def merge(rleObjs, intersect=0): 141 | cdef RLEs Rs = _frString(rleObjs) 142 | cdef RLEs R = RLEs(1) 143 | rleMerge(Rs._R, R._R, Rs._n, intersect) 144 | obj = _toString(R)[0] 145 | return obj 146 | 147 | def area(rleObjs): 148 | cdef RLEs Rs = _frString(rleObjs) 149 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 150 | rleArea(Rs._R, Rs._n, _a) 151 | cdef np.npy_intp shape[1] 152 | shape[0] = Rs._n 153 | a = np.array((Rs._n, ), dtype=np.uint8) 154 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 155 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 156 | return a 157 | 158 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 159 | def iou( dt, gt, pyiscrowd ): 160 | def _preproc(objs): 161 | if len(objs) == 0: 162 | return objs 163 | if type(objs) == np.ndarray: 164 | if len(objs.shape) == 1: 165 | objs = objs.reshape((objs[0], 1)) 166 | # check if it's Nx4 bbox 167 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 168 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 169 | objs = objs.astype(np.double) 170 | elif type(objs) == list: 171 | # check if list is in box format and convert it to np.ndarray 172 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 173 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 174 | if isbox: 175 | objs = np.array(objs, dtype=np.double) 176 | if len(objs.shape) == 1: 177 | objs = objs.reshape((1,objs.shape[0])) 178 | elif isrle: 179 | objs = _frString(objs) 180 | else: 181 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 182 | else: 183 | raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 184 | return objs 185 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 186 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 187 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 188 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 189 | def _len(obj): 190 | cdef siz N = 0 191 | if type(obj) == RLEs: 192 | N = obj.n 193 | elif len(obj)==0: 194 | pass 195 | elif type(obj) == np.ndarray: 196 | N = obj.shape[0] 197 | return N 198 | # convert iscrowd to numpy array 199 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 200 | # simple type checking 201 | cdef siz m, n 202 | dt = _preproc(dt) 203 | gt = _preproc(gt) 204 | m = _len(dt) 205 | n = _len(gt) 206 | if m == 0 or n == 0: 207 | return [] 208 | if not type(dt) == type(gt): 209 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 210 | 211 | # define local variables 212 | cdef double* _iou = 0 213 | cdef np.npy_intp shape[1] 214 | # check type and assign iou function 215 | if type(dt) == RLEs: 216 | _iouFun = _rleIou 217 | elif type(dt) == np.ndarray: 218 | _iouFun = _bbIou 219 | else: 220 | raise Exception('input data type not allowed.') 221 | _iou = malloc(m*n* sizeof(double)) 222 | iou = np.zeros((m*n, ), dtype=np.double) 223 | shape[0] = m*n 224 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 225 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 226 | _iouFun(dt, gt, iscrowd, m, n, iou) 227 | return iou.reshape((m,n), order='F') 228 | 229 | def toBbox( rleObjs ): 230 | cdef RLEs Rs = _frString(rleObjs) 231 | cdef siz n = Rs.n 232 | cdef BB _bb = malloc(4*n* sizeof(double)) 233 | rleToBbox( Rs._R, _bb, n ) 234 | cdef np.npy_intp shape[1] 235 | shape[0] = 4*n 236 | bb = np.array((1,4*n), dtype=np.double) 237 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 238 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 239 | return bb 240 | 241 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 242 | cdef siz n = bb.shape[0] 243 | Rs = RLEs(n) 244 | rleFrBbox( Rs._R, bb.data, h, w, n ) 245 | objs = _toString(Rs) 246 | return objs 247 | 248 | def frPoly( poly, siz h, siz w ): 249 | cdef np.ndarray[np.double_t, ndim=1] np_poly 250 | n = len(poly) 251 | Rs = RLEs(n) 252 | for i, p in enumerate(poly): 253 | np_poly = np.array(p, dtype=np.double, order='F') 254 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 255 | objs = _toString(Rs) 256 | return objs 257 | 258 | def frUncompressedRLE(ucRles, siz h, siz w): 259 | cdef np.ndarray[np.uint32_t, ndim=1] cnts 260 | cdef RLE R 261 | cdef uint *data 262 | n = len(ucRles) 263 | objs = [] 264 | for i in range(n): 265 | Rs = RLEs(1) 266 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 267 | # time for malloc can be saved here but it's fine 268 | data = malloc(len(cnts)* sizeof(uint)) 269 | for j in range(len(cnts)): 270 | data[j] = cnts[j] 271 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 272 | Rs._R[0] = R 273 | objs.append(_toString(Rs)[0]) 274 | return objs 275 | 276 | def frPyObjects(pyobj, h, w): 277 | # encode rle from a list of python objects 278 | if type(pyobj) == np.ndarray: 279 | objs = frBbox(pyobj, h, w) 280 | elif type(pyobj) == 
list and len(pyobj[0]) == 4: 281 | objs = frBbox(pyobj, h, w) 282 | elif type(pyobj) == list and len(pyobj[0]) > 4: 283 | objs = frPoly(pyobj, h, w) 284 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 285 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 286 | objs = frUncompressedRLE(pyobj, h, w) 287 | # encode rle from single python object 288 | elif type(pyobj) == list and len(pyobj) == 4: 289 | objs = frBbox([pyobj], h, w)[0] 290 | elif type(pyobj) == list and len(pyobj) > 4: 291 | objs = frPoly([pyobj], h, w)[0] 292 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 293 | objs = frUncompressedRLE([pyobj], h, w)[0] 294 | else: 295 | raise Exception('input type is not supported.') 296 | return objs -------------------------------------------------------------------------------- /Tracking/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import torch 5 | import mathutils 6 | 7 | from Tracking.utils.train_utils import convert_voxel_to_pc 8 | import open3d as o3d 9 | from scipy.spatial.transform import Rotation as R 10 | from scipy.spatial.transform import Slerp 11 | from scipy.ndimage import gaussian_filter1d 12 | from scipy import interpolate 13 | 14 | 15 | def box2minmax(corner_pt_box): 16 | ''' 17 | Box from 8x3 to minmax format 18 | Only works properly for axis aligned boxes 19 | ''' 20 | xyz_min = torch.min(corner_pt_box, dim=0).values 21 | xyz_max = torch.max(corner_pt_box, dim=0).values 22 | box = np.concatenate((xyz_min.numpy(), xyz_max.numpy())) 23 | return box 24 | 25 | def box2minmax_axaligned(corner_pt_box): 26 | ''' 27 | Box from 8x3 to minmax format 28 | For non-axis aligned boxes, first enclose with axis-aligned box, then calc minmax 29 | ''' 30 | 31 | bbox3d_obj = o3d.geometry.AxisAlignedBoundingBox() 32 | bbox_3d = bbox3d_obj.create_from_points(o3d.utility.Vector3dVector(corner_pt_box)) 33 | corner_pt_box = torch.from_numpy(np.array(bbox_3d.get_box_points())) 34 | xyz_min = torch.min(corner_pt_box, dim=0).values 35 | xyz_max = torch.max(corner_pt_box, dim=0).values 36 | box = np.concatenate((xyz_min.numpy(), xyz_max.numpy())) 37 | return box 38 | 39 | def cad2world_mat(rot, loc, scale, with_scale=True): 40 | ''' 41 | Return cad2world matrix from annotations 42 | ''' 43 | cad2world = torch.eye(4) 44 | scale_mat = torch.diag(torch.tensor([scale, scale, scale])) 45 | if with_scale: 46 | cad2world[:3, :3] = scale_mat @ euler_to_rot(rot, fmt='torch') 47 | else: 48 | cad2world[:3, :3] = euler_to_rot(rot, fmt='torch') 49 | 50 | cad2world[:3, 3] = loc 51 | return cad2world 52 | 53 | def euler_to_rot(euler_rot, fmt='torch', constraint=False): 54 | ''' 55 | Euler to 3x3 Rotation Matrix transform 56 | ''' 57 | 58 | if constraint: 59 | euler_rot = torch.tensor([0, 0, euler_rot[2]]) 60 | euler = mathutils.Euler(euler_rot) 61 | rot = np.array(euler.to_matrix()) 62 | 63 | if fmt == 'torch': 64 | return torch.from_numpy(rot) 65 | else: 66 | return rot 67 | 68 | 69 | def visualize_graph(G, color): 70 | ''' 71 | Visualise Graph data connectivity 72 | ''' 73 | plt.figure(figsize=(7,7)) 74 | plt.xticks([]) 75 | plt.yticks([]) 76 | nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False, 77 | node_color=color, cmap="Set2") 78 | plt.show() 79 | 80 | 81 | 82 | def fuse_pose(trajectories, seq_len=None): 83 | ''' 84 | Pose fusion via slurp and spline interpolation 85 | ''' 86 | 87 | def get_scale(m): 88 | if type(m) 
== torch.Tensor: 89 | return m.norm(dim=0) 90 | return np.linalg.norm(m, axis=0) 91 | 92 | def fill_last(fill_list, exp_dim=False): 93 | # fill with last value 94 | for t_idx, tt in enumerate(fill_list): 95 | if tt is None: 96 | for i in range(t_idx - 1, -1, -1): 97 | if fill_list[i] is not None: 98 | if exp_dim: 99 | fill_list[t_idx] = np.expand_dims(fill_list[i], axis=0) 100 | else: 101 | fill_list[t_idx] = fill_list[i] 102 | break 103 | return fill_list 104 | 105 | def fill_last_t(fill_list, exp_dim=True): 106 | # fill with last value 107 | for t_idx, tt in enumerate(fill_list): 108 | if tt.sum() == 0: 109 | for i in range(t_idx - 1, -1, -1): 110 | if fill_list[i].sum() != 0: 111 | if exp_dim: 112 | fill_list[t_idx] = np.expand_dims(np.squeeze(fill_list[i]), axis=0) 113 | else: 114 | fill_list[t_idx] = fill_list[i] 115 | break 116 | else: 117 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 118 | 119 | for t_idx, tt in enumerate(fill_list): 120 | if len(tt.shape) == 1: 121 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 122 | 123 | return fill_list 124 | 125 | def unscale_mat(cad2world): 126 | 127 | c2w_cpy = torch.clone(cad2world) 128 | rot = cad2world[:3, :3] 129 | scale = get_scale(rot) 130 | unscaled_rot = rot / scale 131 | c2w_cpy[:3, :3] = unscaled_rot 132 | return c2w_cpy 133 | 134 | new_trajectories = [] 135 | 136 | times = np.arange(seq_len) 137 | for traj in trajectories: 138 | key_times = [] 139 | key_trans = [] 140 | key_rots = [] 141 | t_trans = [np.zeros(3) for i in range(seq_len)] 142 | t_vox = [None for i in range(seq_len)] 143 | t_box = [None for i in range(seq_len)] 144 | t_id = [None for i in range(seq_len)] 145 | t_scale = [None for i in range(seq_len)] 146 | for pred in traj: 147 | key_rots.append(torch.unsqueeze(unscale_mat(pred['obj']['cad2world'][:3, :3]), dim=0)) 148 | key_trans.append(torch.unsqueeze(pred['obj']['cad2world'][:3, 3], dim=0)) 149 | key_times.append(pred['scan_idx']) 150 | t_trans[pred['scan_idx']] = pred['obj']['cad2world'][:3, 3].numpy() 151 | t_vox[pred['scan_idx']] = pred['obj']['voxel'] 152 | t_id[pred['scan_idx']] = pred['obj']['obj_idx'] 153 | t_box[pred['scan_idx']] = pred['obj']['compl_box'] 154 | t_scale[pred['scan_idx']] = get_scale(pred['obj']['cad2world'][:3, :3]) 155 | 156 | times = np.linspace(key_times[0], key_times[-1], num=key_times[-1]-key_times[0]+1).astype(np.int) 157 | traj_rots = torch.cat(key_rots, dim=0).numpy() 158 | key_trans = torch.cat(key_trans, dim=0).numpy() 159 | 160 | t_trans = np.concatenate(fill_last_t(t_trans, exp_dim=True), axis=0) 161 | t_trans[:, 0] = gaussian_filter1d(t_trans[:, 0], 3) 162 | t_trans[:, 1] = gaussian_filter1d(t_trans[:, 1], 3) 163 | t_trans[:, 2] = gaussian_filter1d(t_trans[:, 2], 3) 164 | t_vox = fill_last(t_vox) 165 | t_id = fill_last(t_id) 166 | t_box = fill_last(t_box) 167 | t_scale = fill_last(t_scale) 168 | 169 | r = R.from_matrix(traj_rots) 170 | slerp = Slerp(key_times, r) 171 | interp_rots = slerp(times) 172 | interp_rotmat = interp_rots.as_matrix() 173 | euler_rots = interp_rots.as_euler('xyz') 174 | euler_rots[:,-1] = gaussian_filter1d(euler_rots[:,-1], 3) #3 = sigma = standard deviation 175 | euler_rots[:, -1] = np.clip(euler_rots[:,-1], euler_rots[0,-1] - (0.2 * euler_rots[0,-1]), euler_rots[0,-1] + (0.2 * euler_rots[0,-1])) 176 | r_e = R.from_euler('xyz', euler_rots, degrees=False) 177 | interp_rotmat = r_e.as_matrix() 178 | 179 | 180 | 181 | #test = np.diag(t_scale[0]) @ interp_rotmat[0,:,:] 182 | constraint_flip = np.array([[1, 0, 0], [0, 0, -1], 
[0, 1, 0]]) 183 | new_traj = [] 184 | for t in times: 185 | t_dict = dict() 186 | t_dict['scan_idx'] = t 187 | t_dict['obj'] = dict() 188 | t_dict['obj']['cad2world'] = np.identity(4) 189 | t_dict['obj']['cad2world'][:3, :3] = (np.diag(t_scale[t]) @ interp_rotmat[t-key_times[0],:,:]) 190 | t_dict['obj']['cad2world'][:3, 3] = t_trans[t] 191 | t_dict['obj']['voxel'] = t_vox[t] 192 | t_dict['obj']['obj_idx'] = t_id[t] 193 | t_dict['obj']['compl_box'] = t_box[t] 194 | new_traj.append(t_dict) 195 | 196 | new_trajectories.append(new_traj) 197 | 198 | return new_trajectories 199 | 200 | 201 | def fuse_pose_F2F(trajectories, seq_len=125, constraint=True): 202 | ''' 203 | Pose fusion via slurp and spline interpolation 204 | ''' 205 | 206 | def get_scale(m): 207 | if type(m) == torch.Tensor: 208 | return m.norm(dim=0) 209 | return np.linalg.norm(m, axis=0) 210 | 211 | def fill_last(fill_list, exp_dim=False): 212 | # fill with last value 213 | for t_idx, tt in enumerate(fill_list): 214 | if tt is None: 215 | for i in range(t_idx - 1, -1, -1): 216 | if fill_list[i] is not None: 217 | if exp_dim: 218 | fill_list[t_idx] = np.expand_dims(fill_list[i], axis=0) 219 | else: 220 | fill_list[t_idx] = fill_list[i] 221 | break 222 | return fill_list 223 | 224 | def fill_last_t(fill_list, exp_dim=True): 225 | # fill with last value 226 | for t_idx, tt in enumerate(fill_list): 227 | if tt.sum() == 0: 228 | for i in range(t_idx - 1, -1, -1): 229 | if fill_list[i].sum() != 0: 230 | if exp_dim: 231 | fill_list[t_idx] = np.expand_dims(np.squeeze(fill_list[i]), axis=0) 232 | else: 233 | fill_list[t_idx] = fill_list[i] 234 | break 235 | else: 236 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 237 | 238 | for t_idx, tt in enumerate(fill_list): 239 | if len(tt.shape) == 1: 240 | fill_list[t_idx] = np.expand_dims(fill_list[t_idx], axis=0) 241 | 242 | return fill_list 243 | 244 | def unscale_mat(cad2world): 245 | 246 | c2w_cpy = np.copy(cad2world) 247 | rot = cad2world[:3, :3] 248 | scale = get_scale(rot) 249 | unscaled_rot = rot / scale 250 | c2w_cpy[:3, :3] = unscaled_rot 251 | return c2w_cpy 252 | 253 | new_trajectories = [] 254 | 255 | times = np.arange(seq_len) 256 | for traj in trajectories: 257 | key_times = [] 258 | key_trans = [] 259 | key_rots = [] 260 | t_trans = [np.zeros(3) for i in range(seq_len)] 261 | t_vox = [None for i in range(seq_len)] 262 | t_box = [None for i in range(seq_len)] 263 | t_id = [None for i in range(seq_len)] 264 | t_scale = [None for i in range(seq_len)] 265 | for pred in traj: 266 | key_rots.append(np.expand_dims(unscale_mat(pred['obj']['cad2world'][:3, :3]), axis=0)) 267 | key_trans.append(np.expand_dims(pred['obj']['cad2world'][:3, 3], axis=0)) 268 | key_times.append(pred['scan_idx']) 269 | t_trans[pred['scan_idx']] = pred['obj']['cad2world'][:3, 3]#.numpy() 270 | t_vox[pred['scan_idx']] = pred['obj']['obj_pc'] 271 | t_id[pred['scan_idx']] = pred['obj']['obj_idx'] 272 | t_box[pred['scan_idx']] = pred['obj']['obj_box'] 273 | t_scale[pred['scan_idx']] = get_scale(pred['obj']['cad2world'][:3, :3]) 274 | 275 | times = np.linspace(key_times[0], key_times[-1], num=key_times[-1]-key_times[0]+1).astype(np.int) 276 | traj_rots = np.concatenate(key_rots, axis=0)#.numpy() 277 | #key_trans = torch.cat(key_trans, dim=0)#.numpy() 278 | 279 | t_trans = np.concatenate(fill_last_t(t_trans, exp_dim=True), axis=0) 280 | t_trans[:, 0] = gaussian_filter1d(t_trans[:, 0], 3) 281 | t_trans[:, 1] = gaussian_filter1d(t_trans[:, 1], 3) 282 | t_trans[:, 2] = gaussian_filter1d(t_trans[:, 2], 3) 283 
| t_vox = fill_last(t_vox) 284 | t_id = fill_last(t_id) 285 | t_box = fill_last(t_box) 286 | t_scale = fill_last(t_scale) 287 | 288 | r = R.from_matrix(traj_rots) 289 | slerp = Slerp(key_times, r) 290 | interp_rots = slerp(times) 291 | interp_rotmat = interp_rots.as_matrix() 292 | euler_rots = interp_rots.as_euler('xyz') 293 | euler_rots[:,-1] = gaussian_filter1d(euler_rots[:,-1], 3) #3 = sigma = standard deviation 294 | euler_rots[:, -1] = np.clip(euler_rots[:,-1], euler_rots[0,-1] - (0.2 * euler_rots[0,-1]), euler_rots[0,-1] + (0.2 * euler_rots[0,-1])) 295 | if constraint: 296 | euler_rots[:,0] = 0 297 | euler_rots[:,1] = 0 298 | r_e = R.from_euler('xyz', euler_rots, degrees=False) 299 | interp_rotmat = r_e.as_matrix() 300 | 301 | 302 | #test = np.diag(t_scale[0]) @ interp_rotmat[0,:,:] 303 | constraint_flip = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) 304 | new_traj = [] 305 | for t in times: 306 | t_dict = dict() 307 | t_dict['scan_idx'] = t 308 | t_dict['obj'] = dict() 309 | t_dict['obj']['cad2world'] = np.identity(4) 310 | t_dict['obj']['cad2world'][:3, :3] = (np.diag(t_scale[t]) @ interp_rotmat[t-key_times[0],:,:]) 311 | t_dict['obj']['cad2world'][:3, 3] = t_trans[t] 312 | t_dict['obj']['obj_pc'] = t_vox[t] 313 | t_dict['obj']['obj_idx'] = t_id[t] 314 | t_dict['obj']['obj_box'] = t_box[t] 315 | new_traj.append(t_dict) 316 | 317 | new_trajectories.append(new_traj) 318 | 319 | return new_trajectories -------------------------------------------------------------------------------- /Detection/roi_heads/nocs_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import fvcore.nn.weight_init as weight_init 3 | import torch 4 | import numpy as np 5 | from detectron2.layers import ShapeSpec, cat, roi_align 6 | from detectron2.utils.events import get_event_storage 7 | from detectron2.utils.registry import Registry 8 | from detectron2.structures import Boxes, BoxMode, pairwise_iou 9 | from torch import nn 10 | from typing import Dict 11 | import sys 12 | sys.path.append('..') #Hack add ROOT DIR 13 | from Detection.utils.train_utils import init_weights, symmetry_smooth_l1_loss, symmetry_bin_loss, crop_nocs, nocs_prob_to_value 14 | 15 | import matplotlib.pyplot as plt 16 | 17 | ROI_NOCS_HEAD_REGISTRY = Registry("ROI_NOCS_HEAD") 18 | 19 | 20 | def nocs_loss(pred_nocsmap, instances, pred_boxes, 21 | loss_weight=3, iou_thres=0.5, cls_mapping=None, use_bin_loss=False, num_bins=32): 22 | ''' 23 | Calculate loss between predicted and gt nocs map if same category id and max IoU box > threshold 24 | per batch 25 | iou_thres: IoU threshold used for positive samples for loss calculation 26 | cls_mapping: class id to name mapping used for symmetry in loss 27 | use_bin_loss: if true use classification loss else use smooth l1 loss 28 | ''' 29 | 30 | l1_loss = 0 31 | device = torch.device("cuda") 32 | #batch_size = len(instances) 33 | start_instance = 0 34 | num_instances_overlap = 0 35 | 36 | 37 | for instances_per_image in instances: 38 | if len(instances_per_image) == 0: 39 | continue 40 | 41 | end_instance = start_instance + len(instances_per_image) 42 | 43 | gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) 44 | gt_boxes_per_image = instances_per_image.gt_boxes 45 | gt_nocs_per_image = instances_per_image.gt_nocs 46 | 47 | for i in range(start_instance, end_instance): 48 | 49 | abs_pred_box = pred_boxes[i,:].to(dtype=torch.int64) 50 | pred_box = 
Boxes(torch.unsqueeze(abs_pred_box, dim=0)) # XYXY 51 | patch_heigth = int(abs_pred_box[3] - abs_pred_box[1]) # Y 52 | patch_width = int(abs_pred_box[2] - abs_pred_box[0]) # X 53 | 54 | pred_nocs = pred_nocsmap[i] # (32) x C x 28 x 28 (bin) 55 | 56 | ious = pairwise_iou(gt_boxes_per_image, pred_box) 57 | idx_max_iou = int(torch.argmax(ious)) 58 | max_iou = ious[idx_max_iou] 59 | 60 | if max_iou >= iou_thres: 61 | 62 | num_instances_overlap += 1 63 | 64 | gt_box = gt_boxes_per_image.tensor[idx_max_iou,:].to(dtype=torch.int64) 65 | 66 | gt_nocs = gt_nocs_per_image[idx_max_iou, :, :, :] # H x W x C 67 | gt_nocs = torch.squeeze(crop_nocs(gt_nocs), dim=0).to(device=device) # C x H x W 68 | 69 | gt_cls = cls_mapping[gt_classes_per_image[idx_max_iou]] 70 | 71 | # Get overlapping pixels for loss computation -> Positive ROIs 72 | x_min = int(torch.max(torch.tensor([gt_box[0], abs_pred_box[0]]))) 73 | x_max = int(torch.min(torch.tensor([gt_box[2], abs_pred_box[2]]))) 74 | y_min = int(torch.max(torch.tensor([gt_box[1], abs_pred_box[1]]))) 75 | y_max = int(torch.min(torch.tensor([gt_box[3], abs_pred_box[3]]))) 76 | 77 | # Symmetry Loss: Rotate gt_overlap 90,180,270 degree around y_axis and take min 78 | if use_bin_loss: 79 | # Roi Align pred nocs to pred box shape 80 | tmp_box = [torch.unsqueeze( 81 | torch.tensor([0, 0, pred_nocs.shape[3], pred_nocs.shape[2]], dtype=torch.float32, 82 | device=device), dim=0)] * num_bins 83 | pred_patch = roi_align(pred_nocs.to(device=device), tmp_box, 84 | output_size=(patch_heigth, patch_width), aligned=True) # num_bins x 3 x H x W 85 | 86 | # Full image patches 87 | full_patch = torch.zeros(num_bins, 3, 240, 320) 88 | full_patch[:, :, abs_pred_box[1]:abs_pred_box[3], abs_pred_box[0]:abs_pred_box[2]] = pred_patch 89 | 90 | gt_patch = torch.zeros(3, 240, 320) 91 | gt_patch[:, gt_box[1]:gt_box[3], gt_box[0]:gt_box[2]] = gt_nocs 92 | 93 | # Loss only on overlap ROI with GT 94 | pred_overlap = full_patch[:, :, y_min:y_max, x_min:x_max] # binsxCxHxW 95 | gt_overlap = gt_patch[:, y_min:y_max, x_min:x_max] # CxHxW 96 | # print(pred_overlap.shape, max_iou, pred_patch.shape) 97 | 98 | obj_loss = symmetry_bin_loss(gt_overlap, pred_overlap, gt_cls=gt_cls, num_bins=num_bins) 99 | 100 | else: 101 | # Roi Align pred nocs to pred box shape 102 | tmp_box = [torch.unsqueeze( 103 | torch.tensor([0, 0, pred_nocs.shape[2], pred_nocs.shape[1]], dtype=torch.float32, 104 | device=device), dim=0)] 105 | pred_patch = roi_align(torch.unsqueeze(pred_nocs.to(device=device), dim=0), tmp_box, 106 | output_size=(patch_heigth, patch_width), aligned=True) 107 | pred_patch = torch.squeeze(pred_patch, dim=0) # C x H x W of predicted box 108 | 109 | # Full image patches 110 | full_patch = torch.zeros(3, 240, 320) 111 | full_patch[:, abs_pred_box[1]:abs_pred_box[3], abs_pred_box[0]:abs_pred_box[2]] = pred_patch 112 | 113 | gt_patch = torch.zeros(3, 240, 320) 114 | gt_patch[:, gt_box[1]:gt_box[3], gt_box[0]:gt_box[2]] = gt_nocs 115 | 116 | # Loss only on overlap ROI with GT 117 | pred_overlap = full_patch[:, y_min:y_max, x_min:x_max] # CxHxW 118 | gt_overlap = gt_patch[:, y_min:y_max, x_min:x_max] # CxHxW 119 | # print(pred_overlap.shape, max_iou, pred_patch.shape) 120 | 121 | obj_loss = symmetry_smooth_l1_loss(gt_overlap, pred_overlap, gt_cls=gt_cls) 122 | 123 | l1_loss += obj_loss 124 | 125 | start_instance = end_instance 126 | 127 | l1_loss = l1_loss * loss_weight / num_instances_overlap 128 | 129 | return l1_loss, None 130 | 131 | def nocs_inference(pred_nocsmap, pred_instances, use_bin_loss=False, 
num_bins=32): # shape num obj 3x 28 x 28 (RGB), Num img x num obj ... 132 | 133 | num_boxes_per_image = [len(i) for i in pred_instances] 134 | nocs_pred = pred_nocsmap.split(num_boxes_per_image, dim=0) 135 | 136 | if np.array(num_boxes_per_image).sum() == 0: 137 | return 138 | 139 | # instances and predictions always same len just empty 140 | for prob, instances in zip(nocs_pred, pred_instances): 141 | 142 | if len(instances) == 0: 143 | print('No predicted instances found ...') 144 | continue 145 | 146 | num_pred_instances = prob.shape[0] 147 | num_dims = len(prob.shape) 148 | 149 | if use_bin_loss and num_pred_instances != 0 and num_dims == 5: 150 | 151 | x_prob = nocs_prob_to_value(prob, channel=0, num_bins=num_bins) 152 | y_prob = nocs_prob_to_value(prob, channel=1, num_bins=num_bins) 153 | z_prob = nocs_prob_to_value(prob, channel=2, num_bins=num_bins) 154 | prob = torch.cat((x_prob, y_prob, z_prob), dim=1) 155 | 156 | instances.pred_nocs = prob 157 | 158 | 159 | class NocsModel(torch.nn.Module): 160 | """ 161 | Decoder Module NOCS 162 | """ 163 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 164 | super(NocsModel, self).__init__() 165 | 166 | self.input_shape = input_shape 167 | self.use_bin_loss = cfg.MODEL.ROI_NOCS_HEAD.USE_BIN_LOSS 168 | self.num_bins = cfg.MODEL.ROI_NOCS_HEAD.NUM_BINS 169 | 170 | # Layer Definition 171 | if self.use_bin_loss: 172 | self.layer1_R = torch.nn.Sequential( 173 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 174 | torch.nn.ReLU(), 175 | torch.nn.BatchNorm2d(128) 176 | ) 177 | self.layer1_G = torch.nn.Sequential( 178 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 179 | torch.nn.ReLU(), 180 | torch.nn.BatchNorm2d(128) 181 | ) 182 | self.layer1_B = torch.nn.Sequential( 183 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, bias=True, padding=1), # 14 184 | torch.nn.ReLU(), 185 | torch.nn.BatchNorm2d(128) 186 | ) 187 | # Layer 2 188 | self.layer2_R = torch.nn.Sequential( 189 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 190 | torch.nn.ReLU(), 191 | torch.nn.BatchNorm2d(64) 192 | ) 193 | self.layer2_G = torch.nn.Sequential( 194 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 195 | torch.nn.ReLU(), 196 | torch.nn.BatchNorm2d(64) 197 | ) 198 | self.layer2_B = torch.nn.Sequential( 199 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1), 200 | torch.nn.ReLU(), 201 | torch.nn.BatchNorm2d(64) 202 | ) 203 | # Layer 3 204 | self.layer3_R = torch.nn.Sequential( 205 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins R/x - head 206 | #torch.nn.LogSoftmax(dim=1) 207 | ) 208 | self.layer3_G = torch.nn.Sequential( 209 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins G/y - head 210 | #torch.nn.LogSoftmax(dim=1) 211 | ) 212 | self.layer3_B = torch.nn.Sequential( 213 | torch.nn.ConvTranspose2d(64, self.num_bins, kernel_size=3, stride=1, bias=True, padding=1), # 28 x num_bins B/z - head 214 | #torch.nn.LogSoftmax(dim=1) 215 | ) 216 | else: 217 | self.layer0 = torch.nn.Sequential( 218 | torch.nn.ConvTranspose2d(256, 256, kernel_size=3, stride=1, bias=True, padding=1), # 14 219 | torch.nn.ReLU(), 220 | torch.nn.BatchNorm2d(256) 221 | ) 222 | self.layer1 = torch.nn.Sequential( 223 | torch.nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, 
bias=True, padding=1), # 14 224 | torch.nn.ReLU(), 225 | torch.nn.BatchNorm2d(128) 226 | ) 227 | self.layer2 = torch.nn.Sequential( 228 | torch.nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, bias=True, padding=1),# 28 # use kernel size divisible by stride 229 | torch.nn.ReLU(), 230 | torch.nn.BatchNorm2d(64) 231 | ) 232 | self.layer3 = torch.nn.Sequential( 233 | torch.nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, bias=True, padding=1), # 28 x RGB 234 | torch.nn.Sigmoid() 235 | ) 236 | 237 | def forward(self, features): 238 | """ 239 | input features from ROI Pool, dim num instances, 256 x 14 x 14 240 | """ 241 | 242 | if self.use_bin_loss: 243 | R_features = self.layer1_R(features) 244 | R_features = self.layer2_R(R_features) 245 | R_features = torch.unsqueeze(self.layer3_R(R_features), dim=1) # num obj x 1 x num_bins x 28 x 28 246 | 247 | G_features = self.layer1_G(features) 248 | G_features = self.layer2_G(G_features) 249 | G_features = torch.unsqueeze(self.layer3_G(G_features), dim=1) # num obj x 1 x num_bins x 28 x 28 250 | 251 | B_features = self.layer1_B(features) 252 | B_features = self.layer2_B(B_features) 253 | B_features = torch.unsqueeze(self.layer3_B(B_features), dim=1) # num obj x 1 x num_bins x 28 x 28 254 | 255 | features = torch.cat((R_features, G_features, B_features), dim=1).permute(0, 2, 1, 3, 4).contiguous() # num obj x num_bins x 3 x 28 x 28 256 | 257 | else: 258 | features = self.layer0(features) 259 | features = self.layer1(features) 260 | features = self.layer2(features) 261 | features = self.layer3(features) # num obj x 3 x 28 x 28 262 | 263 | return features 264 | 265 | 266 | @ROI_NOCS_HEAD_REGISTRY.register() 267 | class NocsDecoder(nn.Module): 268 | """ 269 | A Nocs head with upsample layer (with `ConvTranspose2d`). 
270 | """ 271 | 272 | def __init__(self, cfg, input_shape): 273 | super(NocsDecoder, self).__init__() 274 | 275 | ### Model 276 | self.nocs_layers = NocsModel(cfg, input_shape) 277 | init_weights(self.nocs_layers, init_type='kaiming', init_gain=0.02) 278 | 279 | 280 | def forward(self, x): 281 | 282 | x = self.nocs_layers(x) #BS x C x H x W 283 | 284 | return x 285 | 286 | 287 | def build_nocs_head(cfg, input_shape): 288 | name = cfg.MODEL.ROI_NOCS_HEAD.NAME 289 | return ROI_NOCS_HEAD_REGISTRY.get(name)(cfg, input_shape) 290 | -------------------------------------------------------------------------------- /Detection/register_dataset.py: -------------------------------------------------------------------------------- 1 | # import some common libraries 2 | import torch 3 | import numpy as np 4 | import os, json, cv2, random, csv, pickle, sys 5 | import h5py 6 | from pycocotools.coco import COCO 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | # import some common detectron2 utilities 11 | from detectron2.utils.visualizer import Visualizer 12 | from detectron2.data import MetadataCatalog, DatasetCatalog 13 | from detectron2.structures import BoxMode 14 | from detectron2.structures import polygons_to_bitmask 15 | from detectron2.utils.visualizer import GenericMask 16 | 17 | from PIL import Image 18 | 19 | sys.path.append('..') #Hack add ROOT DIR 20 | from baseconfig import CONF 21 | 22 | from BlenderProc.utils import binvox_rw 23 | from Detection.utils.train_utils import get_voxel 24 | 25 | 26 | # Define directory to images 27 | IMG_DIR = CONF.PATH.DETECTDATA 28 | 29 | # custom dataset registration 30 | class RegisterDataset: 31 | 32 | def __init__(self, mapping_list, name_list, img_dir=IMG_DIR): 33 | self.img_dir = img_dir 34 | self.mapping_list = list(mapping_list) 35 | self.name_list = list(name_list) 36 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 37 | 38 | def get_front_dicts(self, img_path): 39 | 40 | mapping_file = os.path.join(self.img_dir, "3D_front_mapping.csv") 41 | _, csv_dict = self.read_csv_mapping(mapping_file) 42 | 43 | folders = os.listdir(img_path) 44 | 45 | dataset_dicts = [] 46 | for folder in folders: 47 | 48 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 49 | 50 | with open(json_file) as f: 51 | imgs_anns = json.load(f) 52 | 53 | camposes = [] 54 | all_objs = [] 55 | for idx, v in enumerate(imgs_anns['images']): 56 | 57 | record = {} 58 | 59 | filename = os.path.join(img_path, folder, 'coco_data', v["file_name"]) 60 | depth_name = os.path.join(img_path, folder, str(idx) + '.hdf5') 61 | 62 | record["file_name"] = filename 63 | record["image_id"] = str(v['id']) + '_' + folder[:8] 64 | record["height"] = v['height'] 65 | record["width"] = v['width'] 66 | # record["nocs_map"] = self.get_nocs(v["file_name"], img_path, folder) 67 | # record["depth_map"], record['campose'] = self.load_hdf5(depth_name) 68 | record["nocs_map"] = filename.replace('rgb', 'nocs') 69 | record["depth_map"] = depth_name 70 | record["campose"] = self.load_campose(depth_name) 71 | 72 | depth = [] 73 | objs = [] 74 | voxels = [] 75 | boxes = [] 76 | segmap_store = [] 77 | category = [] 78 | object_ids = [] 79 | gt_rotations = [] 80 | gt_locations = [] 81 | gt_3dbox = [] 82 | gt_scales = [] 83 | 84 | for anno in imgs_anns['annotations']: 85 | if anno['image_id'] == v['id']: 86 | cat_id = anno['category_id'] 87 | object_id = anno['id'] 88 | jid = anno['jid'] 89 | scale = np.array(anno['3Dscale']) 90 | 91 | #voxel = 
os.path.join(CONF.PATH.FUTURE3D, jid, 'model.binvox') 92 | voxel = os.path.join(CONF.PATH.VOXELDATA, jid, 'model.binvox') 93 | name = csv_dict[cat_id] 94 | 95 | #nocs_obj = self.crop_segmask(record["nocs_map"], anno['bbox'], anno['segmentation']) 96 | #depth_obj = self.crop_depth(record["depth_map"], anno['bbox'], anno['segmentation']) 97 | 98 | if not name in self.name_list: 99 | self.name_list.append(name) 100 | 101 | if cat_id in self.mapping_list: 102 | id = self.mapping_list.index(cat_id) 103 | else: 104 | self.mapping_list.append(cat_id) 105 | id = self.mapping_list.index(cat_id) 106 | 107 | obj = { 108 | "bbox": anno['bbox'], 109 | "bbox_mode": BoxMode.XYWH_ABS, 110 | "segmentation": anno['segmentation'], 111 | "category_id": id, 112 | "voxel": voxel, 113 | "scale": scale, 114 | "jid": jid, 115 | "id": object_id, 116 | } 117 | objs.append(obj) 118 | segmap_store.append(anno['segmentation']) 119 | voxels.append(voxel) 120 | category.append(id) 121 | boxes.append(anno['bbox']) 122 | #depth.append(depth_obj) 123 | object_ids.append(object_id) 124 | gt_rotations.append(anno['3Drot']) 125 | anno_3dloc = self.add_halfheight(anno['3Dloc'].copy(), anno['3Dbbox']) 126 | gt_locations.append(anno_3dloc) 127 | gt_3dbox.append(np.array(anno['3Dbbox'])) 128 | gt_scales.append(scale) 129 | 130 | record['cat_id'] = category # starts at 0 131 | record['vox'] = voxels 132 | record['segmap'] = segmap_store 133 | record['boxes'] = boxes 134 | record["annotations"] = objs 135 | record['object_id'] = object_ids 136 | record['rotations'] = gt_rotations 137 | record['locations'] = gt_locations 138 | record['3dboxes'] = gt_3dbox 139 | record['3dscales'] = gt_scales 140 | #all_objs.append(objs) 141 | #camposes.append(record['campose']) 142 | dataset_dicts.append(record) 143 | 144 | ''' 145 | with open('optimization.pickle', 'wb') as handle: 146 | all_objs.append(camposes) 147 | pickle.dump(all_objs, handle, protocol=pickle.HIGHEST_PROTOCOL) 148 | sys.exit() 149 | ''' 150 | 151 | return dataset_dicts 152 | 153 | def get_eval_dicts(self, img_path): 154 | 155 | mapping_file = os.path.join(self.img_dir, "3D_front_mapping.csv") 156 | _, csv_dict = self.read_csv_mapping(mapping_file) 157 | 158 | folders = os.listdir(img_path) 159 | 160 | dataset_dicts = [] 161 | for folder in folders: 162 | 163 | json_file = os.path.join(img_path, folder, "coco_data/coco_annotations.json") 164 | 165 | with open(json_file) as f: 166 | imgs_anns = json.load(f) 167 | 168 | for idx, v in enumerate(imgs_anns['images']): 169 | if idx == 0: 170 | record = {} 171 | 172 | filename = os.path.join(img_path, folder, 'coco_data', v["file_name"]) 173 | 174 | record["file_name"] = filename 175 | record["image_id"] = str(v['id']) + '_' + folder[:8] 176 | record["height"] = v['height'] 177 | record["width"] = v['width'] 178 | record["nocs_map"] = self.get_nocs(v["file_name"], img_path, folder) 179 | 180 | objs = [] 181 | for anno in imgs_anns['annotations']: 182 | if anno['image_id'] == v['id']: 183 | jid = anno['jid'] 184 | voxel = get_voxel(os.path.join(CONF.PATH.VOXELDATA, jid, 'model.binvox'), np.array(anno['3Dscale'])) 185 | cat_id = anno['category_id'] 186 | name = csv_dict[cat_id] 187 | nocs_obj = self.crop_segmask(record["nocs_map"], anno['bbox'], anno['segmentation']) 188 | if not name in self.name_list: 189 | self.name_list.append(name) 190 | 191 | if cat_id in self.mapping_list: 192 | id = self.mapping_list.index(cat_id) 193 | else: 194 | self.mapping_list.append(cat_id) 195 | id = self.mapping_list.index(cat_id) 196 | 197 | obj = { 198 
| "bbox": anno['bbox'], 199 | "bbox_mode": BoxMode.XYWH_ABS, 200 | "segmentation": anno['segmentation'], 201 | "category_id": id, 202 | "voxel": voxel, 203 | "nocs": nocs_obj, 204 | } 205 | objs.append(obj) 206 | 207 | record["annotations"] = objs 208 | dataset_dicts.append(record) 209 | 210 | return dataset_dicts 211 | 212 | # register train and val dataset 213 | def reg_dset(self): 214 | for d in ["train", "val", "test"]: 215 | DatasetCatalog.register("front_" + d, lambda d=d: self.get_front_dicts(self.img_dir + d)) 216 | MetadataCatalog.get("front_" + d).set(thing_classes=self.name_list) 217 | print("Registered Dataset") 218 | 219 | # data mean, std 220 | def calculate_mean_std(self): 221 | dataset_dicts = self.get_front_dicts(os.path.join(self.img_dir, 'train')) 222 | 223 | data_mean = np.zeros((1, 3)) 224 | data_std = np.zeros((1, 3)) 225 | data_len = len(dataset_dicts) 226 | 227 | for idx, d in enumerate(dataset_dicts): 228 | img = cv2.imread(d["file_name"]) 229 | data_mean = data_mean + np.mean(img, axis=(0, 1)) / data_len 230 | data_std = data_std + np.std(img, axis=(0, 1)) / data_len 231 | print("data mean", data_mean) 232 | return data_mean, data_std 233 | 234 | # visualize annotations 235 | def vis_annotation(self, num_imgs=1): 236 | front_metadata = MetadataCatalog.get("front_train") 237 | dataset_dicts = self.get_front_dicts(os.path.join(self.img_dir, 'train')) 238 | 239 | for d in random.sample(dataset_dicts, num_imgs): 240 | img = cv2.imread(d["file_name"]) 241 | visualizer = Visualizer(img[:, :, ::-1], metadata=front_metadata, scale=1) 242 | out = visualizer.draw_dataset_dict(d) 243 | cv2.imshow('image', out.get_image()[:, :, ::-1]) 244 | cv2.waitKey(500) 245 | 246 | # evaluate annotations 247 | def eval_annotation(self): 248 | front_metadata = MetadataCatalog.get("front_train") 249 | dataset_dicts = self.get_eval_dicts(os.path.join(self.img_dir, 'train')) 250 | 251 | for idx, d in enumerate(dataset_dicts): 252 | img = cv2.imread(d["file_name"]) 253 | visualizer = Visualizer(img[:, :, ::-1], metadata=front_metadata, scale=1) 254 | out = visualizer.draw_dataset_dict(d) 255 | cv2.imshow('image', out.get_image()[:, :, ::-1]) 256 | print("image id: ", idx, " image name: ", d["file_name"]) 257 | cv2.waitKey(0) 258 | 259 | @staticmethod 260 | def read_csv_mapping(path): 261 | """ Loads an idset mapping from a csv file, assuming the rows are sorted by their ids. 
262 | :param path: Path to csv file 263 | """ 264 | 265 | with open(path, 'r') as csvfile: 266 | reader = csv.DictReader(csvfile) 267 | new_id_label_map = [] 268 | new_label_id_map = {} 269 | 270 | for row in reader: 271 | new_id_label_map.append(row["name"]) 272 | new_label_id_map[int(row["id"])] = row["name"] 273 | 274 | return new_id_label_map, new_label_id_map 275 | 276 | @staticmethod 277 | def write_pickle(img_dir, filename, pickle_data): 278 | 279 | filepath = os.path.join(img_dir, filename + ".pkl") 280 | print("PATH",filepath) 281 | if 'train' in img_dir: 282 | print('intrain') 283 | with open(filepath, 'wb') as f: 284 | pickle.dump(pickle_data, f) 285 | 286 | @staticmethod 287 | def load_pickle(self,img_dir, filename): 288 | 289 | if 'val' in img_dir: 290 | filepath = os.path.join(img_dir[:-4],'train', filename + ".pkl") 291 | with open(filepath, 'rb') as f: 292 | data = pickle.load(f) 293 | return data[0], data[1] 294 | else: 295 | return [],[] 296 | 297 | @staticmethod 298 | def get_nocs(filename, img_path, folder): 299 | nocs_name = filename.replace('rgb', 'nocs') 300 | nocs_path = os.path.join(img_path, folder, 'coco_data', nocs_name) 301 | nocs = cv2.imread(nocs_path) #BGRA 302 | nocs = nocs[:,:,:3] 303 | nocs = nocs[:, :, ::-1] # RGB 304 | 305 | nocs = np.array(nocs, dtype=np.float32) / 255 306 | 307 | return nocs 308 | 309 | @staticmethod 310 | def crop_segmask(nocs_img, bbox, segmap): 311 | 312 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 313 | # width = torch.abs(abs_bbox[2] - abs_bbox[0]) 314 | # height = torch.abs(abs_bbox[3] - abs_bbox[1]) 315 | 316 | gm = GenericMask(segmap, 240, 320) 317 | bin_mask = gm.polygons_to_mask(segmap) 318 | binary_mask = bin_mask[:,:, None] 319 | crop_im = np.multiply(nocs_img,binary_mask) 320 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2]),:]) 321 | # cropped_im = np.clip(cropped_im, 0, 1) 322 | 323 | cropped_im[cropped_im == 0] = 1 324 | 325 | return torch.from_numpy(cropped_im).to(torch.float32) 326 | 327 | @staticmethod 328 | def load_campose(path): 329 | 330 | with h5py.File(path, 'r') as data: 331 | for key in data.keys(): 332 | if key == 'campose': 333 | campose = np.array(data[key]) 334 | 335 | return campose 336 | 337 | @staticmethod 338 | def load_hdf5(path): 339 | 340 | with h5py.File(path, 'r') as data: 341 | for key in data.keys(): 342 | if key == 'depth': 343 | depth = np.array(data[key]) 344 | elif key == 'campose': 345 | campose = np.array(data[key]) 346 | 347 | return depth, campose 348 | 349 | @staticmethod 350 | def crop_depth(depth_img, bbox, segmap): 351 | 352 | abs_bbox = torch.tensor(BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS), dtype=torch.float32) 353 | 354 | gm = GenericMask(segmap, 240, 320) 355 | bin_mask = gm.polygons_to_mask(segmap) 356 | binary_mask = bin_mask[:, :] 357 | crop_im = np.multiply(depth_img, binary_mask) 358 | #crop_im[crop_im == 0] = 255 359 | cropped_im = np.array(crop_im[int(abs_bbox[1]):int(abs_bbox[3]),int(abs_bbox[0]):int(abs_bbox[2])]) 360 | 361 | return torch.from_numpy(cropped_im).to(torch.float32) 362 | 363 | @staticmethod 364 | def add_halfheight(location, box): 365 | ''' 366 | Object location z-center is at bottom, calculate half height of the object 367 | and add to shift z-center to correct location 368 | ''' 369 | z_coords = [] 370 | for pt in box: 371 | z = pt[-1] 372 | z_coords.append(z) 373 | z_coords = np.array(z_coords) 374 | half_height = 
np.abs(z_coords.max() - z_coords.min()) / 2 375 | location[-1] = half_height # annotated location sits at the object bottom; set z to half the height to reach the center 376 | 377 | return location --------------------------------------------------------------------------------
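The frPyObjects dispatch at the top of this section routes a Python object to frBbox, frPoly or frUncompressedRLE depending on its shape. Below is a small sketch of the same dispatch exercised through the standard pycocotools front-end; the image size, box and polygon values are made up for illustration.

from pycocotools import mask as maskUtils

h, w = 240, 320
bbox = [10.0, 20.0, 50.0, 60.0]              # one box [x, y, w, h] -> len(pyobj[0]) == 4 -> frBbox branch
poly = [[10, 20, 60, 20, 60, 80, 10, 80]]    # list of flat polygons -> len(pyobj[0]) > 4 -> frPoly branch

rle_box = maskUtils.frPyObjects([bbox], h, w)[0]
rle_poly = maskUtils.frPyObjects(poly, h, w)[0]
print(maskUtils.area(rle_poly), maskUtils.toBbox(rle_box))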
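
cad2world_mat in Tracking/utils/vis_utils.py bakes a uniform scale into the rotation block before writing the translation column, and get_scale/unscale_mat later recover that scale as the column norms of the upper-left 3x3. A minimal NumPy sketch of the same round trip; compose_cad2world and the example values are illustrative stand-ins, not part of the repository.

import numpy as np

def compose_cad2world(rot3x3, loc, scale):
    # illustrative stand-in for cad2world_mat: scale * rotation, then translation
    m = np.eye(4)
    m[:3, :3] = np.diag([scale] * 3) @ rot3x3
    m[:3, 3] = loc
    return m

yaw = np.pi / 4
rot = np.array([[np.cos(yaw), -np.sin(yaw), 0.0],
                [np.sin(yaw),  np.cos(yaw), 0.0],
                [0.0,          0.0,         1.0]])
m = compose_cad2world(rot, loc=[1.0, 2.0, 0.5], scale=0.8)

# recover the scale as column norms, as get_scale/unscale_mat do
recovered_scale = np.linalg.norm(m[:3, :3], axis=0)   # approx. [0.8, 0.8, 0.8]
unscaled_rot = m[:3, :3] / recovered_scale            # pure rotation again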
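
fuse_pose and fuse_pose_F2F interpolate keyframe rotations with scipy's Slerp and smooth per-axis translations with gaussian_filter1d. The sketch below shows that core idea under simplifying assumptions: interpolate_track is a hypothetical helper, and it linearly interpolates translations between keyframes instead of carrying the last observed value forward and clipping the smoothed yaw as the repository code does.

import numpy as np
from scipy.spatial.transform import Rotation as R, Slerp
from scipy.ndimage import gaussian_filter1d

def interpolate_track(key_times, key_rots, key_trans, sigma=3):
    # hypothetical helper: key_times sorted ints, key_rots (K,3,3), key_trans (K,3)
    times = np.arange(key_times[0], key_times[-1] + 1)
    # rotations: spherical linear interpolation between keyframes
    slerp = Slerp(key_times, R.from_matrix(key_rots))
    rots = slerp(times).as_matrix()                       # (T, 3, 3)
    # translations: per-axis interpolation followed by Gaussian smoothing
    trans = np.stack([np.interp(times, key_times, key_trans[:, d]) for d in range(3)], axis=1)
    trans = gaussian_filter1d(trans, sigma=sigma, axis=0)
    return times, rots, trans

# example: two keyframes, 90 degrees of yaw apart
kt = np.array([0, 10])
kr = R.from_euler('z', [0.0, np.pi / 2]).as_matrix()
ktr = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
t, rot, tr = interpolate_track(kt, kr, ktr)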
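
When USE_BIN_LOSS is enabled, NocsModel predicts per-bin logits for each colour channel and nocs_inference converts them back to coordinate values via nocs_prob_to_value (defined in Detection/utils/train_utils.py, not shown here). bins_to_coords below is only a guess at a typical argmax-style decoding and may differ from the actual implementation.

import torch

def bins_to_coords(logits: torch.Tensor, num_bins: int = 32) -> torch.Tensor:
    # hypothetical decoder: logits (N, num_bins, H, W) for one channel -> values in [0, 1]
    bin_idx = logits.argmax(dim=1, keepdim=True)        # most likely bin per pixel
    return (bin_idx.float() + 0.5) / num_bins           # map bin index to its centre value

logits = torch.randn(2, 32, 28, 28)
coords = bins_to_coords(logits)                          # (2, 1, 28, 28)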
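
Finally, a hedged usage sketch for RegisterDataset. It assumes the rendered 3D-FRONT data and the CONF paths referenced above are already in place and that the repository root is importable (the repository itself relies on sys.path hacks); the actual training entry point is not reproduced here.

from detectron2.data import DatasetCatalog, MetadataCatalog
from Detection.register_dataset import RegisterDataset   # import path assumes the repo root is on sys.path

reg = RegisterDataset(mapping_list=[], name_list=[])      # lists are filled while the annotations are parsed
reg.reg_dset()                                            # registers "front_train", "front_val", "front_test"

# building the dicts is lazy: the first get() walks the coco_annotations.json files under IMG_DIR
train_dicts = DatasetCatalog.get("front_train")
print(len(train_dicts), MetadataCatalog.get("front_train").thing_classes)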