├── .gitignore ├── LICENSE ├── README.md ├── benchmark ├── eval.py └── predict.py ├── data └── scannet │ ├── README.md │ ├── batch_load_scannet_data.py │ ├── load_scannet_data.py │ ├── meta_data │ ├── nyu40_labels.csv │ ├── scannet_means.npz │ ├── scannet_reference_means.npz │ ├── scannetv2-labels.combined.tsv │ ├── scannetv2.txt │ ├── scannetv2_test.txt │ ├── scannetv2_train.txt │ └── scannetv2_val.txt │ ├── model_util_scannet.py │ ├── scannet_utils.py │ └── visualize.py ├── demo └── ScanRefer.gif ├── docs ├── browser.png ├── davezchen_eccv2020_scanrefer.pdf ├── index.html ├── paper.jpg ├── teaser.png └── w3.css ├── lib ├── ap_helper.py ├── config.py ├── dataset.py ├── enet.py ├── eval_helper.py ├── loss.py ├── loss_helper.py ├── pointnet2 │ ├── _ext_src │ │ ├── include │ │ │ ├── ball_query.h │ │ │ ├── cuda_utils.h │ │ │ ├── group_points.h │ │ │ ├── interpolate.h │ │ │ ├── sampling.h │ │ │ └── utils.h │ │ └── src │ │ │ ├── ball_query.cpp │ │ │ ├── ball_query_gpu.cu │ │ │ ├── bindings.cpp │ │ │ ├── group_points.cpp │ │ │ ├── group_points_gpu.cu │ │ │ ├── interpolate.cpp │ │ │ ├── interpolate_gpu.cu │ │ │ ├── sampling.cpp │ │ │ └── sampling_gpu.cu │ ├── _version.py │ ├── pointnet2_modules.py │ ├── pointnet2_test.py │ ├── pointnet2_utils.py │ ├── pytorch_utils.py │ └── setup.py ├── projection.py └── solver.py ├── models ├── backbone_module.py ├── lang_module.py ├── match_module.py ├── proposal_module.py ├── refnet.py └── voting_module.py ├── requirements.txt ├── scripts ├── compute_multiview_features.py ├── eval.py ├── project_multiview_features.py ├── project_multiview_labels.py ├── train.py └── visualize.py └── utils ├── box_util.py ├── eta.py ├── eval_det.py ├── metric_util.py ├── nms.py ├── nn_distance.py └── pc_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # dataset 2 | data/scanrefer* 3 | data/ScanRefer* 4 | data/glove* 5 | data/scannet/scannet_data 6 | data/scannet/scans 7 | data/scannetv2_enet.pth 8 | 9 | # cache 10 | data/scannet/__pycache__ 11 | lib/__pycache__ 12 | lib/pointnet2/__pycache__ 13 | models/__pycache__ 14 | utils/__pycache__ 15 | .DS_Store 16 | 17 | # pointnet2 18 | lib/pointnet2/build/ 19 | lib/pointnet2/dist/ 20 | lib/pointnet2/pointnet2.egg-info/ 21 | 22 | # output 23 | outputs/ 24 | 25 | # delete 26 | docs/.DS_Store 27 | demo/.DS_Store 28 | 29 | # misc 30 | upload/ -------------------------------------------------------------------------------- /benchmark/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import argparse 5 | 6 | import numpy as np 7 | 8 | from tqdm import tqdm 9 | 10 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 11 | from lib.config import CONF 12 | from utils.box_util import box3d_iou 13 | 14 | SCANREFER_GT = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_test_gt_bbox.json"))) 15 | 16 | def organize_gt(): 17 | organized = {} 18 | 19 | for data in SCANREFER_GT: 20 | scene_id = data["scene_id"] 21 | object_id = data["object_id"] 22 | ann_id = data["ann_id"] 23 | 24 | if scene_id not in organized: 25 | organized[scene_id] = {} 26 | 27 | if object_id not in organized[scene_id]: 28 | organized[scene_id][object_id] = {} 29 | 30 | if ann_id not in organized[scene_id][object_id]: 31 | organized[scene_id][object_id][ann_id] = {} 32 | 33 | organized[scene_id][object_id][ann_id] = data 34 | 35 | return organized 36 | 37 | def evaluate(args): 38 | pred_path = 
os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json") 39 | if not os.path.isfile(pred_path): 40 | print("please run `benchmark/predict.py` first to generate bounding boxes") 41 | exit() 42 | 43 | organized_gt = organize_gt() 44 | 45 | with open(pred_path) as f: 46 | predictions = json.load(f) 47 | ious = [] 48 | masks = [] 49 | others = [] 50 | print("evaluating...") 51 | for data in tqdm(predictions): 52 | scene_id = data["scene_id"] 53 | object_id = data["object_id"] 54 | ann_id = data["ann_id"] 55 | pred_bbox = np.array(data["bbox"]) 56 | mask = data["unique_multiple"] 57 | other = data["others"] 58 | 59 | try: 60 | gt_bbox = np.array(organized_gt[scene_id][object_id][ann_id]["bbox"]) 61 | # iou, _ = box3d_iou(pred_bbox, gt_bbox) 62 | iou = box3d_iou(pred_bbox, gt_bbox) 63 | 64 | except KeyError: 65 | iou = 0 66 | 67 | ious.append(iou) 68 | masks.append(mask) 69 | others.append(other) 70 | 71 | # ious = np.array(ious) 72 | # iou_rate_025 = ious[ious >= 0.25].shape[0] / ious.shape[0] 73 | # iou_rate_05 = ious[ious >= 0.5].shape[0] / ious.shape[0] 74 | 75 | # print("\nAcc@0.25IoU: {}".format(iou_rate_025)) 76 | # print("Acc@0.5IoU: {}".format(iou_rate_05)) 77 | 78 | ious = np.array(ious) 79 | masks = np.array(masks) 80 | others = np.array(others) 81 | 82 | multiple_dict = { 83 | "unique": 0, 84 | "multiple": 1 85 | } 86 | others_dict = { 87 | "not_in_others": 0, 88 | "in_others": 1 89 | } 90 | 91 | # evaluation stats 92 | stats = {k: np.sum(masks == v) for k, v in multiple_dict.items()} 93 | stats["overall"] = masks.shape[0] 94 | stats = {} 95 | for k, v in multiple_dict.items(): 96 | stats[k] = {} 97 | for k_o, v_o in others_dict.items(): 98 | stats[k][k_o] = np.sum(np.logical_and(masks == v, others == v_o)) 99 | 100 | stats[k]["overall"] = np.sum(masks == v) 101 | 102 | stats["overall"] = {} 103 | for k_o, v_o in others_dict.items(): 104 | stats["overall"][k_o] = np.sum(others == v_o) 105 | 106 | stats["overall"]["overall"] = masks.shape[0] 107 | 108 | # aggregate scores 109 | scores = {} 110 | for k, v in multiple_dict.items(): 111 | for k_o in others_dict.keys(): 112 | acc_025iou = ious[np.logical_and(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o]), ious >= 0.25)].shape[0] \ 113 | / ious[np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])].shape[0] \ 114 | if np.sum(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])) > 0 else 0 115 | acc_05iou = ious[np.logical_and(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o]), ious >= 0.5)].shape[0] \ 116 | / ious[np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])].shape[0] \ 117 | if np.sum(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])) > 0 else 0 118 | 119 | if k not in scores: 120 | scores[k] = {k_o: {} for k_o in others_dict.keys()} 121 | 122 | scores[k][k_o]["acc@0.25iou"] = acc_025iou 123 | scores[k][k_o]["acc@0.5iou"] = acc_05iou 124 | 125 | acc_025iou = ious[np.logical_and(masks == multiple_dict[k], ious >= 0.25)].shape[0] \ 126 | / ious[masks == multiple_dict[k]].shape[0] if np.sum(masks == multiple_dict[k]) > 0 else 0 127 | acc_05iou = ious[np.logical_and(masks == multiple_dict[k], ious >= 0.5)].shape[0] \ 128 | / ious[masks == multiple_dict[k]].shape[0] if np.sum(masks == multiple_dict[k]) > 0 else 0 129 | 130 | scores[k]["overall"] = {} 131 | scores[k]["overall"]["acc@0.25iou"] = acc_025iou 132 | scores[k]["overall"]["acc@0.5iou"] = acc_05iou 133 | 134 | scores["overall"] = {} 135 | for k_o in 
others_dict.keys(): 136 | acc_025iou = ious[np.logical_and(others == others_dict[k_o], ious >= 0.25)].shape[0] \ 137 | / ious[others == others_dict[k_o]].shape[0] if np.sum(others == others_dict[k_o]) > 0 else 0 138 | acc_05iou = ious[np.logical_and(others == others_dict[k_o], ious >= 0.5)].shape[0] \ 139 | / ious[others == others_dict[k_o]].shape[0] if np.sum(others == others_dict[k_o]) > 0 else 0 140 | 141 | # aggregate 142 | scores["overall"][k_o] = {} 143 | scores["overall"][k_o]["acc@0.25iou"] = acc_025iou 144 | scores["overall"][k_o]["acc@0.5iou"] = acc_05iou 145 | 146 | acc_025iou = ious[ious >= 0.25].shape[0] / ious.shape[0] 147 | acc_05iou = ious[ious >= 0.5].shape[0] / ious.shape[0] 148 | 149 | 150 | # aggregate 151 | scores["overall"]["overall"] = {} 152 | scores["overall"]["overall"]["acc@0.25iou"] = acc_025iou 153 | scores["overall"]["overall"]["acc@0.5iou"] = acc_05iou 154 | 155 | # report 156 | print("\nstats:") 157 | for k_s in stats.keys(): 158 | for k_o in stats[k_s].keys(): 159 | print("{} | {}: {}".format(k_s, k_o, stats[k_s][k_o])) 160 | 161 | for k_s in scores.keys(): 162 | print("\n{}:".format(k_s)) 163 | for k_m in scores[k_s].keys(): 164 | for metric in scores[k_s][k_m].keys(): 165 | print("{} | {} | {}: {}".format(k_s, k_m, metric, scores[k_s][k_m][metric])) 166 | 167 | if __name__ == "__main__": 168 | parser = argparse.ArgumentParser() 169 | parser.add_argument("--folder", type=str, help="Folder containing the model") 170 | args = parser.parse_args() 171 | 172 | evaluate(args) -------------------------------------------------------------------------------- /benchmark/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import pickle 5 | import argparse 6 | import importlib 7 | import torch 8 | import torch.optim as optim 9 | import torch.nn as nn 10 | import numpy as np 11 | 12 | from torch.utils.data import DataLoader 13 | from datetime import datetime 14 | from tqdm import tqdm 15 | from copy import deepcopy 16 | 17 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 18 | from lib.config import CONF 19 | from lib.dataset import ScannetReferenceDataset 20 | from lib.solver import Solver 21 | from lib.ap_helper import APCalculator, parse_predictions, parse_groundtruths 22 | from lib.loss_helper import get_loss 23 | from lib.eval_helper import get_eval 24 | from models.refnet import RefNet 25 | from utils.box_util import get_3d_box 26 | from data.scannet.model_util_scannet import ScannetDatasetConfig 27 | 28 | SCANREFER_TEST = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_test.json"))) 29 | 30 | def get_dataloader(args, scanrefer, all_scene_list, split, config): 31 | dataset = ScannetReferenceDataset( 32 | scanrefer=scanrefer, 33 | scanrefer_all_scene=all_scene_list, 34 | split=split, 35 | num_points=args.num_points, 36 | use_color=args.use_color, 37 | use_height=(not args.no_height), 38 | use_normal=args.use_normal, 39 | use_multiview=args.use_multiview 40 | ) 41 | print("predict for {} samples".format(len(dataset))) 42 | 43 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False) 44 | 45 | return dataset, dataloader 46 | 47 | def get_model(args, config): 48 | # load model 49 | input_channels = int(args.use_multiview) * 128 + int(args.use_normal) * 3 + int(args.use_color) * 3 + int(not args.no_height) 50 | model = RefNet( 51 | num_class=config.num_class, 52 | num_heading_bin=config.num_heading_bin, 53 | 
num_size_cluster=config.num_size_cluster, 54 | mean_size_arr=config.mean_size_arr, 55 | num_proposal=args.num_proposals, 56 | input_feature_dim=input_channels, 57 | use_lang_classifier=(not args.no_lang_cls), 58 | use_bidir=args.use_bidir 59 | ).cuda() 60 | 61 | model_name = "model.pth" 62 | path = os.path.join(CONF.PATH.OUTPUT, args.folder, model_name) 63 | model.load_state_dict(torch.load(path), strict=False) 64 | model.eval() 65 | 66 | return model 67 | 68 | def get_scannet_scene_list(split): 69 | scene_list = sorted([line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET_META, "scannetv2_{}.txt".format(split)))]) 70 | 71 | return scene_list 72 | 73 | def get_scanrefer(args): 74 | scanrefer = SCANREFER_TEST 75 | scene_list = sorted(list(set([data["scene_id"] for data in scanrefer]))) 76 | scanrefer = [data for data in scanrefer if data["scene_id"] in scene_list] 77 | 78 | return scanrefer, scene_list 79 | 80 | def predict(args): 81 | print("predict bounding boxes...") 82 | # constant 83 | DC = ScannetDatasetConfig() 84 | 85 | # init training dataset 86 | print("preparing data...") 87 | scanrefer, scene_list = get_scanrefer(args) 88 | 89 | # dataloader 90 | _, dataloader = get_dataloader(args, scanrefer, scene_list, "test", DC) 91 | 92 | # model 93 | model = get_model(args, DC) 94 | 95 | # config 96 | POST_DICT = { 97 | "remove_empty_box": True, 98 | "use_3d_nms": True, 99 | "nms_iou": 0.25, 100 | "use_old_type_nms": False, 101 | "cls_nms": True, 102 | "per_class_proposal": True, 103 | "conf_thresh": 0.05, 104 | "dataset_config": DC 105 | } if not args.no_nms else None 106 | 107 | # predict 108 | print("predicting...") 109 | pred_bboxes = [] 110 | for data_dict in tqdm(dataloader): 111 | for key in data_dict: 112 | data_dict[key] = data_dict[key].cuda() 113 | 114 | # feed 115 | data_dict = model(data_dict) 116 | _, data_dict = get_loss( 117 | data_dict=data_dict, 118 | config=DC, 119 | detection=False, 120 | reference=True 121 | ) 122 | 123 | objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long() 124 | 125 | if POST_DICT: 126 | _ = parse_predictions(data_dict, POST_DICT) 127 | nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda() 128 | 129 | # construct valid mask 130 | pred_masks = (nms_masks * objectness_preds_batch == 1).float() 131 | else: 132 | # construct valid mask 133 | pred_masks = (objectness_preds_batch == 1).float() 134 | 135 | pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks, 1) # (B,) 136 | pred_center = data_dict['center'] # (B,K,3) 137 | pred_heading_class = torch.argmax(data_dict['heading_scores'], -1) # B,num_proposal 138 | pred_heading_residual = torch.gather(data_dict['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 139 | pred_heading_class = pred_heading_class # B,num_proposal 140 | pred_heading_residual = pred_heading_residual.squeeze(2) # B,num_proposal 141 | pred_size_class = torch.argmax(data_dict['size_scores'], -1) # B,num_proposal 142 | pred_size_residual = torch.gather(data_dict['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 143 | pred_size_class = pred_size_class 144 | pred_size_residual = pred_size_residual.squeeze(2) # B,num_proposal,3 145 | 146 | for i in range(pred_ref.shape[0]): 147 | # compute the iou 148 | pred_ref_idx = pred_ref[i] 149 | pred_obb = DC.param2obb( 150 | pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(), 151 | pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(), 152 | 
pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(), 153 | pred_size_class[i, pred_ref_idx].detach().cpu().numpy(), 154 | pred_size_residual[i, pred_ref_idx].detach().cpu().numpy() 155 | ) 156 | pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3]) 157 | 158 | # construct the multiple mask 159 | multiple = data_dict["unique_multiple"][i].item() 160 | 161 | # construct the others mask 162 | others = 1 if data_dict["object_cat"][i] == 17 else 0 163 | 164 | # store data 165 | scanrefer_idx = data_dict["scan_idx"][i].item() 166 | pred_data = { 167 | "scene_id": scanrefer[scanrefer_idx]["scene_id"], 168 | "object_id": scanrefer[scanrefer_idx]["object_id"], 169 | "ann_id": scanrefer[scanrefer_idx]["ann_id"], 170 | "bbox": pred_bbox.tolist(), 171 | "unique_multiple": multiple, 172 | "others": others 173 | } 174 | pred_bboxes.append(pred_data) 175 | 176 | # dump 177 | print("dumping...") 178 | pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json") 179 | with open(pred_path, "w") as f: 180 | json.dump(pred_bboxes, f, indent=4) 181 | 182 | print("done!") 183 | 184 | if __name__ == "__main__": 185 | parser = argparse.ArgumentParser() 186 | parser.add_argument("--folder", type=str, help="Folder containing the model") 187 | parser.add_argument("--gpu", type=str, help="gpu", default="0") 188 | parser.add_argument("--batch_size", type=int, help="batch size", default=8) 189 | parser.add_argument("--num_points", type=int, default=40000, help="Point Number [default: 40000]") 190 | parser.add_argument("--num_proposals", type=int, default=256, help="Proposal number [default: 256]") 191 | parser.add_argument("--seed", type=int, default=42, help="random seed") 192 | parser.add_argument("--no_height", action="store_true", help="Do NOT use height signal in input.") 193 | parser.add_argument("--no_lang_cls", action="store_true", help="Do NOT use language classifier.") 194 | parser.add_argument("--no_nms", action="store_true", help="do NOT use non-maximum suppression for post-processing.") 195 | parser.add_argument("--use_color", action="store_true", help="Use RGB color in input.") 196 | parser.add_argument("--use_normal", action="store_true", help="Use RGB color in input.") 197 | parser.add_argument("--use_multiview", action="store_true", help="Use multiview images.") 198 | parser.add_argument("--use_bidir", action="store_true", help="Use bi-directional GRU.") 199 | args = parser.parse_args() 200 | 201 | # setting 202 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 203 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 204 | 205 | # reproducibility 206 | torch.manual_seed(args.seed) 207 | torch.backends.cudnn.deterministic = True 208 | torch.backends.cudnn.benchmark = False 209 | np.random.seed(args.seed) 210 | 211 | predict(args) 212 | -------------------------------------------------------------------------------- /data/scannet/README.md: -------------------------------------------------------------------------------- 1 | # ScanNet Instructions 2 | 3 | To acquire the access to ScanNet dataset, Please refer to the [ScanNet project page](https://github.com/ScanNet/ScanNet) and follow the instructions there. You will get a `download-scannet.py` script after your request for the ScanNet dataset is approved. Note that only a subset of ScanNet is needed. 
Once you get `download-scannet.py`, please use the commands below to download the portion of ScanNet that is necessary for ScanRefer: 4 | 5 | ```shell 6 | python2 download-scannet.py -o data/scannet --type _vh_clean_2.ply 7 | python2 download-scannet.py -o data/scannet --type .aggregation.json 8 | python2 download-scannet.py -o data/scannet --type _vh_clean_2.0.010000.segs.json 9 | python2 download-scannet.py -o data/scannet --type .txt 10 | ``` 11 | Roughly 10.6GB free space is needed on your disk. 12 | -------------------------------------------------------------------------------- /data/scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/batch_load_scannet_data.py 3 | 4 | Batch mode in loading Scannet scenes with vertices and ground truth labels for semantic and instance segmentations 5 | 6 | Usage example: python ./batch_load_scannet_data.py 7 | """ 8 | 9 | import os 10 | import datetime 11 | import numpy as np 12 | from load_scannet_data import export 13 | from multiprocessing import Pool 14 | 15 | 16 | SCANNET_DIR = 'scans' 17 | SCAN_NAMES = sorted([line.rstrip() for line in open('meta_data/scannetv2.txt')]) 18 | LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv' 19 | DONOTCARE_CLASS_IDS = np.array([]) 20 | OBJ_CLASS_IDS = np.array([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]) # exclude wall (1), floor (2), ceiling (22) 21 | MAX_NUM_POINT = 50000 22 | OUTPUT_FOLDER = './scannet_data' 23 | 24 | def export_one_scan(scan_name): 25 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 26 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 27 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 28 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 29 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
30 | mesh_vertices, aligned_vertices, semantic_labels, instance_labels, instance_bboxes, aligned_instance_bboxes = export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 31 | 32 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 33 | mesh_vertices = mesh_vertices[mask,:] 34 | aligned_vertices = aligned_vertices[mask,:] 35 | semantic_labels = semantic_labels[mask] 36 | instance_labels = instance_labels[mask] 37 | 38 | if instance_bboxes.shape[0] > 1: 39 | num_instances = len(np.unique(instance_labels)) 40 | print('Num of instances: ', num_instances) 41 | 42 | # bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 43 | bbox_mask = np.in1d(instance_bboxes[:,-2], OBJ_CLASS_IDS) # match the mesh2cap 44 | instance_bboxes = instance_bboxes[bbox_mask,:] 45 | aligned_instance_bboxes = aligned_instance_bboxes[bbox_mask,:] 46 | print('Num of care instances: ', instance_bboxes.shape[0]) 47 | else: 48 | print("No semantic/instance annotation for test scenes") 49 | 50 | N = mesh_vertices.shape[0] 51 | if N > MAX_NUM_POINT: 52 | choices = np.random.choice(N, MAX_NUM_POINT, replace=False) 53 | mesh_vertices = mesh_vertices[choices, :] 54 | aligned_vertices = aligned_vertices[choices, :] 55 | semantic_labels = semantic_labels[choices] 56 | instance_labels = instance_labels[choices] 57 | 58 | print("Shape of points: {}".format(mesh_vertices.shape)) 59 | 60 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 61 | np.save(output_filename_prefix+'_aligned_vert.npy', aligned_vertices) 62 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 63 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 64 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 65 | np.save(output_filename_prefix+'_aligned_bbox.npy', aligned_instance_bboxes) 66 | 67 | def batch_export(): 68 | 69 | if not os.path.exists(OUTPUT_FOLDER): 70 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 71 | os.mkdir(OUTPUT_FOLDER) 72 | 73 | with Pool() as pool: 74 | pool.map(export_one_scan, SCAN_NAMES) 75 | 76 | if __name__=='__main__': 77 | batch_export() 78 | -------------------------------------------------------------------------------- /data/scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py 3 | 4 | Load Scannet scenes with vertices and ground truth labels for semantic and instance segmentations 5 | """ 6 | 7 | # python imports 8 | import math 9 | import os, sys, argparse 10 | import inspect 11 | import json 12 | import pdb 13 | import numpy as np 14 | import scannet_utils 15 | 16 | def read_aggregation(filename): 17 | object_id_to_segs = {} 18 | label_to_segs = {} 19 | with open(filename) as f: 20 | data = json.load(f) 21 | num_objects = len(data['segGroups']) 22 | for i in range(num_objects): 23 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed 24 | label = data['segGroups'][i]['label'] 25 | segs = data['segGroups'][i]['segments'] 26 | object_id_to_segs[object_id] = segs 27 | if label in label_to_segs: 28 | label_to_segs[label].extend(segs) 29 | else: 30 | label_to_segs[label] = segs 31 | return object_id_to_segs, label_to_segs 32 | 33 | 34 | def read_segmentation(filename): 35 | seg_to_verts = {} 36 | with open(filename) as f: 37 | data = json.load(f) 38 | num_verts = len(data['segIndices']) 39 | for i in range(num_verts): 40 | seg_id = 
data['segIndices'][i] 41 | if seg_id in seg_to_verts: 42 | seg_to_verts[seg_id].append(i) 43 | else: 44 | seg_to_verts[seg_id] = [i] 45 | return seg_to_verts, num_verts 46 | 47 | 48 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None): 49 | """ points are XYZ RGB (RGB in 0-255), 50 | semantic label as nyu40 ids, 51 | instance label as 1-#instance, 52 | box as (cx,cy,cz,dx,dy,dz,semantic_label) 53 | """ 54 | label_map = scannet_utils.read_label_mapping(label_map_file, label_from='raw_category', label_to='nyu40id') 55 | # mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 56 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb_normal(mesh_file) 57 | 58 | # Load scene axis alignment matrix 59 | lines = open(meta_file).readlines() 60 | axis_align_matrix = None 61 | for line in lines: 62 | if 'axisAlignment' in line: 63 | axis_align_matrix = [float(x) for x in line.rstrip().strip('axisAlignment = ').split(' ')] 64 | 65 | if axis_align_matrix != None: 66 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4)) 67 | pts = np.ones((mesh_vertices.shape[0], 4)) 68 | pts[:,0:3] = mesh_vertices[:,0:3] 69 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 70 | aligned_vertices = np.copy(mesh_vertices) 71 | aligned_vertices[:,0:3] = pts[:,0:3] 72 | else: 73 | print("No axis alignment matrix found") 74 | aligned_vertices = mesh_vertices 75 | 76 | # Load semantic and instance labels 77 | if os.path.isfile(agg_file): 78 | object_id_to_segs, label_to_segs = read_aggregation(agg_file) 79 | seg_to_verts, num_verts = read_segmentation(seg_file) 80 | 81 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 82 | object_id_to_label_id = {} 83 | for label, segs in label_to_segs.items(): 84 | label_id = label_map[label] 85 | for seg in segs: 86 | verts = seg_to_verts[seg] 87 | label_ids[verts] = label_id 88 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 89 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 90 | for object_id, segs in object_id_to_segs.items(): 91 | for seg in segs: 92 | verts = seg_to_verts[seg] 93 | instance_ids[verts] = object_id 94 | if object_id not in object_id_to_label_id: 95 | object_id_to_label_id[object_id] = label_ids[verts][0] 96 | 97 | instance_bboxes = np.zeros((num_instances,8)) # also include object id 98 | aligned_instance_bboxes = np.zeros((num_instances,8)) # also include object id 99 | for obj_id in object_id_to_segs: 100 | label_id = object_id_to_label_id[obj_id] 101 | 102 | # bboxes in the original meshes 103 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3] 104 | if len(obj_pc) == 0: continue 105 | # Compute axis aligned box 106 | # An axis aligned bounding box is parameterized by 107 | # (cx,cy,cz) and (dx,dy,dz) and label id 108 | # where (cx,cy,cz) is the center point of the box, 109 | # dx is the x-axis length of the box. 
110 | xmin = np.min(obj_pc[:,0]) 111 | ymin = np.min(obj_pc[:,1]) 112 | zmin = np.min(obj_pc[:,2]) 113 | xmax = np.max(obj_pc[:,0]) 114 | ymax = np.max(obj_pc[:,1]) 115 | zmax = np.max(obj_pc[:,2]) 116 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin, label_id, obj_id-1]) # also include object id 117 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 118 | instance_bboxes[obj_id-1,:] = bbox 119 | 120 | # bboxes in the aligned meshes 121 | obj_pc = aligned_vertices[instance_ids==obj_id, 0:3] 122 | if len(obj_pc) == 0: continue 123 | # Compute axis aligned box 124 | # An axis aligned bounding box is parameterized by 125 | # (cx,cy,cz) and (dx,dy,dz) and label id 126 | # where (cx,cy,cz) is the center point of the box, 127 | # dx is the x-axis length of the box. 128 | xmin = np.min(obj_pc[:,0]) 129 | ymin = np.min(obj_pc[:,1]) 130 | zmin = np.min(obj_pc[:,2]) 131 | xmax = np.max(obj_pc[:,0]) 132 | ymax = np.max(obj_pc[:,1]) 133 | zmax = np.max(obj_pc[:,2]) 134 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin, label_id, obj_id-1]) # also include object id 135 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 136 | aligned_instance_bboxes[obj_id-1,:] = bbox 137 | else: 138 | # use zero as placeholders for the test scene 139 | print("use placeholders") 140 | num_verts = mesh_vertices.shape[0] 141 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 142 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 143 | instance_bboxes = np.zeros((1, 8)) # also include object id 144 | aligned_instance_bboxes = np.zeros((1, 8)) # also include object id 145 | 146 | if output_file is not None: 147 | np.save(output_file+'_vert.npy', mesh_vertices) 148 | np.save(output_file+'_aligned_vert.npy', aligned_vertices) 149 | np.save(output_file+'_sem_label.npy', label_ids) 150 | np.save(output_file+'_ins_label.npy', instance_ids) 151 | np.save(output_file+'_bbox.npy', instance_bboxes) 152 | np.save(output_file+'_aligned_bbox.npy', instance_bboxes) 153 | 154 | return mesh_vertices, aligned_vertices, label_ids, instance_ids, instance_bboxes, aligned_instance_bboxes 155 | 156 | def main(): 157 | parser = argparse.ArgumentParser() 158 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 159 | parser.add_argument('--output_file', required=True, help='output file') 160 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv') 161 | opt = parser.parse_args() 162 | 163 | scan_name = os.path.split(opt.scan_path)[-1] 164 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 165 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 166 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json') 167 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
168 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file) 169 | 170 | if __name__ == '__main__': 171 | main() 172 | -------------------------------------------------------------------------------- /data/scannet/meta_data/nyu40_labels.csv: -------------------------------------------------------------------------------- 1 | nyu40id,nyu40class,mappedId,mappedIdConsecutive,weight 2 | 1,wall,(ignore),19,0.0 3 | 2,floor,(ignore),19,0.0 4 | 3,cabinet,3,1,3.9644974086960434 5 | 4,bed,4,2,5.459494152836571 6 | 5,chair,5,3,2.241522691584157 7 | 6,sofa,6,4,4.820655512680854 8 | 7,table,7,5,3.565918577548873 9 | 8,door,8,6,3.538498341919445 10 | 9,window,9,7,4.636521236560596 11 | 10,bookshelf,10,8,5.445050937449535 12 | 11,picture,11,9,5.079250281008131 13 | 12,counter,12,10,6.2030429647735845 14 | 13,blinds,(ignore),19,0.0 15 | 14,desk,14,11,4.622662494840168 16 | 15,shelves,(ignore),19,0.0 17 | 16,curtain,16,12,5.956294301248057 18 | 17,dresser,(ignore),19,0.0 19 | 18,pillow,(ignore),19,0.0 20 | 19,mirror,(ignore),19,0.0 21 | 20,floor_mat,(ignore),19,0.0 22 | 21,clothes,(ignore),19,0.0 23 | 22,ceiling,(ignore),19,0.0 24 | 23,books,(ignore),19,0.0 25 | 24,refridgerator,24,13,5.459141107819665 26 | 25,television,(ignore),19,0.0 27 | 26,paper,(ignore),19,0.0 28 | 27,towel,(ignore),19,0.0 29 | 28,shower_curtain,28,14,6.724871661883906 30 | 29,box,(ignore),19,0.0 31 | 30,whiteboard,(ignore),19,0.0 32 | 31,person,(ignore),19,0.0 33 | 32,night_stand,(ignore),19,0.0 34 | 33,toilet,33,15,5.832442848923174 35 | 34,sink,34,16,5.064773947290611 36 | 35,lamp,(ignore),19,0.0 37 | 36,bathtub,36,17,6.738988357113375 38 | 37,bag,(ignore),19,0.0 39 | 38,otherstructure,(ignore),19,0.0 40 | 39,otherfurniture,39,18,3.375217918833916 41 | 40,otherprop,(ignore),19,0.0 -------------------------------------------------------------------------------- /data/scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/data/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /data/scannet/meta_data/scannet_reference_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/data/scannet/meta_data/scannet_reference_means.npz -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | 
scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0011_00 2 | scene0011_01 3 | scene0015_00 4 | scene0019_00 5 | scene0019_01 6 | scene0025_00 7 | scene0025_01 8 | scene0025_02 9 | scene0030_00 10 | scene0030_01 11 | scene0030_02 12 | scene0046_00 13 | scene0046_01 14 | scene0046_02 15 | scene0050_00 16 | scene0050_01 17 | scene0050_02 18 | scene0063_00 19 | scene0064_00 20 | scene0064_01 21 | scene0077_00 22 | scene0077_01 23 | scene0081_00 24 | scene0081_01 25 | scene0081_02 26 | scene0084_00 27 | scene0084_01 28 | scene0084_02 29 | scene0086_00 30 | scene0086_01 31 | scene0086_02 32 | scene0088_00 33 | scene0088_01 34 | scene0088_02 35 | scene0088_03 36 | scene0095_00 37 | scene0095_01 38 | scene0100_00 39 | scene0100_01 40 | scene0100_02 41 | scene0131_00 42 | scene0131_01 43 | scene0131_02 44 | scene0139_00 45 | scene0144_00 46 | scene0144_01 47 | scene0146_00 48 | scene0146_01 49 | scene0146_02 50 | scene0149_00 51 | scene0153_00 52 | scene0153_01 53 | scene0164_00 54 | scene0164_01 55 | scene0164_02 56 | scene0164_03 57 | scene0169_00 58 | scene0169_01 59 | scene0187_00 60 | scene0187_01 61 | scene0193_00 62 | scene0193_01 63 | scene0196_00 64 | scene0203_00 65 | scene0203_01 66 | scene0203_02 67 | scene0207_00 68 | scene0207_01 69 | scene0207_02 70 | scene0208_00 71 | scene0217_00 72 | scene0221_00 73 | scene0221_01 74 | scene0222_00 75 | scene0222_01 76 | scene0231_00 77 | scene0231_01 78 | scene0231_02 79 | scene0246_00 80 | scene0249_00 81 | scene0251_00 82 | scene0256_00 83 | scene0256_01 84 | scene0256_02 85 | scene0257_00 86 | scene0277_00 87 | scene0277_01 88 | scene0277_02 89 | scene0278_00 90 | scene0278_01 91 | scene0300_00 92 | scene0300_01 93 | scene0304_00 94 | scene0307_00 95 | scene0307_01 96 | scene0307_02 97 | scene0314_00 98 | scene0316_00 99 | scene0328_00 100 | scene0329_00 101 | scene0329_01 102 | scene0329_02 103 | scene0334_00 104 | scene0334_01 105 | scene0334_02 106 | scene0338_00 107 | scene0338_01 108 | scene0338_02 109 | scene0342_00 110 | scene0343_00 111 | scene0351_00 112 | scene0351_01 113 | scene0353_00 114 | scene0353_01 115 | scene0353_02 116 | scene0354_00 117 | scene0355_00 118 | scene0355_01 119 | scene0356_00 120 | scene0356_01 121 | scene0356_02 122 | scene0357_00 123 | scene0357_01 124 | scene0377_00 125 | scene0377_01 126 | scene0377_02 127 | scene0378_00 128 | scene0378_01 129 | scene0378_02 130 | scene0382_00 131 | scene0382_01 132 | 
scene0389_00 133 | scene0406_00 134 | scene0406_01 135 | scene0406_02 136 | scene0412_00 137 | scene0412_01 138 | scene0414_00 139 | scene0423_00 140 | scene0423_01 141 | scene0423_02 142 | scene0426_00 143 | scene0426_01 144 | scene0426_02 145 | scene0426_03 146 | scene0427_00 147 | scene0430_00 148 | scene0430_01 149 | scene0432_00 150 | scene0432_01 151 | scene0435_00 152 | scene0435_01 153 | scene0435_02 154 | scene0435_03 155 | scene0441_00 156 | scene0458_00 157 | scene0458_01 158 | scene0461_00 159 | scene0462_00 160 | scene0474_00 161 | scene0474_01 162 | scene0474_02 163 | scene0474_03 164 | scene0474_04 165 | scene0474_05 166 | scene0488_00 167 | scene0488_01 168 | scene0490_00 169 | scene0494_00 170 | scene0496_00 171 | scene0500_00 172 | scene0500_01 173 | scene0518_00 174 | scene0527_00 175 | scene0535_00 176 | scene0549_00 177 | scene0549_01 178 | scene0550_00 179 | scene0552_00 180 | scene0552_01 181 | scene0553_00 182 | scene0553_01 183 | scene0553_02 184 | scene0558_00 185 | scene0558_01 186 | scene0558_02 187 | scene0559_00 188 | scene0559_01 189 | scene0559_02 190 | scene0565_00 191 | scene0568_00 192 | scene0568_01 193 | scene0568_02 194 | scene0574_00 195 | scene0574_01 196 | scene0574_02 197 | scene0575_00 198 | scene0575_01 199 | scene0575_02 200 | scene0578_00 201 | scene0578_01 202 | scene0578_02 203 | scene0580_00 204 | scene0580_01 205 | scene0583_00 206 | scene0583_01 207 | scene0583_02 208 | scene0591_00 209 | scene0591_01 210 | scene0591_02 211 | scene0593_00 212 | scene0593_01 213 | scene0595_00 214 | scene0598_00 215 | scene0598_01 216 | scene0598_02 217 | scene0599_00 218 | scene0599_01 219 | scene0599_02 220 | scene0606_00 221 | scene0606_01 222 | scene0606_02 223 | scene0607_00 224 | scene0607_01 225 | scene0608_00 226 | scene0608_01 227 | scene0608_02 228 | scene0609_00 229 | scene0609_01 230 | scene0609_02 231 | scene0609_03 232 | scene0616_00 233 | scene0616_01 234 | scene0618_00 235 | scene0621_00 236 | scene0629_00 237 | scene0629_01 238 | scene0629_02 239 | scene0633_00 240 | scene0633_01 241 | scene0643_00 242 | scene0644_00 243 | scene0645_00 244 | scene0645_01 245 | scene0645_02 246 | scene0647_00 247 | scene0647_01 248 | scene0648_00 249 | scene0648_01 250 | scene0651_00 251 | scene0651_01 252 | scene0651_02 253 | scene0652_00 254 | scene0653_00 255 | scene0653_01 256 | scene0655_00 257 | scene0655_01 258 | scene0655_02 259 | scene0658_00 260 | scene0660_00 261 | scene0663_00 262 | scene0663_01 263 | scene0663_02 264 | scene0664_00 265 | scene0664_01 266 | scene0664_02 267 | scene0665_00 268 | scene0665_01 269 | scene0670_00 270 | scene0670_01 271 | scene0671_00 272 | scene0671_01 273 | scene0678_00 274 | scene0678_01 275 | scene0678_02 276 | scene0684_00 277 | scene0684_01 278 | scene0685_00 279 | scene0685_01 280 | scene0685_02 281 | scene0686_00 282 | scene0686_01 283 | scene0686_02 284 | scene0689_00 285 | scene0690_00 286 | scene0690_01 287 | scene0693_00 288 | scene0693_01 289 | scene0693_02 290 | scene0695_00 291 | scene0695_01 292 | scene0695_02 293 | scene0695_03 294 | scene0696_00 295 | scene0696_01 296 | scene0696_02 297 | scene0697_00 298 | scene0697_01 299 | scene0697_02 300 | scene0697_03 301 | scene0699_00 302 | scene0700_00 303 | scene0700_01 304 | scene0700_02 305 | scene0701_00 306 | scene0701_01 307 | scene0701_02 308 | scene0702_00 309 | scene0702_01 310 | scene0702_02 311 | scene0704_00 312 | scene0704_01 313 | -------------------------------------------------------------------------------- 
/data/scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/model_util_scannet.py 3 | """ 4 | 5 | import numpy as np 6 | import sys 7 | import os 8 | 9 | sys.path.append(os.path.join(os.getcwd(), os.pardir, "lib")) # HACK add the lib folder 10 | from lib.config import CONF 11 | from utils.box_util import get_3d_box 12 | 13 | def in_hull(p, hull): 14 | from scipy.spatial import Delaunay 15 | if not isinstance(hull,Delaunay): 16 | hull = Delaunay(hull) 17 | return hull.find_simplex(p)>=0 18 | 19 | def extract_pc_in_box3d(pc, box3d): 20 | ''' pc: (N,3), box3d: (8,3) ''' 21 | box3d_roi_inds = in_hull(pc[:,0:3], box3d) 22 | return pc[box3d_roi_inds,:], box3d_roi_inds 23 | 24 | def rotate_aligned_boxes(input_boxes, rot_mat): 25 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 26 | new_centers = np.dot(centers, np.transpose(rot_mat)) 27 | 28 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 29 | new_x = np.zeros((dx.shape[0], 4)) 30 | new_y = np.zeros((dx.shape[0], 4)) 31 | 32 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 33 | crnrs = np.zeros((dx.shape[0], 3)) 34 | crnrs[:,0] = crnr[0]*dx 35 | crnrs[:,1] = crnr[1]*dy 36 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 37 | new_x[:,i] = crnrs[:,0] 38 | new_y[:,i] = crnrs[:,1] 39 | 40 | 41 | new_dx = 2.0*np.max(new_x, 1) 42 | new_dy = 2.0*np.max(new_y, 1) 43 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 44 | 45 | return np.concatenate([new_centers, new_lengths], axis=1) 46 | 47 | def rotate_aligned_boxes_along_axis(input_boxes, rot_mat, axis): 48 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 49 | new_centers = np.dot(centers, np.transpose(rot_mat)) 50 | 51 | if axis == "x": 52 | d1, d2 = lengths[:,1]/2.0, lengths[:,2]/2.0 53 | elif axis == "y": 54 | d1, d2 = lengths[:,0]/2.0, lengths[:,2]/2.0 55 | else: 56 | d1, d2 = lengths[:,0]/2.0, lengths[:,1]/2.0 57 | 58 | new_1 = np.zeros((d1.shape[0], 4)) 59 | new_2 = np.zeros((d1.shape[0], 4)) 60 | 61 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 62 | crnrs = np.zeros((d1.shape[0], 3)) 63 | crnrs[:,0] = crnr[0]*d1 64 | crnrs[:,1] = crnr[1]*d2 65 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 66 | new_1[:,i] = crnrs[:,0] 67 | new_2[:,i] = crnrs[:,1] 68 | 69 | new_d1 = 2.0*np.max(new_1, 1) 70 | new_d2 = 2.0*np.max(new_2, 1) 71 | 72 | if axis == "x": 73 | new_lengths = np.stack((lengths[:,0], new_d1, new_d2), axis=1) 74 | elif axis == "y": 75 | new_lengths = np.stack((new_d1, lengths[:,1], new_d2), axis=1) 76 | else: 77 | new_lengths = np.stack((new_d1, new_d2, lengths[:,2]), axis=1) 78 | 79 | return np.concatenate([new_centers, new_lengths], axis=1) 80 | 81 | class ScannetDatasetConfig(object): 82 | def __init__(self): 83 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 84 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 85 | 'refrigerator':12, 'shower curtain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'others':17} 86 | self.class2type = {self.type2class[t]:t for t in self.type2class} 87 | 88 | self.nyu40ids = np.array([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]) # exclude wall (1), floor (2), ceiling (22) 89 | self.nyu40id2class = self._get_nyu40id2class() 90 | self.mean_size_arr = np.load(os.path.join(CONF.PATH.SCANNET, 
'meta_data/scannet_reference_means.npz'))['arr_0'] 91 | 92 | self.num_class = len(self.type2class.keys()) 93 | self.num_heading_bin = 1 94 | self.num_size_cluster = len(self.type2class.keys()) 95 | 96 | self.type_mean_size = {} 97 | for i in range(self.num_size_cluster): 98 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 99 | 100 | def _get_nyu40id2class(self): 101 | lines = [line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET, 'meta_data/scannetv2-labels.combined.tsv'))] 102 | lines = lines[1:] 103 | nyu40ids2class = {} 104 | for i in range(len(lines)): 105 | label_classes_set = set(self.type2class.keys()) 106 | elements = lines[i].split('\t') 107 | nyu40_id = int(elements[4]) 108 | nyu40_name = elements[7] 109 | if nyu40_id in self.nyu40ids: 110 | if nyu40_name not in label_classes_set: 111 | nyu40ids2class[nyu40_id] = self.type2class["others"] 112 | else: 113 | nyu40ids2class[nyu40_id] = self.type2class[nyu40_name] 114 | 115 | return nyu40ids2class 116 | 117 | def angle2class(self, angle): 118 | ''' Convert continuous angle to discrete class 119 | [optinal] also small regression number from 120 | class center angle to current angle. 121 | 122 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 123 | return is class of int32 of 0,1,...,N-1 and a number such that 124 | class*(2pi/N) + number = angle 125 | 126 | NOT USED. 127 | ''' 128 | assert(False) 129 | 130 | def class2angle(self, pred_cls, residual, to_label_format=True): 131 | ''' Inverse function to angle2class. 132 | 133 | As ScanNet only has axis-alined boxes so angles are always 0. ''' 134 | return 0 135 | 136 | def class2angle_batch(self, pred_cls, residual, to_label_format=True): 137 | ''' Inverse function to angle2class. 138 | 139 | As ScanNet only has axis-alined boxes so angles are always 0. 
''' 140 | return np.zeros(pred_cls.shape[0]) 141 | 142 | def size2class(self, size, type_name): 143 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 144 | size_class = self.type2class[type_name] 145 | size_residual = size - self.type_mean_size[type_name] 146 | return size_class, size_residual 147 | 148 | def class2size(self, pred_cls, residual): 149 | ''' Inverse function to size2class ''' 150 | return self.mean_size_arr[pred_cls] + residual 151 | 152 | def class2size_batch(self, pred_cls, residual): 153 | ''' Inverse function to size2class ''' 154 | return self.mean_size_arr[pred_cls] + residual 155 | 156 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 157 | heading_angle = self.class2angle(heading_class, heading_residual) 158 | box_size = self.class2size(int(size_class), size_residual) 159 | obb = np.zeros((7,)) 160 | obb[0:3] = center 161 | obb[3:6] = box_size 162 | obb[6] = heading_angle*-1 163 | return obb 164 | 165 | def param2obb_batch(self, center, heading_class, heading_residual, size_class, size_residual): 166 | heading_angle = self.class2angle_batch(heading_class, heading_residual) 167 | box_size = self.class2size_batch(size_class, size_residual) 168 | obb = np.zeros((heading_class.shape[0], 7)) 169 | obb[:, 0:3] = center 170 | obb[:, 3:6] = box_size 171 | obb[:, 6] = heading_angle*-1 172 | return obb 173 | -------------------------------------------------------------------------------- /data/scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py 3 | """ 4 | 5 | import os 6 | import sys 7 | import json 8 | import csv 9 | import numpy as np 10 | 11 | try: 12 | from plyfile import PlyData, PlyElement 13 | except: 14 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 15 | print("pip install plyfile") 16 | sys.exit(-1) 17 | 18 | def normalize_v3(arr): 19 | ''' Normalize a numpy array of 3 component vectors shape=(n,3) ''' 20 | lens = np.sqrt( arr[:,0]**2 + arr[:,1]**2 + arr[:,2]**2 ) 21 | arr[:,0] /= (lens + 1e-8) 22 | arr[:,1] /= (lens + 1e-8) 23 | arr[:,2] /= (lens + 1e-8) 24 | return arr 25 | 26 | def compute_normal(vertices, faces): 27 | #Create a zeroed array with the same type and shape as our vertices i.e., per vertex normal 28 | normals = np.zeros( vertices.shape, dtype=vertices.dtype ) 29 | #Create an indexed view into the vertex array using the array of three indices for triangles 30 | tris = vertices[faces] 31 | #Calculate the normal for all the triangles, by taking the cross product of the vectors v1-v0, and v2-v0 in each triangle 32 | n = np.cross( tris[::,1 ] - tris[::,0] , tris[::,2 ] - tris[::,0] ) 33 | # n is now an array of normals per triangle. The length of each normal is dependent the vertices, 34 | # we need to normalize these, so that our next step weights each normal equally. 35 | normalize_v3(n) 36 | # now we have a normalized array of normals, one per triangle, i.e., per triangle normals. 37 | # But instead of one per triangle (i.e., flat shading), we add to each vertex in that triangle, 38 | # the triangles' normal. Multiple triangles would then contribute to every vertex, so we need to normalize again afterwards. 
39 | # The cool part, we can actually add the normals through an indexed view of our (zeroed) per vertex normal array 40 | normals[ faces[:,0] ] += n 41 | normals[ faces[:,1] ] += n 42 | normals[ faces[:,2] ] += n 43 | normalize_v3(normals) 44 | 45 | return normals 46 | 47 | def represents_int(s): 48 | ''' if string s represents an int. ''' 49 | try: 50 | int(s) 51 | return True 52 | except ValueError: 53 | return False 54 | 55 | 56 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 57 | assert os.path.isfile(filename) 58 | mapping = dict() 59 | with open(filename) as csvfile: 60 | reader = csv.DictReader(csvfile, delimiter='\t') 61 | for row in reader: 62 | mapping[row[label_from]] = int(row[label_to]) 63 | if represents_int(list(mapping.keys())[0]): 64 | mapping = {int(k):v for k,v in mapping.items()} 65 | return mapping 66 | 67 | def read_mesh_vertices(filename): 68 | """ read XYZ for each vertex. 69 | """ 70 | assert os.path.isfile(filename) 71 | with open(filename, 'rb') as f: 72 | plydata = PlyData.read(f) 73 | num_verts = plydata['vertex'].count 74 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 75 | vertices[:,0] = plydata['vertex'].data['x'] 76 | vertices[:,1] = plydata['vertex'].data['y'] 77 | vertices[:,2] = plydata['vertex'].data['z'] 78 | return vertices 79 | 80 | def read_mesh_vertices_rgb(filename): 81 | """ read XYZ RGB for each vertex. 82 | Note: RGB values are in 0-255 83 | """ 84 | assert os.path.isfile(filename) 85 | with open(filename, 'rb') as f: 86 | plydata = PlyData.read(f) 87 | num_verts = plydata['vertex'].count 88 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 89 | vertices[:,0] = plydata['vertex'].data['x'] 90 | vertices[:,1] = plydata['vertex'].data['y'] 91 | vertices[:,2] = plydata['vertex'].data['z'] 92 | vertices[:,3] = plydata['vertex'].data['red'] 93 | vertices[:,4] = plydata['vertex'].data['green'] 94 | vertices[:,5] = plydata['vertex'].data['blue'] 95 | return vertices 96 | 97 | def read_mesh_vertices_rgb_normal(filename): 98 | """ read XYZ RGB normals point cloud from filename PLY file """ 99 | assert(os.path.isfile(filename)) 100 | with open(filename, 'rb') as f: 101 | plydata = PlyData.read(f) 102 | num_verts = plydata['vertex'].count 103 | vertices = np.zeros(shape=[num_verts, 9], dtype=np.float32) 104 | vertices[:,0] = plydata['vertex'].data['x'] 105 | vertices[:,1] = plydata['vertex'].data['y'] 106 | vertices[:,2] = plydata['vertex'].data['z'] 107 | vertices[:,3] = plydata['vertex'].data['red'] 108 | vertices[:,4] = plydata['vertex'].data['green'] 109 | vertices[:,5] = plydata['vertex'].data['blue'] 110 | 111 | # compute normals 112 | xyz = np.array([[x, y, z] for x, y, z, _, _, _, _ in plydata["vertex"].data]) 113 | face = np.array([f[0] for f in plydata["face"].data]) 114 | nxnynz = compute_normal(xyz, face) 115 | vertices[:,6:] = nxnynz 116 | return vertices 117 | -------------------------------------------------------------------------------- /data/scannet/visualize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | 6 | if __name__ == "__main__": 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--scene_id", type=str, help="scene id of scene to be visualized", default="scene0000_00") 9 | args = parser.parse_args() 10 | 11 | verts = np.load("scannet_data/{}_vert.npy".format(args.scene_id)) 12 | aligned_verts = 
np.load("scannet_data/{}_aligned_vert.npy".format(args.scene_id)) 13 | 14 | 15 | with open("scannet_data/{}_verts.obj".format(args.scene_id), "w") as f: 16 | for i in range(verts.shape[0]): 17 | f.write("v {} {} {} {} {} {}\n".format( 18 | verts[i, 0], 19 | verts[i, 1], 20 | verts[i, 2], 21 | verts[i, 3], 22 | verts[i, 4], 23 | verts[i, 5] 24 | )) 25 | 26 | with open("scannet_data/{}_aligned_verts.obj".format(args.scene_id), "w") as f: 27 | for i in range(aligned_verts.shape[0]): 28 | f.write("v {} {} {} {} {} {}\n".format( 29 | aligned_verts[i, 0], 30 | aligned_verts[i, 1], 31 | aligned_verts[i, 2], 32 | aligned_verts[i, 3], 33 | aligned_verts[i, 4], 34 | aligned_verts[i, 5] 35 | )) 36 | -------------------------------------------------------------------------------- /demo/ScanRefer.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/demo/ScanRefer.gif -------------------------------------------------------------------------------- /docs/browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/browser.png -------------------------------------------------------------------------------- /docs/davezchen_eccv2020_scanrefer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/davezchen_eccv2020_scanrefer.pdf -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ScanRefer: 3D Object Localization in RGB-DScans using Natural Language 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
ScanRefer: 3D Object Localization in RGB-D Scans using Natural Language

European Conference on Computer Vision (ECCV), 2020.

Dave Zhenyu Chen (1)   Angel X. Chang (2)   Matthias Nießner (1)

(1) Technical University of Munich   (2) Simon Fraser University

Submit to our ScanRefer Localization Benchmark here!

Introduction

We introduce the task of 3D object localization in RGB-D scans using natural language descriptions. As input, we assume a point cloud of a scanned 3D scene along with a free-form description of a specified target object. To address this task, we propose ScanRefer, which learns a fused descriptor from 3D object proposals and encoded sentence embeddings. This fused descriptor correlates language expressions with geometric features, enabling regression of the 3D bounding box of a target object. We also introduce the ScanRefer dataset, containing 51,583 descriptions of 11,046 objects from 800 ScanNet scenes. ScanRefer is the first large-scale effort to perform object localization via natural language expression directly in 3D.

Video

Browse

The ScanRefer data can be browsed online in your web browser. Learn more at the ScanRefer Data Browser. (For a better browsing experience, we recommend using Google Chrome.)

Publication

European Conference on Computer Vision (ECCV), 2020.
Paper | arXiv | Code

If you find our project useful, please consider citing us:

@article{chen2020scanrefer,
    title={ScanRefer: 3D Object Localization in RGB-D Scans using Natural Language},
    author={Chen, Dave Zhenyu and Chang, Angel X and Nie{\ss}ner, Matthias},
    journal={16th European Conference on Computer Vision (ECCV)},
    year={2020}
}

Dataset Download

If you would like to access the ScanRefer dataset, please fill out the ScanRefer Terms of Use Form. Once your request is accepted, you will receive an email with the download link.
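For readers wiring up the benchmark above, below is a minimal sketch (not part of the repository) of what a single ScanRefer annotation provides and how the Acc@0.25IoU / Acc@0.5IoU numbers reported by `benchmark/eval.py` are derived from per-description IoUs. Only `scene_id`, `object_id`, and `ann_id` are actually read by the benchmark scripts; the `description` field name and all sample values are assumptions for illustration.

```python
import numpy as np

# One ScanRefer annotation (illustrative values; the "description" field name is an assumption).
annotation = {
    "scene_id": "scene0011_00",   # ScanNet scene referenced by the description
    "object_id": "7",             # instance id of the target object in that scene
    "ann_id": "2",                # index of this description among those for the object
    "description": "the brown chair next to the desk",
}

# Per-description 3D IoUs between predicted and ground-truth boxes,
# e.g. as produced via utils.box_util.box3d_iou in benchmark/eval.py.
ious = np.array([0.62, 0.18, 0.41, 0.05, 0.77])

acc_025 = (ious >= 0.25).mean()  # Acc@0.25IoU
acc_05 = (ious >= 0.5).mean()    # Acc@0.5IoU
print("Acc@0.25IoU: {:.3f} | Acc@0.5IoU: {:.3f}".format(acc_025, acc_05))
```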
100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /docs/paper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/paper.jpg -------------------------------------------------------------------------------- /docs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/teaser.png -------------------------------------------------------------------------------- /lib/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from easydict import EasyDict 4 | 5 | CONF = EasyDict() 6 | 7 | # path 8 | CONF.PATH = EasyDict() 9 | CONF.PATH.BASE = "/home/davech2y/ScanRefer/" # TODO: change this 10 | CONF.PATH.DATA = os.path.join(CONF.PATH.BASE, "data") 11 | CONF.PATH.SCANNET = os.path.join(CONF.PATH.DATA, "scannet") 12 | CONF.PATH.LIB = os.path.join(CONF.PATH.BASE, "lib") 13 | CONF.PATH.MODELS = os.path.join(CONF.PATH.BASE, "models") 14 | CONF.PATH.UTILS = os.path.join(CONF.PATH.BASE, "utils") 15 | 16 | # append to syspath 17 | for _, path in CONF.PATH.items(): 18 | sys.path.append(path) 19 | 20 | # scannet data 21 | CONF.PATH.SCANNET_SCANS = os.path.join(CONF.PATH.SCANNET, "scans") 22 | CONF.PATH.SCANNET_META = os.path.join(CONF.PATH.SCANNET, "meta_data") 23 | CONF.PATH.SCANNET_DATA = os.path.join(CONF.PATH.SCANNET, "scannet_data") 24 | 25 | # data 26 | CONF.SCANNET_DIR = "/mnt/canis/Datasets/ScanNet/public/v2/scans" # TODO change this 27 | CONF.SCANNET_FRAMES_ROOT = "/home/davech2y/frames_square/" # TODO change this 28 | CONF.PROJECTION = "/home/davech2y/multiview_projection_scanrefer" # TODO change this 29 | CONF.ENET_FEATURES_ROOT = "/home/davech2y/enet_features" # TODO change this 30 | CONF.ENET_FEATURES_SUBROOT = os.path.join(CONF.ENET_FEATURES_ROOT, "{}") # scene_id 31 | CONF.ENET_FEATURES_PATH = os.path.join(CONF.ENET_FEATURES_SUBROOT, "{}.npy") # frame_id 32 | CONF.SCANNET_FRAMES = os.path.join(CONF.SCANNET_FRAMES_ROOT, "{}/{}") # scene_id, mode 33 | CONF.SCENE_NAMES = sorted(os.listdir(CONF.SCANNET_DIR)) 34 | CONF.ENET_WEIGHTS = os.path.join(CONF.PATH.BASE, "data/scannetv2_enet.pth") 35 | # CONF.MULTIVIEW = os.path.join(CONF.PATH.SCANNET_DATA, "enet_feats.hdf5") 36 | CONF.MULTIVIEW = os.path.join(CONF.PATH.SCANNET_DATA, "enet_feats_maxpool.hdf5") 37 | CONF.NYU40_LABELS = os.path.join(CONF.PATH.SCANNET_META, "nyu40_labels.csv") 38 | 39 | # scannet 40 | CONF.SCANNETV2_TRAIN = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_train.txt") 41 | CONF.SCANNETV2_VAL = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_val.txt") 42 | CONF.SCANNETV2_TEST = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_test.txt") 43 | CONF.SCANNETV2_LIST = os.path.join(CONF.PATH.SCANNET_META, "scannetv2.txt") 44 | 45 | # output 46 | CONF.PATH.OUTPUT = os.path.join(CONF.PATH.BASE, "outputs") 47 | 48 | # train 49 | CONF.TRAIN = EasyDict() 50 | CONF.TRAIN.MAX_DES_LEN = 126 51 | CONF.TRAIN.SEED = 42 52 | -------------------------------------------------------------------------------- /lib/eval_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | 12 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder 13 | from utils.nn_distance import nn_distance, huber_loss 14 | from lib.ap_helper import parse_predictions 15 | from lib.loss import SoftmaxRankingLoss 16 | from utils.box_util import get_3d_box, get_3d_box_batch, box3d_iou 17 | 18 | def eval_ref_one_sample(pred_bbox, gt_bbox): 19 | """ Evaluate one reference prediction 20 | 21 | Args: 22 | pred_bbox: 8 corners of prediction bounding box, (8, 3) 23 | gt_bbox: 8 corners of ground truth bounding box, (8, 3) 24 | Returns: 25 | iou: intersection over union score 26 | """ 27 | 28 | iou = box3d_iou(pred_bbox, gt_bbox) 29 | 30 | return iou 31 | 32 | def construct_bbox_corners(center, box_size): 33 | sx, sy, sz = box_size 34 | x_corners = [sx/2, sx/2, -sx/2, -sx/2, sx/2, sx/2, -sx/2, -sx/2] 35 | y_corners = [sy/2, -sy/2, -sy/2, sy/2, sy/2, -sy/2, -sy/2, sy/2] 36 | z_corners = [sz/2, sz/2, sz/2, sz/2, -sz/2, -sz/2, -sz/2, -sz/2] 37 | corners_3d = np.vstack([x_corners, y_corners, z_corners]) 38 | corners_3d[0,:] = corners_3d[0,:] + center[0]; 39 | corners_3d[1,:] = corners_3d[1,:] + center[1]; 40 | corners_3d[2,:] = corners_3d[2,:] + center[2]; 41 | corners_3d = np.transpose(corners_3d) 42 | 43 | return corners_3d 44 | 45 | def get_eval(data_dict, config, reference, use_lang_classifier=False, use_oracle=False, use_cat_rand=False, use_best=False, post_processing=None): 46 | """ Loss functions 47 | 48 | Args: 49 | data_dict: dict 50 | config: dataset config instance 51 | reference: flag (False/True) 52 | post_processing: config dict 53 | Returns: 54 | loss: pytorch scalar tensor 55 | data_dict: dict 56 | """ 57 | 58 | batch_size, num_words, _ = data_dict["lang_feat"].shape 59 | 60 | 61 | objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long() 62 | objectness_labels_batch = data_dict['objectness_label'].long() 63 | 64 | if post_processing: 65 | _ = parse_predictions(data_dict, post_processing) 66 | nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda() 67 | 68 | # construct valid mask 69 | pred_masks = (nms_masks * objectness_preds_batch == 1).float() 70 | label_masks = (objectness_labels_batch == 1).float() 71 | else: 72 | # construct valid mask 73 | pred_masks = (objectness_preds_batch == 1).float() 74 | label_masks = (objectness_labels_batch == 1).float() 75 | 76 | cluster_preds = torch.argmax(data_dict["cluster_ref"] * pred_masks, 1).long().unsqueeze(1).repeat(1, pred_masks.shape[1]) 77 | preds = torch.zeros(pred_masks.shape).cuda() 78 | preds = preds.scatter_(1, cluster_preds, 1) 79 | cluster_preds = preds 80 | cluster_labels = data_dict["cluster_labels"].float() 81 | cluster_labels *= label_masks 82 | 83 | # compute classification scores 84 | corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1), dim=1).float() 85 | labels = torch.ones(corrects.shape[0]).cuda() 86 | ref_acc = corrects / (labels + 1e-8) 87 | 88 | # store 89 | data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist() 90 | 91 | # compute localization metrics 92 | if use_best: 93 | pred_ref = torch.argmax(data_dict["cluster_labels"], 1) # (B,) 94 | # store the calibrated predictions and masks 95 | data_dict['cluster_ref'] = data_dict["cluster_labels"] 96 | if use_cat_rand: 97 | cluster_preds = 
torch.zeros(cluster_labels.shape).cuda() 98 | for i in range(cluster_preds.shape[0]): 99 | num_bbox = data_dict["num_bbox"][i] 100 | sem_cls_label = data_dict["sem_cls_label"][i] 101 | # sem_cls_label = torch.argmax(end_points["sem_cls_scores"], 2)[i] 102 | sem_cls_label[num_bbox:] -= 1 103 | candidate_masks = torch.gather(sem_cls_label == data_dict["object_cat"][i], 0, data_dict["object_assignment"][i]) 104 | candidates = torch.arange(cluster_labels.shape[1])[candidate_masks] 105 | try: 106 | chosen_idx = torch.randperm(candidates.shape[0])[0] 107 | chosen_candidate = candidates[chosen_idx] 108 | cluster_preds[i, chosen_candidate] = 1 109 | except IndexError: 110 | cluster_preds[i, candidates] = 1 111 | 112 | pred_ref = torch.argmax(cluster_preds, 1) # (B,) 113 | # store the calibrated predictions and masks 114 | data_dict['cluster_ref'] = cluster_preds 115 | else: 116 | pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks, 1) # (B,) 117 | # store the calibrated predictions and masks 118 | data_dict['cluster_ref'] = data_dict['cluster_ref'] * pred_masks 119 | 120 | if use_oracle: 121 | pred_center = data_dict['center_label'] # (B,MAX_NUM_OBJ,3) 122 | pred_heading_class = data_dict['heading_class_label'] # B,K2 123 | pred_heading_residual = data_dict['heading_residual_label'] # B,K2 124 | pred_size_class = data_dict['size_class_label'] # B,K2 125 | pred_size_residual = data_dict['size_residual_label'] # B,K2,3 126 | 127 | # assign 128 | pred_center = torch.gather(pred_center, 1, data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3)) 129 | pred_heading_class = torch.gather(pred_heading_class, 1, data_dict["object_assignment"]) 130 | pred_heading_residual = torch.gather(pred_heading_residual, 1, data_dict["object_assignment"]).unsqueeze(-1) 131 | pred_size_class = torch.gather(pred_size_class, 1, data_dict["object_assignment"]) 132 | pred_size_residual = torch.gather(pred_size_residual, 1, data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3)) 133 | else: 134 | pred_center = data_dict['center'] # (B,K,3) 135 | pred_heading_class = torch.argmax(data_dict['heading_scores'], -1) # B,num_proposal 136 | pred_heading_residual = torch.gather(data_dict['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 137 | pred_heading_class = pred_heading_class # B,num_proposal 138 | pred_heading_residual = pred_heading_residual.squeeze(2) # B,num_proposal 139 | pred_size_class = torch.argmax(data_dict['size_scores'], -1) # B,num_proposal 140 | pred_size_residual = torch.gather(data_dict['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 141 | pred_size_class = pred_size_class 142 | pred_size_residual = pred_size_residual.squeeze(2) # B,num_proposal,3 143 | 144 | # store 145 | data_dict["pred_mask"] = pred_masks 146 | data_dict["label_mask"] = label_masks 147 | data_dict['pred_center'] = pred_center 148 | data_dict['pred_heading_class'] = pred_heading_class 149 | data_dict['pred_heading_residual'] = pred_heading_residual 150 | data_dict['pred_size_class'] = pred_size_class 151 | data_dict['pred_size_residual'] = pred_size_residual 152 | 153 | gt_ref = torch.argmax(data_dict["ref_box_label"], 1) 154 | gt_center = data_dict['center_label'] # (B,MAX_NUM_OBJ,3) 155 | gt_heading_class = data_dict['heading_class_label'] # B,K2 156 | gt_heading_residual = data_dict['heading_residual_label'] # B,K2 157 | gt_size_class = data_dict['size_class_label'] # B,K2 158 | gt_size_residual = data_dict['size_residual_label'] # B,K2,3 
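    # The loop below scores every sample with a 3D IoU. config.param2obb() returns an
    # oriented box laid out as [cx, cy, cz, dx, dy, dz, heading], which is why the code
    # slices obb[3:6] (size), obb[6] (heading) and obb[0:3] (center) before building the
    # 8-corner boxes. A minimal sketch of the per-sample computation (the
    # .detach().cpu().numpy() conversions are omitted; see the loop itself for the full form):
    #
    #   pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
    #                               pred_heading_class[i, pred_ref_idx],
    #                               pred_heading_residual[i, pred_ref_idx],
    #                               pred_size_class[i, pred_ref_idx],
    #                               pred_size_residual[i, pred_ref_idx])
    #   iou = box3d_iou(get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3]),
    #                   get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3]))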
159 | 160 | ious = [] 161 | multiple = [] 162 | others = [] 163 | pred_bboxes = [] 164 | gt_bboxes = [] 165 | for i in range(pred_ref.shape[0]): 166 | # compute the iou 167 | pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i] 168 | pred_obb = config.param2obb( 169 | pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(), 170 | pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(), 171 | pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(), 172 | pred_size_class[i, pred_ref_idx].detach().cpu().numpy(), 173 | pred_size_residual[i, pred_ref_idx].detach().cpu().numpy() 174 | ) 175 | gt_obb = config.param2obb( 176 | gt_center[i, gt_ref_idx, 0:3].detach().cpu().numpy(), 177 | gt_heading_class[i, gt_ref_idx].detach().cpu().numpy(), 178 | gt_heading_residual[i, gt_ref_idx].detach().cpu().numpy(), 179 | gt_size_class[i, gt_ref_idx].detach().cpu().numpy(), 180 | gt_size_residual[i, gt_ref_idx].detach().cpu().numpy() 181 | ) 182 | pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3]) 183 | gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3]) 184 | iou = eval_ref_one_sample(pred_bbox, gt_bbox) 185 | ious.append(iou) 186 | 187 | # NOTE: get_3d_box() will return problematic bboxes 188 | pred_bbox = construct_bbox_corners(pred_obb[0:3], pred_obb[3:6]) 189 | gt_bbox = construct_bbox_corners(gt_obb[0:3], gt_obb[3:6]) 190 | pred_bboxes.append(pred_bbox) 191 | gt_bboxes.append(gt_bbox) 192 | 193 | # construct the multiple mask 194 | multiple.append(data_dict["unique_multiple"][i].item()) 195 | 196 | # construct the others mask 197 | flag = 1 if data_dict["object_cat"][i] == 17 else 0 198 | others.append(flag) 199 | 200 | # lang 201 | if reference and use_lang_classifier: 202 | data_dict["lang_acc"] = (torch.argmax(data_dict['lang_scores'], 1) == data_dict["object_cat"]).float().mean() 203 | else: 204 | data_dict["lang_acc"] = torch.zeros(1)[0].cuda() 205 | 206 | # store 207 | data_dict["ref_iou"] = ious 208 | data_dict["ref_iou_rate_0.25"] = np.array(ious)[np.array(ious) >= 0.25].shape[0] / np.array(ious).shape[0] 209 | data_dict["ref_iou_rate_0.5"] = np.array(ious)[np.array(ious) >= 0.5].shape[0] / np.array(ious).shape[0] 210 | data_dict["ref_multiple_mask"] = multiple 211 | data_dict["ref_others_mask"] = others 212 | data_dict["pred_bboxes"] = pred_bboxes 213 | data_dict["gt_bboxes"] = gt_bboxes 214 | 215 | # -------------------------------------------- 216 | # Some other statistics 217 | obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2) # B,K 218 | obj_acc = torch.sum((obj_pred_val==data_dict['objectness_label'].long()).float()*data_dict['objectness_mask'])/(torch.sum(data_dict['objectness_mask'])+1e-6) 219 | data_dict['obj_acc'] = obj_acc 220 | # detection semantic classification 221 | sem_cls_label = torch.gather(data_dict['sem_cls_label'], 1, data_dict['object_assignment']) # select (B,K) from (B,K2) 222 | sem_cls_pred = data_dict['sem_cls_scores'].argmax(-1) # (B,K) 223 | sem_match = (sem_cls_label == sem_cls_pred).float() 224 | data_dict["sem_acc"] = (sem_match * data_dict["pred_mask"]).sum() / data_dict["pred_mask"].sum() 225 | 226 | return data_dict 227 | -------------------------------------------------------------------------------- /lib/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class SoftmaxRankingLoss(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self, inputs, targets): 10 | # input 
check 11 | assert inputs.shape == targets.shape 12 | 13 | # compute the probabilities 14 | probs = F.softmax(inputs + 1e-8, dim=1) 15 | 16 | # reduction 17 | loss = -torch.sum(torch.log(probs + 1e-8) * targets, dim=1).mean() 18 | 19 | return loss -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 5 | const int nsample); 6 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #define TOTAL_THREADS 512 14 | 15 | inline int opt_n_threads(int work_size) { 16 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 17 | 18 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 19 | } 20 | 21 | inline dim3 opt_block_config(int x, int y) { 22 | const int x_threads = opt_n_threads(x); 23 | const int y_threads = 24 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 25 | dim3 block_config(x_threads, y_threads, 1); 26 | 27 | return block_config; 28 | } 29 | 30 | #define CUDA_CHECK_ERRORS() \ 31 | do { \ 32 | cudaError_t err = cudaGetLastError(); \ 33 | if (cudaSuccess != err) { \ 34 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 35 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 36 | __FILE__); \ 37 | exit(-1); \ 38 | } \ 39 | } while (0) 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 5 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 6 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 7 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 8 | at::Tensor weight); 9 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 10 | at::Tensor weight, const int m); 11 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 5 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 6 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 7 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #define CHECK_CUDA(x) \ 6 | do { \ 7 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \ 8 | 
} while (0) 9 | 10 | #define CHECK_CONTIGUOUS(x) \ 11 | do { \ 12 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_IS_INT(x) \ 16 | do { \ 17 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \ 18 | #x " must be an int tensor"); \ 19 | } while (0) 20 | 21 | #define CHECK_IS_FLOAT(x) \ 22 | do { \ 23 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \ 24 | #x " must be a float tensor"); \ 25 | } while (0) 26 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include "ball_query.h" 2 | #include "utils.h" 3 | 4 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 5 | int nsample, const float *new_xyz, 6 | const float *xyz, int *idx); 7 | 8 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 9 | const int nsample) { 10 | CHECK_CONTIGUOUS(new_xyz); 11 | CHECK_CONTIGUOUS(xyz); 12 | CHECK_IS_FLOAT(new_xyz); 13 | CHECK_IS_FLOAT(xyz); 14 | 15 | if (new_xyz.is_cuda()) { 16 | CHECK_CUDA(xyz); 17 | } 18 | 19 | at::Tensor idx = 20 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 21 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 22 | 23 | if (new_xyz.is_cuda()) { 24 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 25 | radius, nsample, new_xyz.data_ptr(), 26 | xyz.data_ptr(), idx.data_ptr()); 27 | } else { 28 | AT_ASSERT(false, "CPU not supported"); 29 | } 30 | 31 | return idx; 32 | } 33 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | 7 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 8 | // output: idx(b, m, nsample) 9 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 10 | int nsample, 11 | const float *__restrict__ new_xyz, 12 | const float *__restrict__ xyz, 13 | int *__restrict__ idx) { 14 | int batch_index = blockIdx.x; 15 | xyz += batch_index * n * 3; 16 | new_xyz += batch_index * m * 3; 17 | idx += m * nsample * batch_index; 18 | 19 | int index = threadIdx.x; 20 | int stride = blockDim.x; 21 | 22 | float radius2 = radius * radius; 23 | for (int j = index; j < m; j += stride) { 24 | float new_x = new_xyz[j * 3 + 0]; 25 | float new_y = new_xyz[j * 3 + 1]; 26 | float new_z = new_xyz[j * 3 + 2]; 27 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 28 | float x = xyz[k * 3 + 0]; 29 | float y = xyz[k * 3 + 1]; 30 | float z = xyz[k * 3 + 2]; 31 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 32 | (new_z - z) * (new_z - z); 33 | if (d2 < radius2) { 34 | if (cnt == 0) { 35 | for (int l = 0; l < nsample; ++l) { 36 | idx[j * nsample + l] = k; 37 | } 38 | } 39 | idx[j * nsample + cnt] = k; 40 | ++cnt; 41 | } 42 | } 43 | } 44 | } 45 | 46 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 47 | int nsample, const float *new_xyz, 48 | const float *xyz, int *idx) { 49 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 50 | query_ball_point_kernel<<>>( 51 | b, n, m, radius, nsample, new_xyz, xyz, idx); 52 | 53 | CUDA_CHECK_ERRORS(); 54 | } 55 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/bindings.cpp: 
-------------------------------------------------------------------------------- 1 | #include "ball_query.h" 2 | #include "group_points.h" 3 | #include "interpolate.h" 4 | #include "sampling.h" 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | m.def("gather_points", &gather_points); 8 | m.def("gather_points_grad", &gather_points_grad); 9 | m.def("furthest_point_sampling", &furthest_point_sampling); 10 | 11 | m.def("three_nn", &three_nn); 12 | m.def("three_interpolate", &three_interpolate); 13 | m.def("three_interpolate_grad", &three_interpolate_grad); 14 | 15 | m.def("ball_query", &ball_query); 16 | 17 | m.def("group_points", &group_points); 18 | m.def("group_points_grad", &group_points_grad); 19 | } 20 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include "group_points.h" 2 | #include "utils.h" 3 | 4 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 5 | const float *points, const int *idx, 6 | float *out); 7 | 8 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 9 | int nsample, const float *grad_out, 10 | const int *idx, float *grad_points); 11 | 12 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 13 | CHECK_CONTIGUOUS(points); 14 | CHECK_CONTIGUOUS(idx); 15 | CHECK_IS_FLOAT(points); 16 | CHECK_IS_INT(idx); 17 | 18 | if (points.is_cuda()) { 19 | CHECK_CUDA(idx); 20 | } 21 | 22 | at::Tensor output = 23 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 24 | at::device(points.device()).dtype(at::ScalarType::Float)); 25 | 26 | if (points.is_cuda()) { 27 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 28 | idx.size(1), idx.size(2), 29 | points.data_ptr(), idx.data_ptr(), 30 | output.data_ptr()); 31 | } else { 32 | AT_ASSERT(false, "CPU not supported"); 33 | } 34 | 35 | return output; 36 | } 37 | 38 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 39 | CHECK_CONTIGUOUS(grad_out); 40 | CHECK_CONTIGUOUS(idx); 41 | CHECK_IS_FLOAT(grad_out); 42 | CHECK_IS_INT(idx); 43 | 44 | if (grad_out.is_cuda()) { 45 | CHECK_CUDA(idx); 46 | } 47 | 48 | at::Tensor output = 49 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 50 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 51 | 52 | if (grad_out.is_cuda()) { 53 | group_points_grad_kernel_wrapper( 54 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 55 | grad_out.data_ptr(), idx.data_ptr(), 56 | output.data_ptr()); 57 | } else { 58 | AT_ASSERT(false, "CPU not supported"); 59 | } 60 | 61 | return output; 62 | } 63 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | 6 | // input: points(b, c, n) idx(b, npoints, nsample) 7 | // output: out(b, c, npoints, nsample) 8 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 9 | int nsample, 10 | const float *__restrict__ points, 11 | const int *__restrict__ idx, 12 | float *__restrict__ out) { 13 | int batch_index = blockIdx.x; 14 | points += batch_index * n * c; 15 | idx += batch_index * npoints * nsample; 16 | out += batch_index * npoints * nsample * c; 17 | 18 | const int index = threadIdx.y * blockDim.x + 
threadIdx.x; 19 | const int stride = blockDim.y * blockDim.x; 20 | for (int i = index; i < c * npoints; i += stride) { 21 | const int l = i / npoints; 22 | const int j = i % npoints; 23 | for (int k = 0; k < nsample; ++k) { 24 | int ii = idx[j * nsample + k]; 25 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 26 | } 27 | } 28 | } 29 | 30 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 31 | const float *points, const int *idx, 32 | float *out) { 33 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 34 | 35 | group_points_kernel<<>>( 36 | b, c, n, npoints, nsample, points, idx, out); 37 | 38 | CUDA_CHECK_ERRORS(); 39 | } 40 | 41 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 42 | // output: grad_points(b, c, n) 43 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 44 | int nsample, 45 | const float *__restrict__ grad_out, 46 | const int *__restrict__ idx, 47 | float *__restrict__ grad_points) { 48 | int batch_index = blockIdx.x; 49 | grad_out += batch_index * npoints * nsample * c; 50 | idx += batch_index * npoints * nsample; 51 | grad_points += batch_index * n * c; 52 | 53 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 54 | const int stride = blockDim.y * blockDim.x; 55 | for (int i = index; i < c * npoints; i += stride) { 56 | const int l = i / npoints; 57 | const int j = i % npoints; 58 | for (int k = 0; k < nsample; ++k) { 59 | int ii = idx[j * nsample + k]; 60 | atomicAdd(grad_points + l * n + ii, 61 | grad_out[(l * npoints + j) * nsample + k]); 62 | } 63 | } 64 | } 65 | 66 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 67 | int nsample, const float *grad_out, 68 | const int *idx, float *grad_points) { 69 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 70 | 71 | group_points_grad_kernel<<>>( 72 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 73 | 74 | CUDA_CHECK_ERRORS(); 75 | } 76 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include "interpolate.h" 2 | #include "utils.h" 3 | 4 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 5 | const float *known, float *dist2, int *idx); 6 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 7 | const float *points, const int *idx, 8 | const float *weight, float *out); 9 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 10 | const float *grad_out, 11 | const int *idx, const float *weight, 12 | float *grad_points); 13 | 14 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 15 | CHECK_CONTIGUOUS(unknowns); 16 | CHECK_CONTIGUOUS(knows); 17 | CHECK_IS_FLOAT(unknowns); 18 | CHECK_IS_FLOAT(knows); 19 | 20 | if (unknowns.is_cuda()) { 21 | CHECK_CUDA(knows); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 26 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 27 | at::Tensor dist2 = 28 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 29 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (unknowns.is_cuda()) { 32 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 33 | unknowns.data_ptr(), knows.data_ptr(), 34 | dist2.data_ptr(), idx.data_ptr()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return {dist2, idx}; 40 | } 
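// A hedged sketch of how the Python side typically combines three_nn and
// three_interpolate for feature propagation (see pointnet2_modules.py); the
// pseudocode names below are illustrative, not part of this extension:
//
//   dist2, idx = three_nn(unknown_xyz, known_xyz)          # squared distances to the 3 nearest known points
//   recip      = 1.0 / (dist2 + 1e-8)
//   weight     = recip / recip.sum(dim=2, keepdim=True)    # inverse-distance weights that sum to 1
//   new_feats  = three_interpolate(known_feats, idx, weight)
//
// three_interpolate below then computes, per channel c and query point j:
//   out[c][j] = w1 * points[c][i1] + w2 * points[c][i2] + w3 * points[c][i3]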
41 | 42 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 43 | at::Tensor weight) { 44 | CHECK_CONTIGUOUS(points); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_CONTIGUOUS(weight); 47 | CHECK_IS_FLOAT(points); 48 | CHECK_IS_INT(idx); 49 | CHECK_IS_FLOAT(weight); 50 | 51 | if (points.is_cuda()) { 52 | CHECK_CUDA(idx); 53 | CHECK_CUDA(weight); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 58 | at::device(points.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (points.is_cuda()) { 61 | three_interpolate_kernel_wrapper( 62 | points.size(0), points.size(1), points.size(2), idx.size(1), 63 | points.data_ptr(), idx.data_ptr(), weight.data_ptr(), 64 | output.data_ptr()); 65 | } else { 66 | AT_ASSERT(false, "CPU not supported"); 67 | } 68 | 69 | return output; 70 | } 71 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 72 | at::Tensor weight, const int m) { 73 | CHECK_CONTIGUOUS(grad_out); 74 | CHECK_CONTIGUOUS(idx); 75 | CHECK_CONTIGUOUS(weight); 76 | CHECK_IS_FLOAT(grad_out); 77 | CHECK_IS_INT(idx); 78 | CHECK_IS_FLOAT(weight); 79 | 80 | if (grad_out.is_cuda()) { 81 | CHECK_CUDA(idx); 82 | CHECK_CUDA(weight); 83 | } 84 | 85 | at::Tensor output = 86 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 87 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 88 | 89 | if (grad_out.is_cuda()) { 90 | three_interpolate_grad_kernel_wrapper( 91 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 92 | grad_out.data_ptr(), idx.data_ptr(), 93 | weight.data_ptr(), output.data_ptr()); 94 | } else { 95 | AT_ASSERT(false, "CPU not supported"); 96 | } 97 | 98 | return output; 99 | } 100 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | 7 | // input: unknown(b, n, 3) known(b, m, 3) 8 | // output: dist2(b, n, 3), idx(b, n, 3) 9 | __global__ void three_nn_kernel(int b, int n, int m, 10 | const float *__restrict__ unknown, 11 | const float *__restrict__ known, 12 | float *__restrict__ dist2, 13 | int *__restrict__ idx) { 14 | int batch_index = blockIdx.x; 15 | unknown += batch_index * n * 3; 16 | known += batch_index * m * 3; 17 | dist2 += batch_index * n * 3; 18 | idx += batch_index * n * 3; 19 | 20 | int index = threadIdx.x; 21 | int stride = blockDim.x; 22 | for (int j = index; j < n; j += stride) { 23 | float ux = unknown[j * 3 + 0]; 24 | float uy = unknown[j * 3 + 1]; 25 | float uz = unknown[j * 3 + 2]; 26 | 27 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 28 | int besti1 = 0, besti2 = 0, besti3 = 0; 29 | for (int k = 0; k < m; ++k) { 30 | float x = known[k * 3 + 0]; 31 | float y = known[k * 3 + 1]; 32 | float z = known[k * 3 + 2]; 33 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 34 | if (d < best1) { 35 | best3 = best2; 36 | besti3 = besti2; 37 | best2 = best1; 38 | besti2 = besti1; 39 | best1 = d; 40 | besti1 = k; 41 | } else if (d < best2) { 42 | best3 = best2; 43 | besti3 = besti2; 44 | best2 = d; 45 | besti2 = k; 46 | } else if (d < best3) { 47 | best3 = d; 48 | besti3 = k; 49 | } 50 | } 51 | dist2[j * 3 + 0] = best1; 52 | dist2[j * 3 + 1] = best2; 53 | dist2[j * 3 + 2] = best3; 54 | 55 | idx[j * 3 + 0] = besti1; 56 | idx[j * 3 + 1] = besti2; 57 | idx[j * 3 + 2] = besti3; 58 | } 59 | } 60 | 61 | void three_nn_kernel_wrapper(int 
b, int n, int m, const float *unknown, 62 | const float *known, float *dist2, int *idx) { 63 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 64 | three_nn_kernel<<>>(b, n, m, unknown, known, 65 | dist2, idx); 66 | 67 | CUDA_CHECK_ERRORS(); 68 | } 69 | 70 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 71 | // output: out(b, c, n) 72 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 73 | const float *__restrict__ points, 74 | const int *__restrict__ idx, 75 | const float *__restrict__ weight, 76 | float *__restrict__ out) { 77 | int batch_index = blockIdx.x; 78 | points += batch_index * m * c; 79 | 80 | idx += batch_index * n * 3; 81 | weight += batch_index * n * 3; 82 | 83 | out += batch_index * n * c; 84 | 85 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 86 | const int stride = blockDim.y * blockDim.x; 87 | for (int i = index; i < c * n; i += stride) { 88 | const int l = i / n; 89 | const int j = i % n; 90 | float w1 = weight[j * 3 + 0]; 91 | float w2 = weight[j * 3 + 1]; 92 | float w3 = weight[j * 3 + 2]; 93 | 94 | int i1 = idx[j * 3 + 0]; 95 | int i2 = idx[j * 3 + 1]; 96 | int i3 = idx[j * 3 + 2]; 97 | 98 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 99 | points[l * m + i3] * w3; 100 | } 101 | } 102 | 103 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 104 | const float *points, const int *idx, 105 | const float *weight, float *out) { 106 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 107 | three_interpolate_kernel<<>>( 108 | b, c, m, n, points, idx, weight, out); 109 | 110 | CUDA_CHECK_ERRORS(); 111 | } 112 | 113 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 114 | // output: grad_points(b, c, m) 115 | 116 | __global__ void three_interpolate_grad_kernel( 117 | int b, int c, int n, int m, const float *__restrict__ grad_out, 118 | const int *__restrict__ idx, const float *__restrict__ weight, 119 | float *__restrict__ grad_points) { 120 | int batch_index = blockIdx.x; 121 | grad_out += batch_index * n * c; 122 | idx += batch_index * n * 3; 123 | weight += batch_index * n * 3; 124 | grad_points += batch_index * m * c; 125 | 126 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 127 | const int stride = blockDim.y * blockDim.x; 128 | for (int i = index; i < c * n; i += stride) { 129 | const int l = i / n; 130 | const int j = i % n; 131 | float w1 = weight[j * 3 + 0]; 132 | float w2 = weight[j * 3 + 1]; 133 | float w3 = weight[j * 3 + 2]; 134 | 135 | int i1 = idx[j * 3 + 0]; 136 | int i2 = idx[j * 3 + 1]; 137 | int i3 = idx[j * 3 + 2]; 138 | 139 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 140 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 141 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 142 | } 143 | } 144 | 145 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 146 | const float *grad_out, 147 | const int *idx, const float *weight, 148 | float *grad_points) { 149 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 150 | three_interpolate_grad_kernel<<>>( 151 | b, c, n, m, grad_out, idx, weight, grad_points); 152 | 153 | CUDA_CHECK_ERRORS(); 154 | } 155 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include "sampling.h" 2 | #include "utils.h" 3 | 4 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 5 | const float *points, 
const int *idx, 6 | float *out); 7 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 8 | const float *grad_out, const int *idx, 9 | float *grad_points); 10 | 11 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 12 | const float *dataset, float *temp, 13 | int *idxs); 14 | 15 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), points.data_ptr(), 32 | idx.data_ptr(), output.data_ptr()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 41 | const int n) { 42 | CHECK_CONTIGUOUS(grad_out); 43 | CHECK_CONTIGUOUS(idx); 44 | CHECK_IS_FLOAT(grad_out); 45 | CHECK_IS_INT(idx); 46 | 47 | if (grad_out.is_cuda()) { 48 | CHECK_CUDA(idx); 49 | } 50 | 51 | at::Tensor output = 52 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 53 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 54 | 55 | if (grad_out.is_cuda()) { 56 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 57 | idx.size(1), grad_out.data_ptr(), 58 | idx.data_ptr(), 59 | output.data_ptr()); 60 | } else { 61 | AT_ASSERT(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 67 | CHECK_CONTIGUOUS(points); 68 | CHECK_IS_FLOAT(points); 69 | 70 | at::Tensor output = 71 | torch::zeros({points.size(0), nsamples}, 72 | at::device(points.device()).dtype(at::ScalarType::Int)); 73 | 74 | at::Tensor tmp = 75 | torch::full({points.size(0), points.size(1)}, 1e10, 76 | at::device(points.device()).dtype(at::ScalarType::Float)); 77 | 78 | if (points.is_cuda()) { 79 | furthest_point_sampling_kernel_wrapper( 80 | points.size(0), points.size(1), nsamples, points.data_ptr(), 81 | tmp.data_ptr(), output.data_ptr()); 82 | } else { 83 | AT_ASSERT(false, "CPU not supported"); 84 | } 85 | 86 | return output; 87 | } 88 | -------------------------------------------------------------------------------- /lib/pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | 6 | // input: points(b, c, n) idx(b, m) 7 | // output: out(b, c, m) 8 | __global__ void gather_points_kernel(int b, int c, int n, int m, 9 | const float *__restrict__ points, 10 | const int *__restrict__ idx, 11 | float *__restrict__ out) { 12 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 13 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 14 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 15 | int a = idx[i * m + j]; 16 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 17 | } 18 | } 19 | } 20 | } 21 | 22 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 23 | const float *points, const int *idx, 24 | float *out) { 25 | gather_points_kernel<<>>(b, c, n, npoints, 27 | points, idx, out); 28 | 29 | CUDA_CHECK_ERRORS(); 30 | } 31 | 32 | // input: grad_out(b, c, m) idx(b, m) 33 | // 
output: grad_points(b, c, n) 34 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 35 | const float *__restrict__ grad_out, 36 | const int *__restrict__ idx, 37 | float *__restrict__ grad_points) { 38 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 39 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 40 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 41 | int a = idx[i * m + j]; 42 | atomicAdd(grad_points + (i * c + l) * n + a, 43 | grad_out[(i * c + l) * m + j]); 44 | } 45 | } 46 | } 47 | } 48 | 49 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 50 | const float *grad_out, const int *idx, 51 | float *grad_points) { 52 | gather_points_grad_kernel<<>>( 54 | b, c, n, npoints, grad_out, idx, grad_points); 55 | 56 | CUDA_CHECK_ERRORS(); 57 | } 58 | 59 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 60 | int idx1, int idx2) { 61 | const float v1 = dists[idx1], v2 = dists[idx2]; 62 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 63 | dists[idx1] = max(v1, v2); 64 | dists_i[idx1] = v2 > v1 ? i2 : i1; 65 | } 66 | 67 | // Input dataset: (b, n, 3), tmp: (b, n) 68 | // Ouput idxs (b, m) 69 | template 70 | __global__ void furthest_point_sampling_kernel( 71 | int b, int n, int m, const float *__restrict__ dataset, 72 | float *__restrict__ temp, int *__restrict__ idxs) { 73 | if (m <= 0) return; 74 | __shared__ float dists[block_size]; 75 | __shared__ int dists_i[block_size]; 76 | 77 | int batch_index = blockIdx.x; 78 | dataset += batch_index * n * 3; 79 | temp += batch_index * n; 80 | idxs += batch_index * m; 81 | 82 | int tid = threadIdx.x; 83 | const int stride = block_size; 84 | 85 | int old = 0; 86 | if (threadIdx.x == 0) idxs[0] = old; 87 | 88 | __syncthreads(); 89 | for (int j = 1; j < m; j++) { 90 | int besti = 0; 91 | float best = -1; 92 | float x1 = dataset[old * 3 + 0]; 93 | float y1 = dataset[old * 3 + 1]; 94 | float z1 = dataset[old * 3 + 2]; 95 | for (int k = tid; k < n; k += stride) { 96 | float x2, y2, z2; 97 | x2 = dataset[k * 3 + 0]; 98 | y2 = dataset[k * 3 + 1]; 99 | z2 = dataset[k * 3 + 2]; 100 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 101 | if (mag <= 1e-3) continue; 102 | 103 | float d = 104 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 105 | 106 | float d2 = min(d, temp[k]); 107 | temp[k] = d2; 108 | besti = d2 > best ? k : besti; 109 | best = d2 > best ? 
d2 : best; 110 | } 111 | dists[tid] = best; 112 | dists_i[tid] = besti; 113 | __syncthreads(); 114 | 115 | if (block_size >= 512) { 116 | if (tid < 256) { 117 | __update(dists, dists_i, tid, tid + 256); 118 | } 119 | __syncthreads(); 120 | } 121 | if (block_size >= 256) { 122 | if (tid < 128) { 123 | __update(dists, dists_i, tid, tid + 128); 124 | } 125 | __syncthreads(); 126 | } 127 | if (block_size >= 128) { 128 | if (tid < 64) { 129 | __update(dists, dists_i, tid, tid + 64); 130 | } 131 | __syncthreads(); 132 | } 133 | if (block_size >= 64) { 134 | if (tid < 32) { 135 | __update(dists, dists_i, tid, tid + 32); 136 | } 137 | __syncthreads(); 138 | } 139 | if (block_size >= 32) { 140 | if (tid < 16) { 141 | __update(dists, dists_i, tid, tid + 16); 142 | } 143 | __syncthreads(); 144 | } 145 | if (block_size >= 16) { 146 | if (tid < 8) { 147 | __update(dists, dists_i, tid, tid + 8); 148 | } 149 | __syncthreads(); 150 | } 151 | if (block_size >= 8) { 152 | if (tid < 4) { 153 | __update(dists, dists_i, tid, tid + 4); 154 | } 155 | __syncthreads(); 156 | } 157 | if (block_size >= 4) { 158 | if (tid < 2) { 159 | __update(dists, dists_i, tid, tid + 2); 160 | } 161 | __syncthreads(); 162 | } 163 | if (block_size >= 2) { 164 | if (tid < 1) { 165 | __update(dists, dists_i, tid, tid + 1); 166 | } 167 | __syncthreads(); 168 | } 169 | 170 | old = dists_i[0]; 171 | if (tid == 0) idxs[j] = old; 172 | } 173 | } 174 | 175 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 176 | const float *dataset, float *temp, 177 | int *idxs) { 178 | unsigned int n_threads = opt_n_threads(n); 179 | 180 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 181 | 182 | switch (n_threads) { 183 | case 512: 184 | furthest_point_sampling_kernel<512> 185 | <<>>(b, n, m, dataset, temp, idxs); 186 | break; 187 | case 256: 188 | furthest_point_sampling_kernel<256> 189 | <<>>(b, n, m, dataset, temp, idxs); 190 | break; 191 | case 128: 192 | furthest_point_sampling_kernel<128> 193 | <<>>(b, n, m, dataset, temp, idxs); 194 | break; 195 | case 64: 196 | furthest_point_sampling_kernel<64> 197 | <<>>(b, n, m, dataset, temp, idxs); 198 | break; 199 | case 32: 200 | furthest_point_sampling_kernel<32> 201 | <<>>(b, n, m, dataset, temp, idxs); 202 | break; 203 | case 16: 204 | furthest_point_sampling_kernel<16> 205 | <<>>(b, n, m, dataset, temp, idxs); 206 | break; 207 | case 8: 208 | furthest_point_sampling_kernel<8> 209 | <<>>(b, n, m, dataset, temp, idxs); 210 | break; 211 | case 4: 212 | furthest_point_sampling_kernel<4> 213 | <<>>(b, n, m, dataset, temp, idxs); 214 | break; 215 | case 2: 216 | furthest_point_sampling_kernel<2> 217 | <<>>(b, n, m, dataset, temp, idxs); 218 | break; 219 | case 1: 220 | furthest_point_sampling_kernel<1> 221 | <<>>(b, n, m, dataset, temp, idxs); 222 | break; 223 | default: 224 | furthest_point_sampling_kernel<512> 225 | <<>>(b, n, m, dataset, temp, idxs); 226 | } 227 | 228 | CUDA_CHECK_ERRORS(); 229 | } 230 | -------------------------------------------------------------------------------- /lib/pointnet2/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.0.0" 2 | -------------------------------------------------------------------------------- /lib/pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. ''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /lib/pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | class SharedMLP(nn.Sequential): 12 | 13 | def __init__( 14 | self, 15 | args: List[int], 16 | *, 17 | bn: bool = False, 18 | activation=nn.ReLU(inplace=True), 19 | preact: bool = False, 20 | first: bool = False, 21 | name: str = "" 22 | ): 23 | super().__init__() 24 | 25 | for i in range(len(args) - 1): 26 | self.add_module( 27 | name + 'layer{}'.format(i), 28 | Conv2d( 29 | args[i], 30 | args[i + 1], 31 | bn=(not first or not preact or (i != 0)) and bn, 32 | activation=activation 33 | if (not first or not preact or (i != 0)) else None, 34 | preact=preact 35 | ) 36 | ) 37 | 38 | 39 | class _BNBase(nn.Sequential): 40 | 41 | def __init__(self, in_size, batch_norm=None, name=""): 42 | super().__init__() 43 | self.add_module(name + "bn", batch_norm(in_size)) 44 | 45 | nn.init.constant_(self[0].weight, 1.0) 46 | nn.init.constant_(self[0].bias, 0) 47 | 48 | 49 | class BatchNorm1d(_BNBase): 50 | 51 | def __init__(self, in_size: int, *, name: str = ""): 52 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 53 | 54 | 55 | class BatchNorm2d(_BNBase): 56 | 57 | def __init__(self, in_size: int, name: str = ""): 58 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 59 | 60 | 61 | class BatchNorm3d(_BNBase): 62 | 63 | def __init__(self, in_size: int, name: str = ""): 64 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 65 | 66 | 67 | class _ConvBase(nn.Sequential): 68 | 69 | def __init__( 70 | self, 71 | in_size, 72 | out_size, 73 | kernel_size, 74 | stride, 75 | padding, 76 | activation, 77 | bn, 78 | init, 79 | conv=None, 80 | batch_norm=None, 81 | bias=True, 82 | preact=False, 83 | name="" 84 | ): 85 | super().__init__() 86 | 87 | bias = bias and (not bn) 88 | conv_unit = conv( 89 | in_size, 90 | out_size, 91 | kernel_size=kernel_size, 92 | stride=stride, 93 | padding=padding, 94 | bias=bias 95 | ) 96 | init(conv_unit.weight) 97 | if bias: 98 | nn.init.constant_(conv_unit.bias, 0) 
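        # The branches below only decide the ordering of the sub-modules (a summary of
        # the logic that follows, no extra behavior): preact=True builds
        # [bn] -> [activation] -> conv, while preact=False builds conv -> [bn] -> [activation],
        # with bn/activation added only when enabled. Note that in the preact case the
        # BatchNorm is constructed on in_size rather than out_size, since it runs before
        # the convolution.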
99 | 100 | if bn: 101 | if not preact: 102 | bn_unit = batch_norm(out_size) 103 | else: 104 | bn_unit = batch_norm(in_size) 105 | 106 | if preact: 107 | if bn: 108 | self.add_module(name + 'bn', bn_unit) 109 | 110 | if activation is not None: 111 | self.add_module(name + 'activation', activation) 112 | 113 | self.add_module(name + 'conv', conv_unit) 114 | 115 | if not preact: 116 | if bn: 117 | self.add_module(name + 'bn', bn_unit) 118 | 119 | if activation is not None: 120 | self.add_module(name + 'activation', activation) 121 | 122 | 123 | class Conv1d(_ConvBase): 124 | 125 | def __init__( 126 | self, 127 | in_size: int, 128 | out_size: int, 129 | *, 130 | kernel_size: int = 1, 131 | stride: int = 1, 132 | padding: int = 0, 133 | activation=nn.ReLU(inplace=True), 134 | bn: bool = False, 135 | init=nn.init.kaiming_normal_, 136 | bias: bool = True, 137 | preact: bool = False, 138 | name: str = "" 139 | ): 140 | super().__init__( 141 | in_size, 142 | out_size, 143 | kernel_size, 144 | stride, 145 | padding, 146 | activation, 147 | bn, 148 | init, 149 | conv=nn.Conv1d, 150 | batch_norm=BatchNorm1d, 151 | bias=bias, 152 | preact=preact, 153 | name=name 154 | ) 155 | 156 | 157 | class Conv2d(_ConvBase): 158 | 159 | def __init__( 160 | self, 161 | in_size: int, 162 | out_size: int, 163 | *, 164 | kernel_size: Tuple[int, int] = (1, 1), 165 | stride: Tuple[int, int] = (1, 1), 166 | padding: Tuple[int, int] = (0, 0), 167 | activation=nn.ReLU(inplace=True), 168 | bn: bool = False, 169 | init=nn.init.kaiming_normal_, 170 | bias: bool = True, 171 | preact: bool = False, 172 | name: str = "" 173 | ): 174 | super().__init__( 175 | in_size, 176 | out_size, 177 | kernel_size, 178 | stride, 179 | padding, 180 | activation, 181 | bn, 182 | init, 183 | conv=nn.Conv2d, 184 | batch_norm=BatchNorm2d, 185 | bias=bias, 186 | preact=preact, 187 | name=name 188 | ) 189 | 190 | 191 | class Conv3d(_ConvBase): 192 | 193 | def __init__( 194 | self, 195 | in_size: int, 196 | out_size: int, 197 | *, 198 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 199 | stride: Tuple[int, int, int] = (1, 1, 1), 200 | padding: Tuple[int, int, int] = (0, 0, 0), 201 | activation=nn.ReLU(inplace=True), 202 | bn: bool = False, 203 | init=nn.init.kaiming_normal_, 204 | bias: bool = True, 205 | preact: bool = False, 206 | name: str = "" 207 | ): 208 | super().__init__( 209 | in_size, 210 | out_size, 211 | kernel_size, 212 | stride, 213 | padding, 214 | activation, 215 | bn, 216 | init, 217 | conv=nn.Conv3d, 218 | batch_norm=BatchNorm3d, 219 | bias=bias, 220 | preact=preact, 221 | name=name 222 | ) 223 | 224 | 225 | class FC(nn.Sequential): 226 | 227 | def __init__( 228 | self, 229 | in_size: int, 230 | out_size: int, 231 | *, 232 | activation=nn.ReLU(inplace=True), 233 | bn: bool = False, 234 | init=None, 235 | preact: bool = False, 236 | name: str = "" 237 | ): 238 | super().__init__() 239 | 240 | fc = nn.Linear(in_size, out_size, bias=not bn) 241 | if init is not None: 242 | init(fc.weight) 243 | if not bn: 244 | nn.init.constant_(fc.bias, 0) 245 | 246 | if preact: 247 | if bn: 248 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 249 | 250 | if activation is not None: 251 | self.add_module(name + 'activation', activation) 252 | 253 | self.add_module(name + 'fc', fc) 254 | 255 | if not preact: 256 | if bn: 257 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 258 | 259 | if activation is not None: 260 | self.add_module(name + 'activation', activation) 261 | 262 | def set_bn_momentum_default(bn_momentum): 263 | 264 | def fn(m): 265 
| if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 266 | m.momentum = bn_momentum 267 | 268 | return fn 269 | 270 | 271 | class BNMomentumScheduler(object): 272 | 273 | def __init__( 274 | self, model, bn_lambda, last_epoch=-1, 275 | setter=set_bn_momentum_default 276 | ): 277 | if not isinstance(model, nn.Module): 278 | raise RuntimeError( 279 | "Class '{}' is not a PyTorch nn Module".format( 280 | type(model).__name__ 281 | ) 282 | ) 283 | 284 | self.model = model 285 | self.setter = setter 286 | self.lmbd = bn_lambda 287 | 288 | self.step(last_epoch + 1) 289 | self.last_epoch = last_epoch 290 | 291 | def step(self, epoch=None): 292 | if epoch is None: 293 | epoch = self.last_epoch + 1 294 | 295 | self.last_epoch = epoch 296 | self.model.apply(self.setter(self.lmbd(epoch))) 297 | 298 | 299 | -------------------------------------------------------------------------------- /lib/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import os.path as osp 4 | 5 | from setuptools import find_packages, setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | _this_dir = osp.dirname(osp.abspath(__file__)) 9 | _ext_src_root = "_ext_src" 10 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 11 | "{}/src/*.cu".format(_ext_src_root) 12 | ) 13 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 14 | 15 | requirements = ["torch>=1.4"] 16 | 17 | os.environ["TORCH_CUDA_ARCH_LIST"] = "3.7+PTX;5.0;6.0;6.1;6.2;7.0;7.5" 18 | 19 | exec(open("_version.py").read()) 20 | 21 | setup( 22 | name='pointnet2', 23 | version=__version__, 24 | packages=find_packages(), 25 | install_requires=requirements, 26 | ext_modules=[ 27 | CUDAExtension( 28 | name='pointnet2._ext', 29 | sources=_ext_sources, 30 | extra_compile_args={ 31 | "cxx": ["-O3"], 32 | "nvcc": ["-O3", "-Xfatbin", "-compress-all"], 33 | }, 34 | include_dirs=[osp.join(_this_dir, _ext_src_root, "include")], 35 | ) 36 | ], 37 | cmdclass={"build_ext": BuildExtension}, 38 | include_package_data=True, 39 | ) -------------------------------------------------------------------------------- /models/backbone_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import sys 6 | import os 7 | 8 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder 9 | from lib.pointnet2.pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 10 | 11 | class Pointnet2Backbone(nn.Module): 12 | r""" 13 | Backbone network for point cloud feature learning. 14 | Based on Pointnet++ single-scale grouping network. 15 | 16 | Parameters 17 | ---------- 18 | input_feature_dim: int 19 | Number of input channels in the feature descriptor for each point. 20 | e.g. 3 for RGB. 
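        For example, a point cloud with per-point RGB would be passed to forward()
        as a (B, N, 3 + 3) tensor with input_feature_dim=3; the leading xyz channels
        are split off internally by _break_up_pc().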
21 | """ 22 | def __init__(self, input_feature_dim=0): 23 | super().__init__() 24 | 25 | self.input_feature_dim = input_feature_dim 26 | 27 | # --------- 4 SET ABSTRACTION LAYERS --------- 28 | self.sa1 = PointnetSAModuleVotes( 29 | npoint=2048, 30 | radius=0.2, 31 | nsample=64, 32 | mlp=[input_feature_dim, 64, 64, 128], 33 | use_xyz=True, 34 | normalize_xyz=True 35 | ) 36 | 37 | self.sa2 = PointnetSAModuleVotes( 38 | npoint=1024, 39 | radius=0.4, 40 | nsample=32, 41 | mlp=[128, 128, 128, 256], 42 | use_xyz=True, 43 | normalize_xyz=True 44 | ) 45 | 46 | self.sa3 = PointnetSAModuleVotes( 47 | npoint=512, 48 | radius=0.8, 49 | nsample=16, 50 | mlp=[256, 128, 128, 256], 51 | use_xyz=True, 52 | normalize_xyz=True 53 | ) 54 | 55 | self.sa4 = PointnetSAModuleVotes( 56 | npoint=256, 57 | radius=1.2, 58 | nsample=16, 59 | mlp=[256, 128, 128, 256], 60 | use_xyz=True, 61 | normalize_xyz=True 62 | ) 63 | 64 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 65 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 66 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 67 | 68 | def _break_up_pc(self, pc): 69 | xyz = pc[..., :3].contiguous() 70 | features = pc[..., 3:].transpose(1, 2).contiguous() if pc.size(-1) > 3 else None 71 | 72 | return xyz, features 73 | 74 | def forward(self, data_dict): 75 | r""" 76 | Forward pass of the network 77 | 78 | Parameters 79 | ---------- 80 | pointcloud: Variable(torch.cuda.FloatTensor) 81 | (B, N, 3 + input_feature_dim) tensor 82 | Point cloud to run predicts on 83 | Each point in the point-cloud MUST 84 | be formated as (x, y, z, features...) 85 | 86 | Returns 87 | ---------- 88 | data_dict: {XXX_xyz, XXX_features, XXX_inds} 89 | XXX_xyz: float32 Tensor of shape (B,K,3) 90 | XXX_features: float32 Tensor of shape (B,K,D) 91 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 92 | """ 93 | 94 | pointcloud = data_dict["point_clouds"] 95 | 96 | batch_size = pointcloud.shape[0] 97 | 98 | xyz, features = self._break_up_pc(pointcloud) 99 | 100 | # --------- 4 SET ABSTRACTION LAYERS --------- 101 | xyz, features, fps_inds = self.sa1(xyz, features) 102 | data_dict['sa1_inds'] = fps_inds 103 | data_dict['sa1_xyz'] = xyz 104 | data_dict['sa1_features'] = features 105 | 106 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 107 | data_dict['sa2_inds'] = fps_inds 108 | data_dict['sa2_xyz'] = xyz 109 | data_dict['sa2_features'] = features 110 | 111 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 112 | data_dict['sa3_xyz'] = xyz 113 | data_dict['sa3_features'] = features 114 | 115 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 116 | data_dict['sa4_xyz'] = xyz 117 | data_dict['sa4_features'] = features 118 | 119 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 120 | features = self.fp1(data_dict['sa3_xyz'], data_dict['sa4_xyz'], data_dict['sa3_features'], data_dict['sa4_features']) 121 | features = self.fp2(data_dict['sa2_xyz'], data_dict['sa3_xyz'], data_dict['sa2_features'], features) 122 | data_dict['fp2_features'] = features 123 | data_dict['fp2_xyz'] = data_dict['sa2_xyz'] 124 | num_seed = data_dict['fp2_xyz'].shape[1] 125 | data_dict['fp2_inds'] = data_dict['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 126 | return data_dict 127 | 128 | if __name__=='__main__': 129 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 130 | print(backbone_net) 131 | backbone_net.eval() 132 | out = 
backbone_net(torch.rand(16,20000,6).cuda()) 133 | for key in sorted(out.keys()): 134 | print(key, '\t', out[key].shape) 135 | -------------------------------------------------------------------------------- /models/lang_module.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | 6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 7 | 8 | class LangModule(nn.Module): 9 | def __init__(self, num_text_classes, use_lang_classifier=True, use_bidir=False, 10 | emb_size=300, hidden_size=256): 11 | super().__init__() 12 | 13 | self.num_text_classes = num_text_classes 14 | self.use_lang_classifier = use_lang_classifier 15 | self.use_bidir = use_bidir 16 | 17 | self.gru = nn.GRU( 18 | input_size=emb_size, 19 | hidden_size=hidden_size, 20 | batch_first=True, 21 | bidirectional=self.use_bidir 22 | ) 23 | lang_size = hidden_size * 2 if self.use_bidir else hidden_size 24 | 25 | # language classifier 26 | if use_lang_classifier: 27 | self.lang_cls = nn.Sequential( 28 | nn.Linear(lang_size, num_text_classes), 29 | nn.Dropout() 30 | ) 31 | 32 | 33 | def forward(self, data_dict): 34 | """ 35 | encode the input descriptions 36 | """ 37 | 38 | word_embs = data_dict["lang_feat"] 39 | lang_feat = pack_padded_sequence(word_embs, data_dict["lang_len"], batch_first=True, enforce_sorted=False) 40 | 41 | # encode description 42 | _, lang_last = self.gru(lang_feat) 43 | lang_last = lang_last.permute(1, 0, 2).contiguous().flatten(start_dim=1) # batch_size, hidden_size * num_dir 44 | 45 | # store the encoded language features 46 | data_dict["lang_emb"] = lang_last # B, hidden_size 47 | 48 | # classify 49 | if self.use_lang_classifier: 50 | data_dict["lang_scores"] = self.lang_cls(data_dict["lang_emb"]) 51 | 52 | return data_dict 53 | 54 | -------------------------------------------------------------------------------- /models/match_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MatchModule(nn.Module): 5 | def __init__(self, num_proposals=256, lang_size=256, hidden_size=128): 6 | super().__init__() 7 | 8 | self.num_proposals = num_proposals 9 | self.lang_size = lang_size 10 | self.hidden_size = hidden_size 11 | 12 | self.fuse = nn.Sequential( 13 | nn.Conv1d(self.lang_size + 128, hidden_size, 1), 14 | nn.ReLU() 15 | ) 16 | # self.match = nn.Conv1d(hidden_size, 1, 1) 17 | self.match = nn.Sequential( 18 | nn.Conv1d(hidden_size, hidden_size, 1), 19 | nn.ReLU(), 20 | nn.BatchNorm1d(hidden_size), 21 | nn.Conv1d(hidden_size, hidden_size, 1), 22 | nn.ReLU(), 23 | nn.BatchNorm1d(hidden_size), 24 | nn.Conv1d(hidden_size, 1, 1) 25 | ) 26 | 27 | def forward(self, data_dict): 28 | """ 29 | Args: 30 | xyz: (B,K,3) 31 | features: (B,C,K) 32 | Returns: 33 | scores: (B,num_proposal,2+3+NH*2+NS*4) 34 | """ 35 | 36 | # unpack outputs from detection branch 37 | features = data_dict['aggregated_vote_features'] # batch_size, num_proposal, 128 38 | objectness_masks = data_dict['objectness_scores'].max(2)[1].float().unsqueeze(2) # batch_size, num_proposals, 1 39 | 40 | # unpack outputs from language branch 41 | lang_feat = data_dict["lang_emb"] # batch_size, lang_size 42 | lang_feat = lang_feat.unsqueeze(1).repeat(1, self.num_proposals, 1) # batch_size, num_proposals, lang_size 43 | 44 | # fuse 45 | features = torch.cat([features, lang_feat], dim=-1) # batch_size, num_proposals, 128 + lang_size 46 | features = 
features.permute(0, 2, 1).contiguous() # batch_size, 128 + lang_size, num_proposals 47 | 48 | # fuse features 49 | features = self.fuse(features) # batch_size, hidden_size, num_proposals 50 | 51 | # mask out invalid proposals 52 | objectness_masks = objectness_masks.permute(0, 2, 1).contiguous() # batch_size, 1, num_proposals 53 | features = features * objectness_masks 54 | 55 | # match 56 | confidences = self.match(features).squeeze(1) # batch_size, num_proposals 57 | 58 | data_dict["cluster_ref"] = confidences 59 | 60 | return data_dict 61 | -------------------------------------------------------------------------------- /models/proposal_module.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/models/proposal_module.py 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import numpy as np 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder 13 | import lib.pointnet2.pointnet2_utils 14 | from lib.pointnet2.pointnet2_modules import PointnetSAModuleVotes 15 | 16 | class ProposalModule(nn.Module): 17 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256): 18 | super().__init__() 19 | 20 | self.num_class = num_class 21 | self.num_heading_bin = num_heading_bin 22 | self.num_size_cluster = num_size_cluster 23 | self.mean_size_arr = mean_size_arr 24 | self.num_proposal = num_proposal 25 | self.sampling = sampling 26 | self.seed_feat_dim = seed_feat_dim 27 | 28 | # Vote clustering 29 | self.vote_aggregation = PointnetSAModuleVotes( 30 | npoint=self.num_proposal, 31 | radius=0.3, 32 | nsample=16, 33 | mlp=[self.seed_feat_dim, 128, 128, 128], 34 | use_xyz=True, 35 | normalize_xyz=True 36 | ) 37 | 38 | # Object proposal/detection 39 | # Objectness scores (2), center residual (3), 40 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4) 41 | self.proposal = nn.Sequential( 42 | nn.Conv1d(128,128,1, bias=False), 43 | nn.BatchNorm1d(128), 44 | nn.ReLU(), 45 | nn.Conv1d(128,128,1, bias=False), 46 | nn.BatchNorm1d(128), 47 | nn.ReLU(), 48 | nn.Conv1d(128,2+3+num_heading_bin*2+num_size_cluster*4+self.num_class,1) 49 | ) 50 | 51 | def forward(self, xyz, features, data_dict): 52 | """ 53 | Args: 54 | xyz: (B,K,3) 55 | features: (B,C,K) 56 | Returns: 57 | scores: (B,num_proposal,2+3+NH*2+NS*4) 58 | """ 59 | 60 | # Farthest point sampling (FPS) on votes 61 | xyz, features, fps_inds = self.vote_aggregation(xyz, features) 62 | 63 | sample_inds = fps_inds 64 | 65 | data_dict['aggregated_vote_xyz'] = xyz # (batch_size, num_proposal, 3) 66 | data_dict['aggregated_vote_features'] = features.permute(0, 2, 1).contiguous() # (batch_size, num_proposal, 128) 67 | data_dict['aggregated_vote_inds'] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal 68 | 69 | # --------- PROPOSAL GENERATION --------- 70 | net = self.proposal(features) 71 | data_dict = self.decode_scores(net, data_dict, self.num_class, self.num_heading_bin, self.num_size_cluster, self.mean_size_arr) 72 | 73 | return data_dict 74 | 75 | def decode_scores(self, net, data_dict, num_class, num_heading_bin, num_size_cluster, mean_size_arr): 76 | """ 77 | decode the predicted parameters for the bounding boxes 78 | 79 | """ 80 | net_transposed = net.transpose(2,1).contiguous() # (batch_size, 1024, ..) 
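# channel layout of net_transposed along the last dim (decoded step by step below):
#   [0:2]                    objectness scores
#   [2:5]                    center offsets, added to aggregated_vote_xyz
#   [5:5+NH]                 heading class scores               (NH = num_heading_bin)
#   [5+NH:5+2*NH]            normalized heading residuals
#   [5+2*NH:5+2*NH+NS]       size class scores                  (NS = num_size_cluster)
#   [5+2*NH+NS:5+2*NH+4*NS]  normalized size residuals, reshaped to (B, num_proposal, NS, 3)
#   [5+2*NH+4*NS:]           semantic class scores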
81 | batch_size = net_transposed.shape[0] 82 | num_proposal = net_transposed.shape[1] 83 | 84 | objectness_scores = net_transposed[:,:,0:2] 85 | 86 | base_xyz = data_dict['aggregated_vote_xyz'] # (batch_size, num_proposal, 3) 87 | center = base_xyz + net_transposed[:,:,2:5] # (batch_size, num_proposal, 3) 88 | 89 | heading_scores = net_transposed[:,:,5:5+num_heading_bin] 90 | heading_residuals_normalized = net_transposed[:,:,5+num_heading_bin:5+num_heading_bin*2] 91 | 92 | size_scores = net_transposed[:,:,5+num_heading_bin*2:5+num_heading_bin*2+num_size_cluster] 93 | size_residuals_normalized = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster:5+num_heading_bin*2+num_size_cluster*4].view([batch_size, num_proposal, num_size_cluster, 3]) # Bxnum_proposalxnum_size_clusterx3 94 | 95 | sem_cls_scores = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster*4:] # Bxnum_proposalx10 96 | 97 | # store 98 | data_dict['objectness_scores'] = objectness_scores 99 | data_dict['center'] = center 100 | data_dict['heading_scores'] = heading_scores # Bxnum_proposalxnum_heading_bin 101 | data_dict['heading_residuals_normalized'] = heading_residuals_normalized # Bxnum_proposalxnum_heading_bin (should be -1 to 1) 102 | data_dict['heading_residuals'] = heading_residuals_normalized * (np.pi/num_heading_bin) # Bxnum_proposalxnum_heading_bin 103 | data_dict['size_scores'] = size_scores 104 | data_dict['size_residuals_normalized'] = size_residuals_normalized 105 | data_dict['size_residuals'] = size_residuals_normalized * torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0) 106 | data_dict['sem_cls_scores'] = sem_cls_scores 107 | 108 | return data_dict 109 | 110 | -------------------------------------------------------------------------------- /models/refnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import sys 5 | import os 6 | 7 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder 8 | from models.backbone_module import Pointnet2Backbone 9 | from models.voting_module import VotingModule 10 | from models.proposal_module import ProposalModule 11 | from models.lang_module import LangModule 12 | from models.match_module import MatchModule 13 | 14 | class RefNet(nn.Module): 15 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 16 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling="vote_fps", 17 | use_lang_classifier=True, use_bidir=False, no_reference=False, 18 | emb_size=300, hidden_size=256): 19 | super().__init__() 20 | 21 | self.num_class = num_class 22 | self.num_heading_bin = num_heading_bin 23 | self.num_size_cluster = num_size_cluster 24 | self.mean_size_arr = mean_size_arr 25 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 26 | self.input_feature_dim = input_feature_dim 27 | self.num_proposal = num_proposal 28 | self.vote_factor = vote_factor 29 | self.sampling = sampling 30 | self.use_lang_classifier = use_lang_classifier 31 | self.use_bidir = use_bidir 32 | self.no_reference = no_reference 33 | 34 | 35 | # --------- PROPOSAL GENERATION --------- 36 | # Backbone point feature learning 37 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 38 | 39 | # Hough voting 40 | self.vgen = VotingModule(self.vote_factor, 256) 41 | 42 | # Vote aggregation and object proposal 43 | self.proposal = ProposalModule(num_class, num_heading_bin, num_size_cluster, 
mean_size_arr, num_proposal, sampling) 44 | 45 | if not no_reference: 46 | # --------- LANGUAGE ENCODING --------- 47 | # Encode the input descriptions into vectors 48 | # (including attention and language classification) 49 | self.lang = LangModule(num_class, use_lang_classifier, use_bidir, emb_size, hidden_size) 50 | 51 | # --------- PROPOSAL MATCHING --------- 52 | # Match the generated proposals and select the most confident ones 53 | self.match = MatchModule(num_proposals=num_proposal, lang_size=(1 + int(self.use_bidir)) * hidden_size) 54 | 55 | def forward(self, data_dict): 56 | """ Forward pass of the network 57 | 58 | Args: 59 | data_dict: dict 60 | { 61 | point_clouds, 62 | lang_feat 63 | } 64 | 65 | point_clouds: Variable(torch.cuda.FloatTensor) 66 | (B, N, 3 + input_channels) tensor 67 | Point cloud to run predicts on 68 | Each point in the point-cloud MUST 69 | be formated as (x, y, z, features...) 70 | Returns: 71 | end_points: dict 72 | """ 73 | 74 | ####################################### 75 | # # 76 | # DETECTION BRANCH # 77 | # # 78 | ####################################### 79 | 80 | # --------- HOUGH VOTING --------- 81 | data_dict = self.backbone_net(data_dict) 82 | 83 | # --------- HOUGH VOTING --------- 84 | xyz = data_dict["fp2_xyz"] 85 | features = data_dict["fp2_features"] 86 | data_dict["seed_inds"] = data_dict["fp2_inds"] 87 | data_dict["seed_xyz"] = xyz 88 | data_dict["seed_features"] = features 89 | 90 | xyz, features = self.vgen(xyz, features) 91 | features_norm = torch.norm(features, p=2, dim=1) 92 | features = features.div(features_norm.unsqueeze(1)) 93 | data_dict["vote_xyz"] = xyz 94 | data_dict["vote_features"] = features 95 | 96 | # --------- PROPOSAL GENERATION --------- 97 | data_dict = self.proposal(xyz, features, data_dict) 98 | 99 | if not self.no_reference: 100 | ####################################### 101 | # # 102 | # LANGUAGE BRANCH # 103 | # # 104 | ####################################### 105 | 106 | # --------- LANGUAGE ENCODING --------- 107 | data_dict = self.lang(data_dict) 108 | 109 | ####################################### 110 | # # 111 | # PROPOSAL MATCHING # 112 | # # 113 | ####################################### 114 | 115 | # --------- PROPOSAL MATCHING --------- 116 | data_dict = self.match(data_dict) 117 | 118 | return data_dict 119 | -------------------------------------------------------------------------------- /models/voting_module.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Voting module: generate votes from XYZ and features of seed points. 3 | 4 | Modified from: https://github.com/facebookresearch/votenet/blob/master/models/voting_module.py 5 | ''' 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | class VotingModule(nn.Module): 12 | def __init__(self, vote_factor, seed_feature_dim): 13 | """ Votes generation from seed point features. 
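Each seed generates vote_factor votes; every vote is the seed xyz shifted by a predicted 3D offset, with a predicted residual added to the seed feature, so vote features keep the seed feature dimension.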
14 | 15 | Args: 16 | vote_facotr: int 17 | number of votes generated from each seed point 18 | seed_feature_dim: int 19 | number of channels of seed point features 20 | vote_feature_dim: int 21 | number of channels of vote features 22 | """ 23 | super().__init__() 24 | self.vote_factor = vote_factor 25 | self.in_dim = seed_feature_dim 26 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 27 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 28 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 29 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 30 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 31 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 32 | 33 | def forward(self, seed_xyz, seed_features): 34 | """ Forward pass. 35 | 36 | Arguments: 37 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 38 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 39 | Returns: 40 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 41 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 42 | """ 43 | batch_size = seed_xyz.shape[0] 44 | num_seed = seed_xyz.shape[1] 45 | num_vote = num_seed*self.vote_factor 46 | net = F.relu(self.bn1(self.conv1(seed_features))) 47 | net = F.relu(self.bn2(self.conv2(net))) 48 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 49 | 50 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim).contiguous() 51 | offset = net[:,:,:,0:3] 52 | vote_xyz = seed_xyz.unsqueeze(2) + offset 53 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 54 | 55 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 56 | vote_features = seed_features.transpose(2,1).unsqueeze(2).contiguous() + residual_features 57 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 58 | vote_features = vote_features.transpose(2,1).contiguous() 59 | 60 | return vote_xyz, vote_features 61 | 62 | if __name__=='__main__': 63 | net = VotingModule(2, 256).cuda() 64 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 65 | print('xyz', xyz.shape) 66 | print('features', features.shape) 67 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | plyfile 2 | opencv-python 3 | trimesh==2.35.39 4 | tensorboardX 5 | easydict 6 | tqdm 7 | h5py 8 | matplotlib -------------------------------------------------------------------------------- /scripts/compute_multiview_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import torch 5 | import argparse 6 | import numpy as np 7 | import torch.nn as nn 8 | import torchvision.transforms as transforms 9 | from torch.utils.data import Dataset, DataLoader 10 | from imageio import imread 11 | from PIL import Image 12 | from tqdm import tqdm 13 | 14 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 15 | from lib.enet import create_enet_for_3d 16 | from lib.config import CONF 17 | 18 | # scannet data 19 | # NOTE: read only! 
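# path templates resolved from lib/config.py: SCANNET_FRAME_ROOT is formatted with
# (scene_id, frame type), and SCANNET_FRAME_PATH additionally with a file name,
# e.g. SCANNET_FRAME_PATH.format(scene_id, "color", "0.jpg"); see EnetDataset below.
# Assuming the RGB-D frames have been extracted to CONF.SCANNET_FRAMES, run this
# script from the repository root (it appends os.getcwd() to sys.path), e.g.:
#   python scripts/compute_multiview_features.py --gpu 0
# It saves one .npy ENet feature map per frame under ENET_FEATURE_ROOT.format(scene_id).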
20 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES 21 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file 22 | SCANNET_LIST = CONF.SCANNETV2_LIST 23 | 24 | ENET_PATH = CONF.ENET_WEIGHTS 25 | ENET_FEATURE_ROOT = CONF.ENET_FEATURES_SUBROOT 26 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH 27 | 28 | class EnetDataset(Dataset): 29 | def __init__(self): 30 | self._init_resources() 31 | 32 | def __len__(self): 33 | return len(self.data) 34 | 35 | def __getitem__(self, idx): 36 | scene_id, frame_id = self.data[idx] 37 | image = self._load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256]) 38 | 39 | return scene_id, frame_id, image 40 | 41 | def _init_resources(self): 42 | self._get_scene_list() 43 | self.data = [] 44 | for scene_id in self.scene_list: 45 | frame_list = sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color")), key=lambda x:int(x.split(".")[0])) 46 | for frame_file in frame_list: 47 | self.data.append( 48 | ( 49 | scene_id, 50 | int(frame_file.split(".")[0]) 51 | ) 52 | ) 53 | 54 | def _get_scene_list(self): 55 | with open(SCANNET_LIST, 'r') as f: 56 | self.scene_list = sorted(list(set(f.read().splitlines()))) 57 | 58 | def _resize_crop_image(self, image, new_image_dims): 59 | image_dims = [image.shape[1], image.shape[0]] 60 | if image_dims != new_image_dims: 61 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1]))) 62 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image)) 63 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image) 64 | 65 | return np.array(image) 66 | 67 | def _load_image(self, file, image_dims): 68 | image = imread(file) 69 | # preprocess 70 | image = self._resize_crop_image(image, image_dims) 71 | if len(image.shape) == 3: # color image 72 | image = np.transpose(image, [2, 0, 1]) # move feature to front 73 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0)) 74 | elif len(image.shape) == 2: # label image 75 | image = np.expand_dims(image, 0) 76 | else: 77 | raise ValueError 78 | 79 | return image 80 | 81 | def collate_fn(self, data): 82 | scene_ids, frame_ids, images = zip(*data) 83 | scene_ids = list(scene_ids) 84 | frame_ids = list(frame_ids) 85 | images = torch.stack(images, 0).cuda() 86 | 87 | return scene_ids, frame_ids, images 88 | 89 | def create_enet(): 90 | enet_fixed, enet_trainable, _ = create_enet_for_3d(41, ENET_PATH, 21) 91 | enet = nn.Sequential( 92 | enet_fixed, 93 | enet_trainable 94 | ).cuda() 95 | enet.eval() 96 | for param in enet.parameters(): 97 | param.requires_grad = False 98 | 99 | return enet 100 | 101 | if __name__ == "__main__": 102 | parser = argparse.ArgumentParser() 103 | parser.add_argument('--gpu', type=str, help='gpu', default='0') 104 | args = parser.parse_args() 105 | 106 | # setting 107 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 108 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 109 | 110 | # init 111 | dataset = EnetDataset() 112 | dataloader = DataLoader(dataset, batch_size=256, shuffle=False, collate_fn=dataset.collate_fn) 113 | enet = create_enet() 114 | 115 | # feed 116 | print("extracting multiview features from ENet...") 117 | for scene_ids, frame_ids, images in tqdm(dataloader): 118 | features = enet(images) 119 | batch_size = images.shape[0] 120 | for batch_id in range(batch_size): 121 | 
os.makedirs(ENET_FEATURE_ROOT.format(scene_ids[batch_id]), exist_ok=True) 122 | np.save(ENET_FEATURE_PATH.format(scene_ids[batch_id], frame_ids[batch_id]), features[batch_id].cpu().numpy()) 123 | 124 | print("done!") 125 | 126 | -------------------------------------------------------------------------------- /scripts/project_multiview_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import torch 5 | import torch.nn as nn 6 | import argparse 7 | import numpy as np 8 | from tqdm import tqdm 9 | from plyfile import PlyData, PlyElement 10 | import math 11 | from imageio import imread 12 | from PIL import Image 13 | import torchvision.transforms as transforms 14 | 15 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 16 | from lib.config import CONF 17 | from lib.projection import ProjectionHelper 18 | 19 | SCANNET_LIST = CONF.SCANNETV2_LIST 20 | SCANNET_DATA = CONF.PATH.SCANNET_DATA 21 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES 22 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file 23 | 24 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH 25 | ENET_FEATURE_DATABASE = CONF.MULTIVIEW 26 | 27 | # projection 28 | INTRINSICS = [[37.01983, 0, 20, 0],[0, 38.52470, 15.5, 0],[0, 0, 1, 0],[0, 0, 0, 1]] 29 | PROJECTOR = ProjectionHelper(INTRINSICS, 0.1, 4.0, [41, 32], 0.05) 30 | 31 | def get_scene_list(): 32 | with open(SCANNET_LIST, 'r') as f: 33 | return sorted(list(set(f.read().splitlines()))) 34 | 35 | def to_tensor(arr): 36 | return torch.Tensor(arr).cuda() 37 | 38 | def resize_crop_image(image, new_image_dims): 39 | image_dims = [image.shape[1], image.shape[0]] 40 | if image_dims == new_image_dims: 41 | return image 42 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1]))) 43 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image)) 44 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image) 45 | image = np.array(image) 46 | 47 | return image 48 | 49 | def load_image(file, image_dims): 50 | image = imread(file) 51 | # preprocess 52 | image = resize_crop_image(image, image_dims) 53 | if len(image.shape) == 3: # color image 54 | image = np.transpose(image, [2, 0, 1]) # move feature to front 55 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0)) 56 | elif len(image.shape) == 2: # label image 57 | # image = np.expand_dims(image, 0) 58 | pass 59 | else: 60 | raise 61 | 62 | return image 63 | 64 | def load_pose(filename): 65 | lines = open(filename).read().splitlines() 66 | assert len(lines) == 4 67 | lines = [[x[0],x[1],x[2],x[3]] for x in (x.split(" ") for x in lines)] 68 | 69 | return np.asarray(lines).astype(np.float32) 70 | 71 | def load_depth(file, image_dims): 72 | depth_image = imread(file) 73 | # preprocess 74 | depth_image = resize_crop_image(depth_image, image_dims) 75 | depth_image = depth_image.astype(np.float32) / 1000.0 76 | 77 | return depth_image 78 | 79 | def get_scene_data(scene_list): 80 | scene_data = {} 81 | for scene_id in scene_list: 82 | # load the original vertices, not the axis-aligned ones 83 | scene_data[scene_id] = np.load(os.path.join(SCANNET_DATA, scene_id)+"_vert.npy")[:, :3] 84 | 85 | return scene_data 86 | 87 | def compute_projection(points, depth, camera_to_world): 88 | """ 89 | :param points: tensor containing 
all points of the point cloud (num_points, 3) 90 | :param depth: depth map (size: proj_image) 91 | :param camera_to_world: camera pose (4, 4) 92 | 93 | :return indices_3d (array with point indices that correspond to a pixel), 94 | :return indices_2d (array with pixel indices that correspond to a point) 95 | 96 | note: 97 | the first digit of indices represents the number of relevant points 98 | the rest digits are for the projection mapping 99 | """ 100 | num_points = points.shape[0] 101 | num_frames = depth.shape[0] 102 | indices_3ds = torch.zeros(num_frames, num_points + 1).long().cuda() 103 | indices_2ds = torch.zeros(num_frames, num_points + 1).long().cuda() 104 | 105 | for i in range(num_frames): 106 | indices = PROJECTOR.compute_projection(to_tensor(points), to_tensor(depth[i]), to_tensor(camera_to_world[i])) 107 | if indices: 108 | indices_3ds[i] = indices[0].long() 109 | indices_2ds[i] = indices[1].long() 110 | print("found {} mappings in {} points from frame {}".format(indices_3ds[i][0], num_points, i)) 111 | 112 | return indices_3ds, indices_2ds 113 | 114 | if __name__ == "__main__": 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument('--gpu', type=str, help='gpu', default='0') 117 | parser.add_argument("--maxpool", action="store_true", help="use max pooling to aggregate features \ 118 | (use majority voting in label projection mode)") 119 | args = parser.parse_args() 120 | 121 | # setting 122 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 123 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 124 | 125 | scene_list = get_scene_list() 126 | scene_data = get_scene_data(scene_list) 127 | with h5py.File(ENET_FEATURE_DATABASE, "w", libver="latest") as database: 128 | print("projecting multiview features to point cloud...") 129 | for scene_id in scene_list: 130 | print("processing {}...".format(scene_id)) 131 | scene = scene_data[scene_id] 132 | # load frames 133 | frame_list = list(map(lambda x: x.split(".")[0], sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color"))))) 134 | scene_images = np.zeros((len(frame_list), 3, 256, 328)) 135 | scene_depths = np.zeros((len(frame_list), 32, 41)) 136 | scene_poses = np.zeros((len(frame_list), 4, 4)) 137 | for i, frame_id in enumerate(frame_list): 138 | scene_images[i] = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256]) 139 | scene_depths[i] = load_depth(SCANNET_FRAME_PATH.format(scene_id, "depth", "{}.png".format(frame_id)), [41, 32]) 140 | scene_poses[i] = load_pose(SCANNET_FRAME_PATH.format(scene_id, "pose", "{}.txt".format(frame_id))) 141 | 142 | # compute projections for each chunk 143 | projection_3d, projection_2d = compute_projection(scene, scene_depths, scene_poses) 144 | 145 | # compute valid projections 146 | projections = [] 147 | for i in range(projection_3d.shape[0]): 148 | num_valid = projection_3d[i, 0] 149 | if num_valid == 0: 150 | continue 151 | 152 | projections.append((frame_list[i], projection_3d[i], projection_2d[i])) 153 | 154 | # # project 155 | # point_features = to_tensor(scene).new(scene.shape[0], 128).fill_(0) 156 | # for i, projection in enumerate(projections): 157 | # frame_id = projection[0] 158 | # projection_3d = projection[1] 159 | # projection_2d = projection[2] 160 | # feat = to_tensor(np.load(ENET_FEATURE_PATH.format(scene_id, frame_id))) 161 | # proj_feat = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0) 162 | # if i == 0: 163 | # point_features = proj_feat 164 | # else: 165 | # mask = ((point_features == 
0).sum(1) == 128).nonzero().squeeze(1) 166 | # point_features[mask] = proj_feat[mask] 167 | 168 | # project 169 | point_features = to_tensor(scene).new(scene.shape[0], 128).fill_(0) 170 | for i, projection in enumerate(projections): 171 | frame_id = projection[0] 172 | projection_3d = projection[1] 173 | projection_2d = projection[2] 174 | feat = to_tensor(np.load(ENET_FEATURE_PATH.format(scene_id, frame_id))) 175 | 176 | proj_feat = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0) 177 | 178 | if args.maxpool: 179 | # only apply max pooling on the overlapping points 180 | # find out the points that are covered in projection 181 | feat_mask = ((proj_feat == 0).sum(1) != 128).bool() 182 | # find out the points that are not filled with features 183 | point_mask = ((point_features == 0).sum(1) == 128).bool() 184 | 185 | # for the points that are not filled with features 186 | # and are covered in projection, 187 | # simply fill those points with projected features 188 | mask = point_mask * feat_mask 189 | point_features[mask] = proj_feat[mask] 190 | 191 | # for the points that have already been filled with features 192 | # and are covered in projection, 193 | # apply max pooling first and then fill with pooled values 194 | mask = ~point_mask * feat_mask 195 | point_features[mask] = torch.max(point_features[mask], proj_feat[mask]) 196 | else: 197 | if i == 0: 198 | point_features = proj_feat 199 | else: 200 | mask = (point_features == 0).sum(1) == 128 201 | point_features[mask] = proj_feat[mask] 202 | 203 | # save 204 | database.create_dataset(scene_id, data=point_features.cpu().numpy()) 205 | 206 | print("done!") 207 | 208 | 209 | -------------------------------------------------------------------------------- /scripts/project_multiview_labels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import math 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import pandas as pd 10 | import torchvision.transforms as transforms 11 | 12 | from imageio import imread 13 | from PIL import Image 14 | from tqdm import tqdm 15 | from plyfile import PlyData, PlyElement 16 | from collections import Counter 17 | 18 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 19 | from lib.config import CONF 20 | from lib.projection import ProjectionHelper 21 | from lib.enet import create_enet_for_3d 22 | 23 | SCANNET_LIST = CONF.SCANNETV2_LIST 24 | SCANNET_DATA = CONF.PATH.SCANNET_DATA 25 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES 26 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file 27 | 28 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH 29 | ENET_FEATURE_DATABASE = CONF.MULTIVIEW 30 | 31 | # projection 32 | INTRINSICS = [[37.01983, 0, 20, 0],[0, 38.52470, 15.5, 0],[0, 0, 1, 0],[0, 0, 0, 1]] 33 | PROJECTOR = ProjectionHelper(INTRINSICS, 0.1, 4.0, [41, 32], 0.05) 34 | 35 | ENET_PATH = CONF.ENET_WEIGHTS 36 | ENET_GT_PATH = SCANNET_FRAME_PATH 37 | 38 | NYU40_LABELS = CONF.NYU40_LABELS 39 | SCANNET_LABELS = ['unannotated', 'wall', 'floor', 'chair', 'table', 'desk', 'bed', 'bookshelf', 'sofa', 'sink', 'bathtub', 'toilet', 'curtain', 'counter', 'door', 'window', 'shower curtain', 'refridgerator', 'picture', 'cabinet', 'otherfurniture'] 40 | 41 | PC_LABEL_ROOT = os.path.join(CONF.PATH.OUTPUT, "projections") 42 | PC_LABEL_PATH = os.path.join(PC_LABEL_ROOT, "{}.ply") 43 | 44 | def get_nyu40_labels(): 45 | labels = 
["unannotated"] 46 | labels += pd.read_csv(NYU40_LABELS)["nyu40class"].tolist() 47 | 48 | return labels 49 | 50 | def get_prediction_to_raw(): 51 | labels = get_nyu40_labels() 52 | mapping = {i: label for i, label in enumerate(labels)} 53 | 54 | return mapping 55 | 56 | def get_nyu_to_scannet(): 57 | nyu_idx_to_nyu_label = get_prediction_to_raw() 58 | scannet_label_to_scannet_idx = {label: i for i, label in enumerate(SCANNET_LABELS)} 59 | 60 | # mapping 61 | nyu_to_scannet = {} 62 | for nyu_idx in range(41): 63 | nyu_label = nyu_idx_to_nyu_label[nyu_idx] 64 | if nyu_label in scannet_label_to_scannet_idx.keys(): 65 | scannet_idx = scannet_label_to_scannet_idx[nyu_label] 66 | else: 67 | scannet_idx = 0 68 | nyu_to_scannet[nyu_idx] = scannet_idx 69 | 70 | return nyu_to_scannet 71 | 72 | def create_color_palette(): 73 | return { 74 | "unannotated": (0, 0, 0), 75 | "floor": (152, 223, 138), 76 | "wall": (174, 199, 232), 77 | "cabinet": (31, 119, 180), 78 | "bed": (255, 187, 120), 79 | "chair": (188, 189, 34), 80 | "sofa": (140, 86, 75), 81 | "table": (255, 152, 150), 82 | "door": (214, 39, 40), 83 | "window": (197, 176, 213), 84 | "bookshelf": (148, 103, 189), 85 | "picture": (196, 156, 148), 86 | "counter": (23, 190, 207), 87 | "desk": (247, 182, 210), 88 | "curtain": (219, 219, 141), 89 | "refridgerator": (255, 127, 14), 90 | "bathtub": (227, 119, 194), 91 | "shower curtain": (158, 218, 229), 92 | "toilet": (44, 160, 44), 93 | "sink": (112, 128, 144), 94 | "otherfurniture": (82, 84, 163), 95 | } 96 | 97 | def get_scene_list(args): 98 | if args.scene_id == "-1": 99 | with open(SCANNET_LIST, 'r') as f: 100 | return sorted(list(set(f.read().splitlines()))) 101 | else: 102 | return [args.scene_id] 103 | 104 | def to_tensor(arr): 105 | return torch.Tensor(arr).cuda() 106 | 107 | def resize_crop_image(image, new_image_dims): 108 | image_dims = [image.shape[1], image.shape[0]] 109 | if image_dims == new_image_dims: 110 | return image 111 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1]))) 112 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image)) 113 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image) 114 | image = np.array(image) 115 | 116 | return image 117 | 118 | def load_image(file, image_dims): 119 | image = imread(file) 120 | # preprocess 121 | image = resize_crop_image(image, image_dims) 122 | if len(image.shape) == 3: # color image 123 | image = np.transpose(image, [2, 0, 1]) # move feature to front 124 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0)) 125 | elif len(image.shape) == 2: # label image 126 | # image = np.expand_dims(image, 0) 127 | pass 128 | else: 129 | raise 130 | 131 | return image 132 | 133 | def load_pose(filename): 134 | lines = open(filename).read().splitlines() 135 | assert len(lines) == 4 136 | lines = [[x[0],x[1],x[2],x[3]] for x in (x.split(" ") for x in lines)] 137 | 138 | return np.asarray(lines).astype(np.float32) 139 | 140 | def load_depth(file, image_dims): 141 | depth_image = imread(file) 142 | # preprocess 143 | depth_image = resize_crop_image(depth_image, image_dims) 144 | depth_image = depth_image.astype(np.float32) / 1000.0 145 | 146 | return depth_image 147 | 148 | def visualize(coords, labels): 149 | palette = create_color_palette() 150 | nyu_to_scannet = get_nyu_to_scannet() 151 | vertex = [] 152 | for i in 
range(coords.shape[0]): 153 | vertex.append( 154 | ( 155 | coords[i][0], 156 | coords[i][1], 157 | coords[i][2], 158 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][0], 159 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][1], 160 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][2] 161 | ) 162 | ) 163 | 164 | vertex = np.array( 165 | vertex, 166 | dtype=[ 167 | ("x", np.dtype("float32")), 168 | ("y", np.dtype("float32")), 169 | ("z", np.dtype("float32")), 170 | ("red", np.dtype("uint8")), 171 | ("green", np.dtype("uint8")), 172 | ("blue", np.dtype("uint8")) 173 | ] 174 | ) 175 | 176 | output_pc = PlyElement.describe(vertex, "vertex") 177 | output_pc = PlyData([output_pc]) 178 | os.makedirs(PC_LABEL_ROOT, exist_ok=True) 179 | output_pc.write(PC_LABEL_PATH.format(args.scene_id)) 180 | 181 | def get_scene_data(scene_list): 182 | scene_data = {} 183 | for scene_id in scene_list: 184 | scene_data[scene_id] = {} 185 | scene_data[scene_id] = np.load(os.path.join(SCANNET_DATA, scene_id)+"_vert.npy")[:, :3] 186 | 187 | return scene_data 188 | 189 | def compute_projection(points, depth, camera_to_world): 190 | """ 191 | :param points: tensor containing all points of the point cloud (num_points, 3) 192 | :param depth: depth map (size: proj_image) 193 | :param camera_to_world: camera pose (4, 4) 194 | 195 | :return indices_3d (array with point indices that correspond to a pixel), 196 | :return indices_2d (array with pixel indices that correspond to a point) 197 | 198 | note: 199 | the first digit of indices represents the number of relevant points 200 | the rest digits are for the projection mapping 201 | """ 202 | num_points = points.shape[0] 203 | num_frames = depth.shape[0] 204 | indices_3ds = torch.zeros(num_frames, num_points + 1).long().cuda() 205 | indices_2ds = torch.zeros(num_frames, num_points + 1).long().cuda() 206 | 207 | for i in range(num_frames): 208 | indices = PROJECTOR.compute_projection(to_tensor(points), to_tensor(depth[i]), to_tensor(camera_to_world[i])) 209 | if indices: 210 | indices_3ds[i] = indices[0].long() 211 | indices_2ds[i] = indices[1].long() 212 | 213 | return indices_3ds, indices_2ds 214 | 215 | def create_enet(): 216 | enet_fixed, enet_trainable, enet_classifier = create_enet_for_3d(41, ENET_PATH, 21) 217 | enet = nn.Sequential( 218 | enet_fixed, 219 | enet_trainable, 220 | enet_classifier 221 | ).cuda() 222 | enet.eval() 223 | for param in enet.parameters(): 224 | param.requires_grad = False 225 | 226 | return enet 227 | 228 | 229 | if __name__ == "__main__": 230 | parser = argparse.ArgumentParser() 231 | parser.add_argument("--scene_id", type=str, default="-1") 232 | parser.add_argument("--gt", action="store_true") 233 | parser.add_argument("--maxpool", action="store_true", help="use max pooling to aggregate features \ 234 | (use majority voting in label projection mode)") 235 | args = parser.parse_args() 236 | 237 | scene_list = get_scene_list(args) 238 | scene_data = get_scene_data(scene_list) 239 | enet = create_enet() 240 | for scene_id in tqdm(scene_list): 241 | scene = scene_data[scene_id] 242 | # load frames 243 | frame_list = list(map(lambda x: x.split(".")[0], sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color"))))) 244 | scene_images = np.zeros((len(frame_list), 3, 256, 328)) 245 | scene_depths = np.zeros((len(frame_list), 32, 41)) 246 | scene_poses = np.zeros((len(frame_list), 4, 4)) 247 | for i, frame_id in enumerate(frame_list): 248 | scene_images[i] = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", 
"{}.jpg".format(frame_id)), [328, 256]) 249 | scene_depths[i] = load_depth(SCANNET_FRAME_PATH.format(scene_id, "depth", "{}.png".format(frame_id)), [41, 32]) 250 | scene_poses[i] = load_pose(SCANNET_FRAME_PATH.format(scene_id, "pose", "{}.txt".format(frame_id))) 251 | 252 | # compute projections for each chunk 253 | projection_3d, projection_2d = compute_projection(scene, scene_depths, scene_poses) 254 | 255 | # compute valid projections 256 | projections = [] 257 | for i in range(projection_3d.shape[0]): 258 | num_valid = projection_3d[i, 0] 259 | if num_valid == 0: 260 | continue 261 | 262 | projections.append((frame_list[i], projection_3d[i], projection_2d[i])) 263 | 264 | # # project 265 | # labels = None 266 | # for i, projection in enumerate(projections): 267 | # frame_id = projection[0] 268 | # projection_3d = projection[1] 269 | # projection_2d = projection[2] 270 | # if args.gt: 271 | # feat = to_tensor(load_image(ENET_GT_PATH.format(scene_id, "labelv2", "{}.png".format(frame_id)), [41, 32])).unsqueeze(0) 272 | # else: 273 | # image = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256]) 274 | # feat = enet(to_tensor(image).unsqueeze(0)).max(1)[1].unsqueeze(1) 275 | 276 | # proj_label = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0) 277 | # if i == 0: 278 | # labels = proj_label 279 | # else: 280 | # labels[labels == 0] = proj_label[labels == 0] 281 | 282 | # project 283 | labels = to_tensor(scene).new(scene.shape[0], len(projections)).fill_(0).long() 284 | for i, projection in enumerate(projections): 285 | frame_id = projection[0] 286 | projection_3d = projection[1] 287 | projection_2d = projection[2] 288 | 289 | if args.gt: 290 | feat = to_tensor(load_image(ENET_GT_PATH.format(scene_id, "labelv2", "{}.png".format(frame_id)), [41, 32])).unsqueeze(0) 291 | else: 292 | image = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256]) 293 | feat = enet(to_tensor(image).unsqueeze(0)).max(1)[1].unsqueeze(1) 294 | 295 | proj_label = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0) # num_points, 1 296 | 297 | if args.maxpool: 298 | # only apply max pooling on the overlapping points 299 | # find out the points that are covered in projection 300 | feat_mask = ((proj_label == 0).sum(1) != 1).bool() 301 | # find out the points that are not filled with labels 302 | point_mask = ((labels == 0).sum(1) == len(projections)).bool() 303 | 304 | # for the points that are not filled with features 305 | # and are covered in projection, 306 | # simply fill those points with labels 307 | mask = point_mask * feat_mask 308 | labels[mask, i] = proj_label[mask, 0] 309 | 310 | # for the points that have already been filled with features 311 | # and are covered in projection, 312 | # simply fill those points with labels 313 | mask = ~point_mask * feat_mask 314 | labels[mask, i] = proj_label[mask, 0] 315 | else: 316 | if i == 0: 317 | labels = proj_label 318 | else: 319 | labels[labels == 0] = proj_label[labels == 0] 320 | 321 | # aggregate 322 | if args.maxpool: 323 | new_labels = [] 324 | for label_id in range(labels.shape[0]): 325 | point_label = labels[label_id].cpu().numpy().tolist() 326 | count = dict(Counter(point_label)) 327 | count = sorted(count.items(), key=lambda x: x[1], reverse=True) 328 | count = [c for c in count if c[0] != 0] 329 | if count: 330 | new_labels.append(count[0][0]) 331 | else: 332 | new_labels.append(0) 333 | 334 | labels = 
torch.FloatTensor(np.array(new_labels)[:, np.newaxis]) 335 | 336 | # output 337 | visualize(scene, labels.long().squeeze(1).cpu().numpy()) 338 | 339 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import h5py 5 | import argparse 6 | import importlib 7 | import torch 8 | import torch.optim as optim 9 | import torch.nn as nn 10 | import numpy as np 11 | 12 | from torch.utils.data import DataLoader 13 | from datetime import datetime 14 | from copy import deepcopy 15 | 16 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder 17 | from data.scannet.model_util_scannet import ScannetDatasetConfig 18 | from lib.dataset import ScannetReferenceDataset 19 | from lib.solver import Solver 20 | from lib.config import CONF 21 | from models.refnet import RefNet 22 | 23 | SCANREFER_TRAIN = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_train.json"))) 24 | SCANREFER_VAL = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_val.json"))) 25 | 26 | # constants 27 | DC = ScannetDatasetConfig() 28 | 29 | def get_dataloader(args, scanrefer, all_scene_list, split, config, augment): 30 | dataset = ScannetReferenceDataset( 31 | scanrefer=scanrefer[split], 32 | scanrefer_all_scene=all_scene_list, 33 | split=split, 34 | num_points=args.num_points, 35 | use_height=(not args.no_height), 36 | use_color=args.use_color, 37 | use_normal=args.use_normal, 38 | use_multiview=args.use_multiview 39 | ) 40 | # dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True) 41 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=4) 42 | 43 | return dataset, dataloader 44 | 45 | def get_model(args): 46 | # initiate model 47 | input_channels = int(args.use_multiview) * 128 + int(args.use_normal) * 3 + int(args.use_color) * 3 + int(not args.no_height) 48 | model = RefNet( 49 | num_class=DC.num_class, 50 | num_heading_bin=DC.num_heading_bin, 51 | num_size_cluster=DC.num_size_cluster, 52 | mean_size_arr=DC.mean_size_arr, 53 | input_feature_dim=input_channels, 54 | num_proposal=args.num_proposals, 55 | use_lang_classifier=(not args.no_lang_cls), 56 | use_bidir=args.use_bidir, 57 | no_reference=args.no_reference 58 | ) 59 | 60 | # trainable model 61 | if args.use_pretrained: 62 | # load model 63 | print("loading pretrained VoteNet...") 64 | pretrained_model = RefNet( 65 | num_class=DC.num_class, 66 | num_heading_bin=DC.num_heading_bin, 67 | num_size_cluster=DC.num_size_cluster, 68 | mean_size_arr=DC.mean_size_arr, 69 | num_proposal=args.num_proposals, 70 | input_feature_dim=input_channels, 71 | use_bidir=args.use_bidir, 72 | no_reference=True 73 | ) 74 | 75 | pretrained_path = os.path.join(CONF.PATH.OUTPUT, args.use_pretrained, "model_last.pth") 76 | pretrained_model.load_state_dict(torch.load(pretrained_path), strict=False) 77 | 78 | # mount 79 | model.backbone_net = pretrained_model.backbone_net 80 | model.vgen = pretrained_model.vgen 81 | model.proposal = pretrained_model.proposal 82 | 83 | if args.no_detection: 84 | # freeze pointnet++ backbone 85 | for param in model.backbone_net.parameters(): 86 | param.requires_grad = False 87 | 88 | # freeze voting 89 | for param in model.vgen.parameters(): 90 | param.requires_grad = False 91 | 92 | # freeze detector 93 | for param in model.proposal.parameters(): 94 | param.requires_grad = False 95 | 96 | # to CUDA 97 | 
model = model.cuda() 98 | 99 | return model 100 | 101 | def get_num_params(model): 102 | model_parameters = filter(lambda p: p.requires_grad, model.parameters()) 103 | num_params = int(sum([np.prod(p.size()) for p in model_parameters])) 104 | 105 | return num_params 106 | 107 | def get_solver(args, dataloader): 108 | model = get_model(args) 109 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) 110 | 111 | if args.use_checkpoint: 112 | print("loading checkpoint {}...".format(args.use_checkpoint)) 113 | stamp = args.use_checkpoint 114 | root = os.path.join(CONF.PATH.OUTPUT, stamp) 115 | checkpoint = torch.load(os.path.join(CONF.PATH.OUTPUT, args.use_checkpoint, "checkpoint.tar")) 116 | model.load_state_dict(checkpoint["model_state_dict"]) 117 | optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 118 | else: 119 | stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 120 | if args.tag: stamp += "_"+args.tag.upper() 121 | root = os.path.join(CONF.PATH.OUTPUT, stamp) 122 | os.makedirs(root, exist_ok=True) 123 | 124 | # scheduler parameters for training solely the detection pipeline 125 | LR_DECAY_STEP = [80, 120, 160] if args.no_reference else None 126 | LR_DECAY_RATE = 0.1 if args.no_reference else None 127 | BN_DECAY_STEP = 20 if args.no_reference else None 128 | BN_DECAY_RATE = 0.5 if args.no_reference else None 129 | 130 | solver = Solver( 131 | model=model, 132 | config=DC, 133 | dataloader=dataloader, 134 | optimizer=optimizer, 135 | stamp=stamp, 136 | val_step=args.val_step, 137 | detection=not args.no_detection, 138 | reference=not args.no_reference, 139 | use_lang_classifier=not args.no_lang_cls, 140 | lr_decay_step=LR_DECAY_STEP, 141 | lr_decay_rate=LR_DECAY_RATE, 142 | bn_decay_step=BN_DECAY_STEP, 143 | bn_decay_rate=BN_DECAY_RATE 144 | ) 145 | num_params = get_num_params(model) 146 | 147 | return solver, num_params, root 148 | 149 | def save_info(args, root, num_params, train_dataset, val_dataset): 150 | info = {} 151 | for key, value in vars(args).items(): 152 | info[key] = value 153 | 154 | info["num_train"] = len(train_dataset) 155 | info["num_val"] = len(val_dataset) 156 | info["num_train_scenes"] = len(train_dataset.scene_list) 157 | info["num_val_scenes"] = len(val_dataset.scene_list) 158 | info["num_params"] = num_params 159 | 160 | with open(os.path.join(root, "info.json"), "w") as f: 161 | json.dump(info, f, indent=4) 162 | 163 | def get_scannet_scene_list(split): 164 | scene_list = sorted([line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET_META, "scannetv2_{}.txt".format(split)))]) 165 | 166 | return scene_list 167 | 168 | def get_scanrefer(scanrefer_train, scanrefer_val, num_scenes): 169 | if args.no_reference: 170 | train_scene_list = get_scannet_scene_list("train") 171 | new_scanrefer_train = [] 172 | for scene_id in train_scene_list: 173 | data = deepcopy(SCANREFER_TRAIN[0]) 174 | data["scene_id"] = scene_id 175 | new_scanrefer_train.append(data) 176 | 177 | val_scene_list = get_scannet_scene_list("val") 178 | new_scanrefer_val = [] 179 | for scene_id in val_scene_list: 180 | data = deepcopy(SCANREFER_VAL[0]) 181 | data["scene_id"] = scene_id 182 | new_scanrefer_val.append(data) 183 | else: 184 | # get initial scene list 185 | train_scene_list = sorted(list(set([data["scene_id"] for data in scanrefer_train]))) 186 | val_scene_list = sorted(list(set([data["scene_id"] for data in scanrefer_val]))) 187 | if num_scenes == -1: 188 | num_scenes = len(train_scene_list) 189 | else: 190 | assert len(train_scene_list) >= 
num_scenes 191 | 192 | # slice train_scene_list 193 | train_scene_list = train_scene_list[:num_scenes] 194 | 195 | # filter data in chosen scenes 196 | new_scanrefer_train = [] 197 | for data in scanrefer_train: 198 | if data["scene_id"] in train_scene_list: 199 | new_scanrefer_train.append(data) 200 | 201 | new_scanrefer_val = scanrefer_val 202 | 203 | # all scanrefer scenes 204 | all_scene_list = train_scene_list + val_scene_list 205 | 206 | print("train on {} samples and val on {} samples".format(len(new_scanrefer_train), len(new_scanrefer_val))) 207 | 208 | return new_scanrefer_train, new_scanrefer_val, all_scene_list 209 | 210 | def train(args): 211 | # init training dataset 212 | print("preparing data...") 213 | scanrefer_train, scanrefer_val, all_scene_list = get_scanrefer(SCANREFER_TRAIN, SCANREFER_VAL, args.num_scenes) 214 | scanrefer = { 215 | "train": scanrefer_train, 216 | "val": scanrefer_val 217 | } 218 | 219 | # dataloader 220 | train_dataset, train_dataloader = get_dataloader(args, scanrefer, all_scene_list, "train", DC, True) 221 | val_dataset, val_dataloader = get_dataloader(args, scanrefer, all_scene_list, "val", DC, False) 222 | dataloader = { 223 | "train": train_dataloader, 224 | "val": val_dataloader 225 | } 226 | 227 | print("initializing...") 228 | solver, num_params, root = get_solver(args, dataloader) 229 | 230 | print("Start training...\n") 231 | save_info(args, root, num_params, train_dataset, val_dataset) 232 | solver(args.epoch, args.verbose) 233 | 234 | if __name__ == "__main__": 235 | parser = argparse.ArgumentParser() 236 | parser.add_argument("--tag", type=str, help="tag for the training, e.g. cuda_wl", default="") 237 | parser.add_argument("--gpu", type=str, help="gpu", default="0") 238 | parser.add_argument("--batch_size", type=int, help="batch size", default=14) 239 | parser.add_argument("--epoch", type=int, help="number of epochs", default=50) 240 | parser.add_argument("--verbose", type=int, help="number of iterations between verbose log prints", default=10) 241 | parser.add_argument("--val_step", type=int, help="number of iterations between validation runs", default=5000) 242 | parser.add_argument("--lr", type=float, help="learning rate", default=1e-3) 243 | parser.add_argument("--wd", type=float, help="weight decay", default=1e-5) 244 | parser.add_argument("--num_points", type=int, default=40000, help="Point Number [default: 40000]") 245 | parser.add_argument("--num_proposals", type=int, default=256, help="Proposal number [default: 256]") 246 | parser.add_argument("--num_scenes", type=int, default=-1, help="Number of scenes [default: -1]") 247 | parser.add_argument("--seed", type=int, default=42, help="random seed") 248 | parser.add_argument("--no_height", action="store_true", help="Do NOT use height signal in input.") 249 | parser.add_argument("--no_augment", action="store_true", help="Do NOT use data augmentation.") 250 | parser.add_argument("--no_lang_cls", action="store_true", help="Do NOT use language classifier.") 251 | parser.add_argument("--no_detection", action="store_true", help="Do NOT train the detection module.") 252 | parser.add_argument("--no_reference", action="store_true", help="Do NOT train the localization module.") 253 | parser.add_argument("--use_color", action="store_true", help="Use RGB color in input.") 254 | parser.add_argument("--use_normal", action="store_true", help="Use normals in input.") 255 | parser.add_argument("--use_multiview", action="store_true", help="Use multiview images.") 256 | parser.add_argument("--use_bidir", action="store_true", 
help="Use bi-directional GRU.") 257 | parser.add_argument("--use_pretrained", type=str, help="Specify the folder name containing the pretrained detection module.") 258 | parser.add_argument("--use_checkpoint", type=str, help="Specify the checkpoint root", default="") 259 | args = parser.parse_args() 260 | 261 | # setting 262 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 263 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 264 | 265 | # reproducibility 266 | torch.manual_seed(args.seed) 267 | torch.backends.cudnn.deterministic = True 268 | torch.backends.cudnn.benchmark = False 269 | np.random.seed(args.seed) 270 | 271 | train(args) 272 | 273 | -------------------------------------------------------------------------------- /utils/box_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for calculating 2D and 3D bounding box IoU. 3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/box_util.py 4 | 5 | Collected and written by Charles R. Qi 6 | Last modified: Jul 2019 7 | """ 8 | 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | from scipy.spatial import ConvexHull 13 | 14 | def polygon_clip(subjectPolygon, clipPolygon): 15 | """ Clip a polygon with another polygon. 16 | 17 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 18 | 19 | Args: 20 | subjectPolygon: a list of (x,y) 2d points, any polygon. 21 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 22 | Note: 23 | **points have to be counter-clockwise ordered** 24 | 25 | Return: 26 | a list of (x,y) vertex point for the intersection polygon. 27 | """ 28 | def inside(p): 29 | return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0]) 30 | 31 | def computeIntersection(): 32 | dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ] 33 | dp = [ s[0] - e[0], s[1] - e[1] ] 34 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 35 | n2 = s[0] * e[1] - s[1] * e[0] 36 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 37 | return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3] 38 | 39 | outputList = subjectPolygon 40 | cp1 = clipPolygon[-1] 41 | 42 | for clipVertex in clipPolygon: 43 | cp2 = clipVertex 44 | inputList = outputList 45 | outputList = [] 46 | s = inputList[-1] 47 | 48 | for subjectVertex in inputList: 49 | e = subjectVertex 50 | if inside(e): 51 | if not inside(s): 52 | outputList.append(computeIntersection()) 53 | outputList.append(e) 54 | elif inside(s): 55 | outputList.append(computeIntersection()) 56 | s = e 57 | cp1 = cp2 58 | if len(outputList) == 0: 59 | return None 60 | return(outputList) 61 | 62 | def poly_area(x,y): 63 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 64 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 65 | 66 | def poly_area_batch(x,y): 67 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 68 | return 0.5 * np.abs(np.matmul(np.expand_dims(x, axis=1), np.roll(np.expand_dims(y, axis=2), 1, axis=1)) \ 69 | - np.matmul(np.expand_dims(y, axis=1), np.roll(np.expand_dims(x, axis=2), 1, axis=1))).squeeze(axis=(1,2)) 70 | 71 | def convex_hull_intersection(p1, p2): 72 | """ Compute area of two convex hull's intersection area. 73 | p1,p2 are a list of (x,y) tuples of hull vertices. 
74 | return a list of (x,y) for the intersection and its volume 75 | """ 76 | inter_p = polygon_clip(p1,p2) 77 | if inter_p is not None: 78 | hull_inter = ConvexHull(inter_p) 79 | return inter_p, hull_inter.volume 80 | else: 81 | return None, 0.0 82 | 83 | def box3d_vol(corners): 84 | ''' corners: (8,3) no assumption on axis direction ''' 85 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2)) 86 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2)) 87 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2)) 88 | return a*b*c 89 | 90 | def is_clockwise(p): 91 | x = p[:,0] 92 | y = p[:,1] 93 | return np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)) > 0 94 | 95 | def box3d_iou(corners1, corners2): 96 | ''' Compute 3D bounding box IoU. 97 | 98 | Input: 99 | corners1: numpy array (8,3), assume up direction is Z 100 | corners2: numpy array (8,3), assume up direction is Z 101 | Output: 102 | iou: 3D bounding box IoU 103 | 104 | ''' 105 | # # corner points are in counter clockwise order 106 | # rect1 = [(corners1[i,0], corners1[i,2]) for i in range(3,-1,-1)] 107 | # rect2 = [(corners2[i,0], corners2[i,2]) for i in range(3,-1,-1)] 108 | # area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1]) 109 | # area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1]) 110 | # inter, inter_area = convex_hull_intersection(rect1, rect2) 111 | # iou_2d = inter_area/(area1+area2-inter_area) 112 | # ymax = min(corners1[0,1], corners2[0,1]) 113 | # ymin = max(corners1[4,1], corners2[4,1]) 114 | # inter_vol = inter_area * max(0.0, ymax-ymin) 115 | # vol1 = box3d_vol(corners1) 116 | # vol2 = box3d_vol(corners2) 117 | # iou = inter_vol / (vol1 + vol2 - inter_vol) 118 | # return iou, iou_2d 119 | 120 | x_min_1, x_max_1, y_min_1, y_max_1, z_min_1, z_max_1 = get_box3d_min_max(corners1) 121 | x_min_2, x_max_2, y_min_2, y_max_2, z_min_2, z_max_2 = get_box3d_min_max(corners2) 122 | xA = np.maximum(x_min_1, x_min_2) 123 | yA = np.maximum(y_min_1, y_min_2) 124 | zA = np.maximum(z_min_1, z_min_2) 125 | xB = np.minimum(x_max_1, x_max_2) 126 | yB = np.minimum(y_max_1, y_max_2) 127 | zB = np.minimum(z_max_1, z_max_2) 128 | inter_vol = np.maximum((xB - xA), 0) * np.maximum((yB - yA), 0) * np.maximum((zB - zA), 0) 129 | box_vol_1 = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) * (z_max_1 - z_min_1) 130 | box_vol_2 = (x_max_2 - x_min_2) * (y_max_2 - y_min_2) * (z_max_2 - z_min_2) 131 | iou = inter_vol / (box_vol_1 + box_vol_2 - inter_vol + 1e-8) 132 | 133 | return iou 134 | 135 | def get_box3d_min_max(corner): 136 | ''' Compute min and max coordinates for 3D bounding box 137 | Note: only for axis-aligned bounding boxes 138 | 139 | Input: 140 | corners: numpy array (8,3), assume up direction is Z (batch of N samples) 141 | Output: 142 | box_min_max: an array for min and max coordinates of 3D bounding box IoU 143 | 144 | ''' 145 | 146 | min_coord = corner.min(axis=0) 147 | max_coord = corner.max(axis=0) 148 | x_min, x_max = min_coord[0], max_coord[0] 149 | y_min, y_max = min_coord[1], max_coord[1] 150 | z_min, z_max = min_coord[2], max_coord[2] 151 | 152 | return x_min, x_max, y_min, y_max, z_min, z_max 153 | 154 | def box3d_iou_batch(corners1, corners2): 155 | ''' Compute 3D bounding box IoU. 
156 | Note: only for axis-aligned bounding boxes 157 | 158 | Input: 159 | corners1: numpy array (N,8,3), assume up direction is Z (batch of N samples) 160 | corners2: numpy array (N,8,3), assume up direction is Z (batch of N samples) 161 | Output: 162 | iou: an array of 3D bounding box IoU 163 | 164 | ''' 165 | 166 | x_min_1, x_max_1, y_min_1, y_max_1, z_min_1, z_max_1 = get_box3d_min_max_batch(corners1) 167 | x_min_2, x_max_2, y_min_2, y_max_2, z_min_2, z_max_2 = get_box3d_min_max_batch(corners2) 168 | xA = np.maximum(x_min_1, x_min_2) 169 | yA = np.maximum(y_min_1, y_min_2) 170 | zA = np.maximum(z_min_1, z_min_2) 171 | xB = np.minimum(x_max_1, x_max_2) 172 | yB = np.minimum(y_max_1, y_max_2) 173 | zB = np.minimum(z_max_1, z_max_2) 174 | inter_vol = np.maximum((xB - xA), 0) * np.maximum((yB - yA), 0) * np.maximum((zB - zA), 0) 175 | box_vol_1 = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) * (z_max_1 - z_min_1) 176 | box_vol_2 = (x_max_2 - x_min_2) * (y_max_2 - y_min_2) * (z_max_2 - z_min_2) 177 | iou = inter_vol / (box_vol_1 + box_vol_2 - inter_vol + 1e-8) 178 | 179 | return iou 180 | 181 | def get_box3d_min_max_batch(corner): 182 | ''' Compute min and max coordinates for 3D bounding box 183 | Note: only for axis-aligned bounding boxes 184 | 185 | Input: 186 | corners: numpy array (N,8,3), assume up direction is Z (batch of N samples) 187 | Output: 188 | box_min_max: an array for min and max coordinates of 3D bounding box IoU 189 | 190 | ''' 191 | 192 | min_coord = corner.min(axis=1) 193 | max_coord = corner.max(axis=1) 194 | x_min, x_max = min_coord[:, 0], max_coord[:, 0] 195 | y_min, y_max = min_coord[:, 1], max_coord[:, 1] 196 | z_min, z_max = min_coord[:, 2], max_coord[:, 2] 197 | 198 | return x_min, x_max, y_min, y_max, z_min, z_max 199 | 200 | def get_iou(bb1, bb2): 201 | """ 202 | Calculate the Intersection over Union (IoU) of two 2D bounding boxes. 203 | 204 | Parameters 205 | ---------- 206 | bb1 : dict 207 | Keys: {'x1', 'x2', 'y1', 'y2'} 208 | The (x1, y1) position is at the top left corner, 209 | the (x2, y2) position is at the bottom right corner 210 | bb2 : dict 211 | Keys: {'x1', 'x2', 'y1', 'y2'} 212 | The (x, y) position is at the top left corner, 213 | the (x2, y2) position is at the bottom right corner 214 | 215 | Returns 216 | ------- 217 | float 218 | in [0, 1] 219 | """ 220 | assert bb1['x1'] < bb1['x2'] 221 | assert bb1['y1'] < bb1['y2'] 222 | assert bb2['x1'] < bb2['x2'] 223 | assert bb2['y1'] < bb2['y2'] 224 | 225 | # determine the coordinates of the intersection rectangle 226 | x_left = max(bb1['x1'], bb2['x1']) 227 | y_top = max(bb1['y1'], bb2['y1']) 228 | x_right = min(bb1['x2'], bb2['x2']) 229 | y_bottom = min(bb1['y2'], bb2['y2']) 230 | 231 | if x_right < x_left or y_bottom < y_top: 232 | return 0.0 233 | 234 | # The intersection of two axis-aligned bounding boxes is always an 235 | # axis-aligned bounding box 236 | intersection_area = (x_right - x_left) * (y_bottom - y_top) 237 | 238 | # compute the area of both AABBs 239 | bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1']) 240 | bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1']) 241 | 242 | # compute the intersection over union by taking the intersection 243 | # area and dividing it by the sum of prediction + ground-truth 244 | # areas - the interesection area 245 | iou = intersection_area / float(bb1_area + bb2_area - intersection_area) 246 | assert iou >= 0.0 247 | assert iou <= 1.0 248 | return iou 249 | 250 | def box2d_iou(box1, box2): 251 | ''' Compute 2D bounding box IoU. 
252 | 253 | Input: 254 | box1: tuple of (xmin,ymin,xmax,ymax) 255 | box2: tuple of (xmin,ymin,xmax,ymax) 256 | Output: 257 | iou: 2D IoU scalar 258 | ''' 259 | return get_iou({'x1':box1[0], 'y1':box1[1], 'x2':box1[2], 'y2':box1[3]}, \ 260 | {'x1':box2[0], 'y1':box2[1], 'x2':box2[2], 'y2':box2[3]}) 261 | 262 | # ----------------------------------------------------------- 263 | # Convert from box parameters to corners 264 | # ----------------------------------------------------------- 265 | def roty(t): 266 | """Rotation about the y-axis.""" 267 | c = np.cos(t) 268 | s = np.sin(t) 269 | return np.array([[c, 0, s], 270 | [0, 1, 0], 271 | [-s, 0, c]]) 272 | 273 | def roty_batch(t): 274 | """Rotation about the y-axis. 275 | t: (x1,x2,...xn) 276 | return: (x1,x2,...,xn,3,3) 277 | """ 278 | input_shape = t.shape 279 | output = np.zeros(tuple(list(input_shape)+[3,3])) 280 | c = np.cos(t) 281 | s = np.sin(t) 282 | output[...,0,0] = c 283 | output[...,0,2] = s 284 | output[...,1,1] = 1 285 | output[...,2,0] = -s 286 | output[...,2,2] = c 287 | return output 288 | 289 | 290 | def get_3d_box(box_size, heading_angle, center): 291 | ''' box_size is array(l,w,h), heading_angle is in radians clockwise from pos x axis, center is xyz of box center 292 | output (8,3) array for 3D box corners 293 | Similar to utils/compute_orientation_3d 294 | ''' 295 | R = roty(heading_angle) 296 | l,w,h = box_size 297 | # x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2] 298 | # y_corners = [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2] 299 | # z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2] 300 | x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2] 301 | y_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2] 302 | z_corners = [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2] 303 | corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners])) 304 | corners_3d[0,:] = corners_3d[0,:] + center[0] 305 | corners_3d[1,:] = corners_3d[1,:] + center[1] 306 | corners_3d[2,:] = corners_3d[2,:] + center[2] 307 | corners_3d = np.transpose(corners_3d) 308 | return corners_3d 309 | 310 | def get_3d_box_batch(box_size, heading_angle, center): 311 | ''' box_size: [x1,x2,...,xn,3] 312 | heading_angle: [x1,x2,...,xn] 313 | center: [x1,x2,...,xn,3] 314 | Return: 315 | [x1,x2,...,xn,8,3] 316 | ''' 317 | input_shape = heading_angle.shape 318 | R = roty_batch(heading_angle) 319 | l = np.expand_dims(box_size[...,0], -1) # [x1,...,xn,1] 320 | w = np.expand_dims(box_size[...,1], -1) 321 | h = np.expand_dims(box_size[...,2], -1) 322 | corners_3d = np.zeros(tuple(list(input_shape)+[8,3])) 323 | # corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1) 324 | # corners_3d[...,:,1] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1) 325 | # corners_3d[...,:,2] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1) 326 | corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1) 327 | corners_3d[...,:,1] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1) 328 | corners_3d[...,:,2] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1) 329 | tlist = [i for i in range(len(input_shape))] 330 | tlist += [len(input_shape)+1, len(input_shape)] 331 | corners_3d = np.matmul(corners_3d, np.transpose(R, tuple(tlist))) 332 | corners_3d += np.expand_dims(center, -2) 333 | return corners_3d 334 | -------------------------------------------------------------------------------- /utils/eta.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File Created: Monday, 25th November 2019 1:35:30 
pm 3 | Author: Dave Zhenyu Chen (zhenyu.chen@tum.de) 4 | ''' 5 | 6 | def get_eta(start, end, extra, num_left): 7 | exe_s = end - start 8 | eta_s = (exe_s + extra) * num_left 9 | eta = {'h': 0, 'm': 0, 's': 0} 10 | if eta_s < 60: 11 | eta['s'] = int(eta_s) 12 | elif eta_s >= 60 and eta_s < 3600: 13 | eta['m'] = int(eta_s / 60) 14 | eta['s'] = int(eta_s % 60) 15 | else: 16 | eta['h'] = int(eta_s / (60 * 60)) 17 | eta['m'] = int(eta_s % (60 * 60) / 60) 18 | eta['s'] = int(eta_s % (60 * 60) % 60) 19 | 20 | return eta 21 | 22 | def decode_eta(eta_sec): 23 | eta = {'h': 0, 'm': 0, 's': 0} 24 | if eta_sec < 60: 25 | eta['s'] = int(eta_sec) 26 | elif eta_sec >= 60 and eta_sec < 3600: 27 | eta['m'] = int(eta_sec / 60) 28 | eta['s'] = int(eta_sec % 60) 29 | else: 30 | eta['h'] = int(eta_sec / (60 * 60)) 31 | eta['m'] = int(eta_sec % (60 * 60) / 60) 32 | eta['s'] = int(eta_sec % (60 * 60) % 60) 33 | 34 | return eta -------------------------------------------------------------------------------- /utils/eval_det.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generic Code for Object Detection Evaluation 3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/eval_det.py 4 | 5 | Input: 6 | For each class: 7 | For each image: 8 | Predictions: box, score 9 | Groundtruths: box 10 | 11 | Output: 12 | For each class: 13 | precision-recall and average precision 14 | 15 | Author: Charles R. Qi 16 | 17 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 18 | """ 19 | import numpy as np 20 | 21 | def voc_ap(rec, prec, use_07_metric=False): 22 | """ ap = voc_ap(rec, prec, [use_07_metric]) 23 | Compute VOC AP given precision and recall. 24 | If use_07_metric is true, uses the 25 | VOC 07 11 point method (default:False). 26 | """ 27 | if use_07_metric: 28 | # 11 point metric 29 | ap = 0. 30 | for t in np.arange(0., 1.1, 0.1): 31 | if np.sum(rec >= t) == 0: 32 | p = 0 33 | else: 34 | p = np.max(prec[rec >= t]) 35 | ap = ap + p / 11. 36 | else: 37 | # correct AP calculation 38 | # first append sentinel values at the end 39 | mrec = np.concatenate(([0.], rec, [1.])) 40 | mpre = np.concatenate(([0.], prec, [0.])) 41 | 42 | # compute the precision envelope 43 | for i in range(mpre.size - 1, 0, -1): 44 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 45 | 46 | # to calculate area under PR curve, look for points 47 | # where X axis (recall) changes value 48 | i = np.where(mrec[1:] != mrec[:-1])[0] 49 | 50 | # and sum (\Delta recall) * prec 51 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 52 | return ap 53 | 54 | import os 55 | import sys 56 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 57 | from utils.metric_util import calc_iou # axis-aligned 3D box IoU 58 | def get_iou(bb1, bb2): 59 | """ Compute IoU of two bounding boxes. 60 | ** Define your box IoU function HERE ** 61 | """ 62 | #pass 63 | iou3d = calc_iou(bb1, bb2) 64 | return iou3d 65 | 66 | from utils.box_util import box3d_iou 67 | def get_iou_obb(bb1,bb2): 68 | iou3d = box3d_iou(bb1,bb2) 69 | return iou3d 70 | 71 | def get_iou_main(get_iou_func, args): 72 | return get_iou_func(*args) 73 | 74 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 75 | """ Generic functions to compute precision/recall for object detection 76 | for a single class. 
77 | Input: 78 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 79 | gt: map of {img_id: [bbox]} 80 | ovthresh: scalar, iou threshold 81 | use_07_metric: bool, if True use VOC07 11 point method 82 | Output: 83 | rec: numpy array of length nd 84 | prec: numpy array of length nd 85 | ap: scalar, average precision 86 | """ 87 | 88 | # construct gt objects 89 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 90 | npos = 0 91 | for img_id in gt.keys(): 92 | bbox = np.array(gt[img_id]) 93 | det = [False] * len(bbox) 94 | npos += len(bbox) 95 | class_recs[img_id] = {'bbox': bbox, 'det': det} 96 | # pad empty list to all other imgids 97 | for img_id in pred.keys(): 98 | if img_id not in gt: 99 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 100 | 101 | # construct dets 102 | image_ids = [] 103 | confidence = [] 104 | BB = [] 105 | for img_id in pred.keys(): 106 | for box,score in pred[img_id]: 107 | image_ids.append(img_id) 108 | confidence.append(score) 109 | BB.append(box) 110 | confidence = np.array(confidence) 111 | BB = np.array(BB) # (nd,4 or 8,3 or 6) 112 | 113 | # sort by confidence 114 | sorted_ind = np.argsort(-confidence) 115 | sorted_scores = np.sort(-confidence) 116 | BB = BB[sorted_ind, ...] 117 | image_ids = [image_ids[x] for x in sorted_ind] 118 | 119 | # go down dets and mark TPs and FPs 120 | nd = len(image_ids) 121 | tp = np.zeros(nd) 122 | fp = np.zeros(nd) 123 | for d in range(nd): 124 | #if d%100==0: print(d) 125 | R = class_recs[image_ids[d]] 126 | bb = BB[d,...].astype(float) 127 | ovmax = -np.inf 128 | BBGT = R['bbox'].astype(float) 129 | 130 | if BBGT.size > 0: 131 | # compute overlaps 132 | for j in range(BBGT.shape[0]): 133 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...])) 134 | if iou > ovmax: 135 | ovmax = iou 136 | jmax = j 137 | 138 | #print d, ovmax 139 | if ovmax > ovthresh: 140 | if not R['det'][jmax]: 141 | tp[d] = 1. 142 | R['det'][jmax] = 1 143 | else: 144 | fp[d] = 1. 145 | else: 146 | fp[d] = 1. 147 | 148 | # compute precision recall 149 | fp = np.cumsum(fp) 150 | tp = np.cumsum(tp) 151 | rec = tp / float(npos + 1e-8) 152 | #print('NPOS: ', npos) 153 | # avoid divide by zero in case the first detection matches a difficult 154 | # ground truth 155 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 156 | ap = voc_ap(rec, prec, use_07_metric) 157 | 158 | return rec, prec, ap 159 | 160 | def eval_det_cls_wrapper(arguments): 161 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 162 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 163 | return (rec, prec, ap) 164 | 165 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 166 | """ Generic functions to compute precision/recall for object detection 167 | for multiple classes. 
168 | Input: 169 | pred_all: map of {img_id: [(classname, bbox, score)]} 170 | gt_all: map of {img_id: [(classname, bbox)]} 171 | ovthresh: scalar, iou threshold 172 | use_07_metric: bool, if true use VOC07 11 point method 173 | Output: 174 | rec: {classname: rec} 175 | prec: {classname: prec_all} 176 | ap: {classname: scalar} 177 | """ 178 | pred = {} # map {classname: pred} 179 | gt = {} # map {classname: gt} 180 | for img_id in pred_all.keys(): 181 | for classname, bbox, score in pred_all[img_id]: 182 | if classname not in pred: pred[classname] = {} 183 | if img_id not in pred[classname]: 184 | pred[classname][img_id] = [] 185 | if classname not in gt: gt[classname] = {} 186 | if img_id not in gt[classname]: 187 | gt[classname][img_id] = [] 188 | pred[classname][img_id].append((bbox,score)) 189 | for img_id in gt_all.keys(): 190 | for classname, bbox in gt_all[img_id]: 191 | if classname not in gt: gt[classname] = {} 192 | if img_id not in gt[classname]: 193 | gt[classname][img_id] = [] 194 | gt[classname][img_id].append(bbox) 195 | 196 | rec = {} 197 | prec = {} 198 | ap = {} 199 | for classname in gt.keys(): 200 | print('Computing AP for class: ', classname) 201 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) 202 | print(classname, ap[classname]) 203 | 204 | return rec, prec, ap 205 | 206 | from multiprocessing import Pool 207 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 208 | """ Generic functions to compute precision/recall for object detection 209 | for multiple classes. 210 | Input: 211 | pred_all: map of {img_id: [(classname, bbox, score)]} 212 | gt_all: map of {img_id: [(classname, bbox)]} 213 | ovthresh: scalar, iou threshold 214 | use_07_metric: bool, if true use VOC07 11 point method 215 | Output: 216 | rec: {classname: rec} 217 | prec: {classname: prec_all} 218 | ap: {classname: scalar} 219 | """ 220 | pred = {} # map {classname: pred} 221 | gt = {} # map {classname: gt} 222 | for img_id in pred_all.keys(): 223 | for classname, bbox, score in pred_all[img_id]: 224 | if classname not in pred: pred[classname] = {} 225 | if img_id not in pred[classname]: 226 | pred[classname][img_id] = [] 227 | if classname not in gt: gt[classname] = {} 228 | if img_id not in gt[classname]: 229 | gt[classname][img_id] = [] 230 | pred[classname][img_id].append((bbox,score)) 231 | for img_id in gt_all.keys(): 232 | for classname, bbox in gt_all[img_id]: 233 | if classname not in gt: gt[classname] = {} 234 | if img_id not in gt[classname]: 235 | gt[classname][img_id] = [] 236 | gt[classname][img_id].append(bbox) 237 | 238 | rec = {} 239 | prec = {} 240 | ap = {} 241 | p = Pool(processes=10) 242 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred]) 243 | p.close() 244 | for i, classname in enumerate(gt.keys()): 245 | if classname in pred: 246 | rec[classname], prec[classname], ap[classname] = ret_values[i] 247 | else: 248 | rec[classname] = 0 249 | prec[classname] = 0 250 | ap[classname] = 0 251 | print(classname, ap[classname]) 252 | 253 | return rec, prec, ap 254 | -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for metric evaluation. 
3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/metric_util.py 4 | 5 | Author: Or Litany and Charles R. Qi 6 | """ 7 | 8 | import os 9 | import sys 10 | import torch 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(BASE_DIR) 13 | 14 | import numpy as np 15 | 16 | # Mesh IO 17 | import trimesh 18 | 19 | 20 | # ---------------------------------------- 21 | # Precision and Recall 22 | # ---------------------------------------- 23 | 24 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 25 | ''' 26 | Args: 27 | labels: (B, N, 6) 28 | pred: (B, M, 6) 29 | iou_thresh: scalar 30 | conf_thresh: scalar 31 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 32 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 33 | Returns: 34 | TP,FP,FN,Precision,Recall 35 | ''' 36 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 37 | # of True/False array as in numpy 38 | assert(not torch.is_tensor(label_mask)) 39 | assert(not torch.is_tensor(pred_mask)) 40 | TP, FP, FN = 0, 0, 0 41 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 42 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 43 | for batch_idx in range(labels.shape[0]): 44 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :], 45 | pred[batch_idx, pred_mask[batch_idx,:]==1, :], 46 | iou_thresh, conf_thresh) 47 | TP += TP_i 48 | FP += FP_i 49 | FN += FN_i 50 | 51 | return TP, FP, FN, precision_recall(TP, FP, FN) 52 | 53 | 54 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 55 | """Compute P and R for predicted bounding boxes. Ignores classes! 56 | Args: 57 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 58 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 59 | Returns: 60 | TP, FP, FN 61 | """ 62 | 63 | 64 | # for each pred box with high conf (C), compute IoU with all gt boxes. 65 | # TP = number of times IoU > th ; FP = C - TP 66 | # FN - number of scene objects without good match 67 | 68 | gt_bboxes = labels[:, :6] 69 | 70 | num_scene_bboxes = gt_bboxes.shape[0] 71 | conf = pred[:, 6] 72 | 73 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 74 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 75 | 76 | # init an array to keep iou between generated and scene bboxes 77 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 78 | for g_idx in range(num_conf_pred_bboxes): 79 | for s_idx in range(num_scene_bboxes): 80 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :]) 81 | 82 | 83 | good_match_arr = (iou_arr >= iou_thresh) 84 | 85 | TP = good_match_arr.any(axis=1).sum() 86 | FP = num_conf_pred_bboxes - TP 87 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 88 | 89 | return TP, FP, FN 90 | 91 | 92 | def precision_recall(TP, FP, FN): 93 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0 94 | Rec = 1.0 * TP / (TP + FN) 95 | return Prec, Rec 96 | 97 | 98 | def calc_iou(box_a, box_b): 99 | """Computes IoU of two axis aligned bboxes. 
100 | Args: 101 | box_a, box_b: 6D of center and lengths 102 | Returns: 103 | iou 104 | """ 105 | 106 | max_a = box_a[0:3] + box_a[3:6]/2 107 | max_b = box_b[0:3] + box_b[3:6]/2 108 | min_max = np.array([max_a, max_b]).min(0) 109 | 110 | min_a = box_a[0:3] - box_a[3:6]/2 111 | min_b = box_b[0:3] - box_b[3:6]/2 112 | max_min = np.array([min_a, min_b]).max(0) 113 | if not ((min_max > max_min).all()): 114 | return 0.0 115 | 116 | intersection = (min_max - max_min).prod() 117 | vol_a = box_a[3:6].prod() 118 | vol_b = box_b[3:6].prod() 119 | union = vol_a + vol_b - intersection 120 | return 1.0*intersection / union 121 | 122 | 123 | if __name__ == '__main__': 124 | print('running some tests') 125 | 126 | ############ 127 | ## Test IoU 128 | ############ 129 | box_a = np.array([0,0,0,1,1,1]) 130 | box_b = np.array([0,0,0,2,2,2]) 131 | expected_iou = 1.0/8 132 | pred_iou = calc_iou(box_a, box_b) 133 | assert expected_iou == pred_iou, 'function returned wrong IoU' 134 | 135 | box_a = np.array([0,0,0,1,1,1]) 136 | box_b = np.array([10,10,10,2,2,2]) 137 | expected_iou = 0.0 138 | pred_iou = calc_iou(box_a, box_b) 139 | assert expected_iou == pred_iou, 'function returned wrong IoU' 140 | 141 | print('IoU test -- PASSED') 142 | 143 | ######################### 144 | ## Test Precision Recall 145 | ######################### 146 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]]) 147 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]]) 148 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 149 | assert TP == 2 and FP == 0 and FN == 0 150 | assert precision_recall(TP, FP, FN) == (1, 1) 151 | 152 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]]) 153 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 154 | assert TP == 1 and FP == 0 and FN == 1 155 | assert precision_recall(TP, FP, FN) == (1, 0.5) 156 | 157 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]]) 158 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 159 | assert TP == 1 and FP == 1 and FN == 1 160 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 161 | 162 | # wrong box has low confidence 163 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]]) 164 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 165 | assert TP == 1 and FP == 0 and FN == 1 166 | assert precision_recall(TP, FP, FN) == (1, 0.5) 167 | 168 | print('Precision Recall test -- PASSED') 169 | 170 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils.pc_utils import bbox_corner_dist_measure 3 | 4 | # boxes are axis aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 5 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 6 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 7 | ''' 8 | def nms_2d(boxes, overlap_threshold): 9 | x1 = boxes[:,0] 10 | y1 = boxes[:,1] 11 | x2 = boxes[:,2] 12 | y2 = boxes[:,3] 13 | score = boxes[:,4] 14 | area = (x2-x1)*(y2-y1) 15 | 16 | I = np.argsort(score) 17 | pick = [] 18 | while (I.size!=0): 19 | last = I.size 20 | i = I[-1] 21 | pick.append(i) 22 | suppress = [last-1] 23 | for pos in range(last-1): 24 | j = I[pos] 25 | xx1 = max(x1[i],x1[j]) 26 | yy1 = max(y1[i],y1[j]) 27 | xx2 = min(x2[i],x2[j]) 28 | yy2 = min(y2[i],y2[j]) 
29 | w = xx2-xx1 30 | h = yy2-yy1 31 | if (w>0 and h>0): 32 | o = w*h/area[j] 33 | print('Overlap is', o) 34 | if (o>overlap_threshold): 35 | suppress.append(pos) 36 | I = np.delete(I,suppress) 37 | return pick 38 | 39 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 40 | x1 = boxes[:,0] 41 | y1 = boxes[:,1] 42 | x2 = boxes[:,2] 43 | y2 = boxes[:,3] 44 | score = boxes[:,4] 45 | area = (x2-x1)*(y2-y1) 46 | 47 | I = np.argsort(score) 48 | pick = [] 49 | while (I.size!=0): 50 | last = I.size 51 | i = I[-1] 52 | pick.append(i) 53 | 54 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 55 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 56 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 57 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 58 | 59 | w = np.maximum(0, xx2-xx1) 60 | h = np.maximum(0, yy2-yy1) 61 | 62 | if old_type: 63 | o = (w*h)/area[I[:last-1]] 64 | else: 65 | inter = w*h 66 | o = inter / (area[i] + area[I[:last-1]] - inter) 67 | 68 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 69 | 70 | return pick 71 | 72 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 73 | x1 = boxes[:,0] 74 | y1 = boxes[:,1] 75 | z1 = boxes[:,2] 76 | x2 = boxes[:,3] 77 | y2 = boxes[:,4] 78 | z2 = boxes[:,5] 79 | score = boxes[:,6] 80 | area = (x2-x1)*(y2-y1)*(z2-z1) 81 | 82 | I = np.argsort(score) 83 | pick = [] 84 | while (I.size!=0): 85 | last = I.size 86 | i = I[-1] 87 | pick.append(i) 88 | 89 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 90 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 91 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 92 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 93 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 94 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 95 | 96 | l = np.maximum(0, xx2-xx1) 97 | w = np.maximum(0, yy2-yy1) 98 | h = np.maximum(0, zz2-zz1) 99 | 100 | if old_type: 101 | o = (l*w*h)/area[I[:last-1]] 102 | else: 103 | inter = l*w*h 104 | o = inter / (area[i] + area[I[:last-1]] - inter) 105 | 106 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 107 | 108 | return pick 109 | 110 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 111 | x1 = boxes[:,0] 112 | y1 = boxes[:,1] 113 | z1 = boxes[:,2] 114 | x2 = boxes[:,3] 115 | y2 = boxes[:,4] 116 | z2 = boxes[:,5] 117 | score = boxes[:,6] 118 | cls = boxes[:,7] 119 | area = (x2-x1)*(y2-y1)*(z2-z1) 120 | 121 | I = np.argsort(score) 122 | pick = [] 123 | while (I.size!=0): 124 | last = I.size 125 | i = I[-1] 126 | pick.append(i) 127 | 128 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 129 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 130 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 131 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 132 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 133 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 134 | cls1 = cls[i] 135 | cls2 = cls[I[:last-1]] 136 | 137 | l = np.maximum(0, xx2-xx1) 138 | w = np.maximum(0, yy2-yy1) 139 | h = np.maximum(0, zz2-zz1) 140 | 141 | if old_type: 142 | o = (l*w*h)/area[I[:last-1]] 143 | else: 144 | inter = l*w*h 145 | o = inter / (area[i] + area[I[:last-1]] - inter) 146 | o = o * (cls1==cls2) 147 | 148 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 149 | 150 | return pick 151 | 152 | 153 | def nms_crnr_dist(boxes, conf, overlap_threshold): 154 | 155 | I = np.argsort(conf) 156 | pick = [] 157 | while (I.size!=0): 158 | last = I.size 159 | i = I[-1] 160 | pick.append(i) 161 | 162 | scores = [] 163 | for ind in I[:-1]: 164 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 165 | 166 | I 
= np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 167 | 168 | return pick 169 | 170 | if __name__=='__main__': 171 | a = np.random.random((100,5)) 172 | print(nms_2d(a,0.9)) 173 | print(nms_2d_faster(a,0.9)) 174 | -------------------------------------------------------------------------------- /utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chamfer distance in Pytorch. 3 | Author: Charles R. Qi 4 | 5 | From: https://github.com/facebookresearch/votenet/blob/master/utils/nn_distance.py 6 | """ 7 | 8 | import torch 9 | import torch.nn as nn 10 | import numpy as np 11 | 12 | 13 | def huber_loss(error, delta=1.0): 14 | """ 15 | Args: 16 | error: Torch tensor (d1,d2,...,dk) 17 | Returns: 18 | loss: Torch tensor (d1,d2,...,dk) 19 | 20 | x = error = pred - gt or dist(pred,gt) 21 | 0.5 * |x|^2 if |x|<=d 22 | 0.5 * d^2 + d * (|x|-d) if |x|>d 23 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 24 | """ 25 | abs_error = torch.abs(error) 26 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 27 | quadratic = torch.clamp(abs_error, max=delta) 28 | linear = (abs_error - quadratic) 29 | loss = 0.5 * quadratic**2 + delta * linear 30 | return loss 31 | 32 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 33 | """ 34 | Input: 35 | pc1: (B,N,C) torch tensor 36 | pc2: (B,M,C) torch tensor 37 | l1smooth: bool, whether to use l1smooth loss 38 | delta: scalar, the delta used in l1smooth loss 39 | Output: 40 | dist1: (B,N) torch float32 tensor 41 | idx1: (B,N) torch int64 tensor 42 | dist2: (B,M) torch float32 tensor 43 | idx2: (B,M) torch int64 tensor 44 | """ 45 | N = pc1.shape[1] 46 | M = pc2.shape[1] 47 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 48 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 49 | pc_diff = pc1_expand_tile - pc2_expand_tile 50 | 51 | if l1smooth: 52 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 53 | elif l1: 54 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 55 | else: 56 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 57 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 58 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 59 | return dist1, idx1, dist2, idx2 60 | 61 | def demo_nn_distance(): 62 | np.random.seed(0) 63 | pc1arr = np.random.random((1,5,3)) 64 | pc2arr = np.random.random((1,6,3)) 65 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 66 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 67 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2) 68 | print(dist1) 69 | print(idx1) 70 | dist = np.zeros((5,6)) 71 | for i in range(5): 72 | for j in range(6): 73 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 74 | print(dist) 75 | print('-'*30) 76 | print('L1smooth dists:') 77 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 78 | print(dist1) 79 | print(idx1) 80 | dist = np.zeros((5,6)) 81 | for i in range(5): 82 | for j in range(6): 83 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 84 | quad = np.minimum(error, 1.0) 85 | linear = error - quad 86 | loss = 0.5*quad**2 + 1.0*linear 87 | dist[i,j] = np.sum(loss) 88 | print(dist) 89 | 90 | 91 | if __name__ == '__main__': 92 | demo_nn_distance() 93 | --------------------------------------------------------------------------------
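The following is a minimal usage sketch (not a file from this repository) for the box and IoU helpers in utils/box_util.py and utils/metric_util.py; it assumes the repository root is on the Python path so that the utils package is importable, and the box sizes and centers are only illustrative.

import numpy as np

from utils.box_util import get_3d_box, box3d_iou, box2d_iou
from utils.metric_util import calc_iou

# Two axis-aligned 2x2x2 boxes, the second shifted by 1 along x.
box_a = get_3d_box((2.0, 2.0, 2.0), 0.0, (0.0, 0.0, 0.0))  # (8,3) corner array
box_b = get_3d_box((2.0, 2.0, 2.0), 0.0, (1.0, 0.0, 0.0))
print(box3d_iou(box_a, box_b))  # intersection 4 / union 12

# The same pair in the 6D (center, lengths) representation used by metric_util.
print(calc_iou(np.array([0., 0., 0., 2., 2., 2.]), np.array([1., 0., 0., 2., 2., 2.])))

# 2D IoU on (xmin, ymin, xmax, ymax) tuples.
print(box2d_iou((0, 0, 2, 2), (1, 0, 3, 2)))

In each case the overlap is one third of the union, so every call prints roughly 0.333.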
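A similarly hedged sketch of the detection-evaluation entry point eval_det, under the same path assumption; the scene id and class name below are made up for illustration.

from utils.box_util import get_3d_box
from utils.eval_det import eval_det, get_iou_obb

# One scene, one class, one detection that matches its ground-truth box exactly.
box = get_3d_box((1.0, 1.0, 1.0), 0.0, (0.0, 0.0, 0.0))
pred_all = {"scene0000_00": [("chair", box, 0.95)]}  # img_id -> [(classname, bbox, score)]
gt_all = {"scene0000_00": [("chair", box)]}          # img_id -> [(classname, bbox)]

rec, prec, ap = eval_det(pred_all, gt_all, ovthresh=0.25, get_iou_func=get_iou_obb)
print(ap["chair"])  # a perfect match gives an AP of (almost exactly) 1.0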
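And a small sketch of nms_3d_faster under the same assumption; box rows follow the (x1, y1, z1, x2, y2, z2, score) layout the function reads, and the values are arbitrary.

import numpy as np

from utils.nms import nms_3d_faster

boxes = np.array([
    [0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.9],   # highest score, kept
    [0.1, 0.0, 0.0, 2.1, 2.0, 2.0, 0.8],   # heavy overlap with the first box, suppressed
    [5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 0.7],   # disjoint from both, kept
])
print(nms_3d_faster(boxes, overlap_threshold=0.25))  # indices of the kept boxes: [0, 2]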