├── .gitignore
├── LICENSE
├── README.md
├── benchmark
│   ├── eval.py
│   └── predict.py
├── data
│   └── scannet
│       ├── README.md
│       ├── batch_load_scannet_data.py
│       ├── load_scannet_data.py
│       ├── meta_data
│       │   ├── nyu40_labels.csv
│       │   ├── scannet_means.npz
│       │   ├── scannet_reference_means.npz
│       │   ├── scannetv2-labels.combined.tsv
│       │   ├── scannetv2.txt
│       │   ├── scannetv2_test.txt
│       │   ├── scannetv2_train.txt
│       │   └── scannetv2_val.txt
│       ├── model_util_scannet.py
│       ├── scannet_utils.py
│       └── visualize.py
├── demo
│   └── ScanRefer.gif
├── docs
│   ├── browser.png
│   ├── davezchen_eccv2020_scanrefer.pdf
│   ├── index.html
│   ├── paper.jpg
│   ├── teaser.png
│   └── w3.css
├── lib
│   ├── ap_helper.py
│   ├── config.py
│   ├── dataset.py
│   ├── enet.py
│   ├── eval_helper.py
│   ├── loss.py
│   ├── loss_helper.py
│   ├── pointnet2
│   │   ├── _ext_src
│   │   │   ├── include
│   │   │   │   ├── ball_query.h
│   │   │   │   ├── cuda_utils.h
│   │   │   │   ├── group_points.h
│   │   │   │   ├── interpolate.h
│   │   │   │   ├── sampling.h
│   │   │   │   └── utils.h
│   │   │   └── src
│   │   │       ├── ball_query.cpp
│   │   │       ├── ball_query_gpu.cu
│   │   │       ├── bindings.cpp
│   │   │       ├── group_points.cpp
│   │   │       ├── group_points_gpu.cu
│   │   │       ├── interpolate.cpp
│   │   │       ├── interpolate_gpu.cu
│   │   │       ├── sampling.cpp
│   │   │       └── sampling_gpu.cu
│   │   ├── _version.py
│   │   ├── pointnet2_modules.py
│   │   ├── pointnet2_test.py
│   │   ├── pointnet2_utils.py
│   │   ├── pytorch_utils.py
│   │   └── setup.py
│   ├── projection.py
│   └── solver.py
├── models
│   ├── backbone_module.py
│   ├── lang_module.py
│   ├── match_module.py
│   ├── proposal_module.py
│   ├── refnet.py
│   └── voting_module.py
├── requirements.txt
├── scripts
│   ├── compute_multiview_features.py
│   ├── eval.py
│   ├── project_multiview_features.py
│   ├── project_multiview_labels.py
│   ├── train.py
│   └── visualize.py
└── utils
    ├── box_util.py
    ├── eta.py
    ├── eval_det.py
    ├── metric_util.py
    ├── nms.py
    ├── nn_distance.py
    └── pc_utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # dataset
2 | data/scanrefer*
3 | data/ScanRefer*
4 | data/glove*
5 | data/scannet/scannet_data
6 | data/scannet/scans
7 | data/scannetv2_enet.pth
8 |
9 | # cache
10 | data/scannet/__pycache__
11 | lib/__pycache__
12 | lib/pointnet2/__pycache__
13 | models/__pycache__
14 | utils/__pycache__
15 | .DS_Store
16 |
17 | # pointnet2
18 | lib/pointnet2/build/
19 | lib/pointnet2/dist/
20 | lib/pointnet2/pointnet2.egg-info/
21 |
22 | # output
23 | outputs/
24 |
25 | # delete
26 | docs/.DS_Store
27 | demo/.DS_Store
28 |
29 | # misc
30 | upload/
--------------------------------------------------------------------------------
/benchmark/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import argparse
5 |
6 | import numpy as np
7 |
8 | from tqdm import tqdm
9 |
10 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
11 | from lib.config import CONF
12 | from utils.box_util import box3d_iou
13 |
14 | SCANREFER_GT = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_test_gt_bbox.json")))
15 |
16 | def organize_gt():
17 | organized = {}
18 |
19 | for data in SCANREFER_GT:
20 | scene_id = data["scene_id"]
21 | object_id = data["object_id"]
22 | ann_id = data["ann_id"]
23 |
24 | if scene_id not in organized:
25 | organized[scene_id] = {}
26 |
27 | if object_id not in organized[scene_id]:
28 | organized[scene_id][object_id] = {}
29 |
30 | if ann_id not in organized[scene_id][object_id]:
31 | organized[scene_id][object_id][ann_id] = {}
32 |
33 | organized[scene_id][object_id][ann_id] = data
34 |
35 | return organized
36 |
37 | def evaluate(args):
38 | pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json")
39 | if not os.path.isfile(pred_path):
40 | print("please run `benchmark/predict.py` first to generate bounding boxes")
41 | exit()
42 |
43 | organized_gt = organize_gt()
44 |
45 | with open(pred_path) as f:
46 | predictions = json.load(f)
47 | ious = []
48 | masks = []
49 | others = []
50 | print("evaluating...")
51 | for data in tqdm(predictions):
52 | scene_id = data["scene_id"]
53 | object_id = data["object_id"]
54 | ann_id = data["ann_id"]
55 | pred_bbox = np.array(data["bbox"])
56 | mask = data["unique_multiple"]
57 | other = data["others"]
58 |
59 | try:
60 | gt_bbox = np.array(organized_gt[scene_id][object_id][ann_id]["bbox"])
61 | # iou, _ = box3d_iou(pred_bbox, gt_bbox)
62 | iou = box3d_iou(pred_bbox, gt_bbox)
63 |
64 | except KeyError:
65 | iou = 0
66 |
67 | ious.append(iou)
68 | masks.append(mask)
69 | others.append(other)
70 |
71 | # ious = np.array(ious)
72 | # iou_rate_025 = ious[ious >= 0.25].shape[0] / ious.shape[0]
73 | # iou_rate_05 = ious[ious >= 0.5].shape[0] / ious.shape[0]
74 |
75 | # print("\nAcc@0.25IoU: {}".format(iou_rate_025))
76 | # print("Acc@0.5IoU: {}".format(iou_rate_05))
77 |
78 | ious = np.array(ious)
79 | masks = np.array(masks)
80 | others = np.array(others)
81 |
82 | multiple_dict = {
83 | "unique": 0,
84 | "multiple": 1
85 | }
86 | others_dict = {
87 | "not_in_others": 0,
88 | "in_others": 1
89 | }
90 |
91 | # evaluation stats
92 | stats = {k: np.sum(masks == v) for k, v in multiple_dict.items()}
93 | stats["overall"] = masks.shape[0]
94 | stats = {}
95 | for k, v in multiple_dict.items():
96 | stats[k] = {}
97 | for k_o, v_o in others_dict.items():
98 | stats[k][k_o] = np.sum(np.logical_and(masks == v, others == v_o))
99 |
100 | stats[k]["overall"] = np.sum(masks == v)
101 |
102 | stats["overall"] = {}
103 | for k_o, v_o in others_dict.items():
104 | stats["overall"][k_o] = np.sum(others == v_o)
105 |
106 | stats["overall"]["overall"] = masks.shape[0]
107 |
108 | # aggregate scores
109 | scores = {}
110 | for k, v in multiple_dict.items():
111 | for k_o in others_dict.keys():
112 | acc_025iou = ious[np.logical_and(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o]), ious >= 0.25)].shape[0] \
113 | / ious[np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])].shape[0] \
114 | if np.sum(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])) > 0 else 0
115 | acc_05iou = ious[np.logical_and(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o]), ious >= 0.5)].shape[0] \
116 | / ious[np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])].shape[0] \
117 | if np.sum(np.logical_and(masks == multiple_dict[k], others == others_dict[k_o])) > 0 else 0
118 |
119 | if k not in scores:
120 | scores[k] = {k_o: {} for k_o in others_dict.keys()}
121 |
122 | scores[k][k_o]["acc@0.25iou"] = acc_025iou
123 | scores[k][k_o]["acc@0.5iou"] = acc_05iou
124 |
125 | acc_025iou = ious[np.logical_and(masks == multiple_dict[k], ious >= 0.25)].shape[0] \
126 | / ious[masks == multiple_dict[k]].shape[0] if np.sum(masks == multiple_dict[k]) > 0 else 0
127 | acc_05iou = ious[np.logical_and(masks == multiple_dict[k], ious >= 0.5)].shape[0] \
128 | / ious[masks == multiple_dict[k]].shape[0] if np.sum(masks == multiple_dict[k]) > 0 else 0
129 |
130 | scores[k]["overall"] = {}
131 | scores[k]["overall"]["acc@0.25iou"] = acc_025iou
132 | scores[k]["overall"]["acc@0.5iou"] = acc_05iou
133 |
134 | scores["overall"] = {}
135 | for k_o in others_dict.keys():
136 | acc_025iou = ious[np.logical_and(others == others_dict[k_o], ious >= 0.25)].shape[0] \
137 | / ious[others == others_dict[k_o]].shape[0] if np.sum(others == others_dict[k_o]) > 0 else 0
138 | acc_05iou = ious[np.logical_and(others == others_dict[k_o], ious >= 0.5)].shape[0] \
139 | / ious[others == others_dict[k_o]].shape[0] if np.sum(others == others_dict[k_o]) > 0 else 0
140 |
141 | # aggregate
142 | scores["overall"][k_o] = {}
143 | scores["overall"][k_o]["acc@0.25iou"] = acc_025iou
144 | scores["overall"][k_o]["acc@0.5iou"] = acc_05iou
145 |
146 | acc_025iou = ious[ious >= 0.25].shape[0] / ious.shape[0]
147 | acc_05iou = ious[ious >= 0.5].shape[0] / ious.shape[0]
148 |
149 |
150 | # aggregate
151 | scores["overall"]["overall"] = {}
152 | scores["overall"]["overall"]["acc@0.25iou"] = acc_025iou
153 | scores["overall"]["overall"]["acc@0.5iou"] = acc_05iou
154 |
155 | # report
156 | print("\nstats:")
157 | for k_s in stats.keys():
158 | for k_o in stats[k_s].keys():
159 | print("{} | {}: {}".format(k_s, k_o, stats[k_s][k_o]))
160 |
161 | for k_s in scores.keys():
162 | print("\n{}:".format(k_s))
163 | for k_m in scores[k_s].keys():
164 | for metric in scores[k_s][k_m].keys():
165 | print("{} | {} | {}: {}".format(k_s, k_m, metric, scores[k_s][k_m][metric]))
166 |
167 | if __name__ == "__main__":
168 | parser = argparse.ArgumentParser()
169 | parser.add_argument("--folder", type=str, help="Folder containing the model")
170 | args = parser.parse_args()
171 |
172 | evaluate(args)
--------------------------------------------------------------------------------
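
The nested expressions in `evaluate` above all reduce to the same operation: the fraction of IoUs inside a bucket (unique/multiple crossed with in_others/not_in_others) that clear a threshold. Below is a minimal sketch of that reduction; the helper `acc_at_iou` is hypothetical and not part of the repository.

```python
import numpy as np

def acc_at_iou(ious, bucket_mask, threshold):
    """Acc@<threshold>IoU for one bucket: fraction of selected IoUs >= threshold (0 for empty buckets)."""
    selected = np.asarray(ious)[np.asarray(bucket_mask, dtype=bool)]
    if selected.shape[0] == 0:
        return 0.0
    return float((selected >= threshold).sum()) / selected.shape[0]

# e.g. the "multiple" / "overall" cell printed by eval.py corresponds to
# acc_at_iou(ious, masks == 1, 0.25) and acc_at_iou(ious, masks == 1, 0.5)
```
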
/benchmark/predict.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import pickle
5 | import argparse
6 | import importlib
7 | import torch
8 | import torch.optim as optim
9 | import torch.nn as nn
10 | import numpy as np
11 |
12 | from torch.utils.data import DataLoader
13 | from datetime import datetime
14 | from tqdm import tqdm
15 | from copy import deepcopy
16 |
17 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
18 | from lib.config import CONF
19 | from lib.dataset import ScannetReferenceDataset
20 | from lib.solver import Solver
21 | from lib.ap_helper import APCalculator, parse_predictions, parse_groundtruths
22 | from lib.loss_helper import get_loss
23 | from lib.eval_helper import get_eval
24 | from models.refnet import RefNet
25 | from utils.box_util import get_3d_box
26 | from data.scannet.model_util_scannet import ScannetDatasetConfig
27 |
28 | SCANREFER_TEST = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_test.json")))
29 |
30 | def get_dataloader(args, scanrefer, all_scene_list, split, config):
31 | dataset = ScannetReferenceDataset(
32 | scanrefer=scanrefer,
33 | scanrefer_all_scene=all_scene_list,
34 | split=split,
35 | num_points=args.num_points,
36 | use_color=args.use_color,
37 | use_height=(not args.no_height),
38 | use_normal=args.use_normal,
39 | use_multiview=args.use_multiview
40 | )
41 | print("predict for {} samples".format(len(dataset)))
42 |
43 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)
44 |
45 | return dataset, dataloader
46 |
47 | def get_model(args, config):
48 | # load model
49 | input_channels = int(args.use_multiview) * 128 + int(args.use_normal) * 3 + int(args.use_color) * 3 + int(not args.no_height)
50 | model = RefNet(
51 | num_class=config.num_class,
52 | num_heading_bin=config.num_heading_bin,
53 | num_size_cluster=config.num_size_cluster,
54 | mean_size_arr=config.mean_size_arr,
55 | num_proposal=args.num_proposals,
56 | input_feature_dim=input_channels,
57 | use_lang_classifier=(not args.no_lang_cls),
58 | use_bidir=args.use_bidir
59 | ).cuda()
60 |
61 | model_name = "model.pth"
62 | path = os.path.join(CONF.PATH.OUTPUT, args.folder, model_name)
63 | model.load_state_dict(torch.load(path), strict=False)
64 | model.eval()
65 |
66 | return model
67 |
68 | def get_scannet_scene_list(split):
69 | scene_list = sorted([line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET_META, "scannetv2_{}.txt".format(split)))])
70 |
71 | return scene_list
72 |
73 | def get_scanrefer(args):
74 | scanrefer = SCANREFER_TEST
75 | scene_list = sorted(list(set([data["scene_id"] for data in scanrefer])))
76 | scanrefer = [data for data in scanrefer if data["scene_id"] in scene_list]
77 |
78 | return scanrefer, scene_list
79 |
80 | def predict(args):
81 | print("predict bounding boxes...")
82 | # constant
83 | DC = ScannetDatasetConfig()
84 |
85 | # init training dataset
86 | print("preparing data...")
87 | scanrefer, scene_list = get_scanrefer(args)
88 |
89 | # dataloader
90 | _, dataloader = get_dataloader(args, scanrefer, scene_list, "test", DC)
91 |
92 | # model
93 | model = get_model(args, DC)
94 |
95 | # config
96 | POST_DICT = {
97 | "remove_empty_box": True,
98 | "use_3d_nms": True,
99 | "nms_iou": 0.25,
100 | "use_old_type_nms": False,
101 | "cls_nms": True,
102 | "per_class_proposal": True,
103 | "conf_thresh": 0.05,
104 | "dataset_config": DC
105 | } if not args.no_nms else None
106 |
107 | # predict
108 | print("predicting...")
109 | pred_bboxes = []
110 | for data_dict in tqdm(dataloader):
111 | for key in data_dict:
112 | data_dict[key] = data_dict[key].cuda()
113 |
114 | # feed
115 | data_dict = model(data_dict)
116 | _, data_dict = get_loss(
117 | data_dict=data_dict,
118 | config=DC,
119 | detection=False,
120 | reference=True
121 | )
122 |
123 | objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long()
124 |
125 | if POST_DICT:
126 | _ = parse_predictions(data_dict, POST_DICT)
127 | nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()
128 |
129 | # construct valid mask
130 | pred_masks = (nms_masks * objectness_preds_batch == 1).float()
131 | else:
132 | # construct valid mask
133 | pred_masks = (objectness_preds_batch == 1).float()
134 |
135 | pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks, 1) # (B,)
136 | pred_center = data_dict['center'] # (B,K,3)
137 | pred_heading_class = torch.argmax(data_dict['heading_scores'], -1) # B,num_proposal
138 | pred_heading_residual = torch.gather(data_dict['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1
139 | pred_heading_class = pred_heading_class # B,num_proposal
140 | pred_heading_residual = pred_heading_residual.squeeze(2) # B,num_proposal
141 | pred_size_class = torch.argmax(data_dict['size_scores'], -1) # B,num_proposal
142 | pred_size_residual = torch.gather(data_dict['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3
143 | pred_size_class = pred_size_class
144 | pred_size_residual = pred_size_residual.squeeze(2) # B,num_proposal,3
145 |
146 | for i in range(pred_ref.shape[0]):
147 | # compute the iou
148 | pred_ref_idx = pred_ref[i]
149 | pred_obb = DC.param2obb(
150 | pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(),
151 | pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(),
152 | pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(),
153 | pred_size_class[i, pred_ref_idx].detach().cpu().numpy(),
154 | pred_size_residual[i, pred_ref_idx].detach().cpu().numpy()
155 | )
156 | pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
157 |
158 | # construct the multiple mask
159 | multiple = data_dict["unique_multiple"][i].item()
160 |
161 | # construct the others mask
162 | others = 1 if data_dict["object_cat"][i] == 17 else 0
163 |
164 | # store data
165 | scanrefer_idx = data_dict["scan_idx"][i].item()
166 | pred_data = {
167 | "scene_id": scanrefer[scanrefer_idx]["scene_id"],
168 | "object_id": scanrefer[scanrefer_idx]["object_id"],
169 | "ann_id": scanrefer[scanrefer_idx]["ann_id"],
170 | "bbox": pred_bbox.tolist(),
171 | "unique_multiple": multiple,
172 | "others": others
173 | }
174 | pred_bboxes.append(pred_data)
175 |
176 | # dump
177 | print("dumping...")
178 | pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json")
179 | with open(pred_path, "w") as f:
180 | json.dump(pred_bboxes, f, indent=4)
181 |
182 | print("done!")
183 |
184 | if __name__ == "__main__":
185 | parser = argparse.ArgumentParser()
186 | parser.add_argument("--folder", type=str, help="Folder containing the model")
187 | parser.add_argument("--gpu", type=str, help="gpu", default="0")
188 | parser.add_argument("--batch_size", type=int, help="batch size", default=8)
189 | parser.add_argument("--num_points", type=int, default=40000, help="Point Number [default: 40000]")
190 | parser.add_argument("--num_proposals", type=int, default=256, help="Proposal number [default: 256]")
191 | parser.add_argument("--seed", type=int, default=42, help="random seed")
192 | parser.add_argument("--no_height", action="store_true", help="Do NOT use height signal in input.")
193 | parser.add_argument("--no_lang_cls", action="store_true", help="Do NOT use language classifier.")
194 | parser.add_argument("--no_nms", action="store_true", help="do NOT use non-maximum suppression for post-processing.")
195 | parser.add_argument("--use_color", action="store_true", help="Use RGB color in input.")
196 |     parser.add_argument("--use_normal", action="store_true", help="Use normals in input.")
197 | parser.add_argument("--use_multiview", action="store_true", help="Use multiview images.")
198 | parser.add_argument("--use_bidir", action="store_true", help="Use bi-directional GRU.")
199 | args = parser.parse_args()
200 |
201 | # setting
202 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
203 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
204 |
205 | # reproducibility
206 | torch.manual_seed(args.seed)
207 | torch.backends.cudnn.deterministic = True
208 | torch.backends.cudnn.benchmark = False
209 | np.random.seed(args.seed)
210 |
211 | predict(args)
212 |
--------------------------------------------------------------------------------
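
`predict.py` writes its boxes to `outputs/<folder>/pred.json`, which `benchmark/eval.py` then reads. For reference, each entry of that JSON list has the following shape; this is a sketch with placeholder values, the field names and types follow `pred_data` above.

```python
# One entry of outputs/<folder>/pred.json; values are placeholders.
entry = {
    "scene_id": "scene0707_00",
    "object_id": "1",                # copied from the ScanRefer annotation
    "ann_id": "0",
    "bbox": [[0.0, 0.0, 0.0]] * 8,   # 8 box corners (x, y, z) from utils.box_util.get_3d_box
    "unique_multiple": 0,            # 0: unique, 1: multiple
    "others": 0                      # 1 if the object category is "others" (class 17)
}
```

A typical sequence is `python benchmark/predict.py --folder <folder>` (with the same input flags the model was trained with) followed by `python benchmark/eval.py --folder <folder>`.
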
/data/scannet/README.md:
--------------------------------------------------------------------------------
1 | # ScanNet Instructions
2 |
3 | To acquire access to the ScanNet dataset, please refer to the [ScanNet project page](https://github.com/ScanNet/ScanNet) and follow the instructions there. You will receive a `download-scannet.py` script once your request for the ScanNet dataset is approved. Note that only a subset of ScanNet is needed. Once you have `download-scannet.py`, please use the commands below to download the portion of ScanNet that is necessary for ScanRefer:
4 |
5 | ```shell
6 | python2 download-scannet.py -o data/scannet --type _vh_clean_2.ply
7 | python2 download-scannet.py -o data/scannet --type .aggregation.json
8 | python2 download-scannet.py -o data/scannet --type _vh_clean_2.0.010000.segs.json
9 | python2 download-scannet.py -o data/scannet --type .txt
10 | ```
11 | Roughly 10.6 GB of free disk space is needed.
12 |
--------------------------------------------------------------------------------
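
A quick sanity check after downloading can save time before preprocessing. The snippet below is a sketch (not part of the repository), assuming it is run from `data/scannet/` with the scans downloaded into `data/scannet/scans/` via the commands above; note that the hidden test scenes are released without the `.aggregation.json` and `.segs.json` annotation files.

```python
import os

SCANNET_DIR = "scans"  # one folder per scan id
SCAN_NAMES = [line.rstrip() for line in open("meta_data/scannetv2.txt")]
SUFFIXES = ["_vh_clean_2.ply", ".aggregation.json", "_vh_clean_2.0.010000.segs.json", ".txt"]

missing = []
for scan in SCAN_NAMES:
    for suffix in SUFFIXES:
        path = os.path.join(SCANNET_DIR, scan, scan + suffix)
        if not os.path.isfile(path):
            missing.append(path)

print("{} expected file(s) missing".format(len(missing)))
```
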
/data/scannet/batch_load_scannet_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/batch_load_scannet_data.py
3 |
4 | Batch mode for loading ScanNet scenes with vertices and ground-truth labels for semantic and instance segmentation
5 |
6 | Usage example: python ./batch_load_scannet_data.py
7 | """
8 |
9 | import os
10 | import datetime
11 | import numpy as np
12 | from load_scannet_data import export
13 | from multiprocessing import Pool
14 |
15 |
16 | SCANNET_DIR = 'scans'
17 | SCAN_NAMES = sorted([line.rstrip() for line in open('meta_data/scannetv2.txt')])
18 | LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv'
19 | DONOTCARE_CLASS_IDS = np.array([])
20 | OBJ_CLASS_IDS = np.array([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]) # exclude wall (1), floor (2), ceiling (22)
21 | MAX_NUM_POINT = 50000
22 | OUTPUT_FOLDER = './scannet_data'
23 |
24 | def export_one_scan(scan_name):
25 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name)
26 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply')
27 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json')
28 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json')
29 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans.
30 | mesh_vertices, aligned_vertices, semantic_labels, instance_labels, instance_bboxes, aligned_instance_bboxes = export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None)
31 |
32 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS))
33 | mesh_vertices = mesh_vertices[mask,:]
34 | aligned_vertices = aligned_vertices[mask,:]
35 | semantic_labels = semantic_labels[mask]
36 | instance_labels = instance_labels[mask]
37 |
38 | if instance_bboxes.shape[0] > 1:
39 | num_instances = len(np.unique(instance_labels))
40 | print('Num of instances: ', num_instances)
41 |
42 | # bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS)
43 | bbox_mask = np.in1d(instance_bboxes[:,-2], OBJ_CLASS_IDS) # match the mesh2cap
44 | instance_bboxes = instance_bboxes[bbox_mask,:]
45 | aligned_instance_bboxes = aligned_instance_bboxes[bbox_mask,:]
46 | print('Num of care instances: ', instance_bboxes.shape[0])
47 | else:
48 | print("No semantic/instance annotation for test scenes")
49 |
50 | N = mesh_vertices.shape[0]
51 | if N > MAX_NUM_POINT:
52 | choices = np.random.choice(N, MAX_NUM_POINT, replace=False)
53 | mesh_vertices = mesh_vertices[choices, :]
54 | aligned_vertices = aligned_vertices[choices, :]
55 | semantic_labels = semantic_labels[choices]
56 | instance_labels = instance_labels[choices]
57 |
58 | print("Shape of points: {}".format(mesh_vertices.shape))
59 |
60 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices)
61 | np.save(output_filename_prefix+'_aligned_vert.npy', aligned_vertices)
62 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels)
63 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels)
64 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes)
65 | np.save(output_filename_prefix+'_aligned_bbox.npy', aligned_instance_bboxes)
66 |
67 | def batch_export():
68 |
69 | if not os.path.exists(OUTPUT_FOLDER):
70 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER))
71 | os.mkdir(OUTPUT_FOLDER)
72 |
73 | with Pool() as pool:
74 | pool.map(export_one_scan, SCAN_NAMES)
75 |
76 | if __name__=='__main__':
77 | batch_export()
78 |
--------------------------------------------------------------------------------
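
Each exported scan ends up as six `.npy` files under `scannet_data/`. The snippet below is a sketch for inspecting one of them, assuming `scene0000_00` has already been exported; the column layouts follow the comments in `load_scannet_data.py` below.

```python
import numpy as np

prefix = "scannet_data/scene0000_00"

verts = np.load(prefix + "_vert.npy")            # (N, 9): xyz, rgb in 0-255, normals
aligned = np.load(prefix + "_aligned_vert.npy")  # (N, 9): same, with xyz axis-aligned
sem = np.load(prefix + "_sem_label.npy")         # (N,)   nyu40 semantic ids, 0 = unannotated
ins = np.load(prefix + "_ins_label.npy")         # (N,)   instance ids, 0 = unannotated
bbox = np.load(prefix + "_bbox.npy")             # (K, 8): cx, cy, cz, dx, dy, dz, nyu40 id, object id
print(verts.shape, aligned.shape, sem.shape, ins.shape, bbox.shape)
```
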
/data/scannet/load_scannet_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py
3 |
4 | Load ScanNet scenes with vertices and ground-truth labels for semantic and instance segmentation
5 | """
6 |
7 | # python imports
8 | import math
9 | import os, sys, argparse
10 | import inspect
11 | import json
12 | import pdb
13 | import numpy as np
14 | import scannet_utils
15 |
16 | def read_aggregation(filename):
17 | object_id_to_segs = {}
18 | label_to_segs = {}
19 | with open(filename) as f:
20 | data = json.load(f)
21 | num_objects = len(data['segGroups'])
22 | for i in range(num_objects):
23 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed
24 | label = data['segGroups'][i]['label']
25 | segs = data['segGroups'][i]['segments']
26 | object_id_to_segs[object_id] = segs
27 | if label in label_to_segs:
28 | label_to_segs[label].extend(segs)
29 | else:
30 | label_to_segs[label] = segs
31 | return object_id_to_segs, label_to_segs
32 |
33 |
34 | def read_segmentation(filename):
35 | seg_to_verts = {}
36 | with open(filename) as f:
37 | data = json.load(f)
38 | num_verts = len(data['segIndices'])
39 | for i in range(num_verts):
40 | seg_id = data['segIndices'][i]
41 | if seg_id in seg_to_verts:
42 | seg_to_verts[seg_id].append(i)
43 | else:
44 | seg_to_verts[seg_id] = [i]
45 | return seg_to_verts, num_verts
46 |
47 |
48 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None):
49 | """ points are XYZ RGB (RGB in 0-255),
50 | semantic label as nyu40 ids,
51 | instance label as 1-#instance,
52 |         box as (cx,cy,cz,dx,dy,dz,semantic_label,object_id)
53 | """
54 | label_map = scannet_utils.read_label_mapping(label_map_file, label_from='raw_category', label_to='nyu40id')
55 | # mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file)
56 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb_normal(mesh_file)
57 |
58 | # Load scene axis alignment matrix
59 | lines = open(meta_file).readlines()
60 | axis_align_matrix = None
61 | for line in lines:
62 | if 'axisAlignment' in line:
63 | axis_align_matrix = [float(x) for x in line.rstrip().strip('axisAlignment = ').split(' ')]
64 |
65 | if axis_align_matrix != None:
66 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4))
67 | pts = np.ones((mesh_vertices.shape[0], 4))
68 | pts[:,0:3] = mesh_vertices[:,0:3]
69 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4
70 | aligned_vertices = np.copy(mesh_vertices)
71 | aligned_vertices[:,0:3] = pts[:,0:3]
72 | else:
73 | print("No axis alignment matrix found")
74 | aligned_vertices = mesh_vertices
75 |
76 | # Load semantic and instance labels
77 | if os.path.isfile(agg_file):
78 | object_id_to_segs, label_to_segs = read_aggregation(agg_file)
79 | seg_to_verts, num_verts = read_segmentation(seg_file)
80 |
81 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated
82 | object_id_to_label_id = {}
83 | for label, segs in label_to_segs.items():
84 | label_id = label_map[label]
85 | for seg in segs:
86 | verts = seg_to_verts[seg]
87 | label_ids[verts] = label_id
88 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated
89 | num_instances = len(np.unique(list(object_id_to_segs.keys())))
90 | for object_id, segs in object_id_to_segs.items():
91 | for seg in segs:
92 | verts = seg_to_verts[seg]
93 | instance_ids[verts] = object_id
94 | if object_id not in object_id_to_label_id:
95 | object_id_to_label_id[object_id] = label_ids[verts][0]
96 |
97 | instance_bboxes = np.zeros((num_instances,8)) # also include object id
98 | aligned_instance_bboxes = np.zeros((num_instances,8)) # also include object id
99 | for obj_id in object_id_to_segs:
100 | label_id = object_id_to_label_id[obj_id]
101 |
102 | # bboxes in the original meshes
103 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3]
104 | if len(obj_pc) == 0: continue
105 | # Compute axis aligned box
106 | # An axis aligned bounding box is parameterized by
107 | # (cx,cy,cz) and (dx,dy,dz) and label id
108 | # where (cx,cy,cz) is the center point of the box,
109 | # dx is the x-axis length of the box.
110 | xmin = np.min(obj_pc[:,0])
111 | ymin = np.min(obj_pc[:,1])
112 | zmin = np.min(obj_pc[:,2])
113 | xmax = np.max(obj_pc[:,0])
114 | ymax = np.max(obj_pc[:,1])
115 | zmax = np.max(obj_pc[:,2])
116 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin, label_id, obj_id-1]) # also include object id
117 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES
118 | instance_bboxes[obj_id-1,:] = bbox
119 |
120 | # bboxes in the aligned meshes
121 | obj_pc = aligned_vertices[instance_ids==obj_id, 0:3]
122 | if len(obj_pc) == 0: continue
123 | # Compute axis aligned box
124 | # An axis aligned bounding box is parameterized by
125 | # (cx,cy,cz) and (dx,dy,dz) and label id
126 | # where (cx,cy,cz) is the center point of the box,
127 | # dx is the x-axis length of the box.
128 | xmin = np.min(obj_pc[:,0])
129 | ymin = np.min(obj_pc[:,1])
130 | zmin = np.min(obj_pc[:,2])
131 | xmax = np.max(obj_pc[:,0])
132 | ymax = np.max(obj_pc[:,1])
133 | zmax = np.max(obj_pc[:,2])
134 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin, label_id, obj_id-1]) # also include object id
135 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES
136 | aligned_instance_bboxes[obj_id-1,:] = bbox
137 | else:
138 | # use zero as placeholders for the test scene
139 | print("use placeholders")
140 | num_verts = mesh_vertices.shape[0]
141 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated
142 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated
143 | instance_bboxes = np.zeros((1, 8)) # also include object id
144 | aligned_instance_bboxes = np.zeros((1, 8)) # also include object id
145 |
146 | if output_file is not None:
147 | np.save(output_file+'_vert.npy', mesh_vertices)
148 | np.save(output_file+'_aligned_vert.npy', aligned_vertices)
149 | np.save(output_file+'_sem_label.npy', label_ids)
150 | np.save(output_file+'_ins_label.npy', instance_ids)
151 | np.save(output_file+'_bbox.npy', instance_bboxes)
152 |         np.save(output_file+'_aligned_bbox.npy', aligned_instance_bboxes)
153 |
154 | return mesh_vertices, aligned_vertices, label_ids, instance_ids, instance_bboxes, aligned_instance_bboxes
155 |
156 | def main():
157 | parser = argparse.ArgumentParser()
158 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00')
159 | parser.add_argument('--output_file', required=True, help='output file')
160 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv')
161 | opt = parser.parse_args()
162 |
163 | scan_name = os.path.split(opt.scan_path)[-1]
164 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply')
165 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json')
166 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json')
167 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans.
168 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file)
169 |
170 | if __name__ == '__main__':
171 | main()
172 |
--------------------------------------------------------------------------------
/data/scannet/meta_data/nyu40_labels.csv:
--------------------------------------------------------------------------------
1 | nyu40id,nyu40class,mappedId,mappedIdConsecutive,weight
2 | 1,wall,(ignore),19,0.0
3 | 2,floor,(ignore),19,0.0
4 | 3,cabinet,3,1,3.9644974086960434
5 | 4,bed,4,2,5.459494152836571
6 | 5,chair,5,3,2.241522691584157
7 | 6,sofa,6,4,4.820655512680854
8 | 7,table,7,5,3.565918577548873
9 | 8,door,8,6,3.538498341919445
10 | 9,window,9,7,4.636521236560596
11 | 10,bookshelf,10,8,5.445050937449535
12 | 11,picture,11,9,5.079250281008131
13 | 12,counter,12,10,6.2030429647735845
14 | 13,blinds,(ignore),19,0.0
15 | 14,desk,14,11,4.622662494840168
16 | 15,shelves,(ignore),19,0.0
17 | 16,curtain,16,12,5.956294301248057
18 | 17,dresser,(ignore),19,0.0
19 | 18,pillow,(ignore),19,0.0
20 | 19,mirror,(ignore),19,0.0
21 | 20,floor_mat,(ignore),19,0.0
22 | 21,clothes,(ignore),19,0.0
23 | 22,ceiling,(ignore),19,0.0
24 | 23,books,(ignore),19,0.0
25 | 24,refridgerator,24,13,5.459141107819665
26 | 25,television,(ignore),19,0.0
27 | 26,paper,(ignore),19,0.0
28 | 27,towel,(ignore),19,0.0
29 | 28,shower_curtain,28,14,6.724871661883906
30 | 29,box,(ignore),19,0.0
31 | 30,whiteboard,(ignore),19,0.0
32 | 31,person,(ignore),19,0.0
33 | 32,night_stand,(ignore),19,0.0
34 | 33,toilet,33,15,5.832442848923174
35 | 34,sink,34,16,5.064773947290611
36 | 35,lamp,(ignore),19,0.0
37 | 36,bathtub,36,17,6.738988357113375
38 | 37,bag,(ignore),19,0.0
39 | 38,otherstructure,(ignore),19,0.0
40 | 39,otherfurniture,39,18,3.375217918833916
41 | 40,otherprop,(ignore),19,0.0
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/data/scannet/meta_data/scannet_means.npz
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannet_reference_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/data/scannet/meta_data/scannet_reference_means.npz
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannetv2_test.txt:
--------------------------------------------------------------------------------
1 | scene0707_00
2 | scene0708_00
3 | scene0709_00
4 | scene0710_00
5 | scene0711_00
6 | scene0712_00
7 | scene0713_00
8 | scene0714_00
9 | scene0715_00
10 | scene0716_00
11 | scene0717_00
12 | scene0718_00
13 | scene0719_00
14 | scene0720_00
15 | scene0721_00
16 | scene0722_00
17 | scene0723_00
18 | scene0724_00
19 | scene0725_00
20 | scene0726_00
21 | scene0727_00
22 | scene0728_00
23 | scene0729_00
24 | scene0730_00
25 | scene0731_00
26 | scene0732_00
27 | scene0733_00
28 | scene0734_00
29 | scene0735_00
30 | scene0736_00
31 | scene0737_00
32 | scene0738_00
33 | scene0739_00
34 | scene0740_00
35 | scene0741_00
36 | scene0742_00
37 | scene0743_00
38 | scene0744_00
39 | scene0745_00
40 | scene0746_00
41 | scene0747_00
42 | scene0748_00
43 | scene0749_00
44 | scene0750_00
45 | scene0751_00
46 | scene0752_00
47 | scene0753_00
48 | scene0754_00
49 | scene0755_00
50 | scene0756_00
51 | scene0757_00
52 | scene0758_00
53 | scene0759_00
54 | scene0760_00
55 | scene0761_00
56 | scene0762_00
57 | scene0763_00
58 | scene0764_00
59 | scene0765_00
60 | scene0766_00
61 | scene0767_00
62 | scene0768_00
63 | scene0769_00
64 | scene0770_00
65 | scene0771_00
66 | scene0772_00
67 | scene0773_00
68 | scene0774_00
69 | scene0775_00
70 | scene0776_00
71 | scene0777_00
72 | scene0778_00
73 | scene0779_00
74 | scene0780_00
75 | scene0781_00
76 | scene0782_00
77 | scene0783_00
78 | scene0784_00
79 | scene0785_00
80 | scene0786_00
81 | scene0787_00
82 | scene0788_00
83 | scene0789_00
84 | scene0790_00
85 | scene0791_00
86 | scene0792_00
87 | scene0793_00
88 | scene0794_00
89 | scene0795_00
90 | scene0796_00
91 | scene0797_00
92 | scene0798_00
93 | scene0799_00
94 | scene0800_00
95 | scene0801_00
96 | scene0802_00
97 | scene0803_00
98 | scene0804_00
99 | scene0805_00
100 | scene0806_00
101 |
--------------------------------------------------------------------------------
/data/scannet/meta_data/scannetv2_val.txt:
--------------------------------------------------------------------------------
1 | scene0011_00
2 | scene0011_01
3 | scene0015_00
4 | scene0019_00
5 | scene0019_01
6 | scene0025_00
7 | scene0025_01
8 | scene0025_02
9 | scene0030_00
10 | scene0030_01
11 | scene0030_02
12 | scene0046_00
13 | scene0046_01
14 | scene0046_02
15 | scene0050_00
16 | scene0050_01
17 | scene0050_02
18 | scene0063_00
19 | scene0064_00
20 | scene0064_01
21 | scene0077_00
22 | scene0077_01
23 | scene0081_00
24 | scene0081_01
25 | scene0081_02
26 | scene0084_00
27 | scene0084_01
28 | scene0084_02
29 | scene0086_00
30 | scene0086_01
31 | scene0086_02
32 | scene0088_00
33 | scene0088_01
34 | scene0088_02
35 | scene0088_03
36 | scene0095_00
37 | scene0095_01
38 | scene0100_00
39 | scene0100_01
40 | scene0100_02
41 | scene0131_00
42 | scene0131_01
43 | scene0131_02
44 | scene0139_00
45 | scene0144_00
46 | scene0144_01
47 | scene0146_00
48 | scene0146_01
49 | scene0146_02
50 | scene0149_00
51 | scene0153_00
52 | scene0153_01
53 | scene0164_00
54 | scene0164_01
55 | scene0164_02
56 | scene0164_03
57 | scene0169_00
58 | scene0169_01
59 | scene0187_00
60 | scene0187_01
61 | scene0193_00
62 | scene0193_01
63 | scene0196_00
64 | scene0203_00
65 | scene0203_01
66 | scene0203_02
67 | scene0207_00
68 | scene0207_01
69 | scene0207_02
70 | scene0208_00
71 | scene0217_00
72 | scene0221_00
73 | scene0221_01
74 | scene0222_00
75 | scene0222_01
76 | scene0231_00
77 | scene0231_01
78 | scene0231_02
79 | scene0246_00
80 | scene0249_00
81 | scene0251_00
82 | scene0256_00
83 | scene0256_01
84 | scene0256_02
85 | scene0257_00
86 | scene0277_00
87 | scene0277_01
88 | scene0277_02
89 | scene0278_00
90 | scene0278_01
91 | scene0300_00
92 | scene0300_01
93 | scene0304_00
94 | scene0307_00
95 | scene0307_01
96 | scene0307_02
97 | scene0314_00
98 | scene0316_00
99 | scene0328_00
100 | scene0329_00
101 | scene0329_01
102 | scene0329_02
103 | scene0334_00
104 | scene0334_01
105 | scene0334_02
106 | scene0338_00
107 | scene0338_01
108 | scene0338_02
109 | scene0342_00
110 | scene0343_00
111 | scene0351_00
112 | scene0351_01
113 | scene0353_00
114 | scene0353_01
115 | scene0353_02
116 | scene0354_00
117 | scene0355_00
118 | scene0355_01
119 | scene0356_00
120 | scene0356_01
121 | scene0356_02
122 | scene0357_00
123 | scene0357_01
124 | scene0377_00
125 | scene0377_01
126 | scene0377_02
127 | scene0378_00
128 | scene0378_01
129 | scene0378_02
130 | scene0382_00
131 | scene0382_01
132 | scene0389_00
133 | scene0406_00
134 | scene0406_01
135 | scene0406_02
136 | scene0412_00
137 | scene0412_01
138 | scene0414_00
139 | scene0423_00
140 | scene0423_01
141 | scene0423_02
142 | scene0426_00
143 | scene0426_01
144 | scene0426_02
145 | scene0426_03
146 | scene0427_00
147 | scene0430_00
148 | scene0430_01
149 | scene0432_00
150 | scene0432_01
151 | scene0435_00
152 | scene0435_01
153 | scene0435_02
154 | scene0435_03
155 | scene0441_00
156 | scene0458_00
157 | scene0458_01
158 | scene0461_00
159 | scene0462_00
160 | scene0474_00
161 | scene0474_01
162 | scene0474_02
163 | scene0474_03
164 | scene0474_04
165 | scene0474_05
166 | scene0488_00
167 | scene0488_01
168 | scene0490_00
169 | scene0494_00
170 | scene0496_00
171 | scene0500_00
172 | scene0500_01
173 | scene0518_00
174 | scene0527_00
175 | scene0535_00
176 | scene0549_00
177 | scene0549_01
178 | scene0550_00
179 | scene0552_00
180 | scene0552_01
181 | scene0553_00
182 | scene0553_01
183 | scene0553_02
184 | scene0558_00
185 | scene0558_01
186 | scene0558_02
187 | scene0559_00
188 | scene0559_01
189 | scene0559_02
190 | scene0565_00
191 | scene0568_00
192 | scene0568_01
193 | scene0568_02
194 | scene0574_00
195 | scene0574_01
196 | scene0574_02
197 | scene0575_00
198 | scene0575_01
199 | scene0575_02
200 | scene0578_00
201 | scene0578_01
202 | scene0578_02
203 | scene0580_00
204 | scene0580_01
205 | scene0583_00
206 | scene0583_01
207 | scene0583_02
208 | scene0591_00
209 | scene0591_01
210 | scene0591_02
211 | scene0593_00
212 | scene0593_01
213 | scene0595_00
214 | scene0598_00
215 | scene0598_01
216 | scene0598_02
217 | scene0599_00
218 | scene0599_01
219 | scene0599_02
220 | scene0606_00
221 | scene0606_01
222 | scene0606_02
223 | scene0607_00
224 | scene0607_01
225 | scene0608_00
226 | scene0608_01
227 | scene0608_02
228 | scene0609_00
229 | scene0609_01
230 | scene0609_02
231 | scene0609_03
232 | scene0616_00
233 | scene0616_01
234 | scene0618_00
235 | scene0621_00
236 | scene0629_00
237 | scene0629_01
238 | scene0629_02
239 | scene0633_00
240 | scene0633_01
241 | scene0643_00
242 | scene0644_00
243 | scene0645_00
244 | scene0645_01
245 | scene0645_02
246 | scene0647_00
247 | scene0647_01
248 | scene0648_00
249 | scene0648_01
250 | scene0651_00
251 | scene0651_01
252 | scene0651_02
253 | scene0652_00
254 | scene0653_00
255 | scene0653_01
256 | scene0655_00
257 | scene0655_01
258 | scene0655_02
259 | scene0658_00
260 | scene0660_00
261 | scene0663_00
262 | scene0663_01
263 | scene0663_02
264 | scene0664_00
265 | scene0664_01
266 | scene0664_02
267 | scene0665_00
268 | scene0665_01
269 | scene0670_00
270 | scene0670_01
271 | scene0671_00
272 | scene0671_01
273 | scene0678_00
274 | scene0678_01
275 | scene0678_02
276 | scene0684_00
277 | scene0684_01
278 | scene0685_00
279 | scene0685_01
280 | scene0685_02
281 | scene0686_00
282 | scene0686_01
283 | scene0686_02
284 | scene0689_00
285 | scene0690_00
286 | scene0690_01
287 | scene0693_00
288 | scene0693_01
289 | scene0693_02
290 | scene0695_00
291 | scene0695_01
292 | scene0695_02
293 | scene0695_03
294 | scene0696_00
295 | scene0696_01
296 | scene0696_02
297 | scene0697_00
298 | scene0697_01
299 | scene0697_02
300 | scene0697_03
301 | scene0699_00
302 | scene0700_00
303 | scene0700_01
304 | scene0700_02
305 | scene0701_00
306 | scene0701_01
307 | scene0701_02
308 | scene0702_00
309 | scene0702_01
310 | scene0702_02
311 | scene0704_00
312 | scene0704_01
313 |
--------------------------------------------------------------------------------
/data/scannet/model_util_scannet.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/model_util_scannet.py
3 | """
4 |
5 | import numpy as np
6 | import sys
7 | import os
8 |
9 | sys.path.append(os.path.join(os.getcwd(), os.pardir, "lib")) # HACK add the lib folder
10 | from lib.config import CONF
11 | from utils.box_util import get_3d_box
12 |
13 | def in_hull(p, hull):
14 | from scipy.spatial import Delaunay
15 | if not isinstance(hull,Delaunay):
16 | hull = Delaunay(hull)
17 | return hull.find_simplex(p)>=0
18 |
19 | def extract_pc_in_box3d(pc, box3d):
20 | ''' pc: (N,3), box3d: (8,3) '''
21 | box3d_roi_inds = in_hull(pc[:,0:3], box3d)
22 | return pc[box3d_roi_inds,:], box3d_roi_inds
23 |
24 | def rotate_aligned_boxes(input_boxes, rot_mat):
25 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6]
26 | new_centers = np.dot(centers, np.transpose(rot_mat))
27 |
28 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0
29 | new_x = np.zeros((dx.shape[0], 4))
30 | new_y = np.zeros((dx.shape[0], 4))
31 |
32 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]):
33 | crnrs = np.zeros((dx.shape[0], 3))
34 | crnrs[:,0] = crnr[0]*dx
35 | crnrs[:,1] = crnr[1]*dy
36 | crnrs = np.dot(crnrs, np.transpose(rot_mat))
37 | new_x[:,i] = crnrs[:,0]
38 | new_y[:,i] = crnrs[:,1]
39 |
40 |
41 | new_dx = 2.0*np.max(new_x, 1)
42 | new_dy = 2.0*np.max(new_y, 1)
43 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1)
44 |
45 | return np.concatenate([new_centers, new_lengths], axis=1)
46 |
47 | def rotate_aligned_boxes_along_axis(input_boxes, rot_mat, axis):
48 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6]
49 | new_centers = np.dot(centers, np.transpose(rot_mat))
50 |
51 | if axis == "x":
52 | d1, d2 = lengths[:,1]/2.0, lengths[:,2]/2.0
53 | elif axis == "y":
54 | d1, d2 = lengths[:,0]/2.0, lengths[:,2]/2.0
55 | else:
56 | d1, d2 = lengths[:,0]/2.0, lengths[:,1]/2.0
57 |
58 | new_1 = np.zeros((d1.shape[0], 4))
59 | new_2 = np.zeros((d1.shape[0], 4))
60 |
61 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]):
62 | crnrs = np.zeros((d1.shape[0], 3))
63 | crnrs[:,0] = crnr[0]*d1
64 | crnrs[:,1] = crnr[1]*d2
65 | crnrs = np.dot(crnrs, np.transpose(rot_mat))
66 | new_1[:,i] = crnrs[:,0]
67 | new_2[:,i] = crnrs[:,1]
68 |
69 | new_d1 = 2.0*np.max(new_1, 1)
70 | new_d2 = 2.0*np.max(new_2, 1)
71 |
72 | if axis == "x":
73 | new_lengths = np.stack((lengths[:,0], new_d1, new_d2), axis=1)
74 | elif axis == "y":
75 | new_lengths = np.stack((new_d1, lengths[:,1], new_d2), axis=1)
76 | else:
77 | new_lengths = np.stack((new_d1, new_d2, lengths[:,2]), axis=1)
78 |
79 | return np.concatenate([new_centers, new_lengths], axis=1)
80 |
81 | class ScannetDatasetConfig(object):
82 | def __init__(self):
83 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5,
84 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11,
85 | 'refrigerator':12, 'shower curtain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'others':17}
86 | self.class2type = {self.type2class[t]:t for t in self.type2class}
87 |
88 | self.nyu40ids = np.array([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]) # exclude wall (1), floor (2), ceiling (22)
89 | self.nyu40id2class = self._get_nyu40id2class()
90 | self.mean_size_arr = np.load(os.path.join(CONF.PATH.SCANNET, 'meta_data/scannet_reference_means.npz'))['arr_0']
91 |
92 | self.num_class = len(self.type2class.keys())
93 | self.num_heading_bin = 1
94 | self.num_size_cluster = len(self.type2class.keys())
95 |
96 | self.type_mean_size = {}
97 | for i in range(self.num_size_cluster):
98 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:]
99 |
100 | def _get_nyu40id2class(self):
101 | lines = [line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET, 'meta_data/scannetv2-labels.combined.tsv'))]
102 | lines = lines[1:]
103 | nyu40ids2class = {}
104 | for i in range(len(lines)):
105 | label_classes_set = set(self.type2class.keys())
106 | elements = lines[i].split('\t')
107 | nyu40_id = int(elements[4])
108 | nyu40_name = elements[7]
109 | if nyu40_id in self.nyu40ids:
110 | if nyu40_name not in label_classes_set:
111 | nyu40ids2class[nyu40_id] = self.type2class["others"]
112 | else:
113 | nyu40ids2class[nyu40_id] = self.type2class[nyu40_name]
114 |
115 | return nyu40ids2class
116 |
117 | def angle2class(self, angle):
118 | ''' Convert continuous angle to discrete class
119 |         [optional] also returns a small regression number from
120 | class center angle to current angle.
121 |
122 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)
123 | return is class of int32 of 0,1,...,N-1 and a number such that
124 | class*(2pi/N) + number = angle
125 |
126 | NOT USED.
127 | '''
128 | assert(False)
129 |
130 | def class2angle(self, pred_cls, residual, to_label_format=True):
131 | ''' Inverse function to angle2class.
132 |
133 |         As ScanNet only has axis-aligned boxes, angles are always 0. '''
134 | return 0
135 |
136 | def class2angle_batch(self, pred_cls, residual, to_label_format=True):
137 | ''' Inverse function to angle2class.
138 |
139 |         As ScanNet only has axis-aligned boxes, angles are always 0. '''
140 | return np.zeros(pred_cls.shape[0])
141 |
142 | def size2class(self, size, type_name):
143 | ''' Convert 3D box size (l,w,h) to size class and size residual '''
144 | size_class = self.type2class[type_name]
145 | size_residual = size - self.type_mean_size[type_name]
146 | return size_class, size_residual
147 |
148 | def class2size(self, pred_cls, residual):
149 | ''' Inverse function to size2class '''
150 | return self.mean_size_arr[pred_cls] + residual
151 |
152 | def class2size_batch(self, pred_cls, residual):
153 | ''' Inverse function to size2class '''
154 | return self.mean_size_arr[pred_cls] + residual
155 |
156 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual):
157 | heading_angle = self.class2angle(heading_class, heading_residual)
158 | box_size = self.class2size(int(size_class), size_residual)
159 | obb = np.zeros((7,))
160 | obb[0:3] = center
161 | obb[3:6] = box_size
162 | obb[6] = heading_angle*-1
163 | return obb
164 |
165 | def param2obb_batch(self, center, heading_class, heading_residual, size_class, size_residual):
166 | heading_angle = self.class2angle_batch(heading_class, heading_residual)
167 | box_size = self.class2size_batch(size_class, size_residual)
168 | obb = np.zeros((heading_class.shape[0], 7))
169 | obb[:, 0:3] = center
170 | obb[:, 3:6] = box_size
171 | obb[:, 6] = heading_angle*-1
172 | return obb
173 |
--------------------------------------------------------------------------------
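
`param2obb` turns a predicted (center, heading class/residual, size class/residual) tuple into a 7-dim oriented box, and `get_3d_box` expands it into 8 corners, exactly as done in `benchmark/predict.py`. Below is a minimal sketch; it assumes the repository root is on `sys.path` and the paths in `lib/config.py` have been configured, since `ScannetDatasetConfig` loads the mean sizes from `meta_data/`.

```python
import numpy as np
from data.scannet.model_util_scannet import ScannetDatasetConfig
from utils.box_util import get_3d_box

DC = ScannetDatasetConfig()

# toy proposal: size class 2 ("chair") with zero residuals, centered at the origin
obb = DC.param2obb(np.zeros(3), 0, 0.0, 2, np.zeros(3))  # (7,): cx, cy, cz, dx, dy, dz, heading
corners = get_3d_box(obb[3:6], obb[6], obb[0:3])          # (8, 3) corners, as in predict.py
print(obb, corners.shape)
```
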
/data/scannet/scannet_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py
3 | """
4 |
5 | import os
6 | import sys
7 | import json
8 | import csv
9 | import numpy as np
10 |
11 | try:
12 | from plyfile import PlyData, PlyElement
13 | except:
14 | print("Please install the module 'plyfile' for PLY i/o, e.g.")
15 | print("pip install plyfile")
16 | sys.exit(-1)
17 |
18 | def normalize_v3(arr):
19 | ''' Normalize a numpy array of 3 component vectors shape=(n,3) '''
20 | lens = np.sqrt( arr[:,0]**2 + arr[:,1]**2 + arr[:,2]**2 )
21 | arr[:,0] /= (lens + 1e-8)
22 | arr[:,1] /= (lens + 1e-8)
23 | arr[:,2] /= (lens + 1e-8)
24 | return arr
25 |
26 | def compute_normal(vertices, faces):
27 | #Create a zeroed array with the same type and shape as our vertices i.e., per vertex normal
28 | normals = np.zeros( vertices.shape, dtype=vertices.dtype )
29 | #Create an indexed view into the vertex array using the array of three indices for triangles
30 | tris = vertices[faces]
31 | #Calculate the normal for all the triangles, by taking the cross product of the vectors v1-v0, and v2-v0 in each triangle
32 | n = np.cross( tris[::,1 ] - tris[::,0] , tris[::,2 ] - tris[::,0] )
33 | # n is now an array of normals per triangle. The length of each normal is dependent the vertices,
34 | # we need to normalize these, so that our next step weights each normal equally.
35 | normalize_v3(n)
36 | # now we have a normalized array of normals, one per triangle, i.e., per triangle normals.
37 | # But instead of one per triangle (i.e., flat shading), we add to each vertex in that triangle,
38 | # the triangles' normal. Multiple triangles would then contribute to every vertex, so we need to normalize again afterwards.
39 | # The cool part, we can actually add the normals through an indexed view of our (zeroed) per vertex normal array
40 | normals[ faces[:,0] ] += n
41 | normals[ faces[:,1] ] += n
42 | normals[ faces[:,2] ] += n
43 | normalize_v3(normals)
44 |
45 | return normals
46 |
47 | def represents_int(s):
48 | ''' if string s represents an int. '''
49 | try:
50 | int(s)
51 | return True
52 | except ValueError:
53 | return False
54 |
55 |
56 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'):
57 | assert os.path.isfile(filename)
58 | mapping = dict()
59 | with open(filename) as csvfile:
60 | reader = csv.DictReader(csvfile, delimiter='\t')
61 | for row in reader:
62 | mapping[row[label_from]] = int(row[label_to])
63 | if represents_int(list(mapping.keys())[0]):
64 | mapping = {int(k):v for k,v in mapping.items()}
65 | return mapping
66 |
67 | def read_mesh_vertices(filename):
68 | """ read XYZ for each vertex.
69 | """
70 | assert os.path.isfile(filename)
71 | with open(filename, 'rb') as f:
72 | plydata = PlyData.read(f)
73 | num_verts = plydata['vertex'].count
74 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32)
75 | vertices[:,0] = plydata['vertex'].data['x']
76 | vertices[:,1] = plydata['vertex'].data['y']
77 | vertices[:,2] = plydata['vertex'].data['z']
78 | return vertices
79 |
80 | def read_mesh_vertices_rgb(filename):
81 | """ read XYZ RGB for each vertex.
82 | Note: RGB values are in 0-255
83 | """
84 | assert os.path.isfile(filename)
85 | with open(filename, 'rb') as f:
86 | plydata = PlyData.read(f)
87 | num_verts = plydata['vertex'].count
88 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32)
89 | vertices[:,0] = plydata['vertex'].data['x']
90 | vertices[:,1] = plydata['vertex'].data['y']
91 | vertices[:,2] = plydata['vertex'].data['z']
92 | vertices[:,3] = plydata['vertex'].data['red']
93 | vertices[:,4] = plydata['vertex'].data['green']
94 | vertices[:,5] = plydata['vertex'].data['blue']
95 | return vertices
96 |
97 | def read_mesh_vertices_rgb_normal(filename):
98 | """ read XYZ RGB normals point cloud from filename PLY file """
99 | assert(os.path.isfile(filename))
100 | with open(filename, 'rb') as f:
101 | plydata = PlyData.read(f)
102 | num_verts = plydata['vertex'].count
103 | vertices = np.zeros(shape=[num_verts, 9], dtype=np.float32)
104 | vertices[:,0] = plydata['vertex'].data['x']
105 | vertices[:,1] = plydata['vertex'].data['y']
106 | vertices[:,2] = plydata['vertex'].data['z']
107 | vertices[:,3] = plydata['vertex'].data['red']
108 | vertices[:,4] = plydata['vertex'].data['green']
109 | vertices[:,5] = plydata['vertex'].data['blue']
110 |
111 | # compute normals
112 | xyz = np.array([[x, y, z] for x, y, z, _, _, _, _ in plydata["vertex"].data])
113 | face = np.array([f[0] for f in plydata["face"].data])
114 | nxnynz = compute_normal(xyz, face)
115 | vertices[:,6:] = nxnynz
116 | return vertices
117 |
--------------------------------------------------------------------------------
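
These helpers are what `load_scannet_data.export` builds on. A small usage sketch, assuming it is run from `data/scannet/` with a downloaded scan in place:

```python
import scannet_utils

# raw ScanNet category strings -> nyu40 ids, as used in load_scannet_data.export
label_map = scannet_utils.read_label_mapping(
    "meta_data/scannetv2-labels.combined.tsv",
    label_from="raw_category", label_to="nyu40id")
print(label_map["chair"])  # 5

# XYZ + RGB (0-255) + per-vertex normals, shape (num_verts, 9)
verts = scannet_utils.read_mesh_vertices_rgb_normal("scans/scene0000_00/scene0000_00_vh_clean_2.ply")
print(verts.shape)
```
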
/data/scannet/visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | import numpy as np
5 |
6 | if __name__ == "__main__":
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument("--scene_id", type=str, help="scene id of scene to be visualized", default="scene0000_00")
9 | args = parser.parse_args()
10 |
11 | verts = np.load("scannet_data/{}_vert.npy".format(args.scene_id))
12 | aligned_verts = np.load("scannet_data/{}_aligned_vert.npy".format(args.scene_id))
13 |
14 |
15 | with open("scannet_data/{}_verts.obj".format(args.scene_id), "w") as f:
16 | for i in range(verts.shape[0]):
17 | f.write("v {} {} {} {} {} {}\n".format(
18 | verts[i, 0],
19 | verts[i, 1],
20 | verts[i, 2],
21 | verts[i, 3],
22 | verts[i, 4],
23 | verts[i, 5]
24 | ))
25 |
26 | with open("scannet_data/{}_aligned_verts.obj".format(args.scene_id), "w") as f:
27 | for i in range(aligned_verts.shape[0]):
28 | f.write("v {} {} {} {} {} {}\n".format(
29 | aligned_verts[i, 0],
30 | aligned_verts[i, 1],
31 | aligned_verts[i, 2],
32 | aligned_verts[i, 3],
33 | aligned_verts[i, 4],
34 | aligned_verts[i, 5]
35 | ))
36 |
--------------------------------------------------------------------------------
/demo/ScanRefer.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/demo/ScanRefer.gif
--------------------------------------------------------------------------------
/docs/browser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/browser.png
--------------------------------------------------------------------------------
/docs/davezchen_eccv2020_scanrefer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/davezchen_eccv2020_scanrefer.pdf
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | ScanRefer: 3D Object Localization in RGB-D Scans using Natural Language
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
ScanRefer: 3D Object Localization in RGB-D Scans using Natural Language
23 |
24 |
25 |
European Conference on Computer Vision (ECCV), 2020.
26 |
27 |
28 | Dave Zhenyu Chen 1
29 |
30 | Angel X. Chang 2
31 |
32 | Matthias Nießner 1
33 |
34 |
35 |
36 |
37 | 1 Technical University of Munich
38 |
39 | 2 Simon Fraser University
40 |
41 |
42 |
43 |
Submit to our ScanRefer Localization Benchmark here !
44 |
45 |
46 |
47 |
Introduction
48 |
49 | We introduce the task of 3D object localization in RGB-D scans using natural language descriptions.
50 | As input, we assume a point cloud of a scanned 3D scene along with a free-form description of a specified target object.
51 | To address this task, we propose ScanRefer, learning a fused descriptor from 3D object proposals and encoded sentence embeddings.
52 | This fused descriptor correlates language expressions with geometric features, enabling regression of the 3D bounding box of a target object.
53 | We also introduce the ScanRefer dataset, containing 51,583 descriptions of 11,046 objects from 800 ScanNet scenes.
54 | ScanRefer is the first large-scale effort to perform object localization via natural language expression directly in 3D.
55 |
56 |
57 |
Video
58 |
59 | VIDEO
60 |
61 |
62 |
Browse
63 |
64 | The ScanRefer data can be browsed online in your web browser. Learn more at the ScanRefer Data Browser .
65 | (For a better browsing experience, we recommend using Google Chrome.)
66 |
67 |
68 |
69 |
70 |
Publication
71 | European Conference on Computer Vision (ECCV), 2020.
72 |
Paper |
arXiv |
Code
73 |
74 |
75 |
76 |
77 | If you find our project useful, please consider citing us:
78 |
79 |
80 | @article{chen2020scanrefer,
81 | title={ScanRefer: 3D Object Localization in RGB-D Scans using Natural Language},
82 | author={Chen, Dave Zhenyu and Chang, Angel X and Nie{\ss}ner, Matthias},
83 | journal={16th European Conference on Computer Vision (ECCV)},
84 | year={2020}
85 | }
86 |
87 |
88 |
89 |
Dataset Download
90 |
91 | If you would like to access to the ScanRefer dataset, please fill out the
ScanRefer Terms of Use Form . Once your request is accepted, you will receive an email with the download link.
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/docs/paper.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/paper.jpg
--------------------------------------------------------------------------------
/docs/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daveredrum/ScanRefer/9d7483053e8d29acfd4db4eb1bc28f1564f5dddb/docs/teaser.png
--------------------------------------------------------------------------------
/lib/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from easydict import EasyDict
4 |
5 | CONF = EasyDict()
6 |
7 | # path
8 | CONF.PATH = EasyDict()
9 | CONF.PATH.BASE = "/home/davech2y/ScanRefer/" # TODO: change this
10 | CONF.PATH.DATA = os.path.join(CONF.PATH.BASE, "data")
11 | CONF.PATH.SCANNET = os.path.join(CONF.PATH.DATA, "scannet")
12 | CONF.PATH.LIB = os.path.join(CONF.PATH.BASE, "lib")
13 | CONF.PATH.MODELS = os.path.join(CONF.PATH.BASE, "models")
14 | CONF.PATH.UTILS = os.path.join(CONF.PATH.BASE, "utils")
15 |
16 | # append to syspath
17 | for _, path in CONF.PATH.items():
18 | sys.path.append(path)
19 |
20 | # scannet data
21 | CONF.PATH.SCANNET_SCANS = os.path.join(CONF.PATH.SCANNET, "scans")
22 | CONF.PATH.SCANNET_META = os.path.join(CONF.PATH.SCANNET, "meta_data")
23 | CONF.PATH.SCANNET_DATA = os.path.join(CONF.PATH.SCANNET, "scannet_data")
24 |
25 | # data
26 | CONF.SCANNET_DIR = "/mnt/canis/Datasets/ScanNet/public/v2/scans" # TODO change this
27 | CONF.SCANNET_FRAMES_ROOT = "/home/davech2y/frames_square/" # TODO change this
28 | CONF.PROJECTION = "/home/davech2y/multiview_projection_scanrefer" # TODO change this
29 | CONF.ENET_FEATURES_ROOT = "/home/davech2y/enet_features" # TODO change this
30 | CONF.ENET_FEATURES_SUBROOT = os.path.join(CONF.ENET_FEATURES_ROOT, "{}") # scene_id
31 | CONF.ENET_FEATURES_PATH = os.path.join(CONF.ENET_FEATURES_SUBROOT, "{}.npy") # frame_id
32 | CONF.SCANNET_FRAMES = os.path.join(CONF.SCANNET_FRAMES_ROOT, "{}/{}") # scene_id, mode
33 | CONF.SCENE_NAMES = sorted(os.listdir(CONF.SCANNET_DIR))
34 | CONF.ENET_WEIGHTS = os.path.join(CONF.PATH.BASE, "data/scannetv2_enet.pth")
35 | # CONF.MULTIVIEW = os.path.join(CONF.PATH.SCANNET_DATA, "enet_feats.hdf5")
36 | CONF.MULTIVIEW = os.path.join(CONF.PATH.SCANNET_DATA, "enet_feats_maxpool.hdf5")
37 | CONF.NYU40_LABELS = os.path.join(CONF.PATH.SCANNET_META, "nyu40_labels.csv")
38 |
39 | # scannet
40 | CONF.SCANNETV2_TRAIN = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_train.txt")
41 | CONF.SCANNETV2_VAL = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_val.txt")
42 | CONF.SCANNETV2_TEST = os.path.join(CONF.PATH.SCANNET_META, "scannetv2_test.txt")
43 | CONF.SCANNETV2_LIST = os.path.join(CONF.PATH.SCANNET_META, "scannetv2.txt")
44 |
45 | # output
46 | CONF.PATH.OUTPUT = os.path.join(CONF.PATH.BASE, "outputs")
47 |
48 | # train
49 | CONF.TRAIN = EasyDict()
50 | CONF.TRAIN.MAX_DES_LEN = 126
51 | CONF.TRAIN.SEED = 42
52 |
--------------------------------------------------------------------------------
/lib/eval_helper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | import torch
7 | import torch.nn as nn
8 | import numpy as np
9 | import sys
10 | import os
11 |
12 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder
13 | from utils.nn_distance import nn_distance, huber_loss
14 | from lib.ap_helper import parse_predictions
15 | from lib.loss import SoftmaxRankingLoss
16 | from utils.box_util import get_3d_box, get_3d_box_batch, box3d_iou
17 |
18 | def eval_ref_one_sample(pred_bbox, gt_bbox):
19 | """ Evaluate one reference prediction
20 |
21 | Args:
22 | pred_bbox: 8 corners of prediction bounding box, (8, 3)
23 | gt_bbox: 8 corners of ground truth bounding box, (8, 3)
24 | Returns:
25 | iou: intersection over union score
26 | """
27 |
28 | iou = box3d_iou(pred_bbox, gt_bbox)
29 |
30 | return iou
31 |
32 | def construct_bbox_corners(center, box_size):
33 | sx, sy, sz = box_size
34 | x_corners = [sx/2, sx/2, -sx/2, -sx/2, sx/2, sx/2, -sx/2, -sx/2]
35 | y_corners = [sy/2, -sy/2, -sy/2, sy/2, sy/2, -sy/2, -sy/2, sy/2]
36 | z_corners = [sz/2, sz/2, sz/2, sz/2, -sz/2, -sz/2, -sz/2, -sz/2]
37 | corners_3d = np.vstack([x_corners, y_corners, z_corners])
38 | corners_3d[0,:] = corners_3d[0,:] + center[0];
39 | corners_3d[1,:] = corners_3d[1,:] + center[1];
40 | corners_3d[2,:] = corners_3d[2,:] + center[2];
41 | corners_3d = np.transpose(corners_3d)
42 |
43 | return corners_3d
44 |
45 | def get_eval(data_dict, config, reference, use_lang_classifier=False, use_oracle=False, use_cat_rand=False, use_best=False, post_processing=None):
46 | """ Loss functions
47 |
48 | Args:
49 | data_dict: dict
50 | config: dataset config instance
51 | reference: flag (False/True)
52 | post_processing: config dict
53 | Returns:
54 | data_dict: dict, updated in place with the evaluation metrics
55 | (e.g. ref_acc, ref_iou, lang_acc, obj_acc, sem_acc)
56 | """
57 |
58 | batch_size, num_words, _ = data_dict["lang_feat"].shape
59 |
60 |
61 | objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long()
62 | objectness_labels_batch = data_dict['objectness_label'].long()
63 |
64 | if post_processing:
65 | _ = parse_predictions(data_dict, post_processing)
66 | nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()
67 |
68 | # construct valid mask
69 | pred_masks = (nms_masks * objectness_preds_batch == 1).float()
70 | label_masks = (objectness_labels_batch == 1).float()
71 | else:
72 | # construct valid mask
73 | pred_masks = (objectness_preds_batch == 1).float()
74 | label_masks = (objectness_labels_batch == 1).float()
75 |
76 | cluster_preds = torch.argmax(data_dict["cluster_ref"] * pred_masks, 1).long().unsqueeze(1).repeat(1, pred_masks.shape[1])
77 | preds = torch.zeros(pred_masks.shape).cuda()
78 | preds = preds.scatter_(1, cluster_preds, 1)
79 | cluster_preds = preds
80 | cluster_labels = data_dict["cluster_labels"].float()
81 | cluster_labels *= label_masks
82 |
83 | # compute classification scores
84 | corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1), dim=1).float()
85 | labels = torch.ones(corrects.shape[0]).cuda()
86 | ref_acc = corrects / (labels + 1e-8)
87 |
88 | # store
89 | data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist()
90 |
91 | # compute localization metrics
92 | if use_best:
93 | pred_ref = torch.argmax(data_dict["cluster_labels"], 1) # (B,)
94 | # store the calibrated predictions and masks
95 | data_dict['cluster_ref'] = data_dict["cluster_labels"]
96 | if use_cat_rand:
97 | cluster_preds = torch.zeros(cluster_labels.shape).cuda()
98 | for i in range(cluster_preds.shape[0]):
99 | num_bbox = data_dict["num_bbox"][i]
100 | sem_cls_label = data_dict["sem_cls_label"][i]
101 | # sem_cls_label = torch.argmax(end_points["sem_cls_scores"], 2)[i]
102 | sem_cls_label[num_bbox:] -= 1
103 | candidate_masks = torch.gather(sem_cls_label == data_dict["object_cat"][i], 0, data_dict["object_assignment"][i])
104 | candidates = torch.arange(cluster_labels.shape[1])[candidate_masks]
105 | try:
106 | chosen_idx = torch.randperm(candidates.shape[0])[0]
107 | chosen_candidate = candidates[chosen_idx]
108 | cluster_preds[i, chosen_candidate] = 1
109 | except IndexError:
110 | cluster_preds[i, candidates] = 1
111 |
112 | pred_ref = torch.argmax(cluster_preds, 1) # (B,)
113 | # store the calibrated predictions and masks
114 | data_dict['cluster_ref'] = cluster_preds
115 | else:
116 | pred_ref = torch.argmax(data_dict['cluster_ref'] * pred_masks, 1) # (B,)
117 | # store the calibrated predictions and masks
118 | data_dict['cluster_ref'] = data_dict['cluster_ref'] * pred_masks
119 |
120 | if use_oracle:
121 | pred_center = data_dict['center_label'] # (B,MAX_NUM_OBJ,3)
122 | pred_heading_class = data_dict['heading_class_label'] # B,K2
123 | pred_heading_residual = data_dict['heading_residual_label'] # B,K2
124 | pred_size_class = data_dict['size_class_label'] # B,K2
125 | pred_size_residual = data_dict['size_residual_label'] # B,K2,3
126 |
127 | # assign
128 | pred_center = torch.gather(pred_center, 1, data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3))
129 | pred_heading_class = torch.gather(pred_heading_class, 1, data_dict["object_assignment"])
130 | pred_heading_residual = torch.gather(pred_heading_residual, 1, data_dict["object_assignment"]).unsqueeze(-1)
131 | pred_size_class = torch.gather(pred_size_class, 1, data_dict["object_assignment"])
132 | pred_size_residual = torch.gather(pred_size_residual, 1, data_dict["object_assignment"].unsqueeze(2).repeat(1, 1, 3))
133 | else:
134 | pred_center = data_dict['center'] # (B,K,3)
135 | pred_heading_class = torch.argmax(data_dict['heading_scores'], -1) # B,num_proposal
136 | pred_heading_residual = torch.gather(data_dict['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1
137 | pred_heading_class = pred_heading_class # B,num_proposal
138 | pred_heading_residual = pred_heading_residual.squeeze(2) # B,num_proposal
139 | pred_size_class = torch.argmax(data_dict['size_scores'], -1) # B,num_proposal
140 | pred_size_residual = torch.gather(data_dict['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3
141 | pred_size_class = pred_size_class
142 | pred_size_residual = pred_size_residual.squeeze(2) # B,num_proposal,3
143 |
144 | # store
145 | data_dict["pred_mask"] = pred_masks
146 | data_dict["label_mask"] = label_masks
147 | data_dict['pred_center'] = pred_center
148 | data_dict['pred_heading_class'] = pred_heading_class
149 | data_dict['pred_heading_residual'] = pred_heading_residual
150 | data_dict['pred_size_class'] = pred_size_class
151 | data_dict['pred_size_residual'] = pred_size_residual
152 |
153 | gt_ref = torch.argmax(data_dict["ref_box_label"], 1)
154 | gt_center = data_dict['center_label'] # (B,MAX_NUM_OBJ,3)
155 | gt_heading_class = data_dict['heading_class_label'] # B,K2
156 | gt_heading_residual = data_dict['heading_residual_label'] # B,K2
157 | gt_size_class = data_dict['size_class_label'] # B,K2
158 | gt_size_residual = data_dict['size_residual_label'] # B,K2,3
159 |
160 | ious = []
161 | multiple = []
162 | others = []
163 | pred_bboxes = []
164 | gt_bboxes = []
165 | for i in range(pred_ref.shape[0]):
166 | # compute the iou
167 | pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i]
168 | pred_obb = config.param2obb(
169 | pred_center[i, pred_ref_idx, 0:3].detach().cpu().numpy(),
170 | pred_heading_class[i, pred_ref_idx].detach().cpu().numpy(),
171 | pred_heading_residual[i, pred_ref_idx].detach().cpu().numpy(),
172 | pred_size_class[i, pred_ref_idx].detach().cpu().numpy(),
173 | pred_size_residual[i, pred_ref_idx].detach().cpu().numpy()
174 | )
175 | gt_obb = config.param2obb(
176 | gt_center[i, gt_ref_idx, 0:3].detach().cpu().numpy(),
177 | gt_heading_class[i, gt_ref_idx].detach().cpu().numpy(),
178 | gt_heading_residual[i, gt_ref_idx].detach().cpu().numpy(),
179 | gt_size_class[i, gt_ref_idx].detach().cpu().numpy(),
180 | gt_size_residual[i, gt_ref_idx].detach().cpu().numpy()
181 | )
182 | pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
183 | gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])
184 | iou = eval_ref_one_sample(pred_bbox, gt_bbox)
185 | ious.append(iou)
186 |
187 | # NOTE: get_3d_box() will return problematic bboxes
188 | pred_bbox = construct_bbox_corners(pred_obb[0:3], pred_obb[3:6])
189 | gt_bbox = construct_bbox_corners(gt_obb[0:3], gt_obb[3:6])
190 | pred_bboxes.append(pred_bbox)
191 | gt_bboxes.append(gt_bbox)
192 |
193 | # construct the multiple mask
194 | multiple.append(data_dict["unique_multiple"][i].item())
195 |
196 | # construct the others mask
197 | flag = 1 if data_dict["object_cat"][i] == 17 else 0
198 | others.append(flag)
199 |
200 | # lang
201 | if reference and use_lang_classifier:
202 | data_dict["lang_acc"] = (torch.argmax(data_dict['lang_scores'], 1) == data_dict["object_cat"]).float().mean()
203 | else:
204 | data_dict["lang_acc"] = torch.zeros(1)[0].cuda()
205 |
206 | # store
207 | data_dict["ref_iou"] = ious
208 | data_dict["ref_iou_rate_0.25"] = np.array(ious)[np.array(ious) >= 0.25].shape[0] / np.array(ious).shape[0]
209 | data_dict["ref_iou_rate_0.5"] = np.array(ious)[np.array(ious) >= 0.5].shape[0] / np.array(ious).shape[0]
210 | data_dict["ref_multiple_mask"] = multiple
211 | data_dict["ref_others_mask"] = others
212 | data_dict["pred_bboxes"] = pred_bboxes
213 | data_dict["gt_bboxes"] = gt_bboxes
214 |
215 | # --------------------------------------------
216 | # Some other statistics
217 | obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2) # B,K
218 | obj_acc = torch.sum((obj_pred_val==data_dict['objectness_label'].long()).float()*data_dict['objectness_mask'])/(torch.sum(data_dict['objectness_mask'])+1e-6)
219 | data_dict['obj_acc'] = obj_acc
220 | # detection semantic classification
221 | sem_cls_label = torch.gather(data_dict['sem_cls_label'], 1, data_dict['object_assignment']) # select (B,K) from (B,K2)
222 | sem_cls_pred = data_dict['sem_cls_scores'].argmax(-1) # (B,K)
223 | sem_match = (sem_cls_label == sem_cls_pred).float()
224 | data_dict["sem_acc"] = (sem_match * data_dict["pred_mask"]).sum() / data_dict["pred_mask"].sum()
225 |
226 | return data_dict
227 |
--------------------------------------------------------------------------------
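Note: a minimal usage sketch for the helpers in eval_helper.py above (not part of the repository). It builds the 8 box corners from a (center, size) pair and scores a single prediction with the 3D IoU; the centers and sizes below are made up.

    from lib.eval_helper import construct_bbox_corners, eval_ref_one_sample

    # hypothetical predicted and ground-truth boxes given as (center, box_size)
    pred_corners = construct_bbox_corners([1.00, 2.00, 0.50], [0.80, 0.60, 1.20])  # (8, 3) corners
    gt_corners = construct_bbox_corners([1.10, 2.05, 0.50], [0.80, 0.60, 1.20])    # (8, 3) corners
    iou = eval_ref_one_sample(pred_corners, gt_corners)  # 3D IoU via utils.box_util.box3d_iou
    print("IoU: {:.3f}".format(iou))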
/lib/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | class SoftmaxRankingLoss(nn.Module):
6 | def __init__(self):
7 | super().__init__()
8 |
9 | def forward(self, inputs, targets):
10 | # input check
11 | assert inputs.shape == targets.shape
12 |
13 | # compute the probabilities
14 | probs = F.softmax(inputs + 1e-8, dim=1)
15 |
16 | # reduction
17 | loss = -torch.sum(torch.log(probs + 1e-8) * targets, dim=1).mean()
18 |
19 | return loss
--------------------------------------------------------------------------------
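Note: a minimal sketch of how SoftmaxRankingLoss is typically applied (not part of the repository). The scores are per-proposal confidences of shape (batch, num_proposals) and the target is a one-hot mask over proposals; all shapes and indices below are illustrative.

    import torch
    from lib.loss import SoftmaxRankingLoss

    criterion = SoftmaxRankingLoss()
    scores = torch.randn(4, 256)                                # per-proposal confidence scores
    targets = torch.zeros(4, 256)
    targets[torch.arange(4), torch.tensor([3, 10, 42, 7])] = 1  # mark the ground-truth proposal per sample
    loss = criterion(scores, targets)                           # mean over the batch of -sum(targets * log softmax(scores))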
/lib/pointnet2/_ext_src/include/ball_query.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
5 | const int nsample);
6 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/include/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <ATen/ATen.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <cmath>
7 |
8 | #include <cuda.h>
9 | #include <cuda_runtime.h>
10 |
11 | #include <vector>
12 |
13 | #define TOTAL_THREADS 512
14 |
15 | inline int opt_n_threads(int work_size) {
16 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
17 |
18 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
19 | }
20 |
21 | inline dim3 opt_block_config(int x, int y) {
22 | const int x_threads = opt_n_threads(x);
23 | const int y_threads =
24 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
25 | dim3 block_config(x_threads, y_threads, 1);
26 |
27 | return block_config;
28 | }
29 |
30 | #define CUDA_CHECK_ERRORS() \
31 | do { \
32 | cudaError_t err = cudaGetLastError(); \
33 | if (cudaSuccess != err) { \
34 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \
35 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
36 | __FILE__); \
37 | exit(-1); \
38 | } \
39 | } while (0)
40 |
41 | #endif
42 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/include/group_points.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor group_points(at::Tensor points, at::Tensor idx);
5 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
6 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/include/interpolate.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <torch/extension.h>
4 | #include <vector>
5 |
6 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
7 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
8 | at::Tensor weight);
9 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
10 | at::Tensor weight, const int m);
11 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/include/sampling.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor gather_points(at::Tensor points, at::Tensor idx);
5 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
6 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
7 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/include/utils.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <torch/extension.h>
4 |
5 | #define CHECK_CUDA(x) \
6 | do { \
7 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \
8 | } while (0)
9 |
10 | #define CHECK_CONTIGUOUS(x) \
11 | do { \
12 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \
13 | } while (0)
14 |
15 | #define CHECK_IS_INT(x) \
16 | do { \
17 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \
18 | #x " must be an int tensor"); \
19 | } while (0)
20 |
21 | #define CHECK_IS_FLOAT(x) \
22 | do { \
23 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \
24 | #x " must be a float tensor"); \
25 | } while (0)
26 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/ball_query.cpp:
--------------------------------------------------------------------------------
1 | #include "ball_query.h"
2 | #include "utils.h"
3 |
4 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
5 | int nsample, const float *new_xyz,
6 | const float *xyz, int *idx);
7 |
8 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
9 | const int nsample) {
10 | CHECK_CONTIGUOUS(new_xyz);
11 | CHECK_CONTIGUOUS(xyz);
12 | CHECK_IS_FLOAT(new_xyz);
13 | CHECK_IS_FLOAT(xyz);
14 |
15 | if (new_xyz.is_cuda()) {
16 | CHECK_CUDA(xyz);
17 | }
18 |
19 | at::Tensor idx =
20 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
21 | at::device(new_xyz.device()).dtype(at::ScalarType::Int));
22 |
23 | if (new_xyz.is_cuda()) {
24 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
25 | radius, nsample, new_xyz.data_ptr<float>(),
26 | xyz.data_ptr<float>(), idx.data_ptr<int>());
27 | } else {
28 | AT_ASSERT(false, "CPU not supported");
29 | }
30 |
31 | return idx;
32 | }
33 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/ball_query_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <math.h>
2 | #include <stdio.h>
3 | #include <stdlib.h>
4 |
5 | #include "cuda_utils.h"
6 |
7 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
8 | // output: idx(b, m, nsample)
9 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
10 | int nsample,
11 | const float *__restrict__ new_xyz,
12 | const float *__restrict__ xyz,
13 | int *__restrict__ idx) {
14 | int batch_index = blockIdx.x;
15 | xyz += batch_index * n * 3;
16 | new_xyz += batch_index * m * 3;
17 | idx += m * nsample * batch_index;
18 |
19 | int index = threadIdx.x;
20 | int stride = blockDim.x;
21 |
22 | float radius2 = radius * radius;
23 | for (int j = index; j < m; j += stride) {
24 | float new_x = new_xyz[j * 3 + 0];
25 | float new_y = new_xyz[j * 3 + 1];
26 | float new_z = new_xyz[j * 3 + 2];
27 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
28 | float x = xyz[k * 3 + 0];
29 | float y = xyz[k * 3 + 1];
30 | float z = xyz[k * 3 + 2];
31 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
32 | (new_z - z) * (new_z - z);
33 | if (d2 < radius2) {
34 | if (cnt == 0) {
35 | for (int l = 0; l < nsample; ++l) {
36 | idx[j * nsample + l] = k;
37 | }
38 | }
39 | idx[j * nsample + cnt] = k;
40 | ++cnt;
41 | }
42 | }
43 | }
44 | }
45 |
46 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
47 | int nsample, const float *new_xyz,
48 | const float *xyz, int *idx) {
49 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
50 | query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
51 | b, n, m, radius, nsample, new_xyz, xyz, idx);
52 |
53 | CUDA_CHECK_ERRORS();
54 | }
55 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/bindings.cpp:
--------------------------------------------------------------------------------
1 | #include "ball_query.h"
2 | #include "group_points.h"
3 | #include "interpolate.h"
4 | #include "sampling.h"
5 |
6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
7 | m.def("gather_points", &gather_points);
8 | m.def("gather_points_grad", &gather_points_grad);
9 | m.def("furthest_point_sampling", &furthest_point_sampling);
10 |
11 | m.def("three_nn", &three_nn);
12 | m.def("three_interpolate", &three_interpolate);
13 | m.def("three_interpolate_grad", &three_interpolate_grad);
14 |
15 | m.def("ball_query", &ball_query);
16 |
17 | m.def("group_points", &group_points);
18 | m.def("group_points_grad", &group_points_grad);
19 | }
20 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/group_points.cpp:
--------------------------------------------------------------------------------
1 | #include "group_points.h"
2 | #include "utils.h"
3 |
4 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
5 | const float *points, const int *idx,
6 | float *out);
7 |
8 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
9 | int nsample, const float *grad_out,
10 | const int *idx, float *grad_points);
11 |
12 | at::Tensor group_points(at::Tensor points, at::Tensor idx) {
13 | CHECK_CONTIGUOUS(points);
14 | CHECK_CONTIGUOUS(idx);
15 | CHECK_IS_FLOAT(points);
16 | CHECK_IS_INT(idx);
17 |
18 | if (points.is_cuda()) {
19 | CHECK_CUDA(idx);
20 | }
21 |
22 | at::Tensor output =
23 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},
24 | at::device(points.device()).dtype(at::ScalarType::Float));
25 |
26 | if (points.is_cuda()) {
27 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
28 | idx.size(1), idx.size(2),
29 | points.data_ptr<float>(), idx.data_ptr<int>(),
30 | output.data_ptr<float>());
31 | } else {
32 | AT_ASSERT(false, "CPU not supported");
33 | }
34 |
35 | return output;
36 | }
37 |
38 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {
39 | CHECK_CONTIGUOUS(grad_out);
40 | CHECK_CONTIGUOUS(idx);
41 | CHECK_IS_FLOAT(grad_out);
42 | CHECK_IS_INT(idx);
43 |
44 | if (grad_out.is_cuda()) {
45 | CHECK_CUDA(idx);
46 | }
47 |
48 | at::Tensor output =
49 | torch::zeros({grad_out.size(0), grad_out.size(1), n},
50 | at::device(grad_out.device()).dtype(at::ScalarType::Float));
51 |
52 | if (grad_out.is_cuda()) {
53 | group_points_grad_kernel_wrapper(
54 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),
55 | grad_out.data_ptr<float>(), idx.data_ptr<int>(),
56 | output.data_ptr<float>());
57 | } else {
58 | AT_ASSERT(false, "CPU not supported");
59 | }
60 |
61 | return output;
62 | }
63 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/group_points_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 |
4 | #include "cuda_utils.h"
5 |
6 | // input: points(b, c, n) idx(b, npoints, nsample)
7 | // output: out(b, c, npoints, nsample)
8 | __global__ void group_points_kernel(int b, int c, int n, int npoints,
9 | int nsample,
10 | const float *__restrict__ points,
11 | const int *__restrict__ idx,
12 | float *__restrict__ out) {
13 | int batch_index = blockIdx.x;
14 | points += batch_index * n * c;
15 | idx += batch_index * npoints * nsample;
16 | out += batch_index * npoints * nsample * c;
17 |
18 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
19 | const int stride = blockDim.y * blockDim.x;
20 | for (int i = index; i < c * npoints; i += stride) {
21 | const int l = i / npoints;
22 | const int j = i % npoints;
23 | for (int k = 0; k < nsample; ++k) {
24 | int ii = idx[j * nsample + k];
25 | out[(l * npoints + j) * nsample + k] = points[l * n + ii];
26 | }
27 | }
28 | }
29 |
30 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
31 | const float *points, const int *idx,
32 | float *out) {
33 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
34 |
35 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
36 | b, c, n, npoints, nsample, points, idx, out);
37 |
38 | CUDA_CHECK_ERRORS();
39 | }
40 |
41 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
42 | // output: grad_points(b, c, n)
43 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
44 | int nsample,
45 | const float *__restrict__ grad_out,
46 | const int *__restrict__ idx,
47 | float *__restrict__ grad_points) {
48 | int batch_index = blockIdx.x;
49 | grad_out += batch_index * npoints * nsample * c;
50 | idx += batch_index * npoints * nsample;
51 | grad_points += batch_index * n * c;
52 |
53 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
54 | const int stride = blockDim.y * blockDim.x;
55 | for (int i = index; i < c * npoints; i += stride) {
56 | const int l = i / npoints;
57 | const int j = i % npoints;
58 | for (int k = 0; k < nsample; ++k) {
59 | int ii = idx[j * nsample + k];
60 | atomicAdd(grad_points + l * n + ii,
61 | grad_out[(l * npoints + j) * nsample + k]);
62 | }
63 | }
64 | }
65 |
66 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
67 | int nsample, const float *grad_out,
68 | const int *idx, float *grad_points) {
69 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
70 |
71 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
72 | b, c, n, npoints, nsample, grad_out, idx, grad_points);
73 |
74 | CUDA_CHECK_ERRORS();
75 | }
76 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/interpolate.cpp:
--------------------------------------------------------------------------------
1 | #include "interpolate.h"
2 | #include "utils.h"
3 |
4 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
5 | const float *known, float *dist2, int *idx);
6 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
7 | const float *points, const int *idx,
8 | const float *weight, float *out);
9 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
10 | const float *grad_out,
11 | const int *idx, const float *weight,
12 | float *grad_points);
13 |
14 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {
15 | CHECK_CONTIGUOUS(unknowns);
16 | CHECK_CONTIGUOUS(knows);
17 | CHECK_IS_FLOAT(unknowns);
18 | CHECK_IS_FLOAT(knows);
19 |
20 | if (unknowns.is_cuda()) {
21 | CHECK_CUDA(knows);
22 | }
23 |
24 | at::Tensor idx =
25 | torch::zeros({unknowns.size(0), unknowns.size(1), 3},
26 | at::device(unknowns.device()).dtype(at::ScalarType::Int));
27 | at::Tensor dist2 =
28 | torch::zeros({unknowns.size(0), unknowns.size(1), 3},
29 | at::device(unknowns.device()).dtype(at::ScalarType::Float));
30 |
31 | if (unknowns.is_cuda()) {
32 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),
33 | unknowns.data_ptr<float>(), knows.data_ptr<float>(),
34 | dist2.data_ptr<float>(), idx.data_ptr<int>());
35 | } else {
36 | AT_ASSERT(false, "CPU not supported");
37 | }
38 |
39 | return {dist2, idx};
40 | }
41 |
42 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
43 | at::Tensor weight) {
44 | CHECK_CONTIGUOUS(points);
45 | CHECK_CONTIGUOUS(idx);
46 | CHECK_CONTIGUOUS(weight);
47 | CHECK_IS_FLOAT(points);
48 | CHECK_IS_INT(idx);
49 | CHECK_IS_FLOAT(weight);
50 |
51 | if (points.is_cuda()) {
52 | CHECK_CUDA(idx);
53 | CHECK_CUDA(weight);
54 | }
55 |
56 | at::Tensor output =
57 | torch::zeros({points.size(0), points.size(1), idx.size(1)},
58 | at::device(points.device()).dtype(at::ScalarType::Float));
59 |
60 | if (points.is_cuda()) {
61 | three_interpolate_kernel_wrapper(
62 | points.size(0), points.size(1), points.size(2), idx.size(1),
63 | points.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
64 | output.data_ptr<float>());
65 | } else {
66 | AT_ASSERT(false, "CPU not supported");
67 | }
68 |
69 | return output;
70 | }
71 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
72 | at::Tensor weight, const int m) {
73 | CHECK_CONTIGUOUS(grad_out);
74 | CHECK_CONTIGUOUS(idx);
75 | CHECK_CONTIGUOUS(weight);
76 | CHECK_IS_FLOAT(grad_out);
77 | CHECK_IS_INT(idx);
78 | CHECK_IS_FLOAT(weight);
79 |
80 | if (grad_out.is_cuda()) {
81 | CHECK_CUDA(idx);
82 | CHECK_CUDA(weight);
83 | }
84 |
85 | at::Tensor output =
86 | torch::zeros({grad_out.size(0), grad_out.size(1), m},
87 | at::device(grad_out.device()).dtype(at::ScalarType::Float));
88 |
89 | if (grad_out.is_cuda()) {
90 | three_interpolate_grad_kernel_wrapper(
91 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
92 | grad_out.data_ptr<float>(), idx.data_ptr<int>(),
93 | weight.data_ptr<float>(), output.data_ptr<float>());
94 | } else {
95 | AT_ASSERT(false, "CPU not supported");
96 | }
97 |
98 | return output;
99 | }
100 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/interpolate_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <math.h>
2 | #include <stdio.h>
3 | #include <stdlib.h>
4 |
5 | #include "cuda_utils.h"
6 |
7 | // input: unknown(b, n, 3) known(b, m, 3)
8 | // output: dist2(b, n, 3), idx(b, n, 3)
9 | __global__ void three_nn_kernel(int b, int n, int m,
10 | const float *__restrict__ unknown,
11 | const float *__restrict__ known,
12 | float *__restrict__ dist2,
13 | int *__restrict__ idx) {
14 | int batch_index = blockIdx.x;
15 | unknown += batch_index * n * 3;
16 | known += batch_index * m * 3;
17 | dist2 += batch_index * n * 3;
18 | idx += batch_index * n * 3;
19 |
20 | int index = threadIdx.x;
21 | int stride = blockDim.x;
22 | for (int j = index; j < n; j += stride) {
23 | float ux = unknown[j * 3 + 0];
24 | float uy = unknown[j * 3 + 1];
25 | float uz = unknown[j * 3 + 2];
26 |
27 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
28 | int besti1 = 0, besti2 = 0, besti3 = 0;
29 | for (int k = 0; k < m; ++k) {
30 | float x = known[k * 3 + 0];
31 | float y = known[k * 3 + 1];
32 | float z = known[k * 3 + 2];
33 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
34 | if (d < best1) {
35 | best3 = best2;
36 | besti3 = besti2;
37 | best2 = best1;
38 | besti2 = besti1;
39 | best1 = d;
40 | besti1 = k;
41 | } else if (d < best2) {
42 | best3 = best2;
43 | besti3 = besti2;
44 | best2 = d;
45 | besti2 = k;
46 | } else if (d < best3) {
47 | best3 = d;
48 | besti3 = k;
49 | }
50 | }
51 | dist2[j * 3 + 0] = best1;
52 | dist2[j * 3 + 1] = best2;
53 | dist2[j * 3 + 2] = best3;
54 |
55 | idx[j * 3 + 0] = besti1;
56 | idx[j * 3 + 1] = besti2;
57 | idx[j * 3 + 2] = besti3;
58 | }
59 | }
60 |
61 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
62 | const float *known, float *dist2, int *idx) {
63 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
64 | three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
65 | dist2, idx);
66 |
67 | CUDA_CHECK_ERRORS();
68 | }
69 |
70 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
71 | // output: out(b, c, n)
72 | __global__ void three_interpolate_kernel(int b, int c, int m, int n,
73 | const float *__restrict__ points,
74 | const int *__restrict__ idx,
75 | const float *__restrict__ weight,
76 | float *__restrict__ out) {
77 | int batch_index = blockIdx.x;
78 | points += batch_index * m * c;
79 |
80 | idx += batch_index * n * 3;
81 | weight += batch_index * n * 3;
82 |
83 | out += batch_index * n * c;
84 |
85 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
86 | const int stride = blockDim.y * blockDim.x;
87 | for (int i = index; i < c * n; i += stride) {
88 | const int l = i / n;
89 | const int j = i % n;
90 | float w1 = weight[j * 3 + 0];
91 | float w2 = weight[j * 3 + 1];
92 | float w3 = weight[j * 3 + 2];
93 |
94 | int i1 = idx[j * 3 + 0];
95 | int i2 = idx[j * 3 + 1];
96 | int i3 = idx[j * 3 + 2];
97 |
98 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
99 | points[l * m + i3] * w3;
100 | }
101 | }
102 |
103 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
104 | const float *points, const int *idx,
105 | const float *weight, float *out) {
106 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
107 | three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
108 | b, c, m, n, points, idx, weight, out);
109 |
110 | CUDA_CHECK_ERRORS();
111 | }
112 |
113 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
114 | // output: grad_points(b, c, m)
115 |
116 | __global__ void three_interpolate_grad_kernel(
117 | int b, int c, int n, int m, const float *__restrict__ grad_out,
118 | const int *__restrict__ idx, const float *__restrict__ weight,
119 | float *__restrict__ grad_points) {
120 | int batch_index = blockIdx.x;
121 | grad_out += batch_index * n * c;
122 | idx += batch_index * n * 3;
123 | weight += batch_index * n * 3;
124 | grad_points += batch_index * m * c;
125 |
126 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
127 | const int stride = blockDim.y * blockDim.x;
128 | for (int i = index; i < c * n; i += stride) {
129 | const int l = i / n;
130 | const int j = i % n;
131 | float w1 = weight[j * 3 + 0];
132 | float w2 = weight[j * 3 + 1];
133 | float w3 = weight[j * 3 + 2];
134 |
135 | int i1 = idx[j * 3 + 0];
136 | int i2 = idx[j * 3 + 1];
137 | int i3 = idx[j * 3 + 2];
138 |
139 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);
140 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);
141 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);
142 | }
143 | }
144 |
145 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
146 | const float *grad_out,
147 | const int *idx, const float *weight,
148 | float *grad_points) {
149 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
150 | three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
151 | b, c, n, m, grad_out, idx, weight, grad_points);
152 |
153 | CUDA_CHECK_ERRORS();
154 | }
155 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/sampling.cpp:
--------------------------------------------------------------------------------
1 | #include "sampling.h"
2 | #include "utils.h"
3 |
4 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
5 | const float *points, const int *idx,
6 | float *out);
7 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
8 | const float *grad_out, const int *idx,
9 | float *grad_points);
10 |
11 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
12 | const float *dataset, float *temp,
13 | int *idxs);
14 |
15 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) {
16 | CHECK_CONTIGUOUS(points);
17 | CHECK_CONTIGUOUS(idx);
18 | CHECK_IS_FLOAT(points);
19 | CHECK_IS_INT(idx);
20 |
21 | if (points.is_cuda()) {
22 | CHECK_CUDA(idx);
23 | }
24 |
25 | at::Tensor output =
26 | torch::zeros({points.size(0), points.size(1), idx.size(1)},
27 | at::device(points.device()).dtype(at::ScalarType::Float));
28 |
29 | if (points.is_cuda()) {
30 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
31 | idx.size(1), points.data_ptr<float>(),
32 | idx.data_ptr<int>(), output.data_ptr<float>());
33 | } else {
34 | AT_ASSERT(false, "CPU not supported");
35 | }
36 |
37 | return output;
38 | }
39 |
40 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,
41 | const int n) {
42 | CHECK_CONTIGUOUS(grad_out);
43 | CHECK_CONTIGUOUS(idx);
44 | CHECK_IS_FLOAT(grad_out);
45 | CHECK_IS_INT(idx);
46 |
47 | if (grad_out.is_cuda()) {
48 | CHECK_CUDA(idx);
49 | }
50 |
51 | at::Tensor output =
52 | torch::zeros({grad_out.size(0), grad_out.size(1), n},
53 | at::device(grad_out.device()).dtype(at::ScalarType::Float));
54 |
55 | if (grad_out.is_cuda()) {
56 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,
57 | idx.size(1), grad_out.data_ptr<float>(),
58 | idx.data_ptr<int>(),
59 | output.data_ptr<float>());
60 | } else {
61 | AT_ASSERT(false, "CPU not supported");
62 | }
63 |
64 | return output;
65 | }
66 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {
67 | CHECK_CONTIGUOUS(points);
68 | CHECK_IS_FLOAT(points);
69 |
70 | at::Tensor output =
71 | torch::zeros({points.size(0), nsamples},
72 | at::device(points.device()).dtype(at::ScalarType::Int));
73 |
74 | at::Tensor tmp =
75 | torch::full({points.size(0), points.size(1)}, 1e10,
76 | at::device(points.device()).dtype(at::ScalarType::Float));
77 |
78 | if (points.is_cuda()) {
79 | furthest_point_sampling_kernel_wrapper(
80 | points.size(0), points.size(1), nsamples, points.data_ptr<float>(),
81 | tmp.data_ptr<float>(), output.data_ptr<int>());
82 | } else {
83 | AT_ASSERT(false, "CPU not supported");
84 | }
85 |
86 | return output;
87 | }
88 |
--------------------------------------------------------------------------------
/lib/pointnet2/_ext_src/src/sampling_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 |
4 | #include "cuda_utils.h"
5 |
6 | // input: points(b, c, n) idx(b, m)
7 | // output: out(b, c, m)
8 | __global__ void gather_points_kernel(int b, int c, int n, int m,
9 | const float *__restrict__ points,
10 | const int *__restrict__ idx,
11 | float *__restrict__ out) {
12 | for (int i = blockIdx.x; i < b; i += gridDim.x) {
13 | for (int l = blockIdx.y; l < c; l += gridDim.y) {
14 | for (int j = threadIdx.x; j < m; j += blockDim.x) {
15 | int a = idx[i * m + j];
16 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
17 | }
18 | }
19 | }
20 | }
21 |
22 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
23 | const float *points, const int *idx,
24 | float *out) {
25 | gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
26 | at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,
27 | points, idx, out);
28 |
29 | CUDA_CHECK_ERRORS();
30 | }
31 |
32 | // input: grad_out(b, c, m) idx(b, m)
33 | // output: grad_points(b, c, n)
34 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m,
35 | const float *__restrict__ grad_out,
36 | const int *__restrict__ idx,
37 | float *__restrict__ grad_points) {
38 | for (int i = blockIdx.x; i < b; i += gridDim.x) {
39 | for (int l = blockIdx.y; l < c; l += gridDim.y) {
40 | for (int j = threadIdx.x; j < m; j += blockDim.x) {
41 | int a = idx[i * m + j];
42 | atomicAdd(grad_points + (i * c + l) * n + a,
43 | grad_out[(i * c + l) * m + j]);
44 | }
45 | }
46 | }
47 | }
48 |
49 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
50 | const float *grad_out, const int *idx,
51 | float *grad_points) {
52 | gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
53 | at::cuda::getCurrentCUDAStream()>>>(
54 | b, c, n, npoints, grad_out, idx, grad_points);
55 |
56 | CUDA_CHECK_ERRORS();
57 | }
58 |
59 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
60 | int idx1, int idx2) {
61 | const float v1 = dists[idx1], v2 = dists[idx2];
62 | const int i1 = dists_i[idx1], i2 = dists_i[idx2];
63 | dists[idx1] = max(v1, v2);
64 | dists_i[idx1] = v2 > v1 ? i2 : i1;
65 | }
66 |
67 | // Input dataset: (b, n, 3), tmp: (b, n)
68 | // Output idxs (b, m)
69 | template <unsigned int block_size>
70 | __global__ void furthest_point_sampling_kernel(
71 | int b, int n, int m, const float *__restrict__ dataset,
72 | float *__restrict__ temp, int *__restrict__ idxs) {
73 | if (m <= 0) return;
74 | __shared__ float dists[block_size];
75 | __shared__ int dists_i[block_size];
76 |
77 | int batch_index = blockIdx.x;
78 | dataset += batch_index * n * 3;
79 | temp += batch_index * n;
80 | idxs += batch_index * m;
81 |
82 | int tid = threadIdx.x;
83 | const int stride = block_size;
84 |
85 | int old = 0;
86 | if (threadIdx.x == 0) idxs[0] = old;
87 |
88 | __syncthreads();
89 | for (int j = 1; j < m; j++) {
90 | int besti = 0;
91 | float best = -1;
92 | float x1 = dataset[old * 3 + 0];
93 | float y1 = dataset[old * 3 + 1];
94 | float z1 = dataset[old * 3 + 2];
95 | for (int k = tid; k < n; k += stride) {
96 | float x2, y2, z2;
97 | x2 = dataset[k * 3 + 0];
98 | y2 = dataset[k * 3 + 1];
99 | z2 = dataset[k * 3 + 2];
100 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
101 | if (mag <= 1e-3) continue;
102 |
103 | float d =
104 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
105 |
106 | float d2 = min(d, temp[k]);
107 | temp[k] = d2;
108 | besti = d2 > best ? k : besti;
109 | best = d2 > best ? d2 : best;
110 | }
111 | dists[tid] = best;
112 | dists_i[tid] = besti;
113 | __syncthreads();
114 |
115 | if (block_size >= 512) {
116 | if (tid < 256) {
117 | __update(dists, dists_i, tid, tid + 256);
118 | }
119 | __syncthreads();
120 | }
121 | if (block_size >= 256) {
122 | if (tid < 128) {
123 | __update(dists, dists_i, tid, tid + 128);
124 | }
125 | __syncthreads();
126 | }
127 | if (block_size >= 128) {
128 | if (tid < 64) {
129 | __update(dists, dists_i, tid, tid + 64);
130 | }
131 | __syncthreads();
132 | }
133 | if (block_size >= 64) {
134 | if (tid < 32) {
135 | __update(dists, dists_i, tid, tid + 32);
136 | }
137 | __syncthreads();
138 | }
139 | if (block_size >= 32) {
140 | if (tid < 16) {
141 | __update(dists, dists_i, tid, tid + 16);
142 | }
143 | __syncthreads();
144 | }
145 | if (block_size >= 16) {
146 | if (tid < 8) {
147 | __update(dists, dists_i, tid, tid + 8);
148 | }
149 | __syncthreads();
150 | }
151 | if (block_size >= 8) {
152 | if (tid < 4) {
153 | __update(dists, dists_i, tid, tid + 4);
154 | }
155 | __syncthreads();
156 | }
157 | if (block_size >= 4) {
158 | if (tid < 2) {
159 | __update(dists, dists_i, tid, tid + 2);
160 | }
161 | __syncthreads();
162 | }
163 | if (block_size >= 2) {
164 | if (tid < 1) {
165 | __update(dists, dists_i, tid, tid + 1);
166 | }
167 | __syncthreads();
168 | }
169 |
170 | old = dists_i[0];
171 | if (tid == 0) idxs[j] = old;
172 | }
173 | }
174 |
175 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
176 | const float *dataset, float *temp,
177 | int *idxs) {
178 | unsigned int n_threads = opt_n_threads(n);
179 |
180 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
181 |
182 | switch (n_threads) {
183 | case 512:
184 | furthest_point_sampling_kernel<512>
185 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
186 | break;
187 | case 256:
188 | furthest_point_sampling_kernel<256>
189 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
190 | break;
191 | case 128:
192 | furthest_point_sampling_kernel<128>
193 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
194 | break;
195 | case 64:
196 | furthest_point_sampling_kernel<64>
197 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
198 | break;
199 | case 32:
200 | furthest_point_sampling_kernel<32>
201 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
202 | break;
203 | case 16:
204 | furthest_point_sampling_kernel<16>
205 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
206 | break;
207 | case 8:
208 | furthest_point_sampling_kernel<8>
209 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
210 | break;
211 | case 4:
212 | furthest_point_sampling_kernel<4>
213 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
214 | break;
215 | case 2:
216 | furthest_point_sampling_kernel<2>
217 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
218 | break;
219 | case 1:
220 | furthest_point_sampling_kernel<1>
221 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
222 | break;
223 | default:
224 | furthest_point_sampling_kernel<512>
225 | <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
226 | }
227 |
228 | CUDA_CHECK_ERRORS();
229 | }
230 |
--------------------------------------------------------------------------------
/lib/pointnet2/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = "3.0.0"
2 |
--------------------------------------------------------------------------------
/lib/pointnet2/pointnet2_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | ''' Testing customized ops. '''
7 |
8 | import torch
9 | from torch.autograd import gradcheck
10 | import numpy as np
11 |
12 | import os
13 | import sys
14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
15 | sys.path.append(BASE_DIR)
16 | import pointnet2_utils
17 |
18 | def test_interpolation_grad():
19 | batch_size = 1
20 | feat_dim = 2
21 | m = 4
22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda()
23 |
24 | def interpolate_func(inputs):
25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda()
26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda()
27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight)
28 | return interpolated_feats
29 |
30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1))
31 |
32 | if __name__=='__main__':
33 | test_interpolation_grad()
34 |
--------------------------------------------------------------------------------
/lib/pointnet2/pytorch_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch '''
7 | import torch
8 | import torch.nn as nn
9 | from typing import List, Tuple
10 |
11 | class SharedMLP(nn.Sequential):
12 |
13 | def __init__(
14 | self,
15 | args: List[int],
16 | *,
17 | bn: bool = False,
18 | activation=nn.ReLU(inplace=True),
19 | preact: bool = False,
20 | first: bool = False,
21 | name: str = ""
22 | ):
23 | super().__init__()
24 |
25 | for i in range(len(args) - 1):
26 | self.add_module(
27 | name + 'layer{}'.format(i),
28 | Conv2d(
29 | args[i],
30 | args[i + 1],
31 | bn=(not first or not preact or (i != 0)) and bn,
32 | activation=activation
33 | if (not first or not preact or (i != 0)) else None,
34 | preact=preact
35 | )
36 | )
37 |
38 |
39 | class _BNBase(nn.Sequential):
40 |
41 | def __init__(self, in_size, batch_norm=None, name=""):
42 | super().__init__()
43 | self.add_module(name + "bn", batch_norm(in_size))
44 |
45 | nn.init.constant_(self[0].weight, 1.0)
46 | nn.init.constant_(self[0].bias, 0)
47 |
48 |
49 | class BatchNorm1d(_BNBase):
50 |
51 | def __init__(self, in_size: int, *, name: str = ""):
52 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)
53 |
54 |
55 | class BatchNorm2d(_BNBase):
56 |
57 | def __init__(self, in_size: int, name: str = ""):
58 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)
59 |
60 |
61 | class BatchNorm3d(_BNBase):
62 |
63 | def __init__(self, in_size: int, name: str = ""):
64 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name)
65 |
66 |
67 | class _ConvBase(nn.Sequential):
68 |
69 | def __init__(
70 | self,
71 | in_size,
72 | out_size,
73 | kernel_size,
74 | stride,
75 | padding,
76 | activation,
77 | bn,
78 | init,
79 | conv=None,
80 | batch_norm=None,
81 | bias=True,
82 | preact=False,
83 | name=""
84 | ):
85 | super().__init__()
86 |
87 | bias = bias and (not bn)
88 | conv_unit = conv(
89 | in_size,
90 | out_size,
91 | kernel_size=kernel_size,
92 | stride=stride,
93 | padding=padding,
94 | bias=bias
95 | )
96 | init(conv_unit.weight)
97 | if bias:
98 | nn.init.constant_(conv_unit.bias, 0)
99 |
100 | if bn:
101 | if not preact:
102 | bn_unit = batch_norm(out_size)
103 | else:
104 | bn_unit = batch_norm(in_size)
105 |
106 | if preact:
107 | if bn:
108 | self.add_module(name + 'bn', bn_unit)
109 |
110 | if activation is not None:
111 | self.add_module(name + 'activation', activation)
112 |
113 | self.add_module(name + 'conv', conv_unit)
114 |
115 | if not preact:
116 | if bn:
117 | self.add_module(name + 'bn', bn_unit)
118 |
119 | if activation is not None:
120 | self.add_module(name + 'activation', activation)
121 |
122 |
123 | class Conv1d(_ConvBase):
124 |
125 | def __init__(
126 | self,
127 | in_size: int,
128 | out_size: int,
129 | *,
130 | kernel_size: int = 1,
131 | stride: int = 1,
132 | padding: int = 0,
133 | activation=nn.ReLU(inplace=True),
134 | bn: bool = False,
135 | init=nn.init.kaiming_normal_,
136 | bias: bool = True,
137 | preact: bool = False,
138 | name: str = ""
139 | ):
140 | super().__init__(
141 | in_size,
142 | out_size,
143 | kernel_size,
144 | stride,
145 | padding,
146 | activation,
147 | bn,
148 | init,
149 | conv=nn.Conv1d,
150 | batch_norm=BatchNorm1d,
151 | bias=bias,
152 | preact=preact,
153 | name=name
154 | )
155 |
156 |
157 | class Conv2d(_ConvBase):
158 |
159 | def __init__(
160 | self,
161 | in_size: int,
162 | out_size: int,
163 | *,
164 | kernel_size: Tuple[int, int] = (1, 1),
165 | stride: Tuple[int, int] = (1, 1),
166 | padding: Tuple[int, int] = (0, 0),
167 | activation=nn.ReLU(inplace=True),
168 | bn: bool = False,
169 | init=nn.init.kaiming_normal_,
170 | bias: bool = True,
171 | preact: bool = False,
172 | name: str = ""
173 | ):
174 | super().__init__(
175 | in_size,
176 | out_size,
177 | kernel_size,
178 | stride,
179 | padding,
180 | activation,
181 | bn,
182 | init,
183 | conv=nn.Conv2d,
184 | batch_norm=BatchNorm2d,
185 | bias=bias,
186 | preact=preact,
187 | name=name
188 | )
189 |
190 |
191 | class Conv3d(_ConvBase):
192 |
193 | def __init__(
194 | self,
195 | in_size: int,
196 | out_size: int,
197 | *,
198 | kernel_size: Tuple[int, int, int] = (1, 1, 1),
199 | stride: Tuple[int, int, int] = (1, 1, 1),
200 | padding: Tuple[int, int, int] = (0, 0, 0),
201 | activation=nn.ReLU(inplace=True),
202 | bn: bool = False,
203 | init=nn.init.kaiming_normal_,
204 | bias: bool = True,
205 | preact: bool = False,
206 | name: str = ""
207 | ):
208 | super().__init__(
209 | in_size,
210 | out_size,
211 | kernel_size,
212 | stride,
213 | padding,
214 | activation,
215 | bn,
216 | init,
217 | conv=nn.Conv3d,
218 | batch_norm=BatchNorm3d,
219 | bias=bias,
220 | preact=preact,
221 | name=name
222 | )
223 |
224 |
225 | class FC(nn.Sequential):
226 |
227 | def __init__(
228 | self,
229 | in_size: int,
230 | out_size: int,
231 | *,
232 | activation=nn.ReLU(inplace=True),
233 | bn: bool = False,
234 | init=None,
235 | preact: bool = False,
236 | name: str = ""
237 | ):
238 | super().__init__()
239 |
240 | fc = nn.Linear(in_size, out_size, bias=not bn)
241 | if init is not None:
242 | init(fc.weight)
243 | if not bn:
244 | nn.init.constant_(fc.bias, 0)
245 |
246 | if preact:
247 | if bn:
248 | self.add_module(name + 'bn', BatchNorm1d(in_size))
249 |
250 | if activation is not None:
251 | self.add_module(name + 'activation', activation)
252 |
253 | self.add_module(name + 'fc', fc)
254 |
255 | if not preact:
256 | if bn:
257 | self.add_module(name + 'bn', BatchNorm1d(out_size))
258 |
259 | if activation is not None:
260 | self.add_module(name + 'activation', activation)
261 |
262 | def set_bn_momentum_default(bn_momentum):
263 |
264 | def fn(m):
265 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
266 | m.momentum = bn_momentum
267 |
268 | return fn
269 |
270 |
271 | class BNMomentumScheduler(object):
272 |
273 | def __init__(
274 | self, model, bn_lambda, last_epoch=-1,
275 | setter=set_bn_momentum_default
276 | ):
277 | if not isinstance(model, nn.Module):
278 | raise RuntimeError(
279 | "Class '{}' is not a PyTorch nn Module".format(
280 | type(model).__name__
281 | )
282 | )
283 |
284 | self.model = model
285 | self.setter = setter
286 | self.lmbd = bn_lambda
287 |
288 | self.step(last_epoch + 1)
289 | self.last_epoch = last_epoch
290 |
291 | def step(self, epoch=None):
292 | if epoch is None:
293 | epoch = self.last_epoch + 1
294 |
295 | self.last_epoch = epoch
296 | self.model.apply(self.setter(self.lmbd(epoch)))
297 |
298 |
299 |
--------------------------------------------------------------------------------
/lib/pointnet2/setup.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import os.path as osp
4 |
5 | from setuptools import find_packages, setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | _this_dir = osp.dirname(osp.abspath(__file__))
9 | _ext_src_root = "_ext_src"
10 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob(
11 | "{}/src/*.cu".format(_ext_src_root)
12 | )
13 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root))
14 |
15 | requirements = ["torch>=1.4"]
16 |
17 | os.environ["TORCH_CUDA_ARCH_LIST"] = "3.7+PTX;5.0;6.0;6.1;6.2;7.0;7.5"
18 |
19 | exec(open("_version.py").read())
20 |
21 | setup(
22 | name='pointnet2',
23 | version=__version__,
24 | packages=find_packages(),
25 | install_requires=requirements,
26 | ext_modules=[
27 | CUDAExtension(
28 | name='pointnet2._ext',
29 | sources=_ext_sources,
30 | extra_compile_args={
31 | "cxx": ["-O3"],
32 | "nvcc": ["-O3", "-Xfatbin", "-compress-all"],
33 | },
34 | include_dirs=[osp.join(_this_dir, _ext_src_root, "include")],
35 | )
36 | ],
37 | cmdclass={"build_ext": BuildExtension},
38 | include_package_data=True,
39 | )
--------------------------------------------------------------------------------
/models/backbone_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 | import sys
6 | import os
7 |
8 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder
9 | from lib.pointnet2.pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule
10 |
11 | class Pointnet2Backbone(nn.Module):
12 | r"""
13 | Backbone network for point cloud feature learning.
14 | Based on Pointnet++ single-scale grouping network.
15 |
16 | Parameters
17 | ----------
18 | input_feature_dim: int
19 | Number of input channels in the feature descriptor for each point.
20 | e.g. 3 for RGB.
21 | """
22 | def __init__(self, input_feature_dim=0):
23 | super().__init__()
24 |
25 | self.input_feature_dim = input_feature_dim
26 |
27 | # --------- 4 SET ABSTRACTION LAYERS ---------
28 | self.sa1 = PointnetSAModuleVotes(
29 | npoint=2048,
30 | radius=0.2,
31 | nsample=64,
32 | mlp=[input_feature_dim, 64, 64, 128],
33 | use_xyz=True,
34 | normalize_xyz=True
35 | )
36 |
37 | self.sa2 = PointnetSAModuleVotes(
38 | npoint=1024,
39 | radius=0.4,
40 | nsample=32,
41 | mlp=[128, 128, 128, 256],
42 | use_xyz=True,
43 | normalize_xyz=True
44 | )
45 |
46 | self.sa3 = PointnetSAModuleVotes(
47 | npoint=512,
48 | radius=0.8,
49 | nsample=16,
50 | mlp=[256, 128, 128, 256],
51 | use_xyz=True,
52 | normalize_xyz=True
53 | )
54 |
55 | self.sa4 = PointnetSAModuleVotes(
56 | npoint=256,
57 | radius=1.2,
58 | nsample=16,
59 | mlp=[256, 128, 128, 256],
60 | use_xyz=True,
61 | normalize_xyz=True
62 | )
63 |
64 | # --------- 2 FEATURE UPSAMPLING LAYERS --------
65 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256])
66 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256])
67 |
68 | def _break_up_pc(self, pc):
69 | xyz = pc[..., :3].contiguous()
70 | features = pc[..., 3:].transpose(1, 2).contiguous() if pc.size(-1) > 3 else None
71 |
72 | return xyz, features
73 |
74 | def forward(self, data_dict):
75 | r"""
76 | Forward pass of the network
77 |
78 | Parameters
79 | ----------
80 | data_dict: dict with key "point_clouds", a torch.cuda.FloatTensor of shape
81 | (B, N, 3 + input_feature_dim)
82 | Point cloud to run predictions on
83 | Each point in the point cloud MUST
84 | be formatted as (x, y, z, features...)
85 |
86 | Returns
87 | ----------
88 | data_dict: {XXX_xyz, XXX_features, XXX_inds}
89 | XXX_xyz: float32 Tensor of shape (B,K,3)
90 | XXX_features: float32 Tensor of shape (B,K,D)
91 | XXX_inds: int64 Tensor of shape (B,K) with values in [0,N-1]
92 | """
93 |
94 | pointcloud = data_dict["point_clouds"]
95 |
96 | batch_size = pointcloud.shape[0]
97 |
98 | xyz, features = self._break_up_pc(pointcloud)
99 |
100 | # --------- 4 SET ABSTRACTION LAYERS ---------
101 | xyz, features, fps_inds = self.sa1(xyz, features)
102 | data_dict['sa1_inds'] = fps_inds
103 | data_dict['sa1_xyz'] = xyz
104 | data_dict['sa1_features'] = features
105 |
106 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023
107 | data_dict['sa2_inds'] = fps_inds
108 | data_dict['sa2_xyz'] = xyz
109 | data_dict['sa2_features'] = features
110 |
111 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511
112 | data_dict['sa3_xyz'] = xyz
113 | data_dict['sa3_features'] = features
114 |
115 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255
116 | data_dict['sa4_xyz'] = xyz
117 | data_dict['sa4_features'] = features
118 |
119 | # --------- 2 FEATURE UPSAMPLING LAYERS --------
120 | features = self.fp1(data_dict['sa3_xyz'], data_dict['sa4_xyz'], data_dict['sa3_features'], data_dict['sa4_features'])
121 | features = self.fp2(data_dict['sa2_xyz'], data_dict['sa3_xyz'], data_dict['sa2_features'], features)
122 | data_dict['fp2_features'] = features
123 | data_dict['fp2_xyz'] = data_dict['sa2_xyz']
124 | num_seed = data_dict['fp2_xyz'].shape[1]
125 | data_dict['fp2_inds'] = data_dict['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds
126 | return data_dict
127 |
128 | if __name__=='__main__':
129 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda()
130 | print(backbone_net)
131 | backbone_net.eval()
132 |     out = backbone_net({"point_clouds": torch.rand(16,20000,6).cuda()})  # forward expects a data_dict
133 | for key in sorted(out.keys()):
134 | print(key, '\t', out[key].shape)
135 |
--------------------------------------------------------------------------------
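
For orientation, here is a minimal sketch (not repository code) of the tensor shapes Pointnet2Backbone.forward stores in data_dict for the __main__ test above, i.e. a batch of 16 clouds with 20000 points and input_feature_dim=3. It assumes the CUDA pointnet2 extension is built; SA/FP feature tensors are channel-first.

# illustrative only: expected shapes of the data_dict entries produced by the backbone
expected_shapes = {
    "sa1_xyz": (16, 2048, 3), "sa1_features": (16, 128, 2048), "sa1_inds": (16, 2048),
    "sa2_xyz": (16, 1024, 3), "sa2_features": (16, 256, 1024), "sa2_inds": (16, 1024),
    "sa3_xyz": (16, 512, 3),  "sa3_features": (16, 256, 512),
    "sa4_xyz": (16, 256, 3),  "sa4_features": (16, 256, 256),
    "fp2_xyz": (16, 1024, 3), "fp2_features": (16, 256, 1024), "fp2_inds": (16, 1024),
}
for key, shape in sorted(expected_shapes.items()):
    print(key, shape)

The 1024 fp2 points are the seed points consumed by the voting module further down the pipeline.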
/models/lang_module.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torch.nn as nn
5 |
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 |
8 | class LangModule(nn.Module):
9 | def __init__(self, num_text_classes, use_lang_classifier=True, use_bidir=False,
10 | emb_size=300, hidden_size=256):
11 | super().__init__()
12 |
13 | self.num_text_classes = num_text_classes
14 | self.use_lang_classifier = use_lang_classifier
15 | self.use_bidir = use_bidir
16 |
17 | self.gru = nn.GRU(
18 | input_size=emb_size,
19 | hidden_size=hidden_size,
20 | batch_first=True,
21 | bidirectional=self.use_bidir
22 | )
23 | lang_size = hidden_size * 2 if self.use_bidir else hidden_size
24 |
25 | # language classifier
26 | if use_lang_classifier:
27 | self.lang_cls = nn.Sequential(
28 | nn.Linear(lang_size, num_text_classes),
29 | nn.Dropout()
30 | )
31 |
32 |
33 | def forward(self, data_dict):
34 | """
35 | encode the input descriptions
36 | """
37 |
38 | word_embs = data_dict["lang_feat"]
39 | lang_feat = pack_padded_sequence(word_embs, data_dict["lang_len"], batch_first=True, enforce_sorted=False)
40 |
41 | # encode description
42 | _, lang_last = self.gru(lang_feat)
43 | lang_last = lang_last.permute(1, 0, 2).contiguous().flatten(start_dim=1) # batch_size, hidden_size * num_dir
44 |
45 | # store the encoded language features
46 | data_dict["lang_emb"] = lang_last # B, hidden_size
47 |
48 | # classify
49 | if self.use_lang_classifier:
50 | data_dict["lang_scores"] = self.lang_cls(data_dict["lang_emb"])
51 |
52 | return data_dict
53 |
54 |
--------------------------------------------------------------------------------
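
A minimal CPU sketch of how this module is driven, assuming it is run from the repository root. The 300-d embeddings, the lengths, and num_text_classes=18 are made-up stand-ins for what the dataset normally provides via "lang_feat", "lang_len" and the ScanNet class count.

import torch
from models.lang_module import LangModule

module = LangModule(num_text_classes=18, use_lang_classifier=True, use_bidir=False)
data_dict = {
    "lang_feat": torch.rand(4, 30, 300),        # 4 descriptions, up to 30 tokens, 300-d GloVe-like embeddings
    "lang_len": torch.tensor([30, 22, 17, 9]),  # true token count of each description
}
data_dict = module(data_dict)
print(data_dict["lang_emb"].shape)     # torch.Size([4, 256])
print(data_dict["lang_scores"].shape)  # torch.Size([4, 18])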
/models/match_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class MatchModule(nn.Module):
5 | def __init__(self, num_proposals=256, lang_size=256, hidden_size=128):
6 | super().__init__()
7 |
8 | self.num_proposals = num_proposals
9 | self.lang_size = lang_size
10 | self.hidden_size = hidden_size
11 |
12 | self.fuse = nn.Sequential(
13 | nn.Conv1d(self.lang_size + 128, hidden_size, 1),
14 | nn.ReLU()
15 | )
16 | # self.match = nn.Conv1d(hidden_size, 1, 1)
17 | self.match = nn.Sequential(
18 | nn.Conv1d(hidden_size, hidden_size, 1),
19 | nn.ReLU(),
20 | nn.BatchNorm1d(hidden_size),
21 | nn.Conv1d(hidden_size, hidden_size, 1),
22 | nn.ReLU(),
23 | nn.BatchNorm1d(hidden_size),
24 | nn.Conv1d(hidden_size, 1, 1)
25 | )
26 |
27 | def forward(self, data_dict):
28 | """
29 |         Args:
30 |             data_dict: dict with 'aggregated_vote_features' (B, num_proposals, 128),
31 |                 'objectness_scores' (B, num_proposals, 2) and 'lang_emb' (B, lang_size)
32 |         Returns:
33 |             data_dict with 'cluster_ref': (B, num_proposals) matching confidence per proposal
34 | """
35 |
36 | # unpack outputs from detection branch
37 | features = data_dict['aggregated_vote_features'] # batch_size, num_proposal, 128
38 | objectness_masks = data_dict['objectness_scores'].max(2)[1].float().unsqueeze(2) # batch_size, num_proposals, 1
39 |
40 | # unpack outputs from language branch
41 | lang_feat = data_dict["lang_emb"] # batch_size, lang_size
42 | lang_feat = lang_feat.unsqueeze(1).repeat(1, self.num_proposals, 1) # batch_size, num_proposals, lang_size
43 |
44 | # fuse
45 | features = torch.cat([features, lang_feat], dim=-1) # batch_size, num_proposals, 128 + lang_size
46 | features = features.permute(0, 2, 1).contiguous() # batch_size, 128 + lang_size, num_proposals
47 |
48 | # fuse features
49 | features = self.fuse(features) # batch_size, hidden_size, num_proposals
50 |
51 | # mask out invalid proposals
52 | objectness_masks = objectness_masks.permute(0, 2, 1).contiguous() # batch_size, 1, num_proposals
53 | features = features * objectness_masks
54 |
55 | # match
56 | confidences = self.match(features).squeeze(1) # batch_size, num_proposals
57 |
58 | data_dict["cluster_ref"] = confidences
59 |
60 | return data_dict
61 |
--------------------------------------------------------------------------------
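
A small CPU sketch (illustrative only) of the shape flow through MatchModule, replacing the real detection and language outputs with random tensors of the shapes documented above.

import torch
from models.match_module import MatchModule

B, K = 2, 256
module = MatchModule(num_proposals=K, lang_size=256, hidden_size=128)
data_dict = {
    "aggregated_vote_features": torch.rand(B, K, 128),  # normally from ProposalModule
    "objectness_scores": torch.rand(B, K, 2),           # normally from ProposalModule
    "lang_emb": torch.rand(B, 256),                      # normally from LangModule
}
data_dict = module(data_dict)
print(data_dict["cluster_ref"].shape)  # torch.Size([2, 256]): one confidence per proposal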
/models/proposal_module.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from: https://github.com/facebookresearch/votenet/blob/master/models/proposal_module.py
3 | """
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import numpy as np
9 | import os
10 | import sys
11 |
12 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder
13 | import lib.pointnet2.pointnet2_utils
14 | from lib.pointnet2.pointnet2_modules import PointnetSAModuleVotes
15 |
16 | class ProposalModule(nn.Module):
17 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256):
18 | super().__init__()
19 |
20 | self.num_class = num_class
21 | self.num_heading_bin = num_heading_bin
22 | self.num_size_cluster = num_size_cluster
23 | self.mean_size_arr = mean_size_arr
24 | self.num_proposal = num_proposal
25 | self.sampling = sampling
26 | self.seed_feat_dim = seed_feat_dim
27 |
28 | # Vote clustering
29 | self.vote_aggregation = PointnetSAModuleVotes(
30 | npoint=self.num_proposal,
31 | radius=0.3,
32 | nsample=16,
33 | mlp=[self.seed_feat_dim, 128, 128, 128],
34 | use_xyz=True,
35 | normalize_xyz=True
36 | )
37 |
38 | # Object proposal/detection
39 | # Objectness scores (2), center residual (3),
40 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4)
41 | self.proposal = nn.Sequential(
42 | nn.Conv1d(128,128,1, bias=False),
43 | nn.BatchNorm1d(128),
44 | nn.ReLU(),
45 | nn.Conv1d(128,128,1, bias=False),
46 | nn.BatchNorm1d(128),
47 | nn.ReLU(),
48 | nn.Conv1d(128,2+3+num_heading_bin*2+num_size_cluster*4+self.num_class,1)
49 | )
50 |
51 | def forward(self, xyz, features, data_dict):
52 | """
53 | Args:
54 | xyz: (B,K,3)
55 | features: (B,C,K)
56 | Returns:
57 |             data_dict with decoded proposals (objectness, center, heading, size, semantic scores)
58 | """
59 |
60 | # Farthest point sampling (FPS) on votes
61 | xyz, features, fps_inds = self.vote_aggregation(xyz, features)
62 |
63 | sample_inds = fps_inds
64 |
65 | data_dict['aggregated_vote_xyz'] = xyz # (batch_size, num_proposal, 3)
66 | data_dict['aggregated_vote_features'] = features.permute(0, 2, 1).contiguous() # (batch_size, num_proposal, 128)
67 | data_dict['aggregated_vote_inds'] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal
68 |
69 | # --------- PROPOSAL GENERATION ---------
70 | net = self.proposal(features)
71 | data_dict = self.decode_scores(net, data_dict, self.num_class, self.num_heading_bin, self.num_size_cluster, self.mean_size_arr)
72 |
73 | return data_dict
74 |
75 | def decode_scores(self, net, data_dict, num_class, num_heading_bin, num_size_cluster, mean_size_arr):
76 | """
77 | decode the predicted parameters for the bounding boxes
78 |
79 | """
80 |         net_transposed = net.transpose(2,1).contiguous() # (batch_size, num_proposal, ..)
81 | batch_size = net_transposed.shape[0]
82 | num_proposal = net_transposed.shape[1]
83 |
84 | objectness_scores = net_transposed[:,:,0:2]
85 |
86 | base_xyz = data_dict['aggregated_vote_xyz'] # (batch_size, num_proposal, 3)
87 | center = base_xyz + net_transposed[:,:,2:5] # (batch_size, num_proposal, 3)
88 |
89 | heading_scores = net_transposed[:,:,5:5+num_heading_bin]
90 | heading_residuals_normalized = net_transposed[:,:,5+num_heading_bin:5+num_heading_bin*2]
91 |
92 | size_scores = net_transposed[:,:,5+num_heading_bin*2:5+num_heading_bin*2+num_size_cluster]
93 | size_residuals_normalized = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster:5+num_heading_bin*2+num_size_cluster*4].view([batch_size, num_proposal, num_size_cluster, 3]) # Bxnum_proposalxnum_size_clusterx3
94 |
95 |         sem_cls_scores = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster*4:] # Bxnum_proposalxnum_class
96 |
97 | # store
98 | data_dict['objectness_scores'] = objectness_scores
99 | data_dict['center'] = center
100 | data_dict['heading_scores'] = heading_scores # Bxnum_proposalxnum_heading_bin
101 | data_dict['heading_residuals_normalized'] = heading_residuals_normalized # Bxnum_proposalxnum_heading_bin (should be -1 to 1)
102 | data_dict['heading_residuals'] = heading_residuals_normalized * (np.pi/num_heading_bin) # Bxnum_proposalxnum_heading_bin
103 | data_dict['size_scores'] = size_scores
104 | data_dict['size_residuals_normalized'] = size_residuals_normalized
105 | data_dict['size_residuals'] = size_residuals_normalized * torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0)
106 | data_dict['sem_cls_scores'] = sem_cls_scores
107 |
108 | return data_dict
109 |
110 |
--------------------------------------------------------------------------------
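
The slicing in decode_scores follows a fixed per-proposal channel layout. The sketch below works it out for illustration, assuming the usual ScanNet detection configuration (num_class=18, num_heading_bin=1, num_size_cluster=18); these values normally come from ScannetDatasetConfig and are only an assumption here.

# illustrative only: channel layout of the proposal head output, per proposal
num_class, num_heading_bin, num_size_cluster = 18, 1, 18

layout = [
    ("objectness scores",         2),
    ("center residual",           3),
    ("heading class scores",      num_heading_bin),
    ("heading residuals (norm.)", num_heading_bin),
    ("size class scores",         num_size_cluster),
    ("size residuals (norm.)",    num_size_cluster * 3),
    ("semantic class scores",     num_class),
]

offset = 0
for name, width in layout:
    print("{:27s} channels [{:2d}, {:2d})".format(name, offset, offset + width))
    offset += width
print("total channels per proposal:", offset)  # 2 + 3 + NH*2 + NS*4 + num_class = 97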
/models/refnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import sys
5 | import os
6 |
7 | sys.path.append(os.path.join(os.getcwd(), "lib")) # HACK add the lib folder
8 | from models.backbone_module import Pointnet2Backbone
9 | from models.voting_module import VotingModule
10 | from models.proposal_module import ProposalModule
11 | from models.lang_module import LangModule
12 | from models.match_module import MatchModule
13 |
14 | class RefNet(nn.Module):
15 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr,
16 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling="vote_fps",
17 | use_lang_classifier=True, use_bidir=False, no_reference=False,
18 | emb_size=300, hidden_size=256):
19 | super().__init__()
20 |
21 | self.num_class = num_class
22 | self.num_heading_bin = num_heading_bin
23 | self.num_size_cluster = num_size_cluster
24 | self.mean_size_arr = mean_size_arr
25 | assert(mean_size_arr.shape[0] == self.num_size_cluster)
26 | self.input_feature_dim = input_feature_dim
27 | self.num_proposal = num_proposal
28 | self.vote_factor = vote_factor
29 | self.sampling = sampling
30 | self.use_lang_classifier = use_lang_classifier
31 | self.use_bidir = use_bidir
32 | self.no_reference = no_reference
33 |
34 |
35 | # --------- PROPOSAL GENERATION ---------
36 | # Backbone point feature learning
37 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim)
38 |
39 | # Hough voting
40 | self.vgen = VotingModule(self.vote_factor, 256)
41 |
42 | # Vote aggregation and object proposal
43 | self.proposal = ProposalModule(num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling)
44 |
45 | if not no_reference:
46 | # --------- LANGUAGE ENCODING ---------
47 | # Encode the input descriptions into vectors
48 | # (including attention and language classification)
49 | self.lang = LangModule(num_class, use_lang_classifier, use_bidir, emb_size, hidden_size)
50 |
51 | # --------- PROPOSAL MATCHING ---------
52 | # Match the generated proposals and select the most confident ones
53 | self.match = MatchModule(num_proposals=num_proposal, lang_size=(1 + int(self.use_bidir)) * hidden_size)
54 |
55 | def forward(self, data_dict):
56 | """ Forward pass of the network
57 |
58 | Args:
59 | data_dict: dict
60 | {
61 | point_clouds,
62 | lang_feat
63 | }
64 |
65 | point_clouds: Variable(torch.cuda.FloatTensor)
66 | (B, N, 3 + input_channels) tensor
67 |                 Point cloud to run predictions on.
68 |                 Each point in the point cloud MUST
69 |                 be formatted as (x, y, z, features...)
70 | Returns:
71 | end_points: dict
72 | """
73 |
74 | #######################################
75 | # #
76 | # DETECTION BRANCH #
77 | # #
78 | #######################################
79 |
80 |         # --------- BACKBONE POINT FEATURE LEARNING ---------
81 | data_dict = self.backbone_net(data_dict)
82 |
83 | # --------- HOUGH VOTING ---------
84 | xyz = data_dict["fp2_xyz"]
85 | features = data_dict["fp2_features"]
86 | data_dict["seed_inds"] = data_dict["fp2_inds"]
87 | data_dict["seed_xyz"] = xyz
88 | data_dict["seed_features"] = features
89 |
90 | xyz, features = self.vgen(xyz, features)
91 | features_norm = torch.norm(features, p=2, dim=1)
92 | features = features.div(features_norm.unsqueeze(1))
93 | data_dict["vote_xyz"] = xyz
94 | data_dict["vote_features"] = features
95 |
96 | # --------- PROPOSAL GENERATION ---------
97 | data_dict = self.proposal(xyz, features, data_dict)
98 |
99 | if not self.no_reference:
100 | #######################################
101 | # #
102 | # LANGUAGE BRANCH #
103 | # #
104 | #######################################
105 |
106 | # --------- LANGUAGE ENCODING ---------
107 | data_dict = self.lang(data_dict)
108 |
109 | #######################################
110 | # #
111 | # PROPOSAL MATCHING #
112 | # #
113 | #######################################
114 |
115 | # --------- PROPOSAL MATCHING ---------
116 | data_dict = self.match(data_dict)
117 |
118 | return data_dict
119 |
--------------------------------------------------------------------------------
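
As a reading aid (not repository code), a rough summary of the keys each stage adds to data_dict, collected from the modules above; the group names are just labels for this sketch, and the backbone's sa*/fp2 entries plus the *_normalized residuals are omitted for brevity.

refnet_data_dict_keys = {
    "voting":   ["seed_xyz", "seed_features", "seed_inds", "vote_xyz", "vote_features"],
    "proposal": ["aggregated_vote_xyz", "aggregated_vote_features", "objectness_scores",
                 "center", "heading_scores", "heading_residuals",
                 "size_scores", "size_residuals", "sem_cls_scores"],
    "language": ["lang_emb", "lang_scores"],  # lang_scores only if use_lang_classifier; skipped if no_reference
    "matching": ["cluster_ref"],              # skipped if no_reference
}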
/models/voting_module.py:
--------------------------------------------------------------------------------
1 | '''
2 | Voting module: generate votes from XYZ and features of seed points.
3 |
4 | Modified from: https://github.com/facebookresearch/votenet/blob/master/models/voting_module.py
5 | '''
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 |
11 | class VotingModule(nn.Module):
12 | def __init__(self, vote_factor, seed_feature_dim):
13 | """ Votes generation from seed point features.
14 |
15 | Args:
16 |             vote_factor: int
17 | number of votes generated from each seed point
18 | seed_feature_dim: int
19 | number of channels of seed point features
20 | vote_feature_dim: int
21 | number of channels of vote features
22 | """
23 | super().__init__()
24 | self.vote_factor = vote_factor
25 | self.in_dim = seed_feature_dim
26 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim
27 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1)
28 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1)
29 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1)
30 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim)
31 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim)
32 |
33 | def forward(self, seed_xyz, seed_features):
34 | """ Forward pass.
35 |
36 | Arguments:
37 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor
38 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor
39 | Returns:
40 | vote_xyz: (batch_size, num_seed*vote_factor, 3)
41 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor)
42 | """
43 | batch_size = seed_xyz.shape[0]
44 | num_seed = seed_xyz.shape[1]
45 | num_vote = num_seed*self.vote_factor
46 | net = F.relu(self.bn1(self.conv1(seed_features)))
47 | net = F.relu(self.bn2(self.conv2(net)))
48 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed)
49 |
50 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim).contiguous()
51 | offset = net[:,:,:,0:3]
52 | vote_xyz = seed_xyz.unsqueeze(2) + offset
53 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3)
54 |
55 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim)
56 | vote_features = seed_features.transpose(2,1).unsqueeze(2).contiguous() + residual_features
57 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim)
58 | vote_features = vote_features.transpose(2,1).contiguous()
59 |
60 | return vote_xyz, vote_features
61 |
62 | if __name__=='__main__':
63 | net = VotingModule(2, 256).cuda()
64 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda())
65 | print('xyz', xyz.shape)
66 | print('features', features.shape)
67 |
--------------------------------------------------------------------------------
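
The __main__ test above needs a GPU, but the same shape check can be run on the CPU since the module only uses Conv1d and BatchNorm1d. A minimal sketch, assuming it is launched from the repository root:

import torch
from models.voting_module import VotingModule

net = VotingModule(vote_factor=2, seed_feature_dim=256)
vote_xyz, vote_features = net(torch.rand(8, 1024, 3), torch.rand(8, 256, 1024))
print(vote_xyz.shape)       # torch.Size([8, 2048, 3]): each seed casts 2 votes
print(vote_features.shape)  # torch.Size([8, 256, 2048])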
/requirements.txt:
--------------------------------------------------------------------------------
1 | plyfile
2 | opencv-python
3 | trimesh==2.35.39
4 | tensorboardX
5 | easydict
6 | tqdm
7 | h5py
8 | matplotlib
--------------------------------------------------------------------------------
/scripts/compute_multiview_features.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import torch
5 | import argparse
6 | import numpy as np
7 | import torch.nn as nn
8 | import torchvision.transforms as transforms
9 | from torch.utils.data import Dataset, DataLoader
10 | from imageio import imread
11 | from PIL import Image
12 | from tqdm import tqdm
13 |
14 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
15 | from lib.enet import create_enet_for_3d
16 | from lib.config import CONF
17 |
18 | # scannet data
19 | # NOTE: read only!
20 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES
21 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file
22 | SCANNET_LIST = CONF.SCANNETV2_LIST
23 |
24 | ENET_PATH = CONF.ENET_WEIGHTS
25 | ENET_FEATURE_ROOT = CONF.ENET_FEATURES_SUBROOT
26 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH
27 |
28 | class EnetDataset(Dataset):
29 | def __init__(self):
30 | self._init_resources()
31 |
32 | def __len__(self):
33 | return len(self.data)
34 |
35 | def __getitem__(self, idx):
36 | scene_id, frame_id = self.data[idx]
37 | image = self._load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256])
38 |
39 | return scene_id, frame_id, image
40 |
41 | def _init_resources(self):
42 | self._get_scene_list()
43 | self.data = []
44 | for scene_id in self.scene_list:
45 | frame_list = sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color")), key=lambda x:int(x.split(".")[0]))
46 | for frame_file in frame_list:
47 | self.data.append(
48 | (
49 | scene_id,
50 | int(frame_file.split(".")[0])
51 | )
52 | )
53 |
54 | def _get_scene_list(self):
55 | with open(SCANNET_LIST, 'r') as f:
56 | self.scene_list = sorted(list(set(f.read().splitlines())))
57 |
58 | def _resize_crop_image(self, image, new_image_dims):
59 | image_dims = [image.shape[1], image.shape[0]]
60 | if image_dims != new_image_dims:
61 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1])))
62 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image))
63 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image)
64 |
65 | return np.array(image)
66 |
67 | def _load_image(self, file, image_dims):
68 | image = imread(file)
69 | # preprocess
70 | image = self._resize_crop_image(image, image_dims)
71 | if len(image.shape) == 3: # color image
72 | image = np.transpose(image, [2, 0, 1]) # move feature to front
73 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0))
74 | elif len(image.shape) == 2: # label image
75 | image = np.expand_dims(image, 0)
76 | else:
77 | raise ValueError
78 |
79 | return image
80 |
81 | def collate_fn(self, data):
82 | scene_ids, frame_ids, images = zip(*data)
83 | scene_ids = list(scene_ids)
84 | frame_ids = list(frame_ids)
85 | images = torch.stack(images, 0).cuda()
86 |
87 | return scene_ids, frame_ids, images
88 |
89 | def create_enet():
90 | enet_fixed, enet_trainable, _ = create_enet_for_3d(41, ENET_PATH, 21)
91 | enet = nn.Sequential(
92 | enet_fixed,
93 | enet_trainable
94 | ).cuda()
95 | enet.eval()
96 | for param in enet.parameters():
97 | param.requires_grad = False
98 |
99 | return enet
100 |
101 | if __name__ == "__main__":
102 | parser = argparse.ArgumentParser()
103 | parser.add_argument('--gpu', type=str, help='gpu', default='0')
104 | args = parser.parse_args()
105 |
106 | # setting
107 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
108 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
109 |
110 | # init
111 | dataset = EnetDataset()
112 | dataloader = DataLoader(dataset, batch_size=256, shuffle=False, collate_fn=dataset.collate_fn)
113 | enet = create_enet()
114 |
115 | # feed
116 | print("extracting multiview features from ENet...")
117 | for scene_ids, frame_ids, images in tqdm(dataloader):
118 | features = enet(images)
119 | batch_size = images.shape[0]
120 | for batch_id in range(batch_size):
121 | os.makedirs(ENET_FEATURE_ROOT.format(scene_ids[batch_id]), exist_ok=True)
122 | np.save(ENET_FEATURE_PATH.format(scene_ids[batch_id], frame_ids[batch_id]), features[batch_id].cpu().numpy())
123 |
124 | print("done!")
125 |
126 |
--------------------------------------------------------------------------------
/scripts/project_multiview_features.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import h5py
4 | import torch
5 | import torch.nn as nn
6 | import argparse
7 | import numpy as np
8 | from tqdm import tqdm
9 | from plyfile import PlyData, PlyElement
10 | import math
11 | from imageio import imread
12 | from PIL import Image
13 | import torchvision.transforms as transforms
14 |
15 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
16 | from lib.config import CONF
17 | from lib.projection import ProjectionHelper
18 |
19 | SCANNET_LIST = CONF.SCANNETV2_LIST
20 | SCANNET_DATA = CONF.PATH.SCANNET_DATA
21 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES
22 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file
23 |
24 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH
25 | ENET_FEATURE_DATABASE = CONF.MULTIVIEW
26 |
27 | # projection
28 | INTRINSICS = [[37.01983, 0, 20, 0],[0, 38.52470, 15.5, 0],[0, 0, 1, 0],[0, 0, 0, 1]]
29 | PROJECTOR = ProjectionHelper(INTRINSICS, 0.1, 4.0, [41, 32], 0.05)
30 |
31 | def get_scene_list():
32 | with open(SCANNET_LIST, 'r') as f:
33 | return sorted(list(set(f.read().splitlines())))
34 |
35 | def to_tensor(arr):
36 | return torch.Tensor(arr).cuda()
37 |
38 | def resize_crop_image(image, new_image_dims):
39 | image_dims = [image.shape[1], image.shape[0]]
40 | if image_dims == new_image_dims:
41 | return image
42 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1])))
43 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image))
44 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image)
45 | image = np.array(image)
46 |
47 | return image
48 |
49 | def load_image(file, image_dims):
50 | image = imread(file)
51 | # preprocess
52 | image = resize_crop_image(image, image_dims)
53 | if len(image.shape) == 3: # color image
54 | image = np.transpose(image, [2, 0, 1]) # move feature to front
55 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0))
56 | elif len(image.shape) == 2: # label image
57 | # image = np.expand_dims(image, 0)
58 | pass
59 | else:
60 |         raise ValueError("unsupported image shape: {}".format(image.shape))
61 |
62 | return image
63 |
64 | def load_pose(filename):
65 | lines = open(filename).read().splitlines()
66 | assert len(lines) == 4
67 | lines = [[x[0],x[1],x[2],x[3]] for x in (x.split(" ") for x in lines)]
68 |
69 | return np.asarray(lines).astype(np.float32)
70 |
71 | def load_depth(file, image_dims):
72 | depth_image = imread(file)
73 | # preprocess
74 | depth_image = resize_crop_image(depth_image, image_dims)
75 | depth_image = depth_image.astype(np.float32) / 1000.0
76 |
77 | return depth_image
78 |
79 | def get_scene_data(scene_list):
80 | scene_data = {}
81 | for scene_id in scene_list:
82 | # load the original vertices, not the axis-aligned ones
83 | scene_data[scene_id] = np.load(os.path.join(SCANNET_DATA, scene_id)+"_vert.npy")[:, :3]
84 |
85 | return scene_data
86 |
87 | def compute_projection(points, depth, camera_to_world):
88 | """
89 | :param points: tensor containing all points of the point cloud (num_points, 3)
90 | :param depth: depth map (size: proj_image)
91 | :param camera_to_world: camera pose (4, 4)
92 |
93 | :return indices_3d (array with point indices that correspond to a pixel),
94 | :return indices_2d (array with pixel indices that correspond to a point)
95 |
96 | note:
97 | the first digit of indices represents the number of relevant points
98 | the rest digits are for the projection mapping
99 | """
100 | num_points = points.shape[0]
101 | num_frames = depth.shape[0]
102 | indices_3ds = torch.zeros(num_frames, num_points + 1).long().cuda()
103 | indices_2ds = torch.zeros(num_frames, num_points + 1).long().cuda()
104 |
105 | for i in range(num_frames):
106 | indices = PROJECTOR.compute_projection(to_tensor(points), to_tensor(depth[i]), to_tensor(camera_to_world[i]))
107 | if indices:
108 | indices_3ds[i] = indices[0].long()
109 | indices_2ds[i] = indices[1].long()
110 | print("found {} mappings in {} points from frame {}".format(indices_3ds[i][0], num_points, i))
111 |
112 | return indices_3ds, indices_2ds
113 |
114 | if __name__ == "__main__":
115 | parser = argparse.ArgumentParser()
116 | parser.add_argument('--gpu', type=str, help='gpu', default='0')
117 | parser.add_argument("--maxpool", action="store_true", help="use max pooling to aggregate features \
118 | (use majority voting in label projection mode)")
119 | args = parser.parse_args()
120 |
121 | # setting
122 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
123 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
124 |
125 | scene_list = get_scene_list()
126 | scene_data = get_scene_data(scene_list)
127 | with h5py.File(ENET_FEATURE_DATABASE, "w", libver="latest") as database:
128 | print("projecting multiview features to point cloud...")
129 | for scene_id in scene_list:
130 | print("processing {}...".format(scene_id))
131 | scene = scene_data[scene_id]
132 | # load frames
133 | frame_list = list(map(lambda x: x.split(".")[0], sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color")))))
134 | scene_images = np.zeros((len(frame_list), 3, 256, 328))
135 | scene_depths = np.zeros((len(frame_list), 32, 41))
136 | scene_poses = np.zeros((len(frame_list), 4, 4))
137 | for i, frame_id in enumerate(frame_list):
138 | scene_images[i] = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256])
139 | scene_depths[i] = load_depth(SCANNET_FRAME_PATH.format(scene_id, "depth", "{}.png".format(frame_id)), [41, 32])
140 | scene_poses[i] = load_pose(SCANNET_FRAME_PATH.format(scene_id, "pose", "{}.txt".format(frame_id)))
141 |
142 | # compute projections for each chunk
143 | projection_3d, projection_2d = compute_projection(scene, scene_depths, scene_poses)
144 |
145 | # compute valid projections
146 | projections = []
147 | for i in range(projection_3d.shape[0]):
148 | num_valid = projection_3d[i, 0]
149 | if num_valid == 0:
150 | continue
151 |
152 | projections.append((frame_list[i], projection_3d[i], projection_2d[i]))
153 |
154 | # # project
155 | # point_features = to_tensor(scene).new(scene.shape[0], 128).fill_(0)
156 | # for i, projection in enumerate(projections):
157 | # frame_id = projection[0]
158 | # projection_3d = projection[1]
159 | # projection_2d = projection[2]
160 | # feat = to_tensor(np.load(ENET_FEATURE_PATH.format(scene_id, frame_id)))
161 | # proj_feat = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0)
162 | # if i == 0:
163 | # point_features = proj_feat
164 | # else:
165 | # mask = ((point_features == 0).sum(1) == 128).nonzero().squeeze(1)
166 | # point_features[mask] = proj_feat[mask]
167 |
168 | # project
169 | point_features = to_tensor(scene).new(scene.shape[0], 128).fill_(0)
170 | for i, projection in enumerate(projections):
171 | frame_id = projection[0]
172 | projection_3d = projection[1]
173 | projection_2d = projection[2]
174 | feat = to_tensor(np.load(ENET_FEATURE_PATH.format(scene_id, frame_id)))
175 |
176 | proj_feat = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0)
177 |
178 | if args.maxpool:
179 | # only apply max pooling on the overlapping points
180 | # find out the points that are covered in projection
181 | feat_mask = ((proj_feat == 0).sum(1) != 128).bool()
182 | # find out the points that are not filled with features
183 | point_mask = ((point_features == 0).sum(1) == 128).bool()
184 |
185 | # for the points that are not filled with features
186 | # and are covered in projection,
187 | # simply fill those points with projected features
188 | mask = point_mask * feat_mask
189 | point_features[mask] = proj_feat[mask]
190 |
191 | # for the points that have already been filled with features
192 | # and are covered in projection,
193 | # apply max pooling first and then fill with pooled values
194 | mask = ~point_mask * feat_mask
195 | point_features[mask] = torch.max(point_features[mask], proj_feat[mask])
196 | else:
197 | if i == 0:
198 | point_features = proj_feat
199 | else:
200 | mask = (point_features == 0).sum(1) == 128
201 | point_features[mask] = proj_feat[mask]
202 |
203 | # save
204 | database.create_dataset(scene_id, data=point_features.cpu().numpy())
205 |
206 | print("done!")
207 |
208 |
209 |
--------------------------------------------------------------------------------
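
A tiny self-contained illustration (not repository code) of the --maxpool aggregation above, using 4 feature channels instead of 128: newly projected features fill points that are still empty and are max-pooled into points that already carry features.

import torch

C = 4  # stands in for the 128 ENet feature channels
point_features = torch.zeros(3, C)
point_features[0] = torch.tensor([1., 2., 3., 4.])   # point 0 already has features

proj_feat = torch.zeros(3, C)
proj_feat[0] = torch.tensor([5., 1., 1., 1.])        # this frame covers points 0 and 1
proj_feat[1] = torch.tensor([2., 2., 2., 2.])

feat_mask = ((proj_feat == 0).sum(1) != C).bool()        # covered by this projection
point_mask = ((point_features == 0).sum(1) == C).bool()  # still empty

mask = point_mask * feat_mask                            # empty and covered: fill
point_features[mask] = proj_feat[mask]
mask = ~point_mask * feat_mask                           # filled and covered: max-pool
point_features[mask] = torch.max(point_features[mask], proj_feat[mask])

print(point_features)
# point 0 -> [5., 2., 3., 4.] (elementwise max), point 1 -> [2., 2., 2., 2.], point 2 untouched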
/scripts/project_multiview_labels.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import h5py
4 | import math
5 | import argparse
6 | import torch
7 | import torch.nn as nn
8 | import numpy as np
9 | import pandas as pd
10 | import torchvision.transforms as transforms
11 |
12 | from imageio import imread
13 | from PIL import Image
14 | from tqdm import tqdm
15 | from plyfile import PlyData, PlyElement
16 | from collections import Counter
17 |
18 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
19 | from lib.config import CONF
20 | from lib.projection import ProjectionHelper
21 | from lib.enet import create_enet_for_3d
22 |
23 | SCANNET_LIST = CONF.SCANNETV2_LIST
24 | SCANNET_DATA = CONF.PATH.SCANNET_DATA
25 | SCANNET_FRAME_ROOT = CONF.SCANNET_FRAMES
26 | SCANNET_FRAME_PATH = os.path.join(SCANNET_FRAME_ROOT, "{}") # name of the file
27 |
28 | ENET_FEATURE_PATH = CONF.ENET_FEATURES_PATH
29 | ENET_FEATURE_DATABASE = CONF.MULTIVIEW
30 |
31 | # projection
32 | INTRINSICS = [[37.01983, 0, 20, 0],[0, 38.52470, 15.5, 0],[0, 0, 1, 0],[0, 0, 0, 1]]
33 | PROJECTOR = ProjectionHelper(INTRINSICS, 0.1, 4.0, [41, 32], 0.05)
34 |
35 | ENET_PATH = CONF.ENET_WEIGHTS
36 | ENET_GT_PATH = SCANNET_FRAME_PATH
37 |
38 | NYU40_LABELS = CONF.NYU40_LABELS
39 | SCANNET_LABELS = ['unannotated', 'wall', 'floor', 'chair', 'table', 'desk', 'bed', 'bookshelf', 'sofa', 'sink', 'bathtub', 'toilet', 'curtain', 'counter', 'door', 'window', 'shower curtain', 'refridgerator', 'picture', 'cabinet', 'otherfurniture']
40 |
41 | PC_LABEL_ROOT = os.path.join(CONF.PATH.OUTPUT, "projections")
42 | PC_LABEL_PATH = os.path.join(PC_LABEL_ROOT, "{}.ply")
43 |
44 | def get_nyu40_labels():
45 | labels = ["unannotated"]
46 | labels += pd.read_csv(NYU40_LABELS)["nyu40class"].tolist()
47 |
48 | return labels
49 |
50 | def get_prediction_to_raw():
51 | labels = get_nyu40_labels()
52 | mapping = {i: label for i, label in enumerate(labels)}
53 |
54 | return mapping
55 |
56 | def get_nyu_to_scannet():
57 | nyu_idx_to_nyu_label = get_prediction_to_raw()
58 | scannet_label_to_scannet_idx = {label: i for i, label in enumerate(SCANNET_LABELS)}
59 |
60 | # mapping
61 | nyu_to_scannet = {}
62 | for nyu_idx in range(41):
63 | nyu_label = nyu_idx_to_nyu_label[nyu_idx]
64 | if nyu_label in scannet_label_to_scannet_idx.keys():
65 | scannet_idx = scannet_label_to_scannet_idx[nyu_label]
66 | else:
67 | scannet_idx = 0
68 | nyu_to_scannet[nyu_idx] = scannet_idx
69 |
70 | return nyu_to_scannet
71 |
72 | def create_color_palette():
73 | return {
74 | "unannotated": (0, 0, 0),
75 | "floor": (152, 223, 138),
76 | "wall": (174, 199, 232),
77 | "cabinet": (31, 119, 180),
78 | "bed": (255, 187, 120),
79 | "chair": (188, 189, 34),
80 | "sofa": (140, 86, 75),
81 | "table": (255, 152, 150),
82 | "door": (214, 39, 40),
83 | "window": (197, 176, 213),
84 | "bookshelf": (148, 103, 189),
85 | "picture": (196, 156, 148),
86 | "counter": (23, 190, 207),
87 | "desk": (247, 182, 210),
88 | "curtain": (219, 219, 141),
89 | "refridgerator": (255, 127, 14),
90 | "bathtub": (227, 119, 194),
91 | "shower curtain": (158, 218, 229),
92 | "toilet": (44, 160, 44),
93 | "sink": (112, 128, 144),
94 | "otherfurniture": (82, 84, 163),
95 | }
96 |
97 | def get_scene_list(args):
98 | if args.scene_id == "-1":
99 | with open(SCANNET_LIST, 'r') as f:
100 | return sorted(list(set(f.read().splitlines())))
101 | else:
102 | return [args.scene_id]
103 |
104 | def to_tensor(arr):
105 | return torch.Tensor(arr).cuda()
106 |
107 | def resize_crop_image(image, new_image_dims):
108 | image_dims = [image.shape[1], image.shape[0]]
109 | if image_dims == new_image_dims:
110 | return image
111 | resize_width = int(math.floor(new_image_dims[1] * float(image_dims[0]) / float(image_dims[1])))
112 | image = transforms.Resize([new_image_dims[1], resize_width], interpolation=Image.NEAREST)(Image.fromarray(image))
113 | image = transforms.CenterCrop([new_image_dims[1], new_image_dims[0]])(image)
114 | image = np.array(image)
115 |
116 | return image
117 |
118 | def load_image(file, image_dims):
119 | image = imread(file)
120 | # preprocess
121 | image = resize_crop_image(image, image_dims)
122 | if len(image.shape) == 3: # color image
123 | image = np.transpose(image, [2, 0, 1]) # move feature to front
124 | image = transforms.Normalize(mean=[0.496342, 0.466664, 0.440796], std=[0.277856, 0.28623, 0.291129])(torch.Tensor(image.astype(np.float32) / 255.0))
125 | elif len(image.shape) == 2: # label image
126 | # image = np.expand_dims(image, 0)
127 | pass
128 | else:
129 | raise
130 |
131 | return image
132 |
133 | def load_pose(filename):
134 | lines = open(filename).read().splitlines()
135 | assert len(lines) == 4
136 | lines = [[x[0],x[1],x[2],x[3]] for x in (x.split(" ") for x in lines)]
137 |
138 | return np.asarray(lines).astype(np.float32)
139 |
140 | def load_depth(file, image_dims):
141 | depth_image = imread(file)
142 | # preprocess
143 | depth_image = resize_crop_image(depth_image, image_dims)
144 | depth_image = depth_image.astype(np.float32) / 1000.0
145 |
146 | return depth_image
147 |
148 | def visualize(coords, labels, scene_id):
149 | palette = create_color_palette()
150 | nyu_to_scannet = get_nyu_to_scannet()
151 | vertex = []
152 | for i in range(coords.shape[0]):
153 | vertex.append(
154 | (
155 | coords[i][0],
156 | coords[i][1],
157 | coords[i][2],
158 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][0],
159 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][1],
160 | palette[SCANNET_LABELS[nyu_to_scannet[labels[i]]]][2]
161 | )
162 | )
163 |
164 | vertex = np.array(
165 | vertex,
166 | dtype=[
167 | ("x", np.dtype("float32")),
168 | ("y", np.dtype("float32")),
169 | ("z", np.dtype("float32")),
170 | ("red", np.dtype("uint8")),
171 | ("green", np.dtype("uint8")),
172 | ("blue", np.dtype("uint8"))
173 | ]
174 | )
175 |
176 | output_pc = PlyElement.describe(vertex, "vertex")
177 | output_pc = PlyData([output_pc])
178 | os.makedirs(PC_LABEL_ROOT, exist_ok=True)
179 |     output_pc.write(PC_LABEL_PATH.format(scene_id))
180 |
181 | def get_scene_data(scene_list):
182 | scene_data = {}
183 | for scene_id in scene_list:
184 | scene_data[scene_id] = {}
185 | scene_data[scene_id] = np.load(os.path.join(SCANNET_DATA, scene_id)+"_vert.npy")[:, :3]
186 |
187 | return scene_data
188 |
189 | def compute_projection(points, depth, camera_to_world):
190 | """
191 | :param points: tensor containing all points of the point cloud (num_points, 3)
192 | :param depth: depth map (size: proj_image)
193 | :param camera_to_world: camera pose (4, 4)
194 |
195 | :return indices_3d (array with point indices that correspond to a pixel),
196 | :return indices_2d (array with pixel indices that correspond to a point)
197 |
198 | note:
199 | the first digit of indices represents the number of relevant points
200 | the rest digits are for the projection mapping
201 | """
202 | num_points = points.shape[0]
203 | num_frames = depth.shape[0]
204 | indices_3ds = torch.zeros(num_frames, num_points + 1).long().cuda()
205 | indices_2ds = torch.zeros(num_frames, num_points + 1).long().cuda()
206 |
207 | for i in range(num_frames):
208 | indices = PROJECTOR.compute_projection(to_tensor(points), to_tensor(depth[i]), to_tensor(camera_to_world[i]))
209 | if indices:
210 | indices_3ds[i] = indices[0].long()
211 | indices_2ds[i] = indices[1].long()
212 |
213 | return indices_3ds, indices_2ds
214 |
215 | def create_enet():
216 | enet_fixed, enet_trainable, enet_classifier = create_enet_for_3d(41, ENET_PATH, 21)
217 | enet = nn.Sequential(
218 | enet_fixed,
219 | enet_trainable,
220 | enet_classifier
221 | ).cuda()
222 | enet.eval()
223 | for param in enet.parameters():
224 | param.requires_grad = False
225 |
226 | return enet
227 |
228 |
229 | if __name__ == "__main__":
230 | parser = argparse.ArgumentParser()
231 | parser.add_argument("--scene_id", type=str, default="-1")
232 | parser.add_argument("--gt", action="store_true")
233 | parser.add_argument("--maxpool", action="store_true", help="use max pooling to aggregate features \
234 | (use majority voting in label projection mode)")
235 | args = parser.parse_args()
236 |
237 | scene_list = get_scene_list(args)
238 | scene_data = get_scene_data(scene_list)
239 | enet = create_enet()
240 | for scene_id in tqdm(scene_list):
241 | scene = scene_data[scene_id]
242 | # load frames
243 | frame_list = list(map(lambda x: x.split(".")[0], sorted(os.listdir(SCANNET_FRAME_ROOT.format(scene_id, "color")))))
244 | scene_images = np.zeros((len(frame_list), 3, 256, 328))
245 | scene_depths = np.zeros((len(frame_list), 32, 41))
246 | scene_poses = np.zeros((len(frame_list), 4, 4))
247 | for i, frame_id in enumerate(frame_list):
248 | scene_images[i] = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256])
249 | scene_depths[i] = load_depth(SCANNET_FRAME_PATH.format(scene_id, "depth", "{}.png".format(frame_id)), [41, 32])
250 | scene_poses[i] = load_pose(SCANNET_FRAME_PATH.format(scene_id, "pose", "{}.txt".format(frame_id)))
251 |
252 | # compute projections for each chunk
253 | projection_3d, projection_2d = compute_projection(scene, scene_depths, scene_poses)
254 |
255 | # compute valid projections
256 | projections = []
257 | for i in range(projection_3d.shape[0]):
258 | num_valid = projection_3d[i, 0]
259 | if num_valid == 0:
260 | continue
261 |
262 | projections.append((frame_list[i], projection_3d[i], projection_2d[i]))
263 |
264 | # # project
265 | # labels = None
266 | # for i, projection in enumerate(projections):
267 | # frame_id = projection[0]
268 | # projection_3d = projection[1]
269 | # projection_2d = projection[2]
270 | # if args.gt:
271 | # feat = to_tensor(load_image(ENET_GT_PATH.format(scene_id, "labelv2", "{}.png".format(frame_id)), [41, 32])).unsqueeze(0)
272 | # else:
273 | # image = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256])
274 | # feat = enet(to_tensor(image).unsqueeze(0)).max(1)[1].unsqueeze(1)
275 |
276 | # proj_label = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0)
277 | # if i == 0:
278 | # labels = proj_label
279 | # else:
280 | # labels[labels == 0] = proj_label[labels == 0]
281 |
282 | # project
283 | labels = to_tensor(scene).new(scene.shape[0], len(projections)).fill_(0).long()
284 | for i, projection in enumerate(projections):
285 | frame_id = projection[0]
286 | projection_3d = projection[1]
287 | projection_2d = projection[2]
288 |
289 | if args.gt:
290 | feat = to_tensor(load_image(ENET_GT_PATH.format(scene_id, "labelv2", "{}.png".format(frame_id)), [41, 32])).unsqueeze(0)
291 | else:
292 | image = load_image(SCANNET_FRAME_PATH.format(scene_id, "color", "{}.jpg".format(frame_id)), [328, 256])
293 | feat = enet(to_tensor(image).unsqueeze(0)).max(1)[1].unsqueeze(1)
294 |
295 | proj_label = PROJECTOR.project(feat, projection_3d, projection_2d, scene.shape[0]).transpose(1, 0) # num_points, 1
296 |
297 | if args.maxpool:
298 | # only apply max pooling on the overlapping points
299 | # find out the points that are covered in projection
300 | feat_mask = ((proj_label == 0).sum(1) != 1).bool()
301 | # find out the points that are not filled with labels
302 | point_mask = ((labels == 0).sum(1) == len(projections)).bool()
303 |
304 | # for the points that are not filled with features
305 | # and are covered in projection,
306 | # simply fill those points with labels
307 | mask = point_mask * feat_mask
308 | labels[mask, i] = proj_label[mask, 0]
309 |
310 |                     # for the points that already carry labels
311 |                     # and are covered in this projection,
312 |                     # also record the label to be aggregated by majority voting below
313 | mask = ~point_mask * feat_mask
314 | labels[mask, i] = proj_label[mask, 0]
315 | else:
316 | if i == 0:
317 | labels = proj_label
318 | else:
319 | labels[labels == 0] = proj_label[labels == 0]
320 |
321 | # aggregate
322 | if args.maxpool:
323 | new_labels = []
324 | for label_id in range(labels.shape[0]):
325 | point_label = labels[label_id].cpu().numpy().tolist()
326 | count = dict(Counter(point_label))
327 | count = sorted(count.items(), key=lambda x: x[1], reverse=True)
328 | count = [c for c in count if c[0] != 0]
329 | if count:
330 | new_labels.append(count[0][0])
331 | else:
332 | new_labels.append(0)
333 |
334 | labels = torch.FloatTensor(np.array(new_labels)[:, np.newaxis])
335 |
336 | # output
337 |         visualize(scene, labels.long().squeeze(1).cpu().numpy(), scene_id)
338 |
339 |
--------------------------------------------------------------------------------
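
A tiny illustration (not repository code) of the majority vote applied in the --maxpool branch above: for each point, the most frequent non-zero label across all projecting frames wins, and 'unannotated' (label 0) is only a fallback.

from collections import Counter

point_label = [0, 3, 3, 7, 0, 3]            # labels projected onto one point from 6 frames
count = dict(Counter(point_label))
count = sorted(count.items(), key=lambda x: x[1], reverse=True)
count = [c for c in count if c[0] != 0]      # drop 'unannotated'
print(count[0][0] if count else 0)           # -> 3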
/scripts/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import h5py
5 | import argparse
6 | import importlib
7 | import torch
8 | import torch.optim as optim
9 | import torch.nn as nn
10 | import numpy as np
11 |
12 | from torch.utils.data import DataLoader
13 | from datetime import datetime
14 | from copy import deepcopy
15 |
16 | sys.path.append(os.path.join(os.getcwd())) # HACK add the root folder
17 | from data.scannet.model_util_scannet import ScannetDatasetConfig
18 | from lib.dataset import ScannetReferenceDataset
19 | from lib.solver import Solver
20 | from lib.config import CONF
21 | from models.refnet import RefNet
22 |
23 | SCANREFER_TRAIN = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_train.json")))
24 | SCANREFER_VAL = json.load(open(os.path.join(CONF.PATH.DATA, "ScanRefer_filtered_val.json")))
25 |
26 | # constants
27 | DC = ScannetDatasetConfig()
28 |
29 | def get_dataloader(args, scanrefer, all_scene_list, split, config, augment):
30 | dataset = ScannetReferenceDataset(
31 | scanrefer=scanrefer[split],
32 | scanrefer_all_scene=all_scene_list,
33 | split=split,
34 | num_points=args.num_points,
35 | use_height=(not args.no_height),
36 | use_color=args.use_color,
37 | use_normal=args.use_normal,
38 | use_multiview=args.use_multiview
39 | )
40 | # dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
41 | dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
42 |
43 | return dataset, dataloader
44 |
45 | def get_model(args):
46 | # initiate model
47 | input_channels = int(args.use_multiview) * 128 + int(args.use_normal) * 3 + int(args.use_color) * 3 + int(not args.no_height)
48 | model = RefNet(
49 | num_class=DC.num_class,
50 | num_heading_bin=DC.num_heading_bin,
51 | num_size_cluster=DC.num_size_cluster,
52 | mean_size_arr=DC.mean_size_arr,
53 | input_feature_dim=input_channels,
54 | num_proposal=args.num_proposals,
55 | use_lang_classifier=(not args.no_lang_cls),
56 | use_bidir=args.use_bidir,
57 | no_reference=args.no_reference
58 | )
59 |
60 | # trainable model
61 | if args.use_pretrained:
62 | # load model
63 | print("loading pretrained VoteNet...")
64 | pretrained_model = RefNet(
65 | num_class=DC.num_class,
66 | num_heading_bin=DC.num_heading_bin,
67 | num_size_cluster=DC.num_size_cluster,
68 | mean_size_arr=DC.mean_size_arr,
69 | num_proposal=args.num_proposals,
70 | input_feature_dim=input_channels,
71 | use_bidir=args.use_bidir,
72 | no_reference=True
73 | )
74 |
75 | pretrained_path = os.path.join(CONF.PATH.OUTPUT, args.use_pretrained, "model_last.pth")
76 | pretrained_model.load_state_dict(torch.load(pretrained_path), strict=False)
77 |
78 | # mount
79 | model.backbone_net = pretrained_model.backbone_net
80 | model.vgen = pretrained_model.vgen
81 | model.proposal = pretrained_model.proposal
82 |
83 | if args.no_detection:
84 | # freeze pointnet++ backbone
85 | for param in model.backbone_net.parameters():
86 | param.requires_grad = False
87 |
88 | # freeze voting
89 | for param in model.vgen.parameters():
90 | param.requires_grad = False
91 |
92 | # freeze detector
93 | for param in model.proposal.parameters():
94 | param.requires_grad = False
95 |
96 | # to CUDA
97 | model = model.cuda()
98 |
99 | return model
100 |
101 | def get_num_params(model):
102 | model_parameters = filter(lambda p: p.requires_grad, model.parameters())
103 | num_params = int(sum([np.prod(p.size()) for p in model_parameters]))
104 |
105 | return num_params
106 |
107 | def get_solver(args, dataloader):
108 | model = get_model(args)
109 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
110 |
111 | if args.use_checkpoint:
112 | print("loading checkpoint {}...".format(args.use_checkpoint))
113 | stamp = args.use_checkpoint
114 | root = os.path.join(CONF.PATH.OUTPUT, stamp)
115 | checkpoint = torch.load(os.path.join(CONF.PATH.OUTPUT, args.use_checkpoint, "checkpoint.tar"))
116 | model.load_state_dict(checkpoint["model_state_dict"])
117 | optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
118 | else:
119 | stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
120 | if args.tag: stamp += "_"+args.tag.upper()
121 | root = os.path.join(CONF.PATH.OUTPUT, stamp)
122 | os.makedirs(root, exist_ok=True)
123 |
124 | # scheduler parameters for training solely the detection pipeline
125 | LR_DECAY_STEP = [80, 120, 160] if args.no_reference else None
126 | LR_DECAY_RATE = 0.1 if args.no_reference else None
127 | BN_DECAY_STEP = 20 if args.no_reference else None
128 | BN_DECAY_RATE = 0.5 if args.no_reference else None
129 |
130 | solver = Solver(
131 | model=model,
132 | config=DC,
133 | dataloader=dataloader,
134 | optimizer=optimizer,
135 | stamp=stamp,
136 | val_step=args.val_step,
137 | detection=not args.no_detection,
138 | reference=not args.no_reference,
139 | use_lang_classifier=not args.no_lang_cls,
140 | lr_decay_step=LR_DECAY_STEP,
141 | lr_decay_rate=LR_DECAY_RATE,
142 | bn_decay_step=BN_DECAY_STEP,
143 | bn_decay_rate=BN_DECAY_RATE
144 | )
145 | num_params = get_num_params(model)
146 |
147 | return solver, num_params, root
148 |
149 | def save_info(args, root, num_params, train_dataset, val_dataset):
150 | info = {}
151 | for key, value in vars(args).items():
152 | info[key] = value
153 |
154 | info["num_train"] = len(train_dataset)
155 | info["num_val"] = len(val_dataset)
156 | info["num_train_scenes"] = len(train_dataset.scene_list)
157 | info["num_val_scenes"] = len(val_dataset.scene_list)
158 | info["num_params"] = num_params
159 |
160 | with open(os.path.join(root, "info.json"), "w") as f:
161 | json.dump(info, f, indent=4)
162 |
163 | def get_scannet_scene_list(split):
164 | scene_list = sorted([line.rstrip() for line in open(os.path.join(CONF.PATH.SCANNET_META, "scannetv2_{}.txt".format(split)))])
165 |
166 | return scene_list
167 |
168 | def get_scanrefer(scanrefer_train, scanrefer_val, num_scenes):
169 | if args.no_reference:
170 | train_scene_list = get_scannet_scene_list("train")
171 | new_scanrefer_train = []
172 | for scene_id in train_scene_list:
173 | data = deepcopy(SCANREFER_TRAIN[0])
174 | data["scene_id"] = scene_id
175 | new_scanrefer_train.append(data)
176 |
177 | val_scene_list = get_scannet_scene_list("val")
178 | new_scanrefer_val = []
179 | for scene_id in val_scene_list:
180 | data = deepcopy(SCANREFER_VAL[0])
181 | data["scene_id"] = scene_id
182 | new_scanrefer_val.append(data)
183 | else:
184 | # get initial scene list
185 | train_scene_list = sorted(list(set([data["scene_id"] for data in scanrefer_train])))
186 | val_scene_list = sorted(list(set([data["scene_id"] for data in scanrefer_val])))
187 | if num_scenes == -1:
188 | num_scenes = len(train_scene_list)
189 | else:
190 | assert len(train_scene_list) >= num_scenes
191 |
192 | # slice train_scene_list
193 | train_scene_list = train_scene_list[:num_scenes]
194 |
195 | # filter data in chosen scenes
196 | new_scanrefer_train = []
197 | for data in scanrefer_train:
198 | if data["scene_id"] in train_scene_list:
199 | new_scanrefer_train.append(data)
200 |
201 | new_scanrefer_val = scanrefer_val
202 |
203 | # all scanrefer scene
204 | all_scene_list = train_scene_list + val_scene_list
205 |
206 | print("train on {} samples and val on {} samples".format(len(new_scanrefer_train), len(new_scanrefer_val)))
207 |
208 | return new_scanrefer_train, new_scanrefer_val, all_scene_list
209 |
210 | def train(args):
211 | # init training dataset
212 | print("preparing data...")
213 | scanrefer_train, scanrefer_val, all_scene_list = get_scanrefer(SCANREFER_TRAIN, SCANREFER_VAL, args.num_scenes)
214 | scanrefer = {
215 | "train": scanrefer_train,
216 | "val": scanrefer_val
217 | }
218 |
219 | # dataloader
220 | train_dataset, train_dataloader = get_dataloader(args, scanrefer, all_scene_list, "train", DC, True)
221 | val_dataset, val_dataloader = get_dataloader(args, scanrefer, all_scene_list, "val", DC, False)
222 | dataloader = {
223 | "train": train_dataloader,
224 | "val": val_dataloader
225 | }
226 |
227 | print("initializing...")
228 | solver, num_params, root = get_solver(args, dataloader)
229 |
230 | print("Start training...\n")
231 | save_info(args, root, num_params, train_dataset, val_dataset)
232 | solver(args.epoch, args.verbose)
233 |
234 | if __name__ == "__main__":
235 | parser = argparse.ArgumentParser()
236 | parser.add_argument("--tag", type=str, help="tag for the training, e.g. cuda_wl", default="")
237 | parser.add_argument("--gpu", type=str, help="gpu", default="0")
238 | parser.add_argument("--batch_size", type=int, help="batch size", default=14)
239 | parser.add_argument("--epoch", type=int, help="number of epochs", default=50)
240 | parser.add_argument("--verbose", type=int, help="iterations of showing verbose", default=10)
241 | parser.add_argument("--val_step", type=int, help="iterations of validating", default=5000)
242 | parser.add_argument("--lr", type=float, help="learning rate", default=1e-3)
243 | parser.add_argument("--wd", type=float, help="weight decay", default=1e-5)
244 | parser.add_argument("--num_points", type=int, default=40000, help="Point Number [default: 40000]")
245 | parser.add_argument("--num_proposals", type=int, default=256, help="Proposal number [default: 256]")
246 | parser.add_argument("--num_scenes", type=int, default=-1, help="Number of scenes [default: -1]")
247 | parser.add_argument("--seed", type=int, default=42, help="random seed")
248 | parser.add_argument("--no_height", action="store_true", help="Do NOT use height signal in input.")
249 |     parser.add_argument("--no_augment", action="store_true", help="Do NOT use data augmentation on the input point clouds.")
250 | parser.add_argument("--no_lang_cls", action="store_true", help="Do NOT use language classifier.")
251 | parser.add_argument("--no_detection", action="store_true", help="Do NOT train the detection module.")
252 | parser.add_argument("--no_reference", action="store_true", help="Do NOT train the localization module.")
253 | parser.add_argument("--use_color", action="store_true", help="Use RGB color in input.")
254 |     parser.add_argument("--use_normal", action="store_true", help="Use normals in input.")
255 | parser.add_argument("--use_multiview", action="store_true", help="Use multiview images.")
256 | parser.add_argument("--use_bidir", action="store_true", help="Use bi-directional GRU.")
257 | parser.add_argument("--use_pretrained", type=str, help="Specify the folder name containing the pretrained detection module.")
258 | parser.add_argument("--use_checkpoint", type=str, help="Specify the checkpoint root", default="")
259 | args = parser.parse_args()
260 |
261 | # setting
262 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
263 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
264 |
265 | # reproducibility
266 | torch.manual_seed(args.seed)
267 | torch.backends.cudnn.deterministic = True
268 | torch.backends.cudnn.benchmark = False
269 | np.random.seed(args.seed)
270 |
271 | train(args)
272 |
273 |
--------------------------------------------------------------------------------
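
For reference, a small sketch (not repository code) of how get_model derives the per-point feature dimension from the CLI flags; the xyz coordinates themselves are handled separately by the backbone.

def input_channels(use_multiview=False, use_normal=False, use_color=False, no_height=False):
    # mirrors the expression in get_model above
    return int(use_multiview) * 128 + int(use_normal) * 3 + int(use_color) * 3 + int(not no_height)

print(input_channels())                                     # 1   (height only, the default)
print(input_channels(use_color=True))                       # 4   (RGB + height)
print(input_channels(use_multiview=True, use_normal=True))  # 132 (multiview + normals + height)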
/utils/box_util.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper functions for calculating 2D and 3D bounding box IoU.
3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/box_util.py
4 |
5 | Collected and written by Charles R. Qi
6 | Last modified: Jul 2019
7 | """
8 |
9 | from __future__ import print_function
10 |
11 | import numpy as np
12 | from scipy.spatial import ConvexHull
13 |
14 | def polygon_clip(subjectPolygon, clipPolygon):
15 | """ Clip a polygon with another polygon.
16 |
17 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python
18 |
19 | Args:
20 | subjectPolygon: a list of (x,y) 2d points, any polygon.
21 | clipPolygon: a list of (x,y) 2d points, has to be *convex*
22 | Note:
23 | **points have to be counter-clockwise ordered**
24 |
25 | Return:
26 | a list of (x,y) vertex point for the intersection polygon.
27 | """
28 | def inside(p):
29 | return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
30 |
31 | def computeIntersection():
32 | dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ]
33 | dp = [ s[0] - e[0], s[1] - e[1] ]
34 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
35 | n2 = s[0] * e[1] - s[1] * e[0]
36 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
37 | return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3]
38 |
39 | outputList = subjectPolygon
40 | cp1 = clipPolygon[-1]
41 |
42 | for clipVertex in clipPolygon:
43 | cp2 = clipVertex
44 | inputList = outputList
45 | outputList = []
46 | s = inputList[-1]
47 |
48 | for subjectVertex in inputList:
49 | e = subjectVertex
50 | if inside(e):
51 | if not inside(s):
52 | outputList.append(computeIntersection())
53 | outputList.append(e)
54 | elif inside(s):
55 | outputList.append(computeIntersection())
56 | s = e
57 | cp1 = cp2
58 | if len(outputList) == 0:
59 | return None
60 | return(outputList)
61 |
62 | def poly_area(x,y):
63 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """
64 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))
65 |
66 | def poly_area_batch(x,y):
67 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """
68 | return 0.5 * np.abs(np.matmul(np.expand_dims(x, axis=1), np.roll(np.expand_dims(y, axis=2), 1, axis=1)) \
69 | - np.matmul(np.expand_dims(y, axis=1), np.roll(np.expand_dims(x, axis=2), 1, axis=1))).squeeze(axis=(1,2))
70 |
71 | def convex_hull_intersection(p1, p2):
72 | """ Compute area of two convex hull's intersection area.
73 | p1,p2 are a list of (x,y) tuples of hull vertices.
74 | return a list of (x,y) for the intersection and its volume
75 | """
76 | inter_p = polygon_clip(p1,p2)
77 | if inter_p is not None:
78 | hull_inter = ConvexHull(inter_p)
79 | return inter_p, hull_inter.volume
80 | else:
81 | return None, 0.0
82 |
83 | def box3d_vol(corners):
84 | ''' corners: (8,3) no assumption on axis direction '''
85 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2))
86 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2))
87 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2))
88 | return a*b*c
89 |
90 | def is_clockwise(p):
91 | x = p[:,0]
92 | y = p[:,1]
93 | return np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)) > 0
94 |
95 | def box3d_iou(corners1, corners2):
96 | ''' Compute 3D bounding box IoU.
97 |
98 | Input:
99 | corners1: numpy array (8,3), assume up direction is Z
100 | corners2: numpy array (8,3), assume up direction is Z
101 | Output:
102 | iou: 3D bounding box IoU
103 |
104 | '''
105 | # # corner points are in counter clockwise order
106 | # rect1 = [(corners1[i,0], corners1[i,2]) for i in range(3,-1,-1)]
107 | # rect2 = [(corners2[i,0], corners2[i,2]) for i in range(3,-1,-1)]
108 | # area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1])
109 | # area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1])
110 | # inter, inter_area = convex_hull_intersection(rect1, rect2)
111 | # iou_2d = inter_area/(area1+area2-inter_area)
112 | # ymax = min(corners1[0,1], corners2[0,1])
113 | # ymin = max(corners1[4,1], corners2[4,1])
114 | # inter_vol = inter_area * max(0.0, ymax-ymin)
115 | # vol1 = box3d_vol(corners1)
116 | # vol2 = box3d_vol(corners2)
117 | # iou = inter_vol / (vol1 + vol2 - inter_vol)
118 | # return iou, iou_2d
119 |
120 | x_min_1, x_max_1, y_min_1, y_max_1, z_min_1, z_max_1 = get_box3d_min_max(corners1)
121 | x_min_2, x_max_2, y_min_2, y_max_2, z_min_2, z_max_2 = get_box3d_min_max(corners2)
122 | xA = np.maximum(x_min_1, x_min_2)
123 | yA = np.maximum(y_min_1, y_min_2)
124 | zA = np.maximum(z_min_1, z_min_2)
125 | xB = np.minimum(x_max_1, x_max_2)
126 | yB = np.minimum(y_max_1, y_max_2)
127 | zB = np.minimum(z_max_1, z_max_2)
128 | inter_vol = np.maximum((xB - xA), 0) * np.maximum((yB - yA), 0) * np.maximum((zB - zA), 0)
129 | box_vol_1 = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) * (z_max_1 - z_min_1)
130 | box_vol_2 = (x_max_2 - x_min_2) * (y_max_2 - y_min_2) * (z_max_2 - z_min_2)
131 | iou = inter_vol / (box_vol_1 + box_vol_2 - inter_vol + 1e-8)
132 |
133 | return iou
134 |
135 | def get_box3d_min_max(corner):
136 | ''' Compute min and max coordinates for 3D bounding box
137 | Note: only for axis-aligned bounding boxes
138 |
139 | Input:
140 |         corner: numpy array (8,3), assume up direction is Z
141 | Output:
142 |         box_min_max: min and max coordinates of the 3D bounding box
143 |
144 | '''
145 |
146 | min_coord = corner.min(axis=0)
147 | max_coord = corner.max(axis=0)
148 | x_min, x_max = min_coord[0], max_coord[0]
149 | y_min, y_max = min_coord[1], max_coord[1]
150 | z_min, z_max = min_coord[2], max_coord[2]
151 |
152 | return x_min, x_max, y_min, y_max, z_min, z_max
153 |
154 | def box3d_iou_batch(corners1, corners2):
155 | ''' Compute 3D bounding box IoU.
156 | Note: only for axis-aligned bounding boxes
157 |
158 | Input:
159 | corners1: numpy array (N,8,3), assume up direction is Z (batch of N samples)
160 | corners2: numpy array (N,8,3), assume up direction is Z (batch of N samples)
161 | Output:
162 | iou: an array of 3D bounding box IoU
163 |
164 | '''
165 |
166 | x_min_1, x_max_1, y_min_1, y_max_1, z_min_1, z_max_1 = get_box3d_min_max_batch(corners1)
167 | x_min_2, x_max_2, y_min_2, y_max_2, z_min_2, z_max_2 = get_box3d_min_max_batch(corners2)
168 | xA = np.maximum(x_min_1, x_min_2)
169 | yA = np.maximum(y_min_1, y_min_2)
170 | zA = np.maximum(z_min_1, z_min_2)
171 | xB = np.minimum(x_max_1, x_max_2)
172 | yB = np.minimum(y_max_1, y_max_2)
173 | zB = np.minimum(z_max_1, z_max_2)
174 | inter_vol = np.maximum((xB - xA), 0) * np.maximum((yB - yA), 0) * np.maximum((zB - zA), 0)
175 | box_vol_1 = (x_max_1 - x_min_1) * (y_max_1 - y_min_1) * (z_max_1 - z_min_1)
176 | box_vol_2 = (x_max_2 - x_min_2) * (y_max_2 - y_min_2) * (z_max_2 - z_min_2)
177 | iou = inter_vol / (box_vol_1 + box_vol_2 - inter_vol + 1e-8)
178 |
179 | return iou
180 |
181 | def get_box3d_min_max_batch(corner):
182 | ''' Compute min and max coordinates for 3D bounding box
183 | Note: only for axis-aligned bounding boxes
184 |
185 | Input:
186 |         corner: numpy array (N,8,3), assume up direction is Z (batch of N samples)
187 | Output:
188 |         box_min_max: arrays of min and max coordinates of the 3D bounding boxes
189 |
190 | '''
191 |
192 | min_coord = corner.min(axis=1)
193 | max_coord = corner.max(axis=1)
194 | x_min, x_max = min_coord[:, 0], max_coord[:, 0]
195 | y_min, y_max = min_coord[:, 1], max_coord[:, 1]
196 | z_min, z_max = min_coord[:, 2], max_coord[:, 2]
197 |
198 | return x_min, x_max, y_min, y_max, z_min, z_max
199 |
200 | def get_iou(bb1, bb2):
201 | """
202 | Calculate the Intersection over Union (IoU) of two 2D bounding boxes.
203 |
204 | Parameters
205 | ----------
206 | bb1 : dict
207 | Keys: {'x1', 'x2', 'y1', 'y2'}
208 | The (x1, y1) position is at the top left corner,
209 | the (x2, y2) position is at the bottom right corner
210 | bb2 : dict
211 | Keys: {'x1', 'x2', 'y1', 'y2'}
212 |         The (x1, y1) position is at the top left corner,
213 | the (x2, y2) position is at the bottom right corner
214 |
215 | Returns
216 | -------
217 | float
218 | in [0, 1]
219 | """
220 | assert bb1['x1'] < bb1['x2']
221 | assert bb1['y1'] < bb1['y2']
222 | assert bb2['x1'] < bb2['x2']
223 | assert bb2['y1'] < bb2['y2']
224 |
225 | # determine the coordinates of the intersection rectangle
226 | x_left = max(bb1['x1'], bb2['x1'])
227 | y_top = max(bb1['y1'], bb2['y1'])
228 | x_right = min(bb1['x2'], bb2['x2'])
229 | y_bottom = min(bb1['y2'], bb2['y2'])
230 |
231 | if x_right < x_left or y_bottom < y_top:
232 | return 0.0
233 |
234 | # The intersection of two axis-aligned bounding boxes is always an
235 | # axis-aligned bounding box
236 | intersection_area = (x_right - x_left) * (y_bottom - y_top)
237 |
238 | # compute the area of both AABBs
239 | bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1'])
240 | bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1'])
241 |
242 | # compute the intersection over union by taking the intersection
243 | # area and dividing it by the sum of prediction + ground-truth
244 |     # areas - the intersection area
245 | iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
246 | assert iou >= 0.0
247 | assert iou <= 1.0
248 | return iou
249 |
250 | def box2d_iou(box1, box2):
251 | ''' Compute 2D bounding box IoU.
252 |
253 | Input:
254 | box1: tuple of (xmin,ymin,xmax,ymax)
255 | box2: tuple of (xmin,ymin,xmax,ymax)
256 | Output:
257 | iou: 2D IoU scalar
258 | '''
259 | return get_iou({'x1':box1[0], 'y1':box1[1], 'x2':box1[2], 'y2':box1[3]}, \
260 | {'x1':box2[0], 'y1':box2[1], 'x2':box2[2], 'y2':box2[3]})
261 |
262 | # -----------------------------------------------------------
263 | # Convert from box parameters to corner coordinates
264 | # -----------------------------------------------------------
265 | def roty(t):
266 | """Rotation about the y-axis."""
267 | c = np.cos(t)
268 | s = np.sin(t)
269 | return np.array([[c, 0, s],
270 | [0, 1, 0],
271 | [-s, 0, c]])
272 |
273 | def roty_batch(t):
274 | """Rotation about the y-axis.
275 | t: (x1,x2,...xn)
276 | return: (x1,x2,...,xn,3,3)
277 | """
278 | input_shape = t.shape
279 | output = np.zeros(tuple(list(input_shape)+[3,3]))
280 | c = np.cos(t)
281 | s = np.sin(t)
282 | output[...,0,0] = c
283 | output[...,0,2] = s
284 | output[...,1,1] = 1
285 | output[...,2,0] = -s
286 | output[...,2,2] = c
287 | return output
288 |
289 |
290 | def get_3d_box(box_size, heading_angle, center):
291 |     ''' box_size is array(l,w,h), heading_angle is in radians, clockwise from the positive x axis, center is xyz of box center
292 |         output (8,3) array for 3D box corners
293 | Similar to utils/compute_orientation_3d
294 | '''
295 | R = roty(heading_angle)
296 | l,w,h = box_size
297 | # x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]
298 | # y_corners = [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2]
299 | # z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]
300 | x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]
301 | y_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]
302 | z_corners = [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2]
303 | corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
304 | corners_3d[0,:] = corners_3d[0,:] + center[0]
305 | corners_3d[1,:] = corners_3d[1,:] + center[1]
306 | corners_3d[2,:] = corners_3d[2,:] + center[2]
307 | corners_3d = np.transpose(corners_3d)
308 | return corners_3d
309 |
310 | def get_3d_box_batch(box_size, heading_angle, center):
311 | ''' box_size: [x1,x2,...,xn,3]
312 | heading_angle: [x1,x2,...,xn]
313 | center: [x1,x2,...,xn,3]
314 | Return:
315 |         [x1,x2,...,xn,8,3]
316 | '''
317 | input_shape = heading_angle.shape
318 | R = roty_batch(heading_angle)
319 | l = np.expand_dims(box_size[...,0], -1) # [x1,...,xn,1]
320 | w = np.expand_dims(box_size[...,1], -1)
321 | h = np.expand_dims(box_size[...,2], -1)
322 | corners_3d = np.zeros(tuple(list(input_shape)+[8,3]))
323 | # corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1)
324 | # corners_3d[...,:,1] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1)
325 | # corners_3d[...,:,2] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1)
326 | corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1)
327 | corners_3d[...,:,1] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1)
328 | corners_3d[...,:,2] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1)
329 | tlist = [i for i in range(len(input_shape))]
330 | tlist += [len(input_shape)+1, len(input_shape)]
331 | corners_3d = np.matmul(corners_3d, np.transpose(R, tuple(tlist)))
332 | corners_3d += np.expand_dims(center, -2)
333 | return corners_3d
334 |
--------------------------------------------------------------------------------
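For reference, a minimal usage sketch (not part of the repository) showing how get_3d_box and box3d_iou fit together; array shapes follow the docstrings above:

import numpy as np
from utils.box_util import get_3d_box, box3d_iou

# two axis-aligned unit cubes (heading 0), offset by 0.5 along x
corners_a = get_3d_box(np.array([1.0, 1.0, 1.0]), 0.0, np.array([0.0, 0.0, 0.0]))
corners_b = get_3d_box(np.array([1.0, 1.0, 1.0]), 0.0, np.array([0.5, 0.0, 0.0]))
print(box3d_iou(corners_a, corners_b))  # intersection 0.5, union 1.5 -> IoU ~ 0.333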
/utils/eta.py:
--------------------------------------------------------------------------------
1 | '''
2 | File Created: Monday, 25th November 2019 1:35:30 pm
3 | Author: Dave Zhenyu Chen (zhenyu.chen@tum.de)
4 | '''
5 |
6 | def get_eta(start, end, extra, num_left):
7 | exe_s = end - start
8 | eta_s = (exe_s + extra) * num_left
9 | eta = {'h': 0, 'm': 0, 's': 0}
10 | if eta_s < 60:
11 | eta['s'] = int(eta_s)
12 | elif eta_s >= 60 and eta_s < 3600:
13 | eta['m'] = int(eta_s / 60)
14 | eta['s'] = int(eta_s % 60)
15 | else:
16 | eta['h'] = int(eta_s / (60 * 60))
17 | eta['m'] = int(eta_s % (60 * 60) / 60)
18 | eta['s'] = int(eta_s % (60 * 60) % 60)
19 |
20 | return eta
21 |
22 | def decode_eta(eta_sec):
23 | eta = {'h': 0, 'm': 0, 's': 0}
24 | if eta_sec < 60:
25 | eta['s'] = int(eta_sec)
26 | elif eta_sec >= 60 and eta_sec < 3600:
27 | eta['m'] = int(eta_sec / 60)
28 | eta['s'] = int(eta_sec % 60)
29 | else:
30 | eta['h'] = int(eta_sec / (60 * 60))
31 | eta['m'] = int(eta_sec % (60 * 60) / 60)
32 | eta['s'] = int(eta_sec % (60 * 60) % 60)
33 |
34 | return eta
--------------------------------------------------------------------------------
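For reference, a small usage sketch (not part of the repository) of decode_eta:

from utils.eta import decode_eta

eta = decode_eta(3725)                      # 3725 s -> 1 h, 2 m, 5 s
print("ETA: {h}h {m}m {s}s".format(**eta))  # ETA: 1h 2m 5s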
/utils/eval_det.py:
--------------------------------------------------------------------------------
1 | """
2 | Generic Code for Object Detection Evaluation
3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/eval_det.py
4 |
5 | Input:
6 | For each class:
7 | For each image:
8 | Predictions: box, score
9 | Groundtruths: box
10 |
11 | Output:
12 | For each class:
13 |         precision-recall and average precision
14 |
15 | Author: Charles R. Qi
16 |
17 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py
18 | """
19 | import numpy as np
20 |
21 | def voc_ap(rec, prec, use_07_metric=False):
22 | """ ap = voc_ap(rec, prec, [use_07_metric])
23 | Compute VOC AP given precision and recall.
24 | If use_07_metric is true, uses the
25 | VOC 07 11 point method (default:False).
26 | """
27 | if use_07_metric:
28 | # 11 point metric
29 | ap = 0.
30 | for t in np.arange(0., 1.1, 0.1):
31 | if np.sum(rec >= t) == 0:
32 | p = 0
33 | else:
34 | p = np.max(prec[rec >= t])
35 | ap = ap + p / 11.
36 | else:
37 | # correct AP calculation
38 | # first append sentinel values at the end
39 | mrec = np.concatenate(([0.], rec, [1.]))
40 | mpre = np.concatenate(([0.], prec, [0.]))
41 |
42 | # compute the precision envelope
43 | for i in range(mpre.size - 1, 0, -1):
44 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
45 |
46 | # to calculate area under PR curve, look for points
47 | # where X axis (recall) changes value
48 | i = np.where(mrec[1:] != mrec[:-1])[0]
49 |
50 | # and sum (\Delta recall) * prec
51 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
52 | return ap
53 |
54 | import os
55 | import sys
56 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
57 | from utils.metric_util import calc_iou # axis-aligned 3D box IoU
58 | def get_iou(bb1, bb2):
59 | """ Compute IoU of two bounding boxes.
60 |     ** Define your box IoU function HERE **
61 | """
62 | #pass
63 | iou3d = calc_iou(bb1, bb2)
64 | return iou3d
65 |
66 | from utils.box_util import box3d_iou
67 | def get_iou_obb(bb1,bb2):
68 | iou3d = box3d_iou(bb1,bb2)
69 | return iou3d
70 |
71 | def get_iou_main(get_iou_func, args):
72 | return get_iou_func(*args)
73 |
74 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou):
75 | """ Generic functions to compute precision/recall for object detection
76 | for a single class.
77 | Input:
78 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array
79 | gt: map of {img_id: [bbox]}
80 | ovthresh: scalar, iou threshold
81 | use_07_metric: bool, if True use VOC07 11 point method
82 | Output:
83 | rec: numpy array of length nd
84 | prec: numpy array of length nd
85 | ap: scalar, average precision
86 | """
87 |
88 | # construct gt objects
89 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}}
90 | npos = 0
91 | for img_id in gt.keys():
92 | bbox = np.array(gt[img_id])
93 | det = [False] * len(bbox)
94 | npos += len(bbox)
95 | class_recs[img_id] = {'bbox': bbox, 'det': det}
96 | # pad empty list to all other imgids
97 | for img_id in pred.keys():
98 | if img_id not in gt:
99 | class_recs[img_id] = {'bbox': np.array([]), 'det': []}
100 |
101 | # construct dets
102 | image_ids = []
103 | confidence = []
104 | BB = []
105 | for img_id in pred.keys():
106 | for box,score in pred[img_id]:
107 | image_ids.append(img_id)
108 | confidence.append(score)
109 | BB.append(box)
110 | confidence = np.array(confidence)
111 | BB = np.array(BB) # (nd,4 or 8,3 or 6)
112 |
113 | # sort by confidence
114 | sorted_ind = np.argsort(-confidence)
115 | sorted_scores = np.sort(-confidence)
116 | BB = BB[sorted_ind, ...]
117 | image_ids = [image_ids[x] for x in sorted_ind]
118 |
119 | # go down dets and mark TPs and FPs
120 | nd = len(image_ids)
121 | tp = np.zeros(nd)
122 | fp = np.zeros(nd)
123 | for d in range(nd):
124 | #if d%100==0: print(d)
125 | R = class_recs[image_ids[d]]
126 | bb = BB[d,...].astype(float)
127 | ovmax = -np.inf
128 | BBGT = R['bbox'].astype(float)
129 |
130 | if BBGT.size > 0:
131 | # compute overlaps
132 | for j in range(BBGT.shape[0]):
133 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))
134 | if iou > ovmax:
135 | ovmax = iou
136 | jmax = j
137 |
138 | #print d, ovmax
139 | if ovmax > ovthresh:
140 | if not R['det'][jmax]:
141 | tp[d] = 1.
142 | R['det'][jmax] = 1
143 | else:
144 | fp[d] = 1.
145 | else:
146 | fp[d] = 1.
147 |
148 | # compute precision recall
149 | fp = np.cumsum(fp)
150 | tp = np.cumsum(tp)
151 | rec = tp / float(npos + 1e-8)
152 | #print('NPOS: ', npos)
153 | # avoid divide by zero in case the first detection matches a difficult
154 | # ground truth
155 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
156 | ap = voc_ap(rec, prec, use_07_metric)
157 |
158 | return rec, prec, ap
159 |
160 | def eval_det_cls_wrapper(arguments):
161 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments
162 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func)
163 | return (rec, prec, ap)
164 |
165 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou):
166 | """ Generic functions to compute precision/recall for object detection
167 | for multiple classes.
168 | Input:
169 | pred_all: map of {img_id: [(classname, bbox, score)]}
170 | gt_all: map of {img_id: [(classname, bbox)]}
171 | ovthresh: scalar, iou threshold
172 | use_07_metric: bool, if true use VOC07 11 point method
173 | Output:
174 | rec: {classname: rec}
175 | prec: {classname: prec_all}
176 | ap: {classname: scalar}
177 | """
178 | pred = {} # map {classname: pred}
179 | gt = {} # map {classname: gt}
180 | for img_id in pred_all.keys():
181 | for classname, bbox, score in pred_all[img_id]:
182 | if classname not in pred: pred[classname] = {}
183 | if img_id not in pred[classname]:
184 | pred[classname][img_id] = []
185 | if classname not in gt: gt[classname] = {}
186 | if img_id not in gt[classname]:
187 | gt[classname][img_id] = []
188 | pred[classname][img_id].append((bbox,score))
189 | for img_id in gt_all.keys():
190 | for classname, bbox in gt_all[img_id]:
191 | if classname not in gt: gt[classname] = {}
192 | if img_id not in gt[classname]:
193 | gt[classname][img_id] = []
194 | gt[classname][img_id].append(bbox)
195 |
196 | rec = {}
197 | prec = {}
198 | ap = {}
199 | for classname in gt.keys():
200 | print('Computing AP for class: ', classname)
201 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func)
202 | print(classname, ap[classname])
203 |
204 | return rec, prec, ap
205 |
206 | from multiprocessing import Pool
207 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou):
208 | """ Generic functions to compute precision/recall for object detection
209 | for multiple classes.
210 | Input:
211 | pred_all: map of {img_id: [(classname, bbox, score)]}
212 | gt_all: map of {img_id: [(classname, bbox)]}
213 | ovthresh: scalar, iou threshold
214 | use_07_metric: bool, if true use VOC07 11 point method
215 | Output:
216 | rec: {classname: rec}
217 | prec: {classname: prec_all}
218 | ap: {classname: scalar}
219 | """
220 | pred = {} # map {classname: pred}
221 | gt = {} # map {classname: gt}
222 | for img_id in pred_all.keys():
223 | for classname, bbox, score in pred_all[img_id]:
224 | if classname not in pred: pred[classname] = {}
225 | if img_id not in pred[classname]:
226 | pred[classname][img_id] = []
227 | if classname not in gt: gt[classname] = {}
228 | if img_id not in gt[classname]:
229 | gt[classname][img_id] = []
230 | pred[classname][img_id].append((bbox,score))
231 | for img_id in gt_all.keys():
232 | for classname, bbox in gt_all[img_id]:
233 | if classname not in gt: gt[classname] = {}
234 | if img_id not in gt[classname]:
235 | gt[classname][img_id] = []
236 | gt[classname][img_id].append(bbox)
237 |
238 | rec = {}
239 | prec = {}
240 | ap = {}
241 | p = Pool(processes=10)
242 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred])
243 | p.close()
244 |     # ret_values only covers classes present in both gt and pred, so map the
245 |     # results back by class name instead of by position in gt.keys()
246 |     ret_values = dict(zip([classname for classname in gt.keys() if classname in pred], ret_values))
247 |     for classname in gt.keys():
248 |         if classname in pred:
249 |             rec[classname], prec[classname], ap[classname] = ret_values[classname]
250 |         else:
251 |             rec[classname] = 0
252 |             prec[classname] = 0
253 |             ap[classname] = 0
254 |         print(classname, ap[classname])
255 | 
256 |     return rec, prec, ap
257 | 
--------------------------------------------------------------------------------
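For reference, a minimal usage sketch (not part of the repository) of eval_det with the default axis-aligned IoU; boxes are (cx, cy, cz, dx, dy, dz) arrays as expected by calc_iou, and the scene/class names are illustrative:

import numpy as np
from utils.eval_det import eval_det

pred_all = {"scene0000_00": [("chair", np.array([0, 0, 0, 1, 1, 1]), 0.9)]}
gt_all   = {"scene0000_00": [("chair", np.array([0, 0, 0, 1, 1, 1]))]}
rec, prec, ap = eval_det(pred_all, gt_all, ovthresh=0.25)
print(ap["chair"])  # ~1.0 for a perfect match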
/utils/metric_util.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for metric evaluation.
3 | From: https://github.com/facebookresearch/votenet/blob/master/utils/metric_util.py
4 |
5 | Author: Or Litany and Charles R. Qi
6 | """
7 |
8 | import os
9 | import sys
10 | import torch
11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(BASE_DIR)
13 |
14 | import numpy as np
15 |
16 | # Mesh IO
17 | import trimesh
18 |
19 |
20 | # ----------------------------------------
21 | # Precision and Recall
22 | # ----------------------------------------
23 |
24 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None):
25 | '''
26 | Args:
27 | labels: (B, N, 6)
28 |         pred: (B, M, 7), bbox (6) followed by a confidence score
29 | iou_thresh: scalar
30 | conf_thresh: scalar
31 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider.
32 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider.
33 | Returns:
34 | TP,FP,FN,Precision,Recall
35 | '''
36 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead
37 | # of True/False array as in numpy
38 | assert(not torch.is_tensor(label_mask))
39 | assert(not torch.is_tensor(pred_mask))
40 | TP, FP, FN = 0, 0, 0
41 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1]))
42 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1]))
43 | for batch_idx in range(labels.shape[0]):
44 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :],
45 | pred[batch_idx, pred_mask[batch_idx,:]==1, :],
46 | iou_thresh, conf_thresh)
47 | TP += TP_i
48 | FP += FP_i
49 | FN += FN_i
50 |
51 | return TP, FP, FN, precision_recall(TP, FP, FN)
52 |
53 |
54 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh):
55 | """Compute P and R for predicted bounding boxes. Ignores classes!
56 | Args:
57 | labels: (N x bbox) ground-truth bounding boxes (6 dims)
58 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification
59 | Returns:
60 | TP, FP, FN
61 | """
62 |
63 |
64 | # for each pred box with high conf (C), compute IoU with all gt boxes.
65 | # TP = number of times IoU > th ; FP = C - TP
66 | # FN - number of scene objects without good match
67 |
68 | gt_bboxes = labels[:, :6]
69 |
70 | num_scene_bboxes = gt_bboxes.shape[0]
71 | conf = pred[:, 6]
72 |
73 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6]
74 | num_conf_pred_bboxes = conf_pred_bbox.shape[0]
75 |
76 | # init an array to keep iou between generated and scene bboxes
77 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes])
78 | for g_idx in range(num_conf_pred_bboxes):
79 | for s_idx in range(num_scene_bboxes):
80 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :])
81 |
82 |
83 | good_match_arr = (iou_arr >= iou_thresh)
84 |
85 | TP = good_match_arr.any(axis=1).sum()
86 | FP = num_conf_pred_bboxes - TP
87 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum()
88 |
89 | return TP, FP, FN
90 |
91 |
92 | def precision_recall(TP, FP, FN):
93 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0
94 |     Rec = 1.0 * TP / (TP + FN) if TP+FN>0 else 0
95 | return Prec, Rec
96 |
97 |
98 | def calc_iou(box_a, box_b):
99 | """Computes IoU of two axis aligned bboxes.
100 | Args:
101 |         box_a, box_b: 6D vectors, center (x,y,z) followed by box lengths (dx,dy,dz)
102 | Returns:
103 | iou
104 | """
105 |
106 | max_a = box_a[0:3] + box_a[3:6]/2
107 | max_b = box_b[0:3] + box_b[3:6]/2
108 | min_max = np.array([max_a, max_b]).min(0)
109 |
110 | min_a = box_a[0:3] - box_a[3:6]/2
111 | min_b = box_b[0:3] - box_b[3:6]/2
112 | max_min = np.array([min_a, min_b]).max(0)
113 | if not ((min_max > max_min).all()):
114 | return 0.0
115 |
116 | intersection = (min_max - max_min).prod()
117 | vol_a = box_a[3:6].prod()
118 | vol_b = box_b[3:6].prod()
119 | union = vol_a + vol_b - intersection
120 | return 1.0*intersection / union
121 |
122 |
123 | if __name__ == '__main__':
124 | print('running some tests')
125 |
126 | ############
127 | ## Test IoU
128 | ############
129 | box_a = np.array([0,0,0,1,1,1])
130 | box_b = np.array([0,0,0,2,2,2])
131 | expected_iou = 1.0/8
132 | pred_iou = calc_iou(box_a, box_b)
133 | assert expected_iou == pred_iou, 'function returned wrong IoU'
134 |
135 | box_a = np.array([0,0,0,1,1,1])
136 | box_b = np.array([10,10,10,2,2,2])
137 | expected_iou = 0.0
138 | pred_iou = calc_iou(box_a, box_b)
139 | assert expected_iou == pred_iou, 'function returned wrong IoU'
140 |
141 | print('IoU test -- PASSED')
142 |
143 | #########################
144 |     ## Test Precision Recall
145 | #########################
146 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]])
147 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]])
148 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5)
149 | assert TP == 2 and FP == 0 and FN == 0
150 | assert precision_recall(TP, FP, FN) == (1, 1)
151 |
152 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]])
153 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5)
154 | assert TP == 1 and FP == 0 and FN == 1
155 | assert precision_recall(TP, FP, FN) == (1, 0.5)
156 |
157 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]])
158 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5)
159 | assert TP == 1 and FP == 1 and FN == 1
160 | assert precision_recall(TP, FP, FN) == (0.5, 0.5)
161 |
162 | # wrong box has low confidence
163 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]])
164 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5)
165 | assert TP == 1 and FP == 0 and FN == 1
166 | assert precision_recall(TP, FP, FN) == (1, 0.5)
167 |
168 |     print('Precision Recall test -- PASSED')
169 |
170 |
--------------------------------------------------------------------------------
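For reference, a small usage sketch (not part of the repository) of multi_scene_precision_recall on a single-scene batch; predictions carry a confidence column as described in single_scene_precision_recall, and the values reuse the boxes from the tests above:

import numpy as np
from utils.metric_util import multi_scene_precision_recall

labels = np.array([[[0, 0, 0, 1, 1, 1], [3, 0, 1, 1, 10, 1]]], dtype=np.float64)          # (B=1, N=2, 6)
pred = np.array([[[0, 0, 0, 1, 1, 1, 1.0], [3, 0, 1, 1, 10, 1, 0.9]]], dtype=np.float64)  # (B=1, M=2, 7)
label_mask = np.ones((1, 2))
TP, FP, FN, (precision, recall) = multi_scene_precision_recall(labels, pred, 0.5, 0.5, label_mask)
print(TP, FP, FN, precision, recall)  # 2 0 0 1.0 1.0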
/utils/nms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from utils.pc_utils import bbox_corner_dist_measure
3 |
4 | # boxes are axis aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score)
5 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
6 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py
7 | '''
8 | def nms_2d(boxes, overlap_threshold):
9 | x1 = boxes[:,0]
10 | y1 = boxes[:,1]
11 | x2 = boxes[:,2]
12 | y2 = boxes[:,3]
13 | score = boxes[:,4]
14 | area = (x2-x1)*(y2-y1)
15 |
16 | I = np.argsort(score)
17 | pick = []
18 | while (I.size!=0):
19 | last = I.size
20 | i = I[-1]
21 | pick.append(i)
22 | suppress = [last-1]
23 | for pos in range(last-1):
24 | j = I[pos]
25 | xx1 = max(x1[i],x1[j])
26 | yy1 = max(y1[i],y1[j])
27 | xx2 = min(x2[i],x2[j])
28 | yy2 = min(y2[i],y2[j])
29 | w = xx2-xx1
30 | h = yy2-yy1
31 | if (w>0 and h>0):
32 | o = w*h/area[j]
33 | print('Overlap is', o)
34 | if (o>overlap_threshold):
35 | suppress.append(pos)
36 | I = np.delete(I,suppress)
37 | return pick
38 |
39 | def nms_2d_faster(boxes, overlap_threshold, old_type=False):
40 | x1 = boxes[:,0]
41 | y1 = boxes[:,1]
42 | x2 = boxes[:,2]
43 | y2 = boxes[:,3]
44 | score = boxes[:,4]
45 | area = (x2-x1)*(y2-y1)
46 |
47 | I = np.argsort(score)
48 | pick = []
49 | while (I.size!=0):
50 | last = I.size
51 | i = I[-1]
52 | pick.append(i)
53 |
54 | xx1 = np.maximum(x1[i], x1[I[:last-1]])
55 | yy1 = np.maximum(y1[i], y1[I[:last-1]])
56 | xx2 = np.minimum(x2[i], x2[I[:last-1]])
57 | yy2 = np.minimum(y2[i], y2[I[:last-1]])
58 |
59 | w = np.maximum(0, xx2-xx1)
60 | h = np.maximum(0, yy2-yy1)
61 |
62 | if old_type:
63 | o = (w*h)/area[I[:last-1]]
64 | else:
65 | inter = w*h
66 | o = inter / (area[i] + area[I[:last-1]] - inter)
67 |
68 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0])))
69 |
70 | return pick
71 |
72 | def nms_3d_faster(boxes, overlap_threshold, old_type=False):
73 | x1 = boxes[:,0]
74 | y1 = boxes[:,1]
75 | z1 = boxes[:,2]
76 | x2 = boxes[:,3]
77 | y2 = boxes[:,4]
78 | z2 = boxes[:,5]
79 | score = boxes[:,6]
80 | area = (x2-x1)*(y2-y1)*(z2-z1)
81 |
82 | I = np.argsort(score)
83 | pick = []
84 | while (I.size!=0):
85 | last = I.size
86 | i = I[-1]
87 | pick.append(i)
88 |
89 | xx1 = np.maximum(x1[i], x1[I[:last-1]])
90 | yy1 = np.maximum(y1[i], y1[I[:last-1]])
91 | zz1 = np.maximum(z1[i], z1[I[:last-1]])
92 | xx2 = np.minimum(x2[i], x2[I[:last-1]])
93 | yy2 = np.minimum(y2[i], y2[I[:last-1]])
94 | zz2 = np.minimum(z2[i], z2[I[:last-1]])
95 |
96 | l = np.maximum(0, xx2-xx1)
97 | w = np.maximum(0, yy2-yy1)
98 | h = np.maximum(0, zz2-zz1)
99 |
100 | if old_type:
101 | o = (l*w*h)/area[I[:last-1]]
102 | else:
103 | inter = l*w*h
104 | o = inter / (area[i] + area[I[:last-1]] - inter)
105 |
106 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0])))
107 |
108 | return pick
109 |
110 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False):
111 | x1 = boxes[:,0]
112 | y1 = boxes[:,1]
113 | z1 = boxes[:,2]
114 | x2 = boxes[:,3]
115 | y2 = boxes[:,4]
116 | z2 = boxes[:,5]
117 | score = boxes[:,6]
118 | cls = boxes[:,7]
119 | area = (x2-x1)*(y2-y1)*(z2-z1)
120 |
121 | I = np.argsort(score)
122 | pick = []
123 | while (I.size!=0):
124 | last = I.size
125 | i = I[-1]
126 | pick.append(i)
127 |
128 | xx1 = np.maximum(x1[i], x1[I[:last-1]])
129 | yy1 = np.maximum(y1[i], y1[I[:last-1]])
130 | zz1 = np.maximum(z1[i], z1[I[:last-1]])
131 | xx2 = np.minimum(x2[i], x2[I[:last-1]])
132 | yy2 = np.minimum(y2[i], y2[I[:last-1]])
133 | zz2 = np.minimum(z2[i], z2[I[:last-1]])
134 | cls1 = cls[i]
135 | cls2 = cls[I[:last-1]]
136 |
137 | l = np.maximum(0, xx2-xx1)
138 | w = np.maximum(0, yy2-yy1)
139 | h = np.maximum(0, zz2-zz1)
140 |
141 | if old_type:
142 | o = (l*w*h)/area[I[:last-1]]
143 | else:
144 | inter = l*w*h
145 | o = inter / (area[i] + area[I[:last-1]] - inter)
146 | o = o * (cls1==cls2)
147 |
148 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0])))
149 |
150 | return pick
151 |
152 |
153 | def nms_crnr_dist(boxes, conf, overlap_threshold):
154 |
155 | I = np.argsort(conf)
156 | pick = []
157 | while (I.size!=0):
158 | last = I.size
159 | i = I[-1]
160 | pick.append(i)
161 |
162 | scores = []
163 | for ind in I[:-1]:
164 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :]))
165 |
166 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0])))
167 |
168 | return pick
169 |
170 | if __name__=='__main__':
171 | a = np.random.random((100,5))
172 | print(nms_2d(a,0.9))
173 | print(nms_2d_faster(a,0.9))
174 |
--------------------------------------------------------------------------------
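For reference, a small usage sketch (not part of the repository) of nms_3d_faster; rows are (x1, y1, z1, x2, y2, z2, score):

import numpy as np
from utils.nms import nms_3d_faster

boxes = np.array([
    [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.9],
    [0.1, 0.1, 0.1, 1.1, 1.1, 1.1, 0.8],
])
print(nms_3d_faster(boxes, overlap_threshold=0.25))  # [0], the lower-scoring overlapping box is suppressed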
/utils/nn_distance.py:
--------------------------------------------------------------------------------
1 | """
2 | Chamfer distance in Pytorch.
3 | Author: Charles R. Qi
4 |
5 | From: https://github.com/facebookresearch/votenet/blob/master/utils/nn_distance.py
6 | """
7 |
8 | import torch
9 | import torch.nn as nn
10 | import numpy as np
11 |
12 |
13 | def huber_loss(error, delta=1.0):
14 | """
15 | Args:
16 | error: Torch tensor (d1,d2,...,dk)
17 | Returns:
18 | loss: Torch tensor (d1,d2,...,dk)
19 |
20 | x = error = pred - gt or dist(pred,gt)
21 | 0.5 * |x|^2 if |x|<=d
22 | 0.5 * d^2 + d * (|x|-d) if |x|>d
23 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
24 | """
25 | abs_error = torch.abs(error)
26 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta]))
27 | quadratic = torch.clamp(abs_error, max=delta)
28 | linear = (abs_error - quadratic)
29 | loss = 0.5 * quadratic**2 + delta * linear
30 | return loss
31 |
32 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False):
33 | """
34 | Input:
35 | pc1: (B,N,C) torch tensor
36 | pc2: (B,M,C) torch tensor
37 | l1smooth: bool, whether to use l1smooth loss
38 | delta: scalar, the delta used in l1smooth loss
39 | Output:
40 | dist1: (B,N) torch float32 tensor
41 | idx1: (B,N) torch int64 tensor
42 | dist2: (B,M) torch float32 tensor
43 | idx2: (B,M) torch int64 tensor
44 | """
45 | N = pc1.shape[1]
46 | M = pc2.shape[1]
47 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1)
48 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1)
49 | pc_diff = pc1_expand_tile - pc2_expand_tile
50 |
51 | if l1smooth:
52 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M)
53 | elif l1:
54 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M)
55 | else:
56 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M)
57 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N)
58 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M)
59 | return dist1, idx1, dist2, idx2
60 |
61 | def demo_nn_distance():
62 | np.random.seed(0)
63 | pc1arr = np.random.random((1,5,3))
64 | pc2arr = np.random.random((1,6,3))
65 | pc1 = torch.from_numpy(pc1arr.astype(np.float32))
66 | pc2 = torch.from_numpy(pc2arr.astype(np.float32))
67 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2)
68 | print(dist1)
69 | print(idx1)
70 | dist = np.zeros((5,6))
71 | for i in range(5):
72 | for j in range(6):
73 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2)
74 | print(dist)
75 | print('-'*30)
76 | print('L1smooth dists:')
77 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True)
78 | print(dist1)
79 | print(idx1)
80 | dist = np.zeros((5,6))
81 | for i in range(5):
82 | for j in range(6):
83 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:])
84 | quad = np.minimum(error, 1.0)
85 | linear = error - quad
86 | loss = 0.5*quad**2 + 1.0*linear
87 | dist[i,j] = np.sum(loss)
88 | print(dist)
89 |
90 |
91 | if __name__ == '__main__':
92 | demo_nn_distance()
93 |
--------------------------------------------------------------------------------