├── data ├── .gitkeep ├── scannet │ └── scannetv2_official_split.npz └── augmented_BBs │ ├── README.md │ └── visualize_bbs_data.py ├── utils ├── __init__.py ├── gt2eval.py ├── util.py ├── metric_util.py ├── evaluate_detections.py └── s3dis_util.py ├── teaser.jpeg ├── .gitignore ├── dataprocessing ├── oversegmentation │ ├── scene0776_00_oversegmentation.png │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── segmentator.cpp │ │ ├── tinyply.cpp │ │ └── tinyply.h │ ├── run_segmentator.py │ ├── README.md │ └── visualize_segments.py ├── mix3d_albumentations_aug.yaml ├── prepare_s3dis.py ├── augmentation.py └── s3dis.py ├── env.yml ├── configs ├── s3dis_fold1.txt ├── s3dis_fold2.txt ├── s3dis_fold3.txt ├── s3dis_fold4.txt ├── s3dis_fold5.txt ├── s3dis_fold6.txt ├── scannet.txt ├── scannet_dropout1.txt ├── scannet_dropout10.txt ├── scannet_dropout2.txt ├── scannet_dropout20.txt ├── scannet_dropout5.txt ├── arkitscenes.txt ├── scannet_noisy2.txt ├── scannet_noisy5.txt ├── scannet_noisy1.txt ├── scannet_noisy10.txt └── s3dis_detections_learnedPS_voxsem_fold5.txt ├── docs ├── installation.md ├── arkitscenes.md ├── code_structure.md └── s3dis.md ├── models ├── iou_nms.py ├── resnet.py ├── model.py └── training.py └── README.md /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/teaser.jpeg -------------------------------------------------------------------------------- /data/scannet/scannetv2_official_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/data/scannet/scannetv2_official_split.npz -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | *.pyc 4 | !.gitkeep 5 | .ipynb_checkpoints 6 | trash 7 | .ipynb_checkpoints 8 | experiments 9 | analysis 10 | visualize -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/scene0776_00_oversegmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/dataprocessing/oversegmentation/scene0776_00_oversegmentation.png -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1 FATAL_ERROR) 2 | set(CMAKE_CXX_STANDARD 11) 3 | project(Segmentator) 4 | set(SOURCES segmentator.cpp tinyply.cpp) 5 | add_executable(segmentator ${SOURCES}) 6 | -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - soumith 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - numpy==1.21.6 8 | - configargparse==1.5.2 9 | - pip==21.0.1 10 | - scipy==1.7.1 11 | - pip: 12 | - 
open3d==0.13.0 13 | - pyviz3d==0.2.28 14 | - tensorboard==2.0 15 | - albumentations==1.0.3 16 | - plyfile==0.7.4 17 | - protobuf==3.20.0 18 | - pynvml==11.4.1 19 | - quaternion==0.9.9 20 | -------------------------------------------------------------------------------- /utils/gt2eval.py: -------------------------------------------------------------------------------- 1 | import datasets.scannet as scannet 2 | import config_loader as cfg_loader 3 | import os 4 | from glob import glob 5 | 6 | cfg = cfg_loader.get_config(['--config','src/instances_ndf/configs/l1_lr-4_relu.txt']) 7 | 8 | scans = glob('data/scannet/scans/*') 9 | outfolder = os.path.join('data','scannet','gt_instance_data_txt') 10 | os.makedirs(outfolder, exist_ok = True) 11 | 12 | for scan in scans: 13 | raise # method has changes 14 | scene, labels = scannet.process_scene(os.path.basename(scan), cfg) 15 | gt_format = labels['instances'] + 1000 * labels['semantics'] 16 | 17 | with open(os.path.join(outfolder, os.path.basename(scan)) + '.txt', 'w') as f: 18 | for id in gt_format: 19 | f.write('%d\n' % id) 20 | break 21 | -------------------------------------------------------------------------------- /configs/s3dis_fold1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 1 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 2 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold3.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = 
[0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 3 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold4.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 4 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 5 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold6.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 6 34 | 35 | # BB supervision 36 | bb_supervision 
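Usage note (the commands below mirror docs/s3dis.md later in this dump): each `s3dis_fold[N]` config selects Area N as the validation split via `s3dis_split_fold`; training and evaluation for a fold are launched as follows.

```bash
# Sketch following docs/s3dis.md: train and evaluate one S3DIS cross-validation fold.
# The same pattern applies to configs/s3dis_fold1.txt through configs/s3dis_fold6.txt.
python models/training.py --config configs/s3dis_fold5.txt
python models/evaluation.py --config configs/s3dis_fold5.txt
```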
-------------------------------------------------------------------------------- /configs/scannet.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | 32 | # augmentations 33 | augmentation 34 | scaling_aug = [1.0, 0.8, 1.2] 35 | flipping_aug = 0.5 36 | rotation_90_aug 37 | apply_hue_aug -------------------------------------------------------------------------------- /dataprocessing/mix3d_albumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | __version__: 0.4.5 2 | transform: 3 | __class_fullname__: albumentations.core.composition.Compose 4 | additional_targets: {} 5 | bbox_params: null 6 | keypoint_params: null 7 | p: 1.0 8 | transforms: 9 | - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast 10 | always_apply: true 11 | brightness_by_max: true 12 | brightness_limit: 13 | - -0.2 14 | - 0.2 15 | contrast_limit: 16 | - -0.2 17 | - 0.2 18 | p: 0.5 19 | - __class_fullname__: albumentations.augmentations.transforms.RGBShift 20 | always_apply: true 21 | b_shift_limit: 22 | - -20 23 | - 20 24 | g_shift_limit: 25 | - -20 26 | - 20 27 | p: 0.5 28 | r_shift_limit: 29 | - -20 30 | - 20 31 | -------------------------------------------------------------------------------- /configs/scannet_dropout1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.01 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout10.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training 
settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.10 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.02 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout20.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.20 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | 
lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.05 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/arkitscenes.txt: -------------------------------------------------------------------------------- 1 | 2 | # experiment 3 | exp_name = cfg_name 4 | 5 | # data 6 | use_normals_input 7 | bb_supervision 8 | data_dir = ./data/ARKitScenes/ 9 | dataset_name = arkitscenes 10 | 11 | # model 12 | do_segment_pooling 13 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 14 | 15 | point_association = False 16 | 17 | # eval 18 | eval_ths = [0.5, 0.05, 0.4, 0.6] 19 | eval_every = 10000000000 20 | ckpt_every = 2 21 | 22 | #training settings 23 | batch_size = 4 24 | voxel_size = 0.04 25 | subsample_rate = 2 26 | lr = 0.001 27 | loss_weight_bb_bounds = 0.5 28 | loss_weight_bb_scores = 3 29 | loss_weight_semantics = 0.3 30 | mlp_bb_scores_start_epoch = 100 31 | 32 | # augmentations - no elastic distortion for now 33 | augmentation 34 | rotation_aug=[1.0, 0.0, 0.9] 35 | scaling_aug = [1.0, 0.8, 1.2] 36 | # flipping_aug = 0.5 37 | # position_jittering = [0.2, 0.005] 38 | # flipping_aug = 0.5 39 | # rotation_90_aug -------------------------------------------------------------------------------- /configs/scannet_noisy2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=4cm here 34 | noisy_boxes = 0.02 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # 
For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=10cm here 34 | noisy_boxes = 0.05 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=2cm here 34 | noisy_boxes = 0.01 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy10.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=20cm here 34 | noisy_boxes = 0.10 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/run_segmentator.py: -------------------------------------------------------------------------------- 1 | """Generates the segmentations of 3d scanes given as .ply using 'segmentator' (in the cpp dir). 
2 | """ 3 | 4 | import subprocess 5 | import os 6 | 7 | from absl import app 8 | from absl import flags 9 | 10 | FLAGS = flags.FLAGS 11 | flags.DEFINE_string('scene_path', '../../data/scannet/scans_test/', help="Path to the .ply scenes.") 12 | flags.DEFINE_string('segments_path', '../../data/scannet/scans_test_segmented', help='Path to the generated segments.') 13 | flags.DEFINE_string('segmentator_path', 'cpp/segmentator', help='Path to the segmentator executable.') 14 | 15 | 16 | def segment_scene(scene_name): 17 | scene_path = os.path.join(FLAGS.scene_path, f'{scene_name}/{scene_name}_vh_clean_2.ply') 18 | command = [FLAGS.segmentator_path, scene_path, '0.01', '20', FLAGS.segments_path] 19 | subprocess.call(command) 20 | 21 | def main(_): 22 | if not os.path.exists(FLAGS.segments_path): 23 | os.makedirs(FLAGS.segments_path) 24 | scene_names = [file.split('.')[0] for file in os.listdir(FLAGS.scene_path)] 25 | for scene_name in scene_names: 26 | segment_scene(scene_name) 27 | 28 | 29 | if __name__ == '__main__': 30 | app.run(main) 31 | -------------------------------------------------------------------------------- /configs/s3dis_detections_learnedPS_voxsem_fold5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.3, 0.03, 0.2, 0.6] 14 | #checkpoint = checkpoint_134h:41m:14s_484874.59536361694 15 | # checkpoint = checkpoint_206h:12m:53s_742373.8897235394 #0.673 16 | # checkpoint = checkpoint_192h:58m:47s_694727.7309098244 #0.683 17 | # checkpoint = checkpoint_190h:9m:7s_684547.2124330997 # 0.687 18 | # checkpoint = checkpoint_189h:14m:11s_681251.3121433258 # 0.689 19 | # checkpoint = checkpoint_191h:6m:14s_687974.8505253792 #0.676 20 | # checkpoint = checkpoint_186h:29m:40s_671380.9550452232 # 0.667 21 | # checkpoint = checkpoint_188h:20m:22s_678022.2635447979 22 | # checkpoint = checkpoint_192h:58m:47s_694727.7309098244 23 | checkpoint = checkpoint_195h:35m:19s_704119.6752953529 24 | # 0.693 25 | 26 | #training settings 27 | batch_size = 4 28 | num_workers = 8 29 | lr = 0.001 30 | loss_weight_bb_bounds = 0.5 31 | loss_weight_bb_scores = 3 32 | mlp_bb_scores_start_epoch = 100 33 | 34 | 35 | # augmentations 36 | augmentation 37 | scaling_aug= [1.0, 0.8, 1.2] 38 | rotation_aug=1.0 39 | 40 | # dataset settings 41 | dataset_name s3dis 42 | s3dis_split_fold 5 43 | point_sampling_rate 0.25 44 | ignore_wall_ceiling_floor 45 | superpoint_algo learned_superpoint 46 | 47 | load_unused_head -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/README.md: -------------------------------------------------------------------------------- 1 | Mesh Segmentation 2 | ================= 3 | 4 | Adapted from the original from the [ScaNet Github](https://github.com/ScanNet/ScanNet/tree/master/Segmentator). 5 | 6 | Note that the segments for the validation scenes are already available the ``*.segs.json`` files. 7 | 8 | # 1. Compile the segmentator 9 | 10 | Mesh segmentation code using Felzenswalb and Huttenlocher's [*Graph Based Image Segmentation*](https://cs.brown.edu/~pff/segment/index.html) algorithm on computed mesh normals. 
11 | 12 | To compile the segmentator code, navigate to the segmentator directory: 13 | ``` 14 | cd box2mask/dataprocessing/oversegmentation/cpp 15 | ``` 16 | Set `CMAKE_SOURCE_DIR={path_to_project_directory}/dataprocessing/oversegmentation/cpp` in `Makefile`, where `{path_to_project_directory}` is the path to the project home directory. 17 | 18 | Build by running `make` (or create makefiles for your system using `cmake`). This will create a `segmentator` binary that can be called to generate segmentations. 19 | 20 | To see if it works, try: 21 | 22 | `./segmentator input.ply [kThresh=0.01] [segMinVerts=20]` 23 | 24 | For example: 25 | 26 | `./segmentator ../../../data/scannet/scans/scene0011_00/scene0011_00_vh_clean_2.ply 0.01 20` 27 | 28 | ### Arguments 29 | 1. Path to an input mesh in PLY format. 30 | 2. The segmentation cluster threshold parameter; larger values lead to larger segments (optional). 31 | 3. The minimum number of vertices per segment, enforced by merging small clusters into larger segments (optional). 32 | 33 | # 2. Generate the segments 34 | 35 | `run_segmentator.py` 36 | 37 | # 3. Visualize the segments 38 | 39 | `visualize_segments.py` 40 | 41 | ![Segmentation scene_0776_00_oversegmentation](scene0776_00_oversegmentation.png "Oversegmentation") 42 | -------------------------------------------------------------------------------- /data/augmented_BBs/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce Augmented BBs Experiments 2 | 3 | The following instructions are for reproducing the experiments in Fig. 7 of our paper. We use seeds to generate the same set of augmented data in every run during training. The config files of these experiments are in `box2mask/configs/`. Each config is named either `scannet_dropout[percentage]` (`percentage` is the percentage of boxes that are missing) or `scannet_noisy[sigma]` (`sigma` is the standard deviation of the noise applied to each dimension). 4 | 5 | 6 | Similar to the main experiment, you can train the model using the augmented bounding boxes as in the example below: 7 | 8 | ``` 9 | python models/training.py --config configs/scannet_noisy1.txt 10 | ``` 11 | 12 | To evaluate with the validation set: 13 | 14 | ``` 15 | python models/evaluation.py --config configs/scannet_noisy1.txt 16 | ``` 17 | 18 | ## Augmented Data 19 | 20 | We also store our augmented BBs as npy files. The following script will download and extract the data to `data/augmented_BBs/scannet_augmented_boxes_data/`: 21 | ``` 22 | cd data/augmented_BBs/ 23 | wget https://datasets.d2.mpi-inf.mpg.de/box2mask/scannet_augmented_boxes_data.tar.gz 24 | tar -xvf scannet_augmented_boxes_data.tar.gz 25 | ``` 26 | 27 | The files are organized as follows: 28 | 29 | ```shell 30 | <data_name>/ 31 | |-- <scene_name>.npy 32 | ``` 33 | 34 | where `<data_name>` is `dropout[percentage]` (missing bounding box labels, `percentage` can be 1, 2, 5 or 10) or `noisy[sigma]` (noisy label data, `sigma` can be 2, 4, 10 or 20). 35 | Each `.npy` file contains the lists of min corners and max corners of the bounding boxes as well as the semantic ids. 36 | 37 | We provide a script to visualize the bounding boxes of a scene in the data. The command below will produce an interactive visualization server in `data/augmented_BBs/visualize/`. 
38 | 39 | ``` 40 | cd data/augmented_BBs/ 41 | python visualize_bbs_data.py --data noisy1 --scene_name scene0293_00 --data_path data/augmented_BBs/scannet_augmented_boxes_data/ 42 | ``` 43 | Use the command bellow to start the visualization server: 44 | 45 | ``` 46 | cd data/augmented_BBs/visualize/ 47 | python -m http.server 6008 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | ## Installation of Minkowski Engine 3 | 4 | We use [Minkowski Engine](https://github.com/NVIDIA/MinkowskiEngine) for sparse convolution of point cloud in our project. 5 | 6 | `MinkowskiEngine==0.5.4` with `cudatoolkit=10.2` was used for the project. 7 | 8 | First we creat a new environment 9 | ``` 10 | conda create -n box2mask python=3.7 11 | conda activate box2mask 12 | ``` 13 | 14 | Setup the CUDA system environment variables like the example below: 15 | ``` 16 | cuda_version=10.2 17 | # please set the right path to CUDA in your system, bellow is an example used for our system 18 | export CUDA_HOME=/usr/lib/cuda-${cuda_version}/ 19 | export PATH=/usr/lib/cuda-${cuda_version}/bin/:${PATH} 20 | export LD_LIBRARY_PATH=/usr/lib/cuda-${cuda_version}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 21 | export CUDA_PATH=/usr/lib/cuda-${cuda_version}/ 22 | ``` 23 | 24 | Next, we install pytorch with cudatoolkit and dependencies 25 | ``` 26 | conda install pytorch=1.8.1 torchvision cudatoolkit=${cuda_version} -c pytorch -c nvidia 27 | ``` 28 | 29 | Install dependencies for Minkowski Engine 30 | ``` 31 | pip install torch ninja 32 | conda install openblas-devel -c anaconda 33 | ``` 34 | 35 | We then install gcc version 7 36 | `sudo apt install g++-7` # For CUDA 10.2, must use GCC <= 8 37 | > Make sure `g++-7 --version` is at least 7.4.0 38 | > export CXX=g++-7 39 | 40 | Install Minkowski Engine via pip: 41 | ``` 42 | pip install -U MinkowskiEngine==0.5.4 --install-option="--blas=openblas" -v --no-deps 43 | ``` 44 | 45 | For more detailed installation instruction, see [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine). 46 | ## Checking installations of Minkowski Engine 47 | 48 | The following commands will clone the repository of Minkowski Engine and run an example segmentation model on an indoor point cloud: 49 | ``` 50 | git clone https://github.com/NVIDIA/MinkowskiEngine.git 51 | cd MinkowskiEngine 52 | # code requires open3d 53 | pip install open3d 54 | python -m examples.indoor 55 | ``` 56 | 57 | ## Install GIT repository and other dependencies 58 | The following commands will clone Box2Mask repo on your machine and install the remaining dependencies. 
Note that you should still be using `box2mask` environemnt 59 | ``` 60 | git clone -b release https://github.com/jchibane/Box2Mask.git box2mask 61 | cd box2mask 62 | conda env update --file env.yml 63 | ``` 64 | 65 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/visualize_segments.py: -------------------------------------------------------------------------------- 1 | """Visualizes instance segmentations (from scannet format).""" 2 | 3 | import os 4 | import json 5 | import random 6 | import pyviz3d.visualizer as viz 7 | import open3d as o3d 8 | import numpy as np 9 | 10 | from absl import app 11 | from absl import flags 12 | 13 | FLAGS = flags.FLAGS 14 | flags.DEFINE_string('path_scenes', '../../data/scannet/scans_test/', help='Path to scene .ply') 15 | flags.DEFINE_string('path_segments', '../../data/scannet/scans_test_segmented/', help='Path to scene .seg.json') 16 | flags.DEFINE_string('path_viewer', '../../viewer/', 'Path to the visualizations.') 17 | 18 | 19 | def visualize_scene(scene_name): 20 | """Propagates the actual per-point predictions to the segments.""" 21 | 22 | path_ply = os.path.join(FLAGS.path_scenes, f'{scene_name}/{scene_name}_vh_clean_2.ply') 23 | path_segs_json = os.path.join(FLAGS.path_segments, f'{scene_name}_vh_clean_2.0.010000.segs.json') 24 | path_viewer = os.path.join(FLAGS.path_viewer, scene_name) 25 | 26 | # Read ply 27 | mesh = o3d.io.read_triangle_mesh(path_ply) 28 | mesh.compute_vertex_normals() 29 | mesh.normalize_normals() 30 | vertices_positions = np.asarray(mesh.vertices) 31 | vertices_positions -= np.mean(vertices_positions, axis=0) 32 | vertices_normals = np.asarray(mesh.vertex_normals) 33 | vertices_colors = np.asarray(mesh.vertex_colors) 34 | 35 | # Read segments from json 36 | with open(path_segs_json) as f: 37 | data = json.load(f) 38 | segment_indices_list = data["segIndices"] 39 | segment_indices_int_array = np.asarray(segment_indices_list, dtype='int32') 40 | 41 | # Create segment colors 42 | segment_colors = np.ones_like(vertices_positions) 43 | for segment_id in set(segment_indices_list): 44 | mask = segment_id == segment_indices_int_array # point ids of segment 45 | segment_colors[mask] = np.array([random.random()*255, random.random()*255, random.random()*255]) 46 | 47 | v = viz.Visualizer() 48 | v.add_points(scene_name+'_color', vertices_positions, vertices_colors*255, vertices_normals, point_size=25) 49 | v.add_points(scene_name+'_segments', vertices_positions, segment_colors, vertices_normals, point_size=25) 50 | v.save(path_viewer, verbose=True) 51 | 52 | 53 | def main(_): 54 | scene_names = sorted([s.split('.')[0] for s in os.listdir(f'{FLAGS.path_scenes}')]) 55 | for scene_name in scene_names: 56 | visualize_scene(scene_name) 57 | 58 | 59 | if __name__ == '__main__': 60 | app.run(main) 61 | -------------------------------------------------------------------------------- /docs/arkitscenes.md: -------------------------------------------------------------------------------- 1 | The following instruction is for reproducing the experiments in Table. 2 in our paper. 2 | 3 | Follow the original ARKitScenes [instruction](https://github.com/apple/ARKitScenes/blob/main/DATA.md) to download the data (3dod dataset). 4 | The oversegmentation for ARKitScenes can be download here: [train](https://datasets.d2.mpi-inf.mpg.de/box2mask/segmented_train_clean.tar.gz) and [valid](https://datasets.d2.mpi-inf.mpg.de/box2mask/segmented_val_clean.tar.gz). 
5 | After you download the data and our prepared oversegmentations, the `Training`, `Validation`, and oversegmentation folders should be arranged in the following structure for our project: 6 | 7 | ``` 8 | box2mask/data/ARKitScenes/3dod/ 9 | └── Training 10 | ├── 44358604 # scene name 11 | ├── 44358604_3dod_annotation.json # segmentation label of the scene 12 | ├── 44358604_3dod_mesh.ply # mesh file 13 | ├── 44358604_frames/ # Containing RGBD camera sequences 14 | ├── 45662912 15 | ├── 45662912_3dod_annotation.json 16 | ├── 45662912_3dod_mesh.ply 17 | ├── 45662912_frames/ 18 | ... 19 | └── Validation/ 20 | ├── 41069021 21 | ├── 41069021_3dod_annotation.json 22 | ├── 41069021_3dod_mesh.ply 23 | ├── 41069021_frames/ 24 | ├── 25 | ... 26 | └── segmented_train_clean/ 27 | ├── 47331587_3dod_mesh.0.010000.segs.json 28 | ├── 44358604_3dod_mesh.0.010000.segs.json 29 | ... 30 | └── segmented_val_clean/ 31 | ├── 41069021_3dod_mesh.0.010000.segs.json 32 | ... 33 | ``` 34 | 35 | Similar to the main experiment, you can train the model using `training.py` from the root folder: 36 | 37 | ```bash 38 | python models/training.py --config configs/arkitscenes.txt 39 | ``` 40 | 41 | To evaluate with the validation set (producing results like Table 2): 42 | 43 | ```bash 44 | python models/evaluation.py --config configs/arkitscenes.txt 45 | ``` 46 | 47 | You can also produce visualizations by adding the option `--produce_visualizations`. Producing results for a specific scene can be achieved via `models/evaluation.py` with the `--predict_specific_scene` option; see the example below: 48 | 49 | ```bash 50 | python models/evaluation.py --config configs/arkitscenes.txt --predict_specific_scene 42445429 --produce_visualizations 51 | ``` 52 | 53 | Running the command above will produce the visualization of the segmentation result in `experiments/arkitscenes/results/[checkpoint]/viz/42445429`, where `[checkpoint]` is the checkpoint loaded when running the script. -------------------------------------------------------------------------------- /data/augmented_BBs/visualize_bbs_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import open3d as o3d 5 | from dataprocessing.scannet import scannet_color_map, SEMANTIC_VALID_CLASS_IDS, SEMANTIC_VALID_CLASS_IDS_torch 6 | import config_loader as cfg_loader 7 | import pyviz3d.visualizer as viz 8 | from dataprocessing import scannet 9 | import os 10 | import numpy as np 11 | from utils.util import get_bbs_lines 12 | import configargparse 13 | 14 | # Argument list 15 | parser = configargparse.ArgumentParser() 16 | parser.add_argument ("--data", type=str, default='noisy1', 17 | help='Data can be one of noisy1, dropout10, etc') 18 | parser.add_argument ("--scene_name", type=str, default='scene0293_00', 19 | help='Scene to be processed') 20 | parser.add_argument ("--data_path", type=str, default='/BS/atran2/work/tmp/for_webpage/scannet_boxes_data/', 21 | help='Path to the augmented boxes data') 22 | cfg = parser.parse_args() 23 | 24 | 25 | # Get the rgb point cloud and original labels of the scene 26 | scene, labels = scannet.process_scene(cfg.scene_name, 'train', cfg, do_augmentations=False) 27 | 28 | # Specify a data set (eg. noisy1, dropout10)
29 | data_name = cfg.data 30 | scene_name = cfg.scene_name 31 | 32 | # Load the instance BBs of the scene (expected layout: <data_path>/<data_name>/<scene_name>.npy) 33 | boxes_data_path = os.path.join (cfg.data_path, data_name) 34 | box_info_pth = os.path.join (boxes_data_path, scene_name + '.npy') 35 | boxes = np.load (box_info_pth, allow_pickle=True).item () 36 | 37 | v = viz.Visualizer() 38 | 39 | v.add_points ("Scene RGB", scene["positions"], scene['colors'] * 255, point_size=25, visible=False) 40 | 41 | min_corners = boxes["min_corner"] # List of min corners of instances 42 | max_corners = boxes["max_corner"] # List of max corners of instances 43 | semantic_ids = boxes ["semantic_id"] # List of semantic ids, one per box 44 | 45 | # Visualize each instance 46 | for instance_id in range(len(semantic_ids)): 47 | min_corner = min_corners[instance_id][None] # shape 1x3 48 | max_corner = max_corners[instance_id][None] # shape 1x3 49 | semantic_id = semantic_ids[instance_id] 50 | 51 | # Get the 12 edges of the box 52 | bb_centers = (max_corner + min_corner) / 2 53 | bb_bounds = max_corner - bb_centers 54 | start, end = get_bbs_lines(bb_centers, bb_bounds) 55 | semantic_color = scannet.scannet_color_map [semantic_id] 56 | semantic_name = scannet.scannet_class_names [semantic_id] 57 | lines_color = np.stack ([semantic_color for _ in range (12)]) 58 | 59 | # Draw the box using pyviz3d 60 | v.add_lines(semantic_name+';instance_id='+str(instance_id), start, end, lines_color, visible=False) 61 | 62 | visualize_path = os.path.join ("data/augmented_BBs/visualize/", data_name) 63 | os.makedirs(visualize_path, exist_ok=True) 64 | v.save(os.path.join(visualize_path, scene_name)) -------------------------------------------------------------------------------- /docs/code_structure.md: -------------------------------------------------------------------------------- 1 | # Code structure 2 | **configs/**, Includes the config files to run models 3 | 4 | **data/**, Stores datasets (e.g. 
data/scannet/ or data/ARKitScenes) 5 | 6 | **config_loader.py**, Defines all hyper-parameters of the model 7 | 8 | 9 | **dataprocessing** 10 | - **dataprocessing/augmentation.py**, Defines the augmentation code 11 | - **dataprocessing/scannet.py**, Reads in train/test/val scenes of ScanNet 12 | - **dataprocessing/arkitscenes.py**, Reads in train/val scenes of ARKitScenes 13 | - **dataprocessing/s3dis.py**, Reads in train/val scenes of S3DIS 14 | 15 | 16 | **models** 17 | - **models/dataloader.py**, Reads and preprocesses data and prepares tensor batches 18 | - **class ScanNet**, Reads and preprocesses ScanNet scenes 19 | - **approx_association()**, Finds the associations of points using GT bounding boxes 20 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 21 | - **class ARKitScenes**, Reads and preprocesses ARKitScenes scenes 22 | - **approx_association()**, Finds the associations of points using GT bounding boxes 23 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 24 | - **class S3DIS**, Reads and preprocesses S3DIS scenes 25 | - **approx_association()**, Finds the associations of points using GT bounding boxes 26 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 27 | - **collate_fn**, Collates preprocessed scenes into tensor batches 28 | 29 | - **models/detection_net.py**, Defines the network 30 | - **class SelectionNet**, Defines the main network and network heads 31 | - **detection2mask()**, Converts box proposals into final instance masks 32 | - **get_prediction()**, Gets predictions from the network heads 33 | - **models/evaluation.py**, Evaluates ScanNet and ARKitScenes predictions. Can be run with: `python models/evaluation.py --config configs/[config_name].txt` 34 | - **arkitscenes_eval()**, Approximates oriented bounding boxes from instance predictions and computes detection quality using the AP score 35 | - **scannet_eval()**, Computes ScanNet prediction scores in terms of AP, AP50 and AP25 36 | - **models/iou_nms.py**, Defines the Non-Maximum Clustering 37 | - **NMS_clustering()**, Non-Maximum Clustering algorithm (as in Sec. 3 and Sec. 4 in the main paper) 38 | - **models/resnet.py**, Some utilities for making the U-Net model 39 | - **models/training.py**, Defines the training code, can be run with: `python models/training.py --config configs/[config_name].txt` 40 | - **models/model.py**, Defines and computes the losses for each epoch 41 | - **compute_loss_detection()**, Computes each loss and the weighted joint loss for the network optimization 42 | 43 | **utils/**, Contains some low-level utilities -------------------------------------------------------------------------------- /docs/s3dis.md: -------------------------------------------------------------------------------- 1 | The following instructions are for reproducing our results on the S3DIS data. 2 | 3 | First, download the S3DIS data from the official [page](http://buildingparser.stanford.edu/dataset.html). 4 | Our preprocessed normals and oversegmentations for the S3DIS scenes can be downloaded here: [oversegmentation](https://datasets.d2.mpi-inf.mpg.de/box2mask/segment_labels.tar.gz) and [normals](https://datasets.d2.mpi-inf.mpg.de/box2mask/normals.tar.gz). 5 | Unzip the S3DIS data and the `normals` to `box2mask/data/Stanford3dDataset_v1.2_Aligned_Version/`. 
The structure of the unzipped data is as follows: 6 | 7 | ``` 8 | box2mask/data/Stanford3dDataset_v1.2_Aligned_Version/ 9 | └── Area_1/ # Containing point cloud, segmentation information, normals, colors informations 10 | ├── hallway_1/ 11 | ├── Annotations/ # Contains instances information 12 | ├── door_2.txt 13 | ├── floor_1.txt 14 | ├── wall_2.txt 15 | ... 16 | ├── hallway_1.txt # Contains positions and colors of scene points 17 | ├── office_11/ 18 | ... 19 | ├── office_12/ 20 | ... 21 | ... 22 | └── Area_2/ 23 | ... 24 | └── Area_3/ 25 | ... 26 | └── Area_4/ 27 | ... 28 | └── Area_5/ 29 | ... 30 | └── Area_6/ 31 | ... 32 | └── normals/ 33 | ├── Area_4.office_7.npy 34 | ├── Area_5.office_36.npy 35 | ├── Area_1.office_25.npy 36 | ... 37 | ... 38 | ``` 39 | 40 | Run the following script to prepare the S3DIS dataset 41 | 42 | ```bash 43 | mkdir -p ./data/s3dis/ 44 | python dataprocessing/prepare_s3dis.py --data_dir ./data/Stanford3dDataset_v1.2_Aligned_Version/ 45 | ``` 46 | 47 | Uncompress the `segment_labels.tar.gz` file to `box2mask/data/s3dis/` 48 | 49 | The preprocessed data and oversegmentation folders should be prepared as the following structure for our project: 50 | 51 | ``` 52 | box2mask/data/s3dis/ 53 | └── Area_1/ # Containing point cloud, segmentation information, normals, colors informations 54 | ├── hallway_1.normals.instance.npy 55 | ├── office_11.normals.instance.npy 56 | ├── office_12.normals.instance.npy 57 | ... 58 | └── Area_2/ 59 | ├── office_1.normals.instance.npy 60 | ├── office_2.normals.instance.npy 61 | ├── office_3.normals.instance.npy 62 | ... 63 | └── Area_6/ 64 | ├── conferenceRoom_1.normals.instance.npy 65 | ├── copyRoom_1.normals.instance.npy 66 | ├── office_3.normals.instance.npy 67 | ... 68 | └── segment_labels/ # Containing the segmentation files of all scenes 69 | ├──learned_superpoin_graph_segmentations/ 70 | ├── Area_4.office_7.npy 71 | ├── Area_5.office_36.npy 72 | ├── Area_1.office_25.npy 73 | ... 74 | ``` 75 | 76 | Here each `.normals.instance.npy` contains the point cloud, segmentations, colors and normals information. The information can be each extracted using the following script (note: instance labels is only used to get axis aligned bounding box information of each instance): 77 | 78 | ```python 79 | data = np.load ('box2mask/data/s3dis/Area_1/hallway_1.normals.instance.npy') 80 | 81 | positions = data [:,:3].astype (np.float32) # XYZ positions (N x 3) 82 | colors = data [:,3:6].astype (np.float) / 255 # Point colors (N x 3) 83 | normals = data [:,6:9].astype (np.float) # Surface normals (N x 3) 84 | semantics = data [:, -2].astype (np.int32) # Semantic labels of points (N x 1) 85 | instances = data [:, -1].astype (np.int32) # Instance labels of points (N x 1) 86 | ``` 87 | 88 | You can train the model using `training.py` from the root folder. Each config file is of format s3dis_fold\[area_number\] which area_number indicate the area to be used as validation set and other areas to be used as training set. For example, to have area 5 as the validation set and other areas for training: 89 | 90 | ```bash 91 | python models/training.py --config configs/s3dis_fold5.txt 92 | ``` 93 | 94 | To evaluate with the validation, run the following commands (producing the validation score as in Table 1 with Area 5): 95 | 96 | ```bash 97 | python models/evaluation.py --config configs/s3dis_fold5.txt 98 | ``` 99 | 100 | You can also produce visualization by adding option `--produce_visualizations`. 
To choose a specific scene to process, provide a scene name with the option `--predict_specific_scene`. Each scene has the name in the following format `Area_[area_number].[room_name]` where `[area_number]` is a number from 1 to 6 and `[room_name]` the name of the room in the area. Producing result for a specific scene can be achived via `model/evaluation.py` with `--predict_specific_scene` option, see the example below: 101 | 102 | ```bash 103 | python models/evaluation.py --config configs/s3dis_fold5.txt --predict_specific_scene Area_5.office_7 --produce_visualizations 104 | ``` 105 | 106 | Running the command above will produce the visualization of segmentation result in `experiments/s3dis_fold5/results/[checkpoint]/viz/Area_5.office_7` where `checkpoint` is the loaded checkpoint when running the script. -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import copy 4 | 5 | def get_bb_lines(bb_center, bb_bounds): 6 | start_list = [] 7 | end_list = [] 8 | bb_min = bb_center - bb_bounds 9 | bb_max = bb_center + bb_bounds 10 | length = bb_max - bb_min 11 | for i in range(3): 12 | start_list.append(bb_min) 13 | end = bb_min.copy() 14 | end[i] = bb_max[i] 15 | end_list.append(end) 16 | for j in range(3): 17 | if i == j: 18 | continue 19 | start_list.append(end) 20 | second_end = end.copy() 21 | second_end[j] += length[j] 22 | end_list.append(second_end) 23 | for i in range(3): 24 | start_list.append(bb_max) 25 | end = bb_max.copy() 26 | end[i] = bb_min[i] 27 | end_list.append(end) 28 | return np.array(start_list), np.array(end_list) 29 | 30 | def get_bbs_lines(bbs_centers, bbs_bounds): 31 | if type(bbs_centers) is torch.Tensor or type(bbs_bounds) is torch.Tensor: 32 | bbs_centers = bbs_centers.numpy() 33 | bbs_bounds = bbs_bounds.numpy() 34 | start_list = [] 35 | end_list = [] 36 | for i in range(len(bbs_centers)): 37 | # for i in range(len(bb_centers)): 38 | start, end = get_bb_lines(bbs_centers[i], bbs_bounds[i]) 39 | start_list.append(start) 40 | end_list.append(end) 41 | start = np.concatenate(start_list, 0) 42 | end = np.concatenate(end_list, 0) 43 | return start, end 44 | 45 | # out: bb [min corner ,max corner, score] 46 | def to_bbs_min_max(locations, offsets, bounds, scores=None, use_torch=True): 47 | if use_torch: 48 | centers = offsets + locations 49 | if offsets.is_cuda: 50 | bbs = torch.cuda.FloatTensor(centers.shape[0], 6).fill_(0) 51 | else: 52 | bbs = torch.zeros((centers.shape[0], 6)) 53 | bbs[:, :3] = centers - bounds 54 | bbs[:, 3:] = centers + bounds 55 | if scores is not None: 56 | bbs = torch.cat((scores, bbs), axis=1) 57 | else: 58 | centers = offsets + locations 59 | bbs = np.zeros((centers.shape[0], 6)) 60 | bbs[:, :3] = centers - bounds 61 | bbs[:, 3:] = centers + bounds 62 | if scores is not None: 63 | bbs = np.concatenate((scores, bbs), axis=1) 64 | return bbs 65 | 66 | def to_bbs_min_max_(centers, bounds, device): 67 | bounding_boxes = torch.zeros((bounds.shape[0], 6), device=device) 68 | bounding_boxes[:, :3] = centers - bounds 69 | bounding_boxes[:, 3:] = centers + bounds 70 | return bounding_boxes 71 | 72 | # go from min_corner, max_corner representation to center, bounds representation 73 | def to_bb_center_a_bounds(bbs_min_max): 74 | bb_centers = (bbs_min_max[:,3:] + bbs_min_max[:,:3]) / 2 75 | bb_bounds = bbs_min_max[:,3:] - bb_centers 76 | return bb_centers, bb_bounds 77 | 78 | def 
get_all_bb_corners(bb_centers,bb_bounds): 79 | # powerset of all dimensions 80 | neg_dims = [(), (0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)] 81 | corner_displacements = bb_bounds.expand(len(neg_dims),-1,-1) # (8,num_predictions on all scenes,3) 82 | for i, neg_dim in enumerate(neg_dims): 83 | corner_displacements[i,:,neg_dim] *= -1 84 | eight_cornered_bbs = bb_centers + corner_displacements # (8,num_predictions on all scenes,3) 85 | return eight_cornered_bbs 86 | 87 | # works on torch tensors 88 | def is_within_bb(points, bb_min, bb_max): 89 | return torch.all( points >= bb_min, axis=-1) & torch.all( points <= bb_max, axis=-1) 90 | # numpy version 91 | def is_within_bb_np(points, bb_min, bb_max): 92 | return np.all( points >= bb_min, axis=-1) & np.all( points <= bb_max, axis=-1) 93 | 94 | def convertSecs(sec): 95 | seconds = int(sec % 60) 96 | minutes = int((sec / 60) % 60) 97 | hours = int((sec / (60 * 60))) 98 | return hours, minutes, seconds 99 | 100 | import random 101 | from collections import defaultdict 102 | 103 | colors = defaultdict(lambda: [random.random() * 255, random.random() * 255, random.random() * 255]) 104 | colors[0] = [0,0,0] 105 | colors[-2] = [255,0,0] 106 | def to_color(arr): 107 | return np.array([colors[e] for e in arr]) 108 | 109 | def scalar2colors(arr): 110 | colors = np.zeros((len(arr),3)) 111 | colors[:,1] = arr 112 | colors *= 255 113 | return colors 114 | 115 | def to_worldcoords(vox_coords,scene, cfg): 116 | return (vox_coords * cfg.voxel_size + min(0, np.min(scene["positions"]))).numpy() 117 | 118 | # ----------------- map segment ids to dense ranking starting at index 0 (needed by ME global pool function) 119 | # Segment ids can be duplicates: map segment ids to unique ones. 120 | # This means, that every segment in each batch, needs to have a unique batch_id, in order to be pooled 121 | # separately. 122 | 123 | def to_unique( segments): # enumeration_ids, when we have id arrays, like [0,1,2,..,n] 124 | unique_segments = copy.deepcopy(segments) 125 | # make sure all segments across scenes have unique ids 126 | for i in range(1, len(unique_segments)): 127 | unique_segments[i] += np.max(unique_segments[i - 1]) + 1 128 | unique_segments = np.concatenate(unique_segments, 0) 129 | _, pooling_ids = np.unique(unique_segments, return_inverse=True) 130 | return torch.from_numpy(pooling_ids).long() 131 | 132 | 133 | # Epoch counts from 0 to N-1 134 | from math import cos, pi 135 | def cosine_lr_after_step(optimizer, base_lr, epoch, start_epoch, total_epochs, clip=1e-6): 136 | if epoch < start_epoch: 137 | lr = base_lr 138 | else: 139 | lr = clip + 0.5 * (base_lr - clip) * \ 140 | (1 + cos(pi * ( (epoch - start_epoch) / (total_epochs - start_epoch)))) 141 | 142 | for param_group in optimizer.param_groups: 143 | param_group['lr'] = lr -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. 
Qi 9 | """ 10 | 11 | import torch 12 | import numpy as np 13 | 14 | 15 | # ---------------------------------------- 16 | # Precision and Recall 17 | # ---------------------------------------- 18 | 19 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 20 | ''' 21 | Args: 22 | labels: (B, N, 6) 23 | pred: (B, M, 6) 24 | iou_thresh: scalar 25 | conf_thresh: scalar 26 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 27 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 28 | Returns: 29 | TP,FP,FN,Precision,Recall 30 | ''' 31 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 32 | # of True/False array as in numpy 33 | assert (not torch.is_tensor(label_mask)) 34 | assert (not torch.is_tensor(pred_mask)) 35 | TP, FP, FN = 0, 0, 0 36 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 37 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 38 | for batch_idx in range(labels.shape[0]): 39 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx, :] == 1, :], 40 | pred[batch_idx, pred_mask[batch_idx, :] == 1, :], 41 | iou_thresh, conf_thresh) 42 | TP += TP_i 43 | FP += FP_i 44 | FN += FN_i 45 | 46 | return TP, FP, FN, precision_recall(TP, FP, FN) 47 | 48 | 49 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 50 | """Compute P and R for predicted bounding boxes. Ignores classes! 51 | Args: 52 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 53 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 54 | Returns: 55 | TP, FP, FN 56 | """ 57 | 58 | # for each pred box with high conf (C), compute IoU with all gt boxes. 59 | # TP = number of times IoU > th ; FP = C - TP 60 | # FN - number of scene objects without good match 61 | 62 | gt_bboxes = labels[:, :6] 63 | 64 | num_scene_bboxes = gt_bboxes.shape[0] 65 | conf = pred[:, 6] 66 | 67 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 68 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 69 | 70 | # init an array to keep iou between generated and scene bboxes 71 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 72 | for g_idx in range(num_conf_pred_bboxes): 73 | for s_idx in range(num_scene_bboxes): 74 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx, :], gt_bboxes[s_idx, :]) 75 | 76 | good_match_arr = (iou_arr >= iou_thresh) 77 | 78 | TP = good_match_arr.any(axis=1).sum() 79 | FP = num_conf_pred_bboxes - TP 80 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 81 | 82 | return TP, FP, FN 83 | 84 | 85 | def precision_recall(TP, FP, FN): 86 | Prec = 1.0 * TP / (TP + FP) if TP + FP > 0 else 0 87 | Rec = 1.0 * TP / (TP + FN) 88 | return Prec, Rec 89 | 90 | 91 | def calc_iou(box_a, box_b): 92 | """Computes IoU of two axis aligned bboxes. 
93 | Args: 94 | box_a, box_b: 6D of center and lengths 95 | Returns: 96 | iou 97 | """ 98 | 99 | max_a = box_a[0:3] + box_a[3:6] / 2 100 | max_b = box_b[0:3] + box_b[3:6] / 2 101 | min_max = np.array([max_a, max_b]).min(0) 102 | 103 | min_a = box_a[0:3] - box_a[3:6] / 2 104 | min_b = box_b[0:3] - box_b[3:6] / 2 105 | max_min = np.array([min_a, min_b]).max(0) 106 | if not ((min_max > max_min).all()): 107 | return 0.0 108 | 109 | intersection = (min_max - max_min).prod() 110 | vol_a = box_a[3:6].prod() 111 | vol_b = box_b[3:6].prod() 112 | union = vol_a + vol_b - intersection 113 | return 1.0 * intersection / union 114 | 115 | 116 | if __name__ == '__main__': 117 | print('running some tests') 118 | 119 | ############ 120 | ## Test IoU 121 | ############ 122 | box_a = np.array([0, 0, 0, 1, 1, 1]) 123 | box_b = np.array([0, 0, 0, 2, 2, 2]) 124 | expected_iou = 1.0 / 8 125 | pred_iou = calc_iou(box_a, box_b) 126 | assert expected_iou == pred_iou, 'function returned wrong IoU' 127 | 128 | box_a = np.array([0, 0, 0, 1, 1, 1]) 129 | box_b = np.array([10, 10, 10, 2, 2, 2]) 130 | expected_iou = 0.0 131 | pred_iou = calc_iou(box_a, box_b) 132 | assert expected_iou == pred_iou, 'function returned wrong IoU' 133 | 134 | print('IoU test -- PASSED') 135 | 136 | ######################### 137 | ## Test Precition Recall 138 | ######################### 139 | gt_boxes = np.array([[0, 0, 0, 1, 1, 1], [3, 0, 1, 1, 10, 1]]) 140 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [3, 0, 1, 1, 10, 1, 0.9]]) 141 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 142 | assert TP == 2 and FP == 0 and FN == 0 143 | assert precision_recall(TP, FP, FN) == (1, 1) 144 | 145 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0]]) 146 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 147 | assert TP == 1 and FP == 0 and FN == 1 148 | assert precision_recall(TP, FP, FN) == (1, 0.5) 149 | 150 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [-1, -1, 0, 0.1, 0.1, 1, 1.0]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 1 and FP == 1 and FN == 1 153 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 154 | 155 | # wrong box has low confidence 156 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [-1, -1, 0, 0.1, 0.1, 1, 0.1]]) 157 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 158 | assert TP == 1 and FP == 0 and FN == 1 159 | assert precision_recall(TP, FP, FN) == (1, 0.5) 160 | 161 | print('Precition Recall test -- PASSED') 162 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.13 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | 7 | .PHONY : default_target 8 | 9 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 10 | .NOTPARALLEL: 11 | 12 | 13 | #============================================================================= 14 | # Special targets provided by cmake. 15 | 16 | # Disable implicit rules so canonical targets will work. 17 | .SUFFIXES: 18 | 19 | 20 | # Remove some rules from gmake that .SUFFIXES does not remove. 
21 | SUFFIXES = 22 | 23 | .SUFFIXES: .hpux_make_needs_suffix_list 24 | 25 | 26 | # Suppress display of executed commands. 27 | $(VERBOSE).SILENT: 28 | 29 | 30 | # A target that is always out of date. 31 | cmake_force: 32 | 33 | .PHONY : cmake_force 34 | 35 | #============================================================================= 36 | # Set environment variables for the build. 37 | 38 | # The shell in which to execute make rules. 39 | SHELL = /bin/sh 40 | 41 | # The CMake executable. 42 | CMAKE_COMMAND = /usr/bin/cmake 43 | 44 | # The command to remove a file. 45 | RM = /usr/bin/cmake -E remove -f 46 | 47 | # Escaping for special characters. 48 | EQUALS = = 49 | 50 | # The top-level source directory on which CMake was run. 51 | CMAKE_SOURCE_DIR = /home/atran/atran/for_webpage/for_webpage2/dataprocessing/oversegmentation/cpp 52 | 53 | # The top-level build directory on which CMake was run. 54 | CMAKE_BINARY_DIR = $(CMAKE_SOURCE_DIR) 55 | 56 | #============================================================================= 57 | # Targets provided globally by CMake. 58 | 59 | # Special rule for the target rebuild_cache 60 | rebuild_cache: 61 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 62 | /usr/bin/cmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 63 | .PHONY : rebuild_cache 64 | 65 | # Special rule for the target rebuild_cache 66 | rebuild_cache/fast: rebuild_cache 67 | 68 | .PHONY : rebuild_cache/fast 69 | 70 | # Special rule for the target edit_cache 71 | edit_cache: 72 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 73 | /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 74 | .PHONY : edit_cache 75 | 76 | # Special rule for the target edit_cache 77 | edit_cache/fast: edit_cache 78 | 79 | .PHONY : edit_cache/fast 80 | 81 | # The main all target 82 | all: cmake_check_build_system 83 | $(CMAKE_COMMAND) -E cmake_progress_start $(CMAKE_SOURCE_DIR)/cpp/CMakeFiles $(CMAKE_SOURCE_DIR)/CMakeFiles/progress.marks 84 | $(MAKE) -f CMakeFiles/Makefile2 all 85 | $(CMAKE_COMMAND) -E cmake_progress_start $(CMAKE_SOURCE_DIR)/CMakeFiles 0 86 | .PHONY : all 87 | 88 | # The main clean target 89 | clean: 90 | $(MAKE) -f CMakeFiles/Makefile2 clean 91 | .PHONY : clean 92 | 93 | # The main clean target 94 | clean/fast: clean 95 | 96 | .PHONY : clean/fast 97 | 98 | # Prepare targets for installation. 99 | preinstall: all 100 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 101 | .PHONY : preinstall 102 | 103 | # Prepare targets for installation. 104 | preinstall/fast: 105 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 106 | .PHONY : preinstall/fast 107 | 108 | # clear depends 109 | depend: 110 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 111 | .PHONY : depend 112 | 113 | #============================================================================= 114 | # Target rules for targets named segmentator 115 | 116 | # Build rule for target. 117 | segmentator: cmake_check_build_system 118 | $(MAKE) -f CMakeFiles/Makefile2 segmentator 119 | .PHONY : segmentator 120 | 121 | # fast build rule for target. 
122 | segmentator/fast: 123 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/build 124 | .PHONY : segmentator/fast 125 | 126 | segmentator.o: segmentator.cpp.o 127 | 128 | .PHONY : segmentator.o 129 | 130 | # target to build an object file 131 | segmentator.cpp.o: 132 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.o 133 | .PHONY : segmentator.cpp.o 134 | 135 | segmentator.i: segmentator.cpp.i 136 | 137 | .PHONY : segmentator.i 138 | 139 | # target to preprocess a source file 140 | segmentator.cpp.i: 141 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.i 142 | .PHONY : segmentator.cpp.i 143 | 144 | segmentator.s: segmentator.cpp.s 145 | 146 | .PHONY : segmentator.s 147 | 148 | # target to generate assembly for a file 149 | segmentator.cpp.s: 150 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.s 151 | .PHONY : segmentator.cpp.s 152 | 153 | tinyply.o: tinyply.cpp.o 154 | 155 | .PHONY : tinyply.o 156 | 157 | # target to build an object file 158 | tinyply.cpp.o: 159 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.o 160 | .PHONY : tinyply.cpp.o 161 | 162 | tinyply.i: tinyply.cpp.i 163 | 164 | .PHONY : tinyply.i 165 | 166 | # target to preprocess a source file 167 | tinyply.cpp.i: 168 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.i 169 | .PHONY : tinyply.cpp.i 170 | 171 | tinyply.s: tinyply.cpp.s 172 | 173 | .PHONY : tinyply.s 174 | 175 | # target to generate assembly for a file 176 | tinyply.cpp.s: 177 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.s 178 | .PHONY : tinyply.cpp.s 179 | 180 | # Help Target 181 | help: 182 | @echo "The following are some of the valid targets for this Makefile:" 183 | @echo "... all (the default if no target is provided)" 184 | @echo "... clean" 185 | @echo "... depend" 186 | @echo "... rebuild_cache" 187 | @echo "... segmentator" 188 | @echo "... edit_cache" 189 | @echo "... segmentator.o" 190 | @echo "... segmentator.i" 191 | @echo "... segmentator.s" 192 | @echo "... tinyply.o" 193 | @echo "... tinyply.i" 194 | @echo "... tinyply.s" 195 | .PHONY : help 196 | 197 | 198 | 199 | #============================================================================= 200 | # Special targets to cleanup operation of make. 201 | 202 | # Special rule to run CMake to check the build system integrity. 203 | # No rule that depends on this can have commands that come from listfiles 204 | # because they might be regenerated. 
205 | cmake_check_build_system: 206 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 207 | .PHONY : cmake_check_build_system 208 | 209 | -------------------------------------------------------------------------------- /dataprocessing/prepare_s3dis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import numpy as np 5 | import skimage.io as io 6 | import open3d as o3d 7 | import pyviz3d.visualizer as viz 8 | import os 9 | import glob 10 | from sklearn.neighbors import NearestNeighbors 11 | from scipy.spatial import KDTree 12 | import natsort 13 | 14 | import configargparse 15 | 16 | parser = configargparse.ArgumentParser() 17 | parser.add_argument("--scene_id", type=int, default=None, 18 | help="Input the index of a scene to process. Default is None - process all scene") 19 | 20 | parser.add_argument("--data_dir", type=str, default='./data/Stanford3dDataset_v1.2_Aligned_Version/', 21 | help="Path to the original data") 22 | 23 | S3DIS_SEMANTICS_COLORS = np.array ( 24 | [(174, 199, 232), # ceiling 25 | (152, 223, 138), # floor 26 | (31, 119, 180), # wall 27 | (255, 187, 120), # column 28 | (188, 189, 34), # beam 29 | (140, 86, 75), # window 30 | (255, 152, 150), # door 31 | (214, 39, 40), # table 32 | (197, 176, 213), # chair 33 | (148, 103, 189), # bookcase 34 | (196, 156, 148), # sofa 35 | (23, 190, 207), # board 36 | (178, 76, 76),] # clutter 37 | ) 38 | 39 | INS_COLORS = np.array ([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] for _ in range (1000)]) 40 | 41 | def visualize (scene_name, pts, colors, normals, instances, semantics): 42 | viz_pth = 'visualize_scene_npy/' + scene_name 43 | os.makedirs (viz_pth, exist_ok=True) 44 | v = viz.Visualizer() 45 | sample_rate = 4 46 | 47 | normals_start = pts [::sample_rate*3] 48 | normals = normals[::sample_rate*3] 49 | pts = pts [::sample_rate] 50 | colors = colors [::sample_rate] 51 | instances = instances [::sample_rate] 52 | semantics = semantics [::sample_rate] 53 | 54 | instances_colors = INS_COLORS [instances] 55 | semantics_colors = S3DIS_SEMANTICS_COLORS [semantics] 56 | 57 | v.add_points ("points", pts , colors, point_size=15, visible=True) 58 | v.add_points ("instances", pts , instances_colors, point_size=15, visible=True) 59 | v.add_points ("semantics", pts , semantics_colors, point_size=15, visible=True) 60 | norm_colors = np.array ([[0,255,0] for _ in range (len (normals))]) 61 | v.add_lines ("normals", normals_start, normals_start + normals / 15, norm_colors, visible=False) 62 | v.save(viz_pth, verbose=False) 63 | 64 | 65 | ID2NAME = {0:'ceiling', 1:'floor', 2:'wall', 3:'beam', 4:'column', 5:'window', 6:'door', 7:'table', 8:'chair', 9:'sofa', 10:'bookcase', 11:'board', 12:'clutter'} 66 | ID2NAME = [ID2NAME [i] for i in range (13)] 67 | NAME2ID = {} 68 | for i in range (13): 69 | NAME2ID [ID2NAME [i]] = i 70 | 71 | def get_labels (scene_name, scene_data, data_dir): 72 | area = scene_name.split ('.') [0] 73 | name = scene_name.split ('.') [1] 74 | instance_pths = glob.glob (data_dir + '/' + area + '/' + name + '/Annotations/*.txt') 75 | 76 | scene_pts = scene_data [:,:3] # Scene point cloud 77 | pt_tree = KDTree (scene_pts) 78 | 79 | error = 0 80 | instances = np.zeros ((len (scene_data), 1), dtype=np.int32) - 1 81 | semantics = np.zeros ((len (scene_data), 1), dtype=np.float32) - 1 82 | 83 | # Use nearest neighbor to find corresponding point indexes in the 
scenes PC of instances 84 | for instance_id, pth in enumerate (instance_pths): 85 | class_name = pth.split ('/')[-1].split('_')[0] 86 | if not (class_name in NAME2ID.keys ()): 87 | if class_name == 'stairs': 88 | class_name = 'clutter' 89 | semantic_id = NAME2ID [class_name] 90 | # Load instance point cloud 91 | instance_data = np.loadtxt (pth) 92 | instance_pts = instance_data [:, :3] 93 | instance_colors = instance_data [:, 3:] 94 | # Find corresponding indices in the scene points 95 | dist, pt_indexs = pt_tree.query(instance_pts, k=1) 96 | instances [pt_indexs] = instance_id 97 | semantics [pt_indexs] = semantic_id 98 | error += dist.sum () 99 | 100 | decided = (instances >= 0)[:, 0] 101 | 102 | # For some points are not annotated, use the label from nearby points 103 | pt_tree = KDTree (scene_pts [decided]) 104 | dist, decided_indexs = pt_tree.query(scene_pts [~decided], k=1) 105 | 106 | instances [~decided] = instances [decided][decided_indexs] 107 | semantics [~decided] = semantics [decided][decided_indexs] 108 | 109 | assert (instances.min ()) >= 0 110 | assert (semantics.min ()) >= 0 111 | 112 | # Avoiding duplicate instances -> instance ids are contiguous from 0 113 | remap_id = np.array (range (instances.max () + 1)) 114 | for new_id, old_id in enumerate (np.unique (instances)): 115 | remap_id [old_id] = new_id 116 | instances = remap_id [instances].astype (np.float32) 117 | unique_instances = np.unique (instances) 118 | 119 | assert np.all(unique_instances == range(len(unique_instances))) 120 | 121 | return instances, semantics 122 | 123 | def read_scene_txt (name, data_dir): 124 | area = name.split ('.') [0] 125 | name = name.split ('.') [1] 126 | 127 | pts = np.loadtxt (os.path.join (data_dir + '/' + area, name, name + '.txt')) 128 | return pts 129 | 130 | def preprocess_s3dis (data_dir, scene_id): 131 | scene_list = [] 132 | for i in range (1, 7): 133 | area = data_dir + '/Area_' + str (i) 134 | tmp = glob.glob (area + '/*') 135 | for scene_name in tmp: 136 | scene_name = scene_name.split ('/')[-2] + '.' 
+ scene_name.split ('/')[-1] 137 | scene_list.append (scene_name) 138 | 139 | scene_list = natsort.natsorted (scene_list) 140 | 141 | if scene_id is not None: 142 | scene_list = scene_list [scene_id:scene_id+1] 143 | 144 | for scene_name in scene_list: 145 | area = scene_name.split ('.') [0] 146 | name = scene_name.split ('.') [1] 147 | save_dir = 'data/s3dis/' + area + '/' 148 | scene_pth = os.path.join (save_dir, name + '.normals.instance.npy') 149 | 150 | os.makedirs (save_dir, exist_ok=True) 151 | 152 | scene_data = read_scene_txt (scene_name, data_dir) 153 | instances, semantics = get_labels (scene_name, scene_data, data_dir) 154 | normals = np.load (data_dir + '/normals/' + scene_name + '.npy') 155 | data = np.concatenate ([scene_data, normals, semantics, instances], 1) 156 | 157 | pts = data [:,:3].astype (np.float32) 158 | colors = data [:,3:6].astype (np.float32) 159 | normals = data [:,6:9].astype (np.float32) 160 | semantics = data [:, -2].astype (np.int32) 161 | instances = data [:, -1].astype (np.int32) 162 | 163 | # visualize (scene_name, pts - pts.mean (0), colors, normals, instances, semantics) 164 | np.save (scene_pth, data) 165 | print ("saved ", scene_pth) 166 | 167 | cfg = parser.parse_args() 168 | preprocess_s3dis (cfg.data_dir, cfg.scene_id) -------------------------------------------------------------------------------- /models/iou_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def set_IOUs(boxes_a, boxes): 5 | # assert boxes are defined as: (min_corner, max_corner) 6 | assert boxes_a.shape[1] == 6 and boxes.shape[1] == 6 7 | boxes_a_side_lengths = boxes_a[:, 3:] - boxes_a[:, :3] 8 | boxes_side_lengths = boxes[:, 3:] - boxes[:, :3] 9 | assert torch.all(boxes_a_side_lengths >= 0) and torch.all(boxes_side_lengths >= 0) 10 | 11 | intersection_min = torch.maximum(boxes_a[:, :3], boxes[:, :3]) 12 | intersection_max = torch.minimum(boxes_a[:, 3:], boxes[:, 3:]) 13 | 14 | # no overlap produces negative values, and is cutoff by 0 15 | intersection_side_lengths = torch.clamp( intersection_max - intersection_min, min=0) 16 | intersection_area = torch.prod(intersection_side_lengths, axis=1) 17 | 18 | boxes_a_area = torch.prod(boxes_a_side_lengths, axis=1) 19 | boxes_area = torch.prod(boxes_side_lengths, axis=1) 20 | 21 | union_area = boxes_a_area + boxes_area - intersection_area + 0.000001 22 | return intersection_area / union_area 23 | 24 | 25 | # axis-aligned bounding boxes only 26 | def torch_IOUs(box, boxes): 27 | # assert boxes are defined as: (min_corner, max_corner) 28 | assert box.shape[0] == 6 and boxes.shape[1] == 6 29 | 30 | box_side_lengths = box[3:] - box[:3] 31 | boxes_side_lengths = boxes[:, 3:] - boxes[:, :3] 32 | assert torch.all(box_side_lengths >= 0) and torch.all(boxes_side_lengths >= 0) 33 | 34 | intersection_min = torch.maximum(box[:3], boxes[:, :3]) 35 | intersection_max = torch.minimum(box[3:], boxes[:, 3:]) 36 | 37 | # no overlap produces negative values, and is cutoff by 0 38 | intersection_side_lengths = torch.clamp( intersection_max - intersection_min, min=0) 39 | intersection_area = torch.prod(intersection_side_lengths, axis=1) 40 | 41 | box_area = torch.prod(box_side_lengths) 42 | boxes_area = torch.prod(boxes_side_lengths, axis=1) 43 | 44 | union_area = box_area + boxes_area - intersection_area + 0.000001 45 | return intersection_area / union_area 46 | 47 | 48 | def np_NMS_clustering(boxes, cluster_th=0.5): 49 | # boxes should be a list of 3D boxes 
[box_score, min_corner,max_corner], higher scores for better boxes 50 | assert boxes.shape[1] == 7 and len(boxes.shape) == 2 51 | assert cluster_th > 0 and cluster_th < 1 52 | remaining_boxes_indices = np.argsort(-boxes[:, 0]) 53 | clusters = [] 54 | 55 | while len(remaining_boxes_indices) > 0: 56 | remaining_boxes = boxes[remaining_boxes_indices] 57 | # remove score component 58 | remaining_boxes = remaining_boxes[:, 1:] 59 | ious = IOUs(remaining_boxes[0], remaining_boxes) 60 | iou_mask = ious <= cluster_th 61 | 62 | clusters.append([remaining_boxes_indices[0], remaining_boxes_indices[~iou_mask]]) 63 | remaining_boxes_indices = remaining_boxes_indices[iou_mask] 64 | 65 | return clusters 66 | 67 | 68 | def NMS_clustering(boxes, cluster_th=0.5, get_heatmaps=True): 69 | # boxes should be a list of 3D boxes [box_score, min_corner,max_corner], higher scores for better boxes 70 | assert boxes.shape[1] == 7 and len(boxes.shape) == 2 71 | assert cluster_th > 0 and cluster_th < 1 72 | # boxes should have positive side lengths - otherwise they don't have an area and are invalid 73 | boxes_side_lengths = boxes[:, 4:] - boxes[:, 1:4] 74 | valid = torch.min(boxes_side_lengths, axis=1)[0] > 0 # (num_boxes) 75 | if ~ torch.all(valid): 76 | print('Warning: Invalid boxes found.') 77 | 78 | remaining_boxes_indices = torch.argsort(-boxes[:, 0]) 79 | # remove score component 80 | boxes = boxes[:, 1:] 81 | cluster_representant = [] 82 | clusters = [] 83 | cluster_heatmaps = [] 84 | while len(remaining_boxes_indices) > 0: 85 | #print(len(remaining_boxes_indices)) 86 | remaining_boxes = boxes[remaining_boxes_indices] 87 | if get_heatmaps: 88 | cluster_heatmap = torch_IOUs(remaining_boxes[0], boxes) 89 | # manually set iou to 1, even for invalid boxes (side_lengths <=0) 90 | cluster_heatmap[remaining_boxes_indices[0]] = 1 91 | cluster_heatmaps.append(cluster_heatmap) 92 | ious = cluster_heatmap[remaining_boxes_indices] 93 | else: 94 | ious = torch_IOUs(remaining_boxes[0], remaining_boxes) 95 | # manually set iou to 1, even for invalid boxes (side_lengths <=0) 96 | ious[0] = 1 97 | iou_mask = ious <= cluster_th 98 | cluster_representant.append(remaining_boxes_indices[0]) 99 | clusters.append(remaining_boxes_indices[~iou_mask]) 100 | remaining_boxes_indices = remaining_boxes_indices[iou_mask] 101 | 102 | if get_heatmaps: 103 | return torch.Tensor(cluster_representant).long(), clusters, torch.stack(cluster_heatmaps,0) 104 | else: 105 | return torch.Tensor(cluster_representant).long(), clusters 106 | 107 | 108 | # input masks: bool (true inside, false outside), shape: (num_masks, num_mask_elements) 109 | def masks_iou(mask, masks, allow_empty = False): 110 | # empty masks are invalid 111 | if not allow_empty: 112 | assert torch.all(torch.sum(masks, axis=1) > 0) and torch.sum(mask) > 0 113 | intersection = torch.sum(mask & masks, axis=1) 114 | union = torch.sum(mask | masks, axis=1) 115 | return intersection / union 116 | else: 117 | intersection = torch.sum(mask & masks, axis=1) 118 | union = torch.sum(mask | masks, axis=1) 119 | ret = torch.zeros_like(union).float() 120 | ret[union > 0] = intersection[union > 0] / union[union > 0] 121 | return ret 122 | 123 | def mask_iou_np(mask, mask_b): 124 | # empty masks are invalid 125 | assert np.sum(mask_b) > 0 and np.sum(mask) > 0 126 | intersection = np.sum(mask & mask_b) 127 | union = np.sum(mask | mask_b) 128 | return intersection / union 129 | 130 | def mask_NMS(sorted_masks, cluster_th=0.5, allow_empty = False): 131 | remaining_masks_indices = 
torch.arange(len(sorted_masks)) 132 | output_masks = [] 133 | suppressed = [] 134 | while len(remaining_masks_indices) > 0: 135 | remaining_masks = sorted_masks[remaining_masks_indices] 136 | ious = masks_iou(remaining_masks[0], remaining_masks, allow_empty) 137 | ious[0] = 1 138 | iou_mask = ious <= cluster_th 139 | 140 | output_masks.append(remaining_masks_indices[0]) 141 | suppressed.append((remaining_masks_indices[0], remaining_masks_indices[~iou_mask])) 142 | remaining_masks_indices = remaining_masks_indices[iou_mask] 143 | 144 | return torch.hstack(output_masks), suppressed 145 | 146 | def semIOU(pred_label, gt_label): 147 | IOU = [] 148 | # ignore invalid and unlabeled regions 149 | valid = gt_label > -100 150 | gt_label = gt_label[valid] 151 | pred_label = pred_label[valid] 152 | scene_labels = torch.unique(torch.cat((gt_label,pred_label))) 153 | for l in scene_labels: 154 | intersection = torch.sum((pred_label == l) & (gt_label == l)) 155 | union = torch.sum((pred_label == l) | (gt_label == l)) 156 | IOU.append((intersection / (union + 1e-6)).item()) 157 | return np.array(IOU) 158 | -------------------------------------------------------------------------------- /dataprocessing/augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import random 4 | import scipy 5 | import matplotlib 6 | import albumentations as A 7 | 8 | SCANNET_ELASTIC_DISTORT_PARAMS = ((0.2, 0.4), (0.8, 1.6)) 9 | 10 | # mix 3d color augmentation and normalization 11 | mix3d_albumentations_aug = A.load('dataprocessing/mix3d_albumentations_aug.yaml', data_format="yaml") 12 | color_mean = (0.47793125906962, 0.4303257521323044, 0.3749598901421883) 13 | color_std = (0.2834475483823543, 0.27566157565723015, 0.27018971370874995) 14 | # input colors should be in 0,..,255 because 15 | # Normalize method applies: img = (img - mean * max_pixel_value) / (std * max_pixel_value) 16 | color_norm = A.Normalize(mean=color_mean, std=color_std) 17 | 18 | # HUE aug 19 | hue_aug = A.Compose([ 20 | A.HueSaturationValue(hue_shift_limit=50, sat_shift_limit=60, val_shift_limit=50, p=1), 21 | ], p=1) 22 | 23 | def rotate_mesh (mesh, max_xy_angle=np.pi / 100, individual_prob = 1): 24 | """ Randomly rotate the point clouds around z-axis (max 360 degree), x-axis and y-axis (max max_xy_angle degree) 25 | """ 26 | random_z_angle = 0 27 | random_x_angle = 0 28 | random_y_angle = 0 29 | if random.random() < individual_prob: 30 | random_z_angle = np.random.uniform (0, 2*np.pi) 31 | if random.random() < individual_prob: 32 | random_x_angle = np.random.uniform (-max_xy_angle, max_xy_angle) 33 | if random.random() < individual_prob: 34 | random_y_angle = np.random.uniform (-max_xy_angle, max_xy_angle) 35 | mesh.rotate(mesh.get_rotation_matrix_from_xyz((random_x_angle,random_y_angle,random_z_angle))) 36 | 37 | 38 | def rotate_mesh_90_degree(mesh): 39 | """ Randomly rotate the point clouds around z-axis (random angle in 0,90,180,270 degree) 40 | """ 41 | random_z_angle = [0, 0.5* np.pi, np.pi, 1.5 * np.pi][np.random.randint(0,4)] 42 | random_x_angle = 0 43 | random_y_angle = 0 44 | mesh.rotate(mesh.get_rotation_matrix_from_xyz((random_x_angle, random_y_angle, random_z_angle))) 45 | 46 | def scale_mesh (mesh, min_scale=0.9, max_scale=1.1): 47 | """ Randomly scale the point cloud with a random scale value between min and max 48 | """ 49 | scale = np.random.uniform (min_scale, max_scale) 50 | mesh.scale(scale, center=(0, 0, 0)) 51 | 52 | def color_jittering 
(colors, min=-0.05, max=0.05): 53 | """ Randomly jitter color 54 | Input: 55 | Nx3 array, original point colors 56 | Return: 57 | Nx3 array, jittered point colors 58 | """ 59 | jitters = np.random.uniform (min, max, colors.shape) 60 | jittered_colors = np.clip(jitters + colors, 0, 1) 61 | return jittered_colors 62 | 63 | def random_brightness (colors, brightness_limit=0.2): 64 | brighness_aug = A.RandomBrightnessContrast(p=1.0, brightness_limit=brightness_limit, contrast_limit=0.0, always_apply=True) 65 | colors = brighness_aug (image=colors.astype (np.float32)) ["image"] 66 | return colors 67 | 68 | def elastic_distortion( coords, granularity, magnitude): 69 | """Apply elastic distortion on sparse coordinate space. 70 | pointcloud: numpy array of (number of points, at least 3 spatial dims) 71 | granularity: size of the noise grid (in same scale[m/cm] as the voxel grid) 72 | magnitude: noise multiplier 73 | """ 74 | blurx = np.ones((3, 1, 1, 1)).astype('float32') / 3 75 | blury = np.ones((1, 3, 1, 1)).astype('float32') / 3 76 | blurz = np.ones((1, 1, 3, 1)).astype('float32') / 3 77 | coords_min = coords.min(0) 78 | 79 | # Create Gaussian noise tensor of the size given by granularity. 80 | noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3 81 | noise = np.random.randn(*noise_dim, 3).astype(np.float32) 82 | 83 | # Smoothing. 84 | for _ in range(2): 85 | noise = scipy.ndimage.filters.convolve(noise, blurx, mode='constant', cval=0) 86 | noise = scipy.ndimage.filters.convolve(noise, blury, mode='constant', cval=0) 87 | noise = scipy.ndimage.filters.convolve(noise, blurz, mode='constant', cval=0) 88 | 89 | # Trilinear interpolate noise filters for each spatial dimensions. 90 | ax = [ 91 | np.linspace(d_min, d_max, d) 92 | for d_min, d_max, d in zip(coords_min - granularity, coords_min + granularity * (noise_dim - 2), noise_dim) 93 | ] 94 | interp = scipy.interpolate.RegularGridInterpolator(ax, noise, bounds_error=0, fill_value=0) 95 | coords += interp(coords) * magnitude 96 | return coords 97 | 98 | 99 | class ChromaticTranslation(object): 100 | """Add random color to the image, input must be an array in [0,1] or a PIL image""" 101 | 102 | def __init__(self, trans_range_ratio=0.1): 103 | """ 104 | trans_range_ratio: ratio of translation i.e. 
1.0 * 2 * ratio * rand(-0.5, 0.5) 105 | """ 106 | self.trans_range_ratio = trans_range_ratio 107 | 108 | def __call__(self, feats): 109 | if random.random() < 0.95: 110 | tr = (np.random.rand(1, 3) - 0.5) * 1.0 * 2 * self.trans_range_ratio 111 | feats[:, :3] = np.clip(tr + feats[:, :3], 0, 1) 112 | return feats 113 | 114 | class RandomBrightness (object): 115 | """Randomly modify the brightness of the image""" 116 | def __init__ (self, factor_range=0.2): 117 | self.factor_range = factor_range 118 | 119 | def __call__ (self, feats): 120 | hsv = matplotlib.colors.rgb_to_hsv (feats) 121 | factor_range = self.factor_range 122 | factor = np.random.uniform (1 - factor_range, 1 + factor_range) 123 | hsv [:,2] *= factor 124 | hsv = np.clip (hsv, 0, 1) 125 | rgb = matplotlib.colors.hsv_to_rgb (feats) 126 | return rgb 127 | 128 | class ChromaticAutoContrast(object): 129 | 130 | def __init__(self, randomize_blend_factor=True, blend_factor=0.5): 131 | self.randomize_blend_factor = randomize_blend_factor 132 | self.blend_factor = blend_factor 133 | 134 | def __call__(self, feats): 135 | if random.random() < 1.0: 136 | lo = feats[:, :3].min(0, keepdims=True) 137 | hi = feats[:, :3].max(0, keepdims=True) 138 | assert hi.max() <= 1, f"invalid color value. Color is supposed to be [0-1]" 139 | 140 | scale = 1.0 / (hi - lo) 141 | 142 | contrast_feats = (feats[:, :3] - lo) * scale 143 | 144 | blend_factor = random.random() if self.randomize_blend_factor else self.blend_factor 145 | feats[:, :3] = (1 - blend_factor) * feats + blend_factor * contrast_feats 146 | return feats 147 | 148 | def apply_mix3d_color_aug(color): 149 | color = color * 255 # needs to be in [0,255] 150 | pseudo_image = color.astype(np.uint8)[np.newaxis, :, :] 151 | color = np.squeeze(mix3d_albumentations_aug(image=pseudo_image)["image"]) 152 | 153 | # normalize color information 154 | pseudo_image = color[np.newaxis, :, :] 155 | color = np.squeeze(color_norm(image=pseudo_image)["image"]) 156 | return color 157 | 158 | def apply_hue_aug(color): 159 | color = color * 255 # needs to be in [0,255] 160 | pseudo_image = color.astype(np.uint8)[np.newaxis, :, :] 161 | pseudo_image = hue_aug(image=pseudo_image)["image"] 162 | pseudo_image = mix3d_albumentations_aug(image=pseudo_image)["image"] 163 | color = np.squeeze(pseudo_image) 164 | 165 | # normalize color information 166 | pseudo_image = color[np.newaxis, :, :] 167 | color = np.squeeze(color_norm(image=pseudo_image)["image"]) 168 | return color 169 | 170 | # Elastic distortion implemented like in HAIS 171 | def HAIS_elastic( x, gran, mag): 172 | blur0 = np.ones((3, 1, 1)).astype('float32') / 3 173 | blur1 = np.ones((1, 3, 1)).astype('float32') / 3 174 | blur2 = np.ones((1, 1, 3)).astype('float32') / 3 175 | 176 | bb = np.abs(x).max(0).astype(np.int32)//int(gran) + 3 177 | noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)] 178 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 179 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 180 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 181 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 182 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 183 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 184 | ax = [np.linspace(-(b-1)*gran, (b-1)*gran, b) for b in bb] 185 
| interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise] 186 | def g(x_): 187 | return np.hstack([i(x_)[:,None] for i in interp]) 188 | return x + g(x) * mag -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Chris Choy (chrischoy@ai.stanford.edu). 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | # this software and associated documentation files (the "Software"), to deal in 5 | # the Software without restriction, including without limitation the rights to 6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | # of the Software, and to permit persons to whom the Software is furnished to do 8 | # so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | # 21 | # Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural 22 | # Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part 23 | # of the code. 24 | import os 25 | from urllib.request import urlretrieve 26 | import numpy as np 27 | 28 | import torch 29 | import torch.nn as nn 30 | from torch.optim import SGD 31 | 32 | try: 33 | import open3d as o3d 34 | except ImportError: 35 | raise ImportError("Please install open3d with `pip install open3d`.") 36 | 37 | import MinkowskiEngine as ME 38 | from MinkowskiEngine.modules.resnet_block import Bottleneck 39 | 40 | def load_file(file_name): 41 | pcd = o3d.io.read_point_cloud(file_name) 42 | coords = np.array(pcd.points) 43 | colors = np.array(pcd.colors) 44 | return coords, colors, pcd 45 | 46 | class BasicBlock(nn.Module): 47 | expansion = 1 48 | 49 | def __init__(self, 50 | inplanes, 51 | planes, 52 | stride=1, 53 | dilation=1, 54 | downsample=None, 55 | bn_momentum=0.1, 56 | dimension=-1, 57 | expand_coordinates=False): 58 | super(BasicBlock, self).__init__() 59 | assert dimension > 0 60 | 61 | self.conv1 = ME.MinkowskiConvolution( 62 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, dimension=dimension, expand_coordinates=expand_coordinates) 63 | self.norm1 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) 64 | self.conv2 = ME.MinkowskiConvolution( 65 | planes, planes, kernel_size=3, stride=1, dilation=dilation, dimension=dimension) 66 | self.norm2 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) 67 | self.relu = ME.MinkowskiReLU(inplace=True) 68 | self.downsample = downsample 69 | 70 | def forward(self, x): 71 | residual = x 72 | out = self.conv1(x) 73 | out = self.norm1(out) 74 | out = self.relu(out) 75 | out = self.conv2(out) 76 | out = self.norm2(out) 77 | 78 | if self.downsample is not None: 79 | residual = self.downsample(x) 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | 
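# Illustrative usage sketch (hypothetical, kept entirely in comments): the ResNet variants
# defined below (ResNet14/18/34/50/101) plug BasicBlock or Bottleneck into ResNetBase.
# Assuming MinkowskiEngine and NumPy are installed, a toy forward pass could look like:
#
#   import numpy as np, torch
#   import MinkowskiEngine as ME
#   coords = ME.utils.batched_coordinates([np.random.randint(0, 100, (1000, 3))])
#   feats = torch.randn(coords.shape[0], 3)            # e.g. one RGB feature per voxel
#   net = ResNet14(in_channels=3, out_channels=20, D=3)
#   out = net(ME.SparseTensor(features=feats, coordinates=coords))
#   # 'out' is a sparse tensor with a single (1, 20) feature row after global pooling.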
class ResNetBase(nn.Module): 87 | BLOCK = None 88 | LAYERS = () 89 | INIT_DIM = 64 90 | PLANES = (64, 128, 256, 512) 91 | 92 | def __init__(self, in_channels, out_channels, D=3, expand_coordinates=False): 93 | nn.Module.__init__(self) 94 | self.D = D 95 | self.expand_coordinates = expand_coordinates 96 | assert self.BLOCK is not None 97 | 98 | self.network_initialization(in_channels, out_channels, D) 99 | self.weight_initialization() 100 | 101 | def network_initialization(self, in_channels, out_channels, D): 102 | 103 | self.inplanes = self.INIT_DIM 104 | self.conv1 = nn.Sequential( 105 | ME.MinkowskiConvolution( 106 | in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D 107 | ), 108 | ME.MinkowskiInstanceNorm(self.inplanes), 109 | ME.MinkowskiReLU(inplace=True), 110 | ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D), 111 | ) 112 | 113 | self.layer1 = self._make_layer( 114 | self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2 115 | ) 116 | self.layer2 = self._make_layer( 117 | self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2 118 | ) 119 | self.layer3 = self._make_layer( 120 | self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2 121 | ) 122 | self.layer4 = self._make_layer( 123 | self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2 124 | ) 125 | 126 | self.conv5 = nn.Sequential( 127 | ME.MinkowskiDropout(), 128 | ME.MinkowskiConvolution( 129 | self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D 130 | ), 131 | ME.MinkowskiInstanceNorm(self.inplanes), 132 | ME.MinkowskiGELU(), 133 | ) 134 | 135 | self.glob_pool = ME.MinkowskiGlobalMaxPooling() 136 | 137 | self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True) 138 | 139 | def weight_initialization(self): 140 | for m in self.modules(): 141 | if isinstance(m, ME.MinkowskiConvolution): 142 | ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu") 143 | 144 | if isinstance(m, ME.MinkowskiBatchNorm): 145 | nn.init.constant_(m.bn.weight, 1) 146 | nn.init.constant_(m.bn.bias, 0) 147 | 148 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1, expand_coordinates=False): 149 | downsample = None 150 | if stride != 1 or self.inplanes != planes * block.expansion: 151 | downsample = nn.Sequential( 152 | ME.MinkowskiConvolution( 153 | self.inplanes, 154 | planes * block.expansion, 155 | kernel_size=1, 156 | stride=stride, 157 | dimension=self.D, 158 | ), 159 | ME.MinkowskiBatchNorm(planes * block.expansion), 160 | ) 161 | layers = [] 162 | layers.append( 163 | block( 164 | self.inplanes, 165 | planes, 166 | stride=stride, 167 | dilation=dilation, 168 | downsample=downsample, 169 | dimension=self.D, 170 | expand_coordinates=expand_coordinates, 171 | ) 172 | ) 173 | self.inplanes = planes * block.expansion 174 | for i in range(1, blocks): 175 | layers.append( 176 | block( 177 | self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D, expand_coordinates=expand_coordinates, 178 | ) 179 | ) 180 | 181 | return nn.Sequential(*layers) 182 | 183 | def forward(self, x: ME.SparseTensor): 184 | x = self.conv1(x) 185 | x = self.layer1(x) 186 | x = self.layer2(x) 187 | x = self.layer3(x) 188 | x = self.layer4(x) 189 | x = self.conv5(x) 190 | x = self.glob_pool(x) 191 | return self.final(x) 192 | 193 | 194 | class ResNet14(ResNetBase): 195 | BLOCK = BasicBlock 196 | LAYERS = (1, 1, 1, 1) 197 | 198 | 199 | class ResNet18(ResNetBase): 200 | BLOCK = BasicBlock 201 | LAYERS = (2, 2, 2, 2) 202 | 203 | 204 | class ResNet34(ResNetBase): 205 | BLOCK = 
BasicBlock 206 | LAYERS = (3, 4, 6, 3) 207 | 208 | 209 | class ResNet50(ResNetBase): 210 | BLOCK = Bottleneck 211 | LAYERS = (3, 4, 6, 3) 212 | 213 | 214 | class ResNet101(ResNetBase): 215 | BLOCK = Bottleneck 216 | LAYERS = (3, 4, 23, 3) 217 | 218 | 219 | class ResFieldNetBase(ResNetBase): 220 | def network_initialization(self, in_channels, out_channels, D): 221 | field_ch = 32 222 | field_ch2 = 64 223 | self.field_network = nn.Sequential( 224 | ME.MinkowskiSinusoidal(in_channels, field_ch), 225 | ME.MinkowskiBatchNorm(field_ch), 226 | ME.MinkowskiReLU(inplace=True), 227 | ME.MinkowskiLinear(field_ch, field_ch), 228 | ME.MinkowskiBatchNorm(field_ch), 229 | ME.MinkowskiReLU(inplace=True), 230 | ME.MinkowskiToSparseTensor(), 231 | ) 232 | self.field_network2 = nn.Sequential( 233 | ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2), 234 | ME.MinkowskiBatchNorm(field_ch2), 235 | ME.MinkowskiReLU(inplace=True), 236 | ME.MinkowskiLinear(field_ch2, field_ch2), 237 | ME.MinkowskiBatchNorm(field_ch2), 238 | ME.MinkowskiReLU(inplace=True), 239 | ME.MinkowskiToSparseTensor(), 240 | ) 241 | 242 | ResNetBase.network_initialization(self, field_ch2, out_channels, D) 243 | 244 | def forward(self, x: ME.TensorField): 245 | otensor = self.field_network(x) 246 | otensor2 = self.field_network2(otensor.cat_slice(x)) 247 | return ResNetBase.forward(self, otensor2) 248 | 249 | 250 | class ResFieldNet14(ResFieldNetBase): 251 | BLOCK = BasicBlock 252 | LAYERS = (1, 1, 1, 1) 253 | 254 | 255 | class ResFieldNet18(ResFieldNetBase): 256 | BLOCK = BasicBlock 257 | LAYERS = (2, 2, 2, 2) 258 | 259 | 260 | class ResFieldNet34(ResFieldNetBase): 261 | BLOCK = BasicBlock 262 | LAYERS = (3, 4, 6, 3) 263 | 264 | 265 | class ResFieldNet50(ResFieldNetBase): 266 | BLOCK = Bottleneck 267 | LAYERS = (3, 4, 6, 3) 268 | 269 | 270 | class ResFieldNet101(ResFieldNetBase): 271 | BLOCK = Bottleneck 272 | LAYERS = (3, 4, 23, 3) 273 | 274 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Box2Mask 2 | 3 | > [Julian Chibane](http://virtualhumans.mpi-inf.mpg.de/people/Chibane.html), 4 | > [Francis Engelmann](https://francisengelmann.github.io/), 5 | > [Tuan Anh Tran](https://scholar.google.com/citations?user=5-0hLggAAAAJ&hl=en), 6 | > [Gerard Pons-Moll](http://virtualhumans.mpi-inf.mpg.de/people/pons-moll.html)
7 | > Box2Mask: Weakly Supervised 3D Semantic Instance Segmentation Using Bounding Boxes
8 | > In European Conference on Computer Vision (ECCV), 2022 9 | 10 | ![Teaser](teaser.jpeg) 11 | 12 | [Paper](http://virtualhumans.mpi-inf.mpg.de/papers/chibane22Box2Mask/Chibane_Box2Mask.pdf) - 13 | [Video](https://virtualhumans.mpi-inf.mpg.de/box2mask/#b2m_video) - 14 | [Project Website](https://virtualhumans.mpi-inf.mpg.de/box2mask/) - 15 | [Arxiv](https://arxiv.org/abs/2206.01203) - 16 | [Citation (Bibtex)](https://virtualhumans.mpi-inf.mpg.de/box2mask/#cite) 17 | 18 | ## Installations 19 | 20 | A linux system with python environment manager [conda](https://www.anaconda.com/) is required for the project. 21 | Follow the instructions [here](docs/installation.md) to setup the environment 22 | 23 | ## Data Setup 24 | 25 | **Scannet**: Download the [Scannet](http://www.scan-net.org/) dataset here. 26 | Download the preprocessed ground truth data ([gt_instance_data_txt.tar.gz](https://datasets.d2.mpi-inf.mpg.de/box2mask/gt_instance_data_txt.tar.gz)) and extract it to `data/scannet/` 27 | Each scene is stored with the name of format `scene%04d_%02d` (see [Scannet](https://raw.githubusercontent.com/ScanNet/ScanNet/master/README.md)). 28 | The data should be organized as follows for our project. 29 | ``` 30 | box2mask/data/scannet/ 31 | └── scans/ # contains 1513 train/valid scences 32 | ├── scene0383_02/ # each scene has the name in format `scene%04d_%02d` 33 | ├── scene0383_02_vh_clean.ply 34 | ├── scene0383_02.sens 35 | ├── scene0383_02_vh_clean_2.0.010000.segs.json 36 | ├── scene0383_02.aggregation.json, _vh_clean.aggregation.json 37 | ├── scene0383_02_vh_clean_2.0.010000.segs.json, _vh_clean.segs.json 38 | ├── scene0383_02_vh_clean_2.labels.ply 39 | ├── scene0383_02_2d-label.zip 40 | ├── scene0383_02_2d-instance.zip 41 | ├── scene0383_02_2d-label-filt.zip 42 | ├── scene0383_02_2d-instance-filt.zip 43 | ├── scene0515_02/ 44 | ├── scene0643_00/ 45 | ... 46 | └── scans_test/ # contains 100 test scenes 47 | ├── scene0731_00/ 48 | ├── scene0731_00.sens 49 | ├── scene0731_00.txt 50 | ├── scene0731_00_vh_clean_2.ply 51 | ├── scene0731_00_vh_clean.ply 52 | ├── scene0739_00/ 53 | ├── scene0747_00/ 54 | ... 55 | └── scannetv2_official_split.npz # contains data splits info 56 | └── gt_instance_data_txt/ # contains GT segmentations as txt files 57 | ├── scene0383_02.txt 58 | ├── scene0643_00.txt 59 | ... 60 | ``` 61 | 62 | **Arkit**: See [Arkitscenes instruction](docs/arkitscenes.md). 63 | 64 | **S3DIS**: See [S3DIS instruction](docs/s3dis.md). 65 | 66 | 67 | ## Quick Start with Pretrained Model 68 | 69 | We provide a pretrained checkpoint for a quick start with the method. 70 | First, from the folder where you clone the project, run the following command to download the pretrained checkpoint: 71 | ```bash 72 | cd box2mask # Navigate to the root folder 73 | mkdir -p experiments/scannet/checkpoints/ 74 | cd experiments/scannet/checkpoints/ 75 | wget https://datasets.d2.mpi-inf.mpg.de/box2mask/checkpoint_101h:54m:35s_366875.3242661953.tar 76 | cd ../../../ 77 | ``` 78 | 79 | Next, to predict a scene in the train set or test set, run the prediction from the project home folder: 80 | ```bash 81 | python models/evaluation.py --config configs/scannet.txt --predict_specific_scene scene0293_00 82 | ``` 83 | where `--predict_specific_scene` specifies the name of the scene that will be processed. 84 | The result of the prediction is saved in `experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/` as `pred_instances.ply` and `pred_semantics.ply`. 
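For a quick offline look at these exports (without starting the web viewer), you can load them with `open3d`, which is already listed in `env.yml`. The snippet below is only a minimal sketch, run from the project root; if the files are stored as meshes rather than point clouds, use `o3d.io.read_triangle_mesh` instead:
```python
import open3d as o3d

# Path produced by the prediction command above; adapt checkpoint/scene names to your run.
viz_dir = "experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/"

instances = o3d.io.read_point_cloud(viz_dir + "pred_instances.ply")
semantics = o3d.io.read_point_cloud(viz_dir + "pred_semantics.ply")
print(instances, semantics)                      # point counts of both exports
o3d.visualization.draw_geometries([instances])   # opens a simple local viewer
```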
85 | To visualize the result using interactive web server, run: 86 | ```bash 87 | cd experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/ 88 | python -m http.server 6008 89 | ``` 90 | and follow the on-screen instructions. 91 | 92 | ## Training 93 | 94 | Start to train a model with a specific configuration file using: 95 | 96 | ```bash 97 | python models/training.py --config configs/scannet.txt 98 | ``` 99 | The command above will train with the Scannet dataset. 100 | You can use a different config file to train with a different dataset. 101 | To train with the Arkitscenes dataset use `configs/arkitscenes.txt` or to train with the S3DIS dataset with area 1 as the validation set use `config/s3dis_fold1` (the data needs to be setup first, see Sec. 'Data Setup') 102 | > Note: The above configuration uses a batch of 8 scenes, which assumes ~48GB GPURAM. 103 | > RAM usage can be decreased via a smaller batch size, see parameter `--batch_size`. 104 | 105 | ## Prediction and visualization 106 | 107 | The following command makes a prediction for the validation set and computes the validation score (reproducing the results from table 1 in our paper). 108 | ```bash 109 | python models/evaluation.py --config configs/scannet.txt --fixed_seed 10 110 | ``` 111 | To visualize the prediction for the validation set, add option `--produce_visualizations [scene_name]` to the above command, where `[scene_name]` is the name of the scene (eg. `scene0293_00` in Scannet or `6667847` in Arkitscenes or `Area_5.office_13` in S3DIS). 112 | The visualization files will be stored in `./experiments/[config_name]/results/[checkpoint]/viz/` where `[checkpoint]` is the name of the checkpoint used for prediction and `[config_name]` is the name of the config (`scannet` in this example). 113 | The interactive visualization server can be started using the command bellow. 114 | ```bash 115 | cd ./experiments/scannet/results/[checkpoint] 116 | python -m http.server 6008 117 | ``` 118 | Follow the on-screen instructions to find the visualizations in your browser. 119 | 120 | 121 | ## Prediction on the ScanNet test set 122 | The oversegmentations of scannet test scenes are needed for our project. Oversegmentations are already included for the train and validation scenes. 123 | For test scenes, see the [instruction](dataprocessing/oversegmentation/README.md) to compile the oversegmentation program. 124 | 125 | Next, the following script will produce the oversegmentations for test scenes. The oversegmentations results will be stored at `./data/scannet/scans_test_segmented` 126 | ```bash 127 | mkdirs -p ./data/scannet/scans_test_segmented 128 | cd dataprocessing/oversegmentation/ 129 | python run_segmentator.py 130 | ``` 131 | 132 | To run the ScanNet evaluation on the test set, we need to add the parameter `--submission_write_out_testset`. 133 | Without this parameter the validation set is evaluated as seen in the previous section. 134 | ```bash 135 | python models/evaluation.py --config configs/scannet.txt --submission_write_out_testset --fixed_seed 100 136 | ``` 137 | 138 | Resulting predictions files will be stored in `./experiments/scannet/results/[checkpoint]`. 139 | Our results are formatted into Scannet submission format ([see documentation](https://kaldir.vc.in.tum.de/scannet_benchmark/documentation)). 140 | `--fixed_seed` specifies a seed for test time augmentation. 141 | Results can be visualized interactively, in the same fashion as shown in the previous section. 
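As a rough sanity check before uploading, you can inspect one of the exported scene files. According to the linked ScanNet documentation, each scene-level txt lists one predicted instance per line as a relative mask-file path, a semantic label id, and a confidence; the helper below is a hypothetical sketch (not part of this repository) based on that format:
```python
import os

def print_submission_summary(scene_txt):
    """Hypothetical helper: summarize the instances listed in one scene-level prediction file."""
    base_dir = os.path.dirname(scene_txt)
    with open(scene_txt) as f:
        for line in f:
            rel_mask, label_id, confidence = line.split()
            n_fg = sum(1 for v in open(os.path.join(base_dir, rel_mask)) if v.strip() == "1")
            print(f"{rel_mask}: label {label_id}, conf {float(confidence):.2f}, {n_fg} foreground vertices")
```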
142 | ## Augmented Data 143 | 144 | This [instruction](data/augmented_BBs/README.md) shows how to reproduce the augmented bounding box labels experiments and how to get the data. 145 | 146 | ## Arkitscenes Data 147 | 148 | See [instruction](docs/arkitscenes.md) to reproduce the results of arkitscenes. 149 | 150 | ## S3DIS Data 151 | 152 | See [instruction](docs/s3dis.md) to reproduce the results of S3DIS. 153 | 154 | ## Code structure 155 | 156 | The code structure can be found [here](docs/code_structure.md). 157 | 158 | 159 | ## License 160 | Copyright (c) 2022 Julian Chibane, Max-Planck-Gesellschaft 161 | 162 | By using this code you agree to the terms in the LICENSE. 163 | 164 | Moreover, you agree to cite the `Box2Mask: Weakly Supervised 3D Semantic Instance Segmentation Using Bounding Boxes` paper in 165 | any documents that report on research using this software or the manuscript. 166 | 167 | 168 |
169 | Show LICENSE (click to expand) 170 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use this software and associated documentation files (the "Software"). 171 | 172 | The authors hereby grant you a non-exclusive, non-transferable, free of charge right to copy, modify, merge, publish, distribute, and sublicense the Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects. 173 | 174 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artefacts for commercial purposes. 175 | For commercial inquiries, please see above contact information. 176 | 177 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 178 | 179 | You understand and agree that the authors are under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Software. The authors nevertheless reserve the right to update, modify, or discontinue the Software at any time. 180 | 181 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 182 | 183 |
184 | 185 | -------------------------------------------------------------------------------- /utils/evaluate_detections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recal and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | from multiprocessing import Pool 24 | from utils.metric_util import calc_iou # axis-aligned 3D box IoU 25 | from utils.box_util import box3d_iou 26 | 27 | 28 | def voc_ap(rec, prec, use_07_metric=False): 29 | """ ap = voc_ap(rec, prec, [use_07_metric]) 30 | Compute VOC AP given precision and recall. 31 | If use_07_metric is true, uses the 32 | VOC 07 11 point method (default:False). 33 | """ 34 | if use_07_metric: 35 | # 11 point metric 36 | ap = 0. 37 | for t in np.arange(0., 1.1, 0.1): 38 | if np.sum(rec >= t) == 0: 39 | p = 0 40 | else: 41 | p = np.max(prec[rec >= t]) 42 | ap = ap + p / 11. 43 | else: 44 | # correct AP calculation 45 | # first append sentinel values at the end 46 | mrec = np.concatenate(([0.], rec, [1.])) 47 | mpre = np.concatenate(([0.], prec, [0.])) 48 | 49 | # compute the precision envelope 50 | for i in range(mpre.size - 1, 0, -1): 51 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 52 | 53 | # to calculate area under PR curve, look for points 54 | # where X axis (recall) changes value 55 | i = np.where(mrec[1:] != mrec[:-1])[0] 56 | 57 | # and sum (\Delta recall) * prec 58 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 59 | return ap 60 | 61 | 62 | def get_iou(bb1, bb2): 63 | """ Compute IoU of two bounding boxes. 64 | ** Define your bod IoU function HERE ** 65 | """ 66 | # pass 67 | iou3d = calc_iou(bb1, bb2) 68 | return iou3d 69 | 70 | 71 | def get_iou_obb(bb1, bb2): 72 | iou3d, iou2d = box3d_iou(bb1, bb2) 73 | return iou3d 74 | 75 | 76 | def get_iou_main(get_iou_func, args): 77 | return get_iou_func(*args) 78 | 79 | 80 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 81 | """ Generic functions to compute precision/recall for object detection 82 | for a single class. 
83 | Input: 84 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 85 | gt: map of {img_id: [bbox]} 86 | ovthresh: scalar, iou threshold 87 | use_07_metric: bool, if True use VOC07 11 point method 88 | Output: 89 | rec: numpy array of length nd 90 | prec: numpy array of length nd 91 | ap: scalar, average precision 92 | """ 93 | 94 | # construct gt objects 95 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 96 | npos = 0 97 | for img_id in gt.keys(): 98 | bbox = np.array(gt[img_id]) 99 | det = [False] * len(bbox) 100 | npos += len(bbox) 101 | class_recs[img_id] = {'bbox': bbox, 'det': det} 102 | # pad empty list to all other imgids 103 | for img_id in pred.keys(): 104 | if img_id not in gt: 105 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 106 | 107 | # construct dets 108 | image_ids = [] 109 | confidence = [] 110 | BB = [] 111 | for img_id in pred.keys(): 112 | for box, score in pred[img_id]: 113 | image_ids.append(img_id) 114 | confidence.append(score) 115 | BB.append(box) 116 | confidence = np.array(confidence) 117 | 118 | BB = np.array(BB) 119 | 120 | # sort by confidence 121 | sorted_ind = np.argsort(-confidence) 122 | sorted_scores = np.sort(-confidence) 123 | BB = BB[sorted_ind, ...] 124 | image_ids = [image_ids[x] for x in sorted_ind] 125 | 126 | # go down dets and mark TPs and FPs 127 | nd = len(image_ids) 128 | tp = np.zeros(nd) 129 | fp = np.zeros(nd) 130 | for d in range(nd): 131 | R = class_recs[image_ids[d]] 132 | try: 133 | bb = BB[d, ...].astype(float) 134 | except: 135 | bb = BB[d, ...].tolist().astype(float) 136 | ovmax = -np.inf 137 | BBGT = R['bbox'].astype(float) 138 | 139 | if BBGT.size > 0: 140 | # compute overlaps 141 | for j in range(BBGT.shape[0]): 142 | iou = get_iou_main(get_iou_func, (bb, BBGT[j, ...])) 143 | if iou > ovmax: 144 | ovmax = iou 145 | jmax = j 146 | 147 | if ovmax > ovthresh: 148 | if not R['det'][jmax]: 149 | tp[d] = 1. 150 | R['det'][jmax] = 1 151 | else: 152 | fp[d] = 1. 153 | else: 154 | fp[d] = 1. 155 | 156 | # compute precision recall 157 | fp = np.cumsum(fp) 158 | tp = np.cumsum(tp) 159 | rec = tp / float(npos) 160 | # avoid divide by zero in case the first detection matches a difficult 161 | # ground truth 162 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 163 | ap = voc_ap(rec, prec, use_07_metric) 164 | 165 | return rec, prec, ap 166 | 167 | 168 | def eval_det_cls_wrapper(arguments): 169 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 170 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 171 | return (rec, prec, ap) 172 | 173 | 174 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 175 | """ Generic functions to compute precision/recall for object detection 176 | for multiple classes. 
177 | Input: 178 | pred_all: map of {img_id: [(classname, bbox, score)]} 179 | gt_all: map of {img_id: [(classname, bbox)]} 180 | ovthresh: scalar, iou threshold 181 | use_07_metric: bool, if true use VOC07 11 point method 182 | Output: 183 | rec: {classname: rec} 184 | prec: {classname: prec_all} 185 | ap: {classname: scalar} 186 | """ 187 | pred = {} # map {classname: pred} 188 | gt = {} # map {classname: gt} 189 | for img_id in pred_all.keys(): 190 | for classname, bbox, score in pred_all[img_id]: 191 | if classname not in pred: pred[classname] = {} 192 | if img_id not in pred[classname]: 193 | pred[classname][img_id] = [] 194 | if classname not in gt: 195 | gt[classname] = {} 196 | if img_id not in gt[classname]: 197 | gt[classname][img_id] = [] 198 | pred[classname][img_id].append((bbox, score)) 199 | for img_id in gt_all.keys(): 200 | for classname, bbox in gt_all[img_id]: 201 | if classname == 22: 202 | print(img_id, classname) 203 | if classname not in gt: 204 | gt[classname] = {} 205 | if img_id not in gt[classname]: 206 | gt[classname][img_id] = [] 207 | gt[classname][img_id].append(bbox) 208 | 209 | rec = {} 210 | prec = {} 211 | ap = {} 212 | for classname in gt.keys(): 213 | try: 214 | print('Computing AP for class: ', classname) 215 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, 216 | use_07_metric, get_iou_func) 217 | print(classname, ap[classname]) 218 | except KeyError as exception: 219 | print('KeyError:', exception) 220 | return rec, prec, ap 221 | 222 | 223 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 224 | """ Generic functions to compute precision/recall for object detection 225 | for multiple classes. 226 | Input: 227 | pred_all: map of {img_id: [(classname, bbox, score)]} 228 | gt_all: map of {img_id: [(classname, bbox)]} 229 | ovthresh: scalar, iou threshold 230 | use_07_metric: bool, if true use VOC07 11 point method 231 | Output: 232 | rec: {classname: rec} 233 | prec: {classname: prec_all} 234 | ap: {classname: scalar} 235 | """ 236 | pred = {} # map {classname: pred} 237 | gt = {} # map {classname: gt} 238 | for img_id in pred_all.keys(): 239 | for classname, bbox, score in pred_all[img_id]: 240 | if classname not in pred: 241 | pred[classname] = {} 242 | if img_id not in pred[classname]: 243 | pred[classname][img_id] = [] 244 | if classname not in gt: 245 | gt[classname] = {} 246 | if img_id not in gt[classname]: 247 | gt[classname][img_id] = [] 248 | pred[classname][img_id].append((bbox, score)) 249 | for img_id in gt_all.keys(): 250 | for classname, bbox in gt_all[img_id]: 251 | if classname not in gt: 252 | gt[classname] = {} 253 | if img_id not in gt[classname]: 254 | gt[classname][img_id] = [] 255 | gt[classname][img_id].append(bbox) 256 | 257 | rec = {} 258 | prec = {} 259 | ap = {} 260 | p = Pool(processes=10) 261 | ret_values = p.map(eval_det_cls_wrapper, 262 | [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in 263 | gt.keys() if classname in pred]) 264 | p.close() 265 | for i, classname in enumerate(gt.keys()): 266 | if classname in pred: 267 | rec[classname], prec[classname], ap[classname] = ret_values[i] 268 | else: 269 | rec[classname] = 0 270 | prec[classname] = 0 271 | ap[classname] = 0 272 | print(classname, ap[classname]) 273 | 274 | return rec, prec, ap 275 | 276 | 277 | if __name__ == "__main__": 278 | classname = 'chair' 279 | bbox = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0]) 280 | 
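# Boxes in this smoke test are 6-dim (cx, cy, cz, dx, dy, dz): center plus side lengths,
# matching the convention used by calc_iou in utils/metric_util.py.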
score = 0.9 281 | pred_all = {'01': [[classname, bbox, score]]} 282 | gt_all = {'01': [[classname, bbox]]} 283 | 284 | 285 | rec, prec, ap = eval_det(pred_all, gt_all, 286 | ovthresh=0.25, 287 | use_07_metric=False, 288 | get_iou_func=get_iou_obb) 289 | 290 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/segmentator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define TINYOBJLOADER_IMPLEMENTATION 9 | #include "tiny_obj_loader.h" 10 | #include "tinyply.h" 11 | 12 | using std::vector; 13 | using std::string; 14 | 15 | // felzenswalb segmentation (https://cs.brown.edu/~pff/segment/index.html) 16 | 17 | // disjoint-set forests using union-by-rank and path compression (sort of). 18 | typedef struct { 19 | int rank; 20 | int p; 21 | int size; 22 | } uni_elt; 23 | 24 | class universe { 25 | public: 26 | universe(int elements) { 27 | elts = new uni_elt[elements]; 28 | num = elements; 29 | for (int i = 0; i < elements; i++) { 30 | elts[i].rank = 0; 31 | elts[i].size = 1; 32 | elts[i].p = i; 33 | } 34 | } 35 | ~universe() { delete [] elts; } 36 | int find(int x) { 37 | int y = x; 38 | while (y != elts[y].p) 39 | y = elts[y].p; 40 | elts[x].p = y; 41 | return y; 42 | } 43 | void join(int x, int y) { 44 | if (elts[x].rank > elts[y].rank) { 45 | elts[y].p = x; 46 | elts[x].size += elts[y].size; 47 | } else { 48 | elts[x].p = y; 49 | elts[y].size += elts[x].size; 50 | if (elts[x].rank == elts[y].rank) 51 | elts[y].rank++; 52 | } 53 | num--; 54 | } 55 | int size(int x) const { return elts[x].size; } 56 | int num_sets() const { return num; } 57 | private: 58 | uni_elt *elts; 59 | int num; 60 | }; 61 | 62 | typedef struct { 63 | float w; 64 | int a, b; 65 | } edge; 66 | 67 | bool operator<(const edge &a, const edge &b) { 68 | return a.w < b.w; 69 | } 70 | 71 | universe *segment_graph(int num_vertices, int num_edges, edge *edges, float c) { 72 | std::sort(edges, edges + num_edges); // sort edges by weight 73 | universe *u = new universe(num_vertices); // make a disjoint-set forest 74 | float *threshold = new float[num_vertices]; 75 | for (int i = 0; i < num_vertices; i++) { threshold[i] = c; } 76 | // for each edge, in non-decreasing weight order 77 | for (int i = 0; i < num_edges; i++) { 78 | edge *pedge = &edges[i]; 79 | // components conected by this edge 80 | int a = u->find(pedge->a); 81 | int b = u->find(pedge->b); 82 | if (a != b) { 83 | if ((pedge->w <= threshold[a]) && (pedge->w <= threshold[b])) { 84 | u->join(a, b); 85 | a = u->find(a); 86 | threshold[a] = pedge->w + (c / u->size(a)); 87 | } 88 | } 89 | } 90 | delete [] threshold; 91 | return u; 92 | } 93 | 94 | // simple vec3f class 95 | class vec3f { 96 | public: 97 | float x, y, z; 98 | vec3f() { x = 0; y = 0; z = 0; } 99 | vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 100 | vec3f operator+(const vec3f& o) { 101 | return vec3f{x+o.x, y+o.y, z+o.z}; 102 | } 103 | vec3f operator-(const vec3f& o) { 104 | return vec3f{x-o.x, y-o.y, z-o.z}; 105 | } 106 | }; 107 | vec3f cross(const vec3f& u, const vec3f& v) { 108 | vec3f c = {u.y*v.z - u.z*v.y, u.z*v.x - u.x*v.z, u.x*v.y - u.y*v.x}; 109 | float n = sqrtf(c.x*c.x + c.y*c.y + c.z*c.z); 110 | c.x /= n; c.y /= n; c.z /= n; 111 | return c; 112 | } 113 | vec3f lerp(const vec3f& a, const vec3f& b, const float v) { 114 | const float u = 1.0f-v; 115 | return vec3f(v*b.x + u*a.x, v*b.y + u*a.y, 
v*b.z + u*a.z); 116 | } 117 | 118 | inline bool ends_with(const std::string & value, const std::string& ending) { 119 | if (ending.size() > value.size()) { return false; } 120 | return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); 121 | } 122 | 123 | vector segment(const string& meshFile, const float kthr, const int segMinVerts) { 124 | //std::cout << "Loading mesh " << meshFile << std::endl; 125 | vector verts; 126 | vector faces; 127 | size_t vertexCount = 0; 128 | size_t faceCount = 0; 129 | 130 | if (ends_with(meshFile, ".ply") || ends_with(meshFile, ".PLY")) { 131 | // Load the geometry from .ply 132 | std::ifstream ss(meshFile, std::ios::binary); 133 | tinyply::PlyFile file(ss); 134 | vertexCount = file.request_properties_from_element("vertex", { "x", "y", "z" }, verts); 135 | // Try getting vertex_indices or vertex_index 136 | faceCount = file.request_properties_from_element("face", { "vertex_indices" }, faces, 3); 137 | if (faceCount == 0) { 138 | faceCount = file.request_properties_from_element("face", { "vertex_index" }, faces, 3); 139 | } 140 | file.read(ss); 141 | } else if (ends_with(meshFile, ".obj") || ends_with(meshFile, ".OBJ")) { 142 | // Load the geometry from .obj 143 | tinyobj::attrib_t attrib; 144 | vector shapes; 145 | vector materials; 146 | string err; 147 | bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &err, meshFile.c_str(), NULL, false); 148 | if (!err.empty()) { // `err` may contain warning message. 149 | std::cerr << err << std::endl; 150 | } 151 | if (!ret) { 152 | exit(1); 153 | } 154 | if (shapes.size() > 1) { 155 | std::cerr << "Warning: only single mesh OBJ supported, segmenting first mesh" << std::endl; 156 | } 157 | 158 | // Keep with original vertices (we don't want them duplicated) 159 | vertexCount = attrib.vertices.size() / 3; 160 | for (size_t v = 0; v < attrib.vertices.size(); v++) { 161 | verts.push_back(attrib.vertices[v]); 162 | } 163 | 164 | const auto& mesh = shapes[0].mesh; 165 | faceCount = mesh.num_face_vertices.size(); 166 | for (size_t f = 0; f < faceCount; f++) { 167 | for (size_t v = 0; v < 3; v++) { 168 | const size_t idx = mesh.indices[3 * f + v].vertex_index; 169 | faces.push_back(idx); 170 | } 171 | } 172 | } 173 | 174 | printf("Read mesh with vertexCount %lu %lu, faceCount %lu %lu\n", 175 | vertexCount, verts.size(), faceCount, faces.size()); 176 | 177 | // create points, normals, edges, counts vectors 178 | vector points(vertexCount); 179 | vector normals(vertexCount); 180 | vector counts(verts.size(), 0); 181 | const size_t edgesCount = faceCount*3; 182 | edge* edges = new edge[edgesCount]; 183 | 184 | // Compute face normals and smooth into vertex normals 185 | for (int i = 0; i < faceCount; i++) { 186 | const int fbase = 3*i; 187 | const uint32_t i1 = faces[fbase]; 188 | const uint32_t i2 = faces[fbase+1]; 189 | const uint32_t i3 = faces[fbase+2]; 190 | int vbase = 3*i1; 191 | vec3f p1(verts[vbase], verts[vbase+1], verts[vbase+2]); 192 | vbase = 3*i2; 193 | vec3f p2(verts[vbase], verts[vbase+1], verts[vbase+2]); 194 | vbase = 3*i3; 195 | vec3f p3(verts[vbase], verts[vbase+1], verts[vbase+2]); 196 | points[i1] = p1; points[i2] = p2; points[i3] = p3; 197 | const int ebase = 3*i; 198 | edges[ebase ].a = i1; edges[ebase ].b = i2; 199 | edges[ebase+1].a = i1; edges[ebase+1].b = i3; 200 | edges[ebase+2].a = i3; edges[ebase+2].b = i2; 201 | 202 | // smoothly blend face normals into vertex normals 203 | vec3f normal = cross(p2 - p1, p3 - p1); 204 | normals[i1] = lerp(normals[i1], normal, 1.0f / (counts[i1] + 
1.0f)); 205 | normals[i2] = lerp(normals[i2], normal, 1.0f / (counts[i2] + 1.0f)); 206 | normals[i3] = lerp(normals[i3], normal, 1.0f / (counts[i3] + 1.0f)); 207 | counts[i1]++; counts[i2]++; counts[i3]++; 208 | } 209 | 210 | //std::cout << "Constructing edge graph based on mesh connectivity..." << std::endl; 211 | for (int i = 0; i < edgesCount; i++) { 212 | int a = edges[i].a; 213 | int b = edges[i].b; 214 | 215 | vec3f& n1 = normals[a]; 216 | vec3f& n2 = normals[b]; 217 | vec3f& p1 = points[a]; 218 | vec3f& p2 = points[b]; 219 | 220 | float dx = p2.x - p1.x; 221 | float dy = p2.y - p1.y; 222 | float dz = p2.z - p1.z; 223 | float dd = sqrtf(dx * dx + dy * dy + dz * dz); dx /= dd; dy /= dd; dz /= dd; 224 | float dot = n1.x * n2.x + n1.y * n2.y + n1.z * n2.z; 225 | float dot2 = n2.x * dx + n2.y * dy + n2.z * dz; 226 | float ww = 1.0f - dot; 227 | if (dot2 > 0) { ww = ww * ww; } // make it much less of a problem if convex regions have normal difference 228 | edges[i].w = ww; 229 | } 230 | //std::cout << "Constructed graph" << std::endl; 231 | 232 | // Segment! 233 | universe* u = segment_graph(vertexCount, edgesCount, edges, kthr); 234 | //std::cout << "Segmented" << std::endl; 235 | 236 | // Joining small segments 237 | for (int j = 0; j < edgesCount; j++) { 238 | int a = u->find(edges[j].a); 239 | int b = u->find(edges[j].b); 240 | if ((a != b) && ((u->size(a) < segMinVerts) || (u->size(b) < segMinVerts))) { 241 | u->join(a, b); 242 | } 243 | } 244 | 245 | // Return segment indices as vector 246 | vector outComps(vertexCount); 247 | for (int q = 0; q < vertexCount; q++) { 248 | outComps[q] = u->find(q); 249 | } 250 | return outComps; 251 | } 252 | 253 | void writeToJSON(const string& filename, const string& scanId, 254 | const float kthr, const int segMinVerts, const vector& segIndices) { 255 | std::ofstream ofs(filename); 256 | ofs << "{"; 257 | ofs << "\"params\":{\"kThresh\":" << kthr << ",\"segMinVerts\":" << segMinVerts << "},"; 258 | ofs << "\"sceneId\":\"" << scanId << "\","; 259 | ofs << "\"segIndices\":["; 260 | for (int i = 0; i < segIndices.size(); i++) { 261 | if (i > 0) { ofs << ","; } 262 | ofs << segIndices[i]; 263 | } 264 | ofs << "]}"; 265 | ofs.close(); 266 | } 267 | 268 | int main(int argc, const char** argv) { 269 | if (argc < 2) { 270 | printf("Usage: ./segmentator input.ply [kThresh] [segMinVerts] (defaults: kThresh=0.01 segMinVerts=20)\n"); 271 | exit(-1); 272 | } else { 273 | const string plyFile = argv[1]; 274 | const float kthr = argc > 2 ? (float)atof(argv[2]) : 0.01f; 275 | const int segMinVerts = argc > 3 ? atoi(argv[3]) : 20; 276 | printf("Segmenting %s with kThresh=%f, segMinVerts=%d ...\n", plyFile.c_str(), kthr, segMinVerts); 277 | const vector comps = segment(plyFile, kthr, segMinVerts); 278 | std::unordered_set comp_indices; 279 | for (int i = 0; i < comps.size(); i++) { 280 | comp_indices.insert(comps[i]); 281 | } 282 | 283 | const string baseName = plyFile.substr(0, plyFile.find_last_of(".")); 284 | const string sceneName = baseName.substr(baseName.find_last_of("/")); 285 | const int lastslash = plyFile.find_last_of("/"); 286 | const string scanId = lastslash > 0 ? baseName.substr(lastslash) : baseName; 287 | // string segFile = baseName + "." + std::to_string(kthr) + ".segs.json"; 288 | const string segFilePrefix = argv[4]; 289 | string segFile = segFilePrefix + sceneName + "." 
+ std::to_string(kthr) + ".segs.json"; 290 | writeToJSON(segFile, scanId, kthr, segMinVerts, comps); 291 | printf("Segmentation written to %s with %lu segments\n", segFile.c_str(), comp_indices.size()); 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/tinyply.cpp: -------------------------------------------------------------------------------- 1 | // This software is in the public domain. Where that dedication is not 2 | // recognized, you are granted a perpetual, irrevocable license to copy, 3 | // distribute, and modify this file as you see fit. 4 | // Authored in 2015 by Dimitri Diakopoulos (http://www.dimitridiakopoulos.com) 5 | // https://github.com/ddiakopoulos/tinyply 6 | 7 | #include "tinyply.h" 8 | 9 | using namespace tinyply; 10 | using namespace std; 11 | 12 | ////////////////// 13 | // PLY Property // 14 | ////////////////// 15 | 16 | PlyProperty::PlyProperty(std::istream & is) : isList(false) 17 | { 18 | parse_internal(is); 19 | } 20 | 21 | void PlyProperty::parse_internal(std::istream & is) 22 | { 23 | string type; 24 | is >> type; 25 | if (type == "list") 26 | { 27 | string countType; 28 | is >> countType >> type; 29 | listType = property_type_from_string(countType); 30 | isList = true; 31 | } 32 | propertyType = property_type_from_string(type); 33 | is >> name; 34 | } 35 | 36 | ///////////////// 37 | // PLY Element // 38 | ///////////////// 39 | 40 | PlyElement::PlyElement(std::istream & is) 41 | { 42 | parse_internal(is); 43 | } 44 | 45 | void PlyElement::parse_internal(std::istream & is) 46 | { 47 | is >> name >> size; 48 | } 49 | 50 | ////////////// 51 | // PLY File // 52 | ////////////// 53 | 54 | PlyFile::PlyFile(std::istream & is) 55 | { 56 | if (!parse_header(is)) 57 | { 58 | throw std::runtime_error("file is not ply or encounted junk in header"); 59 | } 60 | } 61 | 62 | bool PlyFile::parse_header(std::istream & is) 63 | { 64 | std::string line; 65 | bool gotMagic = false; 66 | while (std::getline(is, line)) 67 | { 68 | std::istringstream ls(line); 69 | std::string token; 70 | ls >> token; 71 | if (token == "ply" || token == "PLY" || token == "") 72 | { 73 | gotMagic = true; 74 | continue; 75 | } 76 | else if (token == "comment") read_header_text(line, ls, comments, 8); 77 | else if (token == "format") read_header_format(ls); 78 | else if (token == "element") read_header_element(ls); 79 | else if (token == "property") read_header_property(ls); 80 | else if (token == "obj_info") read_header_text(line, ls, objInfo, 9); 81 | else if (token == "end_header") break; 82 | else return false; 83 | } 84 | return true; 85 | } 86 | 87 | void PlyFile::read_header_text(std::string line, std::istream & is, std::vector& place, int erase) 88 | { 89 | place.push_back((erase > 0) ? 
line.erase(0, erase) : line); 90 | } 91 | 92 | void PlyFile::read_header_format(std::istream & is) 93 | { 94 | std::string s; 95 | (is >> s); 96 | if (s == "binary_little_endian") isBinary = true; 97 | else if (s == "binary_big_endian") isBinary = isBigEndian = true; 98 | } 99 | 100 | void PlyFile::read_header_element(std::istream & is) 101 | { 102 | get_elements().emplace_back(is); 103 | } 104 | 105 | void PlyFile::read_header_property(std::istream & is) 106 | { 107 | get_elements().back().properties.emplace_back(is); 108 | } 109 | 110 | size_t PlyFile::skip_property_binary(const PlyProperty & property, std::istream & is) 111 | { 112 | static std::vector skip(PropertyTable[property.propertyType].stride); 113 | if (property.isList) 114 | { 115 | size_t listSize = 0; 116 | size_t dummyCount = 0; 117 | read_property_binary(property.listType, &listSize, dummyCount, is); 118 | for (size_t i = 0; i < listSize; ++i) is.read(skip.data(), PropertyTable[property.propertyType].stride); 119 | return listSize; 120 | } 121 | else 122 | { 123 | is.read(skip.data(), PropertyTable[property.propertyType].stride); 124 | return 0; 125 | } 126 | } 127 | 128 | void PlyFile::skip_property_ascii(const PlyProperty & property, std::istream & is) 129 | { 130 | std::string skip; 131 | if (property.isList) 132 | { 133 | int listSize; 134 | is >> listSize; 135 | for (int i = 0; i < listSize; ++i) is >> skip; 136 | } 137 | else is >> skip; 138 | } 139 | 140 | void PlyFile::read_property_binary(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) 141 | { 142 | static std::vector src(PropertyTable[t].stride); 143 | is.read(src.data(), PropertyTable[t].stride); 144 | 145 | switch (t) 146 | { 147 | case PlyProperty::Type::INT8: ply_cast(dest, src.data(), isBigEndian); break; 148 | case PlyProperty::Type::UINT8: ply_cast(dest, src.data(), isBigEndian); break; 149 | case PlyProperty::Type::INT16: ply_cast(dest, src.data(), isBigEndian); break; 150 | case PlyProperty::Type::UINT16: ply_cast(dest, src.data(), isBigEndian); break; 151 | case PlyProperty::Type::INT32: ply_cast(dest, src.data(), isBigEndian); break; 152 | case PlyProperty::Type::UINT32: ply_cast(dest, src.data(), isBigEndian); break; 153 | case PlyProperty::Type::FLOAT32: ply_cast_float(dest, src.data(), isBigEndian); break; 154 | case PlyProperty::Type::FLOAT64: ply_cast_double(dest, src.data(), isBigEndian); break; 155 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 156 | } 157 | destOffset += PropertyTable[t].stride; 158 | } 159 | 160 | void PlyFile::read_property_ascii(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) 161 | { 162 | switch (t) 163 | { 164 | case PlyProperty::Type::INT8: *((int8_t *)dest) = ply_read_ascii(is); break; 165 | case PlyProperty::Type::UINT8: *((uint8_t *)dest) = ply_read_ascii(is); break; 166 | case PlyProperty::Type::INT16: ply_cast_ascii(dest, is); break; 167 | case PlyProperty::Type::UINT16: ply_cast_ascii(dest, is); break; 168 | case PlyProperty::Type::INT32: ply_cast_ascii(dest, is); break; 169 | case PlyProperty::Type::UINT32: ply_cast_ascii(dest, is); break; 170 | case PlyProperty::Type::FLOAT32: ply_cast_ascii(dest, is); break; 171 | case PlyProperty::Type::FLOAT64: ply_cast_ascii(dest, is); break; 172 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 173 | } 174 | destOffset += PropertyTable[t].stride; 175 | } 176 | 177 | void PlyFile::write_property_ascii(PlyProperty::Type t, std::ostream & os, uint8_t 
* src, size_t & srcOffset) 178 | { 179 | switch (t) 180 | { 181 | case PlyProperty::Type::INT8: os << static_cast(*reinterpret_cast(src)); break; 182 | case PlyProperty::Type::UINT8: os << static_cast(*reinterpret_cast(src)); break; 183 | case PlyProperty::Type::INT16: os << *reinterpret_cast(src); break; 184 | case PlyProperty::Type::UINT16: os << *reinterpret_cast(src); break; 185 | case PlyProperty::Type::INT32: os << *reinterpret_cast(src); break; 186 | case PlyProperty::Type::UINT32: os << *reinterpret_cast(src); break; 187 | case PlyProperty::Type::FLOAT32: os << *reinterpret_cast(src); break; 188 | case PlyProperty::Type::FLOAT64: os << *reinterpret_cast(src); break; 189 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 190 | } 191 | os << " "; 192 | srcOffset += PropertyTable[t].stride; 193 | } 194 | 195 | void PlyFile::write_property_binary(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset) 196 | { 197 | os.write((char *)src, PropertyTable[t].stride); 198 | srcOffset += PropertyTable[t].stride; 199 | } 200 | 201 | void PlyFile::read(std::istream & is) 202 | { 203 | read_internal(is); 204 | } 205 | 206 | void PlyFile::write(std::ostream & os, bool isBinary) 207 | { 208 | if (isBinary) write_binary_internal(os); 209 | else write_ascii_internal(os); 210 | } 211 | 212 | void PlyFile::write_binary_internal(std::ostream & os) 213 | { 214 | isBinary = true; 215 | write_header(os); 216 | 217 | for (auto & e : elements) 218 | { 219 | for (size_t i = 0; i < e.size; ++i) 220 | { 221 | for (auto & p : e.properties) 222 | { 223 | auto & cursor = userDataTable[make_key(e.name, p.name)]; 224 | if (p.isList) 225 | { 226 | uint8_t listSize[4] = {0, 0, 0, 0}; 227 | memcpy(listSize, &p.listCount, sizeof(uint32_t)); 228 | size_t dummyCount = 0; 229 | write_property_binary(p.listType, os, listSize, dummyCount); 230 | for (int j = 0; j < p.listCount; ++j) 231 | { 232 | write_property_binary(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 233 | } 234 | } 235 | else 236 | { 237 | write_property_binary(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 238 | } 239 | } 240 | } 241 | } 242 | } 243 | 244 | void PlyFile::write_ascii_internal(std::ostream & os) 245 | { 246 | write_header(os); 247 | 248 | for (auto & e : elements) 249 | { 250 | for (size_t i = 0; i < e.size; ++i) 251 | { 252 | for (auto & p : e.properties) 253 | { 254 | auto & cursor = userDataTable[make_key(e.name, p.name)]; 255 | if (p.isList) 256 | { 257 | os << p.listCount << " "; 258 | for (int j = 0; j < p.listCount; ++j) 259 | { 260 | write_property_ascii(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 261 | } 262 | } 263 | else 264 | { 265 | write_property_ascii(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 266 | } 267 | } 268 | os << std::endl; 269 | } 270 | } 271 | } 272 | 273 | void PlyFile::write_header(std::ostream & os) 274 | { 275 | const std::locale & fixLoc = std::locale("C"); 276 | os.imbue(fixLoc); 277 | 278 | os << "ply" << std::endl; 279 | if (isBinary) 280 | os << ((isBigEndian) ? 
"format binary_big_endian 1.0" : "format binary_little_endian 1.0") << std::endl; 281 | else 282 | os << "format ascii 1.0" << std::endl; 283 | 284 | for (const auto & comment : comments) 285 | os << "comment " << comment << std::endl; 286 | 287 | for (auto & e : elements) 288 | { 289 | os << "element " << e.name << " " << e.size << std::endl; 290 | for (const auto & p : e.properties) 291 | { 292 | if (p.isList) 293 | { 294 | os << "property list " << PropertyTable[p.listType].str << " " 295 | << PropertyTable[p.propertyType].str << " " << p.name << std::endl; 296 | } 297 | else 298 | { 299 | os << "property " << PropertyTable[p.propertyType].str << " " << p.name << std::endl; 300 | } 301 | } 302 | } 303 | os << "end_header" << std::endl; 304 | } 305 | 306 | void PlyFile::read_internal(std::istream & is) 307 | { 308 | std::function read; 309 | std::function skip; 310 | if (isBinary) 311 | { 312 | read = [&](PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) { read_property_binary(t, dest, destOffset, is); }; 313 | skip = [&](const PlyProperty & property, std::istream & is) { skip_property_binary(property, is); }; 314 | } 315 | else 316 | { 317 | read = [&](PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) { read_property_ascii(t, dest, destOffset, is); }; 318 | skip = [&](const PlyProperty & property, std::istream & is) { skip_property_ascii(property, is); }; 319 | } 320 | 321 | for (auto & element : get_elements()) 322 | { 323 | if (std::find(requestedElements.begin(), requestedElements.end(), element.name) != requestedElements.end()) 324 | { 325 | for (size_t count = 0; count < element.size; ++count) 326 | { 327 | for (auto & property : element.properties) 328 | { 329 | if (auto & cursor = userDataTable[make_key(element.name, property.name)]) 330 | { 331 | if (property.isList) 332 | { 333 | size_t listSize = 0; 334 | size_t dummyCount = 0; 335 | read(property.listType, &listSize, dummyCount, is); 336 | if (cursor->realloc == false) 337 | { 338 | cursor->realloc = true; 339 | resize_vector(property.propertyType, cursor->vector, listSize * element.size, cursor->data); 340 | } 341 | for (size_t i = 0; i < listSize; ++i) 342 | { 343 | read(property.propertyType, (cursor->data + cursor->offset), cursor->offset, is); 344 | } 345 | } 346 | else 347 | { 348 | read(property.propertyType, (cursor->data + cursor->offset), cursor->offset, is); 349 | } 350 | } 351 | else 352 | { 353 | skip(property, is); 354 | } 355 | } 356 | } 357 | } 358 | else continue; 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/tinyply.h: -------------------------------------------------------------------------------- 1 | // This software is in the public domain. Where that dedication is not 2 | // recognized, you are granted a perpetual, irrevocable license to copy, 3 | // distribute, and modify this file as you see fit. 
4 | // Authored in 2015 by Dimitri Diakopoulos (http://www.dimitridiakopoulos.com) 5 | // https://github.com/ddiakopoulos/tinyply 6 | 7 | #ifndef tinyply_h 8 | #define tinyply_h 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tinyply 23 | { 24 | 25 | template T endian_swap(const T & v) { return v; } 26 | template<> inline uint16_t endian_swap(const uint16_t & v) { return (v << 8) | (v >> 8); } 27 | template<> inline uint32_t endian_swap(const uint32_t & v) { return (v << 24) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) | (v >> 24); } 28 | template<> inline uint64_t endian_swap(const uint64_t & v) 29 | { 30 | return (((v & 0x00000000000000ffLL) << 56) | 31 | ((v & 0x000000000000ff00LL) << 40) | 32 | ((v & 0x0000000000ff0000LL) << 24) | 33 | ((v & 0x00000000ff000000LL) << 8) | 34 | ((v & 0x000000ff00000000LL) >> 8) | 35 | ((v & 0x0000ff0000000000LL) >> 24) | 36 | ((v & 0x00ff000000000000LL) >> 40) | 37 | ((v & 0xff00000000000000LL) >> 56)); 38 | } 39 | template<> inline int16_t endian_swap(const int16_t & v) { uint16_t r = endian_swap(*(uint16_t*)&v); return *(int16_t*)&r; } 40 | template<> inline int32_t endian_swap(const int32_t & v) { uint32_t r = endian_swap(*(uint32_t*)&v); return *(int32_t*)&r; } 41 | template<> inline int64_t endian_swap(const int64_t & v) { uint64_t r = endian_swap(*(uint64_t*)&v); return *(int64_t*)&r; } 42 | inline float endian_swap_float(const uint32_t & v) { uint32_t r = endian_swap(v); return *(float*)&r; } 43 | inline double endian_swap_double(const uint64_t & v) { uint64_t r = endian_swap(v); return *(double*)&r; } 44 | 45 | struct DataCursor 46 | { 47 | void * vector; 48 | uint8_t * data; 49 | size_t offset; 50 | bool realloc = false; 51 | }; 52 | 53 | class PlyProperty 54 | { 55 | void parse_internal(std::istream & is); 56 | public: 57 | 58 | enum class Type : uint8_t 59 | { 60 | INVALID, 61 | INT8, 62 | UINT8, 63 | INT16, 64 | UINT16, 65 | INT32, 66 | UINT32, 67 | FLOAT32, 68 | FLOAT64 69 | }; 70 | 71 | PlyProperty(std::istream & is); 72 | PlyProperty(Type type, const std::string & name) : propertyType(type), isList(false), name(name) {} 73 | PlyProperty(Type list_type, Type prop_type, const std::string & name, int listCount) : listType(list_type), propertyType(prop_type), isList(true), name(name), listCount(listCount) {} 74 | 75 | Type listType, propertyType; 76 | bool isList; 77 | int listCount = 0; 78 | std::string name; 79 | }; 80 | 81 | inline std::string make_key(const std::string & a, const std::string & b) 82 | { 83 | return (a + "-" + b); 84 | } 85 | 86 | template 87 | void ply_cast(void * dest, const char * src, bool be) 88 | { 89 | *(static_cast(dest)) = (be) ? endian_swap(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 90 | } 91 | 92 | template 93 | void ply_cast_float(void * dest, const char * src, bool be) 94 | { 95 | *(static_cast(dest)) = (be) ? endian_swap_float(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 96 | } 97 | 98 | template 99 | void ply_cast_double(void * dest, const char * src, bool be) 100 | { 101 | *(static_cast(dest)) = (be) ? 
endian_swap_double(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 102 | } 103 | 104 | template 105 | T ply_read_ascii(std::istream & is) 106 | { 107 | T data; 108 | is >> data; 109 | return data; 110 | } 111 | 112 | template 113 | void ply_cast_ascii(void * dest, std::istream & is) 114 | { 115 | *(static_cast(dest)) = ply_read_ascii(is); 116 | } 117 | 118 | struct PropertyInfo { int stride; std::string str; }; 119 | static std::map PropertyTable 120 | { 121 | { PlyProperty::Type::INT8,{ 1, "char" } }, 122 | { PlyProperty::Type::UINT8,{ 1, "uchar" } }, 123 | { PlyProperty::Type::INT16,{ 2, "short" } }, 124 | { PlyProperty::Type::UINT16,{ 2, "ushort" } }, 125 | { PlyProperty::Type::INT32,{ 4, "int" } }, 126 | { PlyProperty::Type::UINT32,{ 4, "uint" } }, 127 | { PlyProperty::Type::FLOAT32,{ 4, "float" } }, 128 | { PlyProperty::Type::FLOAT64,{ 8, "double" } }, 129 | { PlyProperty::Type::INVALID,{ 0, "INVALID" } } 130 | }; 131 | 132 | inline PlyProperty::Type property_type_from_string(const std::string & t) 133 | { 134 | if (t == "int8" || t == "char") return PlyProperty::Type::INT8; 135 | else if (t == "uint8" || t == "uchar") return PlyProperty::Type::UINT8; 136 | else if (t == "int16" || t == "short") return PlyProperty::Type::INT16; 137 | else if (t == "uint16" || t == "ushort") return PlyProperty::Type::UINT16; 138 | else if (t == "int32" || t == "int") return PlyProperty::Type::INT32; 139 | else if (t == "uint32" || t == "uint") return PlyProperty::Type::UINT32; 140 | else if (t == "float32" || t == "float") return PlyProperty::Type::FLOAT32; 141 | else if (t == "float64" || t == "double") return PlyProperty::Type::FLOAT64; 142 | return PlyProperty::Type::INVALID; 143 | } 144 | 145 | template 146 | inline uint8_t * resize(void * v, size_t newSize) 147 | { 148 | auto vec = static_cast *>(v); 149 | vec->resize(newSize); 150 | return reinterpret_cast(vec->data()); 151 | } 152 | 153 | inline void resize_vector(const PlyProperty::Type t, void * v, size_t newSize, uint8_t *& ptr) 154 | { 155 | switch (t) 156 | { 157 | case PlyProperty::Type::INT8: ptr = resize(v, newSize); break; 158 | case PlyProperty::Type::UINT8: ptr = resize(v, newSize); break; 159 | case PlyProperty::Type::INT16: ptr = resize(v, newSize); break; 160 | case PlyProperty::Type::UINT16: ptr = resize(v, newSize); break; 161 | case PlyProperty::Type::INT32: ptr = resize(v, newSize); break; 162 | case PlyProperty::Type::UINT32: ptr = resize(v, newSize); break; 163 | case PlyProperty::Type::FLOAT32: ptr = resize(v, newSize); break; 164 | case PlyProperty::Type::FLOAT64: ptr = resize(v, newSize); break; 165 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 166 | } 167 | } 168 | 169 | template 170 | inline PlyProperty::Type property_type_for_type(std::vector & theType) 171 | { 172 | if (std::is_same::value) return PlyProperty::Type::INT8; 173 | else if (std::is_same::value) return PlyProperty::Type::UINT8; 174 | else if (std::is_same::value) return PlyProperty::Type::INT16; 175 | else if (std::is_same::value) return PlyProperty::Type::UINT16; 176 | else if (std::is_same::value) return PlyProperty::Type::INT32; 177 | else if (std::is_same::value) return PlyProperty::Type::UINT32; 178 | else if (std::is_same::value) return PlyProperty::Type::FLOAT32; 179 | else if (std::is_same::value) return PlyProperty::Type::FLOAT64; 180 | else return PlyProperty::Type::INVALID; 181 | } 182 | 183 | class PlyElement 184 | { 185 | void parse_internal(std::istream & is); 186 | public: 187 | 
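// An element declared in the PLY header: its name, the number of instances to read (size),
// and the list of properties each instance carries.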
PlyElement(std::istream & istream); 188 | PlyElement(const std::string & name, size_t count) : name(name), size(count) {} 189 | std::string name; 190 | size_t size; 191 | std::vector properties; 192 | }; 193 | 194 | inline int find_element(const std::string key, std::vector & list) 195 | { 196 | for (size_t i = 0; i < list.size(); ++i) 197 | { 198 | if (list[i].name == key) 199 | { 200 | return i; 201 | } 202 | } 203 | return -1; 204 | } 205 | 206 | class PlyFile 207 | { 208 | 209 | public: 210 | 211 | PlyFile() {} 212 | PlyFile(std::istream & is); 213 | 214 | void read(std::istream & is); 215 | void write(std::ostream & os, bool isBinary); 216 | 217 | std::vector & get_elements() { return elements; } 218 | 219 | std::vector comments; 220 | std::vector objInfo; 221 | 222 | template 223 | size_t request_properties_from_element(const std::string & elementKey, std::vector propertyKeys, std::vector & source, const int listCount = 1) 224 | { 225 | if (get_elements().size() == 0) 226 | return 0; 227 | 228 | if (find_element(elementKey, get_elements()) >= 0) 229 | { 230 | if (std::find(requestedElements.begin(), requestedElements.end(), elementKey) == requestedElements.end()) 231 | requestedElements.push_back(elementKey); 232 | } 233 | else return 0; 234 | 235 | // count and verify large enough 236 | auto instance_counter = [&](const std::string & elementKey, const std::string & propertyKey) 237 | { 238 | for (auto e : get_elements()) 239 | { 240 | if (e.name != elementKey) continue; 241 | for (auto p : e.properties) 242 | { 243 | if (p.name == propertyKey) 244 | { 245 | if (PropertyTable[property_type_for_type(source)].stride != PropertyTable[p.propertyType].stride) 246 | throw std::runtime_error("destination vector is wrongly typed to hold this property"); 247 | return e.size; 248 | 249 | } 250 | } 251 | } 252 | return size_t(0); 253 | }; 254 | 255 | // Check if requested key is in the parsed header 256 | std::vector unusedKeys; 257 | for (auto key : propertyKeys) 258 | { 259 | for (auto e : get_elements()) 260 | { 261 | if (e.name != elementKey) continue; 262 | std::vector headerKeys; 263 | for (auto p : e.properties) 264 | { 265 | headerKeys.push_back(p.name); 266 | } 267 | 268 | if (std::find(headerKeys.begin(), headerKeys.end(), key) == headerKeys.end()) 269 | { 270 | unusedKeys.push_back(key); 271 | } 272 | 273 | } 274 | } 275 | 276 | // Not using them? 
Don't let them affect the propertyKeys count used for calculating array sizes 277 | for (auto k : unusedKeys) 278 | { 279 | propertyKeys.erase(std::remove(propertyKeys.begin(), propertyKeys.end(), k), propertyKeys.end()); 280 | } 281 | if (!propertyKeys.size()) return 0; 282 | 283 | // All requested properties in the userDataTable share the same cursor (thrown into the same flat array) 284 | auto cursor = std::make_shared(); 285 | 286 | std::vector instanceCounts; 287 | 288 | for (auto key : propertyKeys) 289 | { 290 | if (int instanceCount = instance_counter(elementKey, key)) 291 | { 292 | instanceCounts.push_back(instanceCount); 293 | auto result = userDataTable.insert(std::pair>(make_key(elementKey, key), cursor)); 294 | if (result.second == false) 295 | throw std::invalid_argument("property has already been requested: " + key); 296 | } 297 | else continue; 298 | } 299 | 300 | size_t totalInstanceSize = [&]() { size_t t = 0; for (auto c : instanceCounts) { t += c; } return t; }() * listCount; 301 | source.resize(totalInstanceSize); // this satisfies regular properties; `cursor->realloc` is for list types since tinyply uses single-pass parsing 302 | cursor->offset = 0; 303 | cursor->vector = &source; 304 | cursor->data = reinterpret_cast(source.data()); 305 | 306 | if (listCount > 1) 307 | { 308 | cursor->realloc = true; 309 | return (totalInstanceSize / propertyKeys.size()) / listCount; 310 | } 311 | 312 | return totalInstanceSize / propertyKeys.size(); 313 | } 314 | 315 | template 316 | void add_properties_to_element(const std::string & elementKey, const std::vector & propertyKeys, std::vector & source, const int listCount = 1, const PlyProperty::Type listType = PlyProperty::Type::INVALID) 317 | { 318 | auto cursor = std::make_shared(); 319 | cursor->offset = 0; 320 | cursor->vector = &source; 321 | cursor->data = reinterpret_cast(source.data()); 322 | 323 | auto create_property_on_element = [&](PlyElement & e) 324 | { 325 | for (auto key : propertyKeys) 326 | { 327 | PlyProperty::Type t = property_type_for_type(source); 328 | PlyProperty newProp = (listType == PlyProperty::Type::INVALID) ? PlyProperty(t, key) : PlyProperty(listType, t, key, listCount); 329 | userDataTable.insert(std::pair>(make_key(e.name, key), cursor)); 330 | e.properties.push_back(newProp); 331 | } 332 | }; 333 | 334 | int idx = find_element(elementKey, elements); 335 | if (idx >= 0) 336 | { 337 | PlyElement & e = elements[idx]; 338 | create_property_on_element(e); 339 | } 340 | else 341 | { 342 | PlyElement newElement = (listCount == 1) ? 
PlyElement(elementKey, source.size() / propertyKeys.size()) : PlyElement(elementKey, source.size() / listCount); 343 | create_property_on_element(newElement); 344 | elements.push_back(newElement); 345 | } 346 | } 347 | 348 | private: 349 | 350 | size_t skip_property_binary(const PlyProperty & property, std::istream & is); 351 | void skip_property_ascii(const PlyProperty & property, std::istream & is); 352 | 353 | void read_property_binary(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is); 354 | void read_property_ascii(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is); 355 | void write_property_ascii(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset); 356 | void write_property_binary(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset); 357 | 358 | bool parse_header(std::istream & is); 359 | void write_header(std::ostream & os); 360 | 361 | void read_header_format(std::istream & is); 362 | void read_header_element(std::istream & is); 363 | void read_header_property(std::istream & is); 364 | void read_header_text(std::string line, std::istream & is, std::vector & place, int erase = 0); 365 | 366 | void read_internal(std::istream & is); 367 | 368 | void write_ascii_internal(std::ostream & os); 369 | void write_binary_internal(std::ostream & os); 370 | 371 | bool isBinary = false; 372 | bool isBigEndian = false; 373 | 374 | std::map> userDataTable; 375 | 376 | std::vector elements; 377 | std::vector requestedElements; 378 | }; 379 | 380 | } // namesapce tinyply 381 | 382 | #endif // tinyply_h 383 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import models.detection_net as SelectionNet 4 | import MinkowskiEngine as ME 5 | import models.iou_nms as iou_nms 6 | from scipy.stats import pearsonr 7 | from models.iou_nms import * 8 | from utils.util import * 9 | from glob import glob 10 | import os 11 | 12 | 13 | 14 | class Model: 15 | def __init__(self, cfg, semantic_valid_class_ids, semantic_id2idx, instance_id2idx, is_foreground, device='cuda'): 16 | self.cfg = cfg 17 | self.device = device 18 | self.semantic_valid_class_ids = semantic_valid_class_ids 19 | self.semantic_id2idx = semantic_id2idx 20 | self.instance_id2idx = instance_id2idx 21 | self.is_foreground = is_foreground 22 | self.detection_model = SelectionNet.SelectionNet(cfg, device, semantic_valid_class_ids, is_foreground, out_channels=[96, 96, 6]).to(device) 23 | if cfg.multigpu: 24 | self.detection_model = torch.nn.parallel.DistributedDataParallel(self.detection_model, device_ids=[device]) 25 | self.detection_model = ME.MinkowskiSyncBatchNorm.convert_sync_batchnorm(self.detection_model) 26 | # loss is computed by averaging over all element-wise computed loss entries 27 | # BCEWL includes sigmoid activation, needs un-normalized input 28 | self.BCEWithLogitsLoss = torch.nn.BCEWithLogitsLoss().to(device) 29 | # not used semantic labels are mapped to -100 using SEMANTIC_ID2IDX and ignored by this loss 30 | # CE is a softmax with exp activation, needs un-normalized inputs 31 | self.semantics_loss = torch.nn.CrossEntropyLoss(ignore_index=-100).to(device) 32 | 33 | def compute_loss(self, batch, epoch): 34 | losses_dict, pred = self.compute_loss_detection(batch, epoch) 35 | 36 | return losses_dict 37 | 38 | def compute_loss_detection(self, batch, epoch): 39 | device = self.device 40 | cfg = 
self.cfg 41 | 42 | # transform data to voxelized sparse tensors 43 | sin = ME.SparseTensor(batch['vox_features'], batch['vox_coords'], device=device) 44 | 45 | # GET MODEL PREDICTION (and convert to regular pytorch tensors) 46 | pred = self.detection_model(sin, batch['pooling_ids'].to(device)) 47 | 48 | # pred keys: 49 | # mlp_offsets 50 | # mlp_bounds 51 | # mlp_bb_scores 52 | # mlp_semantics 53 | # vox_feats 54 | 55 | for mlp_head, sparse_tensor in pred.items(): 56 | pred[mlp_head] = sparse_tensor.F 57 | 58 | # initialize loss 59 | losses_dict = {'optimization_loss': 0} 60 | 61 | # OFFSET loss (offset to BB center) 62 | if cfg.mlp_offsets in self.cfg.network_heads: 63 | # get gt and prediction 64 | gt_offsets, pred_offsets = batch['gt_bb_offsets'], pred[cfg.mlp_offsets] 65 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 66 | pred_offsets = pred_offsets[batch['fg_instances']] 67 | gt_offsets = gt_offsets[batch['fg_instances']] 68 | 69 | # simple L1 loss over the predicted bounding box center offsets 70 | offset_loss_per_pred = torch.sum(torch.abs(pred_offsets - gt_offsets.to(device)), axis=1) 71 | offset_loss = torch.mean(offset_loss_per_pred) 72 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_offsets * offset_loss 73 | losses_dict['offset_loss'] = offset_loss.detach() 74 | 75 | # BB size loss 76 | if cfg.mlp_bounds in self.cfg.network_heads: 77 | # get gt and prediction 78 | gt_bounds, pred_bounds = batch['gt_bb_bounds'], pred[cfg.mlp_bounds] 79 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 80 | pred_bounds = pred_bounds[batch['fg_instances']] 81 | gt_bounds = gt_bounds[batch['fg_instances']] 82 | 83 | # simple L1 loss over the predicted bounding box bounds 84 | bounds_loss_per_pred = torch.sum(torch.abs(pred_bounds - gt_bounds.to(device)), axis=1) 85 | bounds_loss = torch.mean(bounds_loss_per_pred) 86 | 87 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_bounds * bounds_loss 88 | losses_dict['bounds_loss'] = bounds_loss.detach() 89 | 90 | # Axis aligned bounding boxes IoU loss 91 | if cfg.use_bb_iou_loss: 92 | pred_bounds = pred[self.cfg.mlp_bounds] 93 | pred_offsets = pred[self.cfg.mlp_offsets] 94 | gt_bounds = batch['gt_bb_bounds'] 95 | gt_offsets = batch['gt_bb_offsets'] 96 | loc = batch['input_location'] 97 | 98 | loc, gt_offsets, gt_bounds = loc.to(device), gt_offsets.to(device), gt_bounds.to(device) 99 | 100 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 101 | pred_bounds = pred_bounds[batch['fg_instances']] 102 | pred_offsets = pred_offsets[batch['fg_instances']] 103 | gt_bounds = gt_bounds[batch['fg_instances']] 104 | gt_offsets = gt_offsets[batch['fg_instances']] 105 | loc = loc[batch['fg_instances']] 106 | 107 | pred_bounds = torch.clamp(pred_bounds, min=self.cfg.min_bb_size) # enforce minimum size 108 | pred_bb_centers = pred_offsets + loc 109 | gt_bb_center = gt_offsets + loc 110 | pr_bbs = to_bbs_min_max_(pred_bb_centers, pred_bounds, device) 111 | gt_bbs = to_bbs_min_max_(gt_bb_center, gt_bounds, device) 112 | 113 | area1 = (pr_bbs[..., 3] - pr_bbs[..., 0]) * (pr_bbs[..., 4] - pr_bbs[..., 1]) * (pr_bbs[..., 5] - pr_bbs[..., 2]) 114 | area2 = (gt_bbs[..., 3] - gt_bbs[..., 0]) * (gt_bbs[..., 4] - gt_bbs[..., 1]) * (gt_bbs[..., 5] - gt_bbs[..., 2]) 115 | lt = torch.max(pr_bbs[..., :3], gt_bbs[..., :3]) 116 | rb = torch.min(pr_bbs[..., 3:], gt_bbs[..., 3:]) 117 | wh = (rb - lt).clamp(min=0) 118 | overlap = wh[..., 0] * wh[..., 1] * wh[..., 2] 119 | union = area1 + area2 - overlap 120 | eps = 1e-6 121 | 
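# lift eps to a tensor on the same device/dtype as `union`, then clamp the union from below
# so the IoU division cannot hit zero for degenerate (zero-volume) boxes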
eps = union.new_tensor([eps]) 122 | union = torch.max(union, eps) 123 | ious = overlap / union 124 | 125 | iou_loss_per_pred = 1.0 - ious 126 | iou_loss = torch.mean(iou_loss_per_pred) 127 | 128 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_iou * iou_loss 129 | losses_dict['iou_loss'] = iou_loss.detach() 130 | 131 | 132 | # BB score loss 133 | if cfg.mlp_bb_scores in self.cfg.network_heads: 134 | loss_weight_bb_scores = self.cfg.loss_weight_bb_scores 135 | # hack because multi gpu needs to have the full network be part of loss computation already at beginning 136 | if epoch < self.cfg.mlp_bb_scores_start_epoch: 137 | loss_weight_bb_scores = 0 138 | # get gt and prediction 139 | pred_scores = pred[cfg.mlp_bb_scores].reshape(-1) # (num_voxels) 140 | pred_bounds = pred[cfg.mlp_bounds] 141 | pred_offsets = pred[cfg.mlp_offsets] 142 | loc = batch['input_location'] 143 | gt_offsets = batch['gt_bb_offsets'] 144 | gt_bounds = batch['gt_bb_bounds'] 145 | 146 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 147 | pred_scores = pred_scores[batch['fg_instances']] 148 | pred_bounds = pred_bounds[batch['fg_instances']] 149 | pred_offsets = pred_offsets[batch['fg_instances']] 150 | loc = loc[batch['fg_instances']] 151 | gt_offsets = gt_offsets[batch['fg_instances']] 152 | gt_bounds = gt_bounds[batch['fg_instances']] 153 | 154 | loc, gt_offsets, gt_bounds = loc.to(device), gt_offsets.to(device), gt_bounds.to(device) 155 | 156 | # convert gt data to BB (min, max)-corner representation 157 | gt_bb_center = gt_offsets + loc 158 | gt_bbs = to_bbs_min_max_(gt_bb_center, gt_bounds, device) 159 | 160 | # convert pred data to BB (min,max)-corner representation 161 | pred_bounds = torch.clamp(pred_bounds, min=self.cfg.min_bb_size) # enforce minimum size 162 | pred_bb_centers = pred_offsets + loc 163 | pred_bbs = to_bbs_min_max_(pred_bb_centers, pred_bounds, device) 164 | 165 | # compute IOU between pred and gt. This is the GT score that should be predicted. 
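# gt_bbs and pred_bbs are per-voxel axis-aligned boxes in (min-corner, max-corner) form;
# the IoU of each predicted box with its ground-truth box is detached and used as a soft
# target for the score head through the BCE-with-logits loss below.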
166 | ious = iou_nms.set_IOUs(gt_bbs, pred_bbs).detach() # (num_input_bbs) 167 | score_loss = self.BCEWithLogitsLoss(pred_scores, ious) 168 | 169 | # for interpretable logging, we use correlation 170 | corr, _ = pearsonr(ious.cpu().numpy(), pred_scores.cpu().detach().numpy()) 171 | losses_dict['bb_scores_correlation'] = corr 172 | 173 | losses_dict['optimization_loss'] += loss_weight_bb_scores * score_loss 174 | losses_dict['bb_score_loss'] = score_loss.detach() 175 | # for test / visualization only 176 | losses_dict['bb_target_scores'] = torch.mean(ious) 177 | 178 | # center score loss 179 | if cfg.mlp_center_scores in self.cfg.network_heads and epoch >= self.cfg.mlp_center_scores_start_epoch: 180 | # get gt and prediction 181 | pred_scores = pred[cfg.mlp_center_scores].reshape(-1) # (num_voxels) 182 | gt_scores = offset_loss_per_pred.detach() # ( num_voxels) 183 | # simple L1 loss over the predicted scores 184 | if self.cfg.loss_on_fg_instances: 185 | pred_scores = pred_scores[batch['fg_instances']] 186 | score_loss = torch.abs(pred_scores - gt_scores) 187 | score_loss = torch.mean(score_loss) 188 | losses_dict['optimization_loss'] += self.cfg.loss_weight_center_scores * score_loss 189 | losses_dict['center_score_loss'] = score_loss.detach() 190 | # for interpretable logging, we use correlation 191 | corr, _ = pearsonr(gt_scores.cpu().numpy(), pred_scores.cpu().detach().numpy()) 192 | losses_dict['center_scores_correlation'] = corr 193 | 194 | if self.cfg.mlp_semantics in self.cfg.network_heads: 195 | # get gt and prediction 196 | pred_semantics = pred[cfg.mlp_semantics] 197 | gt_semantics = batch['gt_semantics'] 198 | # invalid and unlabeled ids are mapped to '-100' (the 'ignore'-label of our loss) 199 | gt_semantics = self.semantic_id2idx[gt_semantics].to('cuda') 200 | 201 | semantics_loss = self.semantics_loss(pred_semantics, gt_semantics) 202 | pred_semantics_int = torch.argmax(pred_semantics, 1) 203 | # this accuracy is pessimistic: it also measures unlabeled+invalid points 204 | semantics_acc = torch.sum(pred_semantics_int == gt_semantics) / len(gt_semantics) 205 | semantics_miou = semIOU(pred_semantics_int, gt_semantics).mean() 206 | 207 | losses_dict['optimization_loss'] += self.cfg.loss_weight_semantics * semantics_loss 208 | losses_dict['semantics_loss'] = semantics_loss.detach().cpu().numpy() 209 | losses_dict['semantics_acc'] = semantics_acc.detach().cpu().numpy() 210 | losses_dict['semantics_mIoU'] = semantics_miou 211 | 212 | if self.cfg.mlp_per_vox_semantics in self.cfg.network_heads: 213 | pred_semantics = pred[cfg.mlp_per_vox_semantics] 214 | gt_semantics = batch['gt_per_vox_semantics'] 215 | gt_semantics = self.semantic_id2idx[gt_semantics].to('cuda') 216 | 217 | per_vox_semantics_loss = self.semantics_loss(pred_semantics, gt_semantics) 218 | pred_semantics_int = torch.argmax(pred_semantics, 1) 219 | per_vox_semantics_acc = torch.sum(pred_semantics_int == gt_semantics) / len(gt_semantics) 220 | 221 | losses_dict['optimization_loss'] += self.cfg.loss_weight_per_vox_semantics * per_vox_semantics_loss 222 | losses_dict['per_vox_semantics_loss'] = per_vox_semantics_loss.detach().cpu().numpy() 223 | losses_dict['per_vox_semantics_acc'] = per_vox_semantics_acc.detach().cpu().numpy() 224 | 225 | return losses_dict, pred 226 | 227 | def get_prediction(self, batch, with_grad=False, to_cpu=True, min_size=True, get_all=False): 228 | 229 | pred = self.detection_model.get_prediction(batch, with_grad=with_grad, to_cpu=to_cpu, min_size=min_size) 230 | return pred 231 | 232 | def 
pred2mask(self, batch, pred, mode): 233 | return self.detection_model.detection2mask(batch, pred, self.cfg, mode, 234 | True, *self.cfg.eval_ths) 235 | 236 | def parameters(self): 237 | return self.detection_model.parameters() 238 | 239 | def to(self, device): 240 | self.detection_model = self.detection_model.to(device) 241 | return self 242 | 243 | def eval(self): 244 | self.detection_model.eval() 245 | 246 | def train(self): 247 | self.detection_model.train() 248 | 249 | # returns if the checkpoint contained all parameters for the model 250 | def load_state_dict(self, state_dict, strict=True): 251 | if self.cfg.multigpu: 252 | return self.detection_model.module.load_state_dict(state_dict, strict) 253 | else: 254 | return self.detection_model.load_state_dict(state_dict, strict) 255 | 256 | 257 | def state_dict(self): 258 | 259 | if self.cfg.multigpu: 260 | return self.detection_model.module.state_dict() 261 | else: 262 | return self.detection_model.state_dict() 263 | 264 | def load_checkpoint(self, checkpoint=None, closest_to = None): 265 | checkpoints = glob(self.cfg.checkpoint_path + '/*') 266 | if checkpoint is None: 267 | if len(checkpoints) == 0: 268 | print('No checkpoints found at {}'.format(self.cfg.checkpoint_path)) 269 | return 0, 0 270 | 271 | checkpoints = [os.path.splitext(os.path.basename(path))[0].split('_')[-1] for path in checkpoints] 272 | checkpoints = np.array(checkpoints, dtype=float) 273 | checkpoints = np.sort(checkpoints) 274 | if closest_to: 275 | ckpt_idx = np.argmin(np.abs(checkpoints - (closest_to * 60 * 60))) 276 | else: #use last 277 | ckpt_idx = -1 278 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format( 279 | *[*convertSecs(checkpoints[ckpt_idx]), checkpoints[ckpt_idx]]) 280 | else: 281 | path = self.cfg.checkpoint_path + '{}.tar'.format(checkpoint) 282 | 283 | print('Loaded checkpoint from: {}'.format(path)) 284 | checkpoint = torch.load(path, map_location=self.device) 285 | self.load_state_dict(checkpoint['model_state_dict']) 286 | epoch = checkpoint['epoch'] 287 | training_time = checkpoint['training_time'] 288 | return epoch, training_time, os.path.basename(path)[:-4], checkpoint['iteration_num'] 289 | -------------------------------------------------------------------------------- /utils/s3dis_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import os 5 | import numpy as np 6 | from scipy import stats 7 | from sklearn.cluster import MeanShift, DBSCAN 8 | import pyviz3d.visualizer as viz 9 | import dataprocessing.scannet as scannet # Using scannet color mapping 10 | import dataprocessing.s3dis as s3dis # Using scannet color mapping 11 | from matplotlib import cm as viz_cm 12 | from sklearn.neighbors import NearestNeighbors 13 | from utils.util import get_bbs_lines 14 | 15 | import open3d as o3d 16 | 17 | NUM_CLASSES = 13 18 | 19 | def reconstruct_mesh (scene): 20 | positions = scene ["positions"] 21 | normals = scene ["normals"] 22 | pcd = o3d.geometry.PointCloud() 23 | pcd.points = o3d.utility.Vector3dVector(positions.astype (np.float32)) 24 | pcd.normals = o3d.utility.Vector3dVector(normals.astype (np.float32)) 25 | with o3d.utility.VerbosityContextManager( 26 | o3d.utility.VerbosityLevel.Debug) as cm: 27 | mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( 28 | pcd, depth=8) 29 | return mesh 30 | 31 | def interpolate(original_scene, sampled_positions, sampled_colors_list, radius=0.1, neutral_color=[1.0, 1.0, 1.0]): 
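# Transfers colors from the sampled point set back onto the original scene mesh: every mesh
# vertex takes the color of its nearest sampled point if that point lies within `radius`,
# and `neutral_color` otherwise; one recolored copy of the mesh is returned per entry of
# sampled_colors_list.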
32 | import copy 33 | pcd_tree = o3d.geometry.KDTreeFlann(original_scene) 34 | num_querries = sampled_positions.shape[0] 35 | interpolated_scenes_list = [copy.deepcopy(original_scene) for _ in range(len(sampled_colors_list))] 36 | for j in range(len(interpolated_scenes_list)): 37 | np.asarray(interpolated_scenes_list[j].vertex_colors)[:, :] = np.array(neutral_color) 38 | mesh_pos = np.asarray(original_scene.vertices) 39 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(sampled_positions) 40 | dist, original2sample = nbrs.kneighbors(mesh_pos) 41 | dist = dist [:, 0] 42 | original2sample = original2sample[:,0] 43 | for j in range(len(interpolated_scenes_list)): 44 | mesh = interpolated_scenes_list [j] 45 | original2sample [dist < radius] 46 | interpolated_colors = sampled_colors_list [j][original2sample [dist < radius]] 47 | colors = np.asarray(mesh.vertex_colors) 48 | colors [dist < radius] = interpolated_colors 49 | interpolated_scenes_list[j].vertex_colors = o3d.utility.Vector3dVector(colors) 50 | 51 | return interpolated_scenes_list 52 | 53 | def visualize_prediction (cfg, scene_name, scene, labels, pred_label, out_path): 54 | # ---------------- GET GT INSTANCES, SEMANTICS, and BBs 55 | print ("visualize ", scene_name) 56 | 57 | color_map = viz_cm.get_cmap('Paired', 12) 58 | colors_map = np.array(color_map(range(12)))[:, :3] 59 | r, g, b = colors_map.T 60 | colors_map = np.vstack((colors_map, np.array([r, b, g]).T, np.array([ b, r, g]).T, np.array([ g, r, b]).T, np.array([ b, g, r]).T, np.array([ g, b, r]).T)) 61 | colors_map = np.vstack((colors_map,colors_map,colors_map)) 62 | if np.max (colors_map) < 2: 63 | colors_map = (colors_map * 255).astype (np.int32) 64 | INS_COLORS = colors_map 65 | 66 | # Using color map from scannet 67 | SEM_COLORS = np.copy (scannet.scannet_color_map).astype (np.float32) 68 | SEM_COLORS [0] = SEM_COLORS [-2] 69 | 70 | scannet.scannet_color_map = s3dis.S3DIS_SEMANTICS_COLORS 71 | 72 | gt_inst_colors = INS_COLORS [labels['instances']] 73 | sem = labels['semantics'] 74 | gt_sem_colors = scannet.scannet_color_map[sem] 75 | 76 | gt_inst_colors = gt_inst_colors / 255 77 | gt_sem_colors = gt_sem_colors / 255 78 | 79 | mesh = reconstruct_mesh (scene) 80 | 81 | instance_fg = s3dis.semantics_to_forground_mask (labels['per_instance_semantics'], cfg) 82 | 83 | bbs = np.hstack((labels['per_instance_bb_centers'][instance_fg], 2* labels['per_instance_bb_bounds'][instance_fg], 84 | scannet.scannet_color_map[labels['per_instance_semantics'][instance_fg]])).T 85 | 86 | # ---------------- GET INSTANCES AND SEMANTICS COLORS 87 | pred_inst_colors = np.ones((len(scene ["positions"]), 3)) * 255 88 | pred_sem_colors = np.ones((len(scene ["positions"]), 3)) * 255 89 | 90 | for ins_id in np.unique (pred_label ["instances"]): 91 | mask = pred_label ["instances"] == ins_id 92 | sem_label = stats.mode(pred_label ["semantics"][mask], None)[0][0] 93 | if ins_id < 1: 94 | pred_inst_colors [mask] = [255,255,255] 95 | else: 96 | gt_ins_id = stats.mode(labels['instances'][mask], None)[0][0] 97 | pred_inst_colors [mask] = INS_COLORS [gt_ins_id] 98 | 99 | pred_sem_colors = scannet.scannet_color_map[pred_label ["semantics"]] 100 | pred_sem_colors [pred_label ["semantics"] < 0] = [255, 255, 255] 101 | 102 | pred_sem_colors = pred_sem_colors / 255 103 | pred_inst_colors = pred_inst_colors / 255 104 | 105 | mesh_rgb, mesh_gt_sem, mesh_gt_ins, mesh_pred_sem, mesh_pred_ins= interpolate (mesh, scene["positions"], 106 | [scene ["colors"], gt_sem_colors, gt_inst_colors, 
pred_sem_colors, pred_inst_colors], 107 | 0.04) 108 | interp_colors = np.asarray(mesh_gt_sem.vertex_colors) 109 | void_mask = interp_colors.sum (1) == 3.0 110 | mesh_rgb.remove_vertices_by_index (np.where (void_mask) [0]) 111 | mesh_gt_sem.remove_vertices_by_index (np.where (void_mask) [0]) 112 | mesh_gt_ins.remove_vertices_by_index (np.where (void_mask) [0]) 113 | mesh_pred_sem.remove_vertices_by_index (np.where (void_mask) [0]) 114 | mesh_pred_ins.remove_vertices_by_index (np.where (void_mask) [0]) 115 | 116 | o3d.io.write_triangle_mesh(os.path.join(out_path,'rgb.ply'), mesh_rgb) 117 | 118 | o3d.io.write_triangle_mesh(os.path.join(out_path,'gt_instances.ply'), mesh_gt_ins) 119 | o3d.io.write_triangle_mesh(os.path.join(out_path,'gt_semantics.ply'), mesh_gt_sem) 120 | 121 | o3d.io.write_triangle_mesh(os.path.join(out_path,'pred_instances.ply'), mesh_pred_ins) 122 | o3d.io.write_triangle_mesh(os.path.join(out_path,'pred_semantics.ply'), mesh_pred_sem) 123 | 124 | # -------------------- SAVE IN PYVIZ -------------------- 125 | v = viz.Visualizer() 126 | v.add_points(f'Input scene', scene['positions'], scene['colors'] * 255, point_size=25, visible=False) 127 | v.add_points(f'GT Instances',scene['positions'], gt_inst_colors * 255, point_size=25, visible=False) 128 | v.add_points(f'GT Semantics',scene['positions'], gt_sem_colors * 255, point_size=25, visible=False) 129 | v.add_points(f'Pred Instances',scene['positions'],pred_inst_colors * 255, point_size=25, visible=False) 130 | v.add_points(f'Pred Semantics',scene['positions'], pred_sem_colors * 255, point_size=25, visible=False) 131 | start, end = get_bbs_lines(labels['per_instance_bb_centers'][instance_fg], labels['per_instance_bb_bounds'][instance_fg]) 132 | bbs_colors = np.repeat(scannet.scannet_color_map[labels['per_instance_semantics'][instance_fg]], 12, axis=0) 133 | v.add_lines(f'GT BBs', start, end, bbs_colors, visible=False) 134 | v.save(os.path.join(out_path,'pyviz3d'), verbose=False) 135 | print ('Pyviz visualization to ', os.path.join(out_path,'pyviz3d')) 136 | 137 | def assign_semantics_to_proposals (pred_semantics, proposal_masks): 138 | # Use majoring vote to determind the semantic of proposals 139 | proposal_semantics = [] 140 | for mask in proposal_masks: 141 | mask = mask > 0 142 | semantic_id = np.bincount (pred_semantics [mask]).argmax () 143 | proposal_semantics.append (semantic_id) 144 | return np.array (proposal_semantics) 145 | 146 | def clustering_for_background (pred_semantics, coords, normals): 147 | ''' For the S3DIS scene: 148 | - we use DBSCAN to cluster the instances of walls 149 | - we use the semantic prediction to get the the floor insance and the ceiling instance (only 1 ceiling and 1 floor in each scene) 150 | - Non-maximum-clustering / bounding boxes are not used / predicted for walls / floors / ceiling 151 | ''' 152 | pred_instances = np.zeros_like (pred_semantics).astype (np.int32) 153 | # instance ID of ceiling 154 | pred_instances [pred_semantics == 0] = 1 155 | # instance ID of floor 156 | pred_instances [pred_semantics == 1] = 2 157 | 158 | wall_mask = pred_semantics == 2 159 | wall_coords = coords [wall_mask] 160 | wall_normals = normals [wall_mask] * 2 # priotizing normal over coordinates 161 | wall_features = np.concatenate ([wall_coords, wall_normals], 1) 162 | 163 | # wall_clustering = MeanShift(bandwidth=2, n_jobs=16).fit(wall_features) 164 | wall_clustering = DBSCAN(eps=0.35, min_samples=10, n_jobs=16).fit(wall_features) 165 | wall_clustering.labels_ = wall_clustering.labels_ + 4 166 | 
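# DBSCAN labels clusters 0, 1, 2, ... and marks noise as -1; shifting by 4 keeps the wall
# instance IDs clear of the ceiling (1) and floor (2) IDs assigned above (noise points end
# up with ID 3, and small clusters are reset to -1 just below).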
wall_instances = wall_clustering.labels_ 167 | 168 | # remove small noises 169 | bg_prop_ids, bg_prop_cnts = np.unique (wall_instances, return_counts=True) 170 | wall_id_small_mask = bg_prop_cnts < 3000 171 | small_prop_ids = bg_prop_ids [wall_id_small_mask] 172 | wall_small_mask = np.isin (wall_instances, small_prop_ids) 173 | wall_instances [wall_small_mask] = -1 174 | 175 | pred_instances [wall_mask] = wall_instances 176 | 177 | return pred_instances 178 | 179 | def s3dis_eval (pred_labels, gt_labels): 180 | 181 | num_room = len(gt_labels) 182 | 183 | # Initialize... 184 | # acc and macc 185 | total_true = 0 186 | total_seen = 0 187 | true_positive_classes = np.zeros(NUM_CLASSES) 188 | positive_classes = np.zeros(NUM_CLASSES) 189 | gt_classes = np.zeros(NUM_CLASSES) 190 | # mIoU 191 | ious = np.zeros(NUM_CLASSES) 192 | totalnums = np.zeros(NUM_CLASSES) 193 | # precision & recall 194 | total_gt_ins = np.zeros(NUM_CLASSES) 195 | at = 0.5 196 | tpsins = [[] for itmp in range(NUM_CLASSES)] 197 | fpsins = [[] for itmp in range(NUM_CLASSES)] 198 | # mucov and mwcov 199 | all_mean_cov = [[] for itmp in range(NUM_CLASSES)] 200 | all_mean_weighted_cov = [[] for itmp in range(NUM_CLASSES)] 201 | 202 | 203 | for i in range(num_room): 204 | data_label = pred_labels [i] 205 | pred_ins = pred_labels [i]["instances"] 206 | pred_sem = pred_labels [i]["semantics"] 207 | gt_label = gt_labels [i] 208 | gt_ins = gt_label ["instances"] 209 | gt_sem = gt_label ["semantics"] 210 | 211 | # semantic acc 212 | total_true += np.sum(pred_sem == gt_sem) 213 | total_seen += pred_sem.shape[0] 214 | 215 | # pn semantic mIoU 216 | for j in range(gt_sem.shape[0]): 217 | gt_l = int(gt_sem[j]) 218 | pred_l = int(pred_sem[j]) 219 | gt_classes[gt_l] += 1 220 | positive_classes[pred_l] += 1 221 | true_positive_classes[gt_l] += int(gt_l==pred_l) 222 | 223 | # instance 224 | un = np.unique(pred_ins) 225 | pts_in_pred = [[] for itmp in range(NUM_CLASSES)] 226 | for ig, g in enumerate(un): # each object in prediction 227 | if g == -1: 228 | continue 229 | tmp = (pred_ins == g) 230 | sem_seg_i = int(stats.mode(pred_sem[tmp])[0]) 231 | pts_in_pred[sem_seg_i] += [tmp] 232 | 233 | un = np.unique(gt_ins) 234 | pts_in_gt = [[] for itmp in range(NUM_CLASSES)] 235 | for ig, g in enumerate(un): 236 | tmp = (gt_ins == g) 237 | sem_seg_i = int(stats.mode(gt_sem[tmp])[0]) 238 | pts_in_gt[sem_seg_i] += [tmp] 239 | # NOTE: 240 | # pts_in_gt: (Nclass, Npoints) - binary array, gt instance list of each gt class 241 | # pts_in_pred: (Nclass, Npoints) - binary array, pred instance list of each pred class 242 | 243 | # instance mucov & mwcov 244 | for i_sem in range(NUM_CLASSES): 245 | sum_cov = 0 246 | mean_cov = 0 247 | mean_weighted_cov = 0 248 | num_gt_point = 0 249 | for ig, ins_gt in enumerate(pts_in_gt[i_sem]): 250 | ovmax = 0. 
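`clustering_for_background` above separates individual walls by running DBSCAN on point coordinates concatenated with up-weighted normals, then discards tiny clusters as noise. A condensed sketch of that step (thresholds copied from the code, helper name illustrative):

```python
import numpy as np
from sklearn.cluster import DBSCAN

def cluster_walls(coords, normals, eps=0.35, min_samples=10, min_points=3000):
    # Doubling the normals prioritises orientation, so walls meeting at a
    # corner end up in different clusters even though their points touch.
    feats = np.concatenate([coords, normals * 2], axis=1)
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(feats).labels_
    # Drop clusters that are too small to be a real wall.
    ids, counts = np.unique(labels, return_counts=True)
    labels[np.isin(labels, ids[counts < min_points])] = -1
    return labels  # the caller offsets these ids past the ceiling/floor instances
```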
251 | num_ins_gt_point = np.sum(ins_gt) 252 | num_gt_point += num_ins_gt_point 253 | for ip, ins_pred in enumerate(pts_in_pred[i_sem]): 254 | union = (ins_pred | ins_gt) 255 | intersect = (ins_pred & ins_gt) 256 | iou = float(np.sum(intersect)) / np.sum(union) 257 | 258 | if iou > ovmax: 259 | ovmax = iou 260 | ipmax = ip 261 | 262 | sum_cov += ovmax 263 | mean_weighted_cov += ovmax * num_ins_gt_point 264 | 265 | if len(pts_in_gt[i_sem]) != 0: 266 | mean_cov = sum_cov / len(pts_in_gt[i_sem]) 267 | all_mean_cov[i_sem].append(mean_cov) 268 | 269 | mean_weighted_cov /= num_gt_point 270 | all_mean_weighted_cov[i_sem].append(mean_weighted_cov) 271 | 272 | 273 | # instance precision & recall 274 | for i_sem in range(NUM_CLASSES): 275 | tp = [0.] * len(pts_in_pred[i_sem]) 276 | fp = [0.] * len(pts_in_pred[i_sem]) 277 | gtflag = np.zeros(len(pts_in_gt[i_sem])) 278 | total_gt_ins[i_sem] += len(pts_in_gt[i_sem]) 279 | 280 | for ip, ins_pred in enumerate(pts_in_pred[i_sem]): 281 | ovmax = -1. 282 | 283 | for ig, ins_gt in enumerate(pts_in_gt[i_sem]): 284 | union = (ins_pred | ins_gt) 285 | intersect = (ins_pred & ins_gt) 286 | iou = float(np.sum(intersect)) / np.sum(union) 287 | 288 | 289 | if iou > ovmax: 290 | ovmax = iou 291 | igmax = ig 292 | 293 | if ovmax >= at: 294 | tp[ip] = 1 # true 295 | else: 296 | fp[ip] = 1 # false positive 297 | 298 | tpsins[i_sem] += tp 299 | fpsins[i_sem] += fp 300 | 301 | 302 | MUCov = np.zeros(NUM_CLASSES) 303 | MWCov = np.zeros(NUM_CLASSES) 304 | for i_sem in range(NUM_CLASSES): 305 | MUCov[i_sem] = np.mean(all_mean_cov[i_sem]) 306 | MWCov[i_sem] = np.mean(all_mean_weighted_cov[i_sem]) 307 | 308 | precision = np.zeros(NUM_CLASSES) 309 | recall = np.zeros(NUM_CLASSES) 310 | for i_sem in range(NUM_CLASSES): 311 | tp = np.asarray(tpsins[i_sem]).astype(np.float) 312 | fp = np.asarray(fpsins[i_sem]).astype(np.float) 313 | tp = np.sum(tp) 314 | fp = np.sum(fp) 315 | 316 | rec = tp / total_gt_ins[i_sem] 317 | prec = tp / (tp + fp) 318 | 319 | precision[i_sem] = prec 320 | recall[i_sem] = rec 321 | 322 | def log_string(out_str): 323 | print(out_str) 324 | 325 | log_string('Instance Segmentation Precision: {}'.format(precision)) 326 | log_string('Instance Segmentation mPrecision: {}'.format(np.mean(precision))) 327 | log_string('Instance Segmentation Recall: {}'.format(recall)) 328 | log_string('Instance Segmentation mRecall: {}'.format(np.mean(recall))) 329 | 330 | 331 | 332 | # semantic results 333 | iou_list = [] 334 | for i in range(NUM_CLASSES): 335 | iou = true_positive_classes[i]/float(gt_classes[i]+positive_classes[i]-true_positive_classes[i]) 336 | iou_list.append(iou) 337 | 338 | return np.mean(precision), np.mean(recall), precision, recall -------------------------------------------------------------------------------- /dataprocessing/s3dis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import open3d as o3d 5 | import numpy as np 6 | import pyviz3d.visualizer as viz 7 | import glob 8 | from natsort import natsorted 9 | import os 10 | import json 11 | from sklearn.neighbors import NearestNeighbors 12 | from scipy.spatial import cKDTree as KDTree 13 | import time, math 14 | import dataprocessing.augmentation as augmentation 15 | import torch 16 | 17 | def generate_color_map (max_ids): 18 | mapping = [[np.random.randint (255), np.random.randint (255), np.random.randint (255)] for _ in range (max_ids)] 19 | return np.array (mapping) 20 | 21 | ID2NAME = {0:'ceiling', 1:'floor', 
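The coverage (MUCov/MWCov) and precision/recall numbers in `s3dis_eval` all reduce to point-level IoU between boolean instance masks, and the semantic mIoU uses the usual TP / (GT + Pred - TP) form. The core overlap computation, stripped of the bookkeeping:

```python
import numpy as np

def mask_iou(pred_mask, gt_mask):
    # IoU of two boolean point masks; 0 when both masks are empty.
    union = np.sum(pred_mask | gt_mask)
    return float(np.sum(pred_mask & gt_mask)) / union if union > 0 else 0.0

pred = np.array([True, True, False, False])
gt   = np.array([True, False, True,  False])
print(mask_iou(pred, gt))   # 1 intersection / 3 union = 0.333...
```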
2:'wall', 3:'beam', 4:'column', 5:'window', 6:'door', 7:'table', 8:'chair', 9:'sofa', 10:'bookshelf', 11:'board', 12:'clutter'} 22 | ID2NAME = [ID2NAME [i] for i in range (13)] 23 | S3DIS_SEMANTICS_COLORS = np.array ( 24 | [(174, 199, 232), # ceiling 25 | (152, 223, 138), # floor 26 | (31, 119, 180), # wall 27 | (255, 187, 120), # column 28 | (188, 189, 34), # beam 29 | (140, 86, 75), # window 30 | (255, 152, 150), # door 31 | (214, 39, 40), # table 32 | (197, 176, 213), # chair 33 | (148, 103, 189), # bookcase 34 | (196, 156, 148), # sofa 35 | (23, 190, 207), # board 36 | (178, 76, 76),] # clutter 37 | ) 38 | 39 | # WARNING: those arrays are used within the network 40 | S3DIS_SEMANTIC_VALID_CLASS_IDS = np.array(range (13)) 41 | S3DIS_SEMANTIC_VALID_CLASS_IDS_torch = torch.Tensor(S3DIS_SEMANTIC_VALID_CLASS_IDS) 42 | S3DIS_INSTANCE_VALID_CLASS_IDS = np.array(range (13)) 43 | 44 | S3DIS_INSTANCE_VALID_CLASS_IDS_torch = torch.Tensor(S3DIS_INSTANCE_VALID_CLASS_IDS).long() 45 | S3DIS_INSTANCE_ID2IDX = torch.zeros(13).fill_(-1).long() 46 | S3DIS_INSTANCE_ID2IDX[S3DIS_INSTANCE_VALID_CLASS_IDS] = torch.arange(len(S3DIS_INSTANCE_VALID_CLASS_IDS)).long() 47 | 48 | S3DIS_SEMANTIC_ID2IDX = torch.zeros(300).fill_(-100).long() 49 | # Needed to map semantic ids to ones valid for scene segmentation (= valid classes W wall, ceiling, floor) 50 | S3DIS_SEMANTIC_ID2IDX[S3DIS_SEMANTIC_VALID_CLASS_IDS] = torch.arange(len(S3DIS_SEMANTIC_VALID_CLASS_IDS)).long() 51 | 52 | def get_scene_names (mode, cfg): 53 | scene_npy_pths = glob.glob (os.path.join (cfg.data_dir, 'Area_*/*.npy')) 54 | scene_names = [pth.split ('/')[-2] + '.' + pth.split ('/')[-1].split ('.')[0] for pth in scene_npy_pths] 55 | 56 | if mode == "train": 57 | valid_set_prefix = "Area_" + str (cfg.s3dis_split_fold) 58 | scene_names = [name for name in scene_names if valid_set_prefix not in name] 59 | if mode == 'val': 60 | valid_set_prefix = "Area_" + str (cfg.s3dis_split_fold) 61 | scene_names = [name for name in scene_names if valid_set_prefix in name] 62 | 63 | return scene_names 64 | 65 | def refine_segments (segments, counts, positions, minVerts=20): 66 | """ merge too small segments to large nearby segment 67 | """ 68 | segcount_per_point = counts [segments] 69 | large_enough = segcount_per_point > minVerts 70 | too_small = segcount_per_point <= minVerts 71 | 72 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(positions[large_enough]) 73 | dist, qualified_2_disqualified = nbrs.kneighbors(positions[too_small]) 74 | disqualified2qualified = qualified_2_disqualified[:,0] 75 | segments [too_small] = segments[large_enough] [disqualified2qualified] 76 | 77 | return segments 78 | 79 | def semantics_to_forground_mask (semantics, cfg=None): 80 | if cfg.ignore_wall_ceiling_floor: 81 | return semantics > 2 82 | return semantics >= 0 83 | 84 | def is_foreground (sem): 85 | return sem > 2 86 | 87 | def compute_avg_centers(positions, instance_labels): 88 | per_point_centers = np.zeros((instance_labels.shape[0], 3), dtype='float32') 89 | per_point_offsets = np.zeros((instance_labels.shape[0], 3), dtype='float32') 90 | per_point_center_distances = np.zeros((instance_labels.shape[0], 1), dtype='float32') 91 | 92 | for instance_id in set(instance_labels): 93 | instance_mask = (instance_id == instance_labels) 94 | 95 | # compute AVG centers 96 | instance_center = np.mean(positions[instance_mask], axis=0) 97 | per_point_centers[instance_mask] = instance_center 98 | per_point_offsets[instance_mask] = per_point_centers[instance_mask] - 
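The `*_ID2IDX` tensors above are lookup tables that send raw label ids to contiguous indices, with a fill value (-1 or -100) marking ids the network should ignore. For S3DIS all 13 ids are valid, so the mapping is the identity; the sketch below uses a hypothetical 5-id label space just to make the mechanics visible:

```python
import torch

valid_ids = torch.tensor([1, 3, 4])                 # hypothetical valid classes
id2idx = torch.full((5,), -100, dtype=torch.long)   # -100 = ignore label
id2idx[valid_ids] = torch.arange(len(valid_ids))

raw_labels = torch.tensor([0, 1, 3, 4, 2])
print(id2idx[raw_labels])   # tensor([-100,    0,    1,    2, -100])
```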
positions[instance_mask] 99 | per_point_center_distances = np.linalg.norm(per_point_offsets, axis=1) 100 | 101 | return per_point_centers, per_point_center_distances 102 | 103 | def compute_bounding_box(positions, instance_labels, semantic_labels): 104 | per_point_bb_centers = np.zeros((instance_labels.shape[0], 3), dtype='float32') 105 | per_point_bb_offsets = np.zeros((instance_labels.shape[0], 3), dtype='float32') 106 | per_point_bb_bounds = np.zeros((instance_labels.shape[0], 3), dtype='float32') 107 | per_point_bb_center_distances = np.zeros((instance_labels.shape[0], 1), dtype='float32') 108 | per_point_bb_radius = np.zeros((instance_labels.shape[0], 1), dtype='float32') 109 | 110 | instances = np.unique(instance_labels) 111 | per_instance_semantics = np.zeros((len(instances)), dtype='int32') 112 | per_instance_bb_centers = np.zeros((len(instances), 3), dtype='float32') 113 | per_instance_bb_bounds = np.zeros((len(instances), 3), dtype='float32') 114 | per_instance_bb_radius = np.zeros((len(instances)), dtype='float32') 115 | 116 | for i, instance_id in enumerate(instances): 117 | instance_mask = (instance_id == instance_labels) 118 | instance_points = positions[instance_mask] 119 | per_instance_semantics[i] = semantic_labels[instance_mask][0] 120 | 121 | # bb center 122 | max_bounds = np.max(instance_points, axis=0) 123 | min_bounds = np.min(instance_points, axis=0) 124 | bb_center = (min_bounds + max_bounds) / 2 125 | per_point_bb_centers[instance_mask] = bb_center 126 | per_instance_bb_centers[i] = bb_center 127 | 128 | # bb bounds 129 | bb_bounds = max_bounds - bb_center 130 | per_point_bb_bounds[instance_mask] = bb_bounds 131 | per_instance_bb_bounds[i] = bb_bounds 132 | 133 | # bb center offsets 134 | offsets = bb_center - instance_points 135 | per_point_bb_offsets[instance_mask] = offsets 136 | 137 | # bb center distances 138 | bb_center_distances = np.linalg.norm(offsets, axis=1) 139 | per_point_bb_center_distances[instance_mask] = bb_center_distances.reshape((-1,1)) 140 | 141 | # bb radius 142 | radius = np.max(bb_center_distances).reshape((-1,1)) 143 | per_point_bb_radius[instance_mask] = radius 144 | per_instance_bb_radius[i] = radius 145 | 146 | return per_point_bb_centers, per_point_bb_offsets, per_point_bb_bounds, \ 147 | per_point_bb_center_distances, per_point_bb_radius, \ 148 | instances, per_instance_semantics, per_instance_bb_centers, per_instance_bb_bounds, per_instance_bb_radius 149 | 150 | def seg2label (segments, label_ids): 151 | # Use major voting to assign label for each segment 152 | unique_segments_ids = np.unique(segments) 153 | seg2labelID = np.zeros(np.max(unique_segments_ids) + 1, dtype='int32') 154 | seg2labelID.fill(np.inf) 155 | for seg_id in unique_segments_ids: 156 | seg_mask = segments == seg_id 157 | 158 | seg_label_ids = label_ids[seg_mask] 159 | counts = np.bincount (seg_label_ids) 160 | most_frequent_labels = np.argmax(counts) 161 | 162 | seg2labelID[seg_id] = most_frequent_labels 163 | per_point_segment_labelID = seg2labelID [segments] 164 | return per_point_segment_labelID, seg2labelID 165 | 166 | def read_scene_from_numpy (scene_name, cfg, do_augmentations=False): 167 | """read_scene_from_numpy: read scene informationfrom numpy 168 | 169 | :return 170 | scene: dictionary containing 171 | name: name of the scene informat [area].[place] 172 | positions: 3D-float position of each vertex/point 173 | normals: 3D-float normal of each vertex/point (as computed by open3d) 174 | colors: 3D-float color of each vertex/point [0..1] 175 | segments: 
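`compute_bounding_box` above derives, for every instance, the axis-aligned box centre, the half-extents ("bounds"), per-point offsets to the centre, and a radius used as a distance bound. The geometry for a single instance, as an illustrative helper that is not part of the repo:

```python
import numpy as np

def instance_box(points):
    mins, maxs = points.min(0), points.max(0)
    center = (mins + maxs) / 2          # box centre
    bounds = maxs - center              # half side-lengths along x, y, z
    offsets = center - points           # per-point regression target
    radius = np.linalg.norm(offsets, axis=1).max()
    return center, bounds, offsets, radius
```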
segments id of each vertex/point: N x 1 int32 176 | labels: dictionary containing 177 | semantic_labels: N x 1 int32 178 | instance_labels: N x 1 int32 179 | centers: N x 3 float32 180 | center_distances: N x 1 float32 181 | """ 182 | scene_npy_path = os.path.join (cfg.data_dir, scene_name.split ('.') [0] + '/' + scene_name [len("Area_*") + 1:] + '.normals.instance.npy') 183 | data = np.load (scene_npy_path) 184 | 185 | positions = data [:,:3].astype (np.float32) 186 | colors = data [:,3:6].astype (np.float) / 255 187 | positions = positions - positions.mean (0) 188 | positions[:, 2] -= np.min (positions [:, 2]) 189 | normals = data [:,6:9].astype (np.float) 190 | semantics = data [:, -2].astype (np.int32) 191 | instances = data [:, -1].astype (np.int32) 192 | 193 | # Basic augmentations (rotation, scaling, flipping x-y) 194 | pcd = o3d.geometry.PointCloud() 195 | pcd.points = o3d.utility.Vector3dVector(positions.astype (np.float32)) 196 | pcd.normals = o3d.utility.Vector3dVector(normals.astype (np.float32)) 197 | 198 | if cfg and cfg.augmentation and do_augmentations: 199 | # rotation around x,y,z 200 | if np.random.rand () < cfg.rotation_aug[0]: 201 | augmentation.rotate_mesh (pcd) 202 | if np.random.rand () < cfg.scaling_aug [0]: 203 | augmentation.scale_mesh (pcd, cfg.scaling_aug[1], cfg.scaling_aug[2]) 204 | # rotation around z (height) in 90 degree angles 205 | if cfg.rotation_90_aug: 206 | augmentation.rotate_mesh_90_degree(pcd) 207 | if np.random.rand () < cfg.flipping_aug: 208 | Rt = np.eye (4) 209 | Rt [0][0] *= -1 # Randomly x-axis flip 210 | pcd.transform (Rt) 211 | if np.random.rand () < cfg.flipping_aug: 212 | Rt = np.eye (4) 213 | Rt [1][1] *= - 1 # Randomly y-axis flip 214 | pcd.transform (Rt) 215 | 216 | positions = np.asarray(pcd.points) 217 | normals = np.asarray (pcd.normals) 218 | 219 | # Apply geometric augmentation 220 | if do_augmentations and cfg.augmentation: 221 | if np.random.rand () < cfg.elastic_distortion: 222 | elastic_distortion = augmentation.ElasticDistortion () 223 | positions = elastic_distortion (positions) 224 | pcd.points = o3d.utility.Vector3dVector(positions) 225 | 226 | # elastic distoriton HAIS setting 227 | if np.random.rand () < cfg.elastic_distortion_HAIS: 228 | positions -= positions.mean (0) 229 | positions = augmentation.HAIS_elastic(positions, 6 * (1/cfg.voxel_size) // 50, 40 * (1/cfg.voxel_size) / 50) 230 | positions = augmentation.HAIS_elastic(positions, 20 * (1/cfg.voxel_size) // 50, 160 * (1/cfg.voxel_size) / 50) 231 | positions -= positions.mean (0) 232 | positions[:, 2] -= np.min (positions [:, 2]) 233 | pcd.points = o3d.utility.Vector3dVector(positions) 234 | 235 | if np.random.rand () < cfg.position_jittering [0]: 236 | displacements = cfg.position_jittering [1] * np.random.randn (*positions.shape) 237 | positions = positions + displacements 238 | pcd.points = o3d.utility.Vector3dVector(positions) 239 | 240 | if cfg.HAIS_jitter_aug: 241 | positions -= positions.mean(0) 242 | pcd.points = o3d.utility.Vector3dVector(positions) 243 | Rt = np.eye (4) 244 | m = np.eye(3) 245 | m += np.random.randn(3, 3) * 0.1 246 | theta = np.random.rand() * 2 * math.pi 247 | m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0], [-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation 248 | Rt[:3,:3] = m 249 | pcd.transform (Rt) 250 | positions = np.asarray(pcd.points) 251 | positions[:, 2] -= np.min (positions [:, 2]) 252 | pcd.points = o3d.utility.Vector3dVector(positions) 253 | 254 | # Color transformations 255 | if do_augmentations and 
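The geometric augmentations in `read_scene_from_numpy` are applied through open3d transforms: random axis flips, scaling, rotations (including the 90-degree and HAIS-style z rotations), elastic distortion and jittering. A NumPy-only sketch of just the flip-and-rotate part, assuming `points` is an N x 3 array:

```python
import numpy as np

def flip_and_rotate_z(points, flip_prob=0.5):
    T = np.eye(4)
    if np.random.rand() < flip_prob:    # random x-axis flip
        T[0, 0] = -1
    if np.random.rand() < flip_prob:    # random y-axis flip
        T[1, 1] = -1
    theta = np.random.rand() * 2 * np.pi
    c, s = np.cos(theta), np.sin(theta)
    rot_z = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
    T[:3, :3] = T[:3, :3] @ rot_z       # flip composed with a rotation about z
    homog = np.hstack([points, np.ones((len(points), 1))])
    return (homog @ T.T)[:, :3]
```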
cfg.augmentation: 256 | # Contrast auto contrast 257 | if np.random.rand () < cfg.chromatic_auto_contrast: 258 | chromatic_auto_contrast = augmentation.ChromaticAutoContrast () 259 | colors = chromatic_auto_contrast (colors) 260 | 261 | # Chromatic translation 262 | if np.random.rand () < cfg.chromatic_translation [0]: 263 | trans_range_ratio = cfg.chromatic_translation [1] 264 | chromatic_translation = augmentation.ChromaticTranslation (trans_range_ratio) 265 | colors = chromatic_translation (colors) 266 | 267 | # Chromatic Jitter 268 | if np.random.rand () < cfg.color_jittering_aug [0]: 269 | colors = augmentation.color_jittering (colors, -cfg.color_jittering_aug [1], cfg.color_jittering_aug [1]) 270 | 271 | # Random Brightness 272 | if np.random.rand () < cfg.random_brightness [0]: 273 | colors = augmentation.random_brightness (colors, cfg.random_brightness [1]) 274 | 275 | if cfg.apply_hue_aug: 276 | colors = augmentation.apply_hue_aug(colors) 277 | 278 | 279 | positions = np.asarray(pcd.points) 280 | normals = np.asarray (pcd.normals) 281 | 282 | if cfg.superpoint_algo == 'learned_superpoint': 283 | PATH_SEGMENTS_LABELS_INFO = os.path.join (cfg.data_dir, 'segment_labels/learned_superpoint_graph_segmentations/') 284 | 285 | path_scene_segments_labels_info = PATH_SEGMENTS_LABELS_INFO + '/' + scene_name + '.npy' 286 | segments_labels_info = np.load (path_scene_segments_labels_info, allow_pickle=True).item () 287 | segments = segments_labels_info ['segments'] 288 | per_point_segment_instanceID = segments_labels_info ['per_point_segment_instanceID'] 289 | seg2instanceID = segments_labels_info ['seg2instanceID'] 290 | per_point_segment_semanticID = segments_labels_info ['per_point_segment_semanticID'] 291 | seg2semanticID = segments_labels_info ['seg2semanticID'] 292 | 293 | if cfg.point_sampling_rate is not None: 294 | num_scene_points = len (positions) 295 | sampling_mask = np.zeros (num_scene_points, dtype=np.bool) 296 | if not do_augmentations: 297 | # During evaluation or testing, sample every 4 points 298 | sampling_point_ids = np.array (range (num_scene_points)) [::4] 299 | else: 300 | # During training, sample points randomly with an user input sampling rate 301 | sampling_point_ids = np.random.choice (range (num_scene_points), int (num_scene_points * cfg.point_sampling_rate), replace=False) 302 | sampling_mask [sampling_point_ids] = True 303 | 304 | segments = segments [sampling_mask] 305 | 306 | # Remaping contiguous segments ID 307 | per_point_segment_semanticID = per_point_segment_semanticID [sampling_mask] 308 | per_point_segment_instanceID = per_point_segment_instanceID [sampling_mask] 309 | positions = positions [sampling_mask] 310 | colors = colors [sampling_mask] 311 | normals = normals [sampling_mask] 312 | instances = instances [sampling_mask] 313 | semantics = semantics [sampling_mask] 314 | 315 | scene = { 316 | 'name': scene_name, 317 | 'positions': positions, 318 | 'colors': colors, 319 | 'normals': normals, 320 | 'segments': segments, 321 | } 322 | labels = { 323 | 'instances': instances, 324 | 'semantics': semantics, 325 | 'per_point_segment_instanceID': per_point_segment_instanceID, 326 | 'per_point_segment_semanticID': per_point_segment_semanticID, 327 | 'seg2instanceID': seg2instanceID, 328 | 'seg2semanticID': seg2semanticID, 329 | } 330 | return scene, labels 331 | 332 | def process_scene(scene_name, mode, configuration, do_augmentations=False): 333 | """Process scene: extracts ground truth labels (instance and semantics) and computes centers 334 | 335 | :return 
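Point subsampling (`point_sampling_rate`) behaves differently per mode: evaluation takes every fourth point deterministically, while training draws a random subset of the requested fraction (0.25 in the S3DIS configs). In isolation:

```python
import numpy as np

def sampling_mask(num_points, rate=0.25, training=True):
    mask = np.zeros(num_points, dtype=bool)
    if training:
        ids = np.random.choice(num_points, int(num_points * rate), replace=False)
    else:
        ids = np.arange(num_points)[::4]   # fixed stride at eval time
    mask[ids] = True
    return mask
```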
336 | scene: dictionary containing 337 | positions: 3D-float position of each vertex/point 338 | normals: 3D-float normal of each vertex/point (as computed by open3d) 339 | colors: 3D-float color of each vertex/point [0..1] 340 | labels: dictionary containing 341 | semantic_labels: N x 1 int32 342 | instance_labels: N x 1 int32 343 | centers: N x 3 float32 344 | center_distances: N x 1 float32 345 | """ 346 | cfg = configuration 347 | 348 | # Read point clouds, extract semantic & instance labels, compute centers 349 | scene, labels = read_scene_from_numpy(scene_name, configuration, do_augmentations=do_augmentations) 350 | centers, center_distances = compute_avg_centers(scene ['positions'], labels ['instances']) 351 | 352 | bb_centers, bb_offsets, bb_bounds, bb_center_distances, bb_radius, \ 353 | unique_instances, per_instance_semantics, per_instance_bb_centers, per_instance_bb_bounds, per_instance_bb_radius \ 354 | = compute_bounding_box(scene ['positions'], labels['instances'], labels['semantics']) 355 | 356 | # make sure the unique instance ids can be used as array indices for 'per_instance_XX' 357 | assert np.all(unique_instances == range(len(unique_instances))) 358 | 359 | labels ['per_instance_bb_radius'] = per_instance_bb_radius 360 | labels ['per_instance_bb_bounds'] = per_instance_bb_bounds 361 | labels ['per_instance_bb_centers'] = per_instance_bb_centers 362 | labels ['per_instance_semantics'] = per_instance_semantics 363 | labels ['unique_instances'] = unique_instances 364 | labels ['bb_radius'] = bb_radius 365 | labels ['bb_center_distances'] = bb_center_distances 366 | labels ['seg2inst'] = labels ['seg2instanceID'] 367 | labels ['bb_bounds'] = bb_bounds 368 | labels ['bb_offsets'] = bb_offsets 369 | labels ['bb_centers'] = bb_centers 370 | labels ['center_distances'] = center_distances 371 | labels ['centers'] = centers 372 | 373 | return scene, labels 374 | -------------------------------------------------------------------------------- /models/training.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | 4 | import sys 5 | sys.path.append('.') 6 | 7 | import torch, torch.nn as nn 8 | import torch.optim as optim 9 | from torch.nn import functional as F 10 | import os 11 | from torch.utils.tensorboard import SummaryWriter 12 | from glob import glob 13 | import numpy as np 14 | import time 15 | from collections import defaultdict 16 | from pynvml import * 17 | from utils.util import * 18 | from models.model import Model 19 | from models.evaluation import Evaluater 20 | import torch.multiprocessing as mp 21 | import torch.distributed as dist 22 | import config_loader as cfg_loader 23 | from models.dataloader import ScanNet 24 | from models.dataloader import ARKitScenes 25 | from models.dataloader import S3DIS 26 | 27 | class Trainer(object): 28 | # set val_dataset to None if no validation should be performed 29 | def __init__(self, model, train_dataloader, val_dataset, cfg, rank = None): 30 | self.cfg = cfg 31 | self.model = model 32 | self.rank = rank 33 | self.main_process = not cfg.multigpu or rank == 0 34 | 35 | model_params = self.model.parameters() 36 | 37 | if cfg.optimizer == 'Adam': 38 | self.optimizer = optim.Adam(model_params, lr=cfg.lr) 39 | if cfg.optimizer == 'Adadelta': 40 | self.optimizer = optim.Adadelta(model_params) 41 | if cfg.optimizer == 'RMSprop': 42 | self.optimizer = optim.RMSprop(model_params, momentum=0.9) 43 | 44 | self.epoch = 0 45 | self.train_dataloader = train_dataloader 
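`process_scene` asserts that the unique instance ids form the range 0..K-1, so they can index the `per_instance_*` arrays directly. When labels are not contiguous they can be remapped first; a small helper (not part of the repo) showing the idea:

```python
import numpy as np

def make_contiguous(instance_labels):
    unique_ids, remapped = np.unique(instance_labels, return_inverse=True)
    return remapped, len(unique_ids)

labels = np.array([7, 7, 2, 9, 2])
print(make_contiguous(labels))   # (array([1, 1, 0, 2, 0]), 3)
```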
46 | self.val_dataset = val_dataset 47 | self.val_min = None 48 | self.writer = None 49 | if self.main_process: 50 | os.makedirs(self.cfg.checkpoint_path, exist_ok=True) 51 | self.writer = SummaryWriter(os.path.dirname(__file__) + '/../experiments/tf_summaries/{}/'.format(cfg.exp_name)) 52 | # include copy of all variables and the configuration file into the experiments folder 53 | f = os.path.join(cfg.exp_path, 'args.txt') 54 | with open(f, 'w') as file: 55 | for arg in sorted(vars(cfg)): 56 | attr = getattr(cfg, arg) 57 | file.write('{} = {}\n'.format(arg, attr)) 58 | if cfg.config is not None: 59 | f = os.path.join(cfg.exp_path, 'config.txt') 60 | with open(f, 'w') as file: 61 | file.write(open(cfg.config, 'r').read()) 62 | 63 | def train_step(self, batch): 64 | self.model.train() 65 | self.optimizer.zero_grad() 66 | loss_dict = self.model.compute_loss(batch, self.epoch) 67 | optimization_loss = loss_dict['optimization_loss'] 68 | optimization_loss.backward() 69 | self.optimizer.step() 70 | return loss_dict 71 | 72 | def train_model(self, epochs): 73 | start, training_time, iteration_num = self.load_checkpoint() 74 | 75 | iteration_start_time = time.time() 76 | for rel_epoch, epoch in enumerate(range(start, epochs)): 77 | if self.cfg.multigpu: 78 | self.train_dataloader.sampler.set_epoch(epoch) 79 | self.epoch = epoch 80 | losses_epoch = defaultdict(int) # default values are 0 81 | print(f'Start epoch {epoch}') 82 | 83 | if self.cfg.use_lr_scheduler: 84 | cosine_lr_after_step(self.optimizer, self.cfg.lr, epoch, 85 | self.cfg.lr_scheduler_start_epoch, self.cfg.lr_scheduler_end_epoch) 86 | if self.main_process: 87 | self.writer.add_scalar('LR', self.optimizer.param_groups[0]['lr'], iteration_num) 88 | if epoch >= self.cfg.lr_scheduler_end_epoch: 89 | print('END TRAINING --- LR scheduling end reached. 
Stop training.') 90 | return 91 | 92 | time_prepare_training_batch_start = time.time() 93 | for batch_num, batch in enumerate(self.train_dataloader): 94 | 95 | # save model 96 | iteration_duration = time.time() - iteration_start_time 97 | 98 | #---------- EVALUATE and SAVE CHECKPOINT ------------------------# 99 | if self.main_process and batch_num == 0 and not self.cfg.skip_first_eval: 100 | training_time += iteration_duration 101 | iteration_start_time = time.time() 102 | 103 | # save checkpoints after every ckpt_every epochs 104 | if epoch % self.cfg.ckpt_every == 0: # model is large, so save only now and then 105 | print('Saving checkpoint...') 106 | save_time = time.time() 107 | self.save_checkpoint(epoch, training_time, iteration_num) 108 | print(f'Done saving checkpoint ({str(time.time() - save_time)[:5]} s)') 109 | 110 | 111 | val_losses = self.compute_val_loss(self.cfg.num_eval_batches) 112 | print("VAL losses: {} ".format(val_losses)) 113 | # self.writer.add_scalars('Losses/val', val_losses, iteration_num) 114 | for k, v in val_losses.items(): 115 | if not k[:11] == "mask_scores": 116 | self.writer.add_scalar('val/' + k, v, iteration_num) 117 | else: 118 | self.writer.add_scalar('val_mask_scores/' + k[12:], v, iteration_num) 119 | val_loss = val_losses['optimization_loss'] 120 | 121 | # Evaluation after every eval_every epochs 122 | if self.val_dataset and (epoch % self.cfg.val_every == 0 or epoch % self.cfg.eval_every == 0): 123 | print('start computing validation loss') 124 | # free memory for validation computation 125 | if 'loss_dict' in locals(): # just remove it if it exists 126 | del loss_dict 127 | 128 | # eval not needed for very early models, early models take long to eval 129 | if epoch % self.cfg.eval_every == 0 and epoch >= 250 and self.cfg.full_model \ 130 | and self.cfg.dataset_name == 'scannet': 131 | 132 | import dataprocessing.scannet as scannet 133 | semantic_valid_class_ids_torch = scannet.SCANNET_SEMANTIC_VALID_CLASS_IDS_torch 134 | is_foreground = scannet.is_foreground 135 | semantic_id2idx = scannet.SCANNET_SEMANTIC_ID2IDX 136 | instance_id2idx = scannet.SCANNET_INSTANCE_ID2IDX 137 | 138 | val_model = Model(self.cfg, semantic_valid_class_ids_torch, semantic_id2idx, instance_id2idx, is_foreground) 139 | predictor = Evaluater(val_model, self.cfg) 140 | ap_all, ap_50, ap_25 = predictor.eval(val_dataset) 141 | for ap_str, ap in [('ap_all', ap_all), ('ap_50', ap_50), ('ap_25', ap_25)] : 142 | self.writer.add_scalar('val/' + ap_str, ap, iteration_num) 143 | 144 | if self.val_min is None: 145 | self.val_min = val_loss 146 | 147 | if val_loss < self.val_min: 148 | self.val_min = val_loss 149 | for path in glob(self.cfg.exp_path + 'val_min=*'): 150 | os.remove(path) 151 | np.save(self.cfg.exp_path + 'val_min=checkpoint_{}h:{}m:{}s_{}.tar' 152 | .format(*[*convertSecs(training_time),training_time]), [epoch, iteration_num, val_loss]) 153 | self.cfg.skip_first_eval = False 154 | 155 | # Compute time to prepare batch; this time is reset at the end of this for-loop 156 | time_prepare_training_batch_duration = time.time() - time_prepare_training_batch_start 157 | print(f'Time to prepare batch: {time_prepare_training_batch_duration:.3f}') 158 | if self.main_process: 159 | self.writer.add_scalar('time/prepare_training_batch', time_prepare_training_batch_duration, 160 | iteration_num) 161 | 162 | # Optimize model 163 | time_training_step_start = time.time() 164 | loss_dict = self.train_step(batch) 165 | time_training_step_duration = time.time() - time_training_step_start 166 
| if self.main_process: 167 | self.writer.add_scalar('time/training_step', time_training_step_duration, iteration_num) 168 | 169 | # Transform losses to single values 170 | for k, v in loss_dict.items(): 171 | losses_epoch[k] += v.item() 172 | 173 | current_iteration = batch_num + epoch * len(self.train_dataloader) 174 | current_losses = {k: str(v.item())[:6] for k, v in loss_dict.items()} 175 | print(f'{current_iteration} dt:{time_training_step_duration:.3f} Current losses: {current_losses}') 176 | 177 | if self.main_process: 178 | # LOGGING GPU STATISTICS 179 | # in order to manage the unstable memory usage of ME (defined on GPU 0 here) 180 | nvmlInit() 181 | h = nvmlDeviceGetHandleByIndex(0) 182 | info_before = nvmlDeviceGetMemoryInfo(h) 183 | # EMPTY CACHED MEMORY 184 | torch.cuda.empty_cache() 185 | info_after = nvmlDeviceGetMemoryInfo(h) 186 | for k, v in {'total MB': info_before.total / 1024 ** 2, 187 | 'free MB': info_before.free / 1024 ** 2, 188 | 'used MB': info_before.used / 1024 ** 2}.items(): 189 | self.writer.add_scalar('gpu memory usage/' + k, v, iteration_num) 190 | 191 | for k, v in {'total MB': info_before.total / 1024 ** 2, 192 | 'free after emptying MB': info_after.used / 1024 ** 2, 193 | 'used after emptying MB': info_after.used / 1024 ** 2}.items(): 194 | self.writer.add_scalar('gpu memory usage (emptied cache)/' + k, v, iteration_num) 195 | 196 | # how many batches we had overall - used for logging 197 | iteration_num += 1 198 | time_prepare_training_batch_start = time.time() 199 | 200 | if self.main_process: 201 | # self.writer.add_scalar('training loss last batch', loss, epoch) 202 | # compute AVG losses 203 | for k, v in losses_epoch.items(): 204 | losses_epoch[k] = v / len(self.train_dataloader) 205 | 206 | # self.writer.add_scalars(f'Losses/train', losses_epoch, iteration_num) 207 | for k, v in losses_epoch.items(): 208 | if not k[:11] == "mask_scores": 209 | self.writer.add_scalar('train/' + k, v, iteration_num) 210 | else: 211 | self.writer.add_scalar('train_mask_scores/' + k[12:], v, iteration_num) 212 | 213 | self.writer.add_scalar('Epoch', epoch, iteration_num) 214 | print('EPOCH AVG:', losses_epoch) 215 | 216 | def save_checkpoint(self, epoch, training_time, iteration_num): 217 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format(*[*convertSecs(training_time), training_time]) 218 | if not os.path.exists(path): 219 | save_dict = { 220 | 'training_time': training_time,'epoch': epoch, 'iteration_num': iteration_num, 221 | 'model_state_dict': self.model.state_dict(), 222 | 'optimizer_state_dict': self.optimizer.state_dict(), 223 | } 224 | torch.save(save_dict, path) 225 | 226 | def load_checkpoint(self, load_idx=-1, checkpoint=None): 227 | time_start = time.time() 228 | checkpoints = glob(self.cfg.checkpoint_path+'/*') 229 | 230 | if len(checkpoints) == 0: 231 | print('No checkpoints found at {}'.format(self.cfg.checkpoint_path)) 232 | return 0, 0, 0 233 | if checkpoint is None: 234 | checkpoints = [os.path.splitext(os.path.basename(path))[0].split('_')[-1] for path in checkpoints] 235 | checkpoints = np.array(checkpoints, dtype=float) 236 | checkpoints = np.sort(checkpoints) 237 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format(*[*convertSecs(checkpoints[load_idx]), checkpoints[load_idx]]) 238 | else: 239 | path = self.cfg.checkpoint_path + '{}.tar'.format(checkpoint) 240 | print('Loading checkpoint from: {}'.format(path)) 241 | checkpoint = torch.load(path) 242 | if self.cfg.apple_warmstart: 243 | model_dict = 
self.model.state_dict() 244 | # 1. filter out unnecessary keys 245 | pretrained_dict = {k: v for k, v in checkpoint['model_state_dict'].items() if (k != "mlp_semantics.6.kernel" and k != "mlp_semantics.6.bias")} 246 | # 2. overwrite entries in the existing state dict 247 | model_dict.update(pretrained_dict) 248 | # 3. load the new state dict 249 | checkpoint['model_state_dict'] = model_dict 250 | missing_keys, unexpected_keys = self.model.load_state_dict(checkpoint['model_state_dict'], 251 | strict= not self.cfg.loose_model_loading) 252 | if len(missing_keys) == 0 and not self.cfg.apple_warmstart: 253 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 254 | 255 | epoch = checkpoint['epoch'] 256 | training_time = checkpoint['training_time'] 257 | iteration_num = checkpoint['iteration_num'] 258 | 259 | self.epoch = epoch 260 | print(f'Loaded checkpoint in {time.time() - time_start:.3f} seconds') 261 | return epoch, training_time, iteration_num 262 | 263 | def compute_val_loss(self, num_batches=5): 264 | self.model.eval() 265 | 266 | val_losses = defaultdict(int) 267 | for _ in range(num_batches): 268 | try: 269 | val_batch = self.val_data_iterator.next() 270 | except: 271 | self.val_data_iterator = self.val_dataset.get_loader().__iter__() 272 | val_batch = self.val_data_iterator.next() 273 | 274 | with torch.no_grad(): 275 | loss_dict = self.model.compute_loss( val_batch, self.epoch) 276 | for k, v in loss_dict.items(): 277 | val_losses[k] += v.item() 278 | print("[VAL]: Current losses: {} ".format({k: v.item() for k, v in loss_dict.items()})) 279 | # free memory from validation data 280 | del val_batch, loss_dict 281 | for k, v in val_losses.items(): 282 | val_losses[k] = v / num_batches 283 | 284 | return val_losses 285 | 286 | def start_train(rank, cfg, num_devices, train_dataset, val_dataset): 287 | dist.init_process_group( 288 | backend="nccl", 289 | init_method="tcp://127.0.0.1:33456", 290 | world_size=num_devices, 291 | rank=rank, 292 | ) 293 | torch.cuda.set_device(rank) 294 | model = Model(cfg, rank) 295 | train_dataloader = train_dataset.get_loader_multi_gpu(rank=rank, world_size=num_devices) 296 | trainer = Trainer(model, train_dataloader, val_dataset, cfg, rank) 297 | trainer.train_model(10000) 298 | 299 | if __name__ == '__main__': 300 | cfg = cfg_loader.get_config() 301 | 302 | if cfg.dataset_name == 'scannet': 303 | import dataprocessing.scannet as scannet 304 | semantic_valid_class_ids_torch = scannet.SCANNET_SEMANTIC_VALID_CLASS_IDS_torch 305 | is_foreground = scannet.is_foreground 306 | semantic_id2idx = scannet.SCANNET_SEMANTIC_ID2IDX 307 | instance_id2idx = scannet.SCANNET_INSTANCE_ID2IDX 308 | 309 | if not cfg.train_submission: 310 | val_dataset = ScanNet('val', cfg) 311 | train_dataset = ScanNet('train', cfg) 312 | else: 313 | val_dataset = None 314 | train_dataset = ScanNet('train+val', cfg) 315 | elif cfg.dataset_name == 'arkitscenes': 316 | import dataprocessing.arkitscenes as arkitscenes 317 | val_dataset = ARKitScenes('val', cfg, subsample_rate=cfg.subsample_rate) 318 | train_dataset = ARKitScenes('train', cfg, subsample_rate=cfg.subsample_rate) 319 | semantic_valid_class_ids_torch = arkitscenes.ARKITSCENES_SEMANTIC_VALID_CLASS_IDS_torch 320 | semantic_id2idx = arkitscenes.ARKITSCENES_SEMANTIC_ID2IDX 321 | instance_id2idx = arkitscenes.ARKITSCENES_INSTANCE_ID2IDX 322 | is_foreground = arkitscenes.is_foreground 323 | elif cfg.dataset_name == 's3dis': 324 | import dataprocessing.s3dis as s3dis 325 | val_dataset = S3DIS('val', cfg) 326 | 
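The `apple_warmstart` branch of `load_checkpoint` performs a partial warm start: it drops the final semantic-head parameters (whose shape depends on the dataset's class count), keeps every other pretrained weight, and fills the gaps from the freshly initialised model. A condensed sketch of that filtering, with the skipped keys taken from the code:

```python
def warmstart_state_dict(model, pretrained_state,
                         skip_keys=("mlp_semantics.6.kernel", "mlp_semantics.6.bias")):
    model_dict = model.state_dict()
    # 1. filter out the head that will be re-initialised
    kept = {k: v for k, v in pretrained_state.items() if k not in skip_keys}
    # 2. overwrite matching entries, leaving the skipped ones at their fresh init
    model_dict.update(kept)
    return model_dict
```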
train_dataset = S3DIS('train', cfg) 327 | semantic_valid_class_ids_torch = s3dis.S3DIS_SEMANTIC_VALID_CLASS_IDS_torch 328 | semantic_id2idx = s3dis.S3DIS_SEMANTIC_ID2IDX 329 | instance_id2idx = s3dis.S3DIS_INSTANCE_ID2IDX 330 | is_foreground = s3dis.is_foreground 331 | 332 | if cfg.fixed_seed: 333 | print('WARNING: fixed seed selected for training.') 334 | 335 | if cfg.multigpu: 336 | import torch.multiprocessing as mp 337 | mp.set_start_method('spawn') # said to be required in pytorch docs, for num_workers > 1 in dataloader 338 | 339 | num_devices = torch.cuda.device_count() 340 | mp.spawn(start_train, nprocs=num_devices, args=(cfg, num_devices, train_dataset, val_dataset)) 341 | else: 342 | model = Model(cfg, semantic_valid_class_ids_torch, semantic_id2idx, instance_id2idx, is_foreground) 343 | train_dataloader = train_dataset.get_loader() 344 | trainer = Trainer(model, train_dataloader, val_dataset, cfg) 345 | trainer.train_model(10000) 346 | 347 | --------------------------------------------------------------------------------
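For multi-GPU training the `__main__` block spawns one process per visible GPU; each process joins an NCCL group and pins itself to its own device before building the model (see `start_train`). A minimal runnable skeleton of that launch pattern, with the rendezvous address taken from the code and the per-rank work stubbed out:

```python
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def worker(rank, world_size):
    dist.init_process_group(backend="nccl",
                            init_method="tcp://127.0.0.1:33456",
                            world_size=world_size, rank=rank)
    torch.cuda.set_device(rank)     # one GPU per process
    # ... build the model / dataloader for this rank and train ...
    dist.destroy_process_group()

if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    mp.set_start_method('spawn')
    mp.spawn(worker, nprocs=world_size, args=(world_size,))
```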