├── data ├── .gitkeep ├── scannet │ └── scannetv2_official_split.npz └── augmented_BBs │ ├── README.md │ └── visualize_bbs_data.py ├── utils ├── __init__.py ├── gt2eval.py ├── util.py ├── metric_util.py ├── evaluate_detections.py └── s3dis_util.py ├── teaser.jpeg ├── .gitignore ├── dataprocessing ├── oversegmentation │ ├── scene0776_00_oversegmentation.png │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── segmentator.cpp │ │ ├── tinyply.cpp │ │ └── tinyply.h │ ├── run_segmentator.py │ ├── README.md │ └── visualize_segments.py ├── mix3d_albumentations_aug.yaml ├── prepare_s3dis.py ├── augmentation.py └── s3dis.py ├── env.yml ├── configs ├── s3dis_fold1.txt ├── s3dis_fold2.txt ├── s3dis_fold3.txt ├── s3dis_fold4.txt ├── s3dis_fold5.txt ├── s3dis_fold6.txt ├── scannet.txt ├── scannet_dropout1.txt ├── scannet_dropout10.txt ├── scannet_dropout2.txt ├── scannet_dropout20.txt ├── scannet_dropout5.txt ├── arkitscenes.txt ├── scannet_noisy2.txt ├── scannet_noisy5.txt ├── scannet_noisy1.txt ├── scannet_noisy10.txt └── s3dis_detections_learnedPS_voxsem_fold5.txt ├── docs ├── installation.md ├── arkitscenes.md ├── code_structure.md └── s3dis.md ├── models ├── iou_nms.py ├── resnet.py ├── model.py └── training.py └── README.md /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/teaser.jpeg -------------------------------------------------------------------------------- /data/scannet/scannetv2_official_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/data/scannet/scannetv2_official_split.npz -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea 3 | *.pyc 4 | !.gitkeep 5 | .ipynb_checkpoints 6 | trash 7 | .ipynb_checkpoints 8 | experiments 9 | analysis 10 | visualize -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/scene0776_00_oversegmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jchibane/Box2Mask/HEAD/dataprocessing/oversegmentation/scene0776_00_oversegmentation.png -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1 FATAL_ERROR) 2 | set(CMAKE_CXX_STANDARD 11) 3 | project(Segmentator) 4 | set(SOURCES segmentator.cpp tinyply.cpp) 5 | add_executable(segmentator ${SOURCES}) 6 | -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - soumith 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - numpy==1.21.6 8 | - configargparse==1.5.2 9 | - pip==21.0.1 10 | - scipy==1.7.1 11 | - pip: 12 | - 
open3d==0.13.0 13 | - pyviz3d==0.2.28 14 | - tensorboard==2.0 15 | - albumentations==1.0.3 16 | - plyfile==0.7.4 17 | - protobuf==3.20.0 18 | - pynvml==11.4.1 19 | - quaternion==0.9.9 20 | -------------------------------------------------------------------------------- /utils/gt2eval.py: -------------------------------------------------------------------------------- 1 | import datasets.scannet as scannet 2 | import config_loader as cfg_loader 3 | import os 4 | from glob import glob 5 | 6 | cfg = cfg_loader.get_config(['--config','src/instances_ndf/configs/l1_lr-4_relu.txt']) 7 | 8 | scans = glob('data/scannet/scans/*') 9 | outfolder = os.path.join('data','scannet','gt_instance_data_txt') 10 | os.makedirs(outfolder, exist_ok = True) 11 | 12 | for scan in scans: 13 | raise # method has changes 14 | scene, labels = scannet.process_scene(os.path.basename(scan), cfg) 15 | gt_format = labels['instances'] + 1000 * labels['semantics'] 16 | 17 | with open(os.path.join(outfolder, os.path.basename(scan)) + '.txt', 'w') as f: 18 | for id in gt_format: 19 | f.write('%d\n' % id) 20 | break 21 | -------------------------------------------------------------------------------- /configs/s3dis_fold1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 1 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 2 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold3.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = 
[0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 3 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold4.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 4 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 5 34 | 35 | # BB supervision 36 | bb_supervision -------------------------------------------------------------------------------- /configs/s3dis_fold6.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.5, 0.03, 0.3, 0.6] 14 | 15 | #training settings 16 | batch_size = 4 17 | num_workers = 8 18 | lr = 0.001 19 | loss_weight_bb_bounds = 0.5 20 | loss_weight_bb_scores = 3 21 | mlp_bb_scores_start_epoch = 100 22 | 23 | # augmentations 24 | augmentation 25 | scaling_aug= [1.0, 0.8, 1.2] 26 | rotation_90_aug 27 | 28 | # dataset settings 29 | dataset_name s3dis 30 | point_sampling_rate 0.25 31 | ignore_wall_ceiling_floor 32 | superpoint_algo learned_superpoint 33 | s3dis_split_fold 6 34 | 35 | # BB supervision 36 | bb_supervision 
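Usage note (the commands below mirror docs/s3dis.md later in this dump): each `s3dis_fold[N]` config selects Area N as the validation split via `s3dis_split_fold`; training and evaluation for a fold are launched as follows.

```bash
# Sketch following docs/s3dis.md: train and evaluate one S3DIS cross-validation fold.
# The same pattern applies to configs/s3dis_fold1.txt through configs/s3dis_fold6.txt.
python models/training.py --config configs/s3dis_fold5.txt
python models/evaluation.py --config configs/s3dis_fold5.txt
```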
-------------------------------------------------------------------------------- /configs/scannet.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | 32 | # augmentations 33 | augmentation 34 | scaling_aug = [1.0, 0.8, 1.2] 35 | flipping_aug = 0.5 36 | rotation_90_aug 37 | apply_hue_aug -------------------------------------------------------------------------------- /dataprocessing/mix3d_albumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | __version__: 0.4.5 2 | transform: 3 | __class_fullname__: albumentations.core.composition.Compose 4 | additional_targets: {} 5 | bbox_params: null 6 | keypoint_params: null 7 | p: 1.0 8 | transforms: 9 | - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast 10 | always_apply: true 11 | brightness_by_max: true 12 | brightness_limit: 13 | - -0.2 14 | - 0.2 15 | contrast_limit: 16 | - -0.2 17 | - 0.2 18 | p: 0.5 19 | - __class_fullname__: albumentations.augmentations.transforms.RGBShift 20 | always_apply: true 21 | b_shift_limit: 22 | - -20 23 | - 20 24 | g_shift_limit: 25 | - -20 26 | - 20 27 | p: 0.5 28 | r_shift_limit: 29 | - -20 30 | - 20 31 | -------------------------------------------------------------------------------- /configs/scannet_dropout1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.01 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout10.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training 
settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.10 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.02 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout20.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.20 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_dropout5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | 
lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | dropout_boxes = 0.05 33 | 34 | # augmentations 35 | augmentation 36 | scaling_aug = [1.0, 0.8, 1.2] 37 | flipping_aug = 0.5 38 | rotation_90_aug 39 | apply_hue_aug -------------------------------------------------------------------------------- /configs/arkitscenes.txt: -------------------------------------------------------------------------------- 1 | 2 | # experiment 3 | exp_name = cfg_name 4 | 5 | # data 6 | use_normals_input 7 | bb_supervision 8 | data_dir = ./data/ARKitScenes/ 9 | dataset_name = arkitscenes 10 | 11 | # model 12 | do_segment_pooling 13 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 14 | 15 | point_association = False 16 | 17 | # eval 18 | eval_ths = [0.5, 0.05, 0.4, 0.6] 19 | eval_every = 10000000000 20 | ckpt_every = 2 21 | 22 | #training settings 23 | batch_size = 4 24 | voxel_size = 0.04 25 | subsample_rate = 2 26 | lr = 0.001 27 | loss_weight_bb_bounds = 0.5 28 | loss_weight_bb_scores = 3 29 | loss_weight_semantics = 0.3 30 | mlp_bb_scores_start_epoch = 100 31 | 32 | # augmentations - no elastic distortion for now 33 | augmentation 34 | rotation_aug=[1.0, 0.0, 0.9] 35 | scaling_aug = [1.0, 0.8, 1.2] 36 | # flipping_aug = 0.5 37 | # position_jittering = [0.2, 0.005] 38 | # flipping_aug = 0.5 39 | # rotation_90_aug -------------------------------------------------------------------------------- /configs/scannet_noisy2.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=4cm here 34 | noisy_boxes = 0.02 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # 
For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=10cm here 34 | noisy_boxes = 0.05 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy1.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=2cm here 34 | noisy_boxes = 0.01 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /configs/scannet_noisy10.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | 4 | # data 5 | align 6 | use_normals_input 7 | bb_supervision 8 | smallest_bb_heuristic 9 | 10 | # model 11 | do_segment_pooling 12 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_semantics] 13 | 14 | # eval - those are temporarily 15 | eval_ths = [0.5, 0.05, 0.3, 0.6] 16 | 17 | #training settings 18 | batch_size = 8 19 | lr = 0.001 20 | loss_weight_bb_bounds = 0.5 21 | loss_weight_bb_scores = 1 22 | loss_weight_semantics = 1 23 | mlp_bb_scores_start_epoch = 100 24 | ckpt_every = 20 25 | eval_every = 20 26 | val_every = 5 27 | ## LR scheduler 28 | use_lr_scheduler 29 | lr_scheduler_start_epoch = 650 30 | lr_scheduler_end_epoch = 1650 31 | ## robustness 32 | # For each dimension, we apply the noise twice (1 for min corner, 1 for max corner), 33 | # hence, each dimension is affected by noise with sigma=20cm here 34 | noisy_boxes = 0.10 35 | majority_vote 36 | 37 | # augmentations 38 | augmentation 39 | scaling_aug = [1.0, 0.8, 1.2] 40 | flipping_aug = 0.5 41 | rotation_90_aug 42 | apply_hue_aug -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/run_segmentator.py: -------------------------------------------------------------------------------- 1 | """Generates the segmentations of 3d scanes given as .ply using 'segmentator' (in the cpp dir). 
2 | """ 3 | 4 | import subprocess 5 | import os 6 | 7 | from absl import app 8 | from absl import flags 9 | 10 | FLAGS = flags.FLAGS 11 | flags.DEFINE_string('scene_path', '../../data/scannet/scans_test/', help="Path to the .ply scenes.") 12 | flags.DEFINE_string('segments_path', '../../data/scannet/scans_test_segmented', help='Path to the generated segments.') 13 | flags.DEFINE_string('segmentator_path', 'cpp/segmentator', help='Path to the segmentator executable.') 14 | 15 | 16 | def segment_scene(scene_name): 17 | scene_path = os.path.join(FLAGS.scene_path, f'{scene_name}/{scene_name}_vh_clean_2.ply') 18 | command = [FLAGS.segmentator_path, scene_path, '0.01', '20', FLAGS.segments_path] 19 | subprocess.call(command) 20 | 21 | def main(_): 22 | if not os.path.exists(FLAGS.segments_path): 23 | os.makedirs(FLAGS.segments_path) 24 | scene_names = [file.split('.')[0] for file in os.listdir(FLAGS.scene_path)] 25 | for scene_name in scene_names: 26 | segment_scene(scene_name) 27 | 28 | 29 | if __name__ == '__main__': 30 | app.run(main) 31 | -------------------------------------------------------------------------------- /configs/s3dis_detections_learnedPS_voxsem_fold5.txt: -------------------------------------------------------------------------------- 1 | # experiment 2 | exp_name = cfg_name 3 | data_dir = ./data/s3dis/ 4 | 5 | # input settings 6 | use_normals_input 7 | 8 | # model 9 | do_segment_pooling 10 | network_heads = [mlp_offsets, mlp_bounds, mlp_bb_scores, mlp_per_vox_semantics] 11 | 12 | # eval 13 | eval_ths = [0.3, 0.03, 0.2, 0.6] 14 | #checkpoint = checkpoint_134h:41m:14s_484874.59536361694 15 | # checkpoint = checkpoint_206h:12m:53s_742373.8897235394 #0.673 16 | # checkpoint = checkpoint_192h:58m:47s_694727.7309098244 #0.683 17 | # checkpoint = checkpoint_190h:9m:7s_684547.2124330997 # 0.687 18 | # checkpoint = checkpoint_189h:14m:11s_681251.3121433258 # 0.689 19 | # checkpoint = checkpoint_191h:6m:14s_687974.8505253792 #0.676 20 | # checkpoint = checkpoint_186h:29m:40s_671380.9550452232 # 0.667 21 | # checkpoint = checkpoint_188h:20m:22s_678022.2635447979 22 | # checkpoint = checkpoint_192h:58m:47s_694727.7309098244 23 | checkpoint = checkpoint_195h:35m:19s_704119.6752953529 24 | # 0.693 25 | 26 | #training settings 27 | batch_size = 4 28 | num_workers = 8 29 | lr = 0.001 30 | loss_weight_bb_bounds = 0.5 31 | loss_weight_bb_scores = 3 32 | mlp_bb_scores_start_epoch = 100 33 | 34 | 35 | # augmentations 36 | augmentation 37 | scaling_aug= [1.0, 0.8, 1.2] 38 | rotation_aug=1.0 39 | 40 | # dataset settings 41 | dataset_name s3dis 42 | s3dis_split_fold 5 43 | point_sampling_rate 0.25 44 | ignore_wall_ceiling_floor 45 | superpoint_algo learned_superpoint 46 | 47 | load_unused_head -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/README.md: -------------------------------------------------------------------------------- 1 | Mesh Segmentation 2 | ================= 3 | 4 | Adapted from the original from the [ScaNet Github](https://github.com/ScanNet/ScanNet/tree/master/Segmentator). 5 | 6 | Note that the segments for the validation scenes are already available the ``*.segs.json`` files. 7 | 8 | # 1. Compile the segmentator 9 | 10 | Mesh segmentation code using Felzenswalb and Huttenlocher's [*Graph Based Image Segmentation*](https://cs.brown.edu/~pff/segment/index.html) algorithm on computed mesh normals. 
11 | 12 | To compile the segmentator code, navigate to the segmentator directory: 13 | ``` 14 | cd box2mask/dataprocessing/oversegmentation/cpp 15 | ``` 16 | Set `CMAKE_SOURCE_DIR={path_to_project_directory}/dataprocessing/oversegmentation/cpp` in `Makefile`, where `{path_to_project_directory}` is the path to the project home directory. 17 | 18 | Build by running `make` (or create makefiles for your system using `cmake`). This will create a `segmentator` binary that can be called to generate segmentations. 19 | 20 | To see if it works, try: 21 | 22 | `./segmentator input.ply [kThresh=0.01] [segMinVerts=20]` 23 | 24 | For example: 25 | 26 | `./segmentator ../../../data/scannet/scans/scene0011_00/scene0011_00_vh_clean_2.ply 0.01 20` 27 | 28 | ### Arguments 29 | 1. Path to an input mesh in PLY format. 30 | 2. The segmentation cluster threshold parameter; larger values lead to larger segments (optional). 31 | 3. The minimum number of vertices per segment, enforced by merging small clusters into larger segments (optional). 32 | 33 | # 2. Generate the segments 34 | 35 | `run_segmentator.py` 36 | 37 | # 3. Visualize the segments 38 | 39 | `visualize_segments.py` 40 | 41 | ![Segmentation scene_0776_00_oversegmentation](scene0776_00_oversegmentation.png "Oversegmentation") 42 | -------------------------------------------------------------------------------- /data/augmented_BBs/README.md: -------------------------------------------------------------------------------- 1 | ## Reproduce Augmented BBs Experiments 2 | 3 | The following instructions are for reproducing the experiments in Fig. 7 of our paper. We use seeds to generate the same set of augmented data in every run during training. The config files of these experiments are in `box2mask/configs/`. Each config is named either `scannet_dropout[percentage]` (`percentage` is the percentage of boxes that are missing) or `scannet_noisy[sigma]` (`sigma` is the standard deviation of the noise applied to each dimension). 4 | 5 | 6 | Similar to the main experiment, you can train the model using the augmented bounding boxes as in the example below: 7 | 8 | ``` 9 | python models/training.py --config configs/scannet_noisy1.txt 10 | ``` 11 | 12 | To evaluate with the validation set: 13 | 14 | ``` 15 | python models/evaluation.py --config configs/scannet_noisy1.txt 16 | ``` 17 | 18 | ## Augmented Data 19 | 20 | We also store our augmented BBs as npy files. The following script will download and extract the data to `data/augmented_BBs/scannet_augmented_boxes_data/`: 21 | ``` 22 | cd data/augmented_BBs/ 23 | wget https://datasets.d2.mpi-inf.mpg.de/box2mask/scannet_augmented_boxes_data.tar.gz 24 | tar -xvf scannet_augmented_boxes_data.tar.gz 25 | ``` 26 | 27 | The files are organized as follows: 28 | 29 | ```shell 30 | <data_name>/ 31 | |-- <scene_name>.npy 32 | ``` 33 | 34 | where `<data_name>` is `dropout[percentage]` (missing bounding box labels, `percentage` can be 1, 2, 5 or 10) or `noisy[sigma]` (noisy label data, `sigma` can be 2, 4, 10 or 20). 35 | Each `.npy` file contains the lists of min corners and max corners of the bounding boxes as well as the semantic ids. 36 | 37 | We provide a script to visualize the bounding boxes of a scene in the data. The command below will produce an interactive visualization server in `data/augmented_BBs/visualize/`. 
38 | 39 | ``` 40 | cd data/augmented_BBs/ 41 | python visualize_bbs_data.py --data noisy1 --scene_name scene0293_00 --data_path data/augmented_BBs/scannet_augmented_boxes_data/ 42 | ``` 43 | Use the command bellow to start the visualization server: 44 | 45 | ``` 46 | cd data/augmented_BBs/visualize/ 47 | python -m http.server 6008 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | ## Installation of Minkowski Engine 3 | 4 | We use [Minkowski Engine](https://github.com/NVIDIA/MinkowskiEngine) for sparse convolution of point cloud in our project. 5 | 6 | `MinkowskiEngine==0.5.4` with `cudatoolkit=10.2` was used for the project. 7 | 8 | First we creat a new environment 9 | ``` 10 | conda create -n box2mask python=3.7 11 | conda activate box2mask 12 | ``` 13 | 14 | Setup the CUDA system environment variables like the example below: 15 | ``` 16 | cuda_version=10.2 17 | # please set the right path to CUDA in your system, bellow is an example used for our system 18 | export CUDA_HOME=/usr/lib/cuda-${cuda_version}/ 19 | export PATH=/usr/lib/cuda-${cuda_version}/bin/:${PATH} 20 | export LD_LIBRARY_PATH=/usr/lib/cuda-${cuda_version}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 21 | export CUDA_PATH=/usr/lib/cuda-${cuda_version}/ 22 | ``` 23 | 24 | Next, we install pytorch with cudatoolkit and dependencies 25 | ``` 26 | conda install pytorch=1.8.1 torchvision cudatoolkit=${cuda_version} -c pytorch -c nvidia 27 | ``` 28 | 29 | Install dependencies for Minkowski Engine 30 | ``` 31 | pip install torch ninja 32 | conda install openblas-devel -c anaconda 33 | ``` 34 | 35 | We then install gcc version 7 36 | `sudo apt install g++-7` # For CUDA 10.2, must use GCC <= 8 37 | > Make sure `g++-7 --version` is at least 7.4.0 38 | > export CXX=g++-7 39 | 40 | Install Minkowski Engine via pip: 41 | ``` 42 | pip install -U MinkowskiEngine==0.5.4 --install-option="--blas=openblas" -v --no-deps 43 | ``` 44 | 45 | For more detailed installation instruction, see [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine). 46 | ## Checking installations of Minkowski Engine 47 | 48 | The following commands will clone the repository of Minkowski Engine and run an example segmentation model on an indoor point cloud: 49 | ``` 50 | git clone https://github.com/NVIDIA/MinkowskiEngine.git 51 | cd MinkowskiEngine 52 | # code requires open3d 53 | pip install open3d 54 | python -m examples.indoor 55 | ``` 56 | 57 | ## Install GIT repository and other dependencies 58 | The following commands will clone Box2Mask repo on your machine and install the remaining dependencies. 
Note that you should still be using `box2mask` environemnt 59 | ``` 60 | git clone -b release https://github.com/jchibane/Box2Mask.git box2mask 61 | cd box2mask 62 | conda env update --file env.yml 63 | ``` 64 | 65 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/visualize_segments.py: -------------------------------------------------------------------------------- 1 | """Visualizes instance segmentations (from scannet format).""" 2 | 3 | import os 4 | import json 5 | import random 6 | import pyviz3d.visualizer as viz 7 | import open3d as o3d 8 | import numpy as np 9 | 10 | from absl import app 11 | from absl import flags 12 | 13 | FLAGS = flags.FLAGS 14 | flags.DEFINE_string('path_scenes', '../../data/scannet/scans_test/', help='Path to scene .ply') 15 | flags.DEFINE_string('path_segments', '../../data/scannet/scans_test_segmented/', help='Path to scene .seg.json') 16 | flags.DEFINE_string('path_viewer', '../../viewer/', 'Path to the visualizations.') 17 | 18 | 19 | def visualize_scene(scene_name): 20 | """Propagates the actual per-point predictions to the segments.""" 21 | 22 | path_ply = os.path.join(FLAGS.path_scenes, f'{scene_name}/{scene_name}_vh_clean_2.ply') 23 | path_segs_json = os.path.join(FLAGS.path_segments, f'{scene_name}_vh_clean_2.0.010000.segs.json') 24 | path_viewer = os.path.join(FLAGS.path_viewer, scene_name) 25 | 26 | # Read ply 27 | mesh = o3d.io.read_triangle_mesh(path_ply) 28 | mesh.compute_vertex_normals() 29 | mesh.normalize_normals() 30 | vertices_positions = np.asarray(mesh.vertices) 31 | vertices_positions -= np.mean(vertices_positions, axis=0) 32 | vertices_normals = np.asarray(mesh.vertex_normals) 33 | vertices_colors = np.asarray(mesh.vertex_colors) 34 | 35 | # Read segments from json 36 | with open(path_segs_json) as f: 37 | data = json.load(f) 38 | segment_indices_list = data["segIndices"] 39 | segment_indices_int_array = np.asarray(segment_indices_list, dtype='int32') 40 | 41 | # Create segment colors 42 | segment_colors = np.ones_like(vertices_positions) 43 | for segment_id in set(segment_indices_list): 44 | mask = segment_id == segment_indices_int_array # point ids of segment 45 | segment_colors[mask] = np.array([random.random()*255, random.random()*255, random.random()*255]) 46 | 47 | v = viz.Visualizer() 48 | v.add_points(scene_name+'_color', vertices_positions, vertices_colors*255, vertices_normals, point_size=25) 49 | v.add_points(scene_name+'_segments', vertices_positions, segment_colors, vertices_normals, point_size=25) 50 | v.save(path_viewer, verbose=True) 51 | 52 | 53 | def main(_): 54 | scene_names = sorted([s.split('.')[0] for s in os.listdir(f'{FLAGS.path_scenes}')]) 55 | for scene_name in scene_names: 56 | visualize_scene(scene_name) 57 | 58 | 59 | if __name__ == '__main__': 60 | app.run(main) 61 | -------------------------------------------------------------------------------- /docs/arkitscenes.md: -------------------------------------------------------------------------------- 1 | The following instruction is for reproducing the experiments in Table. 2 in our paper. 2 | 3 | Follow the original ARKitScenes [instruction](https://github.com/apple/ARKitScenes/blob/main/DATA.md) to download the data (3dod dataset). 4 | The oversegmentation for ARKitScenes can be download here: [train](https://datasets.d2.mpi-inf.mpg.de/box2mask/segmented_train_clean.tar.gz) and [valid](https://datasets.d2.mpi-inf.mpg.de/box2mask/segmented_val_clean.tar.gz). 
5 | After you download the data and our prepared oversegmentations, the `Training`, `Validation`, and oversegmentation folders should be arranged in the following structure for our project: 6 | 7 | ``` 8 | box2mask/data/ARKitScenes/3dod/ 9 | └── Training 10 | ├── 44358604 # scene name 11 | ├── 44358604_3dod_annotation.json # segmentation label of the scene 12 | ├── 44358604_3dod_mesh.ply # mesh file 13 | ├── 44358604_frames/ # Containing RGBD camera sequences 14 | ├── 45662912 15 | ├── 45662912_3dod_annotation.json 16 | ├── 45662912_3dod_mesh.ply 17 | ├── 45662912_frames/ 18 | ... 19 | └── Validation/ 20 | ├── 41069021 21 | ├── 41069021_3dod_annotation.json 22 | ├── 41069021_3dod_mesh.ply 23 | ├── 41069021_frames/ 24 | ├── 25 | ... 26 | └── segmented_train_clean/ 27 | ├── 47331587_3dod_mesh.0.010000.segs.json 28 | ├── 44358604_3dod_mesh.0.010000.segs.json 29 | ... 30 | └── segmented_val_clean/ 31 | ├── 41069021_3dod_mesh.0.010000.segs.json 32 | ... 33 | ``` 34 | 35 | Similar to the main experiment, you can train the model using `training.py` from the root folder: 36 | 37 | ```bash 38 | python models/training.py --config configs/arkitscenes.txt 39 | ``` 40 | 41 | To evaluate with the validation set (producing results like Table 2): 42 | 43 | ```bash 44 | python models/evaluation.py --config configs/arkitscenes.txt 45 | ``` 46 | 47 | You can also produce visualizations by adding the option `--produce_visualizations`. Producing results for a specific scene can be achieved via `models/evaluation.py` with the `--predict_specific_scene` option; see the example below: 48 | 49 | ```bash 50 | python models/evaluation.py --config configs/arkitscenes.txt --predict_specific_scene 42445429 --produce_visualizations 51 | ``` 52 | 53 | Running the command above will produce the visualization of the segmentation result in `experiments/arkitscenes/results/[checkpoint]/viz/42445429`, where `[checkpoint]` is the checkpoint loaded when running the script. -------------------------------------------------------------------------------- /data/augmented_BBs/visualize_bbs_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import open3d as o3d 5 | from dataprocessing.scannet import scannet_color_map, SEMANTIC_VALID_CLASS_IDS, SEMANTIC_VALID_CLASS_IDS_torch 6 | import config_loader as cfg_loader 7 | import pyviz3d.visualizer as viz 8 | from dataprocessing import scannet 9 | import os 10 | import numpy as np 11 | from utils.util import get_bbs_lines 12 | import configargparse 13 | 14 | # Argument list 15 | parser = configargparse.ArgumentParser() 16 | parser.add_argument ("--data", type=str, default='noisy1', 17 | help='Data can be one of noisy1, dropout10, etc') 18 | parser.add_argument ("--scene_name", type=str, default='scene0293_00', 19 | help='Scene to be processed') 20 | parser.add_argument ("--data_path", type=str, default='/BS/atran2/work/tmp/for_webpage/scannet_boxes_data/', 21 | help='Path to the augmented boxes data') 22 | cfg = parser.parse_args() 23 | 24 | 25 | # Get the rgb point cloud and original labels of the scene 26 | scene, labels = scannet.process_scene(cfg.scene_name, 'train', cfg, do_augmentations=False) 27 | 28 | # Specify a data set (eg. noisy1, dropout10)
29 | data_name = cfg.data 30 | scene_name = cfg.scene_name 31 | 32 | # Load the instance BBs of the scene (expected layout: <data_path>/<data_name>/<scene_name>.npy) 33 | boxes_data_path = os.path.join (cfg.data_path, data_name) 34 | box_info_pth = os.path.join (boxes_data_path, scene_name + '.npy') 35 | boxes = np.load (box_info_pth, allow_pickle=True).item () 36 | 37 | v = viz.Visualizer() 38 | 39 | v.add_points ("Scene RGB", scene["positions"], scene['colors'] * 255, point_size=25, visible=False) 40 | 41 | min_corners = boxes["min_corner"] # List of min corners of instances 42 | max_corners = boxes["max_corner"] # List of max corners of instances 43 | semantic_ids = boxes ["semantic_id"] # List of semantic ids, one per box 44 | 45 | # Visualize each instance 46 | for instance_id in range(len(semantic_ids)): 47 | min_corner = min_corners[instance_id][None] # shape 1x3 48 | max_corner = max_corners[instance_id][None] # shape 1x3 49 | semantic_id = semantic_ids[instance_id] 50 | 51 | # Get the 12 edges of the box 52 | bb_centers = (max_corner + min_corner) / 2 53 | bb_bounds = max_corner - bb_centers 54 | start, end = get_bbs_lines(bb_centers, bb_bounds) 55 | semantic_color = scannet.scannet_color_map [semantic_id] 56 | semantic_name = scannet.scannet_class_names [semantic_id] 57 | lines_color = np.stack ([semantic_color for _ in range (12)]) 58 | 59 | # Draw the box using pyviz3d 60 | v.add_lines(semantic_name+';instance_id='+str(instance_id), start, end, lines_color, visible=False) 61 | 62 | visualize_path = os.path.join ("data/augmented_BBs/visualize/", data_name) 63 | os.makedirs(visualize_path, exist_ok=True) 64 | v.save(os.path.join(visualize_path, scene_name)) -------------------------------------------------------------------------------- /docs/code_structure.md: -------------------------------------------------------------------------------- 1 | # Code structure 2 | **configs/**, Includes the config files to run models 3 | 4 | **data/**, Stores datasets (e.g. 
data/scannet/ or data/ARKitScenes) 5 | 6 | **config_loader.py**, Defines all hyper-parameters of the model 7 | 8 | 9 | **dataprocessing** 10 | - **dataprocessing/augmentation.py**, Defines the augmentation code 11 | - **dataprocessing/scannet.py**, Reads in train/test/val scenes of ScanNet 12 | - **dataprocessing/arkitscenes.py**, Reads in train/val scenes of ARKitScenes 13 | - **dataprocessing/s3dis.py**, Reads in train/val scenes of S3DIS 14 | 15 | 16 | **models** 17 | - **models/dataloader.py**, Reads and preprocesses data and prepares tensor batches 18 | - **class ScanNet**, Reads and preprocesses ScanNet scenes 19 | - **approx_association()**, Finds the associations of points using GT bounding boxes 20 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 21 | - **class ARKitScenes**, Reads and preprocesses ARKitScenes scenes 22 | - **approx_association()**, Finds the associations of points using GT bounding boxes 23 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 24 | - **class S3DIS**, Reads and preprocesses S3DIS scenes 25 | - **approx_association()**, Finds the associations of points using GT bounding boxes 26 | - **__getitem__()**, Preprocesses the scenes, returns model inputs and labels 27 | - **collate_fn**, Collates preprocessed scenes into tensor batches 28 | 29 | - **models/detection_net.py**, Defines the network 30 | - **class SelectionNet**, Defines the main network and network heads 31 | - **detection2mask()**, Converts box proposals into final instance masks 32 | - **get_prediction()**, Gets predictions from the network heads 33 | - **models/evaluation.py**, Evaluates ScanNet and ARKitScenes predictions. Can be run with: `python models/evaluation.py --config configs/[config_name].txt` 34 | - **arkitscenes_eval()**, Approximates oriented bounding boxes from instance predictions and computes detection quality using the AP score 35 | - **scannet_eval()**, Computes ScanNet prediction scores in terms of AP, AP50 and AP25 36 | - **models/iou_nms.py**, Defines the Non-Maximum Clustering 37 | - **NMS_clustering()**, Non-Maximum Clustering algorithm (as in Sec. 3 and Sec. 4 in the main paper) 38 | - **models/resnet.py**, Some utilities for making the U-Net model 39 | - **models/training.py**, Defines the training code, can be run with: `python models/training.py --config configs/[config_name].txt` 40 | - **models/model.py**, Defines and computes the losses for each epoch 41 | - **compute_loss_detection()**, Computes each loss and the weighted joint loss for the network optimization 42 | 43 | **utils/**, Contains some low-level utilities -------------------------------------------------------------------------------- /docs/s3dis.md: -------------------------------------------------------------------------------- 1 | The following instructions are for reproducing our results on the S3DIS data. 2 | 3 | First, download the S3DIS data from the official [page](http://buildingparser.stanford.edu/dataset.html). 4 | Our preprocessed normals and oversegmentations for the S3DIS scenes can be downloaded here: [oversegmentation](https://datasets.d2.mpi-inf.mpg.de/box2mask/segment_labels.tar.gz) and [normals](https://datasets.d2.mpi-inf.mpg.de/box2mask/normals.tar.gz). 5 | Unzip the S3DIS data and the `normals` to `box2mask/data/Stanford3dDataset_v1.2_Aligned_Version/`. 
The structure of the unzipped data is as follows: 6 | 7 | ``` 8 | box2mask/data/Stanford3dDataset_v1.2_Aligned_Version/ 9 | └── Area_1/ # Containing point cloud, segmentation information, normals, colors informations 10 | ├── hallway_1/ 11 | ├── Annotations/ # Contains instances information 12 | ├── door_2.txt 13 | ├── floor_1.txt 14 | ├── wall_2.txt 15 | ... 16 | ├── hallway_1.txt # Contains positions and colors of scene points 17 | ├── office_11/ 18 | ... 19 | ├── office_12/ 20 | ... 21 | ... 22 | └── Area_2/ 23 | ... 24 | └── Area_3/ 25 | ... 26 | └── Area_4/ 27 | ... 28 | └── Area_5/ 29 | ... 30 | └── Area_6/ 31 | ... 32 | └── normals/ 33 | ├── Area_4.office_7.npy 34 | ├── Area_5.office_36.npy 35 | ├── Area_1.office_25.npy 36 | ... 37 | ... 38 | ``` 39 | 40 | Run the following script to prepare the S3DIS dataset 41 | 42 | ```bash 43 | mkdir -p ./data/s3dis/ 44 | python dataprocessing/prepare_s3dis.py --data_dir ./data/Stanford3dDataset_v1.2_Aligned_Version/ 45 | ``` 46 | 47 | Uncompress the `segment_labels.tar.gz` file to `box2mask/data/s3dis/` 48 | 49 | The preprocessed data and oversegmentation folders should be prepared as the following structure for our project: 50 | 51 | ``` 52 | box2mask/data/s3dis/ 53 | └── Area_1/ # Containing point cloud, segmentation information, normals, colors informations 54 | ├── hallway_1.normals.instance.npy 55 | ├── office_11.normals.instance.npy 56 | ├── office_12.normals.instance.npy 57 | ... 58 | └── Area_2/ 59 | ├── office_1.normals.instance.npy 60 | ├── office_2.normals.instance.npy 61 | ├── office_3.normals.instance.npy 62 | ... 63 | └── Area_6/ 64 | ├── conferenceRoom_1.normals.instance.npy 65 | ├── copyRoom_1.normals.instance.npy 66 | ├── office_3.normals.instance.npy 67 | ... 68 | └── segment_labels/ # Containing the segmentation files of all scenes 69 | ├──learned_superpoin_graph_segmentations/ 70 | ├── Area_4.office_7.npy 71 | ├── Area_5.office_36.npy 72 | ├── Area_1.office_25.npy 73 | ... 74 | ``` 75 | 76 | Here each `.normals.instance.npy` contains the point cloud, segmentations, colors and normals information. The information can be each extracted using the following script (note: instance labels is only used to get axis aligned bounding box information of each instance): 77 | 78 | ```python 79 | data = np.load ('box2mask/data/s3dis/Area_1/hallway_1.normals.instance.npy') 80 | 81 | positions = data [:,:3].astype (np.float32) # XYZ positions (N x 3) 82 | colors = data [:,3:6].astype (np.float) / 255 # Point colors (N x 3) 83 | normals = data [:,6:9].astype (np.float) # Surface normals (N x 3) 84 | semantics = data [:, -2].astype (np.int32) # Semantic labels of points (N x 1) 85 | instances = data [:, -1].astype (np.int32) # Instance labels of points (N x 1) 86 | ``` 87 | 88 | You can train the model using `training.py` from the root folder. Each config file is of format s3dis_fold\[area_number\] which area_number indicate the area to be used as validation set and other areas to be used as training set. For example, to have area 5 as the validation set and other areas for training: 89 | 90 | ```bash 91 | python models/training.py --config configs/s3dis_fold5.txt 92 | ``` 93 | 94 | To evaluate with the validation, run the following commands (producing the validation score as in Table 1 with Area 5): 95 | 96 | ```bash 97 | python models/evaluation.py --config configs/s3dis_fold5.txt 98 | ``` 99 | 100 | You can also produce visualization by adding option `--produce_visualizations`. 
To choose a specific scene to process, provide a scene name with the option `--predict_specific_scene`. Each scene has the name in the following format `Area_[area_number].[room_name]` where `[area_number]` is a number from 1 to 6 and `[room_name]` the name of the room in the area. Producing result for a specific scene can be achived via `model/evaluation.py` with `--predict_specific_scene` option, see the example below: 101 | 102 | ```bash 103 | python models/evaluation.py --config configs/s3dis_fold5.txt --predict_specific_scene Area_5.office_7 --produce_visualizations 104 | ``` 105 | 106 | Running the command above will produce the visualization of segmentation result in `experiments/s3dis_fold5/results/[checkpoint]/viz/Area_5.office_7` where `checkpoint` is the loaded checkpoint when running the script. -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import copy 4 | 5 | def get_bb_lines(bb_center, bb_bounds): 6 | start_list = [] 7 | end_list = [] 8 | bb_min = bb_center - bb_bounds 9 | bb_max = bb_center + bb_bounds 10 | length = bb_max - bb_min 11 | for i in range(3): 12 | start_list.append(bb_min) 13 | end = bb_min.copy() 14 | end[i] = bb_max[i] 15 | end_list.append(end) 16 | for j in range(3): 17 | if i == j: 18 | continue 19 | start_list.append(end) 20 | second_end = end.copy() 21 | second_end[j] += length[j] 22 | end_list.append(second_end) 23 | for i in range(3): 24 | start_list.append(bb_max) 25 | end = bb_max.copy() 26 | end[i] = bb_min[i] 27 | end_list.append(end) 28 | return np.array(start_list), np.array(end_list) 29 | 30 | def get_bbs_lines(bbs_centers, bbs_bounds): 31 | if type(bbs_centers) is torch.Tensor or type(bbs_bounds) is torch.Tensor: 32 | bbs_centers = bbs_centers.numpy() 33 | bbs_bounds = bbs_bounds.numpy() 34 | start_list = [] 35 | end_list = [] 36 | for i in range(len(bbs_centers)): 37 | # for i in range(len(bb_centers)): 38 | start, end = get_bb_lines(bbs_centers[i], bbs_bounds[i]) 39 | start_list.append(start) 40 | end_list.append(end) 41 | start = np.concatenate(start_list, 0) 42 | end = np.concatenate(end_list, 0) 43 | return start, end 44 | 45 | # out: bb [min corner ,max corner, score] 46 | def to_bbs_min_max(locations, offsets, bounds, scores=None, use_torch=True): 47 | if use_torch: 48 | centers = offsets + locations 49 | if offsets.is_cuda: 50 | bbs = torch.cuda.FloatTensor(centers.shape[0], 6).fill_(0) 51 | else: 52 | bbs = torch.zeros((centers.shape[0], 6)) 53 | bbs[:, :3] = centers - bounds 54 | bbs[:, 3:] = centers + bounds 55 | if scores is not None: 56 | bbs = torch.cat((scores, bbs), axis=1) 57 | else: 58 | centers = offsets + locations 59 | bbs = np.zeros((centers.shape[0], 6)) 60 | bbs[:, :3] = centers - bounds 61 | bbs[:, 3:] = centers + bounds 62 | if scores is not None: 63 | bbs = np.concatenate((scores, bbs), axis=1) 64 | return bbs 65 | 66 | def to_bbs_min_max_(centers, bounds, device): 67 | bounding_boxes = torch.zeros((bounds.shape[0], 6), device=device) 68 | bounding_boxes[:, :3] = centers - bounds 69 | bounding_boxes[:, 3:] = centers + bounds 70 | return bounding_boxes 71 | 72 | # go from min_corner, max_corner representation to center, bounds representation 73 | def to_bb_center_a_bounds(bbs_min_max): 74 | bb_centers = (bbs_min_max[:,3:] + bbs_min_max[:,:3]) / 2 75 | bb_bounds = bbs_min_max[:,3:] - bb_centers 76 | return bb_centers, bb_bounds 77 | 78 | def 
get_all_bb_corners(bb_centers,bb_bounds): 79 | # powerset of all dimensions 80 | neg_dims = [(), (0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)] 81 | corner_displacements = bb_bounds.expand(len(neg_dims),-1,-1) # (8,num_predictions on all scenes,3) 82 | for i, neg_dim in enumerate(neg_dims): 83 | corner_displacements[i,:,neg_dim] *= -1 84 | eight_cornered_bbs = bb_centers + corner_displacements # (8,num_predictions on all scenes,3) 85 | return eight_cornered_bbs 86 | 87 | # works on torch tensors 88 | def is_within_bb(points, bb_min, bb_max): 89 | return torch.all( points >= bb_min, axis=-1) & torch.all( points <= bb_max, axis=-1) 90 | # numpy version 91 | def is_within_bb_np(points, bb_min, bb_max): 92 | return np.all( points >= bb_min, axis=-1) & np.all( points <= bb_max, axis=-1) 93 | 94 | def convertSecs(sec): 95 | seconds = int(sec % 60) 96 | minutes = int((sec / 60) % 60) 97 | hours = int((sec / (60 * 60))) 98 | return hours, minutes, seconds 99 | 100 | import random 101 | from collections import defaultdict 102 | 103 | colors = defaultdict(lambda: [random.random() * 255, random.random() * 255, random.random() * 255]) 104 | colors[0] = [0,0,0] 105 | colors[-2] = [255,0,0] 106 | def to_color(arr): 107 | return np.array([colors[e] for e in arr]) 108 | 109 | def scalar2colors(arr): 110 | colors = np.zeros((len(arr),3)) 111 | colors[:,1] = arr 112 | colors *= 255 113 | return colors 114 | 115 | def to_worldcoords(vox_coords,scene, cfg): 116 | return (vox_coords * cfg.voxel_size + min(0, np.min(scene["positions"]))).numpy() 117 | 118 | # ----------------- map segment ids to dense ranking starting at index 0 (needed by ME global pool function) 119 | # Segment ids can be duplicates: map segment ids to unique ones. 120 | # This means, that every segment in each batch, needs to have a unique batch_id, in order to be pooled 121 | # separately. 122 | 123 | def to_unique( segments): # enumeration_ids, when we have id arrays, like [0,1,2,..,n] 124 | unique_segments = copy.deepcopy(segments) 125 | # make sure all segments across scenes have unique ids 126 | for i in range(1, len(unique_segments)): 127 | unique_segments[i] += np.max(unique_segments[i - 1]) + 1 128 | unique_segments = np.concatenate(unique_segments, 0) 129 | _, pooling_ids = np.unique(unique_segments, return_inverse=True) 130 | return torch.from_numpy(pooling_ids).long() 131 | 132 | 133 | # Epoch counts from 0 to N-1 134 | from math import cos, pi 135 | def cosine_lr_after_step(optimizer, base_lr, epoch, start_epoch, total_epochs, clip=1e-6): 136 | if epoch < start_epoch: 137 | lr = base_lr 138 | else: 139 | lr = clip + 0.5 * (base_lr - clip) * \ 140 | (1 + cos(pi * ( (epoch - start_epoch) / (total_epochs - start_epoch)))) 141 | 142 | for param_group in optimizer.param_groups: 143 | param_group['lr'] = lr -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. 
Qi 9 | """ 10 | 11 | import torch 12 | import numpy as np 13 | 14 | 15 | # ---------------------------------------- 16 | # Precision and Recall 17 | # ---------------------------------------- 18 | 19 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 20 | ''' 21 | Args: 22 | labels: (B, N, 6) 23 | pred: (B, M, 6) 24 | iou_thresh: scalar 25 | conf_thresh: scalar 26 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 27 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 28 | Returns: 29 | TP,FP,FN,Precision,Recall 30 | ''' 31 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 32 | # of True/False array as in numpy 33 | assert (not torch.is_tensor(label_mask)) 34 | assert (not torch.is_tensor(pred_mask)) 35 | TP, FP, FN = 0, 0, 0 36 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 37 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 38 | for batch_idx in range(labels.shape[0]): 39 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx, :] == 1, :], 40 | pred[batch_idx, pred_mask[batch_idx, :] == 1, :], 41 | iou_thresh, conf_thresh) 42 | TP += TP_i 43 | FP += FP_i 44 | FN += FN_i 45 | 46 | return TP, FP, FN, precision_recall(TP, FP, FN) 47 | 48 | 49 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 50 | """Compute P and R for predicted bounding boxes. Ignores classes! 51 | Args: 52 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 53 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 54 | Returns: 55 | TP, FP, FN 56 | """ 57 | 58 | # for each pred box with high conf (C), compute IoU with all gt boxes. 59 | # TP = number of times IoU > th ; FP = C - TP 60 | # FN - number of scene objects without good match 61 | 62 | gt_bboxes = labels[:, :6] 63 | 64 | num_scene_bboxes = gt_bboxes.shape[0] 65 | conf = pred[:, 6] 66 | 67 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 68 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 69 | 70 | # init an array to keep iou between generated and scene bboxes 71 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 72 | for g_idx in range(num_conf_pred_bboxes): 73 | for s_idx in range(num_scene_bboxes): 74 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx, :], gt_bboxes[s_idx, :]) 75 | 76 | good_match_arr = (iou_arr >= iou_thresh) 77 | 78 | TP = good_match_arr.any(axis=1).sum() 79 | FP = num_conf_pred_bboxes - TP 80 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 81 | 82 | return TP, FP, FN 83 | 84 | 85 | def precision_recall(TP, FP, FN): 86 | Prec = 1.0 * TP / (TP + FP) if TP + FP > 0 else 0 87 | Rec = 1.0 * TP / (TP + FN) 88 | return Prec, Rec 89 | 90 | 91 | def calc_iou(box_a, box_b): 92 | """Computes IoU of two axis aligned bboxes. 
93 | Args: 94 | box_a, box_b: 6D of center and lengths 95 | Returns: 96 | iou 97 | """ 98 | 99 | max_a = box_a[0:3] + box_a[3:6] / 2 100 | max_b = box_b[0:3] + box_b[3:6] / 2 101 | min_max = np.array([max_a, max_b]).min(0) 102 | 103 | min_a = box_a[0:3] - box_a[3:6] / 2 104 | min_b = box_b[0:3] - box_b[3:6] / 2 105 | max_min = np.array([min_a, min_b]).max(0) 106 | if not ((min_max > max_min).all()): 107 | return 0.0 108 | 109 | intersection = (min_max - max_min).prod() 110 | vol_a = box_a[3:6].prod() 111 | vol_b = box_b[3:6].prod() 112 | union = vol_a + vol_b - intersection 113 | return 1.0 * intersection / union 114 | 115 | 116 | if __name__ == '__main__': 117 | print('running some tests') 118 | 119 | ############ 120 | ## Test IoU 121 | ############ 122 | box_a = np.array([0, 0, 0, 1, 1, 1]) 123 | box_b = np.array([0, 0, 0, 2, 2, 2]) 124 | expected_iou = 1.0 / 8 125 | pred_iou = calc_iou(box_a, box_b) 126 | assert expected_iou == pred_iou, 'function returned wrong IoU' 127 | 128 | box_a = np.array([0, 0, 0, 1, 1, 1]) 129 | box_b = np.array([10, 10, 10, 2, 2, 2]) 130 | expected_iou = 0.0 131 | pred_iou = calc_iou(box_a, box_b) 132 | assert expected_iou == pred_iou, 'function returned wrong IoU' 133 | 134 | print('IoU test -- PASSED') 135 | 136 | ######################### 137 | ## Test Precition Recall 138 | ######################### 139 | gt_boxes = np.array([[0, 0, 0, 1, 1, 1], [3, 0, 1, 1, 10, 1]]) 140 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [3, 0, 1, 1, 10, 1, 0.9]]) 141 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 142 | assert TP == 2 and FP == 0 and FN == 0 143 | assert precision_recall(TP, FP, FN) == (1, 1) 144 | 145 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0]]) 146 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 147 | assert TP == 1 and FP == 0 and FN == 1 148 | assert precision_recall(TP, FP, FN) == (1, 0.5) 149 | 150 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [-1, -1, 0, 0.1, 0.1, 1, 1.0]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 1 and FP == 1 and FN == 1 153 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 154 | 155 | # wrong box has low confidence 156 | detected_boxes = np.array([[0, 0, 0, 1, 1, 1, 1.0], [-1, -1, 0, 0.1, 0.1, 1, 0.1]]) 157 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 158 | assert TP == 1 and FP == 0 and FN == 1 159 | assert precision_recall(TP, FP, FN) == (1, 0.5) 160 | 161 | print('Precition Recall test -- PASSED') 162 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.13 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | 7 | .PHONY : default_target 8 | 9 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 10 | .NOTPARALLEL: 11 | 12 | 13 | #============================================================================= 14 | # Special targets provided by cmake. 15 | 16 | # Disable implicit rules so canonical targets will work. 17 | .SUFFIXES: 18 | 19 | 20 | # Remove some rules from gmake that .SUFFIXES does not remove. 
21 | SUFFIXES = 22 | 23 | .SUFFIXES: .hpux_make_needs_suffix_list 24 | 25 | 26 | # Suppress display of executed commands. 27 | $(VERBOSE).SILENT: 28 | 29 | 30 | # A target that is always out of date. 31 | cmake_force: 32 | 33 | .PHONY : cmake_force 34 | 35 | #============================================================================= 36 | # Set environment variables for the build. 37 | 38 | # The shell in which to execute make rules. 39 | SHELL = /bin/sh 40 | 41 | # The CMake executable. 42 | CMAKE_COMMAND = /usr/bin/cmake 43 | 44 | # The command to remove a file. 45 | RM = /usr/bin/cmake -E remove -f 46 | 47 | # Escaping for special characters. 48 | EQUALS = = 49 | 50 | # The top-level source directory on which CMake was run. 51 | CMAKE_SOURCE_DIR = /home/atran/atran/for_webpage/for_webpage2/dataprocessing/oversegmentation/cpp 52 | 53 | # The top-level build directory on which CMake was run. 54 | CMAKE_BINARY_DIR = $(CMAKE_SOURCE_DIR) 55 | 56 | #============================================================================= 57 | # Targets provided globally by CMake. 58 | 59 | # Special rule for the target rebuild_cache 60 | rebuild_cache: 61 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 62 | /usr/bin/cmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 63 | .PHONY : rebuild_cache 64 | 65 | # Special rule for the target rebuild_cache 66 | rebuild_cache/fast: rebuild_cache 67 | 68 | .PHONY : rebuild_cache/fast 69 | 70 | # Special rule for the target edit_cache 71 | edit_cache: 72 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 73 | /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 74 | .PHONY : edit_cache 75 | 76 | # Special rule for the target edit_cache 77 | edit_cache/fast: edit_cache 78 | 79 | .PHONY : edit_cache/fast 80 | 81 | # The main all target 82 | all: cmake_check_build_system 83 | $(CMAKE_COMMAND) -E cmake_progress_start $(CMAKE_SOURCE_DIR)/cpp/CMakeFiles $(CMAKE_SOURCE_DIR)/CMakeFiles/progress.marks 84 | $(MAKE) -f CMakeFiles/Makefile2 all 85 | $(CMAKE_COMMAND) -E cmake_progress_start $(CMAKE_SOURCE_DIR)/CMakeFiles 0 86 | .PHONY : all 87 | 88 | # The main clean target 89 | clean: 90 | $(MAKE) -f CMakeFiles/Makefile2 clean 91 | .PHONY : clean 92 | 93 | # The main clean target 94 | clean/fast: clean 95 | 96 | .PHONY : clean/fast 97 | 98 | # Prepare targets for installation. 99 | preinstall: all 100 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 101 | .PHONY : preinstall 102 | 103 | # Prepare targets for installation. 104 | preinstall/fast: 105 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 106 | .PHONY : preinstall/fast 107 | 108 | # clear depends 109 | depend: 110 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 111 | .PHONY : depend 112 | 113 | #============================================================================= 114 | # Target rules for targets named segmentator 115 | 116 | # Build rule for target. 117 | segmentator: cmake_check_build_system 118 | $(MAKE) -f CMakeFiles/Makefile2 segmentator 119 | .PHONY : segmentator 120 | 121 | # fast build rule for target. 
122 | segmentator/fast: 123 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/build 124 | .PHONY : segmentator/fast 125 | 126 | segmentator.o: segmentator.cpp.o 127 | 128 | .PHONY : segmentator.o 129 | 130 | # target to build an object file 131 | segmentator.cpp.o: 132 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.o 133 | .PHONY : segmentator.cpp.o 134 | 135 | segmentator.i: segmentator.cpp.i 136 | 137 | .PHONY : segmentator.i 138 | 139 | # target to preprocess a source file 140 | segmentator.cpp.i: 141 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.i 142 | .PHONY : segmentator.cpp.i 143 | 144 | segmentator.s: segmentator.cpp.s 145 | 146 | .PHONY : segmentator.s 147 | 148 | # target to generate assembly for a file 149 | segmentator.cpp.s: 150 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/segmentator.cpp.s 151 | .PHONY : segmentator.cpp.s 152 | 153 | tinyply.o: tinyply.cpp.o 154 | 155 | .PHONY : tinyply.o 156 | 157 | # target to build an object file 158 | tinyply.cpp.o: 159 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.o 160 | .PHONY : tinyply.cpp.o 161 | 162 | tinyply.i: tinyply.cpp.i 163 | 164 | .PHONY : tinyply.i 165 | 166 | # target to preprocess a source file 167 | tinyply.cpp.i: 168 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.i 169 | .PHONY : tinyply.cpp.i 170 | 171 | tinyply.s: tinyply.cpp.s 172 | 173 | .PHONY : tinyply.s 174 | 175 | # target to generate assembly for a file 176 | tinyply.cpp.s: 177 | $(MAKE) -f CMakeFiles/segmentator.dir/build.make CMakeFiles/segmentator.dir/tinyply.cpp.s 178 | .PHONY : tinyply.cpp.s 179 | 180 | # Help Target 181 | help: 182 | @echo "The following are some of the valid targets for this Makefile:" 183 | @echo "... all (the default if no target is provided)" 184 | @echo "... clean" 185 | @echo "... depend" 186 | @echo "... rebuild_cache" 187 | @echo "... segmentator" 188 | @echo "... edit_cache" 189 | @echo "... segmentator.o" 190 | @echo "... segmentator.i" 191 | @echo "... segmentator.s" 192 | @echo "... tinyply.o" 193 | @echo "... tinyply.i" 194 | @echo "... tinyply.s" 195 | .PHONY : help 196 | 197 | 198 | 199 | #============================================================================= 200 | # Special targets to cleanup operation of make. 201 | 202 | # Special rule to run CMake to check the build system integrity. 203 | # No rule that depends on this can have commands that come from listfiles 204 | # because they might be regenerated. 
205 | cmake_check_build_system: 206 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 207 | .PHONY : cmake_check_build_system 208 | 209 | -------------------------------------------------------------------------------- /dataprocessing/prepare_s3dis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import numpy as np 5 | import skimage.io as io 6 | import open3d as o3d 7 | import pyviz3d.visualizer as viz 8 | import os 9 | import glob 10 | from sklearn.neighbors import NearestNeighbors 11 | from scipy.spatial import KDTree 12 | import natsort 13 | 14 | import configargparse 15 | 16 | parser = configargparse.ArgumentParser() 17 | parser.add_argument("--scene_id", type=int, default=None, 18 | help="Input the index of a scene to process. Default is None - process all scene") 19 | 20 | parser.add_argument("--data_dir", type=str, default='./data/Stanford3dDataset_v1.2_Aligned_Version/', 21 | help="Path to the original data") 22 | 23 | S3DIS_SEMANTICS_COLORS = np.array ( 24 | [(174, 199, 232), # ceiling 25 | (152, 223, 138), # floor 26 | (31, 119, 180), # wall 27 | (255, 187, 120), # column 28 | (188, 189, 34), # beam 29 | (140, 86, 75), # window 30 | (255, 152, 150), # door 31 | (214, 39, 40), # table 32 | (197, 176, 213), # chair 33 | (148, 103, 189), # bookcase 34 | (196, 156, 148), # sofa 35 | (23, 190, 207), # board 36 | (178, 76, 76),] # clutter 37 | ) 38 | 39 | INS_COLORS = np.array ([[np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] for _ in range (1000)]) 40 | 41 | def visualize (scene_name, pts, colors, normals, instances, semantics): 42 | viz_pth = 'visualize_scene_npy/' + scene_name 43 | os.makedirs (viz_pth, exist_ok=True) 44 | v = viz.Visualizer() 45 | sample_rate = 4 46 | 47 | normals_start = pts [::sample_rate*3] 48 | normals = normals[::sample_rate*3] 49 | pts = pts [::sample_rate] 50 | colors = colors [::sample_rate] 51 | instances = instances [::sample_rate] 52 | semantics = semantics [::sample_rate] 53 | 54 | instances_colors = INS_COLORS [instances] 55 | semantics_colors = S3DIS_SEMANTICS_COLORS [semantics] 56 | 57 | v.add_points ("points", pts , colors, point_size=15, visible=True) 58 | v.add_points ("instances", pts , instances_colors, point_size=15, visible=True) 59 | v.add_points ("semantics", pts , semantics_colors, point_size=15, visible=True) 60 | norm_colors = np.array ([[0,255,0] for _ in range (len (normals))]) 61 | v.add_lines ("normals", normals_start, normals_start + normals / 15, norm_colors, visible=False) 62 | v.save(viz_pth, verbose=False) 63 | 64 | 65 | ID2NAME = {0:'ceiling', 1:'floor', 2:'wall', 3:'beam', 4:'column', 5:'window', 6:'door', 7:'table', 8:'chair', 9:'sofa', 10:'bookcase', 11:'board', 12:'clutter'} 66 | ID2NAME = [ID2NAME [i] for i in range (13)] 67 | NAME2ID = {} 68 | for i in range (13): 69 | NAME2ID [ID2NAME [i]] = i 70 | 71 | def get_labels (scene_name, scene_data, data_dir): 72 | area = scene_name.split ('.') [0] 73 | name = scene_name.split ('.') [1] 74 | instance_pths = glob.glob (data_dir + '/' + area + '/' + name + '/Annotations/*.txt') 75 | 76 | scene_pts = scene_data [:,:3] # Scene point cloud 77 | pt_tree = KDTree (scene_pts) 78 | 79 | error = 0 80 | instances = np.zeros ((len (scene_data), 1), dtype=np.int32) - 1 81 | semantics = np.zeros ((len (scene_data), 1), dtype=np.float32) - 1 82 | 83 | # Use nearest neighbor to find corresponding point indexes in the 
scenes PC of instances 84 | for instance_id, pth in enumerate (instance_pths): 85 | class_name = pth.split ('/')[-1].split('_')[0] 86 | if not (class_name in NAME2ID.keys ()): 87 | if class_name == 'stairs': 88 | class_name = 'clutter' 89 | semantic_id = NAME2ID [class_name] 90 | # Load instance point cloud 91 | instance_data = np.loadtxt (pth) 92 | instance_pts = instance_data [:, :3] 93 | instance_colors = instance_data [:, 3:] 94 | # Find corresponding indices in the scene points 95 | dist, pt_indexs = pt_tree.query(instance_pts, k=1) 96 | instances [pt_indexs] = instance_id 97 | semantics [pt_indexs] = semantic_id 98 | error += dist.sum () 99 | 100 | decided = (instances >= 0)[:, 0] 101 | 102 | # For some points are not annotated, use the label from nearby points 103 | pt_tree = KDTree (scene_pts [decided]) 104 | dist, decided_indexs = pt_tree.query(scene_pts [~decided], k=1) 105 | 106 | instances [~decided] = instances [decided][decided_indexs] 107 | semantics [~decided] = semantics [decided][decided_indexs] 108 | 109 | assert (instances.min ()) >= 0 110 | assert (semantics.min ()) >= 0 111 | 112 | # Avoiding duplicate instances -> instance ids are contiguous from 0 113 | remap_id = np.array (range (instances.max () + 1)) 114 | for new_id, old_id in enumerate (np.unique (instances)): 115 | remap_id [old_id] = new_id 116 | instances = remap_id [instances].astype (np.float32) 117 | unique_instances = np.unique (instances) 118 | 119 | assert np.all(unique_instances == range(len(unique_instances))) 120 | 121 | return instances, semantics 122 | 123 | def read_scene_txt (name, data_dir): 124 | area = name.split ('.') [0] 125 | name = name.split ('.') [1] 126 | 127 | pts = np.loadtxt (os.path.join (data_dir + '/' + area, name, name + '.txt')) 128 | return pts 129 | 130 | def preprocess_s3dis (data_dir, scene_id): 131 | scene_list = [] 132 | for i in range (1, 7): 133 | area = data_dir + '/Area_' + str (i) 134 | tmp = glob.glob (area + '/*') 135 | for scene_name in tmp: 136 | scene_name = scene_name.split ('/')[-2] + '.' 
+ scene_name.split ('/')[-1] 137 | scene_list.append (scene_name) 138 | 139 | scene_list = natsort.natsorted (scene_list) 140 | 141 | if scene_id is not None: 142 | scene_list = scene_list [scene_id:scene_id+1] 143 | 144 | for scene_name in scene_list: 145 | area = scene_name.split ('.') [0] 146 | name = scene_name.split ('.') [1] 147 | save_dir = 'data/s3dis/' + area + '/' 148 | scene_pth = os.path.join (save_dir, name + '.normals.instance.npy') 149 | 150 | os.makedirs (save_dir, exist_ok=True) 151 | 152 | scene_data = read_scene_txt (scene_name, data_dir) 153 | instances, semantics = get_labels (scene_name, scene_data, data_dir) 154 | normals = np.load (data_dir + '/normals/' + scene_name + '.npy') 155 | data = np.concatenate ([scene_data, normals, semantics, instances], 1) 156 | 157 | pts = data [:,:3].astype (np.float32) 158 | colors = data [:,3:6].astype (np.float32) 159 | normals = data [:,6:9].astype (np.float32) 160 | semantics = data [:, -2].astype (np.int32) 161 | instances = data [:, -1].astype (np.int32) 162 | 163 | # visualize (scene_name, pts - pts.mean (0), colors, normals, instances, semantics) 164 | np.save (scene_pth, data) 165 | print ("saved ", scene_pth) 166 | 167 | cfg = parser.parse_args() 168 | preprocess_s3dis (cfg.data_dir, cfg.scene_id) -------------------------------------------------------------------------------- /models/iou_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def set_IOUs(boxes_a, boxes): 5 | # assert boxes are defined as: (min_corner, max_corner) 6 | assert boxes_a.shape[1] == 6 and boxes.shape[1] == 6 7 | boxes_a_side_lengths = boxes_a[:, 3:] - boxes_a[:, :3] 8 | boxes_side_lengths = boxes[:, 3:] - boxes[:, :3] 9 | assert torch.all(boxes_a_side_lengths >= 0) and torch.all(boxes_side_lengths >= 0) 10 | 11 | intersection_min = torch.maximum(boxes_a[:, :3], boxes[:, :3]) 12 | intersection_max = torch.minimum(boxes_a[:, 3:], boxes[:, 3:]) 13 | 14 | # no overlap produces negative values, and is cutoff by 0 15 | intersection_side_lengths = torch.clamp( intersection_max - intersection_min, min=0) 16 | intersection_area = torch.prod(intersection_side_lengths, axis=1) 17 | 18 | boxes_a_area = torch.prod(boxes_a_side_lengths, axis=1) 19 | boxes_area = torch.prod(boxes_side_lengths, axis=1) 20 | 21 | union_area = boxes_a_area + boxes_area - intersection_area + 0.000001 22 | return intersection_area / union_area 23 | 24 | 25 | # axis-aligned bounding boxes only 26 | def torch_IOUs(box, boxes): 27 | # assert boxes are defined as: (min_corner, max_corner) 28 | assert box.shape[0] == 6 and boxes.shape[1] == 6 29 | 30 | box_side_lengths = box[3:] - box[:3] 31 | boxes_side_lengths = boxes[:, 3:] - boxes[:, :3] 32 | assert torch.all(box_side_lengths >= 0) and torch.all(boxes_side_lengths >= 0) 33 | 34 | intersection_min = torch.maximum(box[:3], boxes[:, :3]) 35 | intersection_max = torch.minimum(box[3:], boxes[:, 3:]) 36 | 37 | # no overlap produces negative values, and is cutoff by 0 38 | intersection_side_lengths = torch.clamp( intersection_max - intersection_min, min=0) 39 | intersection_area = torch.prod(intersection_side_lengths, axis=1) 40 | 41 | box_area = torch.prod(box_side_lengths) 42 | boxes_area = torch.prod(boxes_side_lengths, axis=1) 43 | 44 | union_area = box_area + boxes_area - intersection_area + 0.000001 45 | return intersection_area / union_area 46 | 47 | 48 | def np_NMS_clustering(boxes, cluster_th=0.5): 49 | # boxes should be a list of 3D boxes 
[box_score, min_corner,max_corner], higher scores for better boxes 50 | assert boxes.shape[1] == 7 and len(boxes.shape) == 2 51 | assert cluster_th > 0 and cluster_th < 1 52 | remaining_boxes_indices = np.argsort(-boxes[:, 0]) 53 | clusters = [] 54 | 55 | while len(remaining_boxes_indices) > 0: 56 | remaining_boxes = boxes[remaining_boxes_indices] 57 | # remove score component 58 | remaining_boxes = remaining_boxes[:, 1:] 59 | ious = IOUs(remaining_boxes[0], remaining_boxes) 60 | iou_mask = ious <= cluster_th 61 | 62 | clusters.append([remaining_boxes_indices[0], remaining_boxes_indices[~iou_mask]]) 63 | remaining_boxes_indices = remaining_boxes_indices[iou_mask] 64 | 65 | return clusters 66 | 67 | 68 | def NMS_clustering(boxes, cluster_th=0.5, get_heatmaps=True): 69 | # boxes should be a list of 3D boxes [box_score, min_corner,max_corner], higher scores for better boxes 70 | assert boxes.shape[1] == 7 and len(boxes.shape) == 2 71 | assert cluster_th > 0 and cluster_th < 1 72 | # boxes should have positive side lengths - otherwise they don't have an area and are invalid 73 | boxes_side_lengths = boxes[:, 4:] - boxes[:, 1:4] 74 | valid = torch.min(boxes_side_lengths, axis=1)[0] > 0 # (num_boxes) 75 | if ~ torch.all(valid): 76 | print('Warning: Invalid boxes found.') 77 | 78 | remaining_boxes_indices = torch.argsort(-boxes[:, 0]) 79 | # remove score component 80 | boxes = boxes[:, 1:] 81 | cluster_representant = [] 82 | clusters = [] 83 | cluster_heatmaps = [] 84 | while len(remaining_boxes_indices) > 0: 85 | #print(len(remaining_boxes_indices)) 86 | remaining_boxes = boxes[remaining_boxes_indices] 87 | if get_heatmaps: 88 | cluster_heatmap = torch_IOUs(remaining_boxes[0], boxes) 89 | # manually set iou to 1, even for invalid boxes (side_lengths <=0) 90 | cluster_heatmap[remaining_boxes_indices[0]] = 1 91 | cluster_heatmaps.append(cluster_heatmap) 92 | ious = cluster_heatmap[remaining_boxes_indices] 93 | else: 94 | ious = torch_IOUs(remaining_boxes[0], remaining_boxes) 95 | # manually set iou to 1, even for invalid boxes (side_lengths <=0) 96 | ious[0] = 1 97 | iou_mask = ious <= cluster_th 98 | cluster_representant.append(remaining_boxes_indices[0]) 99 | clusters.append(remaining_boxes_indices[~iou_mask]) 100 | remaining_boxes_indices = remaining_boxes_indices[iou_mask] 101 | 102 | if get_heatmaps: 103 | return torch.Tensor(cluster_representant).long(), clusters, torch.stack(cluster_heatmaps,0) 104 | else: 105 | return torch.Tensor(cluster_representant).long(), clusters 106 | 107 | 108 | # input masks: bool (true inside, false outside), shape: (num_masks, num_mask_elements) 109 | def masks_iou(mask, masks, allow_empty = False): 110 | # empty masks are invalid 111 | if not allow_empty: 112 | assert torch.all(torch.sum(masks, axis=1) > 0) and torch.sum(mask) > 0 113 | intersection = torch.sum(mask & masks, axis=1) 114 | union = torch.sum(mask | masks, axis=1) 115 | return intersection / union 116 | else: 117 | intersection = torch.sum(mask & masks, axis=1) 118 | union = torch.sum(mask | masks, axis=1) 119 | ret = torch.zeros_like(union).float() 120 | ret[union > 0] = intersection[union > 0] / union[union > 0] 121 | return ret 122 | 123 | def mask_iou_np(mask, mask_b): 124 | # empty masks are invalid 125 | assert np.sum(mask_b) > 0 and np.sum(mask) > 0 126 | intersection = np.sum(mask & mask_b) 127 | union = np.sum(mask | mask_b) 128 | return intersection / union 129 | 130 | def mask_NMS(sorted_masks, cluster_th=0.5, allow_empty = False): 131 | remaining_masks_indices = 
torch.arange(len(sorted_masks)) 132 | output_masks = [] 133 | suppressed = [] 134 | while len(remaining_masks_indices) > 0: 135 | remaining_masks = sorted_masks[remaining_masks_indices] 136 | ious = masks_iou(remaining_masks[0], remaining_masks, allow_empty) 137 | ious[0] = 1 138 | iou_mask = ious <= cluster_th 139 | 140 | output_masks.append(remaining_masks_indices[0]) 141 | suppressed.append((remaining_masks_indices[0], remaining_masks_indices[~iou_mask])) 142 | remaining_masks_indices = remaining_masks_indices[iou_mask] 143 | 144 | return torch.hstack(output_masks), suppressed 145 | 146 | def semIOU(pred_label, gt_label): 147 | IOU = [] 148 | # ignore invalid and unlabeled regions 149 | valid = gt_label > -100 150 | gt_label = gt_label[valid] 151 | pred_label = pred_label[valid] 152 | scene_labels = torch.unique(torch.cat((gt_label,pred_label))) 153 | for l in scene_labels: 154 | intersection = torch.sum((pred_label == l) & (gt_label == l)) 155 | union = torch.sum((pred_label == l) | (gt_label == l)) 156 | IOU.append((intersection / (union + 1e-6)).item()) 157 | return np.array(IOU) 158 | -------------------------------------------------------------------------------- /dataprocessing/augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import random 4 | import scipy 5 | import matplotlib 6 | import albumentations as A 7 | 8 | SCANNET_ELASTIC_DISTORT_PARAMS = ((0.2, 0.4), (0.8, 1.6)) 9 | 10 | # mix 3d color augmentation and normalization 11 | mix3d_albumentations_aug = A.load('dataprocessing/mix3d_albumentations_aug.yaml', data_format="yaml") 12 | color_mean = (0.47793125906962, 0.4303257521323044, 0.3749598901421883) 13 | color_std = (0.2834475483823543, 0.27566157565723015, 0.27018971370874995) 14 | # input colors should be in 0,..,255 because 15 | # Normalize method applies: img = (img - mean * max_pixel_value) / (std * max_pixel_value) 16 | color_norm = A.Normalize(mean=color_mean, std=color_std) 17 | 18 | # HUE aug 19 | hue_aug = A.Compose([ 20 | A.HueSaturationValue(hue_shift_limit=50, sat_shift_limit=60, val_shift_limit=50, p=1), 21 | ], p=1) 22 | 23 | def rotate_mesh (mesh, max_xy_angle=np.pi / 100, individual_prob = 1): 24 | """ Randomly rotate the point clouds around z-axis (max 360 degree), x-axis and y-axis (max max_xy_angle degree) 25 | """ 26 | random_z_angle = 0 27 | random_x_angle = 0 28 | random_y_angle = 0 29 | if random.random() < individual_prob: 30 | random_z_angle = np.random.uniform (0, 2*np.pi) 31 | if random.random() < individual_prob: 32 | random_x_angle = np.random.uniform (-max_xy_angle, max_xy_angle) 33 | if random.random() < individual_prob: 34 | random_y_angle = np.random.uniform (-max_xy_angle, max_xy_angle) 35 | mesh.rotate(mesh.get_rotation_matrix_from_xyz((random_x_angle,random_y_angle,random_z_angle))) 36 | 37 | 38 | def rotate_mesh_90_degree(mesh): 39 | """ Randomly rotate the point clouds around z-axis (random angle in 0,90,180,270 degree) 40 | """ 41 | random_z_angle = [0, 0.5* np.pi, np.pi, 1.5 * np.pi][np.random.randint(0,4)] 42 | random_x_angle = 0 43 | random_y_angle = 0 44 | mesh.rotate(mesh.get_rotation_matrix_from_xyz((random_x_angle, random_y_angle, random_z_angle))) 45 | 46 | def scale_mesh (mesh, min_scale=0.9, max_scale=1.1): 47 | """ Randomly scale the point cloud with a random scale value between min and max 48 | """ 49 | scale = np.random.uniform (min_scale, max_scale) 50 | mesh.scale(scale, center=(0, 0, 0)) 51 | 52 | def color_jittering 
(colors, min=-0.05, max=0.05): 53 | """ Randomly jitter color 54 | Input: 55 | Nx3 array, original point colors 56 | Return: 57 | Nx3 array, jittered point colors 58 | """ 59 | jitters = np.random.uniform (min, max, colors.shape) 60 | jittered_colors = np.clip(jitters + colors, 0, 1) 61 | return jittered_colors 62 | 63 | def random_brightness (colors, brightness_limit=0.2): 64 | brighness_aug = A.RandomBrightnessContrast(p=1.0, brightness_limit=brightness_limit, contrast_limit=0.0, always_apply=True) 65 | colors = brighness_aug (image=colors.astype (np.float32)) ["image"] 66 | return colors 67 | 68 | def elastic_distortion( coords, granularity, magnitude): 69 | """Apply elastic distortion on sparse coordinate space. 70 | pointcloud: numpy array of (number of points, at least 3 spatial dims) 71 | granularity: size of the noise grid (in same scale[m/cm] as the voxel grid) 72 | magnitude: noise multiplier 73 | """ 74 | blurx = np.ones((3, 1, 1, 1)).astype('float32') / 3 75 | blury = np.ones((1, 3, 1, 1)).astype('float32') / 3 76 | blurz = np.ones((1, 1, 3, 1)).astype('float32') / 3 77 | coords_min = coords.min(0) 78 | 79 | # Create Gaussian noise tensor of the size given by granularity. 80 | noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3 81 | noise = np.random.randn(*noise_dim, 3).astype(np.float32) 82 | 83 | # Smoothing. 84 | for _ in range(2): 85 | noise = scipy.ndimage.filters.convolve(noise, blurx, mode='constant', cval=0) 86 | noise = scipy.ndimage.filters.convolve(noise, blury, mode='constant', cval=0) 87 | noise = scipy.ndimage.filters.convolve(noise, blurz, mode='constant', cval=0) 88 | 89 | # Trilinear interpolate noise filters for each spatial dimensions. 90 | ax = [ 91 | np.linspace(d_min, d_max, d) 92 | for d_min, d_max, d in zip(coords_min - granularity, coords_min + granularity * (noise_dim - 2), noise_dim) 93 | ] 94 | interp = scipy.interpolate.RegularGridInterpolator(ax, noise, bounds_error=0, fill_value=0) 95 | coords += interp(coords) * magnitude 96 | return coords 97 | 98 | 99 | class ChromaticTranslation(object): 100 | """Add random color to the image, input must be an array in [0,1] or a PIL image""" 101 | 102 | def __init__(self, trans_range_ratio=0.1): 103 | """ 104 | trans_range_ratio: ratio of translation i.e. 
1.0 * 2 * ratio * rand(-0.5, 0.5) 105 | """ 106 | self.trans_range_ratio = trans_range_ratio 107 | 108 | def __call__(self, feats): 109 | if random.random() < 0.95: 110 | tr = (np.random.rand(1, 3) - 0.5) * 1.0 * 2 * self.trans_range_ratio 111 | feats[:, :3] = np.clip(tr + feats[:, :3], 0, 1) 112 | return feats 113 | 114 | class RandomBrightness (object): 115 | """Randomly modify the brightness of the image""" 116 | def __init__ (self, factor_range=0.2): 117 | self.factor_range = factor_range 118 | 119 | def __call__ (self, feats): 120 | hsv = matplotlib.colors.rgb_to_hsv (feats) 121 | factor_range = self.factor_range 122 | factor = np.random.uniform (1 - factor_range, 1 + factor_range) 123 | hsv [:,2] *= factor 124 | hsv = np.clip (hsv, 0, 1) 125 | rgb = matplotlib.colors.hsv_to_rgb (feats) 126 | return rgb 127 | 128 | class ChromaticAutoContrast(object): 129 | 130 | def __init__(self, randomize_blend_factor=True, blend_factor=0.5): 131 | self.randomize_blend_factor = randomize_blend_factor 132 | self.blend_factor = blend_factor 133 | 134 | def __call__(self, feats): 135 | if random.random() < 1.0: 136 | lo = feats[:, :3].min(0, keepdims=True) 137 | hi = feats[:, :3].max(0, keepdims=True) 138 | assert hi.max() <= 1, f"invalid color value. Color is supposed to be [0-1]" 139 | 140 | scale = 1.0 / (hi - lo) 141 | 142 | contrast_feats = (feats[:, :3] - lo) * scale 143 | 144 | blend_factor = random.random() if self.randomize_blend_factor else self.blend_factor 145 | feats[:, :3] = (1 - blend_factor) * feats + blend_factor * contrast_feats 146 | return feats 147 | 148 | def apply_mix3d_color_aug(color): 149 | color = color * 255 # needs to be in [0,255] 150 | pseudo_image = color.astype(np.uint8)[np.newaxis, :, :] 151 | color = np.squeeze(mix3d_albumentations_aug(image=pseudo_image)["image"]) 152 | 153 | # normalize color information 154 | pseudo_image = color[np.newaxis, :, :] 155 | color = np.squeeze(color_norm(image=pseudo_image)["image"]) 156 | return color 157 | 158 | def apply_hue_aug(color): 159 | color = color * 255 # needs to be in [0,255] 160 | pseudo_image = color.astype(np.uint8)[np.newaxis, :, :] 161 | pseudo_image = hue_aug(image=pseudo_image)["image"] 162 | pseudo_image = mix3d_albumentations_aug(image=pseudo_image)["image"] 163 | color = np.squeeze(pseudo_image) 164 | 165 | # normalize color information 166 | pseudo_image = color[np.newaxis, :, :] 167 | color = np.squeeze(color_norm(image=pseudo_image)["image"]) 168 | return color 169 | 170 | # Elastic distortion implemented like in HAIS 171 | def HAIS_elastic( x, gran, mag): 172 | blur0 = np.ones((3, 1, 1)).astype('float32') / 3 173 | blur1 = np.ones((1, 3, 1)).astype('float32') / 3 174 | blur2 = np.ones((1, 1, 3)).astype('float32') / 3 175 | 176 | bb = np.abs(x).max(0).astype(np.int32)//int(gran) + 3 177 | noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)] 178 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 179 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 180 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 181 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 182 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 183 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 184 | ax = [np.linspace(-(b-1)*gran, (b-1)*gran, b) for b in bb] 185 
| interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise] 186 | def g(x_): 187 | return np.hstack([i(x_)[:,None] for i in interp]) 188 | return x + g(x) * mag -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Chris Choy (chrischoy@ai.stanford.edu). 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | # this software and associated documentation files (the "Software"), to deal in 5 | # the Software without restriction, including without limitation the rights to 6 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | # of the Software, and to permit persons to whom the Software is furnished to do 8 | # so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all 11 | # copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | # 21 | # Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural 22 | # Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part 23 | # of the code. 24 | import os 25 | from urllib.request import urlretrieve 26 | import numpy as np 27 | 28 | import torch 29 | import torch.nn as nn 30 | from torch.optim import SGD 31 | 32 | try: 33 | import open3d as o3d 34 | except ImportError: 35 | raise ImportError("Please install open3d with `pip install open3d`.") 36 | 37 | import MinkowskiEngine as ME 38 | from MinkowskiEngine.modules.resnet_block import Bottleneck 39 | 40 | def load_file(file_name): 41 | pcd = o3d.io.read_point_cloud(file_name) 42 | coords = np.array(pcd.points) 43 | colors = np.array(pcd.colors) 44 | return coords, colors, pcd 45 | 46 | class BasicBlock(nn.Module): 47 | expansion = 1 48 | 49 | def __init__(self, 50 | inplanes, 51 | planes, 52 | stride=1, 53 | dilation=1, 54 | downsample=None, 55 | bn_momentum=0.1, 56 | dimension=-1, 57 | expand_coordinates=False): 58 | super(BasicBlock, self).__init__() 59 | assert dimension > 0 60 | 61 | self.conv1 = ME.MinkowskiConvolution( 62 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, dimension=dimension, expand_coordinates=expand_coordinates) 63 | self.norm1 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) 64 | self.conv2 = ME.MinkowskiConvolution( 65 | planes, planes, kernel_size=3, stride=1, dilation=dilation, dimension=dimension) 66 | self.norm2 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) 67 | self.relu = ME.MinkowskiReLU(inplace=True) 68 | self.downsample = downsample 69 | 70 | def forward(self, x): 71 | residual = x 72 | out = self.conv1(x) 73 | out = self.norm1(out) 74 | out = self.relu(out) 75 | out = self.conv2(out) 76 | out = self.norm2(out) 77 | 78 | if self.downsample is not None: 79 | residual = self.downsample(x) 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | 
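# Illustrative usage sketch (hypothetical, kept entirely in comments): the ResNet variants
# defined below (ResNet14/18/34/50/101) plug BasicBlock or Bottleneck into ResNetBase.
# Assuming MinkowskiEngine and NumPy are installed, a toy forward pass could look like:
#
#   import numpy as np, torch
#   import MinkowskiEngine as ME
#   coords = ME.utils.batched_coordinates([np.random.randint(0, 100, (1000, 3))])
#   feats = torch.randn(coords.shape[0], 3)            # e.g. one RGB feature per voxel
#   net = ResNet14(in_channels=3, out_channels=20, D=3)
#   out = net(ME.SparseTensor(features=feats, coordinates=coords))
#   # 'out' is a sparse tensor with a single (1, 20) feature row after global pooling.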
class ResNetBase(nn.Module): 87 | BLOCK = None 88 | LAYERS = () 89 | INIT_DIM = 64 90 | PLANES = (64, 128, 256, 512) 91 | 92 | def __init__(self, in_channels, out_channels, D=3, expand_coordinates=False): 93 | nn.Module.__init__(self) 94 | self.D = D 95 | self.expand_coordinates = expand_coordinates 96 | assert self.BLOCK is not None 97 | 98 | self.network_initialization(in_channels, out_channels, D) 99 | self.weight_initialization() 100 | 101 | def network_initialization(self, in_channels, out_channels, D): 102 | 103 | self.inplanes = self.INIT_DIM 104 | self.conv1 = nn.Sequential( 105 | ME.MinkowskiConvolution( 106 | in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D 107 | ), 108 | ME.MinkowskiInstanceNorm(self.inplanes), 109 | ME.MinkowskiReLU(inplace=True), 110 | ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D), 111 | ) 112 | 113 | self.layer1 = self._make_layer( 114 | self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2 115 | ) 116 | self.layer2 = self._make_layer( 117 | self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2 118 | ) 119 | self.layer3 = self._make_layer( 120 | self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2 121 | ) 122 | self.layer4 = self._make_layer( 123 | self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2 124 | ) 125 | 126 | self.conv5 = nn.Sequential( 127 | ME.MinkowskiDropout(), 128 | ME.MinkowskiConvolution( 129 | self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D 130 | ), 131 | ME.MinkowskiInstanceNorm(self.inplanes), 132 | ME.MinkowskiGELU(), 133 | ) 134 | 135 | self.glob_pool = ME.MinkowskiGlobalMaxPooling() 136 | 137 | self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True) 138 | 139 | def weight_initialization(self): 140 | for m in self.modules(): 141 | if isinstance(m, ME.MinkowskiConvolution): 142 | ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu") 143 | 144 | if isinstance(m, ME.MinkowskiBatchNorm): 145 | nn.init.constant_(m.bn.weight, 1) 146 | nn.init.constant_(m.bn.bias, 0) 147 | 148 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1, expand_coordinates=False): 149 | downsample = None 150 | if stride != 1 or self.inplanes != planes * block.expansion: 151 | downsample = nn.Sequential( 152 | ME.MinkowskiConvolution( 153 | self.inplanes, 154 | planes * block.expansion, 155 | kernel_size=1, 156 | stride=stride, 157 | dimension=self.D, 158 | ), 159 | ME.MinkowskiBatchNorm(planes * block.expansion), 160 | ) 161 | layers = [] 162 | layers.append( 163 | block( 164 | self.inplanes, 165 | planes, 166 | stride=stride, 167 | dilation=dilation, 168 | downsample=downsample, 169 | dimension=self.D, 170 | expand_coordinates=expand_coordinates, 171 | ) 172 | ) 173 | self.inplanes = planes * block.expansion 174 | for i in range(1, blocks): 175 | layers.append( 176 | block( 177 | self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D, expand_coordinates=expand_coordinates, 178 | ) 179 | ) 180 | 181 | return nn.Sequential(*layers) 182 | 183 | def forward(self, x: ME.SparseTensor): 184 | x = self.conv1(x) 185 | x = self.layer1(x) 186 | x = self.layer2(x) 187 | x = self.layer3(x) 188 | x = self.layer4(x) 189 | x = self.conv5(x) 190 | x = self.glob_pool(x) 191 | return self.final(x) 192 | 193 | 194 | class ResNet14(ResNetBase): 195 | BLOCK = BasicBlock 196 | LAYERS = (1, 1, 1, 1) 197 | 198 | 199 | class ResNet18(ResNetBase): 200 | BLOCK = BasicBlock 201 | LAYERS = (2, 2, 2, 2) 202 | 203 | 204 | class ResNet34(ResNetBase): 205 | BLOCK = 
BasicBlock 206 | LAYERS = (3, 4, 6, 3) 207 | 208 | 209 | class ResNet50(ResNetBase): 210 | BLOCK = Bottleneck 211 | LAYERS = (3, 4, 6, 3) 212 | 213 | 214 | class ResNet101(ResNetBase): 215 | BLOCK = Bottleneck 216 | LAYERS = (3, 4, 23, 3) 217 | 218 | 219 | class ResFieldNetBase(ResNetBase): 220 | def network_initialization(self, in_channels, out_channels, D): 221 | field_ch = 32 222 | field_ch2 = 64 223 | self.field_network = nn.Sequential( 224 | ME.MinkowskiSinusoidal(in_channels, field_ch), 225 | ME.MinkowskiBatchNorm(field_ch), 226 | ME.MinkowskiReLU(inplace=True), 227 | ME.MinkowskiLinear(field_ch, field_ch), 228 | ME.MinkowskiBatchNorm(field_ch), 229 | ME.MinkowskiReLU(inplace=True), 230 | ME.MinkowskiToSparseTensor(), 231 | ) 232 | self.field_network2 = nn.Sequential( 233 | ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2), 234 | ME.MinkowskiBatchNorm(field_ch2), 235 | ME.MinkowskiReLU(inplace=True), 236 | ME.MinkowskiLinear(field_ch2, field_ch2), 237 | ME.MinkowskiBatchNorm(field_ch2), 238 | ME.MinkowskiReLU(inplace=True), 239 | ME.MinkowskiToSparseTensor(), 240 | ) 241 | 242 | ResNetBase.network_initialization(self, field_ch2, out_channels, D) 243 | 244 | def forward(self, x: ME.TensorField): 245 | otensor = self.field_network(x) 246 | otensor2 = self.field_network2(otensor.cat_slice(x)) 247 | return ResNetBase.forward(self, otensor2) 248 | 249 | 250 | class ResFieldNet14(ResFieldNetBase): 251 | BLOCK = BasicBlock 252 | LAYERS = (1, 1, 1, 1) 253 | 254 | 255 | class ResFieldNet18(ResFieldNetBase): 256 | BLOCK = BasicBlock 257 | LAYERS = (2, 2, 2, 2) 258 | 259 | 260 | class ResFieldNet34(ResFieldNetBase): 261 | BLOCK = BasicBlock 262 | LAYERS = (3, 4, 6, 3) 263 | 264 | 265 | class ResFieldNet50(ResFieldNetBase): 266 | BLOCK = Bottleneck 267 | LAYERS = (3, 4, 6, 3) 268 | 269 | 270 | class ResFieldNet101(ResFieldNetBase): 271 | BLOCK = Bottleneck 272 | LAYERS = (3, 4, 23, 3) 273 | 274 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Box2Mask 2 | 3 | > [Julian Chibane](http://virtualhumans.mpi-inf.mpg.de/people/Chibane.html), 4 | > [Francis Engelmann](https://francisengelmann.github.io/), 5 | > [Tuan Anh Tran](https://scholar.google.com/citations?user=5-0hLggAAAAJ&hl=en), 6 | > [Gerard Pons-Moll](http://virtualhumans.mpi-inf.mpg.de/people/pons-moll.html)
7 | > Box2Mask: Weakly Supervised 3D Semantic Instance Segmentation Using Bounding Boxes
8 | > In European Conference on Computer Vision (ECCV), 2022 9 | 10 | ![Teaser](teaser.jpeg) 11 | 12 | [Paper](http://virtualhumans.mpi-inf.mpg.de/papers/chibane22Box2Mask/Chibane_Box2Mask.pdf) - 13 | [Video](https://virtualhumans.mpi-inf.mpg.de/box2mask/#b2m_video) - 14 | [Project Website](https://virtualhumans.mpi-inf.mpg.de/box2mask/) - 15 | [Arxiv](https://arxiv.org/abs/2206.01203) - 16 | [Citation (Bibtex)](https://virtualhumans.mpi-inf.mpg.de/box2mask/#cite) 17 | 18 | ## Installations 19 | 20 | A linux system with python environment manager [conda](https://www.anaconda.com/) is required for the project. 21 | Follow the instructions [here](docs/installation.md) to setup the environment 22 | 23 | ## Data Setup 24 | 25 | **Scannet**: Download the [Scannet](http://www.scan-net.org/) dataset here. 26 | Download the preprocessed ground truth data ([gt_instance_data_txt.tar.gz](https://datasets.d2.mpi-inf.mpg.de/box2mask/gt_instance_data_txt.tar.gz)) and extract it to `data/scannet/` 27 | Each scene is stored with the name of format `scene%04d_%02d` (see [Scannet](https://raw.githubusercontent.com/ScanNet/ScanNet/master/README.md)). 28 | The data should be organized as follows for our project. 29 | ``` 30 | box2mask/data/scannet/ 31 | └── scans/ # contains 1513 train/valid scences 32 | ├── scene0383_02/ # each scene has the name in format `scene%04d_%02d` 33 | ├── scene0383_02_vh_clean.ply 34 | ├── scene0383_02.sens 35 | ├── scene0383_02_vh_clean_2.0.010000.segs.json 36 | ├── scene0383_02.aggregation.json, _vh_clean.aggregation.json 37 | ├── scene0383_02_vh_clean_2.0.010000.segs.json, _vh_clean.segs.json 38 | ├── scene0383_02_vh_clean_2.labels.ply 39 | ├── scene0383_02_2d-label.zip 40 | ├── scene0383_02_2d-instance.zip 41 | ├── scene0383_02_2d-label-filt.zip 42 | ├── scene0383_02_2d-instance-filt.zip 43 | ├── scene0515_02/ 44 | ├── scene0643_00/ 45 | ... 46 | └── scans_test/ # contains 100 test scenes 47 | ├── scene0731_00/ 48 | ├── scene0731_00.sens 49 | ├── scene0731_00.txt 50 | ├── scene0731_00_vh_clean_2.ply 51 | ├── scene0731_00_vh_clean.ply 52 | ├── scene0739_00/ 53 | ├── scene0747_00/ 54 | ... 55 | └── scannetv2_official_split.npz # contains data splits info 56 | └── gt_instance_data_txt/ # contains GT segmentations as txt files 57 | ├── scene0383_02.txt 58 | ├── scene0643_00.txt 59 | ... 60 | ``` 61 | 62 | **Arkit**: See [Arkitscenes instruction](docs/arkitscenes.md). 63 | 64 | **S3DIS**: See [S3DIS instruction](docs/s3dis.md). 65 | 66 | 67 | ## Quick Start with Pretrained Model 68 | 69 | We provide a pretrained checkpoint for a quick start with the method. 70 | First, from the folder where you clone the project, run the following command to download the pretrained checkpoint: 71 | ```bash 72 | cd box2mask # Navigate to the root folder 73 | mkdir -p experiments/scannet/checkpoints/ 74 | cd experiments/scannet/checkpoints/ 75 | wget https://datasets.d2.mpi-inf.mpg.de/box2mask/checkpoint_101h:54m:35s_366875.3242661953.tar 76 | cd ../../../ 77 | ``` 78 | 79 | Next, to predict a scene in the train set or test set, run the prediction from the project home folder: 80 | ```bash 81 | python models/evaluation.py --config configs/scannet.txt --predict_specific_scene scene0293_00 82 | ``` 83 | where `--predict_specific_scene` specifies the name of the scene that will be processed. 84 | The result of the prediction is saved in `experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/` as `pred_instances.ply` and `pred_semantics.ply`. 
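For a quick offline look at these exports (without starting the web viewer), you can load them with `open3d`, which is already listed in `env.yml`. The snippet below is only a minimal sketch, run from the project root; if the files are stored as meshes rather than point clouds, use `o3d.io.read_triangle_mesh` instead:
```python
import open3d as o3d

# Path produced by the prediction command above; adapt checkpoint/scene names to your run.
viz_dir = "experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/"

instances = o3d.io.read_point_cloud(viz_dir + "pred_instances.ply")
semantics = o3d.io.read_point_cloud(viz_dir + "pred_semantics.ply")
print(instances, semantics)                      # point counts of both exports
o3d.visualization.draw_geometries([instances])   # opens a simple local viewer
```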
85 | To visualize the result using interactive web server, run: 86 | ```bash 87 | cd experiments/scannet/results/checkpoint_101h:54m:35s_366875.3242661953/viz/scene0293_00/ 88 | python -m http.server 6008 89 | ``` 90 | and follow the on-screen instructions. 91 | 92 | ## Training 93 | 94 | Start to train a model with a specific configuration file using: 95 | 96 | ```bash 97 | python models/training.py --config configs/scannet.txt 98 | ``` 99 | The command above will train with the Scannet dataset. 100 | You can use a different config file to train with a different dataset. 101 | To train with the Arkitscenes dataset use `configs/arkitscenes.txt` or to train with the S3DIS dataset with area 1 as the validation set use `config/s3dis_fold1` (the data needs to be setup first, see Sec. 'Data Setup') 102 | > Note: The above configuration uses a batch of 8 scenes, which assumes ~48GB GPURAM. 103 | > RAM usage can be decreased via a smaller batch size, see parameter `--batch_size`. 104 | 105 | ## Prediction and visualization 106 | 107 | The following command makes a prediction for the validation set and computes the validation score (reproducing the results from table 1 in our paper). 108 | ```bash 109 | python models/evaluation.py --config configs/scannet.txt --fixed_seed 10 110 | ``` 111 | To visualize the prediction for the validation set, add option `--produce_visualizations [scene_name]` to the above command, where `[scene_name]` is the name of the scene (eg. `scene0293_00` in Scannet or `6667847` in Arkitscenes or `Area_5.office_13` in S3DIS). 112 | The visualization files will be stored in `./experiments/[config_name]/results/[checkpoint]/viz/` where `[checkpoint]` is the name of the checkpoint used for prediction and `[config_name]` is the name of the config (`scannet` in this example). 113 | The interactive visualization server can be started using the command bellow. 114 | ```bash 115 | cd ./experiments/scannet/results/[checkpoint] 116 | python -m http.server 6008 117 | ``` 118 | Follow the on-screen instructions to find the visualizations in your browser. 119 | 120 | 121 | ## Prediction on the ScanNet test set 122 | The oversegmentations of scannet test scenes are needed for our project. Oversegmentations are already included for the train and validation scenes. 123 | For test scenes, see the [instruction](dataprocessing/oversegmentation/README.md) to compile the oversegmentation program. 124 | 125 | Next, the following script will produce the oversegmentations for test scenes. The oversegmentations results will be stored at `./data/scannet/scans_test_segmented` 126 | ```bash 127 | mkdirs -p ./data/scannet/scans_test_segmented 128 | cd dataprocessing/oversegmentation/ 129 | python run_segmentator.py 130 | ``` 131 | 132 | To run the ScanNet evaluation on the test set, we need to add the parameter `--submission_write_out_testset`. 133 | Without this parameter the validation set is evaluated as seen in the previous section. 134 | ```bash 135 | python models/evaluation.py --config configs/scannet.txt --submission_write_out_testset --fixed_seed 100 136 | ``` 137 | 138 | Resulting predictions files will be stored in `./experiments/scannet/results/[checkpoint]`. 139 | Our results are formatted into Scannet submission format ([see documentation](https://kaldir.vc.in.tum.de/scannet_benchmark/documentation)). 140 | `--fixed_seed` specifies a seed for test time augmentation. 141 | Results can be visualized interactively, in the same fashion as shown in the previous section. 
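As a rough sanity check before uploading, you can inspect one of the exported scene files. According to the linked ScanNet documentation, each scene-level txt lists one predicted instance per line as a relative mask-file path, a semantic label id, and a confidence; the helper below is a hypothetical sketch (not part of this repository) based on that format:
```python
import os

def print_submission_summary(scene_txt):
    """Hypothetical helper: summarize the instances listed in one scene-level prediction file."""
    base_dir = os.path.dirname(scene_txt)
    with open(scene_txt) as f:
        for line in f:
            rel_mask, label_id, confidence = line.split()
            n_fg = sum(1 for v in open(os.path.join(base_dir, rel_mask)) if v.strip() == "1")
            print(f"{rel_mask}: label {label_id}, conf {float(confidence):.2f}, {n_fg} foreground vertices")
```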
142 | ## Augmented Data 143 | 144 | This [instruction](data/augmented_BBs/README.md) shows how to reproduce the augmented bounding box labels experiments and how to get the data. 145 | 146 | ## Arkitscenes Data 147 | 148 | See [instruction](docs/arkitscenes.md) to reproduce the results of arkitscenes. 149 | 150 | ## S3DIS Data 151 | 152 | See [instruction](docs/s3dis.md) to reproduce the results of S3DIS. 153 | 154 | ## Code structure 155 | 156 | The code structure can be found [here](docs/code_structure.md). 157 | 158 | 159 | ## License 160 | Copyright (c) 2022 Julian Chibane, Max-Planck-Gesellschaft 161 | 162 | By using this code you agree to the terms in the LICENSE. 163 | 164 | Moreover, you agree to cite the `Box2Mask: Weakly Supervised 3D Semantic Instance Segmentation Using Bounding Boxes` paper in 165 | any documents that report on research using this software or the manuscript. 166 | 167 | 168 |
169 | Show LICENSE (click to expand) 170 | Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use this software and associated documentation files (the "Software"). 171 | 172 | The authors hereby grant you a non-exclusive, non-transferable, free of charge right to copy, modify, merge, publish, distribute, and sublicense the Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects. 173 | 174 | Any other use, in particular any use for commercial purposes, is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artefacts for commercial purposes. 175 | For commercial inquiries, please see above contact information. 176 | 177 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 178 | 179 | You understand and agree that the authors are under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Software. The authors nevertheless reserve the right to update, modify, or discontinue the Software at any time. 180 | 181 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 182 | 183 |
184 | 185 | -------------------------------------------------------------------------------- /utils/evaluate_detections.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recal and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | from multiprocessing import Pool 24 | from utils.metric_util import calc_iou # axis-aligned 3D box IoU 25 | from utils.box_util import box3d_iou 26 | 27 | 28 | def voc_ap(rec, prec, use_07_metric=False): 29 | """ ap = voc_ap(rec, prec, [use_07_metric]) 30 | Compute VOC AP given precision and recall. 31 | If use_07_metric is true, uses the 32 | VOC 07 11 point method (default:False). 33 | """ 34 | if use_07_metric: 35 | # 11 point metric 36 | ap = 0. 37 | for t in np.arange(0., 1.1, 0.1): 38 | if np.sum(rec >= t) == 0: 39 | p = 0 40 | else: 41 | p = np.max(prec[rec >= t]) 42 | ap = ap + p / 11. 43 | else: 44 | # correct AP calculation 45 | # first append sentinel values at the end 46 | mrec = np.concatenate(([0.], rec, [1.])) 47 | mpre = np.concatenate(([0.], prec, [0.])) 48 | 49 | # compute the precision envelope 50 | for i in range(mpre.size - 1, 0, -1): 51 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 52 | 53 | # to calculate area under PR curve, look for points 54 | # where X axis (recall) changes value 55 | i = np.where(mrec[1:] != mrec[:-1])[0] 56 | 57 | # and sum (\Delta recall) * prec 58 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 59 | return ap 60 | 61 | 62 | def get_iou(bb1, bb2): 63 | """ Compute IoU of two bounding boxes. 64 | ** Define your bod IoU function HERE ** 65 | """ 66 | # pass 67 | iou3d = calc_iou(bb1, bb2) 68 | return iou3d 69 | 70 | 71 | def get_iou_obb(bb1, bb2): 72 | iou3d, iou2d = box3d_iou(bb1, bb2) 73 | return iou3d 74 | 75 | 76 | def get_iou_main(get_iou_func, args): 77 | return get_iou_func(*args) 78 | 79 | 80 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 81 | """ Generic functions to compute precision/recall for object detection 82 | for a single class. 
83 | Input: 84 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 85 | gt: map of {img_id: [bbox]} 86 | ovthresh: scalar, iou threshold 87 | use_07_metric: bool, if True use VOC07 11 point method 88 | Output: 89 | rec: numpy array of length nd 90 | prec: numpy array of length nd 91 | ap: scalar, average precision 92 | """ 93 | 94 | # construct gt objects 95 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 96 | npos = 0 97 | for img_id in gt.keys(): 98 | bbox = np.array(gt[img_id]) 99 | det = [False] * len(bbox) 100 | npos += len(bbox) 101 | class_recs[img_id] = {'bbox': bbox, 'det': det} 102 | # pad empty list to all other imgids 103 | for img_id in pred.keys(): 104 | if img_id not in gt: 105 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 106 | 107 | # construct dets 108 | image_ids = [] 109 | confidence = [] 110 | BB = [] 111 | for img_id in pred.keys(): 112 | for box, score in pred[img_id]: 113 | image_ids.append(img_id) 114 | confidence.append(score) 115 | BB.append(box) 116 | confidence = np.array(confidence) 117 | 118 | BB = np.array(BB) 119 | 120 | # sort by confidence 121 | sorted_ind = np.argsort(-confidence) 122 | sorted_scores = np.sort(-confidence) 123 | BB = BB[sorted_ind, ...] 124 | image_ids = [image_ids[x] for x in sorted_ind] 125 | 126 | # go down dets and mark TPs and FPs 127 | nd = len(image_ids) 128 | tp = np.zeros(nd) 129 | fp = np.zeros(nd) 130 | for d in range(nd): 131 | R = class_recs[image_ids[d]] 132 | try: 133 | bb = BB[d, ...].astype(float) 134 | except: 135 | bb = BB[d, ...].tolist().astype(float) 136 | ovmax = -np.inf 137 | BBGT = R['bbox'].astype(float) 138 | 139 | if BBGT.size > 0: 140 | # compute overlaps 141 | for j in range(BBGT.shape[0]): 142 | iou = get_iou_main(get_iou_func, (bb, BBGT[j, ...])) 143 | if iou > ovmax: 144 | ovmax = iou 145 | jmax = j 146 | 147 | if ovmax > ovthresh: 148 | if not R['det'][jmax]: 149 | tp[d] = 1. 150 | R['det'][jmax] = 1 151 | else: 152 | fp[d] = 1. 153 | else: 154 | fp[d] = 1. 155 | 156 | # compute precision recall 157 | fp = np.cumsum(fp) 158 | tp = np.cumsum(tp) 159 | rec = tp / float(npos) 160 | # avoid divide by zero in case the first detection matches a difficult 161 | # ground truth 162 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 163 | ap = voc_ap(rec, prec, use_07_metric) 164 | 165 | return rec, prec, ap 166 | 167 | 168 | def eval_det_cls_wrapper(arguments): 169 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 170 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 171 | return (rec, prec, ap) 172 | 173 | 174 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 175 | """ Generic functions to compute precision/recall for object detection 176 | for multiple classes. 
177 | Input: 178 | pred_all: map of {img_id: [(classname, bbox, score)]} 179 | gt_all: map of {img_id: [(classname, bbox)]} 180 | ovthresh: scalar, iou threshold 181 | use_07_metric: bool, if true use VOC07 11 point method 182 | Output: 183 | rec: {classname: rec} 184 | prec: {classname: prec_all} 185 | ap: {classname: scalar} 186 | """ 187 | pred = {} # map {classname: pred} 188 | gt = {} # map {classname: gt} 189 | for img_id in pred_all.keys(): 190 | for classname, bbox, score in pred_all[img_id]: 191 | if classname not in pred: pred[classname] = {} 192 | if img_id not in pred[classname]: 193 | pred[classname][img_id] = [] 194 | if classname not in gt: 195 | gt[classname] = {} 196 | if img_id not in gt[classname]: 197 | gt[classname][img_id] = [] 198 | pred[classname][img_id].append((bbox, score)) 199 | for img_id in gt_all.keys(): 200 | for classname, bbox in gt_all[img_id]: 201 | if classname == 22: 202 | print(img_id, classname) 203 | if classname not in gt: 204 | gt[classname] = {} 205 | if img_id not in gt[classname]: 206 | gt[classname][img_id] = [] 207 | gt[classname][img_id].append(bbox) 208 | 209 | rec = {} 210 | prec = {} 211 | ap = {} 212 | for classname in gt.keys(): 213 | try: 214 | print('Computing AP for class: ', classname) 215 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, 216 | use_07_metric, get_iou_func) 217 | print(classname, ap[classname]) 218 | except KeyError as exception: 219 | print('KeyError:', exception) 220 | return rec, prec, ap 221 | 222 | 223 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 224 | """ Generic functions to compute precision/recall for object detection 225 | for multiple classes. 226 | Input: 227 | pred_all: map of {img_id: [(classname, bbox, score)]} 228 | gt_all: map of {img_id: [(classname, bbox)]} 229 | ovthresh: scalar, iou threshold 230 | use_07_metric: bool, if true use VOC07 11 point method 231 | Output: 232 | rec: {classname: rec} 233 | prec: {classname: prec_all} 234 | ap: {classname: scalar} 235 | """ 236 | pred = {} # map {classname: pred} 237 | gt = {} # map {classname: gt} 238 | for img_id in pred_all.keys(): 239 | for classname, bbox, score in pred_all[img_id]: 240 | if classname not in pred: 241 | pred[classname] = {} 242 | if img_id not in pred[classname]: 243 | pred[classname][img_id] = [] 244 | if classname not in gt: 245 | gt[classname] = {} 246 | if img_id not in gt[classname]: 247 | gt[classname][img_id] = [] 248 | pred[classname][img_id].append((bbox, score)) 249 | for img_id in gt_all.keys(): 250 | for classname, bbox in gt_all[img_id]: 251 | if classname not in gt: 252 | gt[classname] = {} 253 | if img_id not in gt[classname]: 254 | gt[classname][img_id] = [] 255 | gt[classname][img_id].append(bbox) 256 | 257 | rec = {} 258 | prec = {} 259 | ap = {} 260 | p = Pool(processes=10) 261 | ret_values = p.map(eval_det_cls_wrapper, 262 | [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in 263 | gt.keys() if classname in pred]) 264 | p.close() 265 | for i, classname in enumerate(gt.keys()): 266 | if classname in pred: 267 | rec[classname], prec[classname], ap[classname] = ret_values[i] 268 | else: 269 | rec[classname] = 0 270 | prec[classname] = 0 271 | ap[classname] = 0 272 | print(classname, ap[classname]) 273 | 274 | return rec, prec, ap 275 | 276 | 277 | if __name__ == "__main__": 278 | classname = 'chair' 279 | bbox = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0]) 280 | 
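# Boxes in this smoke test are 6-dim (cx, cy, cz, dx, dy, dz): center plus side lengths,
# matching the convention used by calc_iou in utils/metric_util.py.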
score = 0.9 281 | pred_all = {'01': [[classname, bbox, score]]} 282 | gt_all = {'01': [[classname, bbox]]} 283 | 284 | 285 | rec, prec, ap = eval_det(pred_all, gt_all, 286 | ovthresh=0.25, 287 | use_07_metric=False, 288 | get_iou_func=get_iou_obb) 289 | 290 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/segmentator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define TINYOBJLOADER_IMPLEMENTATION 9 | #include "tiny_obj_loader.h" 10 | #include "tinyply.h" 11 | 12 | using std::vector; 13 | using std::string; 14 | 15 | // felzenswalb segmentation (https://cs.brown.edu/~pff/segment/index.html) 16 | 17 | // disjoint-set forests using union-by-rank and path compression (sort of). 18 | typedef struct { 19 | int rank; 20 | int p; 21 | int size; 22 | } uni_elt; 23 | 24 | class universe { 25 | public: 26 | universe(int elements) { 27 | elts = new uni_elt[elements]; 28 | num = elements; 29 | for (int i = 0; i < elements; i++) { 30 | elts[i].rank = 0; 31 | elts[i].size = 1; 32 | elts[i].p = i; 33 | } 34 | } 35 | ~universe() { delete [] elts; } 36 | int find(int x) { 37 | int y = x; 38 | while (y != elts[y].p) 39 | y = elts[y].p; 40 | elts[x].p = y; 41 | return y; 42 | } 43 | void join(int x, int y) { 44 | if (elts[x].rank > elts[y].rank) { 45 | elts[y].p = x; 46 | elts[x].size += elts[y].size; 47 | } else { 48 | elts[x].p = y; 49 | elts[y].size += elts[x].size; 50 | if (elts[x].rank == elts[y].rank) 51 | elts[y].rank++; 52 | } 53 | num--; 54 | } 55 | int size(int x) const { return elts[x].size; } 56 | int num_sets() const { return num; } 57 | private: 58 | uni_elt *elts; 59 | int num; 60 | }; 61 | 62 | typedef struct { 63 | float w; 64 | int a, b; 65 | } edge; 66 | 67 | bool operator<(const edge &a, const edge &b) { 68 | return a.w < b.w; 69 | } 70 | 71 | universe *segment_graph(int num_vertices, int num_edges, edge *edges, float c) { 72 | std::sort(edges, edges + num_edges); // sort edges by weight 73 | universe *u = new universe(num_vertices); // make a disjoint-set forest 74 | float *threshold = new float[num_vertices]; 75 | for (int i = 0; i < num_vertices; i++) { threshold[i] = c; } 76 | // for each edge, in non-decreasing weight order 77 | for (int i = 0; i < num_edges; i++) { 78 | edge *pedge = &edges[i]; 79 | // components conected by this edge 80 | int a = u->find(pedge->a); 81 | int b = u->find(pedge->b); 82 | if (a != b) { 83 | if ((pedge->w <= threshold[a]) && (pedge->w <= threshold[b])) { 84 | u->join(a, b); 85 | a = u->find(a); 86 | threshold[a] = pedge->w + (c / u->size(a)); 87 | } 88 | } 89 | } 90 | delete [] threshold; 91 | return u; 92 | } 93 | 94 | // simple vec3f class 95 | class vec3f { 96 | public: 97 | float x, y, z; 98 | vec3f() { x = 0; y = 0; z = 0; } 99 | vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; } 100 | vec3f operator+(const vec3f& o) { 101 | return vec3f{x+o.x, y+o.y, z+o.z}; 102 | } 103 | vec3f operator-(const vec3f& o) { 104 | return vec3f{x-o.x, y-o.y, z-o.z}; 105 | } 106 | }; 107 | vec3f cross(const vec3f& u, const vec3f& v) { 108 | vec3f c = {u.y*v.z - u.z*v.y, u.z*v.x - u.x*v.z, u.x*v.y - u.y*v.x}; 109 | float n = sqrtf(c.x*c.x + c.y*c.y + c.z*c.z); 110 | c.x /= n; c.y /= n; c.z /= n; 111 | return c; 112 | } 113 | vec3f lerp(const vec3f& a, const vec3f& b, const float v) { 114 | const float u = 1.0f-v; 115 | return vec3f(v*b.x + u*a.x, v*b.y + u*a.y, 
v*b.z + u*a.z); 116 | } 117 | 118 | inline bool ends_with(const std::string & value, const std::string& ending) { 119 | if (ending.size() > value.size()) { return false; } 120 | return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); 121 | } 122 | 123 | vector segment(const string& meshFile, const float kthr, const int segMinVerts) { 124 | //std::cout << "Loading mesh " << meshFile << std::endl; 125 | vector verts; 126 | vector faces; 127 | size_t vertexCount = 0; 128 | size_t faceCount = 0; 129 | 130 | if (ends_with(meshFile, ".ply") || ends_with(meshFile, ".PLY")) { 131 | // Load the geometry from .ply 132 | std::ifstream ss(meshFile, std::ios::binary); 133 | tinyply::PlyFile file(ss); 134 | vertexCount = file.request_properties_from_element("vertex", { "x", "y", "z" }, verts); 135 | // Try getting vertex_indices or vertex_index 136 | faceCount = file.request_properties_from_element("face", { "vertex_indices" }, faces, 3); 137 | if (faceCount == 0) { 138 | faceCount = file.request_properties_from_element("face", { "vertex_index" }, faces, 3); 139 | } 140 | file.read(ss); 141 | } else if (ends_with(meshFile, ".obj") || ends_with(meshFile, ".OBJ")) { 142 | // Load the geometry from .obj 143 | tinyobj::attrib_t attrib; 144 | vector shapes; 145 | vector materials; 146 | string err; 147 | bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &err, meshFile.c_str(), NULL, false); 148 | if (!err.empty()) { // `err` may contain warning message. 149 | std::cerr << err << std::endl; 150 | } 151 | if (!ret) { 152 | exit(1); 153 | } 154 | if (shapes.size() > 1) { 155 | std::cerr << "Warning: only single mesh OBJ supported, segmenting first mesh" << std::endl; 156 | } 157 | 158 | // Keep with original vertices (we don't want them duplicated) 159 | vertexCount = attrib.vertices.size() / 3; 160 | for (size_t v = 0; v < attrib.vertices.size(); v++) { 161 | verts.push_back(attrib.vertices[v]); 162 | } 163 | 164 | const auto& mesh = shapes[0].mesh; 165 | faceCount = mesh.num_face_vertices.size(); 166 | for (size_t f = 0; f < faceCount; f++) { 167 | for (size_t v = 0; v < 3; v++) { 168 | const size_t idx = mesh.indices[3 * f + v].vertex_index; 169 | faces.push_back(idx); 170 | } 171 | } 172 | } 173 | 174 | printf("Read mesh with vertexCount %lu %lu, faceCount %lu %lu\n", 175 | vertexCount, verts.size(), faceCount, faces.size()); 176 | 177 | // create points, normals, edges, counts vectors 178 | vector points(vertexCount); 179 | vector normals(vertexCount); 180 | vector counts(verts.size(), 0); 181 | const size_t edgesCount = faceCount*3; 182 | edge* edges = new edge[edgesCount]; 183 | 184 | // Compute face normals and smooth into vertex normals 185 | for (int i = 0; i < faceCount; i++) { 186 | const int fbase = 3*i; 187 | const uint32_t i1 = faces[fbase]; 188 | const uint32_t i2 = faces[fbase+1]; 189 | const uint32_t i3 = faces[fbase+2]; 190 | int vbase = 3*i1; 191 | vec3f p1(verts[vbase], verts[vbase+1], verts[vbase+2]); 192 | vbase = 3*i2; 193 | vec3f p2(verts[vbase], verts[vbase+1], verts[vbase+2]); 194 | vbase = 3*i3; 195 | vec3f p3(verts[vbase], verts[vbase+1], verts[vbase+2]); 196 | points[i1] = p1; points[i2] = p2; points[i3] = p3; 197 | const int ebase = 3*i; 198 | edges[ebase ].a = i1; edges[ebase ].b = i2; 199 | edges[ebase+1].a = i1; edges[ebase+1].b = i3; 200 | edges[ebase+2].a = i3; edges[ebase+2].b = i2; 201 | 202 | // smoothly blend face normals into vertex normals 203 | vec3f normal = cross(p2 - p1, p3 - p1); 204 | normals[i1] = lerp(normals[i1], normal, 1.0f / (counts[i1] + 
1.0f)); 205 | normals[i2] = lerp(normals[i2], normal, 1.0f / (counts[i2] + 1.0f)); 206 | normals[i3] = lerp(normals[i3], normal, 1.0f / (counts[i3] + 1.0f)); 207 | counts[i1]++; counts[i2]++; counts[i3]++; 208 | } 209 | 210 | //std::cout << "Constructing edge graph based on mesh connectivity..." << std::endl; 211 | for (int i = 0; i < edgesCount; i++) { 212 | int a = edges[i].a; 213 | int b = edges[i].b; 214 | 215 | vec3f& n1 = normals[a]; 216 | vec3f& n2 = normals[b]; 217 | vec3f& p1 = points[a]; 218 | vec3f& p2 = points[b]; 219 | 220 | float dx = p2.x - p1.x; 221 | float dy = p2.y - p1.y; 222 | float dz = p2.z - p1.z; 223 | float dd = sqrtf(dx * dx + dy * dy + dz * dz); dx /= dd; dy /= dd; dz /= dd; 224 | float dot = n1.x * n2.x + n1.y * n2.y + n1.z * n2.z; 225 | float dot2 = n2.x * dx + n2.y * dy + n2.z * dz; 226 | float ww = 1.0f - dot; 227 | if (dot2 > 0) { ww = ww * ww; } // make it much less of a problem if convex regions have normal difference 228 | edges[i].w = ww; 229 | } 230 | //std::cout << "Constructed graph" << std::endl; 231 | 232 | // Segment! 233 | universe* u = segment_graph(vertexCount, edgesCount, edges, kthr); 234 | //std::cout << "Segmented" << std::endl; 235 | 236 | // Joining small segments 237 | for (int j = 0; j < edgesCount; j++) { 238 | int a = u->find(edges[j].a); 239 | int b = u->find(edges[j].b); 240 | if ((a != b) && ((u->size(a) < segMinVerts) || (u->size(b) < segMinVerts))) { 241 | u->join(a, b); 242 | } 243 | } 244 | 245 | // Return segment indices as vector 246 | vector outComps(vertexCount); 247 | for (int q = 0; q < vertexCount; q++) { 248 | outComps[q] = u->find(q); 249 | } 250 | return outComps; 251 | } 252 | 253 | void writeToJSON(const string& filename, const string& scanId, 254 | const float kthr, const int segMinVerts, const vector& segIndices) { 255 | std::ofstream ofs(filename); 256 | ofs << "{"; 257 | ofs << "\"params\":{\"kThresh\":" << kthr << ",\"segMinVerts\":" << segMinVerts << "},"; 258 | ofs << "\"sceneId\":\"" << scanId << "\","; 259 | ofs << "\"segIndices\":["; 260 | for (int i = 0; i < segIndices.size(); i++) { 261 | if (i > 0) { ofs << ","; } 262 | ofs << segIndices[i]; 263 | } 264 | ofs << "]}"; 265 | ofs.close(); 266 | } 267 | 268 | int main(int argc, const char** argv) { 269 | if (argc < 2) { 270 | printf("Usage: ./segmentator input.ply [kThresh] [segMinVerts] (defaults: kThresh=0.01 segMinVerts=20)\n"); 271 | exit(-1); 272 | } else { 273 | const string plyFile = argv[1]; 274 | const float kthr = argc > 2 ? (float)atof(argv[2]) : 0.01f; 275 | const int segMinVerts = argc > 3 ? atoi(argv[3]) : 20; 276 | printf("Segmenting %s with kThresh=%f, segMinVerts=%d ...\n", plyFile.c_str(), kthr, segMinVerts); 277 | const vector comps = segment(plyFile, kthr, segMinVerts); 278 | std::unordered_set comp_indices; 279 | for (int i = 0; i < comps.size(); i++) { 280 | comp_indices.insert(comps[i]); 281 | } 282 | 283 | const string baseName = plyFile.substr(0, plyFile.find_last_of(".")); 284 | const string sceneName = baseName.substr(baseName.find_last_of("/")); 285 | const int lastslash = plyFile.find_last_of("/"); 286 | const string scanId = lastslash > 0 ? baseName.substr(lastslash) : baseName; 287 | // string segFile = baseName + "." + std::to_string(kthr) + ".segs.json"; 288 | const string segFilePrefix = argv[4]; 289 | string segFile = segFilePrefix + sceneName + "." 
+ std::to_string(kthr) + ".segs.json"; 290 | writeToJSON(segFile, scanId, kthr, segMinVerts, comps); 291 | printf("Segmentation written to %s with %lu segments\n", segFile.c_str(), comp_indices.size()); 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/tinyply.cpp: -------------------------------------------------------------------------------- 1 | // This software is in the public domain. Where that dedication is not 2 | // recognized, you are granted a perpetual, irrevocable license to copy, 3 | // distribute, and modify this file as you see fit. 4 | // Authored in 2015 by Dimitri Diakopoulos (http://www.dimitridiakopoulos.com) 5 | // https://github.com/ddiakopoulos/tinyply 6 | 7 | #include "tinyply.h" 8 | 9 | using namespace tinyply; 10 | using namespace std; 11 | 12 | ////////////////// 13 | // PLY Property // 14 | ////////////////// 15 | 16 | PlyProperty::PlyProperty(std::istream & is) : isList(false) 17 | { 18 | parse_internal(is); 19 | } 20 | 21 | void PlyProperty::parse_internal(std::istream & is) 22 | { 23 | string type; 24 | is >> type; 25 | if (type == "list") 26 | { 27 | string countType; 28 | is >> countType >> type; 29 | listType = property_type_from_string(countType); 30 | isList = true; 31 | } 32 | propertyType = property_type_from_string(type); 33 | is >> name; 34 | } 35 | 36 | ///////////////// 37 | // PLY Element // 38 | ///////////////// 39 | 40 | PlyElement::PlyElement(std::istream & is) 41 | { 42 | parse_internal(is); 43 | } 44 | 45 | void PlyElement::parse_internal(std::istream & is) 46 | { 47 | is >> name >> size; 48 | } 49 | 50 | ////////////// 51 | // PLY File // 52 | ////////////// 53 | 54 | PlyFile::PlyFile(std::istream & is) 55 | { 56 | if (!parse_header(is)) 57 | { 58 | throw std::runtime_error("file is not ply or encounted junk in header"); 59 | } 60 | } 61 | 62 | bool PlyFile::parse_header(std::istream & is) 63 | { 64 | std::string line; 65 | bool gotMagic = false; 66 | while (std::getline(is, line)) 67 | { 68 | std::istringstream ls(line); 69 | std::string token; 70 | ls >> token; 71 | if (token == "ply" || token == "PLY" || token == "") 72 | { 73 | gotMagic = true; 74 | continue; 75 | } 76 | else if (token == "comment") read_header_text(line, ls, comments, 8); 77 | else if (token == "format") read_header_format(ls); 78 | else if (token == "element") read_header_element(ls); 79 | else if (token == "property") read_header_property(ls); 80 | else if (token == "obj_info") read_header_text(line, ls, objInfo, 9); 81 | else if (token == "end_header") break; 82 | else return false; 83 | } 84 | return true; 85 | } 86 | 87 | void PlyFile::read_header_text(std::string line, std::istream & is, std::vector& place, int erase) 88 | { 89 | place.push_back((erase > 0) ? 
line.erase(0, erase) : line); 90 | } 91 | 92 | void PlyFile::read_header_format(std::istream & is) 93 | { 94 | std::string s; 95 | (is >> s); 96 | if (s == "binary_little_endian") isBinary = true; 97 | else if (s == "binary_big_endian") isBinary = isBigEndian = true; 98 | } 99 | 100 | void PlyFile::read_header_element(std::istream & is) 101 | { 102 | get_elements().emplace_back(is); 103 | } 104 | 105 | void PlyFile::read_header_property(std::istream & is) 106 | { 107 | get_elements().back().properties.emplace_back(is); 108 | } 109 | 110 | size_t PlyFile::skip_property_binary(const PlyProperty & property, std::istream & is) 111 | { 112 | static std::vector skip(PropertyTable[property.propertyType].stride); 113 | if (property.isList) 114 | { 115 | size_t listSize = 0; 116 | size_t dummyCount = 0; 117 | read_property_binary(property.listType, &listSize, dummyCount, is); 118 | for (size_t i = 0; i < listSize; ++i) is.read(skip.data(), PropertyTable[property.propertyType].stride); 119 | return listSize; 120 | } 121 | else 122 | { 123 | is.read(skip.data(), PropertyTable[property.propertyType].stride); 124 | return 0; 125 | } 126 | } 127 | 128 | void PlyFile::skip_property_ascii(const PlyProperty & property, std::istream & is) 129 | { 130 | std::string skip; 131 | if (property.isList) 132 | { 133 | int listSize; 134 | is >> listSize; 135 | for (int i = 0; i < listSize; ++i) is >> skip; 136 | } 137 | else is >> skip; 138 | } 139 | 140 | void PlyFile::read_property_binary(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) 141 | { 142 | static std::vector src(PropertyTable[t].stride); 143 | is.read(src.data(), PropertyTable[t].stride); 144 | 145 | switch (t) 146 | { 147 | case PlyProperty::Type::INT8: ply_cast(dest, src.data(), isBigEndian); break; 148 | case PlyProperty::Type::UINT8: ply_cast(dest, src.data(), isBigEndian); break; 149 | case PlyProperty::Type::INT16: ply_cast(dest, src.data(), isBigEndian); break; 150 | case PlyProperty::Type::UINT16: ply_cast(dest, src.data(), isBigEndian); break; 151 | case PlyProperty::Type::INT32: ply_cast(dest, src.data(), isBigEndian); break; 152 | case PlyProperty::Type::UINT32: ply_cast(dest, src.data(), isBigEndian); break; 153 | case PlyProperty::Type::FLOAT32: ply_cast_float(dest, src.data(), isBigEndian); break; 154 | case PlyProperty::Type::FLOAT64: ply_cast_double(dest, src.data(), isBigEndian); break; 155 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 156 | } 157 | destOffset += PropertyTable[t].stride; 158 | } 159 | 160 | void PlyFile::read_property_ascii(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) 161 | { 162 | switch (t) 163 | { 164 | case PlyProperty::Type::INT8: *((int8_t *)dest) = ply_read_ascii(is); break; 165 | case PlyProperty::Type::UINT8: *((uint8_t *)dest) = ply_read_ascii(is); break; 166 | case PlyProperty::Type::INT16: ply_cast_ascii(dest, is); break; 167 | case PlyProperty::Type::UINT16: ply_cast_ascii(dest, is); break; 168 | case PlyProperty::Type::INT32: ply_cast_ascii(dest, is); break; 169 | case PlyProperty::Type::UINT32: ply_cast_ascii(dest, is); break; 170 | case PlyProperty::Type::FLOAT32: ply_cast_ascii(dest, is); break; 171 | case PlyProperty::Type::FLOAT64: ply_cast_ascii(dest, is); break; 172 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 173 | } 174 | destOffset += PropertyTable[t].stride; 175 | } 176 | 177 | void PlyFile::write_property_ascii(PlyProperty::Type t, std::ostream & os, uint8_t 
* src, size_t & srcOffset) 178 | { 179 | switch (t) 180 | { 181 | case PlyProperty::Type::INT8: os << static_cast(*reinterpret_cast(src)); break; 182 | case PlyProperty::Type::UINT8: os << static_cast(*reinterpret_cast(src)); break; 183 | case PlyProperty::Type::INT16: os << *reinterpret_cast(src); break; 184 | case PlyProperty::Type::UINT16: os << *reinterpret_cast(src); break; 185 | case PlyProperty::Type::INT32: os << *reinterpret_cast(src); break; 186 | case PlyProperty::Type::UINT32: os << *reinterpret_cast(src); break; 187 | case PlyProperty::Type::FLOAT32: os << *reinterpret_cast(src); break; 188 | case PlyProperty::Type::FLOAT64: os << *reinterpret_cast(src); break; 189 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 190 | } 191 | os << " "; 192 | srcOffset += PropertyTable[t].stride; 193 | } 194 | 195 | void PlyFile::write_property_binary(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset) 196 | { 197 | os.write((char *)src, PropertyTable[t].stride); 198 | srcOffset += PropertyTable[t].stride; 199 | } 200 | 201 | void PlyFile::read(std::istream & is) 202 | { 203 | read_internal(is); 204 | } 205 | 206 | void PlyFile::write(std::ostream & os, bool isBinary) 207 | { 208 | if (isBinary) write_binary_internal(os); 209 | else write_ascii_internal(os); 210 | } 211 | 212 | void PlyFile::write_binary_internal(std::ostream & os) 213 | { 214 | isBinary = true; 215 | write_header(os); 216 | 217 | for (auto & e : elements) 218 | { 219 | for (size_t i = 0; i < e.size; ++i) 220 | { 221 | for (auto & p : e.properties) 222 | { 223 | auto & cursor = userDataTable[make_key(e.name, p.name)]; 224 | if (p.isList) 225 | { 226 | uint8_t listSize[4] = {0, 0, 0, 0}; 227 | memcpy(listSize, &p.listCount, sizeof(uint32_t)); 228 | size_t dummyCount = 0; 229 | write_property_binary(p.listType, os, listSize, dummyCount); 230 | for (int j = 0; j < p.listCount; ++j) 231 | { 232 | write_property_binary(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 233 | } 234 | } 235 | else 236 | { 237 | write_property_binary(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 238 | } 239 | } 240 | } 241 | } 242 | } 243 | 244 | void PlyFile::write_ascii_internal(std::ostream & os) 245 | { 246 | write_header(os); 247 | 248 | for (auto & e : elements) 249 | { 250 | for (size_t i = 0; i < e.size; ++i) 251 | { 252 | for (auto & p : e.properties) 253 | { 254 | auto & cursor = userDataTable[make_key(e.name, p.name)]; 255 | if (p.isList) 256 | { 257 | os << p.listCount << " "; 258 | for (int j = 0; j < p.listCount; ++j) 259 | { 260 | write_property_ascii(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 261 | } 262 | } 263 | else 264 | { 265 | write_property_ascii(p.propertyType, os, (cursor->data + cursor->offset), cursor->offset); 266 | } 267 | } 268 | os << std::endl; 269 | } 270 | } 271 | } 272 | 273 | void PlyFile::write_header(std::ostream & os) 274 | { 275 | const std::locale & fixLoc = std::locale("C"); 276 | os.imbue(fixLoc); 277 | 278 | os << "ply" << std::endl; 279 | if (isBinary) 280 | os << ((isBigEndian) ? 
"format binary_big_endian 1.0" : "format binary_little_endian 1.0") << std::endl; 281 | else 282 | os << "format ascii 1.0" << std::endl; 283 | 284 | for (const auto & comment : comments) 285 | os << "comment " << comment << std::endl; 286 | 287 | for (auto & e : elements) 288 | { 289 | os << "element " << e.name << " " << e.size << std::endl; 290 | for (const auto & p : e.properties) 291 | { 292 | if (p.isList) 293 | { 294 | os << "property list " << PropertyTable[p.listType].str << " " 295 | << PropertyTable[p.propertyType].str << " " << p.name << std::endl; 296 | } 297 | else 298 | { 299 | os << "property " << PropertyTable[p.propertyType].str << " " << p.name << std::endl; 300 | } 301 | } 302 | } 303 | os << "end_header" << std::endl; 304 | } 305 | 306 | void PlyFile::read_internal(std::istream & is) 307 | { 308 | std::function read; 309 | std::function skip; 310 | if (isBinary) 311 | { 312 | read = [&](PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) { read_property_binary(t, dest, destOffset, is); }; 313 | skip = [&](const PlyProperty & property, std::istream & is) { skip_property_binary(property, is); }; 314 | } 315 | else 316 | { 317 | read = [&](PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is) { read_property_ascii(t, dest, destOffset, is); }; 318 | skip = [&](const PlyProperty & property, std::istream & is) { skip_property_ascii(property, is); }; 319 | } 320 | 321 | for (auto & element : get_elements()) 322 | { 323 | if (std::find(requestedElements.begin(), requestedElements.end(), element.name) != requestedElements.end()) 324 | { 325 | for (size_t count = 0; count < element.size; ++count) 326 | { 327 | for (auto & property : element.properties) 328 | { 329 | if (auto & cursor = userDataTable[make_key(element.name, property.name)]) 330 | { 331 | if (property.isList) 332 | { 333 | size_t listSize = 0; 334 | size_t dummyCount = 0; 335 | read(property.listType, &listSize, dummyCount, is); 336 | if (cursor->realloc == false) 337 | { 338 | cursor->realloc = true; 339 | resize_vector(property.propertyType, cursor->vector, listSize * element.size, cursor->data); 340 | } 341 | for (size_t i = 0; i < listSize; ++i) 342 | { 343 | read(property.propertyType, (cursor->data + cursor->offset), cursor->offset, is); 344 | } 345 | } 346 | else 347 | { 348 | read(property.propertyType, (cursor->data + cursor->offset), cursor->offset, is); 349 | } 350 | } 351 | else 352 | { 353 | skip(property, is); 354 | } 355 | } 356 | } 357 | } 358 | else continue; 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /dataprocessing/oversegmentation/cpp/tinyply.h: -------------------------------------------------------------------------------- 1 | // This software is in the public domain. Where that dedication is not 2 | // recognized, you are granted a perpetual, irrevocable license to copy, 3 | // distribute, and modify this file as you see fit. 
4 | // Authored in 2015 by Dimitri Diakopoulos (http://www.dimitridiakopoulos.com) 5 | // https://github.com/ddiakopoulos/tinyply 6 | 7 | #ifndef tinyply_h 8 | #define tinyply_h 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tinyply 23 | { 24 | 25 | template T endian_swap(const T & v) { return v; } 26 | template<> inline uint16_t endian_swap(const uint16_t & v) { return (v << 8) | (v >> 8); } 27 | template<> inline uint32_t endian_swap(const uint32_t & v) { return (v << 24) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) | (v >> 24); } 28 | template<> inline uint64_t endian_swap(const uint64_t & v) 29 | { 30 | return (((v & 0x00000000000000ffLL) << 56) | 31 | ((v & 0x000000000000ff00LL) << 40) | 32 | ((v & 0x0000000000ff0000LL) << 24) | 33 | ((v & 0x00000000ff000000LL) << 8) | 34 | ((v & 0x000000ff00000000LL) >> 8) | 35 | ((v & 0x0000ff0000000000LL) >> 24) | 36 | ((v & 0x00ff000000000000LL) >> 40) | 37 | ((v & 0xff00000000000000LL) >> 56)); 38 | } 39 | template<> inline int16_t endian_swap(const int16_t & v) { uint16_t r = endian_swap(*(uint16_t*)&v); return *(int16_t*)&r; } 40 | template<> inline int32_t endian_swap(const int32_t & v) { uint32_t r = endian_swap(*(uint32_t*)&v); return *(int32_t*)&r; } 41 | template<> inline int64_t endian_swap(const int64_t & v) { uint64_t r = endian_swap(*(uint64_t*)&v); return *(int64_t*)&r; } 42 | inline float endian_swap_float(const uint32_t & v) { uint32_t r = endian_swap(v); return *(float*)&r; } 43 | inline double endian_swap_double(const uint64_t & v) { uint64_t r = endian_swap(v); return *(double*)&r; } 44 | 45 | struct DataCursor 46 | { 47 | void * vector; 48 | uint8_t * data; 49 | size_t offset; 50 | bool realloc = false; 51 | }; 52 | 53 | class PlyProperty 54 | { 55 | void parse_internal(std::istream & is); 56 | public: 57 | 58 | enum class Type : uint8_t 59 | { 60 | INVALID, 61 | INT8, 62 | UINT8, 63 | INT16, 64 | UINT16, 65 | INT32, 66 | UINT32, 67 | FLOAT32, 68 | FLOAT64 69 | }; 70 | 71 | PlyProperty(std::istream & is); 72 | PlyProperty(Type type, const std::string & name) : propertyType(type), isList(false), name(name) {} 73 | PlyProperty(Type list_type, Type prop_type, const std::string & name, int listCount) : listType(list_type), propertyType(prop_type), isList(true), name(name), listCount(listCount) {} 74 | 75 | Type listType, propertyType; 76 | bool isList; 77 | int listCount = 0; 78 | std::string name; 79 | }; 80 | 81 | inline std::string make_key(const std::string & a, const std::string & b) 82 | { 83 | return (a + "-" + b); 84 | } 85 | 86 | template 87 | void ply_cast(void * dest, const char * src, bool be) 88 | { 89 | *(static_cast(dest)) = (be) ? endian_swap(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 90 | } 91 | 92 | template 93 | void ply_cast_float(void * dest, const char * src, bool be) 94 | { 95 | *(static_cast(dest)) = (be) ? endian_swap_float(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 96 | } 97 | 98 | template 99 | void ply_cast_double(void * dest, const char * src, bool be) 100 | { 101 | *(static_cast(dest)) = (be) ? 
endian_swap_double(*(reinterpret_cast(src))) : *(reinterpret_cast(src)); 102 | } 103 | 104 | template 105 | T ply_read_ascii(std::istream & is) 106 | { 107 | T data; 108 | is >> data; 109 | return data; 110 | } 111 | 112 | template 113 | void ply_cast_ascii(void * dest, std::istream & is) 114 | { 115 | *(static_cast(dest)) = ply_read_ascii(is); 116 | } 117 | 118 | struct PropertyInfo { int stride; std::string str; }; 119 | static std::map PropertyTable 120 | { 121 | { PlyProperty::Type::INT8,{ 1, "char" } }, 122 | { PlyProperty::Type::UINT8,{ 1, "uchar" } }, 123 | { PlyProperty::Type::INT16,{ 2, "short" } }, 124 | { PlyProperty::Type::UINT16,{ 2, "ushort" } }, 125 | { PlyProperty::Type::INT32,{ 4, "int" } }, 126 | { PlyProperty::Type::UINT32,{ 4, "uint" } }, 127 | { PlyProperty::Type::FLOAT32,{ 4, "float" } }, 128 | { PlyProperty::Type::FLOAT64,{ 8, "double" } }, 129 | { PlyProperty::Type::INVALID,{ 0, "INVALID" } } 130 | }; 131 | 132 | inline PlyProperty::Type property_type_from_string(const std::string & t) 133 | { 134 | if (t == "int8" || t == "char") return PlyProperty::Type::INT8; 135 | else if (t == "uint8" || t == "uchar") return PlyProperty::Type::UINT8; 136 | else if (t == "int16" || t == "short") return PlyProperty::Type::INT16; 137 | else if (t == "uint16" || t == "ushort") return PlyProperty::Type::UINT16; 138 | else if (t == "int32" || t == "int") return PlyProperty::Type::INT32; 139 | else if (t == "uint32" || t == "uint") return PlyProperty::Type::UINT32; 140 | else if (t == "float32" || t == "float") return PlyProperty::Type::FLOAT32; 141 | else if (t == "float64" || t == "double") return PlyProperty::Type::FLOAT64; 142 | return PlyProperty::Type::INVALID; 143 | } 144 | 145 | template 146 | inline uint8_t * resize(void * v, size_t newSize) 147 | { 148 | auto vec = static_cast *>(v); 149 | vec->resize(newSize); 150 | return reinterpret_cast(vec->data()); 151 | } 152 | 153 | inline void resize_vector(const PlyProperty::Type t, void * v, size_t newSize, uint8_t *& ptr) 154 | { 155 | switch (t) 156 | { 157 | case PlyProperty::Type::INT8: ptr = resize(v, newSize); break; 158 | case PlyProperty::Type::UINT8: ptr = resize(v, newSize); break; 159 | case PlyProperty::Type::INT16: ptr = resize(v, newSize); break; 160 | case PlyProperty::Type::UINT16: ptr = resize(v, newSize); break; 161 | case PlyProperty::Type::INT32: ptr = resize(v, newSize); break; 162 | case PlyProperty::Type::UINT32: ptr = resize(v, newSize); break; 163 | case PlyProperty::Type::FLOAT32: ptr = resize(v, newSize); break; 164 | case PlyProperty::Type::FLOAT64: ptr = resize(v, newSize); break; 165 | case PlyProperty::Type::INVALID: throw std::invalid_argument("invalid ply property"); 166 | } 167 | } 168 | 169 | template 170 | inline PlyProperty::Type property_type_for_type(std::vector & theType) 171 | { 172 | if (std::is_same::value) return PlyProperty::Type::INT8; 173 | else if (std::is_same::value) return PlyProperty::Type::UINT8; 174 | else if (std::is_same::value) return PlyProperty::Type::INT16; 175 | else if (std::is_same::value) return PlyProperty::Type::UINT16; 176 | else if (std::is_same::value) return PlyProperty::Type::INT32; 177 | else if (std::is_same::value) return PlyProperty::Type::UINT32; 178 | else if (std::is_same::value) return PlyProperty::Type::FLOAT32; 179 | else if (std::is_same::value) return PlyProperty::Type::FLOAT64; 180 | else return PlyProperty::Type::INVALID; 181 | } 182 | 183 | class PlyElement 184 | { 185 | void parse_internal(std::istream & is); 186 | public: 187 | 
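// An element declared in the PLY header: its name, the number of instances to read (size),
// and the list of properties each instance carries.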
PlyElement(std::istream & istream); 188 | PlyElement(const std::string & name, size_t count) : name(name), size(count) {} 189 | std::string name; 190 | size_t size; 191 | std::vector properties; 192 | }; 193 | 194 | inline int find_element(const std::string key, std::vector & list) 195 | { 196 | for (size_t i = 0; i < list.size(); ++i) 197 | { 198 | if (list[i].name == key) 199 | { 200 | return i; 201 | } 202 | } 203 | return -1; 204 | } 205 | 206 | class PlyFile 207 | { 208 | 209 | public: 210 | 211 | PlyFile() {} 212 | PlyFile(std::istream & is); 213 | 214 | void read(std::istream & is); 215 | void write(std::ostream & os, bool isBinary); 216 | 217 | std::vector & get_elements() { return elements; } 218 | 219 | std::vector comments; 220 | std::vector objInfo; 221 | 222 | template 223 | size_t request_properties_from_element(const std::string & elementKey, std::vector propertyKeys, std::vector & source, const int listCount = 1) 224 | { 225 | if (get_elements().size() == 0) 226 | return 0; 227 | 228 | if (find_element(elementKey, get_elements()) >= 0) 229 | { 230 | if (std::find(requestedElements.begin(), requestedElements.end(), elementKey) == requestedElements.end()) 231 | requestedElements.push_back(elementKey); 232 | } 233 | else return 0; 234 | 235 | // count and verify large enough 236 | auto instance_counter = [&](const std::string & elementKey, const std::string & propertyKey) 237 | { 238 | for (auto e : get_elements()) 239 | { 240 | if (e.name != elementKey) continue; 241 | for (auto p : e.properties) 242 | { 243 | if (p.name == propertyKey) 244 | { 245 | if (PropertyTable[property_type_for_type(source)].stride != PropertyTable[p.propertyType].stride) 246 | throw std::runtime_error("destination vector is wrongly typed to hold this property"); 247 | return e.size; 248 | 249 | } 250 | } 251 | } 252 | return size_t(0); 253 | }; 254 | 255 | // Check if requested key is in the parsed header 256 | std::vector unusedKeys; 257 | for (auto key : propertyKeys) 258 | { 259 | for (auto e : get_elements()) 260 | { 261 | if (e.name != elementKey) continue; 262 | std::vector headerKeys; 263 | for (auto p : e.properties) 264 | { 265 | headerKeys.push_back(p.name); 266 | } 267 | 268 | if (std::find(headerKeys.begin(), headerKeys.end(), key) == headerKeys.end()) 269 | { 270 | unusedKeys.push_back(key); 271 | } 272 | 273 | } 274 | } 275 | 276 | // Not using them? 
Don't let them affect the propertyKeys count used for calculating array sizes 277 | for (auto k : unusedKeys) 278 | { 279 | propertyKeys.erase(std::remove(propertyKeys.begin(), propertyKeys.end(), k), propertyKeys.end()); 280 | } 281 | if (!propertyKeys.size()) return 0; 282 | 283 | // All requested properties in the userDataTable share the same cursor (thrown into the same flat array) 284 | auto cursor = std::make_shared(); 285 | 286 | std::vector instanceCounts; 287 | 288 | for (auto key : propertyKeys) 289 | { 290 | if (int instanceCount = instance_counter(elementKey, key)) 291 | { 292 | instanceCounts.push_back(instanceCount); 293 | auto result = userDataTable.insert(std::pair>(make_key(elementKey, key), cursor)); 294 | if (result.second == false) 295 | throw std::invalid_argument("property has already been requested: " + key); 296 | } 297 | else continue; 298 | } 299 | 300 | size_t totalInstanceSize = [&]() { size_t t = 0; for (auto c : instanceCounts) { t += c; } return t; }() * listCount; 301 | source.resize(totalInstanceSize); // this satisfies regular properties; `cursor->realloc` is for list types since tinyply uses single-pass parsing 302 | cursor->offset = 0; 303 | cursor->vector = &source; 304 | cursor->data = reinterpret_cast(source.data()); 305 | 306 | if (listCount > 1) 307 | { 308 | cursor->realloc = true; 309 | return (totalInstanceSize / propertyKeys.size()) / listCount; 310 | } 311 | 312 | return totalInstanceSize / propertyKeys.size(); 313 | } 314 | 315 | template 316 | void add_properties_to_element(const std::string & elementKey, const std::vector & propertyKeys, std::vector & source, const int listCount = 1, const PlyProperty::Type listType = PlyProperty::Type::INVALID) 317 | { 318 | auto cursor = std::make_shared(); 319 | cursor->offset = 0; 320 | cursor->vector = &source; 321 | cursor->data = reinterpret_cast(source.data()); 322 | 323 | auto create_property_on_element = [&](PlyElement & e) 324 | { 325 | for (auto key : propertyKeys) 326 | { 327 | PlyProperty::Type t = property_type_for_type(source); 328 | PlyProperty newProp = (listType == PlyProperty::Type::INVALID) ? PlyProperty(t, key) : PlyProperty(listType, t, key, listCount); 329 | userDataTable.insert(std::pair>(make_key(e.name, key), cursor)); 330 | e.properties.push_back(newProp); 331 | } 332 | }; 333 | 334 | int idx = find_element(elementKey, elements); 335 | if (idx >= 0) 336 | { 337 | PlyElement & e = elements[idx]; 338 | create_property_on_element(e); 339 | } 340 | else 341 | { 342 | PlyElement newElement = (listCount == 1) ? 
PlyElement(elementKey, source.size() / propertyKeys.size()) : PlyElement(elementKey, source.size() / listCount); 343 | create_property_on_element(newElement); 344 | elements.push_back(newElement); 345 | } 346 | } 347 | 348 | private: 349 | 350 | size_t skip_property_binary(const PlyProperty & property, std::istream & is); 351 | void skip_property_ascii(const PlyProperty & property, std::istream & is); 352 | 353 | void read_property_binary(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is); 354 | void read_property_ascii(PlyProperty::Type t, void * dest, size_t & destOffset, std::istream & is); 355 | void write_property_ascii(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset); 356 | void write_property_binary(PlyProperty::Type t, std::ostream & os, uint8_t * src, size_t & srcOffset); 357 | 358 | bool parse_header(std::istream & is); 359 | void write_header(std::ostream & os); 360 | 361 | void read_header_format(std::istream & is); 362 | void read_header_element(std::istream & is); 363 | void read_header_property(std::istream & is); 364 | void read_header_text(std::string line, std::istream & is, std::vector & place, int erase = 0); 365 | 366 | void read_internal(std::istream & is); 367 | 368 | void write_ascii_internal(std::ostream & os); 369 | void write_binary_internal(std::ostream & os); 370 | 371 | bool isBinary = false; 372 | bool isBigEndian = false; 373 | 374 | std::map> userDataTable; 375 | 376 | std::vector elements; 377 | std::vector requestedElements; 378 | }; 379 | 380 | } // namesapce tinyply 381 | 382 | #endif // tinyply_h 383 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import models.detection_net as SelectionNet 4 | import MinkowskiEngine as ME 5 | import models.iou_nms as iou_nms 6 | from scipy.stats import pearsonr 7 | from models.iou_nms import * 8 | from utils.util import * 9 | from glob import glob 10 | import os 11 | 12 | 13 | 14 | class Model: 15 | def __init__(self, cfg, semantic_valid_class_ids, semantic_id2idx, instance_id2idx, is_foreground, device='cuda'): 16 | self.cfg = cfg 17 | self.device = device 18 | self.semantic_valid_class_ids = semantic_valid_class_ids 19 | self.semantic_id2idx = semantic_id2idx 20 | self.instance_id2idx = instance_id2idx 21 | self.is_foreground = is_foreground 22 | self.detection_model = SelectionNet.SelectionNet(cfg, device, semantic_valid_class_ids, is_foreground, out_channels=[96, 96, 6]).to(device) 23 | if cfg.multigpu: 24 | self.detection_model = torch.nn.parallel.DistributedDataParallel(self.detection_model, device_ids=[device]) 25 | self.detection_model = ME.MinkowskiSyncBatchNorm.convert_sync_batchnorm(self.detection_model) 26 | # loss is computed by averaging over all element-wise computed loss entries 27 | # BCEWL includes sigmoid activation, needs un-normalized input 28 | self.BCEWithLogitsLoss = torch.nn.BCEWithLogitsLoss().to(device) 29 | # not used semantic labels are mapped to -100 using SEMANTIC_ID2IDX and ignored by this loss 30 | # CE is a softmax with exp activation, needs un-normalized inputs 31 | self.semantics_loss = torch.nn.CrossEntropyLoss(ignore_index=-100).to(device) 32 | 33 | def compute_loss(self, batch, epoch): 34 | losses_dict, pred = self.compute_loss_detection(batch, epoch) 35 | 36 | return losses_dict 37 | 38 | def compute_loss_detection(self, batch, epoch): 39 | device = self.device 40 | cfg = 
self.cfg 41 | 42 | # transform data to voxelized sparse tensors 43 | sin = ME.SparseTensor(batch['vox_features'], batch['vox_coords'], device=device) 44 | 45 | # GET MODEL PREDICTION (and convert to regular pytorch tensors) 46 | pred = self.detection_model(sin, batch['pooling_ids'].to(device)) 47 | 48 | # pred keys: 49 | # mlp_offsets 50 | # mlp_bounds 51 | # mlp_bb_scores 52 | # mlp_semantics 53 | # vox_feats 54 | 55 | for mlp_head, sparse_tensor in pred.items(): 56 | pred[mlp_head] = sparse_tensor.F 57 | 58 | # initialize loss 59 | losses_dict = {'optimization_loss': 0} 60 | 61 | # OFFSET loss (offset to BB center) 62 | if cfg.mlp_offsets in self.cfg.network_heads: 63 | # get gt and prediction 64 | gt_offsets, pred_offsets = batch['gt_bb_offsets'], pred[cfg.mlp_offsets] 65 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 66 | pred_offsets = pred_offsets[batch['fg_instances']] 67 | gt_offsets = gt_offsets[batch['fg_instances']] 68 | 69 | # simple L1 loss over the predicted bounding box center offsets 70 | offset_loss_per_pred = torch.sum(torch.abs(pred_offsets - gt_offsets.to(device)), axis=1) 71 | offset_loss = torch.mean(offset_loss_per_pred) 72 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_offsets * offset_loss 73 | losses_dict['offset_loss'] = offset_loss.detach() 74 | 75 | # BB size loss 76 | if cfg.mlp_bounds in self.cfg.network_heads: 77 | # get gt and prediction 78 | gt_bounds, pred_bounds = batch['gt_bb_bounds'], pred[cfg.mlp_bounds] 79 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 80 | pred_bounds = pred_bounds[batch['fg_instances']] 81 | gt_bounds = gt_bounds[batch['fg_instances']] 82 | 83 | # simple L1 loss over the predicted bounding box bounds 84 | bounds_loss_per_pred = torch.sum(torch.abs(pred_bounds - gt_bounds.to(device)), axis=1) 85 | bounds_loss = torch.mean(bounds_loss_per_pred) 86 | 87 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_bounds * bounds_loss 88 | losses_dict['bounds_loss'] = bounds_loss.detach() 89 | 90 | # Axis aligned bounding boxes IoU loss 91 | if cfg.use_bb_iou_loss: 92 | pred_bounds = pred[self.cfg.mlp_bounds] 93 | pred_offsets = pred[self.cfg.mlp_offsets] 94 | gt_bounds = batch['gt_bb_bounds'] 95 | gt_offsets = batch['gt_bb_offsets'] 96 | loc = batch['input_location'] 97 | 98 | loc, gt_offsets, gt_bounds = loc.to(device), gt_offsets.to(device), gt_bounds.to(device) 99 | 100 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 101 | pred_bounds = pred_bounds[batch['fg_instances']] 102 | pred_offsets = pred_offsets[batch['fg_instances']] 103 | gt_bounds = gt_bounds[batch['fg_instances']] 104 | gt_offsets = gt_offsets[batch['fg_instances']] 105 | loc = loc[batch['fg_instances']] 106 | 107 | pred_bounds = torch.clamp(pred_bounds, min=self.cfg.min_bb_size) # enforce minimum size 108 | pred_bb_centers = pred_offsets + loc 109 | gt_bb_center = gt_offsets + loc 110 | pr_bbs = to_bbs_min_max_(pred_bb_centers, pred_bounds, device) 111 | gt_bbs = to_bbs_min_max_(gt_bb_center, gt_bounds, device) 112 | 113 | area1 = (pr_bbs[..., 3] - pr_bbs[..., 0]) * (pr_bbs[..., 4] - pr_bbs[..., 1]) * (pr_bbs[..., 5] - pr_bbs[..., 2]) 114 | area2 = (gt_bbs[..., 3] - gt_bbs[..., 0]) * (gt_bbs[..., 4] - gt_bbs[..., 1]) * (gt_bbs[..., 5] - gt_bbs[..., 2]) 115 | lt = torch.max(pr_bbs[..., :3], gt_bbs[..., :3]) 116 | rb = torch.min(pr_bbs[..., 3:], gt_bbs[..., 3:]) 117 | wh = (rb - lt).clamp(min=0) 118 | overlap = wh[..., 0] * wh[..., 1] * wh[..., 2] 119 | union = area1 + area2 - overlap 120 | eps = 1e-6 121 | 
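# lift eps to a tensor on the same device/dtype as `union`, then clamp the union from below
# so the IoU division cannot hit zero for degenerate (zero-volume) boxes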
eps = union.new_tensor([eps]) 122 | union = torch.max(union, eps) 123 | ious = overlap / union 124 | 125 | iou_loss_per_pred = 1.0 - ious 126 | iou_loss = torch.mean(iou_loss_per_pred) 127 | 128 | losses_dict['optimization_loss'] += self.cfg.loss_weight_bb_iou * iou_loss 129 | losses_dict['iou_loss'] = iou_loss.detach() 130 | 131 | 132 | # BB score loss 133 | if cfg.mlp_bb_scores in self.cfg.network_heads: 134 | loss_weight_bb_scores = self.cfg.loss_weight_bb_scores 135 | # hack because multi gpu needs to have the full network be part of loss computation already at beginning 136 | if epoch < self.cfg.mlp_bb_scores_start_epoch: 137 | loss_weight_bb_scores = 0 138 | # get gt and prediction 139 | pred_scores = pred[cfg.mlp_bb_scores].reshape(-1) # (num_voxels) 140 | pred_bounds = pred[cfg.mlp_bounds] 141 | pred_offsets = pred[cfg.mlp_offsets] 142 | loc = batch['input_location'] 143 | gt_offsets = batch['gt_bb_offsets'] 144 | gt_bounds = batch['gt_bb_bounds'] 145 | 146 | if self.cfg.loss_on_fg_instances or self.cfg.bb_supervision: 147 | pred_scores = pred_scores[batch['fg_instances']] 148 | pred_bounds = pred_bounds[batch['fg_instances']] 149 | pred_offsets = pred_offsets[batch['fg_instances']] 150 | loc = loc[batch['fg_instances']] 151 | gt_offsets = gt_offsets[batch['fg_instances']] 152 | gt_bounds = gt_bounds[batch['fg_instances']] 153 | 154 | loc, gt_offsets, gt_bounds = loc.to(device), gt_offsets.to(device), gt_bounds.to(device) 155 | 156 | # convert gt data to BB (min, max)-corner representation 157 | gt_bb_center = gt_offsets + loc 158 | gt_bbs = to_bbs_min_max_(gt_bb_center, gt_bounds, device) 159 | 160 | # convert pred data to BB (min,max)-corner representation 161 | pred_bounds = torch.clamp(pred_bounds, min=self.cfg.min_bb_size) # enforce minimum size 162 | pred_bb_centers = pred_offsets + loc 163 | pred_bbs = to_bbs_min_max_(pred_bb_centers, pred_bounds, device) 164 | 165 | # compute IOU between pred and gt. This is the GT score that should be predicted. 
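# gt_bbs and pred_bbs are per-voxel axis-aligned boxes in (min-corner, max-corner) form;
# the IoU of each predicted box with its ground-truth box is detached and used as a soft
# target for the score head through the BCE-with-logits loss below.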
166 | ious = iou_nms.set_IOUs(gt_bbs, pred_bbs).detach() # (num_input_bbs) 167 | score_loss = self.BCEWithLogitsLoss(pred_scores, ious) 168 | 169 | # for interpretable logging, we use correlation 170 | corr, _ = pearsonr(ious.cpu().numpy(), pred_scores.cpu().detach().numpy()) 171 | losses_dict['bb_scores_correlation'] = corr 172 | 173 | losses_dict['optimization_loss'] += loss_weight_bb_scores * score_loss 174 | losses_dict['bb_score_loss'] = score_loss.detach() 175 | # for test / visualization only 176 | losses_dict['bb_target_scores'] = torch.mean(ious) 177 | 178 | # center score loss 179 | if cfg.mlp_center_scores in self.cfg.network_heads and epoch >= self.cfg.mlp_center_scores_start_epoch: 180 | # get gt and prediction 181 | pred_scores = pred[cfg.mlp_center_scores].reshape(-1) # (num_voxels) 182 | gt_scores = offset_loss_per_pred.detach() # ( num_voxels) 183 | # simple L1 loss over the predicted scores 184 | if self.cfg.loss_on_fg_instances: 185 | pred_scores = pred_scores[batch['fg_instances']] 186 | score_loss = torch.abs(pred_scores - gt_scores) 187 | score_loss = torch.mean(score_loss) 188 | losses_dict['optimization_loss'] += self.cfg.loss_weight_center_scores * score_loss 189 | losses_dict['center_score_loss'] = score_loss.detach() 190 | # for interpretable logging, we use correlation 191 | corr, _ = pearsonr(gt_scores.cpu().numpy(), pred_scores.cpu().detach().numpy()) 192 | losses_dict['center_scores_correlation'] = corr 193 | 194 | if self.cfg.mlp_semantics in self.cfg.network_heads: 195 | # get gt and prediction 196 | pred_semantics = pred[cfg.mlp_semantics] 197 | gt_semantics = batch['gt_semantics'] 198 | # invalid and unlabeled ids are mapped to '-100' (the 'ignore'-label of our loss) 199 | gt_semantics = self.semantic_id2idx[gt_semantics].to('cuda') 200 | 201 | semantics_loss = self.semantics_loss(pred_semantics, gt_semantics) 202 | pred_semantics_int = torch.argmax(pred_semantics, 1) 203 | # this accuracy is pessimistic: it also measures unlabeled+invalid points 204 | semantics_acc = torch.sum(pred_semantics_int == gt_semantics) / len(gt_semantics) 205 | semantics_miou = semIOU(pred_semantics_int, gt_semantics).mean() 206 | 207 | losses_dict['optimization_loss'] += self.cfg.loss_weight_semantics * semantics_loss 208 | losses_dict['semantics_loss'] = semantics_loss.detach().cpu().numpy() 209 | losses_dict['semantics_acc'] = semantics_acc.detach().cpu().numpy() 210 | losses_dict['semantics_mIoU'] = semantics_miou 211 | 212 | if self.cfg.mlp_per_vox_semantics in self.cfg.network_heads: 213 | pred_semantics = pred[cfg.mlp_per_vox_semantics] 214 | gt_semantics = batch['gt_per_vox_semantics'] 215 | gt_semantics = self.semantic_id2idx[gt_semantics].to('cuda') 216 | 217 | per_vox_semantics_loss = self.semantics_loss(pred_semantics, gt_semantics) 218 | pred_semantics_int = torch.argmax(pred_semantics, 1) 219 | per_vox_semantics_acc = torch.sum(pred_semantics_int == gt_semantics) / len(gt_semantics) 220 | 221 | losses_dict['optimization_loss'] += self.cfg.loss_weight_per_vox_semantics * per_vox_semantics_loss 222 | losses_dict['per_vox_semantics_loss'] = per_vox_semantics_loss.detach().cpu().numpy() 223 | losses_dict['per_vox_semantics_acc'] = per_vox_semantics_acc.detach().cpu().numpy() 224 | 225 | return losses_dict, pred 226 | 227 | def get_prediction(self, batch, with_grad=False, to_cpu=True, min_size=True, get_all=False): 228 | 229 | pred = self.detection_model.get_prediction(batch, with_grad=with_grad, to_cpu=to_cpu, min_size=min_size) 230 | return pred 231 | 232 | def 
pred2mask(self, batch, pred, mode): 233 | return self.detection_model.detection2mask(batch, pred, self.cfg, mode, 234 | True, *self.cfg.eval_ths) 235 | 236 | def parameters(self): 237 | return self.detection_model.parameters() 238 | 239 | def to(self, device): 240 | self.detection_model = self.detection_model.to(device) 241 | return self 242 | 243 | def eval(self): 244 | self.detection_model.eval() 245 | 246 | def train(self): 247 | self.detection_model.train() 248 | 249 | # returns if the checkpoint contained all parameters for the model 250 | def load_state_dict(self, state_dict, strict=True): 251 | if self.cfg.multigpu: 252 | return self.detection_model.module.load_state_dict(state_dict, strict) 253 | else: 254 | return self.detection_model.load_state_dict(state_dict, strict) 255 | 256 | 257 | def state_dict(self): 258 | 259 | if self.cfg.multigpu: 260 | return self.detection_model.module.state_dict() 261 | else: 262 | return self.detection_model.state_dict() 263 | 264 | def load_checkpoint(self, checkpoint=None, closest_to = None): 265 | checkpoints = glob(self.cfg.checkpoint_path + '/*') 266 | if checkpoint is None: 267 | if len(checkpoints) == 0: 268 | print('No checkpoints found at {}'.format(self.cfg.checkpoint_path)) 269 | return 0, 0 270 | 271 | checkpoints = [os.path.splitext(os.path.basename(path))[0].split('_')[-1] for path in checkpoints] 272 | checkpoints = np.array(checkpoints, dtype=float) 273 | checkpoints = np.sort(checkpoints) 274 | if closest_to: 275 | ckpt_idx = np.argmin(np.abs(checkpoints - (closest_to * 60 * 60))) 276 | else: #use last 277 | ckpt_idx = -1 278 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format( 279 | *[*convertSecs(checkpoints[ckpt_idx]), checkpoints[ckpt_idx]]) 280 | else: 281 | path = self.cfg.checkpoint_path + '{}.tar'.format(checkpoint) 282 | 283 | print('Loaded checkpoint from: {}'.format(path)) 284 | checkpoint = torch.load(path, map_location=self.device) 285 | self.load_state_dict(checkpoint['model_state_dict']) 286 | epoch = checkpoint['epoch'] 287 | training_time = checkpoint['training_time'] 288 | return epoch, training_time, os.path.basename(path)[:-4], checkpoint['iteration_num'] 289 | -------------------------------------------------------------------------------- /utils/s3dis_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import os 5 | import numpy as np 6 | from scipy import stats 7 | from sklearn.cluster import MeanShift, DBSCAN 8 | import pyviz3d.visualizer as viz 9 | import dataprocessing.scannet as scannet # Using scannet color mapping 10 | import dataprocessing.s3dis as s3dis # Using scannet color mapping 11 | from matplotlib import cm as viz_cm 12 | from sklearn.neighbors import NearestNeighbors 13 | from utils.util import get_bbs_lines 14 | 15 | import open3d as o3d 16 | 17 | NUM_CLASSES = 13 18 | 19 | def reconstruct_mesh (scene): 20 | positions = scene ["positions"] 21 | normals = scene ["normals"] 22 | pcd = o3d.geometry.PointCloud() 23 | pcd.points = o3d.utility.Vector3dVector(positions.astype (np.float32)) 24 | pcd.normals = o3d.utility.Vector3dVector(normals.astype (np.float32)) 25 | with o3d.utility.VerbosityContextManager( 26 | o3d.utility.VerbosityLevel.Debug) as cm: 27 | mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( 28 | pcd, depth=8) 29 | return mesh 30 | 31 | def interpolate(original_scene, sampled_positions, sampled_colors_list, radius=0.1, neutral_color=[1.0, 1.0, 1.0]): 
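# Transfers colors from the sampled point set back onto the original scene mesh: every mesh
# vertex takes the color of its nearest sampled point if that point lies within `radius`,
# and `neutral_color` otherwise; one recolored copy of the mesh is returned per entry of
# sampled_colors_list.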
32 | import copy 33 | pcd_tree = o3d.geometry.KDTreeFlann(original_scene) 34 | num_querries = sampled_positions.shape[0] 35 | interpolated_scenes_list = [copy.deepcopy(original_scene) for _ in range(len(sampled_colors_list))] 36 | for j in range(len(interpolated_scenes_list)): 37 | np.asarray(interpolated_scenes_list[j].vertex_colors)[:, :] = np.array(neutral_color) 38 | mesh_pos = np.asarray(original_scene.vertices) 39 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(sampled_positions) 40 | dist, original2sample = nbrs.kneighbors(mesh_pos) 41 | dist = dist [:, 0] 42 | original2sample = original2sample[:,0] 43 | for j in range(len(interpolated_scenes_list)): 44 | mesh = interpolated_scenes_list [j] 45 | original2sample [dist < radius] 46 | interpolated_colors = sampled_colors_list [j][original2sample [dist < radius]] 47 | colors = np.asarray(mesh.vertex_colors) 48 | colors [dist < radius] = interpolated_colors 49 | interpolated_scenes_list[j].vertex_colors = o3d.utility.Vector3dVector(colors) 50 | 51 | return interpolated_scenes_list 52 | 53 | def visualize_prediction (cfg, scene_name, scene, labels, pred_label, out_path): 54 | # ---------------- GET GT INSTANCES, SEMANTICS, and BBs 55 | print ("visualize ", scene_name) 56 | 57 | color_map = viz_cm.get_cmap('Paired', 12) 58 | colors_map = np.array(color_map(range(12)))[:, :3] 59 | r, g, b = colors_map.T 60 | colors_map = np.vstack((colors_map, np.array([r, b, g]).T, np.array([ b, r, g]).T, np.array([ g, r, b]).T, np.array([ b, g, r]).T, np.array([ g, b, r]).T)) 61 | colors_map = np.vstack((colors_map,colors_map,colors_map)) 62 | if np.max (colors_map) < 2: 63 | colors_map = (colors_map * 255).astype (np.int32) 64 | INS_COLORS = colors_map 65 | 66 | # Using color map from scannet 67 | SEM_COLORS = np.copy (scannet.scannet_color_map).astype (np.float32) 68 | SEM_COLORS [0] = SEM_COLORS [-2] 69 | 70 | scannet.scannet_color_map = s3dis.S3DIS_SEMANTICS_COLORS 71 | 72 | gt_inst_colors = INS_COLORS [labels['instances']] 73 | sem = labels['semantics'] 74 | gt_sem_colors = scannet.scannet_color_map[sem] 75 | 76 | gt_inst_colors = gt_inst_colors / 255 77 | gt_sem_colors = gt_sem_colors / 255 78 | 79 | mesh = reconstruct_mesh (scene) 80 | 81 | instance_fg = s3dis.semantics_to_forground_mask (labels['per_instance_semantics'], cfg) 82 | 83 | bbs = np.hstack((labels['per_instance_bb_centers'][instance_fg], 2* labels['per_instance_bb_bounds'][instance_fg], 84 | scannet.scannet_color_map[labels['per_instance_semantics'][instance_fg]])).T 85 | 86 | # ---------------- GET INSTANCES AND SEMANTICS COLORS 87 | pred_inst_colors = np.ones((len(scene ["positions"]), 3)) * 255 88 | pred_sem_colors = np.ones((len(scene ["positions"]), 3)) * 255 89 | 90 | for ins_id in np.unique (pred_label ["instances"]): 91 | mask = pred_label ["instances"] == ins_id 92 | sem_label = stats.mode(pred_label ["semantics"][mask], None)[0][0] 93 | if ins_id < 1: 94 | pred_inst_colors [mask] = [255,255,255] 95 | else: 96 | gt_ins_id = stats.mode(labels['instances'][mask], None)[0][0] 97 | pred_inst_colors [mask] = INS_COLORS [gt_ins_id] 98 | 99 | pred_sem_colors = scannet.scannet_color_map[pred_label ["semantics"]] 100 | pred_sem_colors [pred_label ["semantics"] < 0] = [255, 255, 255] 101 | 102 | pred_sem_colors = pred_sem_colors / 255 103 | pred_inst_colors = pred_inst_colors / 255 104 | 105 | mesh_rgb, mesh_gt_sem, mesh_gt_ins, mesh_pred_sem, mesh_pred_ins= interpolate (mesh, scene["positions"], 106 | [scene ["colors"], gt_sem_colors, gt_inst_colors, 
pred_sem_colors, pred_inst_colors], 107 | 0.04) 108 | interp_colors = np.asarray(mesh_gt_sem.vertex_colors) 109 | void_mask = interp_colors.sum (1) == 3.0 110 | mesh_rgb.remove_vertices_by_index (np.where (void_mask) [0]) 111 | mesh_gt_sem.remove_vertices_by_index (np.where (void_mask) [0]) 112 | mesh_gt_ins.remove_vertices_by_index (np.where (void_mask) [0]) 113 | mesh_pred_sem.remove_vertices_by_index (np.where (void_mask) [0]) 114 | mesh_pred_ins.remove_vertices_by_index (np.where (void_mask) [0]) 115 | 116 | o3d.io.write_triangle_mesh(os.path.join(out_path,'rgb.ply'), mesh_rgb) 117 | 118 | o3d.io.write_triangle_mesh(os.path.join(out_path,'gt_instances.ply'), mesh_gt_ins) 119 | o3d.io.write_triangle_mesh(os.path.join(out_path,'gt_semantics.ply'), mesh_gt_sem) 120 | 121 | o3d.io.write_triangle_mesh(os.path.join(out_path,'pred_instances.ply'), mesh_pred_ins) 122 | o3d.io.write_triangle_mesh(os.path.join(out_path,'pred_semantics.ply'), mesh_pred_sem) 123 | 124 | # -------------------- SAVE IN PYVIZ -------------------- 125 | v = viz.Visualizer() 126 | v.add_points(f'Input scene', scene['positions'], scene['colors'] * 255, point_size=25, visible=False) 127 | v.add_points(f'GT Instances',scene['positions'], gt_inst_colors * 255, point_size=25, visible=False) 128 | v.add_points(f'GT Semantics',scene['positions'], gt_sem_colors * 255, point_size=25, visible=False) 129 | v.add_points(f'Pred Instances',scene['positions'],pred_inst_colors * 255, point_size=25, visible=False) 130 | v.add_points(f'Pred Semantics',scene['positions'], pred_sem_colors * 255, point_size=25, visible=False) 131 | start, end = get_bbs_lines(labels['per_instance_bb_centers'][instance_fg], labels['per_instance_bb_bounds'][instance_fg]) 132 | bbs_colors = np.repeat(scannet.scannet_color_map[labels['per_instance_semantics'][instance_fg]], 12, axis=0) 133 | v.add_lines(f'GT BBs', start, end, bbs_colors, visible=False) 134 | v.save(os.path.join(out_path,'pyviz3d'), verbose=False) 135 | print ('Pyviz visualization to ', os.path.join(out_path,'pyviz3d')) 136 | 137 | def assign_semantics_to_proposals (pred_semantics, proposal_masks): 138 | # Use majoring vote to determind the semantic of proposals 139 | proposal_semantics = [] 140 | for mask in proposal_masks: 141 | mask = mask > 0 142 | semantic_id = np.bincount (pred_semantics [mask]).argmax () 143 | proposal_semantics.append (semantic_id) 144 | return np.array (proposal_semantics) 145 | 146 | def clustering_for_background (pred_semantics, coords, normals): 147 | ''' For the S3DIS scene: 148 | - we use DBSCAN to cluster the instances of walls 149 | - we use the semantic prediction to get the the floor insance and the ceiling instance (only 1 ceiling and 1 floor in each scene) 150 | - Non-maximum-clustering / bounding boxes are not used / predicted for walls / floors / ceiling 151 | ''' 152 | pred_instances = np.zeros_like (pred_semantics).astype (np.int32) 153 | # instance ID of ceiling 154 | pred_instances [pred_semantics == 0] = 1 155 | # instance ID of floor 156 | pred_instances [pred_semantics == 1] = 2 157 | 158 | wall_mask = pred_semantics == 2 159 | wall_coords = coords [wall_mask] 160 | wall_normals = normals [wall_mask] * 2 # priotizing normal over coordinates 161 | wall_features = np.concatenate ([wall_coords, wall_normals], 1) 162 | 163 | # wall_clustering = MeanShift(bandwidth=2, n_jobs=16).fit(wall_features) 164 | wall_clustering = DBSCAN(eps=0.35, min_samples=10, n_jobs=16).fit(wall_features) 165 | wall_clustering.labels_ = wall_clustering.labels_ + 4 166 | 
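# DBSCAN labels clusters 0, 1, 2, ... and marks noise as -1; shifting by 4 keeps the wall
# instance IDs clear of the ceiling (1) and floor (2) IDs assigned above (noise points end
# up with ID 3, and small clusters are reset to -1 just below).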
wall_instances = wall_clustering.labels_ 167 | 168 | # remove small noises 169 | bg_prop_ids, bg_prop_cnts = np.unique (wall_instances, return_counts=True) 170 | wall_id_small_mask = bg_prop_cnts < 3000 171 | small_prop_ids = bg_prop_ids [wall_id_small_mask] 172 | wall_small_mask = np.isin (wall_instances, small_prop_ids) 173 | wall_instances [wall_small_mask] = -1 174 | 175 | pred_instances [wall_mask] = wall_instances 176 | 177 | return pred_instances 178 | 179 | def s3dis_eval (pred_labels, gt_labels): 180 | 181 | num_room = len(gt_labels) 182 | 183 | # Initialize... 184 | # acc and macc 185 | total_true = 0 186 | total_seen = 0 187 | true_positive_classes = np.zeros(NUM_CLASSES) 188 | positive_classes = np.zeros(NUM_CLASSES) 189 | gt_classes = np.zeros(NUM_CLASSES) 190 | # mIoU 191 | ious = np.zeros(NUM_CLASSES) 192 | totalnums = np.zeros(NUM_CLASSES) 193 | # precision & recall 194 | total_gt_ins = np.zeros(NUM_CLASSES) 195 | at = 0.5 196 | tpsins = [[] for itmp in range(NUM_CLASSES)] 197 | fpsins = [[] for itmp in range(NUM_CLASSES)] 198 | # mucov and mwcov 199 | all_mean_cov = [[] for itmp in range(NUM_CLASSES)] 200 | all_mean_weighted_cov = [[] for itmp in range(NUM_CLASSES)] 201 | 202 | 203 | for i in range(num_room): 204 | data_label = pred_labels [i] 205 | pred_ins = pred_labels [i]["instances"] 206 | pred_sem = pred_labels [i]["semantics"] 207 | gt_label = gt_labels [i] 208 | gt_ins = gt_label ["instances"] 209 | gt_sem = gt_label ["semantics"] 210 | 211 | # semantic acc 212 | total_true += np.sum(pred_sem == gt_sem) 213 | total_seen += pred_sem.shape[0] 214 | 215 | # pn semantic mIoU 216 | for j in range(gt_sem.shape[0]): 217 | gt_l = int(gt_sem[j]) 218 | pred_l = int(pred_sem[j]) 219 | gt_classes[gt_l] += 1 220 | positive_classes[pred_l] += 1 221 | true_positive_classes[gt_l] += int(gt_l==pred_l) 222 | 223 | # instance 224 | un = np.unique(pred_ins) 225 | pts_in_pred = [[] for itmp in range(NUM_CLASSES)] 226 | for ig, g in enumerate(un): # each object in prediction 227 | if g == -1: 228 | continue 229 | tmp = (pred_ins == g) 230 | sem_seg_i = int(stats.mode(pred_sem[tmp])[0]) 231 | pts_in_pred[sem_seg_i] += [tmp] 232 | 233 | un = np.unique(gt_ins) 234 | pts_in_gt = [[] for itmp in range(NUM_CLASSES)] 235 | for ig, g in enumerate(un): 236 | tmp = (gt_ins == g) 237 | sem_seg_i = int(stats.mode(gt_sem[tmp])[0]) 238 | pts_in_gt[sem_seg_i] += [tmp] 239 | # NOTE: 240 | # pts_in_gt: (Nclass, Npoints) - binary array, gt instance list of each gt class 241 | # pts_in_pred: (Nclass, Npoints) - binary array, pred instance list of each pred class 242 | 243 | # instance mucov & mwcov 244 | for i_sem in range(NUM_CLASSES): 245 | sum_cov = 0 246 | mean_cov = 0 247 | mean_weighted_cov = 0 248 | num_gt_point = 0 249 | for ig, ins_gt in enumerate(pts_in_gt[i_sem]): 250 | ovmax = 0. 
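`clustering_for_background` above separates individual walls by running DBSCAN on point coordinates concatenated with up-weighted normals, then discards tiny clusters as noise. A condensed sketch of that step (thresholds copied from the code, helper name illustrative):

```python
import numpy as np
from sklearn.cluster import DBSCAN

def cluster_walls(coords, normals, eps=0.35, min_samples=10, min_points=3000):
    # Doubling the normals prioritises orientation, so walls meeting at a
    # corner end up in different clusters even though their points touch.
    feats = np.concatenate([coords, normals * 2], axis=1)
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(feats).labels_
    # Drop clusters that are too small to be a real wall.
    ids, counts = np.unique(labels, return_counts=True)
    labels[np.isin(labels, ids[counts < min_points])] = -1
    return labels  # the caller offsets these ids past the ceiling/floor instances
```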
251 | num_ins_gt_point = np.sum(ins_gt) 252 | num_gt_point += num_ins_gt_point 253 | for ip, ins_pred in enumerate(pts_in_pred[i_sem]): 254 | union = (ins_pred | ins_gt) 255 | intersect = (ins_pred & ins_gt) 256 | iou = float(np.sum(intersect)) / np.sum(union) 257 | 258 | if iou > ovmax: 259 | ovmax = iou 260 | ipmax = ip 261 | 262 | sum_cov += ovmax 263 | mean_weighted_cov += ovmax * num_ins_gt_point 264 | 265 | if len(pts_in_gt[i_sem]) != 0: 266 | mean_cov = sum_cov / len(pts_in_gt[i_sem]) 267 | all_mean_cov[i_sem].append(mean_cov) 268 | 269 | mean_weighted_cov /= num_gt_point 270 | all_mean_weighted_cov[i_sem].append(mean_weighted_cov) 271 | 272 | 273 | # instance precision & recall 274 | for i_sem in range(NUM_CLASSES): 275 | tp = [0.] * len(pts_in_pred[i_sem]) 276 | fp = [0.] * len(pts_in_pred[i_sem]) 277 | gtflag = np.zeros(len(pts_in_gt[i_sem])) 278 | total_gt_ins[i_sem] += len(pts_in_gt[i_sem]) 279 | 280 | for ip, ins_pred in enumerate(pts_in_pred[i_sem]): 281 | ovmax = -1. 282 | 283 | for ig, ins_gt in enumerate(pts_in_gt[i_sem]): 284 | union = (ins_pred | ins_gt) 285 | intersect = (ins_pred & ins_gt) 286 | iou = float(np.sum(intersect)) / np.sum(union) 287 | 288 | 289 | if iou > ovmax: 290 | ovmax = iou 291 | igmax = ig 292 | 293 | if ovmax >= at: 294 | tp[ip] = 1 # true 295 | else: 296 | fp[ip] = 1 # false positive 297 | 298 | tpsins[i_sem] += tp 299 | fpsins[i_sem] += fp 300 | 301 | 302 | MUCov = np.zeros(NUM_CLASSES) 303 | MWCov = np.zeros(NUM_CLASSES) 304 | for i_sem in range(NUM_CLASSES): 305 | MUCov[i_sem] = np.mean(all_mean_cov[i_sem]) 306 | MWCov[i_sem] = np.mean(all_mean_weighted_cov[i_sem]) 307 | 308 | precision = np.zeros(NUM_CLASSES) 309 | recall = np.zeros(NUM_CLASSES) 310 | for i_sem in range(NUM_CLASSES): 311 | tp = np.asarray(tpsins[i_sem]).astype(np.float) 312 | fp = np.asarray(fpsins[i_sem]).astype(np.float) 313 | tp = np.sum(tp) 314 | fp = np.sum(fp) 315 | 316 | rec = tp / total_gt_ins[i_sem] 317 | prec = tp / (tp + fp) 318 | 319 | precision[i_sem] = prec 320 | recall[i_sem] = rec 321 | 322 | def log_string(out_str): 323 | print(out_str) 324 | 325 | log_string('Instance Segmentation Precision: {}'.format(precision)) 326 | log_string('Instance Segmentation mPrecision: {}'.format(np.mean(precision))) 327 | log_string('Instance Segmentation Recall: {}'.format(recall)) 328 | log_string('Instance Segmentation mRecall: {}'.format(np.mean(recall))) 329 | 330 | 331 | 332 | # semantic results 333 | iou_list = [] 334 | for i in range(NUM_CLASSES): 335 | iou = true_positive_classes[i]/float(gt_classes[i]+positive_classes[i]-true_positive_classes[i]) 336 | iou_list.append(iou) 337 | 338 | return np.mean(precision), np.mean(recall), precision, recall -------------------------------------------------------------------------------- /dataprocessing/s3dis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | 4 | import open3d as o3d 5 | import numpy as np 6 | import pyviz3d.visualizer as viz 7 | import glob 8 | from natsort import natsorted 9 | import os 10 | import json 11 | from sklearn.neighbors import NearestNeighbors 12 | from scipy.spatial import cKDTree as KDTree 13 | import time, math 14 | import dataprocessing.augmentation as augmentation 15 | import torch 16 | 17 | def generate_color_map (max_ids): 18 | mapping = [[np.random.randint (255), np.random.randint (255), np.random.randint (255)] for _ in range (max_ids)] 19 | return np.array (mapping) 20 | 21 | ID2NAME = {0:'ceiling', 1:'floor', 
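The coverage (MUCov/MWCov) and precision/recall numbers in `s3dis_eval` all reduce to point-level IoU between boolean instance masks, and the semantic mIoU uses the usual TP / (GT + Pred - TP) form. The core overlap computation, stripped of the bookkeeping:

```python
import numpy as np

def mask_iou(pred_mask, gt_mask):
    # IoU of two boolean point masks; 0 when both masks are empty.
    union = np.sum(pred_mask | gt_mask)
    return float(np.sum(pred_mask & gt_mask)) / union if union > 0 else 0.0

pred = np.array([True, True, False, False])
gt   = np.array([True, False, True,  False])
print(mask_iou(pred, gt))   # 1 intersection / 3 union = 0.333...
```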
2:'wall', 3:'beam', 4:'column', 5:'window', 6:'door', 7:'table', 8:'chair', 9:'sofa', 10:'bookshelf', 11:'board', 12:'clutter'} 22 | ID2NAME = [ID2NAME [i] for i in range (13)] 23 | S3DIS_SEMANTICS_COLORS = np.array ( 24 | [(174, 199, 232), # ceiling 25 | (152, 223, 138), # floor 26 | (31, 119, 180), # wall 27 | (255, 187, 120), # column 28 | (188, 189, 34), # beam 29 | (140, 86, 75), # window 30 | (255, 152, 150), # door 31 | (214, 39, 40), # table 32 | (197, 176, 213), # chair 33 | (148, 103, 189), # bookcase 34 | (196, 156, 148), # sofa 35 | (23, 190, 207), # board 36 | (178, 76, 76),] # clutter 37 | ) 38 | 39 | # WARNING: those arrays are used within the network 40 | S3DIS_SEMANTIC_VALID_CLASS_IDS = np.array(range (13)) 41 | S3DIS_SEMANTIC_VALID_CLASS_IDS_torch = torch.Tensor(S3DIS_SEMANTIC_VALID_CLASS_IDS) 42 | S3DIS_INSTANCE_VALID_CLASS_IDS = np.array(range (13)) 43 | 44 | S3DIS_INSTANCE_VALID_CLASS_IDS_torch = torch.Tensor(S3DIS_INSTANCE_VALID_CLASS_IDS).long() 45 | S3DIS_INSTANCE_ID2IDX = torch.zeros(13).fill_(-1).long() 46 | S3DIS_INSTANCE_ID2IDX[S3DIS_INSTANCE_VALID_CLASS_IDS] = torch.arange(len(S3DIS_INSTANCE_VALID_CLASS_IDS)).long() 47 | 48 | S3DIS_SEMANTIC_ID2IDX = torch.zeros(300).fill_(-100).long() 49 | # Needed to map semantic ids to ones valid for scene segmentation (= valid classes W wall, ceiling, floor) 50 | S3DIS_SEMANTIC_ID2IDX[S3DIS_SEMANTIC_VALID_CLASS_IDS] = torch.arange(len(S3DIS_SEMANTIC_VALID_CLASS_IDS)).long() 51 | 52 | def get_scene_names (mode, cfg): 53 | scene_npy_pths = glob.glob (os.path.join (cfg.data_dir, 'Area_*/*.npy')) 54 | scene_names = [pth.split ('/')[-2] + '.' + pth.split ('/')[-1].split ('.')[0] for pth in scene_npy_pths] 55 | 56 | if mode == "train": 57 | valid_set_prefix = "Area_" + str (cfg.s3dis_split_fold) 58 | scene_names = [name for name in scene_names if valid_set_prefix not in name] 59 | if mode == 'val': 60 | valid_set_prefix = "Area_" + str (cfg.s3dis_split_fold) 61 | scene_names = [name for name in scene_names if valid_set_prefix in name] 62 | 63 | return scene_names 64 | 65 | def refine_segments (segments, counts, positions, minVerts=20): 66 | """ merge too small segments to large nearby segment 67 | """ 68 | segcount_per_point = counts [segments] 69 | large_enough = segcount_per_point > minVerts 70 | too_small = segcount_per_point <= minVerts 71 | 72 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(positions[large_enough]) 73 | dist, qualified_2_disqualified = nbrs.kneighbors(positions[too_small]) 74 | disqualified2qualified = qualified_2_disqualified[:,0] 75 | segments [too_small] = segments[large_enough] [disqualified2qualified] 76 | 77 | return segments 78 | 79 | def semantics_to_forground_mask (semantics, cfg=None): 80 | if cfg.ignore_wall_ceiling_floor: 81 | return semantics > 2 82 | return semantics >= 0 83 | 84 | def is_foreground (sem): 85 | return sem > 2 86 | 87 | def compute_avg_centers(positions, instance_labels): 88 | per_point_centers = np.zeros((instance_labels.shape[0], 3), dtype='float32') 89 | per_point_offsets = np.zeros((instance_labels.shape[0], 3), dtype='float32') 90 | per_point_center_distances = np.zeros((instance_labels.shape[0], 1), dtype='float32') 91 | 92 | for instance_id in set(instance_labels): 93 | instance_mask = (instance_id == instance_labels) 94 | 95 | # compute AVG centers 96 | instance_center = np.mean(positions[instance_mask], axis=0) 97 | per_point_centers[instance_mask] = instance_center 98 | per_point_offsets[instance_mask] = per_point_centers[instance_mask] - 
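The `*_ID2IDX` tensors above are lookup tables that send raw label ids to contiguous indices, with a fill value (-1 or -100) marking ids the network should ignore. For S3DIS all 13 ids are valid, so the mapping is the identity; the sketch below uses a hypothetical 5-id label space just to make the mechanics visible:

```python
import torch

valid_ids = torch.tensor([1, 3, 4])                 # hypothetical valid classes
id2idx = torch.full((5,), -100, dtype=torch.long)   # -100 = ignore label
id2idx[valid_ids] = torch.arange(len(valid_ids))

raw_labels = torch.tensor([0, 1, 3, 4, 2])
print(id2idx[raw_labels])   # tensor([-100,    0,    1,    2, -100])
```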
positions[instance_mask] 99 | per_point_center_distances = np.linalg.norm(per_point_offsets, axis=1) 100 | 101 | return per_point_centers, per_point_center_distances 102 | 103 | def compute_bounding_box(positions, instance_labels, semantic_labels): 104 | per_point_bb_centers = np.zeros((instance_labels.shape[0], 3), dtype='float32') 105 | per_point_bb_offsets = np.zeros((instance_labels.shape[0], 3), dtype='float32') 106 | per_point_bb_bounds = np.zeros((instance_labels.shape[0], 3), dtype='float32') 107 | per_point_bb_center_distances = np.zeros((instance_labels.shape[0], 1), dtype='float32') 108 | per_point_bb_radius = np.zeros((instance_labels.shape[0], 1), dtype='float32') 109 | 110 | instances = np.unique(instance_labels) 111 | per_instance_semantics = np.zeros((len(instances)), dtype='int32') 112 | per_instance_bb_centers = np.zeros((len(instances), 3), dtype='float32') 113 | per_instance_bb_bounds = np.zeros((len(instances), 3), dtype='float32') 114 | per_instance_bb_radius = np.zeros((len(instances)), dtype='float32') 115 | 116 | for i, instance_id in enumerate(instances): 117 | instance_mask = (instance_id == instance_labels) 118 | instance_points = positions[instance_mask] 119 | per_instance_semantics[i] = semantic_labels[instance_mask][0] 120 | 121 | # bb center 122 | max_bounds = np.max(instance_points, axis=0) 123 | min_bounds = np.min(instance_points, axis=0) 124 | bb_center = (min_bounds + max_bounds) / 2 125 | per_point_bb_centers[instance_mask] = bb_center 126 | per_instance_bb_centers[i] = bb_center 127 | 128 | # bb bounds 129 | bb_bounds = max_bounds - bb_center 130 | per_point_bb_bounds[instance_mask] = bb_bounds 131 | per_instance_bb_bounds[i] = bb_bounds 132 | 133 | # bb center offsets 134 | offsets = bb_center - instance_points 135 | per_point_bb_offsets[instance_mask] = offsets 136 | 137 | # bb center distances 138 | bb_center_distances = np.linalg.norm(offsets, axis=1) 139 | per_point_bb_center_distances[instance_mask] = bb_center_distances.reshape((-1,1)) 140 | 141 | # bb radius 142 | radius = np.max(bb_center_distances).reshape((-1,1)) 143 | per_point_bb_radius[instance_mask] = radius 144 | per_instance_bb_radius[i] = radius 145 | 146 | return per_point_bb_centers, per_point_bb_offsets, per_point_bb_bounds, \ 147 | per_point_bb_center_distances, per_point_bb_radius, \ 148 | instances, per_instance_semantics, per_instance_bb_centers, per_instance_bb_bounds, per_instance_bb_radius 149 | 150 | def seg2label (segments, label_ids): 151 | # Use major voting to assign label for each segment 152 | unique_segments_ids = np.unique(segments) 153 | seg2labelID = np.zeros(np.max(unique_segments_ids) + 1, dtype='int32') 154 | seg2labelID.fill(np.inf) 155 | for seg_id in unique_segments_ids: 156 | seg_mask = segments == seg_id 157 | 158 | seg_label_ids = label_ids[seg_mask] 159 | counts = np.bincount (seg_label_ids) 160 | most_frequent_labels = np.argmax(counts) 161 | 162 | seg2labelID[seg_id] = most_frequent_labels 163 | per_point_segment_labelID = seg2labelID [segments] 164 | return per_point_segment_labelID, seg2labelID 165 | 166 | def read_scene_from_numpy (scene_name, cfg, do_augmentations=False): 167 | """read_scene_from_numpy: read scene informationfrom numpy 168 | 169 | :return 170 | scene: dictionary containing 171 | name: name of the scene informat [area].[place] 172 | positions: 3D-float position of each vertex/point 173 | normals: 3D-float normal of each vertex/point (as computed by open3d) 174 | colors: 3D-float color of each vertex/point [0..1] 175 | segments: 
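`compute_bounding_box` above derives, for every instance, the axis-aligned box centre, the half-extents ("bounds"), per-point offsets to the centre, and a radius used as a distance bound. The geometry for a single instance, as an illustrative helper that is not part of the repo:

```python
import numpy as np

def instance_box(points):
    mins, maxs = points.min(0), points.max(0)
    center = (mins + maxs) / 2          # box centre
    bounds = maxs - center              # half side-lengths along x, y, z
    offsets = center - points           # per-point regression target
    radius = np.linalg.norm(offsets, axis=1).max()
    return center, bounds, offsets, radius
```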
segments id of each vertex/point: N x 1 int32 176 | labels: dictionary containing 177 | semantic_labels: N x 1 int32 178 | instance_labels: N x 1 int32 179 | centers: N x 3 float32 180 | center_distances: N x 1 float32 181 | """ 182 | scene_npy_path = os.path.join (cfg.data_dir, scene_name.split ('.') [0] + '/' + scene_name [len("Area_*") + 1:] + '.normals.instance.npy') 183 | data = np.load (scene_npy_path) 184 | 185 | positions = data [:,:3].astype (np.float32) 186 | colors = data [:,3:6].astype (np.float) / 255 187 | positions = positions - positions.mean (0) 188 | positions[:, 2] -= np.min (positions [:, 2]) 189 | normals = data [:,6:9].astype (np.float) 190 | semantics = data [:, -2].astype (np.int32) 191 | instances = data [:, -1].astype (np.int32) 192 | 193 | # Basic augmentations (rotation, scaling, flipping x-y) 194 | pcd = o3d.geometry.PointCloud() 195 | pcd.points = o3d.utility.Vector3dVector(positions.astype (np.float32)) 196 | pcd.normals = o3d.utility.Vector3dVector(normals.astype (np.float32)) 197 | 198 | if cfg and cfg.augmentation and do_augmentations: 199 | # rotation around x,y,z 200 | if np.random.rand () < cfg.rotation_aug[0]: 201 | augmentation.rotate_mesh (pcd) 202 | if np.random.rand () < cfg.scaling_aug [0]: 203 | augmentation.scale_mesh (pcd, cfg.scaling_aug[1], cfg.scaling_aug[2]) 204 | # rotation around z (height) in 90 degree angles 205 | if cfg.rotation_90_aug: 206 | augmentation.rotate_mesh_90_degree(pcd) 207 | if np.random.rand () < cfg.flipping_aug: 208 | Rt = np.eye (4) 209 | Rt [0][0] *= -1 # Randomly x-axis flip 210 | pcd.transform (Rt) 211 | if np.random.rand () < cfg.flipping_aug: 212 | Rt = np.eye (4) 213 | Rt [1][1] *= - 1 # Randomly y-axis flip 214 | pcd.transform (Rt) 215 | 216 | positions = np.asarray(pcd.points) 217 | normals = np.asarray (pcd.normals) 218 | 219 | # Apply geometric augmentation 220 | if do_augmentations and cfg.augmentation: 221 | if np.random.rand () < cfg.elastic_distortion: 222 | elastic_distortion = augmentation.ElasticDistortion () 223 | positions = elastic_distortion (positions) 224 | pcd.points = o3d.utility.Vector3dVector(positions) 225 | 226 | # elastic distoriton HAIS setting 227 | if np.random.rand () < cfg.elastic_distortion_HAIS: 228 | positions -= positions.mean (0) 229 | positions = augmentation.HAIS_elastic(positions, 6 * (1/cfg.voxel_size) // 50, 40 * (1/cfg.voxel_size) / 50) 230 | positions = augmentation.HAIS_elastic(positions, 20 * (1/cfg.voxel_size) // 50, 160 * (1/cfg.voxel_size) / 50) 231 | positions -= positions.mean (0) 232 | positions[:, 2] -= np.min (positions [:, 2]) 233 | pcd.points = o3d.utility.Vector3dVector(positions) 234 | 235 | if np.random.rand () < cfg.position_jittering [0]: 236 | displacements = cfg.position_jittering [1] * np.random.randn (*positions.shape) 237 | positions = positions + displacements 238 | pcd.points = o3d.utility.Vector3dVector(positions) 239 | 240 | if cfg.HAIS_jitter_aug: 241 | positions -= positions.mean(0) 242 | pcd.points = o3d.utility.Vector3dVector(positions) 243 | Rt = np.eye (4) 244 | m = np.eye(3) 245 | m += np.random.randn(3, 3) * 0.1 246 | theta = np.random.rand() * 2 * math.pi 247 | m = np.matmul(m, [[math.cos(theta), math.sin(theta), 0], [-math.sin(theta), math.cos(theta), 0], [0, 0, 1]]) # rotation 248 | Rt[:3,:3] = m 249 | pcd.transform (Rt) 250 | positions = np.asarray(pcd.points) 251 | positions[:, 2] -= np.min (positions [:, 2]) 252 | pcd.points = o3d.utility.Vector3dVector(positions) 253 | 254 | # Color transformations 255 | if do_augmentations and 
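The geometric augmentations in `read_scene_from_numpy` are applied through open3d transforms: random axis flips, scaling, rotations (including the 90-degree and HAIS-style z rotations), elastic distortion and jittering. A NumPy-only sketch of just the flip-and-rotate part, assuming `points` is an N x 3 array:

```python
import numpy as np

def flip_and_rotate_z(points, flip_prob=0.5):
    T = np.eye(4)
    if np.random.rand() < flip_prob:    # random x-axis flip
        T[0, 0] = -1
    if np.random.rand() < flip_prob:    # random y-axis flip
        T[1, 1] = -1
    theta = np.random.rand() * 2 * np.pi
    c, s = np.cos(theta), np.sin(theta)
    rot_z = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
    T[:3, :3] = T[:3, :3] @ rot_z       # flip composed with a rotation about z
    homog = np.hstack([points, np.ones((len(points), 1))])
    return (homog @ T.T)[:, :3]
```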
cfg.augmentation: 256 | # Contrast auto contrast 257 | if np.random.rand () < cfg.chromatic_auto_contrast: 258 | chromatic_auto_contrast = augmentation.ChromaticAutoContrast () 259 | colors = chromatic_auto_contrast (colors) 260 | 261 | # Chromatic translation 262 | if np.random.rand () < cfg.chromatic_translation [0]: 263 | trans_range_ratio = cfg.chromatic_translation [1] 264 | chromatic_translation = augmentation.ChromaticTranslation (trans_range_ratio) 265 | colors = chromatic_translation (colors) 266 | 267 | # Chromatic Jitter 268 | if np.random.rand () < cfg.color_jittering_aug [0]: 269 | colors = augmentation.color_jittering (colors, -cfg.color_jittering_aug [1], cfg.color_jittering_aug [1]) 270 | 271 | # Random Brightness 272 | if np.random.rand () < cfg.random_brightness [0]: 273 | colors = augmentation.random_brightness (colors, cfg.random_brightness [1]) 274 | 275 | if cfg.apply_hue_aug: 276 | colors = augmentation.apply_hue_aug(colors) 277 | 278 | 279 | positions = np.asarray(pcd.points) 280 | normals = np.asarray (pcd.normals) 281 | 282 | if cfg.superpoint_algo == 'learned_superpoint': 283 | PATH_SEGMENTS_LABELS_INFO = os.path.join (cfg.data_dir, 'segment_labels/learned_superpoint_graph_segmentations/') 284 | 285 | path_scene_segments_labels_info = PATH_SEGMENTS_LABELS_INFO + '/' + scene_name + '.npy' 286 | segments_labels_info = np.load (path_scene_segments_labels_info, allow_pickle=True).item () 287 | segments = segments_labels_info ['segments'] 288 | per_point_segment_instanceID = segments_labels_info ['per_point_segment_instanceID'] 289 | seg2instanceID = segments_labels_info ['seg2instanceID'] 290 | per_point_segment_semanticID = segments_labels_info ['per_point_segment_semanticID'] 291 | seg2semanticID = segments_labels_info ['seg2semanticID'] 292 | 293 | if cfg.point_sampling_rate is not None: 294 | num_scene_points = len (positions) 295 | sampling_mask = np.zeros (num_scene_points, dtype=np.bool) 296 | if not do_augmentations: 297 | # During evaluation or testing, sample every 4 points 298 | sampling_point_ids = np.array (range (num_scene_points)) [::4] 299 | else: 300 | # During training, sample points randomly with an user input sampling rate 301 | sampling_point_ids = np.random.choice (range (num_scene_points), int (num_scene_points * cfg.point_sampling_rate), replace=False) 302 | sampling_mask [sampling_point_ids] = True 303 | 304 | segments = segments [sampling_mask] 305 | 306 | # Remaping contiguous segments ID 307 | per_point_segment_semanticID = per_point_segment_semanticID [sampling_mask] 308 | per_point_segment_instanceID = per_point_segment_instanceID [sampling_mask] 309 | positions = positions [sampling_mask] 310 | colors = colors [sampling_mask] 311 | normals = normals [sampling_mask] 312 | instances = instances [sampling_mask] 313 | semantics = semantics [sampling_mask] 314 | 315 | scene = { 316 | 'name': scene_name, 317 | 'positions': positions, 318 | 'colors': colors, 319 | 'normals': normals, 320 | 'segments': segments, 321 | } 322 | labels = { 323 | 'instances': instances, 324 | 'semantics': semantics, 325 | 'per_point_segment_instanceID': per_point_segment_instanceID, 326 | 'per_point_segment_semanticID': per_point_segment_semanticID, 327 | 'seg2instanceID': seg2instanceID, 328 | 'seg2semanticID': seg2semanticID, 329 | } 330 | return scene, labels 331 | 332 | def process_scene(scene_name, mode, configuration, do_augmentations=False): 333 | """Process scene: extracts ground truth labels (instance and semantics) and computes centers 334 | 335 | :return 
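Point subsampling (`point_sampling_rate`) behaves differently per mode: evaluation takes every fourth point deterministically, while training draws a random subset of the requested fraction (0.25 in the S3DIS configs). In isolation:

```python
import numpy as np

def sampling_mask(num_points, rate=0.25, training=True):
    mask = np.zeros(num_points, dtype=bool)
    if training:
        ids = np.random.choice(num_points, int(num_points * rate), replace=False)
    else:
        ids = np.arange(num_points)[::4]   # fixed stride at eval time
    mask[ids] = True
    return mask
```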
336 | scene: dictionary containing 337 | positions: 3D-float position of each vertex/point 338 | normals: 3D-float normal of each vertex/point (as computed by open3d) 339 | colors: 3D-float color of each vertex/point [0..1] 340 | labels: dictionary containing 341 | semantic_labels: N x 1 int32 342 | instance_labels: N x 1 int32 343 | centers: N x 3 float32 344 | center_distances: N x 1 float32 345 | """ 346 | cfg = configuration 347 | 348 | # Read point clouds, extract semantic & instance labels, compute centers 349 | scene, labels = read_scene_from_numpy(scene_name, configuration, do_augmentations=do_augmentations) 350 | centers, center_distances = compute_avg_centers(scene ['positions'], labels ['instances']) 351 | 352 | bb_centers, bb_offsets, bb_bounds, bb_center_distances, bb_radius, \ 353 | unique_instances, per_instance_semantics, per_instance_bb_centers, per_instance_bb_bounds, per_instance_bb_radius \ 354 | = compute_bounding_box(scene ['positions'], labels['instances'], labels['semantics']) 355 | 356 | # make sure the unique instance ids can be used as array indices for 'per_instance_XX' 357 | assert np.all(unique_instances == range(len(unique_instances))) 358 | 359 | labels ['per_instance_bb_radius'] = per_instance_bb_radius 360 | labels ['per_instance_bb_bounds'] = per_instance_bb_bounds 361 | labels ['per_instance_bb_centers'] = per_instance_bb_centers 362 | labels ['per_instance_semantics'] = per_instance_semantics 363 | labels ['unique_instances'] = unique_instances 364 | labels ['bb_radius'] = bb_radius 365 | labels ['bb_center_distances'] = bb_center_distances 366 | labels ['seg2inst'] = labels ['seg2instanceID'] 367 | labels ['bb_bounds'] = bb_bounds 368 | labels ['bb_offsets'] = bb_offsets 369 | labels ['bb_centers'] = bb_centers 370 | labels ['center_distances'] = center_distances 371 | labels ['centers'] = centers 372 | 373 | return scene, labels 374 | -------------------------------------------------------------------------------- /models/training.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | 4 | import sys 5 | sys.path.append('.') 6 | 7 | import torch, torch.nn as nn 8 | import torch.optim as optim 9 | from torch.nn import functional as F 10 | import os 11 | from torch.utils.tensorboard import SummaryWriter 12 | from glob import glob 13 | import numpy as np 14 | import time 15 | from collections import defaultdict 16 | from pynvml import * 17 | from utils.util import * 18 | from models.model import Model 19 | from models.evaluation import Evaluater 20 | import torch.multiprocessing as mp 21 | import torch.distributed as dist 22 | import config_loader as cfg_loader 23 | from models.dataloader import ScanNet 24 | from models.dataloader import ARKitScenes 25 | from models.dataloader import S3DIS 26 | 27 | class Trainer(object): 28 | # set val_dataset to None if no validation should be performed 29 | def __init__(self, model, train_dataloader, val_dataset, cfg, rank = None): 30 | self.cfg = cfg 31 | self.model = model 32 | self.rank = rank 33 | self.main_process = not cfg.multigpu or rank == 0 34 | 35 | model_params = self.model.parameters() 36 | 37 | if cfg.optimizer == 'Adam': 38 | self.optimizer = optim.Adam(model_params, lr=cfg.lr) 39 | if cfg.optimizer == 'Adadelta': 40 | self.optimizer = optim.Adadelta(model_params) 41 | if cfg.optimizer == 'RMSprop': 42 | self.optimizer = optim.RMSprop(model_params, momentum=0.9) 43 | 44 | self.epoch = 0 45 | self.train_dataloader = train_dataloader 
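`process_scene` asserts that the unique instance ids form the range 0..K-1, so they can index the `per_instance_*` arrays directly. When labels are not contiguous they can be remapped first; a small helper (not part of the repo) showing the idea:

```python
import numpy as np

def make_contiguous(instance_labels):
    unique_ids, remapped = np.unique(instance_labels, return_inverse=True)
    return remapped, len(unique_ids)

labels = np.array([7, 7, 2, 9, 2])
print(make_contiguous(labels))   # (array([1, 1, 0, 2, 0]), 3)
```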
46 | self.val_dataset = val_dataset 47 | self.val_min = None 48 | self.writer = None 49 | if self.main_process: 50 | os.makedirs(self.cfg.checkpoint_path, exist_ok=True) 51 | self.writer = SummaryWriter(os.path.dirname(__file__) + '/../experiments/tf_summaries/{}/'.format(cfg.exp_name)) 52 | # include copy of all variables and the configuration file into the experiments folder 53 | f = os.path.join(cfg.exp_path, 'args.txt') 54 | with open(f, 'w') as file: 55 | for arg in sorted(vars(cfg)): 56 | attr = getattr(cfg, arg) 57 | file.write('{} = {}\n'.format(arg, attr)) 58 | if cfg.config is not None: 59 | f = os.path.join(cfg.exp_path, 'config.txt') 60 | with open(f, 'w') as file: 61 | file.write(open(cfg.config, 'r').read()) 62 | 63 | def train_step(self, batch): 64 | self.model.train() 65 | self.optimizer.zero_grad() 66 | loss_dict = self.model.compute_loss(batch, self.epoch) 67 | optimization_loss = loss_dict['optimization_loss'] 68 | optimization_loss.backward() 69 | self.optimizer.step() 70 | return loss_dict 71 | 72 | def train_model(self, epochs): 73 | start, training_time, iteration_num = self.load_checkpoint() 74 | 75 | iteration_start_time = time.time() 76 | for rel_epoch, epoch in enumerate(range(start, epochs)): 77 | if self.cfg.multigpu: 78 | self.train_dataloader.sampler.set_epoch(epoch) 79 | self.epoch = epoch 80 | losses_epoch = defaultdict(int) # default values are 0 81 | print(f'Start epoch {epoch}') 82 | 83 | if self.cfg.use_lr_scheduler: 84 | cosine_lr_after_step(self.optimizer, self.cfg.lr, epoch, 85 | self.cfg.lr_scheduler_start_epoch, self.cfg.lr_scheduler_end_epoch) 86 | if self.main_process: 87 | self.writer.add_scalar('LR', self.optimizer.param_groups[0]['lr'], iteration_num) 88 | if epoch >= self.cfg.lr_scheduler_end_epoch: 89 | print('END TRAINING --- LR scheduling end reached. 
Stop training.') 90 | return 91 | 92 | time_prepare_training_batch_start = time.time() 93 | for batch_num, batch in enumerate(self.train_dataloader): 94 | 95 | # save model 96 | iteration_duration = time.time() - iteration_start_time 97 | 98 | #---------- EVALUATE and SAVE CHECKPOINT ------------------------# 99 | if self.main_process and batch_num == 0 and not self.cfg.skip_first_eval: 100 | training_time += iteration_duration 101 | iteration_start_time = time.time() 102 | 103 | # save checkpoints after every ckpt_every epochs 104 | if epoch % self.cfg.ckpt_every == 0: # model is large, so save only now and then 105 | print('Saving checkpoint...') 106 | save_time = time.time() 107 | self.save_checkpoint(epoch, training_time, iteration_num) 108 | print(f'Done saving checkpoint ({str(time.time() - save_time)[:5]} s)') 109 | 110 | 111 | val_losses = self.compute_val_loss(self.cfg.num_eval_batches) 112 | print("VAL losses: {} ".format(val_losses)) 113 | # self.writer.add_scalars('Losses/val', val_losses, iteration_num) 114 | for k, v in val_losses.items(): 115 | if not k[:11] == "mask_scores": 116 | self.writer.add_scalar('val/' + k, v, iteration_num) 117 | else: 118 | self.writer.add_scalar('val_mask_scores/' + k[12:], v, iteration_num) 119 | val_loss = val_losses['optimization_loss'] 120 | 121 | # Evaluation after every eval_every epochs 122 | if self.val_dataset and (epoch % self.cfg.val_every == 0 or epoch % self.cfg.eval_every == 0): 123 | print('start computing validation loss') 124 | # free memory for validation computation 125 | if 'loss_dict' in locals(): # just remove it if it exists 126 | del loss_dict 127 | 128 | # eval not needed for very early models, early models take long to eval 129 | if epoch % self.cfg.eval_every == 0 and epoch >= 250 and self.cfg.full_model \ 130 | and self.cfg.dataset_name == 'scannet': 131 | 132 | import dataprocessing.scannet as scannet 133 | semantic_valid_class_ids_torch = scannet.SCANNET_SEMANTIC_VALID_CLASS_IDS_torch 134 | is_foreground = scannet.is_foreground 135 | semantic_id2idx = scannet.SCANNET_SEMANTIC_ID2IDX 136 | instance_id2idx = scannet.SCANNET_INSTANCE_ID2IDX 137 | 138 | val_model = Model(self.cfg, semantic_valid_class_ids_torch, semantic_id2idx, instance_id2idx, is_foreground) 139 | predictor = Evaluater(val_model, self.cfg) 140 | ap_all, ap_50, ap_25 = predictor.eval(val_dataset) 141 | for ap_str, ap in [('ap_all', ap_all), ('ap_50', ap_50), ('ap_25', ap_25)] : 142 | self.writer.add_scalar('val/' + ap_str, ap, iteration_num) 143 | 144 | if self.val_min is None: 145 | self.val_min = val_loss 146 | 147 | if val_loss < self.val_min: 148 | self.val_min = val_loss 149 | for path in glob(self.cfg.exp_path + 'val_min=*'): 150 | os.remove(path) 151 | np.save(self.cfg.exp_path + 'val_min=checkpoint_{}h:{}m:{}s_{}.tar' 152 | .format(*[*convertSecs(training_time),training_time]), [epoch, iteration_num, val_loss]) 153 | self.cfg.skip_first_eval = False 154 | 155 | # Compute time to prepare batch; this time is reset at the end of this for-loop 156 | time_prepare_training_batch_duration = time.time() - time_prepare_training_batch_start 157 | print(f'Time to prepare batch: {time_prepare_training_batch_duration:.3f}') 158 | if self.main_process: 159 | self.writer.add_scalar('time/prepare_training_batch', time_prepare_training_batch_duration, 160 | iteration_num) 161 | 162 | # Optimize model 163 | time_training_step_start = time.time() 164 | loss_dict = self.train_step(batch) 165 | time_training_step_duration = time.time() - time_training_step_start 166 
| if self.main_process: 167 | self.writer.add_scalar('time/training_step', time_training_step_duration, iteration_num) 168 | 169 | # Transform losses to single values 170 | for k, v in loss_dict.items(): 171 | losses_epoch[k] += v.item() 172 | 173 | current_iteration = batch_num + epoch * len(self.train_dataloader) 174 | current_losses = {k: str(v.item())[:6] for k, v in loss_dict.items()} 175 | print(f'{current_iteration} dt:{time_training_step_duration:.3f} Current losses: {current_losses}') 176 | 177 | if self.main_process: 178 | # LOGGING GPU STATISTICS 179 | # in order to manage the unstable memory usage of ME (defined on GPU 0 here) 180 | nvmlInit() 181 | h = nvmlDeviceGetHandleByIndex(0) 182 | info_before = nvmlDeviceGetMemoryInfo(h) 183 | # EMPTY CACHED MEMORY 184 | torch.cuda.empty_cache() 185 | info_after = nvmlDeviceGetMemoryInfo(h) 186 | for k, v in {'total MB': info_before.total / 1024 ** 2, 187 | 'free MB': info_before.free / 1024 ** 2, 188 | 'used MB': info_before.used / 1024 ** 2}.items(): 189 | self.writer.add_scalar('gpu memory usage/' + k, v, iteration_num) 190 | 191 | for k, v in {'total MB': info_before.total / 1024 ** 2, 192 | 'free after emptying MB': info_after.used / 1024 ** 2, 193 | 'used after emptying MB': info_after.used / 1024 ** 2}.items(): 194 | self.writer.add_scalar('gpu memory usage (emptied cache)/' + k, v, iteration_num) 195 | 196 | # how many batches we had overall - used for logging 197 | iteration_num += 1 198 | time_prepare_training_batch_start = time.time() 199 | 200 | if self.main_process: 201 | # self.writer.add_scalar('training loss last batch', loss, epoch) 202 | # compute AVG losses 203 | for k, v in losses_epoch.items(): 204 | losses_epoch[k] = v / len(self.train_dataloader) 205 | 206 | # self.writer.add_scalars(f'Losses/train', losses_epoch, iteration_num) 207 | for k, v in losses_epoch.items(): 208 | if not k[:11] == "mask_scores": 209 | self.writer.add_scalar('train/' + k, v, iteration_num) 210 | else: 211 | self.writer.add_scalar('train_mask_scores/' + k[12:], v, iteration_num) 212 | 213 | self.writer.add_scalar('Epoch', epoch, iteration_num) 214 | print('EPOCH AVG:', losses_epoch) 215 | 216 | def save_checkpoint(self, epoch, training_time, iteration_num): 217 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format(*[*convertSecs(training_time), training_time]) 218 | if not os.path.exists(path): 219 | save_dict = { 220 | 'training_time': training_time,'epoch': epoch, 'iteration_num': iteration_num, 221 | 'model_state_dict': self.model.state_dict(), 222 | 'optimizer_state_dict': self.optimizer.state_dict(), 223 | } 224 | torch.save(save_dict, path) 225 | 226 | def load_checkpoint(self, load_idx=-1, checkpoint=None): 227 | time_start = time.time() 228 | checkpoints = glob(self.cfg.checkpoint_path+'/*') 229 | 230 | if len(checkpoints) == 0: 231 | print('No checkpoints found at {}'.format(self.cfg.checkpoint_path)) 232 | return 0, 0, 0 233 | if checkpoint is None: 234 | checkpoints = [os.path.splitext(os.path.basename(path))[0].split('_')[-1] for path in checkpoints] 235 | checkpoints = np.array(checkpoints, dtype=float) 236 | checkpoints = np.sort(checkpoints) 237 | path = self.cfg.checkpoint_path + 'checkpoint_{}h:{}m:{}s_{}.tar'.format(*[*convertSecs(checkpoints[load_idx]), checkpoints[load_idx]]) 238 | else: 239 | path = self.cfg.checkpoint_path + '{}.tar'.format(checkpoint) 240 | print('Loading checkpoint from: {}'.format(path)) 241 | checkpoint = torch.load(path) 242 | if self.cfg.apple_warmstart: 243 | model_dict = 
self.model.state_dict() 244 | # 1. filter out unnecessary keys 245 | pretrained_dict = {k: v for k, v in checkpoint['model_state_dict'].items() if (k != "mlp_semantics.6.kernel" and k != "mlp_semantics.6.bias")} 246 | # 2. overwrite entries in the existing state dict 247 | model_dict.update(pretrained_dict) 248 | # 3. load the new state dict 249 | checkpoint['model_state_dict'] = model_dict 250 | missing_keys, unexpected_keys = self.model.load_state_dict(checkpoint['model_state_dict'], 251 | strict= not self.cfg.loose_model_loading) 252 | if len(missing_keys) == 0 and not self.cfg.apple_warmstart: 253 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 254 | 255 | epoch = checkpoint['epoch'] 256 | training_time = checkpoint['training_time'] 257 | iteration_num = checkpoint['iteration_num'] 258 | 259 | self.epoch = epoch 260 | print(f'Loaded checkpoint in {time.time() - time_start:.3f} seconds') 261 | return epoch, training_time, iteration_num 262 | 263 | def compute_val_loss(self, num_batches=5): 264 | self.model.eval() 265 | 266 | val_losses = defaultdict(int) 267 | for _ in range(num_batches): 268 | try: 269 | val_batch = self.val_data_iterator.next() 270 | except: 271 | self.val_data_iterator = self.val_dataset.get_loader().__iter__() 272 | val_batch = self.val_data_iterator.next() 273 | 274 | with torch.no_grad(): 275 | loss_dict = self.model.compute_loss( val_batch, self.epoch) 276 | for k, v in loss_dict.items(): 277 | val_losses[k] += v.item() 278 | print("[VAL]: Current losses: {} ".format({k: v.item() for k, v in loss_dict.items()})) 279 | # free memory from validation data 280 | del val_batch, loss_dict 281 | for k, v in val_losses.items(): 282 | val_losses[k] = v / num_batches 283 | 284 | return val_losses 285 | 286 | def start_train(rank, cfg, num_devices, train_dataset, val_dataset): 287 | dist.init_process_group( 288 | backend="nccl", 289 | init_method="tcp://127.0.0.1:33456", 290 | world_size=num_devices, 291 | rank=rank, 292 | ) 293 | torch.cuda.set_device(rank) 294 | model = Model(cfg, rank) 295 | train_dataloader = train_dataset.get_loader_multi_gpu(rank=rank, world_size=num_devices) 296 | trainer = Trainer(model, train_dataloader, val_dataset, cfg, rank) 297 | trainer.train_model(10000) 298 | 299 | if __name__ == '__main__': 300 | cfg = cfg_loader.get_config() 301 | 302 | if cfg.dataset_name == 'scannet': 303 | import dataprocessing.scannet as scannet 304 | semantic_valid_class_ids_torch = scannet.SCANNET_SEMANTIC_VALID_CLASS_IDS_torch 305 | is_foreground = scannet.is_foreground 306 | semantic_id2idx = scannet.SCANNET_SEMANTIC_ID2IDX 307 | instance_id2idx = scannet.SCANNET_INSTANCE_ID2IDX 308 | 309 | if not cfg.train_submission: 310 | val_dataset = ScanNet('val', cfg) 311 | train_dataset = ScanNet('train', cfg) 312 | else: 313 | val_dataset = None 314 | train_dataset = ScanNet('train+val', cfg) 315 | elif cfg.dataset_name == 'arkitscenes': 316 | import dataprocessing.arkitscenes as arkitscenes 317 | val_dataset = ARKitScenes('val', cfg, subsample_rate=cfg.subsample_rate) 318 | train_dataset = ARKitScenes('train', cfg, subsample_rate=cfg.subsample_rate) 319 | semantic_valid_class_ids_torch = arkitscenes.ARKITSCENES_SEMANTIC_VALID_CLASS_IDS_torch 320 | semantic_id2idx = arkitscenes.ARKITSCENES_SEMANTIC_ID2IDX 321 | instance_id2idx = arkitscenes.ARKITSCENES_INSTANCE_ID2IDX 322 | is_foreground = arkitscenes.is_foreground 323 | elif cfg.dataset_name == 's3dis': 324 | import dataprocessing.s3dis as s3dis 325 | val_dataset = S3DIS('val', cfg) 326 | 
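The `apple_warmstart` branch of `load_checkpoint` performs a partial warm start: it drops the final semantic-head parameters (whose shape depends on the dataset's class count), keeps every other pretrained weight, and fills the gaps from the freshly initialised model. A condensed sketch of that filtering, with the skipped keys taken from the code:

```python
def warmstart_state_dict(model, pretrained_state,
                         skip_keys=("mlp_semantics.6.kernel", "mlp_semantics.6.bias")):
    model_dict = model.state_dict()
    # 1. filter out the head that will be re-initialised
    kept = {k: v for k, v in pretrained_state.items() if k not in skip_keys}
    # 2. overwrite matching entries, leaving the skipped ones at their fresh init
    model_dict.update(kept)
    return model_dict
```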
train_dataset = S3DIS('train', cfg) 327 | semantic_valid_class_ids_torch = s3dis.S3DIS_SEMANTIC_VALID_CLASS_IDS_torch 328 | semantic_id2idx = s3dis.S3DIS_SEMANTIC_ID2IDX 329 | instance_id2idx = s3dis.S3DIS_INSTANCE_ID2IDX 330 | is_foreground = s3dis.is_foreground 331 | 332 | if cfg.fixed_seed: 333 | print('WARNING: fixed seed selected for training.') 334 | 335 | if cfg.multigpu: 336 | import torch.multiprocessing as mp 337 | mp.set_start_method('spawn') # said to be required in pytorch docs, for num_workers > 1 in dataloader 338 | 339 | num_devices = torch.cuda.device_count() 340 | mp.spawn(start_train, nprocs=num_devices, args=(cfg, num_devices, train_dataset, val_dataset)) 341 | else: 342 | model = Model(cfg, semantic_valid_class_ids_torch, semantic_id2idx, instance_id2idx, is_foreground) 343 | train_dataloader = train_dataset.get_loader() 344 | trainer = Trainer(model, train_dataloader, val_dataset, cfg) 345 | trainer.train_model(10000) 346 | 347 | --------------------------------------------------------------------------------
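For multi-GPU training the `__main__` block spawns one process per visible GPU; each process joins an NCCL group and pins itself to its own device before building the model (see `start_train`). A minimal runnable skeleton of that launch pattern, with the rendezvous address taken from the code and the per-rank work stubbed out:

```python
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def worker(rank, world_size):
    dist.init_process_group(backend="nccl",
                            init_method="tcp://127.0.0.1:33456",
                            world_size=world_size, rank=rank)
    torch.cuda.set_device(rank)     # one GPU per process
    # ... build the model / dataloader for this rank and train ...
    dist.destroy_process_group()

if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    mp.set_start_method('spawn')
    mp.spawn(worker, nprocs=world_size, args=(world_size,))
```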