├── LICENSE ├── README.md ├── install_requirements.sh ├── openmask3d ├── __init__.py ├── class_agnostic_mask_computation │ ├── benchmark │ │ ├── __init__.py │ │ ├── evaluate_semantic_instance.py │ │ ├── util.py │ │ └── util_3d.py │ ├── conf │ │ ├── __init__.py │ │ ├── augmentation │ │ │ ├── albumentations_aug.yaml │ │ │ └── volumentations_aug.yaml │ │ ├── callbacks │ │ │ └── callbacks_instance_segmentation.yaml │ │ ├── config_base_class_agn_masks_scannet200.yaml │ │ ├── config_base_class_agn_masks_single_scene.yaml │ │ ├── config_base_instance_segmentation.yaml │ │ ├── data │ │ │ ├── collation_functions │ │ │ │ ├── voxelize_collate.yaml │ │ │ │ └── voxelize_collate_merge.yaml │ │ │ ├── data_loaders │ │ │ │ ├── simple_loader.yaml │ │ │ │ └── simple_loader_save_memory.yaml │ │ │ ├── datasets │ │ │ │ ├── scannet.yaml │ │ │ │ └── scannet200.yaml │ │ │ └── indoor.yaml │ │ ├── logging │ │ │ ├── base.yaml │ │ │ ├── full.yaml │ │ │ ├── minimal.yaml │ │ │ └── offline.yaml │ │ ├── loss │ │ │ ├── cross_entropy.yaml │ │ │ ├── set_criterion.yaml │ │ │ └── set_criterion_custom_weights_1.yaml │ │ ├── matcher │ │ │ └── hungarian_matcher.yaml │ │ ├── metrics │ │ │ └── miou.yaml │ │ ├── model │ │ │ └── mask3d.yaml │ │ ├── optimizer │ │ │ ├── adamw.yaml │ │ │ └── adamw_lower.yaml │ │ ├── scheduler │ │ │ ├── exponentiallr.yaml │ │ │ ├── lambdalr.yaml │ │ │ └── onecyclelr.yaml │ │ └── trainer │ │ │ ├── trainer.yaml │ │ │ └── trainer600.yaml │ ├── datasets │ │ ├── outdoor_semseg.py │ │ ├── preprocessing │ │ │ ├── base_preprocessing.py │ │ │ └── scannet_preprocessing.py │ │ ├── random_cuboid.py │ │ ├── scannet200 │ │ │ ├── __init__.py │ │ │ ├── scannet200_constants.py │ │ │ └── scannet200_splits.py │ │ ├── semseg.py │ │ └── utils.py │ ├── get_masks_scannet200.py │ ├── get_masks_single_scene.py │ ├── models │ │ ├── __init__.py │ │ ├── criterion.py │ │ ├── mask3d.py │ │ ├── matcher.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── confusionmatrix.py │ │ │ └── metrics.py │ │ ├── misc.py │ 
│ ├── model.py │ │ ├── modules │ │ │ ├── 3detr_helpers.py │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── helpers_3detr.py │ │ │ ├── resnet_block.py │ │ │ └── senet_block.py │ │ ├── position_embedding.py │ │ ├── res16unet.py │ │ ├── resnet.py │ │ ├── resunet.py │ │ └── wrapper.py │ ├── scripts │ │ ├── scannet │ │ │ ├── scannet_benchmark.sh │ │ │ └── scannet_val.sh │ │ └── scannet200 │ │ │ ├── scannet200_benchmark.sh │ │ │ └── scannet200_val.sh │ ├── third_party │ │ └── pointnet2 │ │ │ ├── _ext_src │ │ │ ├── include │ │ │ │ ├── ball_query.h │ │ │ │ ├── cuda_utils.h │ │ │ │ ├── group_points.h │ │ │ │ ├── interpolate.h │ │ │ │ ├── sampling.h │ │ │ │ └── utils.h │ │ │ └── src │ │ │ │ ├── ball_query.cpp │ │ │ │ ├── ball_query_gpu.cu │ │ │ │ ├── bindings.cpp │ │ │ │ ├── group_points.cpp │ │ │ │ ├── group_points_gpu.cu │ │ │ │ ├── interpolate.cpp │ │ │ │ ├── interpolate_gpu.cu │ │ │ │ ├── sampling.cpp │ │ │ │ └── sampling_gpu.cu │ │ │ ├── pointnet2_modules.py │ │ │ ├── pointnet2_test.py │ │ │ ├── pointnet2_utils.py │ │ │ ├── pytorch_utils.py │ │ │ └── setup.py │ ├── trainer │ │ ├── __init__.py │ │ └── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── gradflow_check.py │ │ ├── kfold.py │ │ ├── pc_visualizations.py │ │ ├── point_cloud_utils.py │ │ ├── pointops2 │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── pointops.py │ │ │ ├── pointops2.py │ │ │ ├── pointops_ablation.py │ │ │ ├── test_attention_op_step1.py │ │ │ ├── test_attention_op_step1_v2.py │ │ │ ├── test_attention_op_step2.py │ │ │ ├── test_relative_pos_encoding_op_step1.py │ │ │ ├── test_relative_pos_encoding_op_step1_v2.py │ │ │ ├── test_relative_pos_encoding_op_step1_v3.py │ │ │ ├── test_relative_pos_encoding_op_step2.py │ │ │ └── test_relative_pos_encoding_op_step2_v2.py │ │ ├── setup.py │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── aggregation │ │ │ ├── aggregation_cuda.cpp │ │ │ ├── aggregation_cuda_kernel.cu │ │ │ └── aggregation_cuda_kernel.h │ │ │ ├── attention │ │ │ ├── 
attention_cuda.cpp │ │ │ ├── attention_cuda_kernel.cu │ │ │ └── attention_cuda_kernel.h │ │ │ ├── attention_v2 │ │ │ ├── attention_cuda_kernel_v2.cu │ │ │ ├── attention_cuda_kernel_v2.h │ │ │ └── attention_cuda_v2.cpp │ │ │ ├── cuda_utils.h │ │ │ ├── grouping │ │ │ ├── grouping_cuda.cpp │ │ │ ├── grouping_cuda_kernel.cu │ │ │ └── grouping_cuda_kernel.h │ │ │ ├── interpolation │ │ │ ├── interpolation_cuda.cpp │ │ │ ├── interpolation_cuda_kernel.cu │ │ │ └── interpolation_cuda_kernel.h │ │ │ ├── knnquery │ │ │ ├── knnquery_cuda.cpp │ │ │ ├── knnquery_cuda_kernel.cu │ │ │ └── knnquery_cuda_kernel.h │ │ │ ├── pointops_api.cpp │ │ │ ├── rpe │ │ │ ├── relative_pos_encoding_cuda.cpp │ │ │ ├── relative_pos_encoding_cuda_kernel.cu │ │ │ └── relative_pos_encoding_cuda_kernel.h │ │ │ ├── rpe_v2 │ │ │ ├── relative_pos_encoding_cuda_kernel_v2.cu │ │ │ ├── relative_pos_encoding_cuda_kernel_v2.h │ │ │ └── relative_pos_encoding_cuda_v2.cpp │ │ │ ├── sampling │ │ │ ├── sampling_cuda.cpp │ │ │ ├── sampling_cuda_kernel.cu │ │ │ └── sampling_cuda_kernel.h │ │ │ └── subtraction │ │ │ ├── subtraction_cuda.cpp │ │ │ ├── subtraction_cuda_kernel.cu │ │ │ └── subtraction_cuda_kernel.h │ │ ├── utils.py │ │ └── votenet_utils │ │ ├── box_util.py │ │ ├── eval_det.py │ │ ├── metric_util.py │ │ ├── nms.py │ │ ├── nn_distance.py │ │ ├── pc_util.py │ │ ├── tf_logger.py │ │ └── tf_visualizer.py ├── compute_features_scannet200.py ├── compute_features_single_scene.py ├── compute_masks_full_scannet200.sh ├── compute_masks_single_scene.sh ├── configs │ ├── openmask3d_inference.yaml │ └── openmask3d_scannet200_eval.yaml ├── data │ └── load.py ├── evaluation │ ├── eval_semantic_instance.py │ ├── run_eval_close_vocab_inst_seg.py │ ├── scannet_constants.py │ ├── util.py │ ├── util_3d.py │ └── val_scenes_scannet200.txt ├── mask_features_computation │ ├── features_extractor.py │ └── utils.py ├── utils.py └── visualization │ ├── constants.py │ └── viz_sim_score_export.py ├── pyproject.toml ├── 
run_openmask3d_scannet200_eval.sh └── run_openmask3d_single_scene.sh /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 OpenMask3D 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /install_requirements.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # OpenMask3D Installation 4 | # - If you encounter any problem with the Detectron2 or MinkowskiEngine installations, 5 | # it might be because gcc, g++, pybind11, or openblas are not set up properly. 6 | # First, make sure those are installed properly. 
7 | # - More details about installing on different platforms can be found in the GitHub repositories of 8 | # Detectron2(https://github.com/facebookresearch/detectron2) and MinkowskiEngine (https://github.com/NVIDIA/MinkowskiEngine). 9 | # - If you encounter any other problems, take a look at the installation guidelines in https://github.com/JonasSchult/Mask3D, which might be helpful as our mask module relies on Mask3D. 10 | 11 | # Note: The following commands were tested on Ubuntu 18.04 and 20.04, with CUDA 11.1 and 11.4. 12 | 13 | pip install torch==1.12.1 torchvision==0.13.1 -f https://download.pytorch.org/whl/cu113/torch_stable.html 14 | pip install ninja==1.10.2.3 15 | pip install pytorch-lightning==1.7.2 fire==0.5.0 imageio==2.23.0 tqdm==4.64.1 wandb==0.13.2 16 | pip install python-dotenv==0.21.0 pyviz3d==0.2.32 scipy==1.9.3 plyfile==0.7.4 scikit-learn==1.2.0 trimesh==3.17.1 loguru==0.6.0 albumentations==1.3.0 volumentations==0.1.8 17 | pip install antlr4-python3-runtime==4.8 black==21.4b2 omegaconf==2.0.6 hydra-core==1.0.5 --no-deps 18 | 19 | pip install 'git+https://github.com/facebookresearch/detectron2.git@710e7795d0eeadf9def0e7ef957eea13532e34cf' --no-deps 20 | 21 | conda install -y openblas-devel -c anaconda 22 | pip install -U git+https://github.com/NVIDIA/MinkowskiEngine -v --no-deps --config-settings="--blas_include_dirs=${CONDA_PREFIX}/include" --config-settings="--blas=openblas" 23 | 24 | pip install pynvml==11.4.1 gpustat==1.0.0 tabulate==0.9.0 pytest==7.2.0 tensorboardx==2.5.1 yapf==0.32.0 termcolor==2.1.1 addict==2.4.0 blessed==1.19.1 25 | pip install gorilla-core==0.2.7.8 26 | pip install matplotlib==3.7.2 27 | pip install cython 28 | 29 | pip install pycocotools==2.0.6 30 | pip install h5py==3.7.0 31 | pip install transforms3d==0.4.1 32 | pip install open3d==0.16.0 33 | pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.1+cu113.html 34 | pip install torchmetrics==0.11.0 35 | pip install setuptools==68.0.0 36 | 37 | pip install 
fvcore==0.1.5.post20221221 38 | pip install cloudpickle==2.1.0 39 | pip install Pillow==9.3.0 40 | 41 | cd openmask3d/class_agnostic_mask_computation/third_party/pointnet2 && pip install . 42 | 43 | pip install git+https://github.com/openai/CLIP.git@a9b1bf5920416aaeaec965c25dd9e8f98c864f16 --no-deps 44 | pip install git+https://github.com/facebookresearch/segment-anything.git@6fdee8f2727f4506cfbbe553e23b895e27956588 --no-deps 45 | pip install ftfy==6.1.1 46 | pip install regex==2023.10.3 -------------------------------------------------------------------------------- /openmask3d/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMask3D/openmask3d/3bc3fc52693b25668d0e91d55a2ea714544a4749/openmask3d/class_agnostic_mask_computation/benchmark/__init__.py -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/benchmark/util.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import csv 3 | try: 4 | import numpy as np 5 | except: 6 | print("Failed to import numpy package.") 7 | sys.exit(-1) 8 | try: 9 | import imageio 10 | except: 11 | print("Please install the module 'imageio' for image processing, e.g.") 12 | print("pip install imageio") 13 | sys.exit(-1) 14 | 15 | # print an error message and quit 16 | def print_error(message, user_fault=False): 17 | sys.stderr.write('ERROR: ' + str(message) + '\n') 18 | if user_fault: 19 | sys.exit(2) 20 | sys.exit(-1) 21 | 22 | 23 | # if string s represents an int 24 | def represents_int(s): 25 | try: 26 | int(s) 27 | return True 28 | except ValueError: 29 | return False 30 | 31 
| 32 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 33 | assert os.path.isfile(filename) 34 | mapping = dict() 35 | with open(filename) as csvfile: 36 | reader = csv.DictReader(csvfile, delimiter='\t') 37 | for row in reader: 38 | mapping[row[label_from]] = int(row[label_to]) 39 | # if ints convert 40 | if represents_int(list(mapping.keys())[0]): 41 | mapping = {int(k):v for k,v in mapping.items()} 42 | return mapping 43 | 44 | 45 | # input: scene_types.txt or scene_types_all.txt 46 | def read_scene_types_mapping(filename, remove_spaces=True): 47 | assert os.path.isfile(filename) 48 | mapping = dict() 49 | lines = open(filename).read().splitlines() 50 | lines = [line.split('\t') for line in lines] 51 | if remove_spaces: 52 | mapping = { x[1].strip():int(x[0]) for x in lines } 53 | else: 54 | mapping = { x[1]:int(x[0]) for x in lines } 55 | return mapping 56 | 57 | 58 | # color by label 59 | def visualize_label_image(filename, image): 60 | height = image.shape[0] 61 | width = image.shape[1] 62 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 63 | color_palette = create_color_palette() 64 | for idx, color in enumerate(color_palette): 65 | vis_image[image==idx] = color 66 | imageio.imwrite(filename, vis_image) 67 | 68 | 69 | # color by different instances (mod length of color palette) 70 | def visualize_instance_image(filename, image): 71 | height = image.shape[0] 72 | width = image.shape[1] 73 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 74 | color_palette = create_color_palette() 75 | instances = np.unique(image) 76 | for idx, inst in enumerate(instances): 77 | vis_image[image==inst] = color_palette[inst%len(color_palette)] 78 | imageio.imwrite(filename, vis_image) 79 | 80 | 81 | # color palette for nyu40 labels 82 | def create_color_palette(): 83 | return [ 84 | (0, 0, 0), 85 | (174, 199, 232), # wall 86 | (152, 223, 138), # floor 87 | (31, 119, 180), # cabinet 88 | (255, 187, 120), # bed 89 | (188, 189, 
34), # chair 90 | (140, 86, 75), # sofa 91 | (255, 152, 150), # table 92 | (214, 39, 40), # door 93 | (197, 176, 213), # window 94 | (148, 103, 189), # bookshelf 95 | (196, 156, 148), # picture 96 | (23, 190, 207), # counter 97 | (178, 76, 76), 98 | (247, 182, 210), # desk 99 | (66, 188, 102), 100 | (219, 219, 141), # curtain 101 | (140, 57, 197), 102 | (202, 185, 52), 103 | (51, 176, 203), 104 | (200, 54, 131), 105 | (92, 193, 61), 106 | (78, 71, 183), 107 | (172, 114, 82), 108 | (255, 127, 14), # refrigerator 109 | (91, 163, 138), 110 | (153, 98, 156), 111 | (140, 153, 101), 112 | (158, 218, 229), # shower curtain 113 | (100, 125, 154), 114 | (178, 127, 135), 115 | (120, 185, 128), 116 | (146, 111, 194), 117 | (44, 160, 44), # toilet 118 | (112, 128, 144), # sink 119 | (96, 207, 209), 120 | (227, 119, 194), # bathtub 121 | (213, 92, 176), 122 | (94, 106, 211), 123 | (82, 84, 163), # otherfurn 124 | (100, 85, 144) 125 | ] 126 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMask3D/openmask3d/3bc3fc52693b25668d0e91d55a2ea714544a4749/openmask3d/class_agnostic_mask_computation/conf/__init__.py -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/augmentation/albumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | __version__: 0.4.5 2 | transform: 3 | __class_fullname__: albumentations.core.composition.Compose 4 | additional_targets: {} 5 | bbox_params: null 6 | keypoint_params: null 7 | p: 1.0 8 | transforms: 9 | - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast 10 | always_apply: true 11 | brightness_by_max: true 12 | brightness_limit: 13 | - -0.2 14 | - 0.2 15 | contrast_limit: 16 | - -0.2 
17 | - 0.2 18 | p: 0.5 19 | - __class_fullname__: albumentations.augmentations.transforms.RGBShift 20 | always_apply: true 21 | b_shift_limit: 22 | - -20 23 | - 20 24 | g_shift_limit: 25 | - -20 26 | - 20 27 | p: 0.5 28 | r_shift_limit: 29 | - -20 30 | - 20 31 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/augmentation/volumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | # pi = 3.14159265358979 2 | # pi/2 = 1.57079632679489 3 | # pi/3 = 1.04719755119659 4 | # pi/6 = 0.52359877559829 5 | # pi/12 = 0.26179938779914 6 | # pi/24 = 0.13089969389957 7 | # 8 | __version__: 0.1.6 9 | transform: 10 | __class_fullname__: volumentations.core.composition.Compose 11 | additional_targets: {} 12 | p: 1.0 13 | transforms: 14 | - __class_fullname__: volumentations.augmentations.transforms.Scale3d 15 | always_apply: true 16 | p: 0.5 17 | scale_limit: 18 | - - -0.1 19 | - 0.1 20 | - - -0.1 21 | - 0.1 22 | - - -0.1 23 | - 0.1 24 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 25 | always_apply: true 26 | axis: 27 | - 0 28 | - 0 29 | - 1 30 | p: 0.5 31 | rotation_limit: 32 | - -3.141592653589793 33 | - 3.141592653589793 34 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 35 | always_apply: true 36 | axis: 37 | - 0 38 | - 1 39 | - 0 40 | p: 0.5 41 | rotation_limit: 42 | - -0.13089969389957 43 | - 0.13089969389957 44 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 45 | always_apply: true 46 | axis: 47 | - 1 48 | - 0 49 | - 0 50 | p: 0.5 51 | rotation_limit: 52 | - -0.13089969389957 53 | - 0.13089969389957 54 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/callbacks/callbacks_instance_segmentation.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.callbacks.ModelCheckpoint 3 | monitor: val_mean_ap_50 4 | save_last: true 5 | save_top_k: 1 6 | mode: max 7 | dirpath: ${general.save_dir} 8 | filename: "{epoch}-{val_mean_ap_50:.3f}" 9 | every_n_epochs: 1 10 | 11 | - _target_: pytorch_lightning.callbacks.LearningRateMonitor 12 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/config_base_class_agn_masks_scannet200.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | train_mode: false 3 | task: "instance_segmentation" 4 | seed: null 5 | checkpoint: null 6 | backbone_checkpoint: null 7 | freeze_backbone: false # train only last layer 8 | linear_probing_backbone: false 9 | train_on_segments: true 10 | eval_on_segments: true 11 | filter_out_instances: false 12 | save_visualizations: false 13 | visualization_point_size: 20 14 | decoder_id: -1 15 | export: false 16 | use_dbscan: true 17 | ignore_class_threshold: 100 18 | project_name: scannet200 19 | workspace: username 20 | experiment_name: DEBUG_ABLATION 21 | num_targets: 201 22 | add_instance: true 23 | dbscan_eps: 0.95 24 | dbscan_min_points: 1 25 | input_mode: 'full_dataset' 26 | mask_save_dir: null 27 | 28 | 29 | export_threshold: 0.0001 30 | 31 | reps_per_epoch: 1 32 | 33 | on_crops: false 34 | 35 | scores_threshold: 0.0 36 | iou_threshold: 1.0 37 | 38 | area: 5 39 | 40 | eval_inner_core: -1 # disabled 41 | 42 | topk_per_image: 750 43 | 44 | ignore_mask_idx: [] 45 | 46 | max_batch_size: 99999999 47 | 48 | save_dir: saved/${general.experiment_name} 49 | # time/commit/md5(config)_uuid 50 | # time/experiment_id/version_uuid 51 | # experiment_id: 1 # commit[:8], or unique from logger 52 | # version: 1 # md5[:8] of config 53 | 54 | gpus: 1 55 | 56 | defaults: 57 | - data: indoor 58 | - 
data/data_loaders: simple_loader 59 | - data/datasets: scannet200 60 | - data/collation_functions: voxelize_collate 61 | - logging: full 62 | - model: mask3d 63 | - metrics: miou 64 | - optimizer: adamw 65 | - scheduler: onecyclelr 66 | - trainer: trainer600 67 | - callbacks: callbacks_instance_segmentation 68 | - matcher: hungarian_matcher 69 | - loss: set_criterion 70 | 71 | hydra: 72 | run: 73 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 74 | sweep: 75 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 76 | # dir: ${general.save_dir} 77 | subdir: ${hydra.job.num}_${hydra.job.id} 78 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/config_base_class_agn_masks_single_scene.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | train_mode: false 3 | task: "instance_segmentation" 4 | seed: null 5 | checkpoint: null 6 | backbone_checkpoint: null 7 | freeze_backbone: false # train only last layer 8 | linear_probing_backbone: false 9 | train_on_segments: false 10 | eval_on_segments: false 11 | filter_out_instances: false 12 | save_visualizations: false 13 | visualization_point_size: 20 14 | decoder_id: -1 15 | export: false 16 | use_dbscan: true 17 | ignore_class_threshold: 100 18 | project_name: scannet200 19 | workspace: username 20 | experiment_name: DEBUG_ABLATION 21 | num_targets: 201 22 | add_instance: true 23 | dbscan_eps: 0.95 24 | dbscan_min_points: 1 25 | input_mode: 'single_scene' 26 | scene_path: null 27 | mask_save_dir: null 28 | 29 | 30 | export_threshold: 0.0001 31 | 32 | reps_per_epoch: 1 33 | 34 | on_crops: false 35 | 36 | scores_threshold: 0.0 37 | iou_threshold: 1.0 38 | 39 | area: 5 40 | 41 | eval_inner_core: -1 # disabled 42 | 43 | topk_per_image: 750 44 | 45 | ignore_mask_idx: [] 46 | 47 | max_batch_size: 99999999 48 | 49 | save_dir: saved/${general.experiment_name} 50 | # time/commit/md5(config)_uuid 
51 | # time/experiment_id/version_uuid 52 | # experiment_id: 1 # commit[:8], or unique from logger 53 | # version: 1 # md5[:8] of config 54 | 55 | gpus: 1 56 | 57 | defaults: 58 | - data: indoor 59 | - data/data_loaders: simple_loader 60 | - data/datasets: scannet200 61 | - data/collation_functions: voxelize_collate 62 | - logging: full 63 | - model: mask3d 64 | - metrics: miou 65 | - optimizer: adamw 66 | - scheduler: onecyclelr 67 | - trainer: trainer600 68 | - callbacks: callbacks_instance_segmentation 69 | - matcher: hungarian_matcher 70 | - loss: set_criterion 71 | 72 | hydra: 73 | run: 74 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 75 | sweep: 76 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 77 | # dir: ${general.save_dir} 78 | subdir: ${hydra.job.num}_${hydra.job.id} 79 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/config_base_instance_segmentation.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | train_mode: false 3 | task: "instance_segmentation" 4 | seed: null 5 | checkpoint: null 6 | backbone_checkpoint: null 7 | freeze_backbone: false # train only last layer 8 | linear_probing_backbone: false 9 | train_on_segments: false 10 | eval_on_segments: false 11 | filter_out_instances: false 12 | save_visualizations: false 13 | visualization_point_size: 20 14 | decoder_id: -1 15 | export: false 16 | use_dbscan: false 17 | ignore_class_threshold: 100 18 | project_name: scannet 19 | workspace: username 20 | experiment_name: DEBUG_ABLATION 21 | num_targets: 19 22 | add_instance: true 23 | dbscan_eps: 0.95 24 | dbscan_min_points: 1 25 | 26 | 27 | export_threshold: 0.0001 28 | 29 | reps_per_epoch: 1 30 | 31 | on_crops: false 32 | 33 | scores_threshold: 0.0 34 | iou_threshold: 1.0 35 | 36 | area: 5 37 | 38 | eval_inner_core: -1 # disabled 39 | 40 | topk_per_image: 100 41 | 42 | ignore_mask_idx: [] 43 | 44 | 
max_batch_size: 99999999 45 | 46 | save_dir: saved/${general.experiment_name} 47 | # time/commit/md5(config)_uuid 48 | # time/experiment_id/version_uuid 49 | # experiment_id: 1 # commit[:8], or unique from logger 50 | # version: 1 # md5[:8] of config 51 | 52 | gpus: 1 53 | 54 | defaults: 55 | - data: indoor 56 | - data/data_loaders: simple_loader 57 | - data/datasets: scannet 58 | - data/collation_functions: voxelize_collate 59 | - logging: full 60 | - model: mask3d 61 | - metrics: miou 62 | - optimizer: adamw 63 | - scheduler: onecyclelr 64 | - trainer: trainer600 65 | - callbacks: callbacks_instance_segmentation 66 | - matcher: hungarian_matcher 67 | - loss: set_criterion 68 | 69 | hydra: 70 | run: 71 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 72 | sweep: 73 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 74 | # dir: ${general.save_dir} 75 | subdir: ${hydra.job.num}_${hydra.job.id} 76 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/collation_functions/voxelize_collate.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_collation: 4 | _target_: datasets.utils.VoxelizeCollate 5 | ignore_label: ${data.ignore_label} 6 | voxel_size: ${data.voxel_size} 7 | mode: ${data.train_mode} 8 | small_crops: false 9 | very_small_crops: false 10 | batch_instance: false 11 | probing: ${general.linear_probing_backbone} 12 | task: ${general.task} 13 | ignore_class_threshold: ${general.ignore_class_threshold} 14 | filter_out_classes: ${data.train_dataset.filter_out_classes} 15 | label_offset: ${data.train_dataset.label_offset} 16 | num_queries: ${model.num_queries} 17 | 18 | validation_collation: 19 | _target_: datasets.utils.VoxelizeCollate 20 | ignore_label: ${data.ignore_label} 21 | voxel_size: ${data.voxel_size} 22 | mode: ${data.validation_mode} 23 | batch_instance: false 24 | probing: 
${general.linear_probing_backbone} 25 | task: ${general.task} 26 | ignore_class_threshold: ${general.ignore_class_threshold} 27 | filter_out_classes: ${data.validation_dataset.filter_out_classes} 28 | label_offset: ${data.validation_dataset.label_offset} 29 | num_queries: ${model.num_queries} 30 | 31 | test_collation: 32 | _target_: datasets.utils.VoxelizeCollate 33 | ignore_label: ${data.ignore_label} 34 | voxel_size: ${data.voxel_size} 35 | mode: ${data.test_mode} 36 | batch_instance: false 37 | probing: ${general.linear_probing_backbone} 38 | task: ${general.task} 39 | ignore_class_threshold: ${general.ignore_class_threshold} 40 | filter_out_classes: ${data.test_dataset.filter_out_classes} 41 | label_offset: ${data.test_dataset.label_offset} 42 | num_queries: ${model.num_queries} 43 | input_mode: ${data.test_dataset.input_mode} -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/collation_functions/voxelize_collate_merge.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_collation: 4 | _target_: datasets.utils.VoxelizeCollateMerge 5 | ignore_label: ${data.ignore_label} 6 | voxel_size: ${data.voxel_size} 7 | mode: ${data.train_mode} 8 | small_crops: false 9 | very_small_crops: false 10 | scenes: 2 11 | batch_instance: false 12 | make_one_pc_noise: false 13 | place_nearby: false 14 | place_far: false 15 | proba: 1 16 | probing: ${general.linear_probing_backbone} 17 | include_ignore: ${general.include_ignore} 18 | task: ${general.task} 19 | 20 | validation_collation: 21 | _target_: datasets.utils.VoxelizeCollate 22 | ignore_label: ${data.ignore_label} 23 | voxel_size: ${data.voxel_size} 24 | mode: ${data.validation_mode} 25 | probing: ${general.linear_probing_backbone} 26 | include_ignore: ${general.include_ignore} 27 | task: ${general.task} 28 | 29 | test_collation: 30 | _target_: datasets.utils.VoxelizeCollate 31 
| ignore_label: ${data.ignore_label} 32 | voxel_size: ${data.voxel_size} 33 | mode: ${data.test_mode} 34 | probing: ${general.linear_probing_backbone} 35 | include_ignore: ${general.include_ignore} 36 | task: ${general.task} 37 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/data_loaders/simple_loader.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_dataloader: 4 | _target_: torch.utils.data.DataLoader 5 | shuffle: true 6 | pin_memory: ${data.pin_memory} 7 | num_workers: ${data.num_workers} 8 | batch_size: ${data.batch_size} 9 | 10 | validation_dataloader: 11 | _target_: torch.utils.data.DataLoader 12 | shuffle: false 13 | pin_memory: ${data.pin_memory} 14 | num_workers: ${data.num_workers} 15 | batch_size: ${data.test_batch_size} 16 | 17 | test_dataloader: 18 | _target_: torch.utils.data.DataLoader 19 | shuffle: false 20 | pin_memory: ${data.pin_memory} 21 | num_workers: ${data.num_workers} 22 | batch_size: ${data.test_batch_size} 23 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/data_loaders/simple_loader_save_memory.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_dataloader: 4 | _target_: torch.utils.data.DataLoader 5 | shuffle: true 6 | pin_memory: ${data.pin_memory} 7 | num_workers: ${data.num_workers} 8 | batch_size: ${data.batch_size} 9 | 10 | validation_dataloader: 11 | _target_: torch.utils.data.DataLoader 12 | shuffle: false 13 | pin_memory: ${data.pin_memory} 14 | num_workers: 1 15 | batch_size: ${data.test_batch_size} 16 | 17 | test_dataloader: 18 | _target_: torch.utils.data.DataLoader 19 | shuffle: false 20 | pin_memory: ${data.pin_memory} 21 | num_workers: 1 22 | batch_size: ${data.test_batch_size} 23 | 
-------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/datasets/scannet.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | train_dataset: 3 | _target_: datasets.semseg.SemanticSegmentationDataset 4 | dataset_name: "scannet" 5 | data_dir: data/processed/scannet 6 | image_augmentations_path: class_agnostic_mask_computation/conf/augmentation/albumentations_aug.yaml 7 | volume_augmentations_path: class_agnostic_mask_computation/conf/augmentation/volumentations_aug.yaml 8 | label_db_filepath: data/processed/scannet/label_database.yaml 9 | color_mean_std: data/processed/scannet/color_mean_std.yaml 10 | data_percent: 1.0 11 | mode: ${data.train_mode} 12 | ignore_label: ${data.ignore_label} 13 | num_labels: ${data.num_labels} 14 | add_raw_coordinates: ${data.add_raw_coordinates} 15 | add_colors: ${data.add_colors} 16 | add_normals: ${data.add_normals} 17 | add_instance: ${data.add_instance} 18 | # different augs experiments 19 | instance_oversampling: 0.0 20 | place_around_existing: false 21 | point_per_cut: 0 22 | max_cut_region: 0 23 | flip_in_center: false 24 | noise_rate: 0 25 | resample_points: 0 26 | add_unlabeled_pc: false 27 | cropping: ${data.cropping} 28 | cropping_args: ${data.cropping_args} 29 | is_tta: false 30 | crop_min_size: ${data.crop_min_size} 31 | crop_length: ${data.crop_length} 32 | filter_out_classes: [0, 1] 33 | label_offset: 2 34 | 35 | validation_dataset: 36 | _target_: datasets.semseg.SemanticSegmentationDataset 37 | dataset_name: "scannet" 38 | data_dir: data/processed/scannet 39 | image_augmentations_path: null 40 | volume_augmentations_path: null 41 | label_db_filepath: data/processed/scannet/label_database.yaml 42 | color_mean_std: data/processed/scannet/color_mean_std.yaml 43 | data_percent: 1.0 44 | mode: ${data.validation_mode} 45 | ignore_label: ${data.ignore_label} 46 | num_labels: ${data.num_labels} 
47 | add_raw_coordinates: ${data.add_raw_coordinates} 48 | add_colors: ${data.add_colors} 49 | add_normals: ${data.add_normals} 50 | add_instance: ${data.add_instance} 51 | cropping: false 52 | is_tta: false 53 | crop_min_size: ${data.crop_min_size} 54 | crop_length: ${data.crop_length} 55 | filter_out_classes: [0, 1] 56 | label_offset: 2 57 | 58 | test_dataset: 59 | _target_: datasets.semseg.SemanticSegmentationDataset 60 | dataset_name: "scannet" 61 | data_dir: data_short/processed/scannet 62 | image_augmentations_path: null 63 | volume_augmentations_path: null 64 | label_db_filepath: data_short/processed/scannet/label_database.yaml 65 | color_mean_std: data_short/processed/scannet/color_mean_std.yaml 66 | data_percent: 1.0 67 | mode: ${data.test_mode} 68 | ignore_label: ${data.ignore_label} 69 | num_labels: ${data.num_labels} 70 | add_raw_coordinates: ${data.add_raw_coordinates} 71 | add_colors: ${data.add_colors} 72 | add_normals: ${data.add_normals} 73 | add_instance: ${data.add_instance} 74 | cropping: false 75 | is_tta: false 76 | crop_min_size: ${data.crop_min_size} 77 | crop_length: ${data.crop_length} 78 | filter_out_classes: [0, 1] 79 | label_offset: 2 80 | input_mode: ${data.input_mode} -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/datasets/scannet200.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | train_dataset: 3 | _target_: datasets.semseg.SemanticSegmentationDataset 4 | dataset_name: "scannet200" 5 | data_dir: data/processed/scannet200 6 | image_augmentations_path: class_agnostic_mask_computation/conf/augmentation/albumentations_aug.yaml 7 | volume_augmentations_path: class_agnostic_mask_computation/conf/augmentation/volumentations_aug.yaml 8 | label_db_filepath: data/processed/scannet200/label_database.yaml 9 | color_mean_std: data/processed/scannet200/color_mean_std.yaml 10 | data_percent: 1.0 11 | 
mode: ${data.train_mode} 12 | ignore_label: ${data.ignore_label} 13 | num_labels: ${data.num_labels} 14 | add_raw_coordinates: ${data.add_raw_coordinates} 15 | add_colors: ${data.add_colors} 16 | add_normals: ${data.add_normals} 17 | add_instance: ${data.add_instance} 18 | # different augs experiments 19 | instance_oversampling: 0.0 20 | place_around_existing: false 21 | point_per_cut: 0 22 | max_cut_region: 0 23 | flip_in_center: false 24 | noise_rate: 0 25 | resample_points: 0 26 | add_unlabeled_pc: false 27 | cropping: ${data.cropping} 28 | cropping_args: ${data.cropping_args} 29 | is_tta: false 30 | crop_min_size: ${data.crop_min_size} 31 | crop_length: ${data.crop_length} 32 | filter_out_classes: [0, 2] 33 | label_offset: 2 34 | 35 | validation_dataset: 36 | _target_: datasets.semseg.SemanticSegmentationDataset 37 | dataset_name: "scannet200" 38 | data_dir: data/processed/scannet200 39 | image_augmentations_path: null 40 | volume_augmentations_path: null 41 | label_db_filepath: data/processed/scannet200/label_database.yaml 42 | color_mean_std: data/processed/scannet200/color_mean_std.yaml 43 | data_percent: 1.0 44 | mode: ${data.validation_mode} 45 | ignore_label: ${data.ignore_label} 46 | num_labels: ${data.num_labels} 47 | add_raw_coordinates: ${data.add_raw_coordinates} 48 | add_colors: ${data.add_colors} 49 | add_normals: ${data.add_normals} 50 | add_instance: ${data.add_instance} 51 | cropping: false 52 | is_tta: false 53 | crop_min_size: ${data.crop_min_size} 54 | crop_length: ${data.crop_length} 55 | filter_out_classes: [0, 2] 56 | label_offset: 2 57 | 58 | test_dataset: 59 | _target_: datasets.semseg.SemanticSegmentationDataset 60 | dataset_name: "scannet200" 61 | data_dir: data/processed/scannet200 62 | image_augmentations_path: null 63 | volume_augmentations_path: null 64 | label_db_filepath: data/processed/scannet200/label_database.yaml 65 | color_mean_std: data/processed/scannet200/color_mean_std.yaml 66 | data_percent: 1.0 67 | mode: 
${data.test_mode} 68 | ignore_label: ${data.ignore_label} 69 | num_labels: ${data.num_labels} 70 | add_raw_coordinates: ${data.add_raw_coordinates} 71 | add_colors: ${data.add_colors} 72 | add_normals: ${data.add_normals} 73 | add_instance: ${data.add_instance} 74 | cropping: false 75 | is_tta: false 76 | crop_min_size: ${data.crop_min_size} 77 | crop_length: ${data.crop_length} 78 | filter_out_classes: [0, 2] 79 | label_offset: 2 80 | input_mode: ${data.input_mode} 81 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/data/indoor.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | # these parameters are inherited by datasets, data_loaders and collators 4 | # but they might be overwritten 5 | 6 | # splits 7 | train_mode: train 8 | validation_mode: validation 9 | test_mode: validation # test # validation 10 | 11 | # dataset 12 | ignore_label: 255 13 | add_raw_coordinates: true # 3dim 14 | add_colors: true # 3dim 15 | add_normals: false # 3dim 16 | in_channels: 3 # in_channels = 3 * (add_normals + add_colors + add_raw_coordinates) 17 | num_labels: 200 18 | input_mode: ${general.input_mode} 19 | add_instance: ${general.add_instance} 20 | task: ${general.task} 21 | 22 | # data loader 23 | pin_memory: false 24 | num_workers: 4 25 | batch_size: 5 26 | test_batch_size: 1 27 | cache_data: false 28 | 29 | # collation 30 | voxel_size: 0.02 31 | 32 | reps_per_epoch: ${general.reps_per_epoch} 33 | 34 | cropping: false 35 | cropping_args: 36 | min_points: 30000 37 | aspect: 0.8 38 | min_crop: 0.5 39 | max_crop: 1.0 40 | 41 | crop_min_size: 20000 42 | crop_length: 6.0 43 | cropping_v1: true -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/logging/base.yaml: -------------------------------------------------------------------------------- 1 | # @package 
_group_ 2 | - _target_: pytorch_lightning.loggers.NeptuneLogger 3 | project_name: ${general.workspace}/${general.project_name} 4 | experiment_name: ${general.experiment_name} 5 | offline_mode: false 6 | 7 | - _target_: pytorch_lightning.loggers.CSVLogger 8 | save_dir: ${general.save_dir} 9 | name: ${general.experiment_id} 10 | version: ${general.version} 11 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/logging/full.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.loggers.WandbLogger 3 | project: ${general.project_name} 4 | name: ${general.experiment_name} 5 | save_dir: ${general.save_dir} 6 | entity: "wandb_username" 7 | resume: "allow" 8 | id: ${general.experiment_name} 9 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/logging/minimal.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.loggers.CSVLogger 3 | save_dir: ${general.save_dir} 4 | name: ${general.experiment_id} 5 | version: ${general.version} 6 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/logging/offline.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.loggers.TensorBoardLogger 3 | name: ${general.experiment_id} 4 | version: ${general.version} 5 | save_dir: ${general.save_dir} 6 | 7 | - _target_: pytorch_lightning.loggers.CSVLogger 8 | name: ${general.experiment_id} 9 | version: ${general.version} 10 | save_dir: ${general.save_dir} -------------------------------------------------------------------------------- 
/openmask3d/class_agnostic_mask_computation/conf/loss/cross_entropy.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.nn.CrossEntropyLoss 3 | ignore_index: ${data.ignore_label} 4 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/loss/set_criterion.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.criterion.SetCriterion 3 | num_classes: ${general.num_targets} 4 | eos_coef: 0.1 5 | losses: 6 | - "labels" 7 | - "masks" 8 | num_points: ${matcher.num_points} 9 | oversample_ratio: 3.0 10 | importance_sample_ratio: 0.75 11 | class_weights: -1 12 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/loss/set_criterion_custom_weights_1.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.criterion.SetCriterion 3 | num_classes: ${general.num_targets} 4 | eos_coef: 0.1 5 | losses: 6 | - "labels" 7 | - "masks" 8 | num_points: ${matcher.num_points} 9 | oversample_ratio: 3.0 10 | importance_sample_ratio: 0.75 11 | class_weights: [1.0,1.5,10.0,1.0,1.0,1.0,1.0,1.0,10.0,10.0,1.0,10.0,1.0,1.0] 12 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/matcher/hungarian_matcher.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.matcher.HungarianMatcher 3 | cost_class: 2. 4 | cost_mask: 5. 5 | cost_dice: 2. 
6 | num_points: -1 7 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/metrics/miou.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.metrics.ConfusionMatrix 3 | num_classes: ${data.num_labels} 4 | ignore_label: ${data.ignore_label} 5 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/model/mask3d.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.Mask3D 3 | 4 | # transformer parameters 5 | hidden_dim: 128 6 | dim_feedforward: 1024 7 | num_queries: 100 8 | num_heads: 8 9 | num_decoders: 3 10 | dropout: 0.0 11 | pre_norm: false 12 | use_level_embed: false 13 | normalize_pos_enc: true 14 | positional_encoding_type: "fourier" 15 | gauss_scale: 1.0 16 | hlevels: [0,1,2,3] 17 | 18 | # queries 19 | non_parametric_queries: true 20 | random_query_both: false 21 | random_normal: false 22 | random_queries: false 23 | use_np_features: false 24 | 25 | # sampling 26 | sample_sizes: [200, 800, 3200, 12800, 51200] 27 | max_sample_size: false # change false means sampling activated 28 | 29 | shared_decoder: true 30 | num_classes: ${general.num_targets} 31 | train_on_segments: ${general.train_on_segments} 32 | scatter_type: "mean" 33 | 34 | voxel_size: ${data.voxel_size} 35 | 36 | config: 37 | backbone: 38 | _target_: models.Res16UNet34C 39 | config: 40 | dialations: [ 1, 1, 1, 1 ] 41 | conv1_kernel_size: 5 42 | bn_momentum: 0.02 43 | # depends on normals, color, raw_coordinates 44 | # varies from 3 to 9 45 | in_channels: ${data.in_channels} 46 | out_channels: ${data.num_labels} 47 | out_fpn: true 48 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/optimizer/adamw.yaml: 
-------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.optim.AdamW 3 | lr: 0.0001 -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/optimizer/adamw_lower.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.optim.AdamW 3 | lr: 0.005 4 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/scheduler/exponentiallr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.ExponentialLR 5 | gamma: 0.99999 6 | last_epoch: -1 # ${trainer.max_epochs} 7 | # need to set to number because of tensorboard logger 8 | # steps_per_epoch: -1 9 | 10 | pytorch_lightning_params: 11 | interval: step 12 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/scheduler/lambdalr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.StepLR 5 | step_size: 99999 6 | 7 | pytorch_lightning_params: 8 | interval: epoch 9 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/conf/scheduler/onecyclelr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.OneCycleLR 5 | max_lr: ${optimizer.lr} 6 | epochs: ${trainer.max_epochs} 7 | # need to set to number because of tensorboard logger 8 | steps_per_epoch: -1 9 | 10 | pytorch_lightning_params: 11 | interval: step 12 | 
def check_aspect(crop_range, aspect_min):
    """Return True if any pairwise aspect ratio of ``crop_range`` reaches ``aspect_min``.

    ``crop_range`` is indexed with ``[:2]``, ``[[0, 2]]`` and ``[1:]``, so it has
    to be a NumPy array; the three pairs are named xy, xz and yz, i.e. it is
    presumably a length-3 (x, y, z) extent vector.
    """
    xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2])
    xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]])
    yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:])
    return (
        (xy_aspect >= aspect_min)
        or (xz_aspect >= aspect_min)
        or (yz_aspect >= aspect_min)
    )


class RandomCuboid(object):
    """
    RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691]

    Selects the points that fall inside a randomly placed, axis-aligned square
    window of side ``crop_length`` in the x/y plane. Only the first two columns
    of the point cloud are inspected, so the crop spans the full height.

    Args:
        min_points: minimum number of points a crop must contain. Clouds with
            fewer points than this are returned uncropped.
        crop_length: side length of the x/y window.
        version1: if True the window centre is jittered by up to a quarter of
            the cloud's x/y extent; otherwise by up to
            ``extent / 2 - crop_length / 4`` per axis.
    """

    # Number of random placements tried before giving up and keeping every point.
    _MAX_TRIES = 100

    def __init__(
        self,
        min_points,
        crop_length=6.0,
        version1=True
    ):
        self.crop_length = crop_length
        self.min_points = min_points
        self.version1 = version1

    def __call__(self, point_cloud):
        """Return a boolean mask of shape ``(point_cloud.shape[0],)``.

        The mask is all-True when the cloud is smaller than ``min_points`` or
        when no sampled window contained at least ``min_points`` points.
        """
        num_points = point_cloud.shape[0]
        if num_points < self.min_points:
            print("too small pcd")
            # builtin bool: the `np.bool` alias does not exist in NumPy 1.24-1.26
            return np.ones(num_points, dtype=bool)

        xy = point_cloud[:, :2]
        xy_min = np.min(xy, axis=0)
        range_xy = np.max(xy, axis=0) - xy_min
        cloud_center = xy_min + range_xy / 2
        half_length = self.crop_length / 2

        for _ in range(self._MAX_TRIES):
            # Two separate draws, x first, so seeded runs consume the global
            # RNG in the same order as before.
            if self.version1:
                offset_x = np.random.uniform(-range_xy[0] / 4, range_xy[0] / 4)
                offset_y = np.random.uniform(-range_xy[1] / 4, range_xy[1] / 4)
            else:
                offset_x = np.random.uniform(
                    -(range_xy[0] / 2) + self.crop_length / 4,
                    +(range_xy[0] / 2) - self.crop_length / 4,
                )
                offset_y = np.random.uniform(
                    -(range_xy[1] / 2) + self.crop_length / 4,
                    +(range_xy[1] / 2) - self.crop_length / 4,
                )

            sample_center = cloud_center.copy()
            sample_center[0] = sample_center[0] + offset_x
            sample_center[1] = sample_center[1] + offset_y

            min_xy = sample_center - half_length
            max_xy = sample_center + half_length

            # A point is kept when both its x and y lie inside the window.
            new_pointidx = np.all((xy >= min_xy) & (xy <= max_xy), axis=1)

            if np.sum(new_pointidx) < self.min_points:
                print("TOO SMALL")
                continue

            return new_pointidx

        # fallback: no placement was dense enough, keep the whole cloud
        print("FALLBACK")
        return np.ones(num_points, dtype=bool)
config_name="config_base_class_agn_masks_scannet200.yaml") 38 | def get_class_agnostic_masks_scannet200(cfg: DictConfig): 39 | 40 | os.chdir(hydra.utils.get_original_cwd()) 41 | cfg, model, _ = get_parameters(cfg) 42 | test_dataset = hydra.utils.instantiate(cfg.data.test_dataset) 43 | c_fn = hydra.utils.instantiate(cfg.data.test_collation) 44 | 45 | test_dataloader = hydra.utils.instantiate( 46 | cfg.data.test_dataloader, 47 | test_dataset, 48 | collate_fn=c_fn, 49 | ) 50 | model.freeze() 51 | #print(list(test_dataloader)) 52 | runner = Trainer( 53 | gpus=cfg.general.gpus, 54 | logger=None, 55 | **cfg.trainer 56 | ) 57 | runner.test(model) 58 | 59 | 60 | @hydra.main(config_path="conf", config_name="config_base_class_agn_masks_scannet200.yaml") 61 | def main(cfg: DictConfig): 62 | get_class_agnostic_masks_scannet200(cfg) 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/get_masks_single_scene.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import hydra 4 | from dotenv import load_dotenv 5 | from omegaconf import DictConfig 6 | from trainer.trainer import InstanceSegmentation, RegularCheckpointing 7 | from utils.utils import ( 8 | load_checkpoint_with_missing_or_exsessive_keys, 9 | load_backbone_checkpoint_with_missing_or_exsessive_keys 10 | ) 11 | from pytorch_lightning import Trainer 12 | import open3d as o3d 13 | import numpy as np 14 | import torch 15 | import time 16 | import pdb 17 | 18 | def get_parameters(cfg: DictConfig): 19 | #logger = logging.getLogger(__name__) 20 | load_dotenv(".env") 21 | 22 | # getting basic configuration 23 | if cfg.general.get("gpus", None) is None: 24 | cfg.general.gpus = os.environ.get("CUDA_VISIBLE_DEVICES", None) 25 | #loggers = [] 26 | 27 | model = InstanceSegmentation(cfg) 28 | if cfg.general.backbone_checkpoint is not None: 29 
def load_ply(filepath):
    """Read a point cloud with Open3D and return ``(coords, colors, normals)``.

    Normals are produced by calling ``estimate_normals()`` on the loaded cloud;
    all three values are the NumPy views of the cloud's points, colors and
    normals.
    """
    pcd = o3d.io.read_point_cloud(filepath)
    pcd.estimate_normals()
    coords = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    normals = np.asarray(pcd.normals)
    return coords, colors, normals


def process_file(filepath):
    """Load one point-cloud file and wrap it as a single-sample batch.

    Returns a list holding one sample, itself a list of
    ``[coords, features, labels, scene_name, raw_colors, raw_normals,
    raw_coordinates, index]`` where ``features`` is colors and coordinates
    stacked column-wise, ``labels`` is an empty list and ``index`` is 0.

    Raises:
        ValueError: if the file does not provide exactly one color per point
            (for example a cloud saved without colors).
    """
    coords, colors, normals = load_ply(filepath)
    raw_coordinates = coords.copy()
    # assumes colors are floats in [0, 1] as returned by Open3D -- TODO confirm
    raw_colors = (colors * 255).astype(np.uint8)
    raw_normals = normals

    if len(colors.shape) == 1:
        features = np.hstack((colors[None, ...], coords))
    else:
        if colors.shape[0] != coords.shape[0]:
            # Fail with a readable message instead of np.hstack's shape error.
            raise ValueError(
                f"{filepath}: expected one color per point, got "
                f"{colors.shape[0]} colors for {coords.shape[0]} points"
            )
        features = np.hstack((colors, coords))

    # Scene name = file name without directory or extension; os.path handles
    # platform separators and extensions of any length.
    filename = os.path.splitext(os.path.basename(filepath))[0]
    return [[coords, features, [], filename, raw_colors, raw_normals, raw_coordinates, 0]]
", end - start) 81 | 82 | @hydra.main(config_path="conf", config_name="config_base_class_agn_masks_single_scene.yaml") 83 | def main(cfg: DictConfig): 84 | get_class_agnostic_masks(cfg) 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/__init__.py: -------------------------------------------------------------------------------- 1 | import models.resunet as resunet 2 | import models.res16unet as res16unet 3 | from models.res16unet import Res16UNet34C, Res16UNet34A, Res16UNet14A, Res16UNet34D, Res16UNet18D, Res16UNet18B, Custom30M 4 | from models.mask3d import Mask3D 5 | 6 | MODELS = [] 7 | 8 | 9 | def add_models(module): 10 | MODELS.extend([getattr(module, a) for a in dir(module) if "Net" in a]) 11 | 12 | 13 | add_models(resunet) 14 | add_models(res16unet) 15 | add_models(mask3d) 16 | 17 | 18 | def get_models(): 19 | """Returns a tuple of sample models.""" 20 | return MODELS 21 | 22 | 23 | def load_model(name): 24 | """Creates and returns an instance of the model given its class name.""" 25 | # Find the model class from its name 26 | all_models = get_models() 27 | mdict = {model.__name__: model for model in all_models} 28 | if name not in mdict: 29 | print("Invalid model index. 
class ConfusionMatrix:
    """Constructs a confusion matrix for a multi-class classification problems.

    Does not support multi-label, multi-class problems.

    Keyword arguments:
    - num_classes (int): number of classes in the classification problem.
    - ignore_label (int or sequence of int): target value(s) whose samples are
      dropped before counting.

    Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter/confusionmeter.py
    """

    def __init__(self, num_classes, ignore_label):
        super().__init__()

        self.conf = np.zeros((num_classes, num_classes), dtype=np.int32)
        self.ignore_label = ignore_label
        self.num_classes = num_classes
        self.reset()

    def reset(self):
        """Set every cell of the accumulated matrix back to zero."""
        self.conf.fill(0)

    @staticmethod
    def _to_numpy(values):
        """Convert a torch tensor (any device) or array-like to a NumPy array."""
        if torch.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    def add(self, predicted, target):
        """Computes the confusion matrix

        The shape of the confusion matrix is K x K, where K is the number
        of classes.

        Keyword arguments:
        - predicted (Tensor or numpy.ndarray): Can be an N x K tensor/array of
        predicted scores obtained from the model for N examples and K classes,
        or an N-tensor/array of integer values between 0 and K-1.
        - target (Tensor or numpy.ndarray): Can be an N x K tensor/array of
        ground-truth classes for N examples and K classes, or an N-tensor/array
        of integer values between 0 and K-1.

        Samples whose target equals ``ignore_label`` are skipped. A batch with
        no remaining samples leaves the matrix unchanged.
        """
        predicted = self._to_numpy(predicted)
        target = self._to_numpy(target)
        k = self.num_classes

        # Validation stays as assertions so callers keep seeing AssertionError.
        assert (
            predicted.shape[0] == target.shape[0]
        ), "number of targets and predicted outputs do not match"

        # Reduce scores / one-hot rows to class indices first, so the ignore
        # mask below is always one boolean per sample.
        if np.ndim(predicted) != 1:
            assert (
                predicted.shape[1] == k
            ), "number of predictions does not match size of confusion matrix"
            predicted = np.argmax(predicted, 1)

        if np.ndim(target) != 1:
            assert (
                target.shape[1] == k
            ), "Onehot target does not match size of confusion matrix"
            assert (target >= 0).all() and (
                target <= 1
            ).all(), "in one-hot encoding, target values should be 0 or 1"
            assert (target.sum(1) == 1).all(), "multi-label setting is not supported"
            target = np.argmax(target, 1)

        keep = ~np.isin(target, self.ignore_label)
        # Widen before the index arithmetic: with uint8 labels
        # `predicted + k * target` would wrap around.
        predicted = predicted[keep].astype(np.int64)
        target = target[keep].astype(np.int64)

        if target.size == 0:
            # Nothing to count; max()/min() below would raise on empty arrays.
            return

        assert (predicted.max() < k) and (
            predicted.min() >= 0
        ), "predicted values are not between 0 and k-1"
        assert (target.max() < k) and (
            target.min() >= 0
        ), "target values are not between 0 and k-1"

        # hack for bincounting 2 arrays together: cell (t, p) -> flat index t*k + p
        flat_index = predicted + k * target
        bincount_2d = np.bincount(flat_index, minlength=k ** 2)
        assert bincount_2d.size == k ** 2
        conf = bincount_2d.reshape((k, k))

        self.conf += conf.astype(self.conf.dtype)

    def value(self, normalized=False):
        """
        Returns:
            Confusion matrix of K rows and K columns, where rows correspond
            to ground-truth targets and columns correspond to predicted
            targets. With ``normalized=True`` each row is divided by its sum
            (rows without samples stay all-zero) and the result is float32.
        """
        if normalized:
            conf = self.conf.astype(np.float32)
            return conf / conf.sum(1).clip(min=1e-12)[:, None]
        return self.conf
The first output is the per class IoU, 35 | for K classes it's numpy.ndarray with K elements. The second output, 36 | is the mean IoU. 37 | """ 38 | true_positive = np.diag(conf_matrix) 39 | false_positive = np.sum(conf_matrix, 0) - true_positive 40 | false_negative = np.sum(conf_matrix, 1) - true_positive 41 | 42 | # Just in case we get a division by 0, ignore/hide the error 43 | with np.errstate(divide="ignore", invalid="ignore"): 44 | iou = true_positive / (true_positive + false_positive + false_negative) 45 | 46 | return iou 47 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/model.py: -------------------------------------------------------------------------------- 1 | from MinkowskiEngine import MinkowskiNetwork 2 | 3 | 4 | class Model(MinkowskiNetwork): 5 | """ 6 | Base network for all sparse convnet 7 | 8 | By default, all networks are segmentation networks. 9 | """ 10 | 11 | OUT_PIXEL_DIST = -1 12 | 13 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 14 | super().__init__(D) 15 | self.in_channels = in_channels 16 | self.out_channels = out_channels 17 | self.config = config 18 | 19 | 20 | class HighDimensionalModel(Model): 21 | """ 22 | Base network for all spatio (temporal) chromatic sparse convnet 23 | """ 24 | 25 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 26 | assert D > 4, "Num dimension smaller than 5" 27 | super().__init__(in_channels, out_channels, config, D, **kwargs) 28 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/modules/3detr_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
class BatchNormDim1Swap(nn.BatchNorm1d):
    """
    Used for nn.Transformer that uses a HW x N x C rep
    """

    def forward(self, x):
        """
        x: HW x N x C
        permute to N x C x HW
        Apply BN on C
        permute back
        """
        hw, n, c = x.shape
        x = x.permute(1, 2, 0)
        x = super(BatchNormDim1Swap, self).forward(x)
        # x: n x c x hw -> hw x n x c
        x = x.permute(2, 0, 1)
        return x


NORM_DICT = {
    "bn": BatchNormDim1Swap,
    "bn1d": nn.BatchNorm1d,
    "id": nn.Identity,
    "ln": nn.LayerNorm,
}

ACTIVATION_DICT = {
    "relu": nn.ReLU,
    "gelu": nn.GELU,
    "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1),
}

WEIGHT_INIT_DICT = {
    "xavier_uniform": nn.init.xavier_uniform_,
}


class GenericMLP(nn.Module):
    """Configurable stack of ``nn.Linear`` (or kernel-size-1 ``nn.Conv1d``) layers.

    Every hidden layer is ``layer -> [norm] -> activation -> [dropout]``; the
    output layer is ``layer -> [norm] -> [activation]``.

    Args:
        input_dim: width of the input feature axis.
        hidden_dims: sequence of hidden-layer widths (may be empty).
        output_dim: width of the output feature axis.
        norm_fn_name: key of ``NORM_DICT``, or None for no normalization.
        activation: key of ``ACTIVATION_DICT``.
        use_conv: build ``nn.Conv1d(..., 1)`` layers instead of ``nn.Linear``.
            Combined with ``norm_fn_name="ln"`` the norm is ``nn.GroupNorm(1, C)``.
        dropout: None, one probability used after every hidden layer, or a
            list/tuple with one probability per hidden layer.
        hidden_use_bias: ``bias`` flag of the hidden layers.
        output_use_bias: ``bias`` flag of the output layer.
        output_use_activation: append the activation after the output layer.
        output_use_norm: append the norm after the output layer.
        weight_init_name: key of ``WEIGHT_INIT_DICT``, or None to keep the
            default initialization.

    Raises:
        KeyError: unknown ``activation``, ``norm_fn_name`` or ``weight_init_name``.
        ValueError: ``output_use_norm`` is set but no norm was configured.
    """

    def __init__(
        self,
        input_dim,
        hidden_dims,
        output_dim,
        norm_fn_name=None,
        activation="relu",
        use_conv=False,
        dropout=None,
        hidden_use_bias=False,
        output_use_bias=True,
        output_use_activation=False,
        output_use_norm=False,
        weight_init_name=None,
    ):
        super().__init__()
        activation = ACTIVATION_DICT[activation]
        norm = None
        if norm_fn_name is not None:
            norm = NORM_DICT[norm_fn_name]
        if norm_fn_name == "ln" and use_conv:
            norm = lambda x: nn.GroupNorm(1, x)  # easier way to use LayerNorm

        if output_use_norm and norm is None:
            # Previously surfaced as "'NoneType' object is not callable".
            raise ValueError("output_use_norm=True requires norm_fn_name to be set")

        if dropout is not None and not isinstance(dropout, (list, tuple)):
            # Broadcast a single probability to every hidden layer.
            dropout = [dropout] * len(hidden_dims)

        def make_layer(in_dim, out_dim, bias):
            if use_conv:
                return nn.Conv1d(in_dim, out_dim, 1, bias=bias)
            return nn.Linear(in_dim, out_dim, bias=bias)

        # Modules are created in forward order so seeded initialization is
        # unchanged.
        layers = []
        prev_dim = input_dim
        for idx, hidden_dim in enumerate(hidden_dims):
            layers.append(make_layer(prev_dim, hidden_dim, hidden_use_bias))
            if norm is not None:
                layers.append(norm(hidden_dim))
            layers.append(activation())
            if dropout is not None:
                layers.append(nn.Dropout(p=dropout[idx]))
            prev_dim = hidden_dim

        layers.append(make_layer(prev_dim, output_dim, output_use_bias))

        if output_use_norm:
            layers.append(norm(output_dim))

        if output_use_activation:
            layers.append(activation())

        self.layers = nn.Sequential(*layers)

        if weight_init_name is not None:
            self.do_weight_init(weight_init_name)

    def do_weight_init(self, weight_init_name):
        """Apply the named initializer to every parameter with more than one dimension."""
        func = WEIGHT_INIT_DICT[weight_init_name]
        for param in self.parameters():
            if param.dim() > 1:  # skips batchnorm/layernorm
                func(param)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)


def get_clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
2 | import torch.nn as nn 3 | from functools import partial 4 | import copy 5 | 6 | 7 | class BatchNormDim1Swap(nn.BatchNorm1d): 8 | """ 9 | Used for nn.Transformer that uses a HW x N x C rep 10 | """ 11 | 12 | def forward(self, x): 13 | """ 14 | x: HW x N x C 15 | permute to N x C x HW 16 | Apply BN on C 17 | permute back 18 | """ 19 | hw, n, c = x.shape 20 | x = x.permute(1, 2, 0) 21 | x = super(BatchNormDim1Swap, self).forward(x) 22 | # x: n x c x hw -> hw x n x c 23 | x = x.permute(2, 0, 1) 24 | return x 25 | 26 | 27 | NORM_DICT = { 28 | "bn": BatchNormDim1Swap, 29 | "bn1d": nn.BatchNorm1d, 30 | "id": nn.Identity, 31 | "ln": nn.LayerNorm, 32 | } 33 | 34 | ACTIVATION_DICT = { 35 | "relu": nn.ReLU, 36 | "gelu": nn.GELU, 37 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 38 | } 39 | 40 | WEIGHT_INIT_DICT = { 41 | "xavier_uniform": nn.init.xavier_uniform_, 42 | } 43 | 44 | 45 | class GenericMLP(nn.Module): 46 | def __init__( 47 | self, 48 | input_dim, 49 | hidden_dims, 50 | output_dim, 51 | norm_fn_name=None, 52 | activation="relu", 53 | use_conv=False, 54 | dropout=None, 55 | hidden_use_bias=False, 56 | output_use_bias=True, 57 | output_use_activation=False, 58 | output_use_norm=False, 59 | weight_init_name=None, 60 | ): 61 | super().__init__() 62 | activation = ACTIVATION_DICT[activation] 63 | norm = None 64 | if norm_fn_name is not None: 65 | norm = NORM_DICT[norm_fn_name] 66 | if norm_fn_name == "ln" and use_conv: 67 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 68 | 69 | if dropout is not None: 70 | if not isinstance(dropout, list): 71 | dropout = [dropout for _ in range(len(hidden_dims))] 72 | 73 | layers = [] 74 | prev_dim = input_dim 75 | for idx, x in enumerate(hidden_dims): 76 | if use_conv: 77 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 78 | else: 79 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 80 | layers.append(layer) 81 | if norm: 82 | layers.append(norm(x)) 83 | layers.append(activation()) 84 | 
if dropout is not None: 85 | layers.append(nn.Dropout(p=dropout[idx])) 86 | prev_dim = x 87 | if use_conv: 88 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 89 | else: 90 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 91 | layers.append(layer) 92 | 93 | if output_use_norm: 94 | layers.append(norm(output_dim)) 95 | 96 | if output_use_activation: 97 | layers.append(activation()) 98 | 99 | self.layers = nn.Sequential(*layers) 100 | 101 | if weight_init_name is not None: 102 | self.do_weight_init(weight_init_name) 103 | 104 | def do_weight_init(self, weight_init_name): 105 | func = WEIGHT_INIT_DICT[weight_init_name] 106 | for (_, param) in self.named_parameters(): 107 | if param.dim() > 1: # skips batchnorm/layernorm 108 | func(param) 109 | 110 | def forward(self, x): 111 | output = self.layers(x) 112 | return output 113 | 114 | 115 | def get_clones(module, N): 116 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from MinkowskiEngine import MinkowskiReLU 3 | 4 | from models.modules.common import ConvType, NormType, conv, get_norm 5 | 6 | 7 | class BasicBlockBase(nn.Module): 8 | expansion = 1 9 | NORM_TYPE = NormType.BATCH_NORM 10 | 11 | def __init__( 12 | self, 13 | inplanes, 14 | planes, 15 | stride=1, 16 | dilation=1, 17 | downsample=None, 18 | conv_type=ConvType.HYPERCUBE, 19 | bn_momentum=0.1, 20 | D=3, 21 | ): 22 | super().__init__() 23 | 24 | self.conv1 = conv( 25 | inplanes, 26 | planes, 27 | kernel_size=3, 28 | stride=stride, 29 | dilation=dilation, 30 | conv_type=conv_type, 31 | D=D, 32 | ) 33 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 34 | self.conv2 = conv( 35 | planes, 36 | planes, 37 | kernel_size=3, 38 | 
stride=1, 39 | dilation=dilation, 40 | bias=False, 41 | conv_type=conv_type, 42 | D=D, 43 | ) 44 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 45 | self.relu = MinkowskiReLU(inplace=True) 46 | self.downsample = downsample 47 | 48 | def forward(self, x): 49 | residual = x 50 | 51 | out = self.conv1(x) 52 | out = self.norm1(out) 53 | out = self.relu(out) 54 | 55 | out = self.conv2(out) 56 | out = self.norm2(out) 57 | 58 | if self.downsample is not None: 59 | residual = self.downsample(x) 60 | 61 | out += residual 62 | out = self.relu(out) 63 | 64 | return out 65 | 66 | 67 | class BasicBlock(BasicBlockBase): 68 | NORM_TYPE = NormType.BATCH_NORM 69 | 70 | 71 | class BasicBlockIN(BasicBlockBase): 72 | NORM_TYPE = NormType.INSTANCE_NORM 73 | 74 | 75 | class BasicBlockINBN(BasicBlockBase): 76 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 77 | 78 | 79 | class BottleneckBase(nn.Module): 80 | expansion = 4 81 | NORM_TYPE = NormType.BATCH_NORM 82 | 83 | def __init__( 84 | self, 85 | inplanes, 86 | planes, 87 | stride=1, 88 | dilation=1, 89 | downsample=None, 90 | conv_type=ConvType.HYPERCUBE, 91 | bn_momentum=0.1, 92 | D=3, 93 | ): 94 | super().__init__() 95 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 96 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 97 | 98 | self.conv2 = conv( 99 | planes, 100 | planes, 101 | kernel_size=3, 102 | stride=stride, 103 | dilation=dilation, 104 | conv_type=conv_type, 105 | D=D, 106 | ) 107 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 108 | 109 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 110 | self.norm3 = get_norm( 111 | self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum 112 | ) 113 | 114 | self.relu = MinkowskiReLU(inplace=True) 115 | self.downsample = downsample 116 | 117 | def forward(self, x): 118 | residual = x 119 | 120 | out = self.conv1(x) 121 | out = self.norm1(out) 122 | out = self.relu(out) 
123 | 124 | out = self.conv2(out) 125 | out = self.norm2(out) 126 | out = self.relu(out) 127 | 128 | out = self.conv3(out) 129 | out = self.norm3(out) 130 | 131 | if self.downsample is not None: 132 | residual = self.downsample(x) 133 | 134 | out += residual 135 | out = self.relu(out) 136 | 137 | return out 138 | 139 | 140 | class Bottleneck(BottleneckBase): 141 | NORM_TYPE = NormType.BATCH_NORM 142 | 143 | 144 | class BottleneckIN(BottleneckBase): 145 | NORM_TYPE = NormType.INSTANCE_NORM 146 | 147 | 148 | class BottleneckINBN(BottleneckBase): 149 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 150 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/modules/senet_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import MinkowskiEngine as ME 3 | 4 | from mix3d.models.modules.common import ConvType, NormType 5 | from mix3d.models.modules.resnet_block import BasicBlock, Bottleneck 6 | 7 | 8 | class SELayer(nn.Module): 9 | def __init__(self, channel, reduction=16, D=-1): 10 | # Global coords does not require coords_key 11 | super().__init__() 12 | self.fc = nn.Sequential( 13 | ME.MinkowskiLinear(channel, channel // reduction), 14 | ME.MinkowskiReLU(inplace=True), 15 | ME.MinkowskiLinear(channel // reduction, channel), 16 | ME.MinkowskiSigmoid(), 17 | ) 18 | self.pooling = ME.MinkowskiGlobalPooling(dimension=D) 19 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D) 20 | 21 | def forward(self, x): 22 | y = self.pooling(x) 23 | y = self.fc(y) 24 | return self.broadcast_mul(x, y) 25 | 26 | 27 | class SEBasicBlock(BasicBlock): 28 | def __init__( 29 | self, 30 | inplanes, 31 | planes, 32 | stride=1, 33 | dilation=1, 34 | downsample=None, 35 | conv_type=ConvType.HYPERCUBE, 36 | reduction=16, 37 | D=-1, 38 | ): 39 | super().__init__( 40 | inplanes, 41 | planes, 42 | stride=stride, 43 | dilation=dilation, 44 | 
downsample=downsample, 45 | conv_type=conv_type, 46 | D=D, 47 | ) 48 | self.se = SELayer(planes, reduction=reduction, D=D) 49 | 50 | def forward(self, x): 51 | residual = x 52 | 53 | out = self.conv1(x) 54 | out = self.norm1(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv2(out) 58 | out = self.norm2(out) 59 | out = self.se(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class SEBasicBlockSN(SEBasicBlock): 71 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 72 | 73 | 74 | class SEBasicBlockIN(SEBasicBlock): 75 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 76 | 77 | 78 | class SEBasicBlockLN(SEBasicBlock): 79 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 80 | 81 | 82 | class SEBottleneck(Bottleneck): 83 | def __init__( 84 | self, 85 | inplanes, 86 | planes, 87 | stride=1, 88 | dilation=1, 89 | downsample=None, 90 | conv_type=ConvType.HYPERCUBE, 91 | D=3, 92 | reduction=16, 93 | ): 94 | super().__init__( 95 | inplanes, 96 | planes, 97 | stride=stride, 98 | dilation=dilation, 99 | downsample=downsample, 100 | conv_type=conv_type, 101 | D=D, 102 | ) 103 | self.se = SELayer(planes * self.expansion, reduction=reduction, D=D) 104 | 105 | def forward(self, x): 106 | residual = x 107 | 108 | out = self.conv1(x) 109 | out = self.norm1(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv2(out) 113 | out = self.norm2(out) 114 | out = self.relu(out) 115 | 116 | out = self.conv3(out) 117 | out = self.norm3(out) 118 | out = self.se(out) 119 | 120 | if self.downsample is not None: 121 | residual = self.downsample(x) 122 | 123 | out += residual 124 | out = self.relu(out) 125 | 126 | return out 127 | 128 | 129 | class SEBottleneckSN(SEBottleneck): 130 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 131 | 132 | 133 | class SEBottleneckIN(SEBottleneck): 134 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 135 | 136 | 137 | class SEBottleneckLN(SEBottleneck): 138 | NORM_TYPE 
= NormType.SPARSE_LAYER_NORM 139 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/models/wrapper.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from torch.nn import Module 4 | from MinkowskiEngine import SparseTensor 5 | 6 | 7 | class Wrapper(Module): 8 | """ 9 | Wrapper for the segmentation networks. 10 | """ 11 | 12 | OUT_PIXEL_DIST = -1 13 | 14 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 15 | super().__init__() 16 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 17 | 18 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 19 | raise NotImplementedError("Must initialize a model and a filter") 20 | 21 | def forward(self, x, coords, colors=None): 22 | soutput = self.model(x) 23 | 24 | # During training, make the network invariant to the filter 25 | if not self.training or random.random() < 0.5: 26 | # Filter requires the model to finish the forward pass 27 | wrapper_coords = self.filter.initialize_coords(self.model, coords, colors) 28 | finput = SparseTensor(soutput.F, wrapper_coords) 29 | soutput = self.filter(finput) 30 | return soutput 31 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/scripts/scannet/scannet_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURR_DBSCAN=0.95 5 | CURR_TOPK=300 6 | CURR_QUERY=150 7 | 8 | # TRAIN 9 | python main_instance_segmentation.py \ 10 | general.experiment_name="benchmark" \ 11 | general.eval_on_segments=true \ 12 | general.train_on_segments=true \ 13 | data.train_mode=train_validation 14 | 15 | # TEST 16 | python main_instance_segmentation.py \ 17 | 
general.experiment_name="benchmark_query_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}" \ 18 | general.project_name="scannet_eval" \ 19 | general.checkpoint='checkpoints/scannet/scannet_benchmark.ckpt' \ 20 | general.eval_on_segments=true \ 21 | general.train_on_segments=true \ 22 | general.train_mode=false \ 23 | general.export=true \ 24 | data.test_mode=test \ 25 | model.num_queries=${CURR_QUERY} \ 26 | general.topk_per_image=${CURR_TOPK} \ 27 | general.use_dbscan=true \ 28 | general.dbscan_eps=${CURR_DBSCAN} 29 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/scripts/scannet/scannet_val.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURR_DBSCAN=0.95 5 | CURR_TOPK=500 6 | CURR_QUERY=150 7 | 8 | # TRAIN 9 | python main_instance_segmentation.py \ 10 | general.experiment_name="validation" \ 11 | general.eval_on_segments=true \ 12 | general.train_on_segments=true 13 | 14 | # TEST 15 | python main_instance_segmentation.py \ 16 | general.experiment_name="validation_query_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}" \ 17 | general.project_name="scannet_eval" \ 18 | general.checkpoint='checkpoints/scannet/scannet_val.ckpt' \ 19 | general.train_mode=false \ 20 | general.eval_on_segments=true \ 21 | general.train_on_segments=true \ 22 | model.num_queries=${CURR_QUERY} \ 23 | general.topk_per_image=${CURR_TOPK} \ 24 | general.use_dbscan=true \ 25 | general.dbscan_eps=${CURR_DBSCAN} 26 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/scripts/scannet200/scannet200_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURR_DBSCAN=0.95 5 | CURR_TOPK=300 6 | 
CURR_QUERY=150
CURR_T=0.001

# TRAIN
python main_instance_segmentation.py \
general.experiment_name="scannet200_benchmark" \
general.project_name="scannet200" \
data/datasets=scannet200 \
general.num_targets=201 \
data.num_labels=200 \
general.eval_on_segments=true \
general.train_on_segments=true \
data.train_mode=train_validation

# TEST
python main_instance_segmentation.py \
general.experiment_name="scannet200_benchmark_query_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}_export_${CURR_T}" \
general.project_name="scannet200_eval" \
general.checkpoint="checkpoints/scannet200/scannet200_benchmark.ckpt" \
data/datasets=scannet200 \
general.num_targets=201 \
data.num_labels=200 \
general.eval_on_segments=true \
general.train_on_segments=true \
general.train_mode=false \
model.num_queries=${CURR_QUERY} \
general.topk_per_image=${CURR_TOPK} \
general.use_dbscan=true \
general.dbscan_eps=${CURR_DBSCAN} \
general.export=true \
data.test_mode=test \
general.export_threshold=${CURR_T}

# --------------------------------------------------------------------------------
# /openmask3d/class_agnostic_mask_computation/scripts/scannet200/scannet200_val.sh:
# --------------------------------------------------------------------------------
#!/bin/bash
export OMP_NUM_THREADS=3  # speeds up MinkowskiEngine

CURR_DBSCAN=0.95
CURR_TOPK=750
CURR_QUERY=150

# Run from the directory that contains main_instance_segmentation.py, like the
# sibling scripts do (the script used to point at a developer-local absolute path).

# TRAIN
python main_instance_segmentation.py \
general.experiment_name="scannet200_val" \
general.project_name="scannet200" \
data/datasets=scannet200 \
general.num_targets=201 \
data.num_labels=200 \
general.eval_on_segments=true \
general.train_on_segments=true

# TEST
python main_instance_segmentation.py \
general.experiment_name="scannet200_val_query_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}" \
general.project_name="scannet200_eval" \
general.checkpoint="checkpoints/scannet200/scannet200_val.ckpt" \
data/datasets=scannet200 \
general.num_targets=201 \
data.num_labels=200 \
general.eval_on_segments=true \
general.train_on_segments=true \
general.train_mode=false \
model.num_queries=${CURR_QUERY} \
general.topk_per_image=${CURR_TOPK} \
general.use_dbscan=true \
general.dbscan_eps=${CURR_DBSCAN}

// --------------------------------------------------------------------------------
// /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/ball_query.h:
// --------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.

#pragma once
#include <torch/extension.h>

// For every query point in new_xyz, returns the indices (int tensor of shape
// new_xyz.size(0) x new_xyz.size(1) x nsample) of up to nsample points of xyz
// lying within `radius`. CUDA tensors only.
at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
                      const int nsample);

// --------------------------------------------------------------------------------
// /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/cuda_utils.h:
// --------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | #ifndef _CUDA_UTILS_H 4 | #define _CUDA_UTILS_H 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #define TOTAL_THREADS 512 16 | 17 | inline int opt_n_threads(int work_size) { 18 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 19 | 20 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 21 | } 22 | 23 | inline dim3 opt_block_config(int x, int y) { 24 | const int x_threads = opt_n_threads(x); 25 | const int y_threads = 26 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 27 | dim3 block_config(x_threads, y_threads, 1); 28 | 29 | return block_config; 30 | } 31 | 32 | #define CUDA_CHECK_ERRORS() \ 33 | do { \ 34 | cudaError_t err = cudaGetLastError(); \ 35 | if (cudaSuccess != err) { \ 36 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 37 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 38 | __FILE__); \ 39 | exit(-1); \ 40 | } \ 41 | } while (0) 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include 6 | 7 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 10 | at::Tensor weight); 11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 12 | at::Tensor weight, const int m); 13 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include 6 | 7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 10 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | 4 | #pragma once 5 | #include 6 | #include 7 | 8 | #define CHECK_CUDA(x) \ 9 | do { \ 10 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \ 11 | } while (0) 12 | 13 | #define CHECK_CONTIGUOUS(x) \ 14 | do { \ 15 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \ 16 | } while (0) 17 | 18 | #define CHECK_IS_INT(x) \ 19 | do { \ 20 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \ 21 | #x " must be an int tensor"); \ 22 | } while (0) 23 | 24 | #define CHECK_IS_FLOAT(x) \ 25 | do { \ 26 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \ 27 | #x " must be a float tensor"); \ 28 | } while (0) 29 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "ball_query.h" 5 | #include "utils.h" 6 | 7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 8 | int nsample, const float *new_xyz, 9 | const float *xyz, int *idx); 10 | 11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 12 | const int nsample) { 13 | CHECK_CONTIGUOUS(new_xyz); 14 | CHECK_CONTIGUOUS(xyz); 15 | CHECK_IS_FLOAT(new_xyz); 16 | CHECK_IS_FLOAT(xyz); 17 | 18 | if (new_xyz.is_cuda()) { 19 | CHECK_CUDA(xyz); 20 | } 21 | 22 | at::Tensor idx = 23 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 24 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 25 | 26 | if (new_xyz.is_cuda()) { 27 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 28 | radius, nsample, new_xyz.data(), 29 | xyz.data(), idx.data()); 30 | } else { 31 | AT_ASSERT(false, "CPU not supported"); 32 | } 33 | 34 | return idx; 35 | } 36 | -------------------------------------------------------------------------------- 
/openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 11 | // output: idx(b, m, nsample) 12 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 13 | int nsample, 14 | const float *__restrict__ new_xyz, 15 | const float *__restrict__ xyz, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | xyz += batch_index * n * 3; 19 | new_xyz += batch_index * m * 3; 20 | idx += m * nsample * batch_index; 21 | 22 | int index = threadIdx.x; 23 | int stride = blockDim.x; 24 | 25 | float radius2 = radius * radius; 26 | for (int j = index; j < m; j += stride) { 27 | float new_x = new_xyz[j * 3 + 0]; 28 | float new_y = new_xyz[j * 3 + 1]; 29 | float new_z = new_xyz[j * 3 + 2]; 30 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 31 | float x = xyz[k * 3 + 0]; 32 | float y = xyz[k * 3 + 1]; 33 | float z = xyz[k * 3 + 2]; 34 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 35 | (new_z - z) * (new_z - z); 36 | if (d2 < radius2) { 37 | if (cnt == 0) { 38 | for (int l = 0; l < nsample; ++l) { 39 | idx[j * nsample + l] = k; 40 | } 41 | } 42 | idx[j * nsample + cnt] = k; 43 | ++cnt; 44 | } 45 | } 46 | } 47 | } 48 | 49 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 50 | int nsample, const float *new_xyz, 51 | const float *xyz, int *idx) { 52 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 53 | query_ball_point_kernel<<>>( 54 | b, n, m, radius, nsample, new_xyz, xyz, idx); 55 | 56 | CUDA_CHECK_ERRORS(); 57 | } 58 | -------------------------------------------------------------------------------- 
// /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/bindings.cpp:
// --------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.


#include "ball_query.h"
#include "group_points.h"
#include "interpolate.h"
#include "sampling.h"

// Exposes the C++ entry points declared in the four headers above to Python,
// each under the same name as the C++ function.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // sampling.h
  m.def("gather_points", &gather_points);
  m.def("gather_points_grad", &gather_points_grad);
  m.def("furthest_point_sampling", &furthest_point_sampling);

  // interpolate.h
  m.def("three_nn", &three_nn);
  m.def("three_interpolate", &three_interpolate);
  m.def("three_interpolate_grad", &three_interpolate_grad);

  // ball_query.h
  m.def("ball_query", &ball_query);

  // group_points.h
  m.def("group_points", &group_points);
  m.def("group_points_grad", &group_points_grad);
}

// --------------------------------------------------------------------------------
// /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/group_points.cpp:
// --------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | 4 | #include "group_points.h" 5 | #include "utils.h" 6 | 7 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 8 | const float *points, const int *idx, 9 | float *out); 10 | 11 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 12 | int nsample, const float *grad_out, 13 | const int *idx, float *grad_points); 14 | 15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), idx.size(2), points.data(), 32 | idx.data(), output.data()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 41 | CHECK_CONTIGUOUS(grad_out); 42 | CHECK_CONTIGUOUS(idx); 43 | CHECK_IS_FLOAT(grad_out); 44 | CHECK_IS_INT(idx); 45 | 46 | if (grad_out.is_cuda()) { 47 | CHECK_CUDA(idx); 48 | } 49 | 50 | at::Tensor output = 51 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 52 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 53 | 54 | if (grad_out.is_cuda()) { 55 | group_points_grad_kernel_wrapper( 56 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 57 | grad_out.data(), idx.data(), output.data()); 58 | } else { 59 | AT_ASSERT(false, "CPU not supported"); 60 | } 61 | 62 | return output; 63 | } 64 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/group_points_gpu.cu: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include 5 | #include 6 | 7 | #include "cuda_utils.h" 8 | 9 | // input: points(b, c, n) idx(b, npoints, nsample) 10 | // output: out(b, c, npoints, nsample) 11 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 12 | int nsample, 13 | const float *__restrict__ points, 14 | const int *__restrict__ idx, 15 | float *__restrict__ out) { 16 | int batch_index = blockIdx.x; 17 | points += batch_index * n * c; 18 | idx += batch_index * npoints * nsample; 19 | out += batch_index * npoints * nsample * c; 20 | 21 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 22 | const int stride = blockDim.y * blockDim.x; 23 | for (int i = index; i < c * npoints; i += stride) { 24 | const int l = i / npoints; 25 | const int j = i % npoints; 26 | for (int k = 0; k < nsample; ++k) { 27 | int ii = idx[j * nsample + k]; 28 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 29 | } 30 | } 31 | } 32 | 33 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 34 | const float *points, const int *idx, 35 | float *out) { 36 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 37 | 38 | group_points_kernel<<>>( 39 | b, c, n, npoints, nsample, points, idx, out); 40 | 41 | CUDA_CHECK_ERRORS(); 42 | } 43 | 44 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 45 | // output: grad_points(b, c, n) 46 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 47 | int nsample, 48 | const float *__restrict__ grad_out, 49 | const int *__restrict__ idx, 50 | float *__restrict__ grad_points) { 51 | int batch_index = blockIdx.x; 52 | grad_out += batch_index * npoints * nsample * c; 53 | idx += batch_index * npoints * nsample; 54 | grad_points += batch_index * n * c; 55 | 56 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 57 | const int stride 
= blockDim.y * blockDim.x; 58 | for (int i = index; i < c * npoints; i += stride) { 59 | const int l = i / npoints; 60 | const int j = i % npoints; 61 | for (int k = 0; k < nsample; ++k) { 62 | int ii = idx[j * nsample + k]; 63 | atomicAdd(grad_points + l * n + ii, 64 | grad_out[(l * npoints + j) * nsample + k]); 65 | } 66 | } 67 | } 68 | 69 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 70 | int nsample, const float *grad_out, 71 | const int *idx, float *grad_points) { 72 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 73 | 74 | group_points_grad_kernel<<>>( 75 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 76 | 77 | CUDA_CHECK_ERRORS(); 78 | } 79 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #include "interpolate.h" 4 | #include "utils.h" 5 | 6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 7 | const float *known, float *dist2, int *idx); 8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 9 | const float *points, const int *idx, 10 | const float *weight, float *out); 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 12 | const float *grad_out, 13 | const int *idx, const float *weight, 14 | float *grad_points); 15 | 16 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 17 | CHECK_CONTIGUOUS(unknowns); 18 | CHECK_CONTIGUOUS(knows); 19 | CHECK_IS_FLOAT(unknowns); 20 | CHECK_IS_FLOAT(knows); 21 | 22 | if (unknowns.is_cuda()) { 23 | CHECK_CUDA(knows); 24 | } 25 | 26 | at::Tensor idx = 27 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 28 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 29 | at::Tensor dist2 = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 32 | 33 | if (unknowns.is_cuda()) { 34 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 35 | unknowns.data(), knows.data(), 36 | dist2.data(), idx.data()); 37 | } else { 38 | AT_ASSERT(false, "CPU not supported"); 39 | } 40 | 41 | return {dist2, idx}; 42 | } 43 | 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 45 | at::Tensor weight) { 46 | CHECK_CONTIGUOUS(points); 47 | CHECK_CONTIGUOUS(idx); 48 | CHECK_CONTIGUOUS(weight); 49 | CHECK_IS_FLOAT(points); 50 | CHECK_IS_INT(idx); 51 | CHECK_IS_FLOAT(weight); 52 | 53 | if (points.is_cuda()) { 54 | CHECK_CUDA(idx); 55 | CHECK_CUDA(weight); 56 | } 57 | 58 | at::Tensor output = 59 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 60 | at::device(points.device()).dtype(at::ScalarType::Float)); 61 | 62 | if (points.is_cuda()) { 63 | three_interpolate_kernel_wrapper( 64 | points.size(0), points.size(1), points.size(2), 
idx.size(1), 65 | points.data(), idx.data(), weight.data(), 66 | output.data()); 67 | } else { 68 | AT_ASSERT(false, "CPU not supported"); 69 | } 70 | 71 | return output; 72 | } 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 74 | at::Tensor weight, const int m) { 75 | CHECK_CONTIGUOUS(grad_out); 76 | CHECK_CONTIGUOUS(idx); 77 | CHECK_CONTIGUOUS(weight); 78 | CHECK_IS_FLOAT(grad_out); 79 | CHECK_IS_INT(idx); 80 | CHECK_IS_FLOAT(weight); 81 | 82 | if (grad_out.is_cuda()) { 83 | CHECK_CUDA(idx); 84 | CHECK_CUDA(weight); 85 | } 86 | 87 | at::Tensor output = 88 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 89 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 90 | 91 | if (grad_out.is_cuda()) { 92 | three_interpolate_grad_kernel_wrapper( 93 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 94 | grad_out.data(), idx.data(), weight.data(), 95 | output.data()); 96 | } else { 97 | AT_ASSERT(false, "CPU not supported"); 98 | } 99 | 100 | return output; 101 | } 102 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/third_party/pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #include "sampling.h" 4 | #include "utils.h" 5 | 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 7 | const float *points, const int *idx, 8 | float *out); 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *grad_out, const int *idx, 11 | float *grad_points); 12 | 13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 14 | const float *dataset, float *temp, 15 | int *idxs); 16 | 17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 43 | const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 59 | idx.size(1), grad_out.data(), 60 | idx.data(), output.data()); 61 | } else { 62 | AT_ASSERT(false, "CPU not supported"); 63 | } 64 | 65 | return output; 66 | } 67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 68 | CHECK_CONTIGUOUS(points); 69 | 
def test_interpolation_grad():
    """Run ``gradcheck`` on ``pointnet2_utils.three_interpolate``.

    Random float features of shape (1, 2, 4) on the GPU are interpolated with
    a fixed index/weight table, and the analytical gradient is compared with
    the numerical one using loose tolerances (atol = rtol = 1e-1).
    """
    shape = (1, 2, 4)  # (batch_size, feat_dim, m)
    feats = torch.randn(*shape, requires_grad=True).float().cuda()

    # Two output points, each a combination of three input points.
    neighbour_idx = np.array([[[0, 1, 2], [1, 2, 3]]])
    neighbour_weight = np.array([[[1, 1, 1], [2, 2, 2]]])

    def interpolate_func(inputs):
        idx = torch.from_numpy(neighbour_idx).int().cuda()
        weight = torch.from_numpy(neighbour_weight).float().cuda()
        return pointnet2_utils.three_interpolate(inputs, idx, weight)

    passed = gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)
    assert passed
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Build script for the `pointnet2._ext` CUDA extension.

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import glob
import os.path as osp

# Absolute path of the directory holding this script; used below to build an
# absolute include directory.
this_dir = osp.dirname(osp.abspath(__file__))

# Extension sources live under _ext_src/src and headers under
# _ext_src/include. The glob patterns are relative, so they are resolved
# against the current working directory, not against `this_dir`.
_ext_src_root = "_ext_src"
_ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob(
    "{}/src/*.cu".format(_ext_src_root)
)
# Collected but not referenced anywhere else in this script.
_ext_headers = glob.glob("{}/include/*".format(_ext_src_root))

setup(
    name='pointnet2',
    ext_modules=[
        CUDAExtension(
            name='pointnet2._ext',
            sources=_ext_sources,
            # Same flags for the host compiler and nvcc: -O2 plus a relative
            # -I for the header directory.
            extra_compile_args={
                "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
                "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
            },
            # Absolute form of the same header directory.
            include_dirs=[osp.join(this_dir, _ext_src_root, "include")],
        )
    ],
    # BuildExtension is torch's build_ext command for C++/CUDA extensions.
    cmdclass={
        'build_ext': BuildExtension
    }
)
def plot_grad_flow(named_parameters):
    """Plot the mean absolute gradient of every trainable non-bias parameter.

    Args:
        named_parameters: Iterable of ``(name, parameter)`` pairs. Each
            parameter must expose ``requires_grad`` and ``grad``.

    Parameters that do not require gradients, or whose name contains
    ``"bias"``, are ignored. Parameters whose ``grad`` is ``None`` are
    reported with ``print`` and left out of the plot. The curve is drawn on
    the current matplotlib figure; nothing is returned.
    """
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if not p.requires_grad or "bias" in n:
            continue
        # Compare against None explicitly: the truth value of a tensor with
        # more than one element is ambiguous and raises at runtime.
        if p.grad is None:
            print(f"{n} - doesn't have gradient computed")
            continue
        layers.append(n)
        # Store a plain float so matplotlib never has to convert a tensor
        # (which fails for tensors that live on an accelerator).
        ave_grads.append(p.grad.abs().mean().item())

    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
class StratifiedGroupKFold:
    """
    Stratified group K-fold with a sklearn.model_selection.KFold-style ``split``.

    Splits a dataset into k folds with a balanced label distribution
    (stratified) and non-overlapping groups.

    Args:
        n_splits (int): Number of folds.
        shuffle (bool): If true, add Gaussian noise to the ordering in which
            groups are assigned to folds.
        random_state (int | None): Seed for the random number generator that
            produces that noise. Stored as ``self.seed``.
    """

    def __init__(self, n_splits, shuffle=True, random_state=None):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.seed = random_state

    def split(self, X, labels, groups):
        """Yield ``(train_indices, test_indices)`` for each of the folds.

        Groups are assigned greedily, one at a time, to the fold that keeps
        the per-label counts across folds most even; each fold is then used
        once as the test set.

        Args:
            X: Samples; only its length is used.
            labels: One hashable label per sample.
            groups: One hashable group id per sample. All samples of a group
                end up in the same fold.

        Yields:
            Tuples of two lists of sample positions (train, test).

        Raises:
            AssertionError: If the inputs differ in length or there are
                fewer distinct groups than ``n_splits``.
        """
        assert len(X) == len(labels) == len(groups), "Invalid input length"
        assert (
            len(set(groups)) >= self.n_splits
        ), "The number of groups needs to be at least n_splits"

        def encode(values):
            # Number the distinct values in first-seen order. Iterating a set
            # instead would make the numbering (and, through tie-breaking,
            # the folds) depend on string hash randomisation.
            codes = {
                value: code for code, value in enumerate(dict.fromkeys(values))
            }
            return [codes[value] for value in values]

        labels, groups = encode(labels), encode(groups)
        num_labels, num_groups = max(labels) + 1, max(groups) + 1
        label_counts_per_group = np.zeros((num_groups, num_labels), dtype=int)
        global_label_dist = np.bincount(labels)
        for label, g in zip(labels, groups):
            label_counts_per_group[g][label] += 1

        label_counts_per_fold = np.zeros((self.n_splits, num_labels), dtype=int)
        groups_per_fold = [set() for _ in range(self.n_splits)]

        def imbalance_if_added(group_counts, fold_index):
            # Mean over labels of the across-fold standard deviation,
            # normalised by how often each label occurs overall.
            trial = label_counts_per_fold.copy()
            trial[fold_index] += group_counts
            return np.mean(np.std(trial, axis=0) / global_label_dist)

        # Place the groups with the most uneven label mix first.
        groups_and_label_counts = list(enumerate(label_counts_per_group))
        if self.shuffle:
            rng = random.Random(self.seed)
            mean_std = np.mean(np.std(label_counts_per_group, axis=1))
            groups_and_label_counts.sort(
                key=lambda g_counts: -np.std(g_counts[1]) + rng.gauss(0, mean_std)
            )  # add rng.gauss to increase the randomness
        else:
            groups_and_label_counts.sort(key=lambda g_counts: -np.std(g_counts[1]))

        for g, label_counts in groups_and_label_counts:
            evals = [
                imbalance_if_added(label_counts, i) for i in range(self.n_splits)
            ]
            best_fold = np.argmin(evals)
            label_counts_per_fold[best_fold] += label_counts
            groups_per_fold[best_fold].add(g)

        all_groups = set(groups)
        for test_groups in groups_per_fold:
            train_groups = all_groups - test_groups

            train_indices = [i for i, g in enumerate(groups) if g in train_groups]
            test_indices = [i for i, g in enumerate(groups) if g in test_groups]

            yield train_indices, test_indices
mesh.compute_vertex_normals() 41 | coords = np.asarray(mesh.vertices) 42 | normals = np.asarray(mesh.vertex_normals) 43 | colors = np.asarray(mesh.vertex_colors) 44 | feats = np.hstack((colors, normals)) 45 | 46 | return coords, feats 47 | 48 | 49 | def write_point_cloud_in_ply( 50 | filepath: Path, 51 | coords: np.ndarray, 52 | feats: Optional[np.ndarray] = None, 53 | labels: Optional[np.ndarray] = None, 54 | dtypes: Optional[List[Tuple[str, str]]] = [ 55 | ("x", " 2 | #include 3 | #include 4 | #include 5 | #include "aggregation_cuda_kernel.h" 6 | 7 | 8 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const float *position = position_tensor.data_ptr(); 12 | const float *weight = weight_tensor.data_ptr(); 13 | const int *idx = idx_tensor.data_ptr(); 14 | float *output = output_tensor.data_ptr(); 15 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 16 | } 17 | 18 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 19 | { 20 | const float *input = input_tensor.data_ptr(); 21 | const float *position = position_tensor.data_ptr(); 22 | const float *weight = weight_tensor.data_ptr(); 23 | const int *idx = idx_tensor.data_ptr(); 24 | const float *grad_output = grad_output_tensor.data_ptr(); 25 | float *grad_input = grad_input_tensor.data_ptr(); 26 | float *grad_position = grad_position_tensor.data_ptr(); 27 | float *grad_weight = grad_weight_tensor.data_ptr(); 28 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, 
// One thread per output element (point, channel): accumulates, over the
// `nsample` neighbours of the point, (input + position) * weight into
// output[point, channel]. The weight channel is the feature channel modulo
// w_c. The result is added to whatever `output` already holds.
__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;

    const int point = tid / c;
    const int channel = tid % c;
    const int weight_channel = channel % w_c;

    for (int s = 0; s < nsample; ++s)
    {
        // Flat position of this (point, neighbour) pair in idx/position/weight.
        const int slot = point * nsample + s;
        const float feature = input[idx[slot] * c + channel] + position[slot * c + channel];
        output[tid] += feature * weight[slot * w_c + weight_channel];
    }
}
// Launches aggregation_forward_cuda_kernel over n * c work items, using
// THREADS_PER_BLOCK threads per block and DIVUP(n * c, THREADS_PER_BLOCK)
// blocks (both macros come from cuda_utils.h; DIVUP is presumably a
// round-up division).
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // The execution configuration must name the grid and block dimensions;
    // an empty `<<>>` is not a valid kernel launch.
    aggregation_forward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}
void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/attention/attention_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "attention_cuda_kernel.h" 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 8 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *k = k_tensor.data_ptr(); 12 | const int *index0 = index0_tensor.data_ptr(); 13 | const int *index1 = index1_tensor.data_ptr(); 14 | float *attn = attn_tensor.data_ptr(); 15 | attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); 16 | } 17 | 18 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor 
grad_out_tensor, 19 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const int *index0 = index0_tensor.data_ptr(); 24 | const int *index1 = index1_tensor.data_ptr(); 25 | const float *q = q_tensor.data_ptr(); 26 | const float *k = k_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_k = grad_k_tensor.data_ptr(); 29 | attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 30 | } 31 | 32 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | float *output = output_tensor.data_ptr(); 40 | attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); 41 | } 42 | 43 | 44 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 46 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | float *grad_attn = grad_attn_tensor.data_ptr(); 54 | float *grad_v = grad_v_tensor.data_ptr(); 55 | attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 56 | } 57 | 
-------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, 
const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/attention_v2/attention_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_V2_CUDA_KERNEL 2 | #define _ATTENTION_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void 
attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/attention_v2/attention_cuda_v2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "attention_cuda_kernel_v2.h" 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 8 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *k = k_tensor.data_ptr(); 12 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 13 | const int *index1 = index1_tensor.data_ptr(); 14 | float *attn = attn_tensor.data_ptr(); 15 | attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); 16 | } 17 | 18 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 19 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 24 | const int *index1 = index1_tensor.data_ptr(); 25 | const float *q = q_tensor.data_ptr(); 26 | const float *k = k_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float 
*grad_k = grad_k_tensor.data_ptr(); 29 | attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); 30 | } 31 | 32 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | float *output = output_tensor.data_ptr(); 40 | attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); 41 | } 42 | 43 | 44 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 46 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | float *grad_attn = grad_attn_tensor.data_ptr(); 54 | float *grad_v = grad_v_tensor.data_ptr(); 55 | attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 56 | } 57 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = 
std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "grouping_cuda_kernel.h" 6 | 7 | 8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 14 | } 15 | 16 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | float *grad_input = grad_input_tensor.data_ptr(); 21 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 22 | } 23 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int 
*__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | 
-------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "interpolation_cuda_kernel.h" 6 | 7 | 8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | const float *weight = weight_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 15 | } 16 | 17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor 
grad_input_tensor) 18 | { 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | const int *idx = idx_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | float *grad_input = grad_input_tensor.data_ptr(); 23 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 24 | } 25 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const 
float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- 
/openmask3d/class_agnostic_mask_computation/utils/pointops2/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "knnquery_cuda_kernel.h" 6 | 7 | 8 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 9 | { 10 | const float *xyz = xyz_tensor.data_ptr(); 11 | const float *new_xyz = new_xyz_tensor.data_ptr(); 12 | const int *offset = offset_tensor.data_ptr(); 13 | const int *new_offset = new_offset_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | float *dist2 = dist2_tensor.data_ptr(); 16 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 17 | } 18 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/knnquery/knnquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knnquery_cuda_kernel.h" 3 | 4 | 5 | __device__ void swap_float(float *x, float *y) 6 | { 7 | float tmp = *x; 8 | *x = *y; 9 | *y = tmp; 10 | } 11 | 12 | 13 | __device__ void swap_int(int *x, int *y) 14 | { 15 | int tmp = *x; 16 | *x = *y; 17 | *y = tmp; 18 | } 19 | 20 | 21 | __device__ void reheap(float *dist, int *idx, int k) 22 | { 23 | int root = 0; 24 | int child = root * 2 + 1; 25 | while (child < k) 26 | { 27 | if(child + 1 < k && dist[child+1] > dist[child]) 28 | child++; 29 | if(dist[root] > dist[child]) 30 | return; 31 | swap_float(&dist[root], &dist[child]); 32 | swap_int(&idx[root], &idx[child]); 33 | root = child; 34 | child = root * 2 + 1; 35 | } 36 | } 37 | 38 | 39 | __device__ void heap_sort(float *dist, int *idx, int k) 40 | { 41 | int i; 42 | for (i = k - 1; i > 0; i--) 43 | { 44 | 
swap_float(&dist[0], &dist[i]); 45 | swap_int(&idx[0], &idx[i]); 46 | reheap(dist, idx, i); 47 | } 48 | } 49 | 50 | 51 | __device__ int get_bt_idx(int idx, const int *offset) 52 | { 53 | int i = 0; 54 | while (1) 55 | { 56 | if (idx < offset[i]) 57 | break; 58 | else 59 | i++; 60 | } 61 | return i; 62 | } 63 | 64 | 65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 66 | // input: xyz (n, 3) new_xyz (m, 3) 67 | // output: idx (m, nsample) dist2 (m, nsample) 68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 69 | if (pt_idx >= m) return; 70 | 71 | new_xyz += pt_idx * 3; 72 | idx += pt_idx * nsample; 73 | dist2 += pt_idx * nsample; 74 | int bt_idx = get_bt_idx(pt_idx, new_offset); 75 | int start; 76 | if (bt_idx == 0) 77 | start = 0; 78 | else 79 | start = offset[bt_idx - 1]; 80 | int end = offset[bt_idx]; 81 | 82 | float new_x = new_xyz[0]; 83 | float new_y = new_xyz[1]; 84 | float new_z = new_xyz[2]; 85 | 86 | float best_dist[100]; 87 | int best_idx[100]; 88 | for(int i = 0; i < nsample; i++){ 89 | best_dist[i] = 1e10; 90 | best_idx[i] = start; 91 | } 92 | for(int i = start; i < end; i++){ 93 | float x = xyz[i * 3 + 0]; 94 | float y = xyz[i * 3 + 1]; 95 | float z = xyz[i * 3 + 2]; 96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 97 | if (d2 < best_dist[0]){ 98 | best_dist[0] = d2; 99 | best_idx[0] = i; 100 | reheap(best_dist, best_idx, nsample); 101 | } 102 | } 103 | heap_sort(best_dist, best_idx, nsample); 104 | for(int i = 0; i < nsample; i++){ 105 | idx[i] = best_idx[i]; 106 | dist2[i] = best_dist[i]; 107 | } 108 | } 109 | 110 | 111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 112 | // input: new_xyz: 
(m, 3), xyz: (n, 3), idx: (m, nsample) 113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 114 | dim3 threads(THREADS_PER_BLOCK); 115 | knnquery_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 116 | } 117 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | #include "attention/attention_cuda_kernel.h" 11 | #include "rpe/relative_pos_encoding_cuda_kernel.h" 12 | #include "attention_v2/attention_cuda_kernel_v2.h" 13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" 14 | 15 | 16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 17 | m.def("knnquery_cuda", &knnquery_cuda, 
"knnquery_cuda"); 18 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 22 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 23 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 24 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 25 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 26 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 27 | m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); 28 | m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); 29 | m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); 30 | m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); 31 | m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); 32 | m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); 33 | m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); 34 | m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); 35 | m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); 36 | 
m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "relative_pos_encoding_cuda_kernel.h" 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 8 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *table = table_tensor.data_ptr(); 12 | const int *index = index_tensor.data_ptr(); 13 | const int *rel_idx = rel_idx_tensor.data_ptr(); 14 | float *output = 
output_tensor.data_ptr(); 15 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 16 | } 17 | 18 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 19 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const float *q = q_tensor.data_ptr(); 24 | const int *index = index_tensor.data_ptr(); 25 | const float *table = table_tensor.data_ptr(); 26 | const int *rel_idx = rel_idx_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_table = grad_table_tensor.data_ptr(); 29 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 30 | } 31 | 32 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | const float *table = table_tensor.data_ptr(); 40 | const int *rel_idx = rel_idx_tensor.data_ptr(); 41 | float *output = output_tensor.data_ptr(); 42 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); 43 | } 44 | 45 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 46 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, 47 | at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor 
grad_table_tensor) 48 | { 49 | const float *grad_out = grad_out_tensor.data_ptr(); 50 | const int *index0 = index0_tensor.data_ptr(); 51 | const int *index1 = index1_tensor.data_ptr(); 52 | const float *attn = attn_tensor.data_ptr(); 53 | const float *v = v_tensor.data_ptr(); 54 | const float *table = table_tensor.data_ptr(); 55 | const int *rel_idx = rel_idx_tensor.data_ptr(); 56 | float *grad_attn = grad_attn_tensor.data_ptr(); 57 | float *grad_v = grad_v_tensor.data_ptr(); 58 | float *grad_table = grad_table_tensor.data_ptr(); 59 | attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); 60 | } 61 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, 
at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_V2_CUDA_KERNEL 2 | #define _RPE_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda_v2(int N, int M, int 
h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 9 | 10 | void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 12 | 13 | void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 14 | void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int 
*index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output); 21 | void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k); 22 | 23 | void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output); 24 | void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k); 25 | 26 | void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output); 27 | void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | #endif 33 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/sampling/sampling_cuda.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "sampling_cuda_kernel.h" 6 | 7 | 8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 9 | { 10 | const float *xyz = xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | float *tmp = tmp_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 16 | } 17 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "subtraction_cuda_kernel.h" 6 | 7 | 8 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor 
output_tensor) 9 | { 10 | const float *input1 = input1_tensor.data_ptr(); 11 | const float *input2 = input2_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 15 | } 16 | 17 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 18 | { 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *grad_output = grad_output_tensor.data_ptr(); 21 | float *grad_input1 = grad_input1_tensor.data_ptr(); 22 | float *grad_input2 = grad_input2_tensor.data_ptr(); 23 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 24 | } 25 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/pointops2/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float 
// Launches subtraction_forward_cuda_kernel with one thread per output element.
// input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
// DIVUP and THREADS_PER_BLOCK are expected to come from ../cuda_utils.h.
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // Enough blocks to cover all n * nsample * c elements; the kernel itself
    // bounds-checks the trailing partial block.
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // The execution configuration must name the grid and block dimensions.
    subtraction_forward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, input1, input2, idx, output);
}

// Launches subtraction_backward_cuda_kernel with one thread per grad_output element.
// input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    // The execution configuration must name the grid and block dimensions.
    subtraction_backward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/votenet_utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. 
def huber_loss(error, delta=1.0):
    """Element-wise Huber (smooth-L1) loss.

    Args:
        error: Torch tensor (d1,d2,...,dk); ``pred - gt`` or ``dist(pred, gt)``.
        delta: threshold between the quadratic and the linear regime.
    Returns:
        loss: Torch tensor (d1,d2,...,dk)

    With x = error:
        0.5 * |x|^2                 if |x| <= delta
        0.5 * delta^2 + delta * (|x| - delta)  if |x| > delta
    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    abs_error = torch.abs(error)
    # Part of |x| that falls inside the quadratic regime.
    quadratic = torch.clamp(abs_error, max=delta)
    # Remainder beyond delta is penalised linearly.
    linear = abs_error - quadratic
    return 0.5 * quadratic ** 2 + delta * linear


def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False):
    """Nearest-neighbour distances between two batched point sets.

    Input:
        pc1: (B,N,C) torch tensor
        pc2: (B,M,C) torch tensor
        l1smooth: bool, use the Huber (smooth-L1) distance summed over C
        delta: scalar, the delta used in the smooth-L1 distance
        l1: bool, use the L1 distance summed over C (only checked when
            ``l1smooth`` is False)
        With both flags False the squared Euclidean distance is used.
    Output:
        dist1: (B,N) torch float32 tensor, distance from each pc1 point to its
            nearest pc2 point
        idx1: (B,N) torch int64 tensor, index of that pc2 point
        dist2: (B,M) torch float32 tensor, distance from each pc2 point to its
            nearest pc1 point
        idx2: (B,M) torch int64 tensor, index of that pc1 point
    """
    # Broadcasting (B,N,1,C) - (B,1,M,C) yields the same (B,N,M,C) differences
    # as tiling both operands with repeat(), without the two extra copies.
    pc_diff = pc1.unsqueeze(2) - pc2.unsqueeze(1)

    if l1smooth:
        pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1)  # (B,N,M)
    elif l1:
        pc_dist = torch.sum(torch.abs(pc_diff), dim=-1)  # (B,N,M)
    else:
        pc_dist = torch.sum(pc_diff ** 2, dim=-1)  # (B,N,M)

    dist1, idx1 = torch.min(pc_dist, dim=2)  # (B,N)
    dist2, idx2 = torch.min(pc_dist, dim=1)  # (B,M)
    return dist1, idx1, dist2, idx2
dist2, idx2 = nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /openmask3d/class_agnostic_mask_computation/utils/votenet_utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
class Logger(object):
    """Minimal TensorBoard logger for scalars, images and histograms.

    NOTE(review): uses ``tf.summary.FileWriter``, ``tf.Summary`` and
    ``tf.HistogramProto`` — these look like TensorFlow 1.x APIs; confirm the
    installed TensorFlow version still exposes them.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded in memory and stored under ``'<tag>/<i>'``.
        """
        # Local import keeps this block self-contained. PNG data is binary, so
        # a bytes buffer is the right container on Python 2.7 and 3.x alike;
        # this replaces choosing the buffer class through a bare ``except``.
        import io

        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to an in-memory bytes buffer
            s = io.BytesIO()
            # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2; this
            # call needs an older SciPy or a port to PIL.Image.fromarray.
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        ``values`` must support ``.shape`` and ``**`` (e.g. a numpy array).
        """
        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin: only the upper edge of each bucket
        # is stored.
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
#!/bin/bash
# Computes class-agnostic instance masks for ScanNet200 with the pretrained
# mask module and writes them to MASK_SAVE_DIR.
export OMP_NUM_THREADS=4 # speeds up MinkowskiEngine

# Every path below can be overridden from the environment, e.g.
#   SCANNET_DATA_DIR=/data/scannet200 bash compute_masks_full_scannet200.sh
# When a variable is unset, the original hard-coded value is used.
MASK_MODULE_CKPT_PATH="${MASK_MODULE_CKPT_PATH:-/home/ayca/ovseg3d/pretrained_models_and_data/pretrained/scannet200_val.ckpt}"
SCANNET_DATA_DIR="${SCANNET_DATA_DIR:-/media/ayca/Elements/ayca/OpenMask3D/scannet_processed/scannet200}"
# ${VAR%/} strips one trailing slash so the joined path has no double slash.
SCANNET_LABEL_DB_PATH="${SCANNET_LABEL_DB_PATH:-${SCANNET_DATA_DIR%/}/label_database.yaml}"
MASK_SAVE_DIR="${MASK_SAVE_DIR:-/home/ayca/openmask3d_temp_clean_up/mask_save_dir_scannet200_other_dir}"
SAVE_VISUALIZATIONS="${SAVE_VISUALIZATIONS:-false}" #if set to true, saves pyviz3d visualizations

# TEST
# Expansions are quoted so a path containing spaces stays one argument.
python class_agnostic_mask_computation/get_masks_scannet200.py \
general.experiment_name="scannet200" \
general.project_name="scannet200" \
general.checkpoint="${MASK_MODULE_CKPT_PATH}" \
general.train_mode=false \
model.num_queries=150 \
general.use_dbscan=true \
general.dbscan_eps=0.95 \
general.save_visualizations="${SAVE_VISUALIZATIONS}" \
data.test_dataset.data_dir="${SCANNET_DATA_DIR}" \
data.validation_dataset.data_dir="${SCANNET_DATA_DIR}" \
data.train_dataset.data_dir="${SCANNET_DATA_DIR}" \
data.test_dataset.label_db_filepath="${SCANNET_LABEL_DB_PATH}" \
data.validation_dataset.label_db_filepath="${SCANNET_LABEL_DB_PATH}" \
data.train_dataset.label_db_filepath="${SCANNET_LABEL_DB_PATH}" \
general.mask_save_dir="${MASK_SAVE_DIR}"
#!/bin/bash
# Computes class-agnostic instance masks for a single scene (.ply) with the
# pretrained mask module and writes them to MASK_SAVE_DIR.
export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine

# Every path below can be overridden from the environment, e.g.
#   SCENE_PLY_PATH=/data/my_scene.ply bash compute_masks_single_scene.sh
# When a variable is unset, the original hard-coded value is used.
MASK_MODULE_CKPT_PATH="${MASK_MODULE_CKPT_PATH:-/home/ayca/ovseg3d/pretrained_models_and_data/pretrained/scannet200_model.ckpt}"
SCENE_PLY_PATH="${SCENE_PLY_PATH:-/home/ayca/openmask3d_temp_clean_up/scene0011_00_vh_clean_2.ply}"
MASK_SAVE_DIR="${MASK_SAVE_DIR:-/home/ayca/openmask3d_temp_clean_up/mask_save_dir_scannet200_other_dir}"
SAVE_VISUALIZATIONS="${SAVE_VISUALIZATIONS:-false}" #if set to true, saves pyviz3d visualizations


# TEST
# Expansions are quoted so a path containing spaces stays one argument.
python class_agnostic_mask_computation/get_masks_single_scene.py \
general.experiment_name="single_scene" \
general.checkpoint="${MASK_MODULE_CKPT_PATH}" \
general.train_mode=false \
data.test_mode=test \
model.num_queries=150 \
general.use_dbscan=true \
general.dbscan_eps=0.95 \
general.save_visualizations="${SAVE_VISUALIZATIONS}" \
general.scene_path="${SCENE_PLY_PATH}" \
general.mask_save_dir="${MASK_SAVE_DIR}"
external: 27 | sam_checkpoint: '../resources/sam_vit_h_4b8939.pth' 28 | sam_model_type: 'vit_h' 29 | clip_model: 'ViT-L/14@336px' 30 | 31 | output: 32 | experiment_name: 'experiment' 33 | output_directory: 'output/' 34 | save_crops: False 35 | 36 | gpu: 37 | optimize_gpu_usage: False -------------------------------------------------------------------------------- /openmask3d/configs/openmask3d_scannet200_eval.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | scans_path: '/cluster/project/cvg/weders/data/scannet/scans/' 3 | masks: 4 | masks_path: '/cluster/scratch/efedele/masks' #scannet200 5 | masks_suffix: '*_masks.pt' 6 | camera: 7 | poses_path: 'data/pose/' 8 | intrinsic_path: 'data/intrinsic/intrinsic_color.txt' 9 | intrinsic_resolution: [968, 1296] 10 | depths: 11 | depths_path: 'data_compressed/depth/' 12 | depths_ext: '.png' 13 | depth_scale: 1000 14 | images: 15 | images_path: 'data_compressed/color/' 16 | images_ext: '.jpg' 17 | 18 | openmask3d: 19 | top_k: 5 20 | multi_level_expansion_ratio: 0.1 21 | num_of_levels: 3 22 | vis_threshold: 0.2 23 | frequency: 10 24 | num_random_rounds: 10 25 | num_selected_points: 5 26 | 27 | external: 28 | sam_checkpoint: '/cluster/scratch/efedele/checkpoints/sam_vit_h_4b8939.pth' 29 | sam_model_type: 'vit_h' 30 | clip_model: 'ViT-L/14@336px' 31 | 32 | output: 33 | experiment_name: 'experiment' 34 | output_directory: '/cluster/scratch/efedele/' 35 | save_crops: False 36 | 37 | gpu: 38 | optimize_gpu_usage: False -------------------------------------------------------------------------------- /openmask3d/data/load.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import open3d as o3d 4 | import imageio 5 | import torch 6 | import math 7 | import os 8 | 9 | def get_number_of_images(poses_path): 10 | i = 0 11 | while(os.path.isfile(os.path.join(poses_path, str(i) + '.txt'))): i += 1 12 | 
class Camera:
    """Camera intrinsics plus on-disk locations of per-frame poses and depth maps."""

    def __init__(self,
                 intrinsic_path,
                 intrinsic_resolution,
                 poses_path,
                 depths_path,
                 extension_depth,
                 depth_scale):
        """Load the intrinsic matrix and remember where poses/depths live.

        Args:
            intrinsic_path: text file readable by ``np.loadtxt``; only its
                top-left 3x3 block is kept.
            intrinsic_resolution: resolution the intrinsics were calibrated
                for (two entries, indexed as [0] and [1]).
            poses_path: directory holding ``<idx>.txt`` pose matrices.
            depths_path: directory holding ``<idx><extension_depth>`` depth images.
            extension_depth: file extension of the depth images, e.g. '.png'.
            depth_scale: divisor applied to raw depth values.
        """
        self.intrinsic = np.loadtxt(intrinsic_path)[:3, :3]
        self.intrinsic_original_resolution = intrinsic_resolution
        self.poses_path = poses_path
        self.depths_path = depths_path
        self.extension_depth = extension_depth
        self.depth_scale = depth_scale

    def get_adapted_intrinsic(self, desired_resolution):
        '''Get adjusted camera intrinsics.

        Returns ``self.intrinsic`` itself (not a copy) when the desired
        resolution equals the original one, otherwise a rescaled copy.
        '''
        if self.intrinsic_original_resolution == desired_resolution:
            return self.intrinsic

        original = self.intrinsic_original_resolution
        # Size along axis 0 that preserves the original aspect ratio for the
        # requested size along axis 1.
        resize_width = int(math.floor(desired_resolution[1] * float(original[0]) / float(original[1])))

        adapted_intrinsic = self.intrinsic.copy()
        # Focal lengths scale with the image size ...
        adapted_intrinsic[0, 0] *= float(resize_width) / float(original[0])
        adapted_intrinsic[1, 1] *= float(desired_resolution[1]) / float(original[1])
        # ... principal point scales with the pixel-centre extent (size - 1).
        adapted_intrinsic[0, 2] *= float(desired_resolution[0] - 1) / float(original[0] - 1)
        adapted_intrinsic[1, 2] *= float(desired_resolution[1] - 1) / float(original[1] - 1)
        return adapted_intrinsic

    def load_poses(self, indices):
        """Load and invert the pose matrices for the given frame indices.

        Args:
            indices: iterable of frame indices; frame ``i`` is read from
                ``<poses_path>/<i>.txt``.
        Returns:
            float array of shape (len(indices), 3, k): the first three rows of
            each inverted pose matrix.
        """
        poses = [
            np.linalg.inv(np.loadtxt(os.path.join(self.poses_path, str(idx) + '.txt')))[:3, :]
            for idx in indices
        ]
        if not poses:
            # Nothing to stack. The shape is taken from the files otherwise,
            # so frame 0 no longer has to exist on disk.
            # NOTE(review): assumes 4x4 pose matrices for the empty case — confirm.
            return np.zeros((0, 3, 4))
        return np.stack(poses).astype(float, copy=False)

    def load_depth(self, idx, depth_scale=None):
        """Read depth image ``idx`` and divide it by the depth scale.

        Args:
            idx: frame index; the file is ``<depths_path>/<idx><extension_depth>``.
            depth_scale: divisor for the raw values; defaults to the
                ``depth_scale`` given at construction time.
        """
        scale = self.depth_scale if depth_scale is None else depth_scale
        depth_path = os.path.join(self.depths_path, str(idx) + self.extension_depth)
        sensor_depth = imageio.v2.imread(depth_path) / scale
        return sensor_depth
# print an error message and quit
def print_error(message, user_fault=False):
    """Write ``'ERROR: <message>'`` to stderr and terminate the process.

    Exits with status 2 when ``user_fault`` is true, otherwise with -1.
    """
    sys.stderr.write('ERROR: ' + str(message) + '\n')
    if user_fault:
        sys.exit(2)
    sys.exit(-1)


# if string s represents an int
def represents_int(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    Inputs ``int()`` cannot handle at all (e.g. ``None``) give False instead
    of raising ``TypeError``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    return True
# input: scene_types.txt or scene_types_all.txt
def read_scene_types_mapping(filename, remove_spaces=True):
    """Read a tab-separated ``<id>\\t<scene type name>`` file into a dict.

    Args:
        filename: path to the mapping file; must exist.
        remove_spaces: strip leading/trailing whitespace from each name
            before using it as a key.
    Returns:
        dict mapping scene type name (str) to its integer id.
    """
    assert os.path.isfile(filename)
    # The context manager closes the handle; it used to be left open.
    with open(filename) as f:
        lines = f.read().splitlines()
    # Skip blank lines (e.g. a trailing empty line) instead of failing on them.
    rows = [line.split('\t') for line in lines if line.strip()]
    if remove_spaces:
        mapping = {x[1].strip(): int(x[0]) for x in rows}
    else:
        mapping = {x[1]: int(x[0]) for x in rows}
    return mapping
def initialize_sam_model(device, sam_model_type, sam_checkpoint):
    """Build the SAM model named ``sam_model_type`` from ``sam_checkpoint``,
    move it to ``device`` and return it wrapped in a ``SamPredictor``."""
    sam = sam_model_registry[sam_model_type](checkpoint=sam_checkpoint)
    sam.to(device)
    predictor_sam = SamPredictor(sam)
    return predictor_sam


def mask2box(mask: torch.Tensor):
    """Tight bounding box of the non-zero region of a 2D mask.

    Returns:
        ``(x1, y1, x2, y2)`` with exclusive upper bounds, where x indexes
        columns (axis 1) and y indexes rows (axis 0); ``None`` if the mask has
        no non-zero column.
    """
    # Summing over rows leaves one entry per column; non-zero entries are the
    # columns the mask touches.
    cols = torch.nonzero(mask.sum(dim=0))[:, 0]
    if len(cols) == 0:
        return None
    # Same along the other axis, using torch.nonzero for both (one of the two
    # lookups used to go through np.nonzero on a tensor).
    rows = torch.nonzero(mask.sum(dim=1))[:, 0]
    x1, x2 = cols.min().item(), cols.max().item()
    y1, y2 = rows.min().item(), rows.max().item()
    return x1, y1, x2 + 1, y2 + 1


def mask2box_multi_level(mask: torch.Tensor, level, expansion_ratio):
    """Bounding box of ``mask`` grown by ``level`` expansion steps.

    Each step widens the box on every side by
    ``int(side_length * expansion_ratio)`` pixels, clipped to the mask extent.
    Level 0 returns the tight box.

    Returns:
        ``(x1, y1, x2, y2)``, or ``None`` when the mask is empty (this used to
        fail while unpacking ``None``).
    """
    box = mask2box(mask)
    if box is None:
        return None
    x1, y1, x2, y2 = box
    if level == 0:
        return x1, y1, x2, y2
    shape = mask.shape
    x_exp = int(abs(x2 - x1) * expansion_ratio) * level
    y_exp = int(abs(y2 - y1) * expansion_ratio) * level
    return max(0, x1 - x_exp), max(0, y1 - y_exp), min(shape[1], x2 + x_exp), min(shape[0], y2 + y_exp)


def run_sam(image_size, num_random_rounds, num_selected_points, point_coords, predictor_sam):
    """Prompt SAM with random point subsets and keep the best-scoring mask.

    Args:
        image_size: shape of the fallback mask, e.g. ``(H, W)``. An array with
            two or more dimensions is also accepted, in which case the
            fallback mask takes that array's shape.
        num_random_rounds: number of random subsets to try.
        num_selected_points: number of points used as prompts per round.
        point_coords: (K, 2) array; the two columns are swapped before being
            handed to SAM. NOTE(review): presumably (row, col) -> (x, y) —
            confirm against the caller.
        predictor_sam: object exposing SAM's ``predict`` interface; the image
            is expected to have been set on it by the caller.
    Returns:
        The mask with the highest score over all rounds, or an all-False mask
        if no round produced a score above 0.
    """
    best_score = 0
    size = np.atleast_1d(np.asarray(image_size))
    if size.ndim >= 2:
        # An image-like array was passed: keep the shape-of-input fallback.
        best_mask = np.zeros_like(size, dtype=bool)
    else:
        # A size tuple was passed. np.zeros_like on it would produce a mask of
        # shape (len(image_size),) instead of an image-sized one.
        best_mask = np.zeros(tuple(int(s) for s in size), dtype=bool)

    # Swap the two coordinate columns into a fresh array so the caller's
    # point_coords is not modified by the in-place shuffle below.
    point_coords_new = np.zeros_like(point_coords)
    point_coords_new[:, 0] = point_coords[:, 1]
    point_coords_new[:, 1] = point_coords[:, 0]

    # Get only a random subsample of them for num_random_rounds times and choose the mask with highest confidence score
    for _ in range(num_random_rounds):
        np.random.shuffle(point_coords_new)
        selected = point_coords_new[:num_selected_points]
        masks, scores, logits = predictor_sam.predict(
            point_coords=selected,
            point_labels=np.ones(selected.shape[0]),  # all prompts are foreground
            multimask_output=False,
        )
        if scores[0] > best_score:
            best_score = scores[0]
            best_mask = masks[0]

    return best_mask
create_out_folder(experiment_name: str, 24 | output_path: str = "outputs"): 25 | date_str = date.today().strftime("%Y-%m-%d-%H:%M:%S") 26 | folder_name = date_str + '-' + experiment_name 27 | out_folder = os.path.join(output_path, folder_name) 28 | os.makedirs(out_folder, exist_ok=True) 29 | return out_folder -------------------------------------------------------------------------------- /openmask3d/visualization/constants.py: -------------------------------------------------------------------------------- 1 | 2 | INSTANCE_COLORS = { 3 | 0: (165.0, 80.0, 115.0), 4 | 1: (254., 97., 0.), #orange 5 | 2: (120., 94., 240.), #purple 6 | 3: (100., 143., 255.), #blue 7 | 4: (220., 38., 127.), #pink 8 | 5: (255., 176., 0.), #yellow 9 | 6: (100., 143., 255.), 10 | 7: (160.0, 50.0, 50.0), 11 | 8: (129.0, 0.0, 50.0), 12 | 9: (255., 176., 0.), 13 | 10: (192.0, 100.0, 119.0), 14 | 11: (149.0, 192.0, 228.0), 15 | 12: (14.0, 0.0, 120.0), 16 | 13: (90., 64., 210.), 17 | 14: (152.0, 200.0, 156.0), 18 | 15: (129.0, 103.0, 106.0), 19 | 16: (100.0, 160.0, 100.0), # 20 | 17: (70.0, 70.0, 140.0), 21 | 18: (160.0, 20.0, 60.0), 22 | 19: (20., 130., 20.), 23 | 20: (140.0, 30.0, 60.0), 24 | 21: (20.0, 20.0, 120.0), 25 | 22: (243.0, 115.0, 68.0), 26 | 23: (120.0, 162.0, 227.0), 27 | 24: (100.0, 78.0, 142.0), 28 | 25: (152.0, 95.0, 163.0), 29 | 26: (160.0, 20.0, 60.0), 30 | 27: (100.0, 143.0, 255.0), 31 | 28: (255., 204., 153.), 32 | 29: (50., 100., 0.), 33 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "openmask3d" 7 | description = "OpenMask3D: Open-Vocabulary 3D Instance Segmentation " 8 | authors = [ 9 | { name = "Elisabetta Fedele" }, 10 | { name = "Ayça Takmaz" }, 11 | { name = "Robert W. 
Sumner" }, 12 | { name = "Marc Pollefeys" }, 13 | { name = "Federico Tombari" }, 14 | { name = "Francis Engelmann" } 15 | ] 16 | readme = "README.md" 17 | urls = {github = "https://github.com/OpenMask3D/openmask3d"} 18 | dynamic = ["version"] 19 | 20 | [tool.setuptools] 21 | packages = ["openmask3d"] 22 | 23 | [tool.setuptools.dynamic] 24 | version = {attr = "openmask3d.__version__"} -------------------------------------------------------------------------------- /run_openmask3d_scannet200_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | set -e 4 | 5 | # OPENMASK3D SCANNET200 EVALUATION SCRIPT 6 | # This script performs the following in order to evaluate OpenMask3D predictions on the ScanNet200 validation set 7 | # 1. Compute class agnostic masks and save them 8 | # 2. Compute mask features for each mask and save them 9 | # 3. Evaluate for closed-set 3D semantic instance segmentation 10 | 11 | # -------- 12 | # NOTE: SET THESE PARAMETERS! 
13 | SCANS_PATH="/PATH/TO/SCANNET/SCANS" 14 | SCANNET_PROCESSED_DIR="/PATH/TO/scannet_processed/scannet200" 15 | # model ckpt paths 16 | MASK_MODULE_CKPT_PATH="$(pwd)/resources/scannet200_val.ckpt" 17 | SAM_CKPT_PATH="$(pwd)/resources/sam_vit_h_4b8939.pth" 18 | # output directories to save masks and mask features 19 | EXPERIMENT_NAME="scannet200" 20 | OUTPUT_DIRECTORY="$(pwd)/output" 21 | TIMESTAMP=$(date +"%Y-%m-%d-%H-%M-%S") 22 | OUTPUT_FOLDER_DIRECTORY="${OUTPUT_DIRECTORY}/${TIMESTAMP}-${EXPERIMENT_NAME}" 23 | MASK_SAVE_DIR="${OUTPUT_FOLDER_DIRECTORY}/masks" 24 | MASK_FEATURE_SAVE_DIR="${OUTPUT_FOLDER_DIRECTORY}/mask_features" 25 | SAVE_VISUALIZATIONS=false #if set to true, saves pyviz3d visualizations 26 | 27 | # Parameters below are AUTOMATICALLY set based on the parameters above: 28 | SCANNET_LABEL_DB_PATH="${SCANNET_PROCESSED_DIR%/}/label_database.yaml" 29 | SCANNET_INSTANCE_GT_DIR="${SCANNET_PROCESSED_DIR%/}/instance_gt/validation" 30 | # gpu optimization 31 | OPTIMIZE_GPU_USAGE=false 32 | 33 | cd openmask3d 34 | 35 | # 1.Compute class agnostic masks and save them 36 | python class_agnostic_mask_computation/get_masks_scannet200.py \ 37 | general.experiment_name=${EXPERIMENT_NAME} \ 38 | general.project_name="scannet200" \ 39 | general.checkpoint=${MASK_MODULE_CKPT_PATH} \ 40 | general.train_mode=false \ 41 | model.num_queries=150 \ 42 | general.use_dbscan=true \ 43 | general.dbscan_eps=0.95 \ 44 | general.save_visualizations=${SAVE_VISUALIZATIONS} \ 45 | data.test_dataset.data_dir=${SCANNET_PROCESSED_DIR} \ 46 | data.validation_dataset.data_dir=${SCANNET_PROCESSED_DIR} \ 47 | data.train_dataset.data_dir=${SCANNET_PROCESSED_DIR} \ 48 | data.train_dataset.label_db_filepath=${SCANNET_LABEL_DB_PATH} \ 49 | data.validation_dataset.label_db_filepath=${SCANNET_LABEL_DB_PATH} \ 50 | data.test_dataset.label_db_filepath=${SCANNET_LABEL_DB_PATH} \ 51 | general.mask_save_dir=${MASK_SAVE_DIR} \ 52 | 
#!/bin/bash
export OMP_NUM_THREADS=3  # speeds up MinkowskiEngine
set -e

# RUN OPENMASK3D FOR A SINGLE SCENE
# This script performs the following:
# 1. Compute class agnostic masks and save them
# 2. Compute mask features for each mask and save them

# --------
# NOTE: SET THESE PARAMETERS BASED ON YOUR SCENE!
# data paths
SCENE_DIR="$(pwd)/resources/scene_example"
SCENE_POSE_DIR="${SCENE_DIR}/pose"
SCENE_INTRINSIC_PATH="${SCENE_DIR}/intrinsic/intrinsic_color.txt"
SCENE_INTRINSIC_RESOLUTION="[968,1296]"  # change if your intrinsics are based on another resolution
SCENE_PLY_PATH="${SCENE_DIR}/scene_example.ply"
SCENE_COLOR_IMG_DIR="${SCENE_DIR}/color"
SCENE_DEPTH_IMG_DIR="${SCENE_DIR}/depth"
IMG_EXTENSION=".jpg"
DEPTH_EXTENSION=".png"
DEPTH_SCALE=1000
# model ckpt paths
MASK_MODULE_CKPT_PATH="$(pwd)/resources/scannet200_model.ckpt"
SAM_CKPT_PATH="$(pwd)/resources/sam_vit_h_4b8939.pth"
# output directories to save masks and mask features
EXPERIMENT_NAME="experiment"
OUTPUT_DIRECTORY="$(pwd)/output"
TIMESTAMP=$(date +"%Y-%m-%d-%H-%M-%S")
OUTPUT_FOLDER_DIRECTORY="${OUTPUT_DIRECTORY}/${TIMESTAMP}-${EXPERIMENT_NAME}"
SAVE_VISUALIZATIONS=false  # if set to true, saves pyviz3d visualizations
SAVE_CROPS=false
# gpu optimization
OPTIMIZE_GPU_USAGE=false

# All paths above are absolute (built from $(pwd) before this point),
# so they remain valid after changing directory.
cd openmask3d

# 1. Compute class agnostic masks and save them
# Every expansion is quoted so that paths containing spaces are passed as a
# single argument.
echo "[INFO] Extracting class agnostic masks..."
python class_agnostic_mask_computation/get_masks_single_scene.py \
general.experiment_name="${EXPERIMENT_NAME}" \
general.checkpoint="${MASK_MODULE_CKPT_PATH}" \
general.train_mode=false \
data.test_mode=test \
model.num_queries=120 \
general.use_dbscan=true \
general.dbscan_eps=0.95 \
general.save_visualizations="${SAVE_VISUALIZATIONS}" \
general.scene_path="${SCENE_PLY_PATH}" \
general.mask_save_dir="${OUTPUT_FOLDER_DIRECTORY}" \
hydra.run.dir="${OUTPUT_FOLDER_DIRECTORY}/hydra_outputs/class_agnostic_mask_computation"
echo "[INFO] Mask computation done!"

# get the path of the saved masks:
# <output folder>/<ply file name with ".ply" replaced by "_masks.pt">
MASK_FILE_BASE="$(basename "${SCENE_PLY_PATH}")"
MASK_FILE_NAME="${MASK_FILE_BASE/.ply/_masks.pt}"
SCENE_MASK_PATH="${OUTPUT_FOLDER_DIRECTORY}/${MASK_FILE_NAME}"
echo "[INFO] Masks saved to ${SCENE_MASK_PATH}."

# 2. Compute mask features for each mask and save them
echo "[INFO] Computing mask features..."

# SCENE_INTRINSIC_RESOLUTION contains "[...]", which the shell would treat as
# a glob pattern if left unquoted.
python compute_features_single_scene.py \
data.masks.masks_path="${SCENE_MASK_PATH}" \
data.camera.poses_path="${SCENE_POSE_DIR}" \
data.camera.intrinsic_path="${SCENE_INTRINSIC_PATH}" \
data.camera.intrinsic_resolution="${SCENE_INTRINSIC_RESOLUTION}" \
data.depths.depths_path="${SCENE_DEPTH_IMG_DIR}" \
data.depths.depth_scale="${DEPTH_SCALE}" \
data.depths.depths_ext="${DEPTH_EXTENSION}" \
data.images.images_path="${SCENE_COLOR_IMG_DIR}" \
data.images.images_ext="${IMG_EXTENSION}" \
data.point_cloud_path="${SCENE_PLY_PATH}" \
output.output_directory="${OUTPUT_FOLDER_DIRECTORY}" \
output.save_crops="${SAVE_CROPS}" \
hydra.run.dir="${OUTPUT_FOLDER_DIRECTORY}/hydra_outputs/mask_features_computation" \
external.sam_checkpoint="${SAM_CKPT_PATH}" \
gpu.optimize_gpu_usage="${OPTIMIZE_GPU_USAGE}"
echo "[INFO] Feature computation done!"