├── .gitignore
├── .hydra
│   ├── config.yaml
│   ├── hydra.yaml
│   └── overrides.yaml
├── LICENSE
├── README.md
├── automatic_mask_generator.py
├── benchmark
│   ├── __init__.py
│   ├── evaluate_semantic_instance.py
│   ├── util.py
│   └── util_3d.py
├── conf
│   ├── __init__.py
│   ├── augmentation
│   │   ├── albumentations_aug.yaml
│   │   └── volumentations_aug.yaml
│   ├── callbacks
│   │   └── callbacks_instance_segmentation.yaml
│   ├── config_base_instance_segmentation.yaml
│   ├── config_base_instance_segmentation_generate_mask_trainset.yaml
│   ├── config_base_instance_segmentation_generate_mask_valset.yaml
│   ├── config_base_instance_segmentation_stage1.yaml
│   ├── config_base_instance_segmentation_stage2.yaml
│   ├── data
│   │   ├── collation_functions
│   │   │   ├── voxelize_collate.yaml
│   │   │   ├── voxelize_collate_merge.yaml
│   │   │   └── voxelize_collate_stage2.yaml
│   │   ├── data_loaders
│   │   │   ├── simple_loader.yaml
│   │   │   └── simple_loader_save_memory.yaml
│   │   ├── datasets
│   │   │   ├── scannet_generate_mask_trainset.yaml
│   │   │   ├── scannet_generate_mask_valset.yaml
│   │   │   ├── scannet_stage1.yaml
│   │   │   ├── scannet_stage2.yaml
│   │   │   └── scannetpp.yaml
│   │   └── indoor.yaml
│   ├── logging
│   │   ├── base.yaml
│   │   ├── full.yaml
│   │   ├── minimal.yaml
│   │   └── offline.yaml
│   ├── loss
│   │   ├── cross_entropy.yaml
│   │   └── set_criterion.yaml
│   ├── matcher
│   │   └── hungarian_matcher.yaml
│   ├── metrics
│   │   └── miou.yaml
│   ├── model
│   │   ├── mask3d.yaml
│   │   └── mask3d_no_aux.yaml
│   ├── optimizer
│   │   ├── adamw.yaml
│   │   └── adamw_lower.yaml
│   ├── scheduler
│   │   ├── exponentiallr.yaml
│   │   ├── lambdalr.yaml
│   │   └── onecyclelr.yaml
│   └── trainer
│       ├── trainer.yaml
│       └── trainer_stage2.yaml
├── data
│   └── scannet_info
│       ├── intrinsics.txt
│       ├── scannet_train.txt
│       └── scannet_val.txt
├── datasets
│   ├── __init__.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── base_preprocessing.py
│   │   ├── scannet_preprocessing.py
│   │   └── scannetpp_preprocessing.py
│   ├── random_cuboid.py
│   ├── scannet200
│   │   ├── __init__.py
│   │   ├── scannet200_constants.py
│   │   └── scannet200_splits.py
│   ├── scannet_generate_mask.py
│   ├── scannet_stage1.py
│   ├── scannet_stage2.py
│   ├── scannetpp.py
│   ├── utils.py
│   └── utils_stage2.py
├── demo.py
├── demo_utils.py
├── docs
│   └── teaser.jpeg
├── main_instance_segmentation.py
├── main_instance_segmentation_generate_mask_trainset.py
├── main_instance_segmentation_generate_mask_valset.py
├── main_instance_segmentation_stage1.py
├── main_instance_segmentation_stage2.py
├── models
│   ├── __init__.py
│   ├── criterion.py
│   ├── mask3d.py
│   ├── mask3d_no_aux.py
│   ├── matcher.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── confusionmatrix.py
│   │   └── metrics.py
│   ├── misc.py
│   ├── model.py
│   ├── modules
│   │   ├── 3detr_helpers.py
│   │   ├── __init__.py
│   │   ├── common.py
│   │   ├── helpers_3detr.py
│   │   ├── resnet_block.py
│   │   ├── resnet_block.py.tmp
│   │   └── senet_block.py
│   ├── position_embedding.py
│   ├── res16unet.py
│   ├── resnet.py
│   ├── resnet.py.tmp
│   ├── resunet.py
│   └── wrapper.py
├── process.py
├── scripts
│   ├── eval.sh
│   ├── generate_mask_trainset.sh
│   ├── generate_mask_valset.sh
│   ├── run_demo.sh
│   ├── train_stage1.sh
│   └── train_stage2.sh
├── third_party
│   ├── Segmentator
│   │   ├── .gitignore
│   │   ├── CMakeLists.txt
│   │   ├── Makefile
│   │   ├── README.md
│   │   ├── process.sh
│   │   ├── segmentator.cpp
│   │   ├── tiny_obj_loader.h
│   │   ├── tinyply.cpp
│   │   └── tinyply.h
│   └── pointnet2
│       ├── _ext_src
│       │   ├── include
│       │   │   ├── ball_query.h
│       │   │   ├── cuda_utils.h
│       │   │   ├── group_points.h
│       │   │   ├── interpolate.h
│       │   │   ├── sampling.h
│       │   │   └── utils.h
│       │   └── src
│       │       ├── ball_query.cpp
│       │       ├── ball_query_gpu.cu
│       │       ├── bindings.cpp
│       │       ├── group_points.cpp
│       │       ├── group_points_gpu.cu
│       │       ├── interpolate.cpp
│       │       ├── interpolate_gpu.cu
│       │       ├── sampling.cpp
│       │       └── sampling_gpu.cu
│       ├── pointnet2_modules.py
│       ├── pointnet2_test.py
│       ├── pointnet2_utils.py
│       ├── pytorch_utils.py
│       └── setup.py
├── trainer
│   ├── __init__.py
│   ├── trainer.py
│   ├── trainer_generate_mask.py
│   ├── trainer_stage1.py
│   └── trainer_stage2.py
└── utils
    ├── __init__.py
    ├── gradflow_check.py
    ├── kfold.py
    ├── pc_visualizations.py
    ├── point_cloud_utils.py
    ├── point_cloud_utils_scannetpp.py
    ├── pointops2
    │   ├── __init__.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   ├── pointops.py
    │   │   ├── pointops2.py
    │   │   ├── pointops_ablation.py
    │   │   ├── test_attention_op_step1.py
    │   │   ├── test_attention_op_step1_v2.py
    │   │   ├── test_attention_op_step2.py
    │   │   ├── test_relative_pos_encoding_op_step1.py
    │   │   ├── test_relative_pos_encoding_op_step1_v2.py
    │   │   ├── test_relative_pos_encoding_op_step1_v3.py
    │   │   ├── test_relative_pos_encoding_op_step2.py
    │   │   └── test_relative_pos_encoding_op_step2_v2.py
    │   ├── setup.py
    │   └── src
    │       ├── __init__.py
    │       ├── aggregation
    │       │   ├── aggregation_cuda.cpp
    │       │   ├── aggregation_cuda_kernel.cu
    │       │   └── aggregation_cuda_kernel.h
    │       ├── attention
    │       │   ├── attention_cuda.cpp
    │       │   ├── attention_cuda_kernel.cu
    │       │   └── attention_cuda_kernel.h
    │       ├── attention_v2
    │       │   ├── attention_cuda_kernel_v2.cu
    │       │   ├── attention_cuda_kernel_v2.h
    │       │   └── attention_cuda_v2.cpp
    │       ├── cuda_utils.h
    │       ├── grouping
    │       │   ├── grouping_cuda.cpp
    │       │   ├── grouping_cuda_kernel.cu
    │       │   └── grouping_cuda_kernel.h
    │       ├── interpolation
    │       │   ├── interpolation_cuda.cpp
    │       │   ├── interpolation_cuda_kernel.cu
    │       │   └── interpolation_cuda_kernel.h
    │       ├── knnquery
    │       │   ├── knnquery_cuda.cpp
    │       │   ├── knnquery_cuda_kernel.cu
    │       │   └── knnquery_cuda_kernel.h
    │       ├── pointops_api.cpp
    │       ├── rpe
    │       │   ├── relative_pos_encoding_cuda.cpp
    │       │   ├── relative_pos_encoding_cuda_kernel.cu
    │       │   └── relative_pos_encoding_cuda_kernel.h
    │       ├── rpe_v2
    │       │   ├── relative_pos_encoding_cuda_kernel_v2.cu
    │       │   ├── relative_pos_encoding_cuda_kernel_v2.h
    │       │   └── relative_pos_encoding_cuda_v2.cpp
    │       ├── sampling
    │       │   ├── sampling_cuda.cpp
    │       │   ├── sampling_cuda_kernel.cu
    │       │   └── sampling_cuda_kernel.h
    │       └── subtraction
    │           ├── subtraction_cuda.cpp
    │           ├── subtraction_cuda_kernel.cu
    │           └── subtraction_cuda_kernel.h
    ├── utils.py
    └── votenet_utils
        ├── box_util.py
        ├── eval_det.py
        ├── metric_util.py
        ├── nms.py
        ├── nn_distance.py
        ├── pc_util.py
        ├── tf_logger.py
        └── tf_visualizer.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/saved
/logs
# /data
/eval_output
third_party/MinkowskiEngine
third_party/ScanNet
third_party/segment-anything
demo_test/scene1
demo_test/scene2
data/processed
checkpoints/segment3d.ckpt
*.out

.vscode/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

--------------------------------------------------------------------------------
/.hydra/overrides.yaml:
--------------------------------------------------------------------------------
- general.experiment_name=train_stage2_20240914_154910
- general.project_name=scannet
- optimizer.lr=0.0002
- data.batch_size=2
- data.num_workers=2
- trainer.max_epochs=50
- trainer.log_every_n_steps=5
- trainer.check_val_every_n_epoch=5
- general.train_mode=true
- general.eval_on_segments=false
- general.train_on_segments=false
- model.num_queries=150
- matcher.cost_class=0.0
- general.topk_per_image=-1
- general.use_dbscan=false
- general.gpus=4
- general.save_visualizations=False
- general.checkpoint=/cluster/nvme6/hr/Mask3D/saved/scannet_sam_20240227_041848/last.ckpt
- data.use_masks_th=150
- data.use_masks_score=0.6
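A minimal sketch (not part of the repository) of how the recorded overrides above can be replayed through Hydra's compose API; the config name is assumed to be the stage-2 base config this run used, and the override strings are taken verbatim from the file:

    from hydra import initialize, compose

    # Replay a subset of the recorded overrides against the composed config.
    with initialize(config_path="conf"):
        cfg = compose(
            config_name="config_base_instance_segmentation_stage2",
            overrides=[
                "optimizer.lr=0.0002",
                "data.batch_size=2",
                "model.num_queries=150",
                "trainer.max_epochs=50",
            ],
        )
    print(cfg.optimizer.lr)  # 0.0002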
--------------------------------------------------------------------------------
/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/benchmark/__init__.py

--------------------------------------------------------------------------------
/benchmark/util.py:
--------------------------------------------------------------------------------
import os, sys
import csv

try:
    import numpy as np
except ImportError:  # catch only import failures, not every exception
    print("Failed to import numpy package.")
    sys.exit(-1)
try:
    import imageio
except ImportError:
    print("Please install the module 'imageio' for image processing, e.g.")
    print("pip install imageio")
    sys.exit(-1)


# print an error message and quit
def print_error(message, user_fault=False):
    sys.stderr.write("ERROR: " + str(message) + "\n")
    if user_fault:
        sys.exit(2)
    sys.exit(-1)


# if string s represents an int
def represents_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False


def read_label_mapping(filename, label_from="raw_category", label_to="nyu40id"):
    assert os.path.isfile(filename)
    mapping = dict()
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter="\t")
        for row in reader:
            mapping[row[label_from]] = int(row[label_to])
    # if ints convert
    if represents_int(list(mapping.keys())[0]):
        mapping = {int(k): v for k, v in mapping.items()}
    return mapping


# input: scene_types.txt or scene_types_all.txt
def read_scene_types_mapping(filename, remove_spaces=True):
    assert os.path.isfile(filename)
    mapping = dict()
    lines = open(filename).read().splitlines()
    lines = [line.split("\t") for line in lines]
    if remove_spaces:
        mapping = {x[1].strip(): int(x[0]) for x in lines}
    else:
        mapping = {x[1]: int(x[0]) for x in lines}
    return mapping


# color by label
def visualize_label_image(filename, image):
    height = image.shape[0]
    width = image.shape[1]
    vis_image = np.zeros([height, width, 3], dtype=np.uint8)
    color_palette = create_color_palette()
    for idx, color in enumerate(color_palette):
        vis_image[image == idx] = color
    imageio.imwrite(filename, vis_image)


# color by different instances (mod length of color palette)
def visualize_instance_image(filename, image):
    height = image.shape[0]
    width = image.shape[1]
    vis_image = np.zeros([height, width, 3], dtype=np.uint8)
    color_palette = create_color_palette()
    instances = np.unique(image)
    for idx, inst in enumerate(instances):
        vis_image[image == inst] = color_palette[inst % len(color_palette)]
    imageio.imwrite(filename, vis_image)


# color palette for nyu40 labels
def create_color_palette():
    return [
        (0, 0, 0),
        (174, 199, 232),  # wall
        (152, 223, 138),  # floor
        (31, 119, 180),  # cabinet
        (255, 187, 120),  # bed
        (188, 189, 34),  # chair
        (140, 86, 75),  # sofa
        (255, 152, 150),  # table
        (214, 39, 40),  # door
        (197, 176, 213),  # window
        (148, 103, 189),  # bookshelf
        (196, 156, 148),  # picture
        (23, 190, 207),  # counter
        (178, 76, 76),
        (247, 182, 210),  # desk
        (66, 188, 102),
        (219, 219, 141),  # curtain
        (140, 57, 197),
        (202, 185, 52),
        (51, 176, 203),
        (200, 54, 131),
        (92, 193, 61),
        (78, 71, 183),
        (172, 114, 82),
        (255, 127, 14),  # refrigerator
        (91, 163, 138),
        (153, 98, 156),
        (140, 153, 101),
        (158, 218, 229),  # shower curtain
        (100, 125, 154),
        (178, 127, 135),
        (120, 185, 128),
        (146, 111, 194),
        (44, 160, 44),  # toilet
        (112, 128, 144),  # sink
        (96, 207, 209),
        (227, 119, 194),  # bathtub
        (213, 92, 176),
        (94, 106, 211),
        (82, 84, 163),  # otherfurn
        (100, 85, 144),
    ]
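A short usage sketch for the helpers above; the TSV filename is the standard ScanNet label-mapping file (shipped with the ScanNet release, not with this repository), so treat it as an assumption:

    from benchmark.util import read_label_mapping, create_color_palette

    # raw ScanNet category names -> NYU40 ids
    mapping = read_label_mapping(
        "scannetv2-labels.combined.tsv",
        label_from="raw_category", label_to="nyu40id",
    )
    print(mapping["chair"])  # 5 under the standard NYU40 mapping

    palette = create_color_palette()
    print(palette[5])  # (188, 189, 34), the NYU40 "chair" color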
--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/conf/__init__.py

--------------------------------------------------------------------------------
/conf/augmentation/albumentations_aug.yaml:
--------------------------------------------------------------------------------
__version__: 0.4.5
transform:
  __class_fullname__: albumentations.core.composition.Compose
  additional_targets: {}
  bbox_params: null
  keypoint_params: null
  p: 1.0
  transforms:
  - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast
    always_apply: true
    brightness_by_max: true
    brightness_limit:
    - -0.2
    - 0.2
    contrast_limit:
    - -0.2
    - 0.2
    p: 0.5
  - __class_fullname__: albumentations.augmentations.transforms.RGBShift
    always_apply: true
    b_shift_limit:
    - -20
    - 20
    g_shift_limit:
    - -20
    - 20
    p: 0.5
    r_shift_limit:
    - -20
    - 20

--------------------------------------------------------------------------------
/conf/augmentation/volumentations_aug.yaml:
--------------------------------------------------------------------------------
# pi = 3.14159265358979
# pi/2 = 1.57079632679489
# pi/3 = 1.04719755119659
# pi/6 = 0.52359877559829
# pi/12 = 0.26179938779914
# pi/24 = 0.13089969389957
#
__version__: 0.1.6
transform:
  __class_fullname__: volumentations.core.composition.Compose
  additional_targets: {}
  p: 1.0
  transforms:
  - __class_fullname__: volumentations.augmentations.transforms.Scale3d
    always_apply: true
    p: 0.5
    scale_limit:
    - - -0.1
      - 0.1
    - - -0.1
      - 0.1
    - - -0.1
      - 0.1
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 0
    - 0
    - 1
    p: 0.5
    rotation_limit:
    - -3.141592653589793
    - 3.141592653589793
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 0
    - 1
    - 0
    p: 0.5
    rotation_limit:
    - -0.13089969389957
    - 0.13089969389957
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 1
    - 0
    - 0
    p: 0.5
    rotation_limit:
    - -0.13089969389957
    - 0.13089969389957

--------------------------------------------------------------------------------
/conf/callbacks/callbacks_instance_segmentation.yaml:
--------------------------------------------------------------------------------
# @package _group_
- _target_: pytorch_lightning.callbacks.ModelCheckpoint
  monitor: val_mean_box_ap_50
  save_last: true
  save_top_k: 1
  mode: max
  dirpath: ${general.save_dir}
  filename: "{epoch}-{val_mean_box_ap_50:.3f}"
  every_n_epochs: 1

- _target_: pytorch_lightning.callbacks.LearningRateMonitor
  logging_interval: step
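These callback entries are plain `_target_` nodes, so the PyTorch Lightning objects are built from them at runtime via Hydra. A hedged sketch of that mechanism (the values are copied from the file above; `dirpath` and `filename` are omitted because they interpolate runtime settings):

    import hydra.utils
    from omegaconf import OmegaConf

    checkpoint_cfg = OmegaConf.create({
        "_target_": "pytorch_lightning.callbacks.ModelCheckpoint",
        "monitor": "val_mean_box_ap_50",
        "save_last": True,
        "save_top_k": 1,
        "mode": "max",
    })
    callback = hydra.utils.instantiate(checkpoint_cfg)  # a ModelCheckpoint instance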
--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation.yaml:
--------------------------------------------------------------------------------
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}
  test_scene: 'test.ply'

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannetpp
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d_no_aux
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}

--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation_generate_mask_trainset.yaml:
--------------------------------------------------------------------------------
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet_generate_mask_trainset
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer_stage2
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}

--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation_generate_mask_valset.yaml:
--------------------------------------------------------------------------------
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet_generate_mask_valset
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer_stage2
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}
--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation_stage1.yaml:
--------------------------------------------------------------------------------
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet_stage1
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}

--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation_stage2.yaml:
--------------------------------------------------------------------------------
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet_stage2
  - data/collation_functions: voxelize_collate_stage2
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer_stage2
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}
--------------------------------------------------------------------------------
/conf/data/collation_functions/voxelize_collate.yaml:
--------------------------------------------------------------------------------
# @package data

train_collation:
  _target_: datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.train_mode}
  small_crops: false
  very_small_crops: false
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.train_dataset.filter_out_classes}
  label_offset: ${data.train_dataset.label_offset}
  num_queries: ${model.num_queries}

validation_collation:
  _target_: datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.validation_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.validation_dataset.filter_out_classes}
  label_offset: ${data.validation_dataset.label_offset}
  num_queries: ${model.num_queries}

test_collation:
  _target_: datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.test_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.test_dataset.filter_out_classes}
  label_offset: ${data.test_dataset.label_offset}
  num_queries: ${model.num_queries}

--------------------------------------------------------------------------------
/conf/data/collation_functions/voxelize_collate_merge.yaml:
--------------------------------------------------------------------------------
# @package data

train_collation:
  _target_: datasets.utils.VoxelizeCollateMerge
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.train_mode}
  small_crops: false
  very_small_crops: false
  scenes: 2
  batch_instance: false
  make_one_pc_noise: false
  place_nearby: false
  place_far: false
  proba: 1
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}

validation_collation:
  _target_: datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.validation_mode}
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}

test_collation:
  _target_: datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.test_mode}
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}
--------------------------------------------------------------------------------
/conf/data/collation_functions/voxelize_collate_stage2.yaml:
--------------------------------------------------------------------------------
# @package data

train_collation:
  _target_: datasets.utils_stage2.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.train_mode}
  small_crops: false
  very_small_crops: false
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.train_dataset.filter_out_classes}
  label_offset: ${data.train_dataset.label_offset}
  num_queries: ${model.num_queries}
  generate_masks_path: ${data.generate_masks_path}
  use_masks_th: ${data.use_masks_th}
  use_masks_score: ${data.use_masks_score}

validation_collation:
  _target_: datasets.utils_stage2.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.validation_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.validation_dataset.filter_out_classes}
  label_offset: ${data.validation_dataset.label_offset}
  num_queries: ${model.num_queries}
  generate_masks_path: ${data.generate_masks_path}
  use_masks_th: ${data.use_masks_th}
  use_masks_score: ${data.use_masks_score}

test_collation:
  _target_: datasets.utils_stage2.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.test_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.test_dataset.filter_out_classes}
  label_offset: ${data.test_dataset.label_offset}
  num_queries: ${model.num_queries}
  generate_masks_path: ${data.generate_masks_path}
  use_masks_th: ${data.use_masks_th}
  use_masks_score: ${data.use_masks_score}

--------------------------------------------------------------------------------
/conf/data/data_loaders/simple_loader.yaml:
--------------------------------------------------------------------------------
# @package data

train_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: true
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.batch_size}

validation_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.test_batch_size}

test_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.test_batch_size}
--------------------------------------------------------------------------------
/conf/data/data_loaders/simple_loader_save_memory.yaml:
--------------------------------------------------------------------------------
# @package data

train_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: true
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.batch_size}

validation_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: 1
  batch_size: ${data.test_batch_size}

test_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: 1
  batch_size: ${data.test_batch_size}
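A minimal sketch of how these loader configs combine with a collation function at runtime: the DataLoader arguments come from this group, the `collate_fn` from the collation_functions group. `cfg` (a composed config) and `train_dataset` are assumed to exist:

    import hydra.utils
    from torch.utils.data import DataLoader

    collate_fn = hydra.utils.instantiate(cfg.data.train_collation)
    train_loader = DataLoader(
        train_dataset,
        shuffle=True,
        pin_memory=cfg.data.pin_memory,
        num_workers=cfg.data.num_workers,  # 4 by default (conf/data/indoor.yaml)
        batch_size=cfg.data.batch_size,
        collate_fn=collate_fn,             # voxelizes and batches the point clouds
    )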
--------------------------------------------------------------------------------
/conf/data/datasets/scannet_generate_mask_trainset.yaml:
--------------------------------------------------------------------------------
# @package data
train_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

validation_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

test_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

--------------------------------------------------------------------------------
/conf/data/datasets/scannet_generate_mask_valset.yaml:
--------------------------------------------------------------------------------
# @package data
train_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

validation_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

test_dataset:
  _target_: datasets.scannet_generate_mask.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0
--------------------------------------------------------------------------------
/conf/data/datasets/scannet_stage1.yaml:
--------------------------------------------------------------------------------
# @package data
train_dataset:
  _target_: datasets.scannet_stage1.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

validation_dataset:
  _target_: datasets.scannet_stage1.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

test_dataset:
  _target_: datasets.scannet_stage1.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0
--------------------------------------------------------------------------------
/conf/data/datasets/scannet_stage2.yaml:
--------------------------------------------------------------------------------
# @package data
train_dataset:
  _target_: datasets.scannet_stage2.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

validation_dataset:
  _target_: datasets.scannet_stage2.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0

test_dataset:
  _target_: datasets.scannet_stage2.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet200
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet200/label_database.yaml
  color_mean_std: data/processed/scannet200/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: []
  label_offset: 0
--------------------------------------------------------------------------------
/conf/data/datasets/scannetpp.yaml:
--------------------------------------------------------------------------------
# @package data
train_dataset:
  _target_: datasets.scannetpp.SemanticSegmentationDataset
  dataset_name: "scannetpp"
  data_dir: data/processed/scannetpp
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannetpp/label_database.yaml
  color_mean_std: data/processed/scannetpp/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [1]
  label_offset: 0

validation_dataset:
  _target_: datasets.scannetpp.SemanticSegmentationDataset
  dataset_name: "scannetpp"
  data_dir: data/processed/scannetpp
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannetpp/label_database.yaml
  color_mean_std: data/processed/scannetpp/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [1]
  label_offset: 0

test_dataset:
  _target_: datasets.scannetpp.SemanticSegmentationDataset
  dataset_name: "scannetpp"
  data_dir: data/processed/scannetpp
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannetpp/label_database.yaml
  color_mean_std: data/processed/scannetpp/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [1]
  label_offset: 0

--------------------------------------------------------------------------------
/conf/data/indoor.yaml:
--------------------------------------------------------------------------------
# @package _group_

# these parameters are inherited by datasets, data_loaders and collators
# but they might be overwritten

# splits
train_mode: train
validation_mode: validation
test_mode: validation # test # validation

# dataset
ignore_label: 255
add_raw_coordinates: true # 3dim
add_colors: true # 3dim
add_normals: false # 3dim
in_channels: 3 # in_channels = 3 * (add_normals + add_colors + add_raw_coordinates)
num_labels: 20
# num_labels: 41
add_instance: ${general.add_instance}
task: ${general.task}

# data loader
pin_memory: false
num_workers: 4
batch_size: 5
test_batch_size: 1
cache_data: false

# collation
voxel_size: 0.02

reps_per_epoch: ${general.reps_per_epoch}

cropping: false
cropping_args:
  min_points: 30000
  aspect: 0.8
  min_crop: 0.5
  max_crop: 1.0

crop_min_size: 20000
crop_length: 6.0
cropping_v1: true

remove_small_group: 15

generate_masks_path: "data/processed/scannet_3d_masks"
use_masks_th: 150
use_masks_score: 0.6
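A back-of-the-envelope sketch of the quantization implied by `voxel_size: 0.02` above; this is the standard integer binning used by sparse-convolution pipelines and is an illustration, not this repository's exact collation code:

    import numpy as np

    points = np.random.rand(1000, 3) * 5.0  # stand-in point cloud, metres
    voxel_size = 0.02                       # from conf/data/indoor.yaml
    voxel_coords = np.floor(points / voxel_size).astype(np.int32)
    _, unique_idx = np.unique(voxel_coords, axis=0, return_index=True)
    print(len(unique_idx), "occupied 2 cm voxels")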
--------------------------------------------------------------------------------
/conf/logging/base.yaml:
--------------------------------------------------------------------------------
# @package _group_
- _target_: pytorch_lightning.loggers.NeptuneLogger
  project_name: ${general.workspace}/${general.project_name}
  experiment_name: ${general.experiment_name}
  offline_mode: false

- _target_: pytorch_lightning.loggers.CSVLogger
  save_dir: ${general.save_dir}
  name: ${general.experiment_id}
  version: ${general.version}

--------------------------------------------------------------------------------
/conf/logging/full.yaml:
--------------------------------------------------------------------------------
# @package _group_
- _target_: pytorch_lightning.loggers.WandbLogger
  project: ${general.project_name}
  name: ${general.experiment_name}
  save_dir: ${general.save_dir}
  entity: "hr"
  resume: "allow"
  id: ${general.experiment_name}

--------------------------------------------------------------------------------
/conf/logging/minimal.yaml:
--------------------------------------------------------------------------------
# @package _group_
- _target_: pytorch_lightning.loggers.CSVLogger
  save_dir: ${general.save_dir}
  name: ${general.experiment_name}
  # version: ${general.version}

--------------------------------------------------------------------------------
/conf/logging/offline.yaml:
--------------------------------------------------------------------------------
# @package _group_
- _target_: pytorch_lightning.loggers.TensorBoardLogger
  name: ${general.experiment_id}
  version: ${general.version}
  save_dir: ${general.save_dir}

- _target_: pytorch_lightning.loggers.CSVLogger
  name: ${general.experiment_id}
  version: ${general.version}
  save_dir: ${general.save_dir}

--------------------------------------------------------------------------------
/conf/loss/cross_entropy.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: torch.nn.CrossEntropyLoss
ignore_index: ${data.ignore_label}

--------------------------------------------------------------------------------
/conf/loss/set_criterion.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: models.criterion.SetCriterion
num_classes: 2
eos_coef: 0.1
losses:
  - "labels"
  - "masks"
num_points: ${matcher.num_points}
oversample_ratio: 3.0
importance_sample_ratio: 0.75
class_weights: -1

--------------------------------------------------------------------------------
/conf/matcher/hungarian_matcher.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: models.matcher.HungarianMatcher
cost_class: 2.
cost_mask: 5.
cost_dice: 2.
num_points: -1
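A hedged sketch of the weighted assignment these three weights parameterize in DETR-style matching (as used by Mask3D-like models); the per-pair cost matrices below are random stand-ins for the real classification, mask, and dice costs:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    num_queries, num_targets = 4, 3
    cost_class = np.random.rand(num_queries, num_targets)
    cost_mask = np.random.rand(num_queries, num_targets)
    cost_dice = np.random.rand(num_queries, num_targets)

    # total cost with the weights above: 2*class + 5*mask + 2*dice
    C = 2.0 * cost_class + 5.0 * cost_mask + 2.0 * cost_dice
    rows, cols = linear_sum_assignment(C)  # optimal query-to-target matching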
--------------------------------------------------------------------------------
/conf/metrics/miou.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: models.metrics.ConfusionMatrix
num_classes: ${data.num_labels}
ignore_label: ${data.ignore_label}

--------------------------------------------------------------------------------
/conf/model/mask3d.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: models.Mask3D

# transformer parameters
hidden_dim: 128
dim_feedforward: 1024
num_queries: 100
num_heads: 8
num_decoders: 3
dropout: 0.0
pre_norm: false
use_level_embed: false
normalize_pos_enc: true
positional_encoding_type: "fourier"
gauss_scale: 1.0
hlevels: [0,1,2,3]

# queries
non_parametric_queries: true
random_query_both: false
random_normal: false
random_queries: false
use_np_features: false

# sampling
sample_sizes: [200, 800, 3200, 12800, 51200]
max_sample_size: false # false means sampling is activated

shared_decoder: true
num_classes: 2
train_on_segments: ${general.train_on_segments}
scatter_type: "mean"

voxel_size: ${data.voxel_size}

config:
  backbone:
    _target_: models.Res16UNet34C
    config:
      dialations: [ 1, 1, 1, 1 ]
      conv1_kernel_size: 5
      bn_momentum: 0.02
    # depends on normals, color, raw_coordinates
    # varies from 3 to 9
    in_channels: ${data.in_channels}
    out_channels: ${data.num_labels}
    out_fpn: true

--------------------------------------------------------------------------------
/conf/model/mask3d_no_aux.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: models.Mask3D_no_aux

# transformer parameters
hidden_dim: 128
dim_feedforward: 1024
num_queries: 100
num_heads: 8
num_decoders: 3
dropout: 0.0
pre_norm: false
use_level_embed: false
normalize_pos_enc: true
positional_encoding_type: "fourier"
gauss_scale: 1.0
hlevels: [0,1,2,3]

# queries
non_parametric_queries: true
random_query_both: false
random_normal: false
random_queries: false
use_np_features: false

# sampling
sample_sizes: [200, 800, 3200, 12800, 51200]
max_sample_size: false # false means sampling is activated

shared_decoder: true
num_classes: 2
train_on_segments: ${general.train_on_segments}
scatter_type: "mean"

voxel_size: ${data.voxel_size}

config:
  backbone:
    _target_: models.Res16UNet34C
    config:
      dialations: [ 1, 1, 1, 1 ]
      conv1_kernel_size: 5
      bn_momentum: 0.02
    # depends on normals, color, raw_coordinates
    # varies from 3 to 9
    in_channels: ${data.in_channels}
    out_channels: ${data.num_labels}
    out_fpn: true

--------------------------------------------------------------------------------
/conf/optimizer/adamw.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: torch.optim.AdamW
lr: 0.0001

--------------------------------------------------------------------------------
/conf/optimizer/adamw_lower.yaml:
--------------------------------------------------------------------------------
# @package _group_
_target_: torch.optim.AdamW
lr: 0.005

--------------------------------------------------------------------------------
/conf/scheduler/exponentiallr.yaml:
--------------------------------------------------------------------------------
# @package _group_

scheduler:
  _target_: torch.optim.lr_scheduler.ExponentialLR
  gamma: 0.99999
  last_epoch: -1 # ${trainer.max_epochs}
  # need to set to number because of tensorboard logger
  # steps_per_epoch: -1

pytorch_lightning_params:
  interval: step

--------------------------------------------------------------------------------
/conf/scheduler/lambdalr.yaml:
--------------------------------------------------------------------------------
# @package _group_

scheduler:
  _target_: torch.optim.lr_scheduler.StepLR
  step_size: 99999

pytorch_lightning_params:
  interval: epoch

--------------------------------------------------------------------------------
/conf/scheduler/onecyclelr.yaml:
--------------------------------------------------------------------------------
# @package _group_

scheduler:
  _target_: torch.optim.lr_scheduler.OneCycleLR
  max_lr: ${optimizer.lr}
  epochs: ${trainer.max_epochs}
  # need to set to number because of tensorboard logger
  steps_per_epoch: -1

pytorch_lightning_params:
  interval: step
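`steps_per_epoch: -1` above is a placeholder that has to be replaced with a real value at runtime (typically `len(train_dataloader)`). A minimal standalone sketch with assumed values:

    import torch

    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=1e-4, epochs=20, steps_per_epoch=100
    )
    for _ in range(5):
        optimizer.step()
        scheduler.step()  # stepped once per batch, matching `interval: step`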
-------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.optim.AdamW 3 | lr: 0.005 4 | -------------------------------------------------------------------------------- /conf/scheduler/exponentiallr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.ExponentialLR 5 | gamma: 0.99999 6 | last_epoch: -1 # ${trainer.max_epochs} 7 | # need to set to number because of tensorboard logger 8 | # steps_per_epoch: -1 9 | 10 | pytorch_lightning_params: 11 | interval: step 12 | -------------------------------------------------------------------------------- /conf/scheduler/lambdalr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.StepLR 5 | step_size: 99999 6 | 7 | pytorch_lightning_params: 8 | interval: epoch 9 | -------------------------------------------------------------------------------- /conf/scheduler/onecyclelr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.OneCycleLR 5 | max_lr: ${optimizer.lr} 6 | epochs: ${trainer.max_epochs} 7 | # need to set to number because of tensorboard logger 8 | steps_per_epoch: -1 9 | 10 | pytorch_lightning_params: 11 | interval: step 12 | -------------------------------------------------------------------------------- /conf/trainer/trainer.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | deterministic: false 3 | max_epochs: 20 4 | min_epochs: 1 5 | resume_from_checkpoint: null 6 | num_sanity_val_steps: 2 7 | val_check_interval: 100 8 | log_every_n_steps: 5 9 | -------------------------------------------------------------------------------- /conf/trainer/trainer_stage2.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | deterministic: false 3 | max_epochs: 50 4 | min_epochs: 1 5 | resume_from_checkpoint: null 6 | check_val_every_n_epoch: 50 7 | num_sanity_val_steps: 2 8 | log_every_n_steps: 5 -------------------------------------------------------------------------------- /data/scannet_info/intrinsics.txt: -------------------------------------------------------------------------------- 1 | 577.870605 0.000000 319.500000 0.000000 2 | 0.000000 577.870605 239.500000 0.000000 3 | 0.000000 0.000000 1.000000 0.000000 4 | 0.000000 0.000000 0.000000 1.000000 5 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/datasets/preprocessing/__init__.py -------------------------------------------------------------------------------- /datasets/random_cuboid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
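# A minimal usage sketch for the RandomCuboid augmentation defined below;
# the values are illustrative, not taken from this repository. Note that
# __call__ returns a boolean keep-mask over the input points rather than
# the cropped points themselves, and that only x/y are constrained (the
# full z extent is always kept):
#
#   crop = RandomCuboid(min_points=30000, crop_length=6.0)
#   keep = crop(point_cloud)   # point_cloud: (N, 3+) array -> keep: (N,) bool
#   point_cloud = point_cloud[keep]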
2 | import numpy as np 3 | import torch 4 | 5 | 6 | def check_aspect(crop_range, aspect_min): 7 | xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2]) 8 | xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]]) 9 | yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:]) 10 | return ( 11 | (xy_aspect >= aspect_min) 12 | or (xz_aspect >= aspect_min) 13 | or (yz_aspect >= aspect_min) 14 | ) 15 | 16 | 17 | class RandomCuboid(object): 18 | """ 19 | RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691] 20 | We slightly modify this operation to account for object detection. 21 | This augmentation randomly crops a cuboid from the input and 22 | ensures that the cropped cuboid contains at least one bounding box 23 | """ 24 | 25 | def __init__( 26 | self, 27 | min_points, 28 | # aspect=0.8, 29 | crop_length=6.0, 30 | version1=True, 31 | ): 32 | # self.aspect = aspect 33 | self.crop_length = crop_length 34 | self.min_points = min_points 35 | self.version1 = version1 36 | 37 | def __call__(self, point_cloud): 38 | if point_cloud.shape[0] < self.min_points: 39 | print("too small pcd") 40 | return np.ones(point_cloud.shape[0], dtype=bool) # np.bool was removed in NumPy 1.24+; use the builtin bool 41 | 42 | range_xyz = np.max(point_cloud[:, :2], axis=0) - np.min( 43 | point_cloud[:, :2], axis=0 44 | ) 45 | 46 | for _ in range(100): 47 | # crop_range = self.min_crop + np.random.rand(3) * ( 48 | # self.max_crop - self.min_crop 49 | # ) 50 | # crop_range[-1] = 999. 51 | # if not check_aspect(crop_range, self.aspect): 52 | # continue 53 | 54 | sample_center = point_cloud[:, :2].min(axis=0) + range_xyz / 2 55 | 56 | if self.version1: 57 | offset_x = np.random.uniform( 58 | -range_xyz[0] / 4, range_xyz[0] / 4 59 | ) 60 | offset_y = np.random.uniform( 61 | -range_xyz[1] / 4, range_xyz[1] / 4 62 | ) 63 | else: 64 | offset_x = np.random.uniform( 65 | -(range_xyz[0] / 2) + self.crop_length / 4, 66 | +(range_xyz[0] / 2) - self.crop_length / 4, 67 | ) 68 | offset_y = np.random.uniform( 69 | -(range_xyz[1] / 2) + self.crop_length / 4, 70 | +(range_xyz[1] / 2) - self.crop_length / 4, 71 | ) 72 | 73 | sample_center[0] = sample_center[0] + offset_x 74 | sample_center[1] = sample_center[1] + offset_y 75 | 76 | min_xy = sample_center - self.crop_length / 2 77 | max_xy = sample_center + self.crop_length / 2 78 | 79 | upper_idx = ( 80 | np.sum((point_cloud[:, :2] <= max_xy).astype(np.int32), 1) == 2 81 | ) 82 | lower_idx = ( 83 | np.sum((point_cloud[:, :2] >= min_xy).astype(np.int32), 1) == 2 84 | ) 85 | 86 | new_pointidx = (upper_idx) & (lower_idx) 87 | 88 | if np.sum(new_pointidx) < self.min_points: 89 | print("TOO SMALL") 90 | continue 91 | 92 | return new_pointidx 93 | 94 | # fallback 95 | print("FALLBACK") 96 | return np.ones(point_cloud.shape[0], dtype=bool) 97 | -------------------------------------------------------------------------------- /datasets/scannet200/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/datasets/scannet200/__init__.py -------------------------------------------------------------------------------- /demo_utils.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | import torch 3 | 4 | from models.mask3d import Mask3D 5 | from models.mask3d_no_aux import Mask3D_no_aux 6 | from utils.utils import ( 7 | load_checkpoint_with_missing_or_exsessive_keys, 8 | load_backbone_checkpoint_with_missing_or_exsessive_keys, 9
| ) 10 | 11 | from omegaconf import OmegaConf, DictConfig 12 | import hydra 13 | from hydra.core.global_hydra import GlobalHydra 14 | from hydra.experimental import initialize, compose 15 | 16 | import albumentations as A 17 | import MinkowskiEngine as ME 18 | import numpy as np 19 | import open3d as o3d 20 | 21 | 22 | class InstanceSegmentation(torch.nn.Module): 23 | def __init__(self, cfg): 24 | super().__init__() 25 | self.model = hydra.utils.instantiate(cfg.model) 26 | 27 | 28 | def forward(self, x, point2segment=None, raw_coordinates=None): 29 | return self.model(x, point2segment=point2segment, raw_coordinates=raw_coordinates) 30 | 31 | 32 | def get_model(cfg): 33 | 34 | model = InstanceSegmentation(cfg) 35 | 36 | if cfg.general.backbone_checkpoint is not None: 37 | cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys( 38 | cfg, model 39 | ) 40 | if cfg.general.checkpoint is not None: 41 | cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model) 42 | 43 | return model 44 | 45 | 46 | def load_mesh(pcl_file): 47 | 48 | # load point cloud 49 | input_mesh_path = pcl_file 50 | mesh = o3d.io.read_triangle_mesh(input_mesh_path) 51 | return mesh 52 | 53 | def prepare_data(cfg, mesh, point2segment, device): 54 | 55 | # normalization for point cloud features 56 | color_mean = (0.47793125906962, 0.4303257521323044, 0.3749598901421883) 57 | color_std = (0.2834475483823543, 0.27566157565723015, 0.27018971370874995) 58 | normalize_color = A.Normalize(mean=color_mean, std=color_std) 59 | 60 | points = np.asarray(mesh.vertices) 61 | colors = np.asarray(mesh.vertex_colors) 62 | colors = colors * 255. 63 | 64 | pseudo_image = colors.astype(np.uint8)[np.newaxis, :, :] 65 | colors = np.squeeze(normalize_color(image=pseudo_image)["image"]) 66 | 67 | coords = np.floor(points / cfg.data.voxel_size) 68 | _, _, unique_map, inverse_map = ME.utils.sparse_quantize( 69 | coordinates=coords, 70 | features=colors, 71 | return_index=True, 72 | return_inverse=True, 73 | ) 74 | 75 | sample_coordinates = coords[unique_map] 76 | coordinates = [torch.from_numpy(sample_coordinates).int()] 77 | sample_features = colors[unique_map] 78 | features = [torch.from_numpy(sample_features).float()] 79 | raw_coordinates = torch.from_numpy(points[unique_map]).float() 80 | 81 | coordinates, _ = ME.utils.sparse_collate(coords=coordinates, feats=features) 82 | features = torch.cat(features, dim=0) 83 | data = ME.SparseTensor( 84 | coordinates=coordinates, 85 | features=features, 86 | device=device, 87 | ) 88 | 89 | if point2segment is not None: 90 | point2segment_full = point2segment.copy() 91 | _, _, ret_inv = np.unique(point2segment_full, return_index=True, return_inverse=True) 92 | point2segment_full = torch.from_numpy(ret_inv).unsqueeze(0) 93 | 94 | point2segment = point2segment[unique_map] 95 | _, _, ret_inv = np.unique(point2segment, return_index=True, return_inverse=True) 96 | point2segment = torch.from_numpy(ret_inv).unsqueeze(0).to(device) 97 | else: 98 | point2segment_full = None 99 | 100 | return data, point2segment, point2segment_full, raw_coordinates, inverse_map 101 | -------------------------------------------------------------------------------- /docs/teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/docs/teaser.jpeg -------------------------------------------------------------------------------- /main_instance_segmentation.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from hashlib import md5 4 | from uuid import uuid4 5 | import hydra 6 | from dotenv import load_dotenv 7 | from omegaconf import DictConfig, OmegaConf 8 | from trainer.trainer import InstanceSegmentation, RegularCheckpointing 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from utils.utils import ( 11 | flatten_dict, 12 | load_baseline_model, 13 | load_checkpoint_with_missing_or_exsessive_keys, 14 | load_backbone_checkpoint_with_missing_or_exsessive_keys, 15 | ) 16 | from pytorch_lightning import Trainer, seed_everything 17 | import torch 18 | from detectron2.utils.comm import is_main_process 19 | 20 | def get_parameters(cfg: DictConfig): 21 | logger = logging.getLogger(__name__) 22 | load_dotenv(".env") 23 | 24 | # parsing input parameters 25 | seed_everything(cfg.general.seed) 26 | 27 | # getting basic configuration 28 | if cfg.general.get("gpus", None) is None: 29 | cfg.general.gpus = os.environ.get("CUDA_VISIBLE_DEVICES", None) 30 | loggers = [] 31 | 32 | # cfg.general.experiment_id = "0" # str(Repo("./").commit())[:8] 33 | # params = flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 34 | 35 | # create unique id for experiments that are run locally 36 | # unique_id = "_" + str(uuid4())[:4] 37 | # cfg.general.version = md5(str(params).encode("utf-8")).hexdigest()[:8] + unique_id 38 | 39 | if not os.path.exists(cfg.general.save_dir): 40 | if is_main_process(): 41 | os.makedirs(cfg.general.save_dir) 42 | else: 43 | print("EXPERIMENT ALREADY EXIST") 44 | if os.path.isfile(f"{cfg.general.save_dir}/last-epoch.ckpt"): 45 | cfg["trainer"][ 46 | "resume_from_checkpoint" 47 | ] = f"{cfg.general.save_dir}/last-epoch.ckpt" 48 | 49 | for log in cfg.logging: 50 | loggers.append(hydra.utils.instantiate(log)) 51 | loggers[-1].log_hyperparams( 52 | flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 53 | ) 54 | 55 | model = InstanceSegmentation(cfg) 56 | if cfg.general.backbone_checkpoint is not None: 57 | cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys( 58 | cfg, model 59 | ) 60 | if cfg.general.checkpoint is not None: 61 | cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model) 62 | 63 | logger.info(flatten_dict(OmegaConf.to_container(cfg, resolve=True))) 64 | return cfg, model, loggers 65 | 66 | 67 | @hydra.main( 68 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 69 | ) 70 | def train(cfg: DictConfig): 71 | os.chdir(hydra.utils.get_original_cwd()) 72 | cfg, model, loggers = get_parameters(cfg) 73 | callbacks = [] 74 | for cb in cfg.callbacks: 75 | callbacks.append(hydra.utils.instantiate(cb)) 76 | 77 | callbacks.append(RegularCheckpointing()) 78 | print(torch.cuda.device_count()) 79 | 80 | runner = Trainer( 81 | accelerator='cuda', 82 | strategy='ddp', 83 | logger=loggers, 84 | gpus=cfg.general.gpus, 85 | callbacks=callbacks, 86 | weights_save_path=str(cfg.general.save_dir), 87 | **cfg.trainer, 88 | ) 89 | runner.fit(model) 90 | 91 | 92 | @hydra.main( 93 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 94 | ) 95 | def test(cfg: DictConfig): 96 | print(torch.cuda.device_count()) 97 | 98 | # because hydra wants to change dir for some reason 99 | os.chdir(hydra.utils.get_original_cwd()) 100 | cfg, model, loggers = get_parameters(cfg) 101 | runner = Trainer( 102 | accelerator='cuda', 103 | strategy='ddp', 104 | gpus=cfg.general.gpus, 105 | logger=loggers, 106 | 
weights_save_path=str(cfg.general.save_dir), 107 | **cfg.trainer, 108 | ) 109 | runner.test(model) 110 | 111 | 112 | @hydra.main( 113 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 114 | ) 115 | def main(cfg: DictConfig): 116 | if cfg["general"]["train_mode"]: 117 | train(cfg) 118 | else: 119 | test(cfg) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() -------------------------------------------------------------------------------- /main_instance_segmentation_stage2.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from hashlib import md5 4 | from uuid import uuid4 5 | import hydra 6 | from dotenv import load_dotenv 7 | from omegaconf import DictConfig, OmegaConf 8 | from trainer.trainer_stage2 import InstanceSegmentation, RegularCheckpointing 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from utils.utils import ( 11 | flatten_dict, 12 | load_baseline_model, 13 | load_checkpoint_with_missing_or_exsessive_keys, 14 | load_backbone_checkpoint_with_missing_or_exsessive_keys, 15 | ) 16 | from pytorch_lightning import Trainer, seed_everything 17 | import torch 18 | from detectron2.utils.comm import is_main_process 19 | 20 | def get_parameters(cfg: DictConfig): 21 | logger = logging.getLogger(__name__) 22 | load_dotenv(".env") 23 | 24 | # parsing input parameters 25 | seed_everything(cfg.general.seed) 26 | 27 | # getting basic configuration 28 | if cfg.general.get("gpus", None) is None: 29 | cfg.general.gpus = os.environ.get("CUDA_VISIBLE_DEVICES", None) 30 | loggers = [] 31 | 32 | # cfg.general.experiment_id = "0" # str(Repo("./").commit())[:8] 33 | # params = flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 34 | 35 | # create unique id for experiments that are run locally 36 | # unique_id = "_" + str(uuid4())[:4] 37 | # cfg.general.version = md5(str(params).encode("utf-8")).hexdigest()[:8] + unique_id 38 | 39 | if not os.path.exists(cfg.general.save_dir): 40 | if is_main_process(): 41 | os.makedirs(cfg.general.save_dir) 42 | else: 43 | print("EXPERIMENT ALREADY EXIST") 44 | if os.path.isfile(f"{cfg.general.save_dir}/last-epoch.ckpt"): 45 | cfg["trainer"][ 46 | "resume_from_checkpoint" 47 | ] = f"{cfg.general.save_dir}/last-epoch.ckpt" 48 | 49 | for log in cfg.logging: 50 | loggers.append(hydra.utils.instantiate(log)) 51 | loggers[-1].log_hyperparams( 52 | flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 53 | ) 54 | 55 | model = InstanceSegmentation(cfg) 56 | if cfg.general.backbone_checkpoint is not None: 57 | cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys( 58 | cfg, model 59 | ) 60 | if cfg.general.checkpoint is not None: 61 | cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model) 62 | 63 | logger.info(flatten_dict(OmegaConf.to_container(cfg, resolve=True))) 64 | return cfg, model, loggers 65 | 66 | 67 | @hydra.main( 68 | config_path="conf", config_name="config_base_instance_segmentation_stage2.yaml" 69 | ) 70 | def train(cfg: DictConfig): 71 | os.chdir(hydra.utils.get_original_cwd()) 72 | cfg, model, loggers = get_parameters(cfg) 73 | callbacks = [] 74 | for cb in cfg.callbacks: 75 | callbacks.append(hydra.utils.instantiate(cb)) 76 | 77 | callbacks.append(RegularCheckpointing()) 78 | print(torch.cuda.device_count()) 79 | 80 | runner = Trainer( 81 | accelerator='cuda', 82 | strategy='ddp', 83 | logger=loggers, 84 | gpus=cfg.general.gpus, 85 | callbacks=callbacks, 86 | weights_save_path=str(cfg.general.save_dir), 87 
| **cfg.trainer, 88 | ) 89 | runner.fit(model) 90 | 91 | 92 | @hydra.main( 93 | config_path="conf", config_name="config_base_instance_segmentation_stage2.yaml" 94 | ) 95 | def test(cfg: DictConfig): 96 | print(torch.cuda.device_count()) 97 | 98 | # because hydra wants to change dir for some reason 99 | os.chdir(hydra.utils.get_original_cwd()) 100 | cfg, model, loggers = get_parameters(cfg) 101 | runner = Trainer( 102 | accelerator='cuda', 103 | strategy='ddp', 104 | gpus=cfg.general.gpus, 105 | logger=loggers, 106 | weights_save_path=str(cfg.general.save_dir), 107 | **cfg.trainer, 108 | ) 109 | runner.test(model) 110 | 111 | 112 | @hydra.main( 113 | config_path="conf", config_name="config_base_instance_segmentation_stage2.yaml" 114 | ) 115 | def main(cfg: DictConfig): 116 | if cfg["general"]["train_mode"]: 117 | train(cfg) 118 | else: 119 | test(cfg) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import models.resunet as resunet 2 | import models.res16unet as res16unet 3 | from models.res16unet import ( 4 | Res16UNet34C, 5 | Res16UNet34A, 6 | Res16UNet14A, 7 | Res16UNet34D, 8 | Res16UNet18D, 9 | Res16UNet18B, 10 | Custom30M, 11 | ) 12 | from models.mask3d import Mask3D 13 | from models.mask3d_no_aux import Mask3D_no_aux 14 | import models.mask3d as mask3d, models.mask3d_no_aux as mask3d_no_aux # module objects referenced by the add_models calls below 15 | MODELS = [] 16 | 17 | 18 | def add_models(module): 19 | MODELS.extend([getattr(module, a) for a in dir(module) if "Net" in a]) 20 | 21 | 22 | add_models(resunet) 23 | add_models(res16unet) 24 | add_models(mask3d) 25 | add_models(mask3d_no_aux) 26 | 27 | 28 | def get_models(): 29 | """Returns the list of registered models.""" 30 | return MODELS 31 | 32 | 33 | def load_model(name): 34 | """Creates and returns an instance of the model given its class name.""" 35 | # Find the model class from its name 36 | all_models = get_models() 37 | mdict = {model.__name__: model for model in all_models} 38 | if name not in mdict: 39 | print("Invalid model name. Options are:") 40 | # Display a list of valid model names 41 | for model in all_models: 42 | print(f"\t* {model.__name__}") 43 | return None 44 | NetClass = mdict[name] 45 | 46 | return NetClass 47 | -------------------------------------------------------------------------------- /models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .confusionmatrix import ConfusionMatrix 2 | from .metrics import IoU 3 | 4 | __all__ = ["ConfusionMatrix", "IoU"] 5 | -------------------------------------------------------------------------------- /models/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class IoU: 5 | """Computes the intersection over union (IoU) per class and corresponding 6 | mean (mIoU). 7 | 8 | Intersection over union (IoU) is a common evaluation metric for semantic 9 | segmentation. The predictions are first accumulated in a confusion matrix 10 | and the IoU is computed from it as follows: 11 | 12 | IoU = true_positive / (true_positive + false_positive + false_negative). 13 | 14 | Keyword arguments: 15 | - num_classes (int): number of classes in the classification problem 16 | - normalized (boolean, optional): Determines whether the confusion 17 | matrix is normalized. Default: False.
18 | - ignore_index (int or iterable, optional): Index of the classes to ignore 19 | when computing the IoU. Can be an int, or any iterable of ints. 20 | 21 | Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter 22 | 23 | """ 24 | 25 | def __init__(self): 26 | super().__init__() 27 | 28 | def value(self, conf_matrix): 29 | """Computes the IoU and mean IoU. 30 | 31 | The mean computation ignores NaN elements of the IoU array. 32 | 33 | Returns: 34 | Tuple: (IoU, mIoU). The first output is the per class IoU, 35 | for K classes it's numpy.ndarray with K elements. The second output, 36 | is the mean IoU. 37 | """ 38 | true_positive = np.diag(conf_matrix) 39 | false_positive = np.sum(conf_matrix, 0) - true_positive 40 | false_negative = np.sum(conf_matrix, 1) - true_positive 41 | 42 | # Just in case we get a division by 0, ignore/hide the error 43 | with np.errstate(divide="ignore", invalid="ignore"): 44 | iou = true_positive / ( 45 | true_positive + false_positive + false_negative 46 | ) 47 | 48 | return iou 49 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | from MinkowskiEngine import MinkowskiNetwork 2 | 3 | 4 | class Model(MinkowskiNetwork): 5 | """ 6 | Base network for all sparse convnet 7 | 8 | By default, all networks are segmentation networks. 9 | """ 10 | 11 | OUT_PIXEL_DIST = -1 12 | 13 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 14 | super().__init__(D) 15 | self.in_channels = in_channels 16 | self.out_channels = out_channels 17 | self.config = config 18 | 19 | 20 | class HighDimensionalModel(Model): 21 | """ 22 | Base network for all spatio (temporal) chromatic sparse convnet 23 | """ 24 | 25 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 26 | assert D > 4, "Num dimension smaller than 5" 27 | super().__init__(in_channels, out_channels, config, D, **kwargs) 28 | -------------------------------------------------------------------------------- /models/modules/3detr_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
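# A minimal usage sketch for the GenericMLP defined below; shapes and
# hyperparameters are illustrative, not taken from the original code.
# With use_conv=True every layer is a 1x1 nn.Conv1d, so the MLP is
# applied independently at each of the N positions:
#
#   mlp = GenericMLP(input_dim=256, hidden_dims=[256, 256], output_dim=128,
#                    norm_fn_name="bn1d", activation="relu", use_conv=True)
#   out = mlp(x)   # x: (B, 256, N) -> out: (B, 128, N)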
2 | import torch.nn as nn 3 | from functools import partial 4 | import copy 5 | 6 | 7 | class BatchNormDim1Swap(nn.BatchNorm1d): 8 | """ 9 | Used for nn.Transformer that uses a HW x N x C rep 10 | """ 11 | 12 | def forward(self, x): 13 | """ 14 | x: HW x N x C 15 | permute to N x C x HW 16 | Apply BN on C 17 | permute back 18 | """ 19 | hw, n, c = x.shape 20 | x = x.permute(1, 2, 0) 21 | x = super(BatchNormDim1Swap, self).forward(x) 22 | # x: n x c x hw -> hw x n x c 23 | x = x.permute(2, 0, 1) 24 | return x 25 | 26 | 27 | NORM_DICT = { 28 | "bn": BatchNormDim1Swap, 29 | "bn1d": nn.BatchNorm1d, 30 | "id": nn.Identity, 31 | "ln": nn.LayerNorm, 32 | } 33 | 34 | ACTIVATION_DICT = { 35 | "relu": nn.ReLU, 36 | "gelu": nn.GELU, 37 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 38 | } 39 | 40 | WEIGHT_INIT_DICT = { 41 | "xavier_uniform": nn.init.xavier_uniform_, 42 | } 43 | 44 | 45 | class GenericMLP(nn.Module): 46 | def __init__( 47 | self, 48 | input_dim, 49 | hidden_dims, 50 | output_dim, 51 | norm_fn_name=None, 52 | activation="relu", 53 | use_conv=False, 54 | dropout=None, 55 | hidden_use_bias=False, 56 | output_use_bias=True, 57 | output_use_activation=False, 58 | output_use_norm=False, 59 | weight_init_name=None, 60 | ): 61 | super().__init__() 62 | activation = ACTIVATION_DICT[activation] 63 | norm = None 64 | if norm_fn_name is not None: 65 | norm = NORM_DICT[norm_fn_name] 66 | if norm_fn_name == "ln" and use_conv: 67 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 68 | 69 | if dropout is not None: 70 | if not isinstance(dropout, list): 71 | dropout = [dropout for _ in range(len(hidden_dims))] 72 | 73 | layers = [] 74 | prev_dim = input_dim 75 | for idx, x in enumerate(hidden_dims): 76 | if use_conv: 77 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 78 | else: 79 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 80 | layers.append(layer) 81 | if norm: 82 | layers.append(norm(x)) 83 | layers.append(activation()) 84 | if dropout is not None: 85 | layers.append(nn.Dropout(p=dropout[idx])) 86 | prev_dim = x 87 | if use_conv: 88 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 89 | else: 90 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 91 | layers.append(layer) 92 | 93 | if output_use_norm: 94 | layers.append(norm(output_dim)) 95 | 96 | if output_use_activation: 97 | layers.append(activation()) 98 | 99 | self.layers = nn.Sequential(*layers) 100 | 101 | if weight_init_name is not None: 102 | self.do_weight_init(weight_init_name) 103 | 104 | def do_weight_init(self, weight_init_name): 105 | func = WEIGHT_INIT_DICT[weight_init_name] 106 | for (_, param) in self.named_parameters(): 107 | if param.dim() > 1: # skips batchnorm/layernorm 108 | func(param) 109 | 110 | def forward(self, x): 111 | output = self.layers(x) 112 | return output 113 | 114 | 115 | def get_clones(module, N): 116 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 117 | -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/models/modules/__init__.py -------------------------------------------------------------------------------- /models/modules/helpers_3detr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
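# A shape note for BatchNormDim1Swap below: nn.Transformer-style tensors
# are laid out (sequence, batch, channels) while nn.BatchNorm1d expects
# (batch, channels, length), hence the two permutes around the base-class
# forward:
#
#   x: (HW, N, C) -> permute -> (N, C, HW) -> BatchNorm1d -> (HW, N, C)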
2 | import torch.nn as nn 3 | from functools import partial 4 | import copy 5 | 6 | 7 | class BatchNormDim1Swap(nn.BatchNorm1d): 8 | """ 9 | Used for nn.Transformer that uses a HW x N x C rep 10 | """ 11 | 12 | def forward(self, x): 13 | """ 14 | x: HW x N x C 15 | permute to N x C x HW 16 | Apply BN on C 17 | permute back 18 | """ 19 | hw, n, c = x.shape 20 | x = x.permute(1, 2, 0) 21 | x = super(BatchNormDim1Swap, self).forward(x) 22 | # x: n x c x hw -> hw x n x c 23 | x = x.permute(2, 0, 1) 24 | return x 25 | 26 | 27 | NORM_DICT = { 28 | "bn": BatchNormDim1Swap, 29 | "bn1d": nn.BatchNorm1d, 30 | "id": nn.Identity, 31 | "ln": nn.LayerNorm, 32 | } 33 | 34 | ACTIVATION_DICT = { 35 | "relu": nn.ReLU, 36 | "gelu": nn.GELU, 37 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 38 | } 39 | 40 | WEIGHT_INIT_DICT = { 41 | "xavier_uniform": nn.init.xavier_uniform_, 42 | } 43 | 44 | 45 | class GenericMLP(nn.Module): 46 | def __init__( 47 | self, 48 | input_dim, 49 | hidden_dims, 50 | output_dim, 51 | norm_fn_name=None, 52 | activation="relu", 53 | use_conv=False, 54 | dropout=None, 55 | hidden_use_bias=False, 56 | output_use_bias=True, 57 | output_use_activation=False, 58 | output_use_norm=False, 59 | weight_init_name=None, 60 | ): 61 | super().__init__() 62 | activation = ACTIVATION_DICT[activation] 63 | norm = None 64 | if norm_fn_name is not None: 65 | norm = NORM_DICT[norm_fn_name] 66 | if norm_fn_name == "ln" and use_conv: 67 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 68 | 69 | if dropout is not None: 70 | if not isinstance(dropout, list): 71 | dropout = [dropout for _ in range(len(hidden_dims))] 72 | 73 | layers = [] 74 | prev_dim = input_dim 75 | for idx, x in enumerate(hidden_dims): 76 | if use_conv: 77 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 78 | else: 79 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 80 | layers.append(layer) 81 | if norm: 82 | layers.append(norm(x)) 83 | layers.append(activation()) 84 | if dropout is not None: 85 | layers.append(nn.Dropout(p=dropout[idx])) 86 | prev_dim = x 87 | if use_conv: 88 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 89 | else: 90 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 91 | layers.append(layer) 92 | 93 | if output_use_norm: 94 | layers.append(norm(output_dim)) 95 | 96 | if output_use_activation: 97 | layers.append(activation()) 98 | 99 | self.layers = nn.Sequential(*layers) 100 | 101 | if weight_init_name is not None: 102 | self.do_weight_init(weight_init_name) 103 | 104 | def do_weight_init(self, weight_init_name): 105 | func = WEIGHT_INIT_DICT[weight_init_name] 106 | for (_, param) in self.named_parameters(): 107 | if param.dim() > 1: # skips batchnorm/layernorm 108 | func(param) 109 | 110 | def forward(self, x): 111 | output = self.layers(x) 112 | return output 113 | 114 | 115 | def get_clones(module, N): 116 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 117 | -------------------------------------------------------------------------------- /models/modules/resnet_block.py.tmp: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from MinkowskiEngine import MinkowskiReLU 3 | 4 | from mix3d.models.modules.common import ConvType, NormType, conv, get_norm 5 | 6 | 7 | class BasicBlockBase(nn.Module): 8 | expansion = 1 9 | NORM_TYPE = NormType.BATCH_NORM 10 | 11 | def __init__( 12 | self, 13 | inplanes, 14 | planes, 15 | stride=1, 16 | dilation=1, 17 | downsample=None, 18 
| conv_type=ConvType.HYPERCUBE, 19 | bn_momentum=0.1, 20 | D=3, 21 | ): 22 | super().__init__() 23 | 24 | self.conv1 = conv( 25 | inplanes, 26 | planes, 27 | kernel_size=3, 28 | stride=stride, 29 | dilation=dilation, 30 | conv_type=conv_type, 31 | D=D, 32 | ) 33 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 34 | self.conv2 = conv( 35 | planes, 36 | planes, 37 | kernel_size=3, 38 | stride=1, 39 | dilation=dilation, 40 | bias=False, 41 | conv_type=conv_type, 42 | D=D, 43 | ) 44 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 45 | self.relu = MinkowskiReLU(inplace=True) 46 | self.downsample = downsample 47 | 48 | def forward(self, x): 49 | residual = x 50 | 51 | out = self.conv1(x) 52 | out = self.norm1(out) 53 | out = self.relu(out) 54 | 55 | out = self.conv2(out) 56 | out = self.norm2(out) 57 | 58 | if self.downsample is not None: 59 | residual = self.downsample(x) 60 | 61 | out += residual 62 | out = self.relu(out) 63 | 64 | return out 65 | 66 | 67 | class BasicBlock(BasicBlockBase): 68 | NORM_TYPE = NormType.BATCH_NORM 69 | 70 | 71 | class BasicBlockIN(BasicBlockBase): 72 | NORM_TYPE = NormType.INSTANCE_NORM 73 | 74 | 75 | class BasicBlockINBN(BasicBlockBase): 76 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 77 | 78 | 79 | class BottleneckBase(nn.Module): 80 | expansion = 4 81 | NORM_TYPE = NormType.BATCH_NORM 82 | 83 | def __init__( 84 | self, 85 | inplanes, 86 | planes, 87 | stride=1, 88 | dilation=1, 89 | downsample=None, 90 | conv_type=ConvType.HYPERCUBE, 91 | bn_momentum=0.1, 92 | D=3, 93 | ): 94 | super().__init__() 95 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 96 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 97 | 98 | self.conv2 = conv( 99 | planes, 100 | planes, 101 | kernel_size=3, 102 | stride=stride, 103 | dilation=dilation, 104 | conv_type=conv_type, 105 | D=D, 106 | ) 107 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 108 | 109 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 110 | self.norm3 = get_norm( 111 | self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum 112 | ) 113 | 114 | self.relu = MinkowskiReLU(inplace=True) 115 | self.downsample = downsample 116 | 117 | def forward(self, x): 118 | residual = x 119 | 120 | out = self.conv1(x) 121 | out = self.norm1(out) 122 | out = self.relu(out) 123 | 124 | out = self.conv2(out) 125 | out = self.norm2(out) 126 | out = self.relu(out) 127 | 128 | out = self.conv3(out) 129 | out = self.norm3(out) 130 | 131 | if self.downsample is not None: 132 | residual = self.downsample(x) 133 | 134 | out += residual 135 | out = self.relu(out) 136 | 137 | return out 138 | 139 | 140 | class Bottleneck(BottleneckBase): 141 | NORM_TYPE = NormType.BATCH_NORM 142 | 143 | 144 | class BottleneckIN(BottleneckBase): 145 | NORM_TYPE = NormType.INSTANCE_NORM 146 | 147 | 148 | class BottleneckINBN(BottleneckBase): 149 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 150 | -------------------------------------------------------------------------------- /models/modules/senet_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import MinkowskiEngine as ME 3 | 4 | from mix3d.models.modules.common import ConvType, NormType 5 | from mix3d.models.modules.resnet_block import BasicBlock, Bottleneck 6 | 7 | 8 | class SELayer(nn.Module): 9 | def __init__(self, channel, reduction=16, D=-1): 10 | # Global coords does not require coords_key 11 | 
super().__init__() 12 | self.fc = nn.Sequential( 13 | ME.MinkowskiLinear(channel, channel // reduction), 14 | ME.MinkowskiReLU(inplace=True), 15 | ME.MinkowskiLinear(channel // reduction, channel), 16 | ME.MinkowskiSigmoid(), 17 | ) 18 | self.pooling = ME.MinkowskiGlobalPooling(dimension=D) 19 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D) 20 | 21 | def forward(self, x): 22 | y = self.pooling(x) 23 | y = self.fc(y) 24 | return self.broadcast_mul(x, y) 25 | 26 | 27 | class SEBasicBlock(BasicBlock): 28 | def __init__( 29 | self, 30 | inplanes, 31 | planes, 32 | stride=1, 33 | dilation=1, 34 | downsample=None, 35 | conv_type=ConvType.HYPERCUBE, 36 | reduction=16, 37 | D=-1, 38 | ): 39 | super().__init__( 40 | inplanes, 41 | planes, 42 | stride=stride, 43 | dilation=dilation, 44 | downsample=downsample, 45 | conv_type=conv_type, 46 | D=D, 47 | ) 48 | self.se = SELayer(planes, reduction=reduction, D=D) 49 | 50 | def forward(self, x): 51 | residual = x 52 | 53 | out = self.conv1(x) 54 | out = self.norm1(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv2(out) 58 | out = self.norm2(out) 59 | out = self.se(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class SEBasicBlockSN(SEBasicBlock): 71 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 72 | 73 | 74 | class SEBasicBlockIN(SEBasicBlock): 75 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 76 | 77 | 78 | class SEBasicBlockLN(SEBasicBlock): 79 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 80 | 81 | 82 | class SEBottleneck(Bottleneck): 83 | def __init__( 84 | self, 85 | inplanes, 86 | planes, 87 | stride=1, 88 | dilation=1, 89 | downsample=None, 90 | conv_type=ConvType.HYPERCUBE, 91 | D=3, 92 | reduction=16, 93 | ): 94 | super().__init__( 95 | inplanes, 96 | planes, 97 | stride=stride, 98 | dilation=dilation, 99 | downsample=downsample, 100 | conv_type=conv_type, 101 | D=D, 102 | ) 103 | self.se = SELayer(planes * self.expansion, reduction=reduction, D=D) 104 | 105 | def forward(self, x): 106 | residual = x 107 | 108 | out = self.conv1(x) 109 | out = self.norm1(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv2(out) 113 | out = self.norm2(out) 114 | out = self.relu(out) 115 | 116 | out = self.conv3(out) 117 | out = self.norm3(out) 118 | out = self.se(out) 119 | 120 | if self.downsample is not None: 121 | residual = self.downsample(x) 122 | 123 | out += residual 124 | out = self.relu(out) 125 | 126 | return out 127 | 128 | 129 | class SEBottleneckSN(SEBottleneck): 130 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 131 | 132 | 133 | class SEBottleneckIN(SEBottleneck): 134 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 135 | 136 | 137 | class SEBottleneckLN(SEBottleneck): 138 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 139 | -------------------------------------------------------------------------------- /models/wrapper.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from torch.nn import Module 4 | from MinkowskiEngine import SparseTensor 5 | 6 | 7 | class Wrapper(Module): 8 | """ 9 | Wrapper for the segmentation networks. 
10 | """ 11 | 12 | OUT_PIXEL_DIST = -1 13 | 14 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 15 | super().__init__() 16 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 17 | 18 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 19 | raise NotImplementedError("Must initialize a model and a filter") 20 | 21 | def forward(self, x, coords, colors=None): 22 | soutput = self.model(x) 23 | 24 | # During training, make the network invariant to the filter 25 | if not self.training or random.random() < 0.5: 26 | # Filter requires the model to finish the forward pass 27 | wrapper_coords = self.filter.initialize_coords( 28 | self.model, coords, colors 29 | ) 30 | finput = SparseTensor(soutput.F, wrapper_coords) 31 | soutput = self.filter(finput) 32 | return soutput 33 | -------------------------------------------------------------------------------- /scripts/eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 5 | CURR_QUERY=400 6 | CURR_TOPK=0.8 7 | CURR_DBSCAN=0.05 8 | CURR_DBSCAN_MIN_POINTS=5 9 | 10 | CUDA_VISIBLE_DEVICES=0 python main_instance_segmentation.py \ 11 | general.experiment_name="eval_${CURRENT_TIME}_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}_${CURR_DBSCAN_MIN_POINTS}" \ 12 | general.project_name="scannetpp" \ 13 | general.train_mode=false \ 14 | general.eval_on_segments=true \ 15 | general.train_on_segments=true \ 16 | model.num_queries=${CURR_QUERY} \ 17 | general.topk_per_image=${CURR_TOPK} \ 18 | general.use_dbscan=true \ 19 | general.dbscan_eps=${CURR_DBSCAN} \ 20 | general.dbscan_min_points=${CURR_DBSCAN_MIN_POINTS} \ 21 | general.gpus=1 \ 22 | general.save_visualizations=false \ 23 | general.checkpoint="checkpoints/segment3d.ckpt" \ 24 | data.remove_small_group=15 \ 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/generate_mask_trainset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURR_DBSCAN=0.05 5 | CURR_DBSCAN_MIN_POINTS=5 6 | CURR_TOPK=-1 7 | CURR_QUERY=150 8 | 9 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 10 | 11 | 12 | CUDA_VISIBLE_DEVICES=0 python main_instance_segmentation_generate_mask_trainset.py \ 13 | general.experiment_name="mask_generation_${CURRENT_TIME}" \ 14 | general.train_mode=false \ 15 | general.eval_on_segments=false \ 16 | general.train_on_segments=false \ 17 | model.num_queries=${CURR_QUERY} \ 18 | general.topk_per_image=${CURR_TOPK} \ 19 | general.use_dbscan=true \ 20 | general.dbscan_eps=${CURR_DBSCAN} \ 21 | general.dbscan_min_points=${CURR_DBSCAN_MIN_POINTS} \ 22 | general.gpus=1 \ 23 | general.save_visualizations=False \ 24 | general.checkpoint="PATH_TO_STAGE1_CHECKPOINT" \ 25 | -------------------------------------------------------------------------------- /scripts/generate_mask_valset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURR_DBSCAN=0.05 5 | CURR_DBSCAN_MIN_POINTS=5 6 | CURR_TOPK=-1 7 | CURR_QUERY=150 8 | 9 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 10 | 11 | 12 | CUDA_VISIBLE_DEVICES=0 python main_instance_segmentation_generate_mask_valset.py \ 13 | general.experiment_name="mask_generation_${CURRENT_TIME}" \ 14 | 
general.train_mode=false \ 15 | general.eval_on_segments=false \ 16 | general.train_on_segments=false \ 17 | model.num_queries=${CURR_QUERY} \ 18 | general.topk_per_image=${CURR_TOPK} \ 19 | general.use_dbscan=true \ 20 | general.dbscan_eps=${CURR_DBSCAN} \ 21 | general.dbscan_min_points=${CURR_DBSCAN_MIN_POINTS} \ 22 | general.gpus=1 \ 23 | general.save_visualizations=False \ 24 | general.checkpoint="PATH_TO_STAGE1_CHECKPOINT" \ 25 | -------------------------------------------------------------------------------- /scripts/run_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | TEST_SCENE=$1 5 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 6 | CURR_QUERY=400 7 | CURR_TOPK=-1 8 | CURR_DBSCAN=0.05 9 | CURR_DBSCAN_MIN_POINTS=5 10 | 11 | CUDA_VISIBLE_DEVICES=0 python demo.py \ 12 | general.experiment_name="eval_${TEST_SCENE}_${CURRENT_TIME}_${CURR_QUERY}_topk_${CURR_TOPK}_dbscan_${CURR_DBSCAN}_${CURR_DBSCAN_MIN_POINTS}" \ 13 | general.project_name="demo" \ 14 | general.train_mode=false \ 15 | general.train_on_segments=true \ 16 | model.num_queries=${CURR_QUERY} \ 17 | general.topk_per_image=${CURR_TOPK} \ 18 | general.use_dbscan=true \ 19 | general.dbscan_eps=${CURR_DBSCAN} \ 20 | general.dbscan_min_points=${CURR_DBSCAN_MIN_POINTS} \ 21 | general.gpus=1 \ 22 | general.save_visualizations=True \ 23 | general.checkpoint="checkpoints/segment3d.ckpt" \ 24 | general.test_scene=${TEST_SCENE} \ 25 | data.remove_small_group=15 \ 26 | -------------------------------------------------------------------------------- /scripts/train_stage1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 5 | 6 | 7 | CUDA_VISIBLE_DEVICES=0,1 python main_instance_segmentation_stage1.py \ 8 | general.experiment_name="train_stage1_${CURRENT_TIME}" \ 9 | general.project_name="scannet" \ 10 | optimizer.lr=0.0002 \ 11 | data.batch_size=8 \ 12 | data.num_workers=4 \ 13 | trainer.max_epochs=20 \ 14 | trainer.log_every_n_steps=5 \ 15 | trainer.val_check_interval=2000 \ 16 | general.save_visualizations=False \ 17 | general.gpus=2 \ 18 | model.num_queries=100 \ 19 | -------------------------------------------------------------------------------- /scripts/train_stage2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export OMP_NUM_THREADS=3 # speeds up MinkowskiEngine 3 | 4 | CURRENT_TIME=$(date +"%Y%m%d_%H%M%S") 5 | 6 | 7 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main_instance_segmentation_stage2.py \ 8 | general.experiment_name="train_stage2_${CURRENT_TIME}" \ 9 | general.project_name="scannet" \ 10 | optimizer.lr=0.0002 \ 11 | data.batch_size=2 \ 12 | data.num_workers=2 \ 13 | trainer.max_epochs=50 \ 14 | trainer.log_every_n_steps=5 \ 15 | trainer.check_val_every_n_epoch=5 \ 16 | general.train_mode=true \ 17 | general.eval_on_segments=false \ 18 | general.train_on_segments=false \ 19 | model.num_queries=150 \ 20 | matcher.cost_class=0.0 \ 21 | general.topk_per_image=-1 \ 22 | general.use_dbscan=false \ 23 | general.gpus=4 \ 24 | general.save_visualizations=False \ 25 | general.checkpoint="PATH_TO_STAGE1_CHECKPOINT" \ 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /third_party/Segmentator/.gitignore: 
-------------------------------------------------------------------------------- 1 | segmentator 2 | *.o 3 | -------------------------------------------------------------------------------- /third_party/Segmentator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1 FATAL_ERROR) 2 | set(CMAKE_CXX_STANDARD 11) 3 | project(Segmentator) 4 | set(SOURCES segmentator.cpp tinyply.cpp) 5 | add_executable(segmentator ${SOURCES}) 6 | -------------------------------------------------------------------------------- /third_party/Segmentator/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | FLAGS=-std=c++11 3 | 4 | main: 5 | $(CXX) $(FLAGS) -o segmentator segmentator.cpp tinyply.cpp 6 | 7 | clean: 8 | rm -f *~ *.o segmentator 9 | -------------------------------------------------------------------------------- /third_party/Segmentator/README.md: -------------------------------------------------------------------------------- 1 | Mesh Segmentation 2 | ================= 3 | 4 | Mesh segmentation code using Felzenszwalb and Huttenlocher's [*Graph Based Image Segmentation*](https://cs.brown.edu/~pff/segment/index.html) algorithm on computed mesh normals. 5 | 6 | Build by running `make` (or create makefiles for your system using `cmake`). This will create a `segmentator` binary that can be run as: 7 | 8 | `./segmentator input.ply [kThresh=0.01] [segMinVerts=20]` 9 | 10 | The first argument is a path to an input mesh in PLY format. 11 | The second (optional) argument is the segmentation cluster threshold parameter (larger values lead to larger segments). 12 | The third (optional) argument is the minimum number of vertices per segment, enforced by merging small clusters into larger segments. -------------------------------------------------------------------------------- /third_party/Segmentator/process.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | file_list=($(ls ../demo_test)) 4 | 5 | for scene_name in "${file_list[@]}"; do 6 | echo "$scene_name" 7 | ./segmentator ../demo_test/${scene_name}/mesh.ply 0.01 20 8 | done 9 | 10 | 11 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #pragma once 4 | #include <torch/extension.h> 5 | 6 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 7 | const int nsample); 8 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
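// A worked example for opt_n_threads in this header: the helper rounds
// the work size down to a power of two and clamps it to
// [1, TOTAL_THREADS]. For work_size = 3000: floor(log2(3000)) = 11,
// 1 << 11 = 2048, and the clamp against TOTAL_THREADS = 512 yields
// 512 threads per block.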
2 | 3 | #ifndef _CUDA_UTILS_H 4 | #define _CUDA_UTILS_H 5 | 6 | #include <ATen/ATen.h> 7 | #include <ATen/cuda/CUDAContext.h> 8 | #include <cmath> 9 | 10 | #include <cuda.h> 11 | #include <cuda_runtime.h> 12 | 13 | #include <vector> 14 | 15 | #define TOTAL_THREADS 512 16 | 17 | inline int opt_n_threads(int work_size) { 18 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 19 | 20 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 21 | } 22 | 23 | inline dim3 opt_block_config(int x, int y) { 24 | const int x_threads = opt_n_threads(x); 25 | const int y_threads = 26 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 27 | dim3 block_config(x_threads, y_threads, 1); 28 | 29 | return block_config; 30 | } 31 | 32 | #define CUDA_CHECK_ERRORS() \ 33 | do { \ 34 | cudaError_t err = cudaGetLastError(); \ 35 | if (cudaSuccess != err) { \ 36 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 37 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 38 | __FILE__); \ 39 | exit(-1); \ 40 | } \ 41 | } while (0) 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include <torch/extension.h> 6 | 7 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #pragma once 4 | 5 | #include <torch/extension.h> 6 | #include <vector> 7 | 8 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows); 9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 10 | at::Tensor weight); 11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 12 | at::Tensor weight, const int m); 13 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include <torch/extension.h> 6 | 7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 10 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | 4 | #pragma once 5 | #include <ATen/cuda/CUDAContext.h> 6 | #include <torch/extension.h> 7 | 8 | #define CHECK_CUDA(x) \ 9 | do { \ 10 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \ 11 | } while (0) 12 | 13 | #define CHECK_CONTIGUOUS(x) \ 14 | do { \ 15 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \ 16 | } while (0) 17 | 18 | #define CHECK_IS_INT(x) \ 19 | do { \ 20 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \ 21 | #x " must be an int tensor"); \ 22 | } while (0) 23 | 24 | #define CHECK_IS_FLOAT(x) \ 25 | do { \ 26 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \ 27 | #x " must be a float tensor"); \ 28 | } while (0) 29 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "ball_query.h" 5 | #include "utils.h" 6 | 7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 8 | int nsample, const float *new_xyz, 9 | const float *xyz, int *idx); 10 | 11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 12 | const int nsample) { 13 | CHECK_CONTIGUOUS(new_xyz); 14 | CHECK_CONTIGUOUS(xyz); 15 | CHECK_IS_FLOAT(new_xyz); 16 | CHECK_IS_FLOAT(xyz); 17 | 18 | if (new_xyz.is_cuda()) { 19 | CHECK_CUDA(xyz); 20 | } 21 | 22 | at::Tensor idx = 23 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 24 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 25 | 26 | if (new_xyz.is_cuda()) { 27 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 28 | radius, nsample, new_xyz.data<float>(), 29 | xyz.data<float>(), idx.data<int>()); 30 | } else { 31 | AT_ASSERT(false, "CPU not supported"); 32 | } 33 | 34 | return idx; 35 | } 36 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
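// A note on the kernel below: as soon as the first in-radius neighbour
// of a query is found (cnt == 0), its index is copied into all nsample
// slots before counting continues, so queries with fewer than nsample
// neighbours come back padded with the first neighbour instead of with
// garbage. Queries with no neighbour at all keep the zero-initialized
// indices allocated by the host wrapper.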
2 | 3 | 4 | #include <math.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 11 | // output: idx(b, m, nsample) 12 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 13 | int nsample, 14 | const float *__restrict__ new_xyz, 15 | const float *__restrict__ xyz, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | xyz += batch_index * n * 3; 19 | new_xyz += batch_index * m * 3; 20 | idx += m * nsample * batch_index; 21 | 22 | int index = threadIdx.x; 23 | int stride = blockDim.x; 24 | 25 | float radius2 = radius * radius; 26 | for (int j = index; j < m; j += stride) { 27 | float new_x = new_xyz[j * 3 + 0]; 28 | float new_y = new_xyz[j * 3 + 1]; 29 | float new_z = new_xyz[j * 3 + 2]; 30 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 31 | float x = xyz[k * 3 + 0]; 32 | float y = xyz[k * 3 + 1]; 33 | float z = xyz[k * 3 + 2]; 34 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 35 | (new_z - z) * (new_z - z); 36 | if (d2 < radius2) { 37 | if (cnt == 0) { 38 | for (int l = 0; l < nsample; ++l) { 39 | idx[j * nsample + l] = k; 40 | } 41 | } 42 | idx[j * nsample + cnt] = k; 43 | ++cnt; 44 | } 45 | } 46 | } 47 | } 48 | 49 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 50 | int nsample, const float *new_xyz, 51 | const float *xyz, int *idx) { 52 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 53 | query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>( 54 | b, n, m, radius, nsample, new_xyz, xyz, idx); 55 | 56 | CUDA_CHECK_ERRORS(); 57 | } 58 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "ball_query.h" 5 | #include "group_points.h" 6 | #include "interpolate.h" 7 | #include "sampling.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("gather_points", &gather_points); 11 | m.def("gather_points_grad", &gather_points_grad); 12 | m.def("furthest_point_sampling", &furthest_point_sampling); 13 | 14 | m.def("three_nn", &three_nn); 15 | m.def("three_interpolate", &three_interpolate); 16 | m.def("three_interpolate_grad", &three_interpolate_grad); 17 | 18 | m.def("ball_query", &ball_query); 19 | 20 | m.def("group_points", &group_points); 21 | m.def("group_points_grad", &group_points_grad); 22 | } 23 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
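// Shape summary for the two host wrappers below, as read off the CUDA
// kernels they call:
//   group_points(points (B, C, N) float, idx (B, npoints, nsample) int)
//       -> out (B, C, npoints, nsample)
//   group_points_grad(grad_out (B, C, npoints, nsample) float, idx, N)
//       -> grad_points (B, C, N)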
2 | 3 | 4 | #include "group_points.h" 5 | #include "utils.h" 6 | 7 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 8 | const float *points, const int *idx, 9 | float *out); 10 | 11 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 12 | int nsample, const float *grad_out, 13 | const int *idx, float *grad_points); 14 | 15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), idx.size(2), points.data<float>(), 32 | idx.data<int>(), output.data<float>()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 41 | CHECK_CONTIGUOUS(grad_out); 42 | CHECK_CONTIGUOUS(idx); 43 | CHECK_IS_FLOAT(grad_out); 44 | CHECK_IS_INT(idx); 45 | 46 | if (grad_out.is_cuda()) { 47 | CHECK_CUDA(idx); 48 | } 49 | 50 | at::Tensor output = 51 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 52 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 53 | 54 | if (grad_out.is_cuda()) { 55 | group_points_grad_kernel_wrapper( 56 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 57 | grad_out.data<float>(), idx.data<int>(), output.data<float>()); 58 | } else { 59 | AT_ASSERT(false, "CPU not supported"); 60 | } 61 | 62 | return output; 63 | } 64 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
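// A note on the backward kernel below: several (npoint, nsample) slots
// can index the same source point, so the gradient scatter accumulates
// with atomicAdd to stay correct under concurrent writes from different
// threads.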
--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/group_points_gpu.cu:
--------------------------------------------------------------------------------

// Copyright (c) Facebook, Inc. and its affiliates.

#include <stdio.h>
#include <stdlib.h>

#include "cuda_utils.h"

// input: points(b, c, n) idx(b, npoints, nsample)
// output: out(b, c, npoints, nsample)
__global__ void group_points_kernel(int b, int c, int n, int npoints,
                                    int nsample,
                                    const float *__restrict__ points,
                                    const int *__restrict__ idx,
                                    float *__restrict__ out) {
  int batch_index = blockIdx.x;
  points += batch_index * n * c;
  idx += batch_index * npoints * nsample;
  out += batch_index * npoints * nsample * c;

  const int index = threadIdx.y * blockDim.x + threadIdx.x;
  const int stride = blockDim.y * blockDim.x;
  for (int i = index; i < c * npoints; i += stride) {
    const int l = i / npoints;
    const int j = i % npoints;
    for (int k = 0; k < nsample; ++k) {
      int ii = idx[j * nsample + k];
      out[(l * npoints + j) * nsample + k] = points[l * n + ii];
    }
  }
}

void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
                                 const float *points, const int *idx,
                                 float *out) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
      b, c, n, npoints, nsample, points, idx, out);

  CUDA_CHECK_ERRORS();
}

// input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
// output: grad_points(b, c, n)
__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
                                         int nsample,
                                         const float *__restrict__ grad_out,
                                         const int *__restrict__ idx,
                                         float *__restrict__ grad_points) {
  int batch_index = blockIdx.x;
  grad_out += batch_index * npoints * nsample * c;
  idx += batch_index * npoints * nsample;
  grad_points += batch_index * n * c;

  const int index = threadIdx.y * blockDim.x + threadIdx.x;
  const int stride = blockDim.y * blockDim.x;
  for (int i = index; i < c * npoints; i += stride) {
    const int l = i / npoints;
    const int j = i % npoints;
    for (int k = 0; k < nsample; ++k) {
      int ii = idx[j * nsample + k];
      atomicAdd(grad_points + l * n + ii,
                grad_out[(l * npoints + j) * nsample + k]);
    }
  }
}

void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                      int nsample, const float *grad_out,
                                      const int *idx, float *grad_points) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
      b, c, n, npoints, nsample, grad_out, idx, grad_points);

  CUDA_CHECK_ERRORS();
}
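Because the forward pass is a pure gather, the backward kernel above is its scatter-add transpose (hence the atomicAdd: several grouped samples may reference the same source point). A PyTorch reference for the forward, grounded in the shape comment in the kernel; a sketch for testing, not part of the repository:

import torch

def group_points_reference(points, idx):
    # points: (b, c, n), idx: (b, npoints, nsample) -> out: (b, c, npoints, nsample)
    b, c, n = points.shape
    _, npoints, nsample = idx.shape
    flat = idx.reshape(b, 1, npoints * nsample).expand(-1, c, -1).long()
    return torch.gather(points, 2, flat).reshape(b, c, npoints, nsample)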
2 | 3 | #include "interpolate.h" 4 | #include "utils.h" 5 | 6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 7 | const float *known, float *dist2, int *idx); 8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 9 | const float *points, const int *idx, 10 | const float *weight, float *out); 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 12 | const float *grad_out, 13 | const int *idx, const float *weight, 14 | float *grad_points); 15 | 16 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 17 | CHECK_CONTIGUOUS(unknowns); 18 | CHECK_CONTIGUOUS(knows); 19 | CHECK_IS_FLOAT(unknowns); 20 | CHECK_IS_FLOAT(knows); 21 | 22 | if (unknowns.is_cuda()) { 23 | CHECK_CUDA(knows); 24 | } 25 | 26 | at::Tensor idx = 27 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 28 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 29 | at::Tensor dist2 = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 32 | 33 | if (unknowns.is_cuda()) { 34 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 35 | unknowns.data(), knows.data(), 36 | dist2.data(), idx.data()); 37 | } else { 38 | AT_ASSERT(false, "CPU not supported"); 39 | } 40 | 41 | return {dist2, idx}; 42 | } 43 | 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 45 | at::Tensor weight) { 46 | CHECK_CONTIGUOUS(points); 47 | CHECK_CONTIGUOUS(idx); 48 | CHECK_CONTIGUOUS(weight); 49 | CHECK_IS_FLOAT(points); 50 | CHECK_IS_INT(idx); 51 | CHECK_IS_FLOAT(weight); 52 | 53 | if (points.is_cuda()) { 54 | CHECK_CUDA(idx); 55 | CHECK_CUDA(weight); 56 | } 57 | 58 | at::Tensor output = 59 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 60 | at::device(points.device()).dtype(at::ScalarType::Float)); 61 | 62 | if (points.is_cuda()) { 63 | three_interpolate_kernel_wrapper( 64 | points.size(0), points.size(1), points.size(2), idx.size(1), 65 | points.data(), idx.data(), weight.data(), 66 | output.data()); 67 | } else { 68 | AT_ASSERT(false, "CPU not supported"); 69 | } 70 | 71 | return output; 72 | } 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 74 | at::Tensor weight, const int m) { 75 | CHECK_CONTIGUOUS(grad_out); 76 | CHECK_CONTIGUOUS(idx); 77 | CHECK_CONTIGUOUS(weight); 78 | CHECK_IS_FLOAT(grad_out); 79 | CHECK_IS_INT(idx); 80 | CHECK_IS_FLOAT(weight); 81 | 82 | if (grad_out.is_cuda()) { 83 | CHECK_CUDA(idx); 84 | CHECK_CUDA(weight); 85 | } 86 | 87 | at::Tensor output = 88 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 89 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 90 | 91 | if (grad_out.is_cuda()) { 92 | three_interpolate_grad_kernel_wrapper( 93 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 94 | grad_out.data(), idx.data(), weight.data(), 95 | output.data()); 96 | } else { 97 | AT_ASSERT(false, "CPU not supported"); 98 | } 99 | 100 | return output; 101 | } 102 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #include "sampling.h" 4 | #include "utils.h" 5 | 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 7 | const float *points, const int *idx, 8 | float *out); 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *grad_out, const int *idx, 11 | float *grad_points); 12 | 13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 14 | const float *dataset, float *temp, 15 | int *idxs); 16 | 17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 43 | const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 59 | idx.size(1), grad_out.data(), 60 | idx.data(), output.data()); 61 | } else { 62 | AT_ASSERT(false, "CPU not supported"); 63 | } 64 | 65 | return output; 66 | } 67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 68 | CHECK_CONTIGUOUS(points); 69 | CHECK_IS_FLOAT(points); 70 | 71 | at::Tensor output = 72 | torch::zeros({points.size(0), nsamples}, 73 | at::device(points.device()).dtype(at::ScalarType::Int)); 74 | 75 | at::Tensor tmp = 76 | torch::full({points.size(0), points.size(1)}, 1e10, 77 | at::device(points.device()).dtype(at::ScalarType::Float)); 78 | 79 | if (points.is_cuda()) { 80 | furthest_point_sampling_kernel_wrapper( 81 | points.size(0), points.size(1), nsamples, points.data(), 82 | tmp.data(), output.data()); 83 | } else { 84 | AT_ASSERT(false, "CPU not supported"); 85 | } 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /third_party/pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ Testing customized ops. 
""" 4 | 5 | import torch 6 | from torch.autograd import gradcheck 7 | import numpy as np 8 | 9 | import os 10 | import sys 11 | 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(BASE_DIR) 14 | import pointnet2_utils 15 | 16 | 17 | def test_interpolation_grad(): 18 | batch_size = 1 19 | feat_dim = 2 20 | m = 4 21 | feats = ( 22 | torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | ) 24 | 25 | def interpolate_func(inputs): 26 | idx = torch.from_numpy(np.array([[[0, 1, 2], [1, 2, 3]]])).int().cuda() 27 | weight = ( 28 | torch.from_numpy(np.array([[[1, 1, 1], [2, 2, 2]]])).float().cuda() 29 | ) 30 | interpolated_feats = pointnet2_utils.three_interpolate( 31 | inputs, idx, weight 32 | ) 33 | return interpolated_feats 34 | 35 | assert gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_interpolation_grad() 40 | -------------------------------------------------------------------------------- /third_party/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | import os.path as osp 10 | 11 | this_dir = osp.dirname(osp.abspath(__file__)) 12 | 13 | _ext_src_root = "_ext_src" 14 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 15 | "{}/src/*.cu".format(_ext_src_root) 16 | ) 17 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 18 | 19 | setup( 20 | name="pointnet2", 21 | ext_modules=[ 22 | CUDAExtension( 23 | name="pointnet2._ext", 24 | sources=_ext_sources, 25 | extra_compile_args={ 26 | "cxx": [ 27 | "-O2", 28 | "-I{}".format("{}/include".format(_ext_src_root)), 29 | ], 30 | "nvcc": [ 31 | "-O2", 32 | "-I{}".format("{}/include".format(_ext_src_root)), 33 | ], 34 | }, 35 | include_dirs=[osp.join(this_dir, _ext_src_root, "include")], 36 | ) 37 | ], 38 | cmdclass={"build_ext": BuildExtension}, 39 | ) 40 | -------------------------------------------------------------------------------- /trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/trainer/__init__.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeapLabTHU/Segment3D/c510d89a66c372c5358384d6d619f713506214db/utils/__init__.py -------------------------------------------------------------------------------- /utils/gradflow_check.py: -------------------------------------------------------------------------------- 1 | """ https://github.com/alwynmathew/gradflow-check """ 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.lines import Line2D 5 | 6 | 7 | def plot_grad_flow(named_parameters): 8 | ave_grads = [] 9 | layers = [] 10 | for n, p in named_parameters: 11 | if (p.requires_grad) and ("bias" not in n): 12 | if p.grad: 13 | layers.append(n) 14 | ave_grads.append(p.grad.abs().mean()) 15 | else: 16 | print(f"{n} - doesn't have gradient computed") 17 | 18 | plt.plot(ave_grads, alpha=0.3, color="b") 19 
--------------------------------------------------------------------------------
/utils/gradflow_check.py:
--------------------------------------------------------------------------------

""" https://github.com/alwynmathew/gradflow-check """
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D


def plot_grad_flow(named_parameters):
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if (p.requires_grad) and ("bias" not in n):
            if p.grad is not None:
                layers.append(n)
                ave_grads.append(p.grad.abs().mean())
            else:
                print(f"{n} - doesn't have gradient computed")

    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)


def plot_grad_flow_v2(named_parameters):
    """Plots the gradients flowing through different layers in the net during training.
    Can be used for checking for possible gradient vanishing / exploding problems.

    Usage: Plug this function in Trainer class after loss.backwards() as
    "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow"""
    ave_grads = []
    max_grads = []
    layers = []
    for n, p in named_parameters:
        if (p.requires_grad) and ("bias" not in n):
            layers.append(n)
            if p.grad is not None:
                ave_grads.append(p.grad.abs().mean())
                max_grads.append(p.grad.abs().max())
            else:
                print(f"{n} - doesn't have gradient computed")
    plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, lw=2, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.ylim(bottom=-0.001, top=0.02)  # zoom in on the lower gradient regions
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.legend(
        [
            Line2D([0], [0], color="c", lw=4),
            Line2D([0], [0], color="b", lw=4),
            Line2D([0], [0], color="k", lw=4),
        ],
        ["max-gradient", "mean-gradient", "zero-gradient"],
    )
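A minimal usage sketch for the helper above, following its own docstring: call it between backward() and the optimizer step, then save the figure. The model and loss here are hypothetical stand-ins:

import torch
import matplotlib.pyplot as plt

model = torch.nn.Linear(8, 2)              # hypothetical stand-in model
loss = model(torch.randn(4, 8)).sum()      # hypothetical loss
loss.backward()
plot_grad_flow_v2(model.named_parameters())  # inspect gradients before optimizer.step()
plt.savefig("gradflow.png", bbox_inches="tight")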
--------------------------------------------------------------------------------
/utils/kfold.py:
--------------------------------------------------------------------------------

""" Author: https://github.com/yk-szk/stratified_group_kfold """
import random
import numpy as np


class StratifiedGroupKFold:
    """
    Stratified Group K-fold with sklearn.model_selection.KFold compatibility.

    Split the dataset into k folds with a balanced label distribution (stratified) and non-overlapping groups.

    Args:
        n_splits (int): # of splits
        shuffle (bool): Shuffle
        random_state (int): Seed for the random number generator
    """

    def __init__(self, n_splits, shuffle=True, random_state=None):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.seed = random_state

    def split(self, X, labels, groups):
        assert len(X) == len(labels) == len(groups), "Invalid input length"
        assert (
            len(set(groups)) >= self.n_splits
        ), "The number of groups needs to be at least n_splits"

        def encode(v):
            s = set(v)
            d = {l: i for i, l in enumerate(s)}
            return [d[e] for e in v]

        labels, groups = encode(labels), encode(groups)
        num_labels, num_groups = max(labels) + 1, max(groups) + 1
        label_counts_per_group = np.zeros((num_groups, num_labels), dtype=int)
        global_label_dist = np.bincount(labels)
        for label, g in zip(labels, groups):
            label_counts_per_group[g][label] += 1

        label_counts_per_fold = np.zeros(
            (self.n_splits, num_labels), dtype=int
        )
        groups_per_fold = [set() for _ in range(self.n_splits)]

        def eval_label_counts_per_fold(y_counts, fold):
            fold += y_counts
            std_per_label = (
                np.std(label_counts_per_fold, axis=0) / global_label_dist
            )
            fold -= y_counts
            return np.mean(std_per_label)

        groups_and_label_counts = list(enumerate(label_counts_per_group))
        if self.shuffle:
            rng = random.Random(self.seed)
            mean_std = np.mean(np.std(label_counts_per_group, axis=1))
            groups_and_label_counts.sort(
                key=lambda g_counts: -np.std(g_counts[1])
                + rng.gauss(0, mean_std)
            )  # add rng.gauss to increase the randomness
        else:
            groups_and_label_counts.sort(
                key=lambda g_counts: -np.std(g_counts[1])
            )

        for g, label_counts in groups_and_label_counts:
            evals = [
                eval_label_counts_per_fold(
                    label_counts, label_counts_per_fold[i]
                )
                for i in range(self.n_splits)
            ]
            best_fold = np.argmin(evals)
            label_counts_per_fold[best_fold] += label_counts
            groups_per_fold[best_fold].add(g)

        all_groups = set(groups)
        for test_groups in groups_per_fold:
            train_groups = all_groups - test_groups

            train_indices = [
                i for i, g in enumerate(groups) if g in train_groups
            ]
            test_indices = [
                i for i, g in enumerate(groups) if g in test_groups
            ]

            yield train_indices, test_indices
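A usage sketch with toy data (hypothetical values): every group id lands in exactly one fold's test split while the label distribution stays roughly balanced, which is the invariant the greedy assignment above is optimizing.

import numpy as np

X = np.arange(12)
labels = [0, 1, 2] * 4                          # 3 classes
groups = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]   # 6 groups (e.g. scenes)

kfold = StratifiedGroupKFold(n_splits=3, shuffle=True, random_state=42)
for train_idx, test_idx in kfold.split(X, labels, groups):
    train_g = {groups[i] for i in train_idx}
    test_g = {groups[i] for i in test_idx}
    assert not train_g & test_g                  # groups never leak across the split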
--------------------------------------------------------------------------------
/utils/point_cloud_utils.py:
--------------------------------------------------------------------------------

from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import open3d
from plyfile import PlyData, PlyElement


def load_ply(filepath):
    with open(filepath, "rb") as f:
        plydata = PlyData.read(f)
    data = plydata.elements[0].data
    coords = np.array([data["x"], data["y"], data["z"]], dtype=np.float32).T
    feats = None
    labels = None
    if ({"red", "green", "blue"} - set(data.dtype.names)) == set():
        feats = np.array(
            [data["red"], data["green"], data["blue"]], dtype=np.uint8
        ).T
    if "label" in data.dtype.names:
        labels = np.array(data["label"], dtype=np.uint32)
    return coords, feats, labels


def load_ply_with_normals(filepath):
    mesh = open3d.io.read_triangle_mesh(str(filepath))
    if not mesh.has_vertex_normals():
        mesh.compute_vertex_normals()
    vertices = np.asarray(mesh.vertices)
    normals = np.asarray(mesh.vertex_normals)

    coords, feats, labels = load_ply(filepath)
    assert np.allclose(coords, vertices), "different coordinates"
    feats = np.hstack((feats, normals))

    return coords, feats, labels


def load_obj_with_normals(filepath):
    mesh = open3d.io.read_triangle_mesh(str(filepath))
    if not mesh.has_vertex_normals():
        mesh.compute_vertex_normals()
    coords = np.asarray(mesh.vertices)
    normals = np.asarray(mesh.vertex_normals)
    colors = np.asarray(mesh.vertex_colors)
    feats = np.hstack((colors, normals))

    return coords, feats


def write_point_cloud_in_ply(
    filepath: Path,
    coords: np.ndarray,
    feats: Optional[np.ndarray] = None,
    labels: Optional[np.ndarray] = None,
    dtypes: Optional[List[Tuple[str, str]]] = [
        ("x", "<f4"),

--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "aggregation_cuda_kernel.h"


void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const float *position = position_tensor.data_ptr<float>();
    const float *weight = weight_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
}

void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const float *position = position_tensor.data_ptr<float>();
    const float *weight = weight_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    float *grad_input = grad_input_tensor.data_ptr<float>();
    float *grad_position = grad_position_tensor.data_ptr<float>();
    float *grad_weight = grad_weight_tensor.data_ptr<float>();
    aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
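The wrapper above only unpacks raw pointers; the semantics live in the kernel that follows. Per its shape comment, the forward computes, for each point n and channel c, output[n, c] = sum over k of (input[idx[n, k], c] + position[n, k, c]) * weight[n, k, c mod w_c], i.e. weights are shared across channel groups of size w_c. A PyTorch reference (sketch, not part of the repo):

import torch

def aggregation_reference(input, position, weight, idx):
    # input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample)
    n, nsample, c = position.shape
    w_c = weight.shape[-1]
    gathered = input[idx.long()]               # (n, nsample, c)
    w = weight.repeat(1, 1, c // w_c)          # tile so channel c uses weight channel c % w_c
    return ((gathered + position) * w).sum(1)  # (n, c)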
--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------

#include "../cuda_utils.h"
#include "aggregation_cuda_kernel.h"


__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
    }
}

__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    const int c_idx = index % c;
    const int n_idx = index / c;
    const int w_c_idx = c_idx % w_c;
    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
    {
        int idx_idx = n_idx * nsample + nsample_idx;
        int input_idx = idx[idx_idx] * c + c_idx;
        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
        atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
        grad_position[position_idx] = grad_output[index] * weight[weight_idx];
        atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
    }
}

void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}

void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _AGGREGATION_CUDA_KERNEL
#define _AGGREGATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);

#ifdef __cplusplus
}
#endif
#endif

--------------------------------------------------------------------------------
/utils/pointops2/src/attention/attention_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "attention_cuda_kernel.h"

void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor)
{
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *attn = attn_tensor.data_ptr<float>();
    attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn);
}

void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    float *grad_q = grad_q_tensor.data_ptr<float>();
    float *grad_k = grad_k_tensor.data_ptr<float>();
    attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
}

void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
{
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output);
}


void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    float *grad_attn = grad_attn_tensor.data_ptr<float>();
    float *grad_v = grad_v_tensor.data_ptr<float>();
    attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
}
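The step1/step2 kernels themselves are not reproduced in this dump, so the following is a plausible reading based on the tensor names and the index0/index1 pair lists: step1 computes one unnormalized attention score per (query, key) pair and per head, and step2 scatter-adds attention-weighted values back onto the queries. Treat the shapes as assumptions; this is an illustrative sketch, not the repository's implementation:

import torch

def attention_step1_reference(q, k, index0, index1):
    # assumed shapes: q, k: (N, h, hdim); index0/index1: (M,) query/key ids per pair
    return (q[index0.long()] * k[index1.long()]).sum(-1)  # attn: (M, h)

def attention_step2_reference(attn, v, index0, index1):
    # assumed shapes: attn: (M, h); v: (N, h, hdim)
    out = torch.zeros_like(v)
    out.index_add_(0, index0.long(), attn.unsqueeze(-1) * v[index1.long()])
    return out  # (N, h, hdim)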
--------------------------------------------------------------------------------
/utils/pointops2/src/attention/attention_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _ATTENTION_CUDA_KERNEL
#define _ATTENTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif

--------------------------------------------------------------------------------
/utils/pointops2/src/attention_v2/attention_cuda_kernel_v2.h:
--------------------------------------------------------------------------------

#ifndef _ATTENTION_V2_CUDA_KERNEL
#define _ATTENTION_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif
--------------------------------------------------------------------------------
/utils/pointops2/src/attention_v2/attention_cuda_v2.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "attention_cuda_kernel_v2.h"

void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor)
{
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *attn = attn_tensor.data_ptr<float>();
    attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn);
}

void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor,
    at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *q = q_tensor.data_ptr<float>();
    const float *k = k_tensor.data_ptr<float>();
    float *grad_q = grad_q_tensor.data_ptr<float>();
    float *grad_k = grad_k_tensor.data_ptr<float>();
    attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k);
}

void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
{
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output);
}


void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor,
    at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor,
    at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
{
    const float *grad_out = grad_out_tensor.data_ptr<float>();
    const int *index0 = index0_tensor.data_ptr<int>();
    const int *index1 = index1_tensor.data_ptr<int>();
    const float *attn = attn_tensor.data_ptr<float>();
    const float *v = v_tensor.data_ptr<float>();
    float *grad_attn = grad_attn_tensor.data_ptr<float>();
    float *grad_v = grad_v_tensor.data_ptr<float>();
    attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
}
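The v2 interface above replaces v1's per-pair index0 with index0_offsets plus n_max, i.e. a CSR-style layout over the pair list. A plausible construction (editorial sketch; assumes index0 is sorted ascending by query id, which a CSR layout requires):

import torch

def to_csr_offsets(index0, N):
    # index0: (M,) query id of every attention pair, sorted ascending
    counts = torch.bincount(index0, minlength=N)                   # pairs per query
    offsets = torch.cat([counts.new_zeros(1), counts.cumsum(0)])   # (N + 1,) row boundaries
    n_max = int(counts.max())                                      # longest neighbor list
    return offsets.int(), n_max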
--------------------------------------------------------------------------------
/utils/pointops2/src/cuda_utils.h:
--------------------------------------------------------------------------------

#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H

#include <cmath>
#include <algorithm>

#define TOTAL_THREADS 1024
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

inline int opt_n_threads(int work_size) {
    const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
    return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
}

inline dim3 opt_block_config(int x, int y) {
    const int x_threads = opt_n_threads(x);
    const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
    dim3 block_config(x_threads, y_threads, 1);
    return block_config;
}

#endif

--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "grouping_cuda_kernel.h"


void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
}

void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
{
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *grad_input = grad_input_tensor.data_ptr<float>();
    grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
}
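Unlike the pointnet2 version earlier in the tree, which groups batched (b, c, n) tensors, this grouping works on stacked (n, c) point clouds. Per the shape comments in the kernels that follow, the forward is a plain fancy-index gather, output[m, k, :] = input[idx[m, k], :], and the atomicAdd backward is the matching scatter-add. A one-line PyTorch reference (sketch, not part of the repo):

import torch

def grouping_reference(input, idx):
    # input: (n, c), idx: (m, nsample) -> output: (m, nsample, c)
    return input[idx.long()]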
--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------

#include "../cuda_utils.h"
#include "grouping_cuda_kernel.h"


__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    output[index] = input[input_idx];
}

__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= m * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int m_idx = index / nsample / c;
    const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
    atomicAdd(grad_input + input_idx, grad_output[index]);
}

void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
    // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
}

void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
{
    // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
    dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
}

--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _GROUPING_CUDA_KERNEL
#define _GROUPING_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif

--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "interpolation_cuda_kernel.h"


void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
{
    const float *input = input_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    const float *weight = weight_tensor.data_ptr<float>();
    float *output = output_tensor.data_ptr<float>();
    interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
}

void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
{
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    const float *weight = weight_tensor.data_ptr<float>();
    float *grad_input = grad_input_tensor.data_ptr<float>();
    interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
}
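Per the shape comment in the kernel that follows, this op is a k-neighbor weighted sum: output[n, c] = sum over i of input[idx[n, i], c] * weight[n, i], the stacked-cloud counterpart of pointnet2's three_interpolate. A PyTorch reference (sketch, not part of the repo):

import torch

def interpolation_reference(input, idx, weight):
    # input: (m, c), idx: (n, k), weight: (n, k) -> output: (n, c)
    return (input[idx.long()] * weight.unsqueeze(-1)).sum(1)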
--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------

#include "../cuda_utils.h"
#include "interpolation_cuda_kernel.h"


__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
{
    // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        output[index] += input[input_idx] * weight[idx_idx];
    }
}

__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
{
    // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * c) return;
    int c_idx = index % c;
    int n_idx = index / c;
    for (int i = 0; i < k; i++)
    {
        int idx_idx = n_idx * k + i;
        int input_idx = idx[idx_idx] * c + c_idx;
        atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
    }
}

void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
    // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
}

void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
    // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
}

--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _INTERPOLATION_CUDA_KERNEL
#define _INTERPOLATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);

#ifdef __cplusplus
}
#endif
#endif
--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#include "knnquery_cuda_kernel.h"


void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
{
    const float *xyz = xyz_tensor.data_ptr<float>();
    const float *new_xyz = new_xyz_tensor.data_ptr<float>();
    const int *offset = offset_tensor.data_ptr<int>();
    const int *new_offset = new_offset_tensor.data_ptr<int>();
    int *idx = idx_tensor.data_ptr<int>();
    float *dist2 = dist2_tensor.data_ptr<float>();
    knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}

--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------

#include "../cuda_utils.h"
#include "knnquery_cuda_kernel.h"


__device__ void swap_float(float *x, float *y)
{
    float tmp = *x;
    *x = *y;
    *y = tmp;
}


__device__ void swap_int(int *x, int *y)
{
    int tmp = *x;
    *x = *y;
    *y = tmp;
}


__device__ void reheap(float *dist, int *idx, int k)
{
    int root = 0;
    int child = root * 2 + 1;
    while (child < k)
    {
        if(child + 1 < k && dist[child+1] > dist[child])
            child++;
        if(dist[root] > dist[child])
            return;
        swap_float(&dist[root], &dist[child]);
        swap_int(&idx[root], &idx[child]);
        root = child;
        child = root * 2 + 1;
    }
}


__device__ void heap_sort(float *dist, int *idx, int k)
{
    int i;
    for (i = k - 1; i > 0; i--)
    {
        swap_float(&dist[0], &dist[i]);
        swap_int(&idx[0], &idx[i]);
        reheap(dist, idx, i);
    }
}


__device__ int get_bt_idx(int idx, const int *offset)
{
    int i = 0;
    while (1)
    {
        if (idx < offset[i])
            break;
        else
            i++;
    }
    return i;
}


__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
    // input: xyz (n, 3) new_xyz (m, 3)
    // output: idx (m, nsample) dist2 (m, nsample)
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= m) return;

    new_xyz += pt_idx * 3;
    idx += pt_idx * nsample;
    dist2 += pt_idx * nsample;
    int bt_idx = get_bt_idx(pt_idx, new_offset);
    int start;
    if (bt_idx == 0)
        start = 0;
    else
        start = offset[bt_idx - 1];
    int end = offset[bt_idx];

    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    float best_dist[100];
    int best_idx[100];
    for(int i = 0; i < nsample; i++){
        best_dist[i] = 1e10;
        best_idx[i] = start;
    }
    for(int i = start; i < end; i++){
        float x = xyz[i * 3 + 0];
        float y = xyz[i * 3 + 1];
        float z = xyz[i * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        if (d2 < best_dist[0]){
            best_dist[0] = d2;
            best_idx[0] = i;
            reheap(best_dist, best_idx, nsample);
        }
    }
    heap_sort(best_dist, best_idx, nsample);
    for(int i = 0; i < nsample; i++){
        idx[i] = best_idx[i];
        dist2[i] = best_dist[i];
    }
}


void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
    // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    knnquery_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}
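knnquery batches stacked point clouds via exclusive-end offsets: offset[i] is the end index of batch i, which get_bt_idx above scans linearly to map a point to its batch. Note that the fixed best_dist[100]/best_idx[100] buffers cap nsample at 100. An editorial smoke test, assuming the built extension is importable as pointops2_cuda (a hypothetical name; the actual one comes from the pointops2 setup script, not shown here):

import torch
import pointops2_cuda  # hypothetical extension module name

n, nsample = 2048, 16                                        # nsample must be <= 100
xyz = torch.rand(n, 3).cuda()
offset = torch.tensor([1024, n], dtype=torch.int32).cuda()   # two stacked clouds
idx = torch.zeros(n, nsample, dtype=torch.int32).cuda()      # outputs are written in place
dist2 = torch.zeros(n, nsample).cuda()
pointops2_cuda.knnquery_cuda(n, nsample, xyz, xyz, offset, offset, idx, dist2)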
--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _KNNQUERY_CUDA_KERNEL
#define _KNNQUERY_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);

#ifdef __cplusplus
}
#endif
#endif
m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /utils/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "relative_pos_encoding_cuda_kernel.h" 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 8 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *table = table_tensor.data_ptr(); 12 | const int *index = index_tensor.data_ptr(); 13 | const int *rel_idx = rel_idx_tensor.data_ptr(); 14 | float *output = output_tensor.data_ptr(); 15 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 16 | } 17 | 18 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 19 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const float *q = q_tensor.data_ptr(); 24 | const int *index = index_tensor.data_ptr(); 25 | const float *table = table_tensor.data_ptr(); 26 | const int *rel_idx = rel_idx_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_table = grad_table_tensor.data_ptr(); 29 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 30 | } 31 | 32 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | const float *table = table_tensor.data_ptr(); 40 | const int *rel_idx = rel_idx_tensor.data_ptr(); 41 | float *output = output_tensor.data_ptr(); 42 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, 
--------------------------------------------------------------------------------
/utils/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _RPE_CUDA_KERNEL
#define _RPE_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor);

void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table);

void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
#endif
--------------------------------------------------------------------------------
/utils/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h:
--------------------------------------------------------------------------------

#ifndef _RPE_V2_CUDA_KERNEL
#define _RPE_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
#endif
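Compared with v1, the v2/v3 interfaces replace the per-pair index list with offset-based (CSR-style) indexing, so all pairs belonging to one query are contiguous and each thread block can process one query's neighborhood. A hedged sketch of that conversion, assuming index_q is already sorted by query as the kernels require:

import torch

def to_csr_offsets(index_q, n):
    # index_q: (M,) sorted query id per pair; returns (n + 1,) offsets so that
    # the pairs of query i occupy [offsets[i], offsets[i + 1]).
    counts = torch.bincount(index_q, minlength=n)
    offsets = torch.zeros(n + 1, dtype=torch.int32, device=index_q.device)
    offsets[1:] = torch.cumsum(counts, dim=0)
    return offsets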
--------------------------------------------------------------------------------
/utils/pointops2/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda_runtime.h>
#include "sampling_cuda_kernel.h"


void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
{
    const float *xyz = xyz_tensor.data_ptr<float>();
    const int *offset = offset_tensor.data_ptr<int>();
    const int *new_offset = new_offset_tensor.data_ptr<int>();
    float *tmp = tmp_tensor.data_ptr<float>();
    int *idx = idx_tensor.data_ptr<int>();
    furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
}

--------------------------------------------------------------------------------
/utils/pointops2/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _SAMPLING_CUDA_KERNEL
#define _SAMPLING_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);

#ifdef __cplusplus
}
#endif
#endif
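furthestsampling_cuda follows the same offset-batching convention as the k-NN binding: offset/new_offset are cumulative point counts, and tmp is a per-point scratch buffer of best distances that the kernel updates in place. A minimal usage sketch, again assuming the hypothetical module name pointops2_cuda:

import torch
import pointops2_cuda  # hypothetical module name

def furthest_point_sample(xyz, offset, new_offset):
    # xyz: (n, 3); offset/new_offset: (b,) int32 cumulative counts.
    # Returns (m,) indices of the sampled points, where m = new_offset[-1].
    n, b, m = xyz.shape[0], offset.shape[0], int(new_offset[-1].item())
    idx = torch.zeros(m, dtype=torch.int32, device=xyz.device)
    tmp = torch.full((n,), 1e10, dtype=torch.float32, device=xyz.device)
    pointops2_cuda.furthestsampling_cuda(b, n, xyz, offset, new_offset, tmp, idx)
    return idx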
--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------

#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda_runtime.h>
#include "subtraction_cuda_kernel.h"


void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
{
    const float *input1 = input1_tensor.data_ptr<float>();
    const float *input2 = input2_tensor.data_ptr<float>();
    const int *idx = idx_tensor.data_ptr<int>();
    float *output = output_tensor.data_ptr<float>();
    subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
}

void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
{
    const int *idx = idx_tensor.data_ptr<int>();
    const float *grad_output = grad_output_tensor.data_ptr<float>();
    float *grad_input1 = grad_input1_tensor.data_ptr<float>();
    float *grad_input2 = grad_input2_tensor.data_ptr<float>();
    subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}

--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------

#include "../cuda_utils.h"
#include "subtraction_cuda_kernel.h"


__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    output[index] = input1[input1_idx] - input2[input2_idx];
}

__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= n * nsample * c) return;
    const int c_idx = index % c;
    const int nsample_idx = (index / c) % nsample;
    const int n_idx = index / nsample / c;
    const int idx_idx = n_idx * nsample + nsample_idx;
    const int input1_idx = n_idx * c + c_idx;
    const int input2_idx = idx[idx_idx] * c + c_idx;
    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
}

void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_forward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, input1, input2, idx, output);
}

void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);
    subtraction_backward_cuda_kernel<<<blocks, threads>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
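The kernels flatten (n, nsample, c) into a single thread index and decompose it back into (n_idx, nsample_idx, c_idx). A pure-PyTorch reference of the same forward semantics, useful for unit-testing the extension:

import torch

def subtraction_forward_reference(input1, input2, idx):
    # input1, input2: (n, c); idx: (n, nsample) int64.
    # Returns (n, nsample, c) with output[i, j] = input1[i] - input2[idx[i, j]],
    # matching the per-element indexing in the CUDA kernel above.
    return input1.unsqueeze(1) - input2[idx]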
--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------

#ifndef _SUBTRACTION_CUDA_KERNEL
#define _SUBTRACTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);

#ifdef __cplusplus
extern "C" {
#endif

void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);

#ifdef __cplusplus
}
#endif
#endif

--------------------------------------------------------------------------------
/utils/votenet_utils/nn_distance.py:
--------------------------------------------------------------------------------

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

""" Chamfer distance in Pytorch.
Author: Charles R. Qi
"""

import torch
import torch.nn as nn
import numpy as np


def huber_loss(error, delta=1.0):
    """
    Args:
        error: Torch tensor (d1,d2,...,dk)
    Returns:
        loss: Torch tensor (d1,d2,...,dk)

    x = error = pred - gt or dist(pred,gt)
    0.5 * |x|^2                 if |x|<=d
    0.5 * d^2 + d * (|x|-d)     if |x|>d
    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    abs_error = torch.abs(error)
    # quadratic = torch.min(abs_error, torch.FloatTensor([delta]))
    quadratic = torch.clamp(abs_error, max=delta)
    linear = abs_error - quadratic
    loss = 0.5 * quadratic**2 + delta * linear
    return loss


def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False):
    """
    Input:
        pc1: (B,N,C) torch tensor
        pc2: (B,M,C) torch tensor
        l1smooth: bool, whether to use l1smooth loss
        delta: scalar, the delta used in l1smooth loss
    Output:
        dist1: (B,N) torch float32 tensor
        idx1: (B,N) torch int64 tensor
        dist2: (B,M) torch float32 tensor
        idx2: (B,M) torch int64 tensor
    """
    N = pc1.shape[1]
    M = pc2.shape[1]
    pc1_expand_tile = pc1.unsqueeze(2).repeat(1, 1, M, 1)
    pc2_expand_tile = pc2.unsqueeze(1).repeat(1, N, 1, 1)
    pc_diff = pc1_expand_tile - pc2_expand_tile

    if l1smooth:
        pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1)  # (B,N,M)
    elif l1:
        pc_dist = torch.sum(torch.abs(pc_diff), dim=-1)  # (B,N,M)
    else:
        pc_dist = torch.sum(pc_diff**2, dim=-1)  # (B,N,M)
    dist1, idx1 = torch.min(pc_dist, dim=2)  # (B,N)
    dist2, idx2 = torch.min(pc_dist, dim=1)  # (B,M)
    return dist1, idx1, dist2, idx2


def demo_nn_distance():
    np.random.seed(0)
    pc1arr = np.random.random((1, 5, 3))
    pc2arr = np.random.random((1, 6, 3))
    pc1 = torch.from_numpy(pc1arr.astype(np.float32))
    pc2 = torch.from_numpy(pc2arr.astype(np.float32))
    dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2)
    print(dist1)
    print(idx1)
    dist = np.zeros((5, 6))
    for i in range(5):
        for j in range(6):
            dist[i, j] = np.sum((pc1arr[0, i, :] - pc2arr[0, j, :]) ** 2)
    print(dist)
    print("-" * 30)
    print("L1smooth dists:")
    dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True)
    print(dist1)
    print(idx1)
    dist = np.zeros((5, 6))
    for i in range(5):
        for j in range(6):
            error = np.abs(pc1arr[0, i, :] - pc2arr[0, j, :])
            quad = np.minimum(error, 1.0)
            linear = error - quad
            loss = 0.5 * quad**2 + 1.0 * linear
            dist[i, j] = np.sum(loss)
    print(dist)


if __name__ == "__main__":
    demo_nn_distance()
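Because nn_distance returns both directions of the nearest-neighbor search, a symmetric Chamfer loss is just the sum of the two per-direction means. For example, using the nn_distance defined above:

import torch

pc1 = torch.rand(2, 128, 3)  # (B, N, C) predicted points
pc2 = torch.rand(2, 256, 3)  # (B, M, C) ground-truth points
dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2)
chamfer = dist1.mean(dim=1) + dist2.mean(dim=1)  # (B,) per-sample loss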
--------------------------------------------------------------------------------
/utils/votenet_utils/tf_logger.py:
--------------------------------------------------------------------------------

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import tensorflow as tf
import numpy as np
import scipy.misc

try:
    from StringIO import StringIO  # Python 2.7
except ImportError:
    from io import BytesIO  # Python 3.x


class Logger(object):
    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable."""
        summary = tf.Summary(
            value=[tf.Summary.Value(tag=tag, simple_value=value)]
        )
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images."""

        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to a string buffer
            try:
                s = StringIO()
            except NameError:  # Python 3: StringIO was never imported
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(
                encoded_image_string=s.getvalue(),
                height=img.shape[0],
                width=img.shape[1],
            )
            # Create a Summary value
            img_summaries.append(
                tf.Summary.Value(tag="%s/%d" % (tag, i), image=img_sum)
            )

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
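This logger writes raw tf.Summary protos and therefore targets TensorFlow 1.x, where tf.summary.FileWriter, tf.Summary, and tf.HistogramProto still exist. Usage is construct-and-log:

import numpy as np

logger = Logger("./logs")  # assumes the Logger class above and a TF1 environment
for step in range(100):
    logger.scalar_summary("loss", 1.0 / (step + 1), step)
logger.histo_summary("weights", np.random.randn(1000), step=99)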
--------------------------------------------------------------------------------
/utils/votenet_utils/tf_visualizer.py:
--------------------------------------------------------------------------------

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix"""
import os
import time

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
import sys

sys.path.append(BASE_DIR)
import tf_logger


class Visualizer:
    def __init__(self, opt, name="train"):
        # self.opt = opt
        # self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name))
        # self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt')
        self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name))
        self.log_name = os.path.join(opt.log_dir, "tf_visualizer_log.txt")
        with open(self.log_name, "a") as log_file:
            now = time.strftime("%c")
            log_file.write(
                "================ Training Loss (%s) ================\n" % now
            )

    # |visuals|: dictionary of images to save
    def log_images(self, visuals, step):
        for label, image_numpy in visuals.items():
            self.logger.image_summary(label, [image_numpy], step)

    # scalars: dictionary of scalar labels and values
    def log_scalars(self, scalars, step):
        for label, val in scalars.items():
            self.logger.scalar_summary(label, val, step)

    # scatter plots
    def plot_current_points(self, points, disp_offset=10):
        pass

    # scalars: same format as |scalars| of plot_current_scalars
    def print_current_scalars(self, epoch, i, scalars):
        message = "(epoch: %d, iters: %d) " % (epoch, i)
        for k, v in scalars.items():
            message += "%s: %.3f " % (k, v)

        print(message)
        with open(self.log_name, "a") as log_file:
            log_file.write("%s\n" % message)
--------------------------------------------------------------------------------
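Visualizer only reads opt.log_dir, so any object with that attribute works as the opt argument. A minimal usage sketch, assuming the same TF1 environment and that the log directory exists:

from types import SimpleNamespace

opt = SimpleNamespace(log_dir="./logs")  # only .log_dir is read by Visualizer
viz = Visualizer(opt, name="train")
viz.log_scalars({"loss": 0.42, "acc": 0.9}, step=10)
viz.print_current_scalars(epoch=1, i=10, scalars={"loss": 0.42})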