├── conf ├── __init__.py ├── optimizer │ └── adamw.yaml ├── metrics │ └── miou.yaml ├── matcher │ └── hungarian_matcher.yaml ├── trainer │ └── trainer600.yaml ├── logging │ └── full.yaml ├── loss │ ├── set_criterion.yaml │ └── set_criterion_hp.yaml ├── scheduler │ └── onecyclelr.yaml ├── callbacks │ ├── callbacks_instance_segmentation.yaml │ └── callbacks_instance_segmentation_human.yaml ├── data │ ├── data_loaders │ │ └── simple_loader.yaml │ ├── indoor.yaml │ ├── collation_functions │ │ └── voxelize_collate.yaml │ └── datasets │ │ ├── egobody.yaml │ │ └── synthetic_humans.yaml ├── augmentation │ ├── albumentations_aug.yaml │ └── volumentations_aug.yaml ├── model │ ├── mask3d.yaml │ └── mask3d_hp.yaml └── config_base_instance_segmentation.yaml ├── utils ├── __init__.py ├── pointops2 │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ ├── sampling │ │ │ ├── sampling_cuda_kernel.h │ │ │ └── sampling_cuda.cpp │ │ ├── knnquery │ │ │ ├── knnquery_cuda_kernel.h │ │ │ ├── knnquery_cuda.cpp │ │ │ └── knnquery_cuda_kernel.cu │ │ ├── cuda_utils.h │ │ ├── grouping │ │ │ ├── grouping_cuda_kernel.h │ │ │ ├── grouping_cuda.cpp │ │ │ └── grouping_cuda_kernel.cu │ │ ├── interpolation │ │ │ ├── interpolation_cuda_kernel.h │ │ │ ├── interpolation_cuda.cpp │ │ │ └── interpolation_cuda_kernel.cu │ │ ├── subtraction │ │ │ ├── subtraction_cuda_kernel.h │ │ │ ├── subtraction_cuda.cpp │ │ │ └── subtraction_cuda_kernel.cu │ │ ├── aggregation │ │ │ ├── aggregation_cuda_kernel.h │ │ │ ├── aggregation_cuda.cpp │ │ │ └── aggregation_cuda_kernel.cu │ │ ├── attention │ │ │ ├── attention_cuda_kernel.h │ │ │ ├── attention_cuda.cpp │ │ │ └── attention_cuda_kernel.cu │ │ ├── attention_v2 │ │ │ ├── attention_cuda_kernel_v2.h │ │ │ └── attention_cuda_v2.cpp │ │ ├── rpe │ │ │ ├── relative_pos_encoding_cuda_kernel.h │ │ │ └── relative_pos_encoding_cuda.cpp │ │ ├── pointops_api.cpp │ │ └── rpe_v2 │ │ │ └── relative_pos_encoding_cuda_kernel_v2.h │ ├── functions │ │ ├── __init__.py │ │ ├── test_attention_op_step2.py │ │ ├── test_relative_pos_encoding_op_step1.py │ │ ├── test_relative_pos_encoding_op_step1_v2.py │ │ ├── test_relative_pos_encoding_op_step2.py │ │ ├── test_attention_op_step1.py │ │ ├── test_relative_pos_encoding_op_step1_v3.py │ │ ├── test_relative_pos_encoding_op_step2_v2.py │ │ └── test_attention_op_step1_v2.py │ └── setup.py ├── votenet_utils │ ├── tf_visualizer.py │ ├── tf_logger.py │ └── nn_distance.py ├── gradflow_check.py ├── point_cloud_utils.py ├── kfold.py └── utils.py ├── benchmark ├── __init__.py └── util.py ├── trainer └── __init__.py ├── models ├── modules │ ├── __init__.py │ ├── 3detr_helpers.py │ ├── helpers_3detr.py │ ├── senet_block.py │ └── resnet_block.py ├── metrics │ ├── __init__.py │ ├── metrics.py │ └── confusionmatrix.py ├── model.py ├── wrapper.py ├── __init__.py └── misc.py ├── docs └── assets │ └── teaser.jpg ├── third_party └── pointnet2 │ ├── _ext_src │ ├── include │ │ ├── ball_query.h │ │ ├── group_points.h │ │ ├── sampling.h │ │ ├── interpolate.h │ │ ├── utils.h │ │ └── cuda_utils.h │ └── src │ │ ├── bindings.cpp │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── group_points.cpp │ │ ├── sampling.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ └── interpolate_gpu.cu │ ├── pointnet2_test.py │ └── setup.py ├── scripts ├── eval │ ├── eval_human3d.sh │ └── eval_mask3d.sh └── train │ ├── train_human3d.sh │ └── train_mask3d.sh ├── download_checkpoints.sh ├── datasets └── random_cuboid.py ├── main.py ├── .gitignore └── occlusion_subsets ├── split_test_occlusion_high.txt └── split_test_occlusion_mid.txt /conf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/pointops2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/pointops2/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/pointops2/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/optimizer/adamw.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.optim.AdamW 3 | lr: 0.0001 -------------------------------------------------------------------------------- /docs/assets/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/human-3d/Human3D/HEAD/docs/assets/teaser.jpg -------------------------------------------------------------------------------- /models/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .confusionmatrix import ConfusionMatrix 2 | from .metrics import IoU 3 | 4 | __all__ = ["ConfusionMatrix", "IoU"] 5 | -------------------------------------------------------------------------------- /conf/metrics/miou.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.metrics.ConfusionMatrix 3 | num_classes: ${data.num_labels} 4 | ignore_label: ${data.ignore_label} 5 | -------------------------------------------------------------------------------- /conf/matcher/hungarian_matcher.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.matcher.HungarianMatcher 3 | cost_class: 2. 4 | cost_mask: 5. 5 | cost_dice: 2. 6 | num_points: -1 7 | -------------------------------------------------------------------------------- /conf/trainer/trainer600.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | deterministic: false 3 | max_epochs: 601 4 | min_epochs: 1 5 | resume_from_checkpoint: null 6 | check_val_every_n_epoch: 50 7 | num_sanity_val_steps: -1 8 | -------------------------------------------------------------------------------- /conf/logging/full.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.loggers.WandbLogger 3 | project: ${general.project_name} 4 | name: ${general.experiment_name} 5 | save_dir: ${general.save_dir} 6 | entity: "schult" 7 | resume: "allow" 8 | id: ${general.experiment_name} 9 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #pragma once 4 | #include 5 | 6 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 7 | const int nsample); 8 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include 6 | 7 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | -------------------------------------------------------------------------------- /conf/loss/set_criterion.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.criterion.SetCriterion 3 | num_classes: ${general.num_targets} 4 | eos_coef: 0.1 5 | losses: 6 | - "labels" 7 | - "masks" 8 | num_points: ${matcher.num_points} 9 | oversample_ratio: 3.0 10 | importance_sample_ratio: 0.75 11 | class_weights: -1 12 | -------------------------------------------------------------------------------- /conf/scheduler/onecyclelr.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | scheduler: 4 | _target_: torch.optim.lr_scheduler.OneCycleLR 5 | max_lr: ${optimizer.lr} 6 | epochs: ${trainer.max_epochs} 7 | # need to set to number because of tensorboard logger 8 | steps_per_epoch: -1 9 | 10 | pytorch_lightning_params: 11 | interval: step 12 | -------------------------------------------------------------------------------- /conf/callbacks/callbacks_instance_segmentation.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.callbacks.ModelCheckpoint 3 | monitor: val_AP_50_parts 4 | save_last: true 5 | save_top_k: 1 6 | mode: max 7 | dirpath: ${general.save_dir} 8 | filename: "{epoch}-{val_AP_50_parts:.3f}" 9 | every_n_epochs: 1 10 | 11 | - _target_: pytorch_lightning.callbacks.LearningRateMonitor 12 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include 6 | 7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 10 | -------------------------------------------------------------------------------- /conf/callbacks/callbacks_instance_segmentation_human.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | - _target_: pytorch_lightning.callbacks.ModelCheckpoint 3 | monitor: val_AP_50_human 4 | save_last: true 5 | save_top_k: 1 6 | mode: max 7 | dirpath: ${general.save_dir} 8 | filename: "{epoch}-{val_AP_50_parts:.3f}" 9 | every_n_epochs: 1 10 | 11 | - _target_: pytorch_lightning.callbacks.LearningRateMonitor 12 | -------------------------------------------------------------------------------- /conf/loss/set_criterion_hp.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.criterion_hp.SetCriterionHumanParts 3 | num_classes: 2 # ${general.num_targets} 4 | eos_coef: 0.1 5 | losses: 6 | - "labels" 7 | - "masks" 8 | num_points: ${matcher.num_points} 9 | oversample_ratio: 3.0 10 | importance_sample_ratio: 0.75 11 | class_weights: -1 12 | num_human_queries: ${model.num_human_queries} 13 | num_parts_per_human_queries: ${model.num_parts_per_human_queries} 14 | num_parts: ${data.num_labels} 15 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 10 | at::Tensor weight); 11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 12 | at::Tensor weight, const int m); 13 | -------------------------------------------------------------------------------- /utils/pointops2/src/sampling/sampling_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUDA_KERNEL 2 | #define _SAMPLING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /conf/data/data_loaders/simple_loader.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_dataloader: 4 | _target_: torch.utils.data.DataLoader 5 | shuffle: true 6 | pin_memory: ${data.pin_memory} 7 | num_workers: ${data.num_workers} 8 | batch_size: ${data.batch_size} 9 | 10 | validation_dataloader: 11 | _target_: torch.utils.data.DataLoader 12 | shuffle: false 13 | pin_memory: ${data.pin_memory} 14 | num_workers: ${data.num_workers} 15 | batch_size: ${data.test_batch_size} 16 | 17 | test_dataloader: 18 | _target_: torch.utils.data.DataLoader 19 | shuffle: false 20 | pin_memory: ${data.pin_memory} 21 | num_workers: ${data.num_workers} 22 | batch_size: ${data.test_batch_size} 23 | -------------------------------------------------------------------------------- /utils/pointops2/src/knnquery/knnquery_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _KNNQUERY_CUDA_KERNEL 2 | #define _KNNQUERY_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif 19 | -------------------------------------------------------------------------------- /scripts/eval/eval_human3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py \ 4 | general.experiment_name="Human3D_eval" \ 5 | general.project_name="human3d" \ 6 | data/datasets=egobody \ 7 | general.num_targets=16 \ 8 | data.num_labels=16 \ 9 | model=mask3d_hp \ 10 | loss=set_criterion_hp \ 11 | model.num_human_queries=5 \ 12 | model.num_parts_per_human_queries=16 \ 13 | trainer.check_val_every_n_epoch=1 \ 14 | general.topk_per_image=-1 \ 15 | model.non_parametric_queries=false \ 16 | trainer.max_epochs=36 \ 17 | data.batch_size=4 \ 18 | data.num_workers=10 \ 19 | general.reps_per_epoch=1 \ 20 | model.config.backbone._target_=models.Res16UNet18B \ 21 | general.checkpoint="checkpoints/human3d.ckpt" \ 22 | general.train_mode=false \ 23 | general.save_visualizations=false 24 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "ball_query.h" 5 | #include "group_points.h" 6 | #include "interpolate.h" 7 | #include "sampling.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("gather_points", &gather_points); 11 | m.def("gather_points_grad", &gather_points_grad); 12 | m.def("furthest_point_sampling", &furthest_point_sampling); 13 | 14 | m.def("three_nn", &three_nn); 15 | m.def("three_interpolate", &three_interpolate); 16 | m.def("three_interpolate_grad", &three_interpolate_grad); 17 | 18 | m.def("ball_query", &ball_query); 19 | 20 | m.def("group_points", &group_points); 21 | m.def("group_points_grad", &group_points_grad); 22 | } 23 | -------------------------------------------------------------------------------- /utils/pointops2/src/sampling/sampling_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "sampling_cuda_kernel.h" 6 | 7 | 8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) 9 | { 10 | const float *xyz = xyz_tensor.data_ptr(); 11 | const int *offset = offset_tensor.data_ptr(); 12 | const int *new_offset = new_offset_tensor.data_ptr(); 13 | float *tmp = tmp_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); 16 | } 17 | -------------------------------------------------------------------------------- /utils/pointops2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #define TOTAL_THREADS 1024 8 | #define THREADS_PER_BLOCK 256 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 19 | dim3 block_config(x_threads, y_threads, 1); 20 | return block_config; 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /utils/pointops2/src/knnquery/knnquery_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "knnquery_cuda_kernel.h" 6 | 7 | 8 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) 9 | { 10 | const float *xyz = xyz_tensor.data_ptr(); 11 | const float *new_xyz = new_xyz_tensor.data_ptr(); 12 | const int *offset = offset_tensor.data_ptr(); 13 | const int *new_offset = new_offset_tensor.data_ptr(); 14 | int *idx = idx_tensor.data_ptr(); 15 | float *dist2 = dist2_tensor.data_ptr(); 16 | knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 17 | } 18 | -------------------------------------------------------------------------------- /utils/pointops2/src/grouping/grouping_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUDA_KERNEL 2 | #define _GROUPING_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); 15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /conf/augmentation/albumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | __version__: 0.4.5 2 | transform: 3 | __class_fullname__: albumentations.core.composition.Compose 4 | additional_targets: {} 5 | bbox_params: null 6 | keypoint_params: null 7 | p: 1.0 8 | transforms: 9 | - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast 10 | always_apply: true 11 | brightness_by_max: true 12 | brightness_limit: 13 | - -0.2 14 | - 0.2 15 | contrast_limit: 16 | - -0.2 17 | - 0.2 18 | p: 0.5 19 | - __class_fullname__: albumentations.augmentations.transforms.RGBShift 20 | always_apply: true 21 | b_shift_limit: 22 | - -20 23 | - 20 24 | g_shift_limit: 25 | - -20 26 | - 20 27 | p: 0.5 28 | r_shift_limit: 29 | - -20 30 | - 20 31 | -------------------------------------------------------------------------------- /models/model.py: -------------------------------------------------------------------------------- 1 | from MinkowskiEngine import MinkowskiNetwork 2 | 3 | 4 | class Model(MinkowskiNetwork): 5 | """ 6 | Base network for all sparse convnet 7 | 8 | By default, all networks are segmentation networks. 9 | """ 10 | 11 | OUT_PIXEL_DIST = -1 12 | 13 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 14 | super().__init__(D) 15 | self.in_channels = in_channels 16 | self.out_channels = out_channels 17 | self.config = config 18 | 19 | 20 | class HighDimensionalModel(Model): 21 | """ 22 | Base network for all spatio (temporal) chromatic sparse convnet 23 | """ 24 | 25 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 26 | assert D > 4, "Num dimension smaller than 5" 27 | super().__init__(in_channels, out_channels, config, D, **kwargs) 28 | -------------------------------------------------------------------------------- /scripts/eval/eval_mask3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py \ 4 | general.experiment_name="Mask3D_eval" \ 5 | general.project_name="mask3d_humanseg" \ 6 | data/datasets=egobody \ 7 | general.num_targets=16 \ 8 | data.num_labels=16 \ 9 | model=mask3d \ 10 | loss=set_criterion \ 11 | model.num_queries=5 \ 12 | trainer.check_val_every_n_epoch=1 \ 13 | general.topk_per_image=-1 \ 14 | model.non_parametric_queries=false \ 15 | trainer.max_epochs=36 \ 16 | data.batch_size=4 \ 17 | data.num_workers=10 \ 18 | general.reps_per_epoch=1 \ 19 | general.save_visualizations=false \ 20 | model.config.backbone._target_=models.Res16UNet18B \ 21 | data.part2human=true \ 22 | loss.num_classes=2 \ 23 | model.num_classes=2 \ 24 | callbacks=callbacks_instance_segmentation_human \ 25 | general.checkpoint="checkpoints/mask3d.ckpt" \ 26 | general.train_mode=false \ 27 | general.save_visualizations=false 28 | -------------------------------------------------------------------------------- /download_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if wget is installed 4 | if ! command -v wget &> /dev/null; then 5 | echo "Error: wget is not installed. Please install wget and try again." 6 | exit 1 7 | fi 8 | 9 | # Directory to store the downloaded files 10 | DIR="checkpoints" 11 | 12 | # Create the directory if it doesn't exist 13 | mkdir -p "$DIR" 14 | 15 | # URLs of the files to be downloaded 16 | URL1="https://omnomnom.vision.rwth-aachen.de/data/human3d/checkpoints/mask3d.ckpt" 17 | URL2="https://omnomnom.vision.rwth-aachen.de/data/human3d/checkpoints/human3d.ckpt" 18 | 19 | # Download the files using wget 20 | wget -P "$DIR" "$URL1" 21 | wget -P "$DIR" "$URL2" 22 | 23 | # Print a success message if both files are downloaded successfully 24 | if [ $? -eq 0 ]; then 25 | echo "Files downloaded successfully to $DIR/" 26 | else 27 | echo "There was an error downloading the files." 28 | fi 29 | -------------------------------------------------------------------------------- /utils/pointops2/src/interpolation/interpolation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATION_CUDA_KERNEL 2 | #define _INTERPOLATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); 15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /utils/pointops2/src/subtraction/subtraction_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _SUBTRACTION_CUDA_KERNEL 2 | #define _SUBTRACTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); 15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /utils/pointops2/src/grouping/grouping_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "grouping_cuda_kernel.h" 6 | 7 | 8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | float *output = output_tensor.data_ptr(); 13 | grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); 14 | } 15 | 16 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) 17 | { 18 | const float *grad_output = grad_output_tensor.data_ptr(); 19 | const int *idx = idx_tensor.data_ptr(); 20 | float *grad_input = grad_input_tensor.data_ptr(); 21 | grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); 22 | } 23 | -------------------------------------------------------------------------------- /third_party/pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ Testing customized ops. """ 4 | 5 | import torch 6 | from torch.autograd import gradcheck 7 | import numpy as np 8 | 9 | import os 10 | import sys 11 | 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(BASE_DIR) 14 | import pointnet2_utils 15 | 16 | 17 | def test_interpolation_grad(): 18 | batch_size = 1 19 | feat_dim = 2 20 | m = 4 21 | feats = ( 22 | torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | ) 24 | 25 | def interpolate_func(inputs): 26 | idx = torch.from_numpy(np.array([[[0, 1, 2], [1, 2, 3]]])).int().cuda() 27 | weight = ( 28 | torch.from_numpy(np.array([[[1, 1, 1], [2, 2, 2]]])).float().cuda() 29 | ) 30 | interpolated_feats = pointnet2_utils.three_interpolate( 31 | inputs, idx, weight 32 | ) 33 | return interpolated_feats 34 | 35 | assert gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_interpolation_grad() 40 | -------------------------------------------------------------------------------- /conf/data/indoor.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | 3 | # these parameters are inherited by datasets, data_loaders and collators 4 | # but they might be overwritten 5 | 6 | # splits 7 | train_mode: train 8 | validation_mode: validation 9 | test_mode: validation # test # validation 10 | 11 | part2human: false 12 | 13 | # dataset 14 | ignore_label: 255 15 | add_raw_coordinates: true # 3dim 16 | add_colors: true # 3dim 17 | add_normals: false # 3dim 18 | in_channels: 3 # in_channels = 3 * (add_normals + add_colors + add_raw_coordinates) 19 | num_labels: 20 20 | # num_labels: 41 21 | add_instance: ${general.add_instance} 22 | task: ${general.task} 23 | add_clip: ${general.add_clip} 24 | 25 | # data loader 26 | pin_memory: false 27 | num_workers: 4 28 | batch_size: 5 29 | test_batch_size: 1 30 | cache_data: false 31 | 32 | # collation 33 | voxel_size: 0.02 34 | 35 | reps_per_epoch: ${general.reps_per_epoch} 36 | 37 | is_mirroring: true 38 | broken_mirror_version: false 39 | 40 | cropping: false 41 | cropping_args: 42 | min_points: 30000 43 | aspect: 0.8 44 | min_crop: 0.5 45 | max_crop: 1.0 46 | 47 | crop_min_size: 20000 48 | crop_length: 6.0 49 | cropping_v1: true -------------------------------------------------------------------------------- /models/wrapper.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from MinkowskiEngine import SparseTensor 4 | from torch.nn import Module 5 | 6 | 7 | class Wrapper(Module): 8 | """ 9 | Wrapper for the segmentation networks. 10 | """ 11 | 12 | OUT_PIXEL_DIST = -1 13 | 14 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 15 | super().__init__() 16 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 17 | 18 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 19 | raise NotImplementedError("Must initialize a model and a filter") 20 | 21 | def forward(self, x, coords, colors=None): 22 | soutput = self.model(x) 23 | 24 | # During training, make the network invariant to the filter 25 | if not self.training or random.random() < 0.5: 26 | # Filter requires the model to finish the forward pass 27 | wrapper_coords = self.filter.initialize_coords( 28 | self.model, coords, colors 29 | ) 30 | finput = SparseTensor(soutput.F, wrapper_coords) 31 | soutput = self.filter(finput) 32 | return soutput 33 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #pragma once 5 | #include 6 | #include 7 | 8 | #define CHECK_CUDA(x) \ 9 | do { \ 10 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \ 11 | } while (0) 12 | 13 | #define CHECK_CONTIGUOUS(x) \ 14 | do { \ 15 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \ 16 | } while (0) 17 | 18 | #define CHECK_IS_INT(x) \ 19 | do { \ 20 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \ 21 | #x " must be an int tensor"); \ 22 | } while (0) 23 | 24 | #define CHECK_IS_FLOAT(x) \ 25 | do { \ 26 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \ 27 | #x " must be a float tensor"); \ 28 | } while (0) 29 | -------------------------------------------------------------------------------- /utils/pointops2/src/interpolation/interpolation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "interpolation_cuda_kernel.h" 6 | 7 | 8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const int *idx = idx_tensor.data_ptr(); 12 | const float *weight = weight_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); 15 | } 16 | 17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) 18 | { 19 | const float *grad_output = grad_output_tensor.data_ptr(); 20 | const int *idx = idx_tensor.data_ptr(); 21 | const float *weight = weight_tensor.data_ptr(); 22 | float *grad_input = grad_input_tensor.data_ptr(); 23 | interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); 24 | } 25 | -------------------------------------------------------------------------------- /utils/pointops2/src/aggregation/aggregation_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _AGGREGATION_CUDA_KERNEL 2 | #define _AGGREGATION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); 8 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); 15 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "ball_query.h" 5 | #include "utils.h" 6 | 7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 8 | int nsample, const float *new_xyz, 9 | const float *xyz, int *idx); 10 | 11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 12 | const int nsample) { 13 | CHECK_CONTIGUOUS(new_xyz); 14 | CHECK_CONTIGUOUS(xyz); 15 | CHECK_IS_FLOAT(new_xyz); 16 | CHECK_IS_FLOAT(xyz); 17 | 18 | if (new_xyz.is_cuda()) { 19 | CHECK_CUDA(xyz); 20 | } 21 | 22 | at::Tensor idx = 23 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 24 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 25 | 26 | if (new_xyz.is_cuda()) { 27 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 28 | radius, nsample, new_xyz.data(), 29 | xyz.data(), idx.data()); 30 | } else { 31 | AT_ASSERT(false, "CPU not supported"); 32 | } 33 | 34 | return idx; 35 | } 36 | -------------------------------------------------------------------------------- /utils/pointops2/src/subtraction/subtraction_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "subtraction_cuda_kernel.h" 6 | 7 | 8 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input1 = input1_tensor.data_ptr(); 11 | const float *input2 = input2_tensor.data_ptr(); 12 | const int *idx = idx_tensor.data_ptr(); 13 | float *output = output_tensor.data_ptr(); 14 | subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); 15 | } 16 | 17 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) 18 | { 19 | const int *idx = idx_tensor.data_ptr(); 20 | const float *grad_output = grad_output_tensor.data_ptr(); 21 | float *grad_input1 = grad_input1_tensor.data_ptr(); 22 | float *grad_input2 = grad_input2_tensor.data_ptr(); 23 | subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 24 | } 25 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import models.res16unet as res16unet 2 | import models.resunet as resunet 3 | from models.mask3d import Mask3D 4 | from models.mask3d_hp import Mask3DHumanParts 5 | from models.res16unet import ( 6 | Custom30M, 7 | Res16UNet14A, 8 | Res16UNet18B, 9 | Res16UNet18D, 10 | Res16UNet34A, 11 | Res16UNet34C, 12 | Res16UNet34D, 13 | ) 14 | 15 | MODELS = [] 16 | 17 | 18 | def add_models(module): 19 | MODELS.extend([getattr(module, a) for a in dir(module) if "Net" in a]) 20 | 21 | 22 | add_models(resunet) 23 | add_models(res16unet) 24 | add_models(mask3d) 25 | add_models(mask3d_hp) 26 | 27 | 28 | def get_models(): 29 | """Returns a tuple of sample models.""" 30 | return MODELS 31 | 32 | 33 | def load_model(name): 34 | """Creates and returns an instance of the model given its class name.""" 35 | # Find the model class from its name 36 | all_models = get_models() 37 | mdict = {model.__name__: model for model in all_models} 38 | if name not in mdict: 39 | print("Invalid model index. Options are:") 40 | # Display a list of valid model names 41 | for model in all_models: 42 | print(f"\t* {model.__name__}") 43 | return None 44 | NetClass = mdict[name] 45 | 46 | return NetClass 47 | -------------------------------------------------------------------------------- /conf/model/mask3d.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.Mask3D 3 | 4 | # transformer parameters 5 | hidden_dim: 128 6 | dim_feedforward: 1024 7 | num_queries: 100 8 | num_heads: 8 9 | num_decoders: 3 10 | dropout: 0.0 11 | pre_norm: false 12 | use_level_embed: false 13 | normalize_pos_enc: true 14 | positional_encoding_type: "fourier" 15 | gauss_scale: 1.0 16 | hlevels: [0,1,2,3] 17 | 18 | # queries 19 | non_parametric_queries: true 20 | random_query_both: false 21 | random_normal: false 22 | random_queries: false 23 | use_np_features: false 24 | 25 | query_init: None # ['clip_init', ...] TODO 26 | clip_proj_dropout: 0.0 27 | 28 | # sampling 29 | sample_sizes: [200, 800, 3200, 12800, 51200] 30 | max_sample_size: false # change false means sampling activated 31 | 32 | shared_decoder: true 33 | num_classes: ${general.num_targets} 34 | train_on_segments: ${general.train_on_segments} 35 | scatter_type: "mean" 36 | 37 | voxel_size: ${data.voxel_size} 38 | 39 | config: 40 | backbone: 41 | _target_: models.Res16UNet34C 42 | config: 43 | dialations: [ 1, 1, 1, 1 ] 44 | conv1_kernel_size: 5 45 | bn_momentum: 0.02 46 | # depends on normals, color, raw_coordinates 47 | # varies from 3 to 9 48 | in_channels: ${data.in_channels} 49 | out_channels: ${data.num_labels} 50 | out_fpn: true 51 | 52 | -------------------------------------------------------------------------------- /third_party/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | import os.path as osp 10 | 11 | this_dir = osp.dirname(osp.abspath(__file__)) 12 | 13 | _ext_src_root = "_ext_src" 14 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 15 | "{}/src/*.cu".format(_ext_src_root) 16 | ) 17 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 18 | 19 | setup( 20 | name="pointnet2", 21 | ext_modules=[ 22 | CUDAExtension( 23 | name="pointnet2._ext", 24 | sources=_ext_sources, 25 | extra_compile_args={ 26 | "cxx": [ 27 | "-O2", 28 | "-I{}".format("{}/include".format(_ext_src_root)), 29 | ], 30 | "nvcc": [ 31 | "-O2", 32 | "-I{}".format("{}/include".format(_ext_src_root)), 33 | ], 34 | }, 35 | include_dirs=[osp.join(this_dir, _ext_src_root, "include")], 36 | ) 37 | ], 38 | cmdclass={"build_ext": BuildExtension}, 39 | ) 40 | -------------------------------------------------------------------------------- /conf/model/mask3d_hp.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: models.Mask3DHumanParts 3 | 4 | restricted_cross_attention: true 5 | 6 | # transformer parameters 7 | hidden_dim: 128 8 | dim_feedforward: 1024 9 | #num_queries: 100 10 | 11 | num_human_queries: 5 12 | num_parts_per_human_queries: 16 13 | 14 | num_heads: 8 15 | num_decoders: 3 16 | dropout: 0.0 17 | pre_norm: false 18 | use_level_embed: false 19 | normalize_pos_enc: true 20 | positional_encoding_type: "fourier" 21 | gauss_scale: 1.0 22 | hlevels: [0,1,2,3] 23 | 24 | # queries 25 | non_parametric_queries: true 26 | random_query_both: false 27 | random_normal: false 28 | random_queries: false 29 | use_np_features: false 30 | 31 | query_init: None # ['clip_init', ...] TODO 32 | clip_proj_dropout: 0.0 33 | 34 | # sampling 35 | sample_sizes: [200, 800, 3200, 12800, 51200] 36 | max_sample_size: false # change false means sampling activated 37 | 38 | shared_decoder: true 39 | num_classes: ${general.num_targets} 40 | train_on_segments: ${general.train_on_segments} 41 | scatter_type: "mean" 42 | 43 | voxel_size: ${data.voxel_size} 44 | 45 | config: 46 | backbone: 47 | _target_: models.Res16UNet34C 48 | config: 49 | dialations: [ 1, 1, 1, 1 ] 50 | conv1_kernel_size: 5 51 | bn_momentum: 0.02 52 | # depends on normals, color, raw_coordinates 53 | # varies from 3 to 9 54 | in_channels: ${data.in_channels} 55 | out_channels: ${data.num_labels} 56 | out_fpn: true 57 | 58 | -------------------------------------------------------------------------------- /conf/augmentation/volumentations_aug.yaml: -------------------------------------------------------------------------------- 1 | # pi = 3.14159265358979 2 | # pi/2 = 1.57079632679489 3 | # pi/3 = 1.04719755119659 4 | # pi/6 = 0.52359877559829 5 | # pi/12 = 0.26179938779914 6 | # pi/24 = 0.13089969389957 7 | # 8 | __version__: 0.1.6 9 | transform: 10 | __class_fullname__: volumentations.core.composition.Compose 11 | additional_targets: {} 12 | p: 1.0 13 | transforms: 14 | - __class_fullname__: volumentations.augmentations.transforms.Scale3d 15 | always_apply: true 16 | p: 0.5 17 | scale_limit: 18 | - - -0.1 19 | - 0.1 20 | - - -0.1 21 | - 0.1 22 | - - -0.1 23 | - 0.1 24 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 25 | always_apply: true 26 | axis: 27 | - 0 28 | - 0 29 | - 1 30 | p: 0.5 31 | rotation_limit: 32 | - -3.141592653589793 33 | - 3.141592653589793 34 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 35 | always_apply: true 36 | axis: 37 | - 0 38 | - 1 39 | - 0 40 | p: 0.5 41 | rotation_limit: 42 | - -0.13089969389957 43 | - 0.13089969389957 44 | - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d 45 | always_apply: true 46 | axis: 47 | - 1 48 | - 0 49 | - 0 50 | p: 0.5 51 | rotation_limit: 52 | - -0.13089969389957 53 | - 0.13089969389957 54 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #ifndef _CUDA_UTILS_H 4 | #define _CUDA_UTILS_H 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #define TOTAL_THREADS 512 16 | 17 | inline int opt_n_threads(int work_size) { 18 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 19 | 20 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 21 | } 22 | 23 | inline dim3 opt_block_config(int x, int y) { 24 | const int x_threads = opt_n_threads(x); 25 | const int y_threads = 26 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 27 | dim3 block_config(x_threads, y_threads, 1); 28 | 29 | return block_config; 30 | } 31 | 32 | #define CUDA_CHECK_ERRORS() \ 33 | do { \ 34 | cudaError_t err = cudaGetLastError(); \ 35 | if (cudaSuccess != err) { \ 36 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 37 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 38 | __FILE__); \ 39 | exit(-1); \ 40 | } \ 41 | } while (0) 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /scripts/train/train_human3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 1) FIRST TRAIN THE MODEL ON SYNTHETIC DATA 4 | python main.py \ 5 | general.experiment_name="Human3D_on_synthetic_data" \ 6 | general.project_name="human3d_humanseg" \ 7 | data/datasets=synthetic_humans \ 8 | general.num_targets=16 \ 9 | data.num_labels=16 \ 10 | model=mask3d_hp \ 11 | loss=set_criterion_hp \ 12 | model.num_human_queries=5 \ 13 | model.num_parts_per_human_queries=16 \ 14 | trainer.check_val_every_n_epoch=1 \ 15 | general.topk_per_image=-1 \ 16 | model.non_parametric_queries=false \ 17 | trainer.max_epochs=36 \ 18 | data.batch_size=4 \ 19 | data.num_workers=10 \ 20 | general.reps_per_epoch=1 \ 21 | model.config.backbone._target_=models.Res16UNet18B \ 22 | general.train_mode=true \ 23 | general.save_visualizations=false 24 | 25 | ### 2) THEN FINETUNE WITH EGOBODY DATA 26 | python main.py \ 27 | general.experiment_name="Human3D_finetuned_on_egobody_data" \ 28 | general.project_name="human3d_humanseg" \ 29 | data/datasets=synthetic_humans \ 30 | general.num_targets=16 \ 31 | data.num_labels=16 \ 32 | model=mask3d_hp \ 33 | loss=set_criterion_hp \ 34 | model.num_human_queries=5 \ 35 | model.num_parts_per_human_queries=16 \ 36 | trainer.check_val_every_n_epoch=1 \ 37 | general.topk_per_image=-1 \ 38 | model.non_parametric_queries=false \ 39 | trainer.max_epochs=36 \ 40 | data.batch_size=4 \ 41 | data.num_workers=10 \ 42 | general.reps_per_epoch=1 \ 43 | model.config.backbone._target_=models.Res16UNet18B \ 44 | general.checkpoint='saved/Human3D_on_synthetic_data/last.ckpt' \ 45 | general.train_mode=true \ 46 | general.save_visualizations=false 47 | -------------------------------------------------------------------------------- /conf/data/collation_functions/voxelize_collate.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | 3 | train_collation: 4 | _target_: datasets.utils.VoxelizeCollate 5 | ignore_label: ${data.ignore_label} 6 | voxel_size: ${data.voxel_size} 7 | mode: ${data.train_mode} 8 | small_crops: false 9 | very_small_crops: false 10 | batch_instance: false 11 | probing: ${general.linear_probing_backbone} 12 | task: ${general.task} 13 | ignore_class_threshold: ${general.ignore_class_threshold} 14 | filter_out_classes: ${data.train_dataset.filter_out_classes} 15 | label_offset: ${data.train_dataset.label_offset} 16 | num_queries: 0 # ${model.num_queries} 17 | 18 | validation_collation: 19 | _target_: datasets.utils.VoxelizeCollate 20 | ignore_label: ${data.ignore_label} 21 | voxel_size: ${data.voxel_size} 22 | mode: ${data.validation_mode} 23 | batch_instance: false 24 | probing: ${general.linear_probing_backbone} 25 | task: ${general.task} 26 | ignore_class_threshold: ${general.ignore_class_threshold} 27 | filter_out_classes: ${data.validation_dataset.filter_out_classes} 28 | label_offset: ${data.validation_dataset.label_offset} 29 | num_queries: 0 # ${model.num_queries} 30 | 31 | test_collation: 32 | _target_: datasets.utils.VoxelizeCollate 33 | ignore_label: ${data.ignore_label} 34 | voxel_size: ${data.voxel_size} 35 | mode: ${data.test_mode} 36 | batch_instance: false 37 | probing: ${general.linear_probing_backbone} 38 | task: ${general.task} 39 | ignore_class_threshold: ${general.ignore_class_threshold} 40 | filter_out_classes: ${data.test_dataset.filter_out_classes} 41 | label_offset: ${data.test_dataset.label_offset} 42 | num_queries: 0 # ${model.num_queries} -------------------------------------------------------------------------------- /scripts/train/train_mask3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 1) FIRST TRAIN THE MODEL ON SYNTHETIC DATA 4 | python main.py \ 5 | general.experiment_name="Mask3D_on_synthetic_data" \ 6 | general.project_name="mask3d_humanseg" \ 7 | data/datasets=synthetic_humans \ 8 | general.num_targets=16 \ 9 | data.num_labels=16 \ 10 | model=mask3d \ 11 | loss=set_criterion \ 12 | model.num_queries=5 \ 13 | trainer.check_val_every_n_epoch=1 \ 14 | general.topk_per_image=-1 \ 15 | model.non_parametric_queries=false \ 16 | trainer.max_epochs=36 \ 17 | data.batch_size=4 \ 18 | data.num_workers=10 \ 19 | general.reps_per_epoch=1 \ 20 | general.save_visualizations=false \ 21 | model.config.backbone._target_=models.Res16UNet18B \ 22 | data.part2human=true \ 23 | loss.num_classes=2 \ 24 | model.num_classes=2 \ 25 | callbacks=callbacks_instance_segmentation_human \ 26 | general.train_mode=true 27 | 28 | 29 | ### 2) THEN FINETUNE WITH EGOBODY DATA 30 | python main.py \ 31 | general.experiment_name="Mask3D_finetuned_on_egobody_data" \ 32 | general.project_name="mask3d_humanseg" \ 33 | data/datasets=egobody \ 34 | general.num_targets=16 \ 35 | data.num_labels=16 \ 36 | model=mask3d \ 37 | loss=set_criterion \ 38 | model.num_queries=5 \ 39 | trainer.check_val_every_n_epoch=1 \ 40 | general.topk_per_image=-1 \ 41 | model.non_parametric_queries=false \ 42 | trainer.max_epochs=36 \ 43 | data.batch_size=4 \ 44 | data.num_workers=10 \ 45 | general.reps_per_epoch=1 \ 46 | general.save_visualizations=false \ 47 | model.config.backbone._target_=models.Res16UNet18B \ 48 | data.part2human=true \ 49 | loss.num_classes=2 \ 50 | model.num_classes=2 \ 51 | callbacks=callbacks_instance_segmentation_human \ 52 | general.checkpoint='saved/Mask3D_on_synthetic_data/last.ckpt' \ 53 | general.train_mode=true 54 | -------------------------------------------------------------------------------- /utils/pointops2/src/aggregation/aggregation_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "aggregation_cuda_kernel.h" 6 | 7 | 8 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *input = input_tensor.data_ptr(); 11 | const float *position = position_tensor.data_ptr(); 12 | const float *weight = weight_tensor.data_ptr(); 13 | const int *idx = idx_tensor.data_ptr(); 14 | float *output = output_tensor.data_ptr(); 15 | aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); 16 | } 17 | 18 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) 19 | { 20 | const float *input = input_tensor.data_ptr(); 21 | const float *position = position_tensor.data_ptr(); 22 | const float *weight = weight_tensor.data_ptr(); 23 | const int *idx = idx_tensor.data_ptr(); 24 | const float *grad_output = grad_output_tensor.data_ptr(); 25 | float *grad_input = grad_input_tensor.data_ptr(); 26 | float *grad_position = grad_position_tensor.data_ptr(); 27 | float *grad_weight = grad_weight_tensor.data_ptr(); 28 | aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 29 | } 30 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_attention_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 800000 7 | N = 35000 8 | C = 96 9 | h = 6 10 | softmax_attn_flat = torch.rand(M, h).cuda() 11 | value = torch.rand(N, h, C // h).cuda() 12 | 13 | index_0 = torch.rand(M) 14 | index_0[index_0 < 0] = 0 15 | index_0 = (index_0 * N).long().cuda() 16 | 17 | index_1 = torch.rand(M) 18 | index_1[index_1 < 0] = 0 19 | index_1 = (index_1 * N).long().cuda() 20 | 21 | softmax_attn_flat.requires_grad = True 22 | value.requires_grad = True 23 | 24 | # value_flat = value[index_1] #[M, num_heads, C // num_heads] 25 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C) 26 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C] 27 | # loss = x.sum() 28 | # loss.backward() 29 | 30 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10])) 31 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 32 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 33 | # input() 34 | 35 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous()) 36 | print("value.is_contiguous(): ", value.is_contiguous()) 37 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 38 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 39 | 40 | x_v2 = pointops.attention_step2( 41 | softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() 42 | ) 43 | x_v2 = x_v2.view(N, C) 44 | loss = x_v2.sum() 45 | loss.backward() 46 | 47 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10])) 48 | 49 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) 50 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) 51 | input() 52 | 53 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all()) 54 | 55 | print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2)) 56 | -------------------------------------------------------------------------------- /utils/pointops2/src/attention/attention_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_CUDA_KERNEL 2 | #define _ATTENTION_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /models/metrics/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class IoU: 5 | """Computes the intersection over union (IoU) per class and corresponding 6 | mean (mIoU). 7 | 8 | Intersection over union (IoU) is a common evaluation metric for semantic 9 | segmentation. The predictions are first accumulated in a confusion matrix 10 | and the IoU is computed from it as follows: 11 | 12 | IoU = true_positive / (true_positive + false_positive + false_negative). 13 | 14 | Keyword arguments: 15 | - num_classes (int): number of classes in the classification problem 16 | - normalized (boolean, optional): Determines whether or not the confusion 17 | matrix is normalized or not. Default: False. 18 | - ignore_index (int or iterable, optional): Index of the classes to ignore 19 | when computing the IoU. Can be an int, or any iterable of ints. 20 | 21 | Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter 22 | 23 | """ 24 | 25 | def __init__(self): 26 | super().__init__() 27 | 28 | def value(self, conf_matrix): 29 | """Computes the IoU and mean IoU. 30 | 31 | The mean computation ignores NaN elements of the IoU array. 32 | 33 | Returns: 34 | Tuple: (IoU, mIoU). The first output is the per class IoU, 35 | for K classes it's numpy.ndarray with K elements. The second output, 36 | is the mean IoU. 37 | """ 38 | true_positive = np.diag(conf_matrix) 39 | false_positive = np.sum(conf_matrix, 0) - true_positive 40 | false_negative = np.sum(conf_matrix, 1) - true_positive 41 | 42 | # Just in case we get a division by 0, ignore/hide the error 43 | with np.errstate(divide="ignore", invalid="ignore"): 44 | iou = true_positive / ( 45 | true_positive + false_positive + false_negative 46 | ) 47 | 48 | return iou 49 | -------------------------------------------------------------------------------- /utils/pointops2/setup.py: -------------------------------------------------------------------------------- 1 | # python3 setup.py install 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | import os 5 | from distutils.sysconfig import get_config_vars 6 | 7 | (opt,) = get_config_vars("OPT") 8 | os.environ["OPT"] = " ".join( 9 | flag for flag in opt.split() if flag != "-Wstrict-prototypes" 10 | ) 11 | 12 | setup( 13 | name="pointops2", 14 | ext_modules=[ 15 | CUDAExtension( 16 | "pointops2_cuda", 17 | [ 18 | "src/pointops_api.cpp", 19 | "src/knnquery/knnquery_cuda.cpp", 20 | "src/knnquery/knnquery_cuda_kernel.cu", 21 | "src/sampling/sampling_cuda.cpp", 22 | "src/sampling/sampling_cuda_kernel.cu", 23 | "src/grouping/grouping_cuda.cpp", 24 | "src/grouping/grouping_cuda_kernel.cu", 25 | "src/interpolation/interpolation_cuda.cpp", 26 | "src/interpolation/interpolation_cuda_kernel.cu", 27 | "src/subtraction/subtraction_cuda.cpp", 28 | "src/subtraction/subtraction_cuda_kernel.cu", 29 | "src/aggregation/aggregation_cuda.cpp", 30 | "src/aggregation/aggregation_cuda_kernel.cu", 31 | "src/attention/attention_cuda.cpp", 32 | "src/attention/attention_cuda_kernel.cu", 33 | "src/rpe/relative_pos_encoding_cuda.cpp", 34 | "src/rpe/relative_pos_encoding_cuda_kernel.cu", 35 | "src/attention_v2/attention_cuda_v2.cpp", 36 | "src/attention_v2/attention_cuda_kernel_v2.cu", 37 | "src/rpe_v2/relative_pos_encoding_cuda_v2.cpp", 38 | "src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.cu", 39 | ], 40 | extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, 41 | ) 42 | ], 43 | cmdclass={"build_ext": BuildExtension}, 44 | ) 45 | -------------------------------------------------------------------------------- /conf/config_base_instance_segmentation.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | train_mode: true 3 | task: "instance_segmentation" 4 | seed: null 5 | checkpoint: null 6 | backbone_checkpoint: null 7 | freeze_backbone: false # train only last layer 8 | linear_probing_backbone: false 9 | train_on_segments: false 10 | eval_on_segments: false 11 | filter_out_instances: false 12 | save_visualizations: false 13 | visualization_point_size: 20 14 | decoder_id: -1 15 | export: false 16 | use_dbscan: false 17 | ignore_class_threshold: 100 18 | project_name: scannet 19 | workspace: jonasschult 20 | experiment_name: DEBUG_ABLATION 21 | num_targets: 19 22 | add_instance: true 23 | dbscan_eps: 0.95 24 | dbscan_min_points: 1 25 | 26 | add_clip: false 27 | 28 | export_threshold: 0.0001 29 | 30 | reps_per_epoch: 1 31 | 32 | on_crops: false 33 | 34 | body_part_segmentation: false 35 | 36 | scores_threshold: 0.0 37 | iou_threshold: 1.0 38 | 39 | area: 5 40 | 41 | eval_inner_core: -1 # disabled 42 | 43 | topk_per_image: 100 44 | 45 | ignore_mask_idx: [] 46 | 47 | max_batch_size: 99999999 48 | 49 | save_dir: saved/${general.experiment_name} 50 | # time/commit/md5(config)_uuid 51 | # time/experiment_id/version_uuid 52 | # experiment_id: 1 # commit[:8], or unique from logger 53 | # version: 1 # md5[:8] of config 54 | 55 | gpus: 1 56 | 57 | defaults: 58 | - data: indoor 59 | - data/data_loaders: simple_loader 60 | - data/datasets: scannet 61 | - data/collation_functions: voxelize_collate 62 | - logging: full 63 | - model: mask3d 64 | - metrics: miou 65 | - optimizer: adamw 66 | - scheduler: onecyclelr 67 | - trainer: trainer600 68 | - callbacks: callbacks_instance_segmentation 69 | - matcher: hungarian_matcher 70 | - loss: set_criterion 71 | 72 | hydra: 73 | run: 74 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 75 | sweep: 76 | dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S} 77 | # dir: ${general.save_dir} 78 | subdir: ${hydra.job.num}_${hydra.job.id} 79 | -------------------------------------------------------------------------------- /utils/pointops2/src/attention_v2/attention_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATTENTION_V2_CUDA_KERNEL 2 | #define _ATTENTION_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); 8 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); 9 | 10 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); 11 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); 18 | void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); 19 | 20 | void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); 21 | void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /utils/votenet_utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix""" 7 | import os 8 | import time 9 | 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | import sys 12 | 13 | sys.path.append(BASE_DIR) 14 | import tf_logger 15 | 16 | 17 | class Visualizer: 18 | def __init__(self, opt, name="train"): 19 | # self.opt = opt 20 | # self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 21 | # self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 22 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 23 | self.log_name = os.path.join(opt.log_dir, "tf_visualizer_log.txt") 24 | with open(self.log_name, "a") as log_file: 25 | now = time.strftime("%c") 26 | log_file.write( 27 | "================ Training Loss (%s) ================\n" % now 28 | ) 29 | 30 | # |visuals|: dictionary of images to save 31 | def log_images(self, visuals, step): 32 | for label, image_numpy in visuals.items(): 33 | self.logger.image_summary(label, [image_numpy], step) 34 | 35 | # scalars: dictionary of scalar labels and values 36 | def log_scalars(self, scalars, step): 37 | for label, val in scalars.items(): 38 | self.logger.scalar_summary(label, val, step) 39 | 40 | # scatter plots 41 | def plot_current_points(self, points, disp_offset=10): 42 | pass 43 | 44 | # scalars: same format as |scalars| of plot_current_scalars 45 | def print_current_scalars(self, epoch, i, scalars): 46 | message = "(epoch: %d, iters: %d) " % (epoch, i) 47 | for k, v in scalars.items(): 48 | message += "%s: %.3f " % (k, v) 49 | 50 | print(message) 51 | with open(self.log_name, "a") as log_file: 52 | log_file.write("%s\n" % message) 53 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 11 | // output: idx(b, m, nsample) 12 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 13 | int nsample, 14 | const float *__restrict__ new_xyz, 15 | const float *__restrict__ xyz, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | xyz += batch_index * n * 3; 19 | new_xyz += batch_index * m * 3; 20 | idx += m * nsample * batch_index; 21 | 22 | int index = threadIdx.x; 23 | int stride = blockDim.x; 24 | 25 | float radius2 = radius * radius; 26 | for (int j = index; j < m; j += stride) { 27 | float new_x = new_xyz[j * 3 + 0]; 28 | float new_y = new_xyz[j * 3 + 1]; 29 | float new_z = new_xyz[j * 3 + 2]; 30 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 31 | float x = xyz[k * 3 + 0]; 32 | float y = xyz[k * 3 + 1]; 33 | float z = xyz[k * 3 + 2]; 34 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 35 | (new_z - z) * (new_z - z); 36 | if (d2 < radius2) { 37 | if (cnt == 0) { 38 | for (int l = 0; l < nsample; ++l) { 39 | idx[j * nsample + l] = k; 40 | } 41 | } 42 | idx[j * nsample + cnt] = k; 43 | ++cnt; 44 | } 45 | } 46 | } 47 | } 48 | 49 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 50 | int nsample, const float *new_xyz, 51 | const float *xyz, int *idx) { 52 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 53 | query_ball_point_kernel<<>>( 54 | b, n, m, radius, nsample, new_xyz, xyz, idx); 55 | 56 | CUDA_CHECK_ERRORS(); 57 | } 58 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_relative_pos_encoding_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 80000 7 | N = 3500 8 | hdim = 16 9 | h = 6 10 | L = 31 11 | query = torch.rand(N, h, hdim).cuda() 12 | table = torch.rand(L, h, hdim, 3).cuda() 13 | 14 | index = torch.rand(M) 15 | index[index < 0] = 0 16 | index = (index * N).long().cuda() 17 | 18 | rel_index = torch.rand(M, 3) 19 | rel_index[rel_index < 0] = 0 20 | rel_index = (rel_index * L).long().cuda() 21 | 22 | query.requires_grad = True 23 | table.requires_grad = True 24 | 25 | # query_flat = query[index] #[M, h, hdim] 26 | # table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] 27 | # rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] 28 | # rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] 29 | # output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] 30 | # loss = output.mean() 31 | # loss.backward() 32 | 33 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 34 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 35 | # print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 36 | # input() 37 | 38 | # print("query.is_contiguous(): ", query.is_contiguous()) 39 | # print("key.is_contiguous(): ", key.is_contiguous()) 40 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 41 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 42 | 43 | output_v2 = pointops.dot_prod_with_idx( 44 | query, index.int(), table, rel_index.int() 45 | ) 46 | loss = output_v2.mean() 47 | loss.backward() 48 | 49 | print( 50 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 51 | output_v2.shape, output_v2[:5, :10] 52 | ) 53 | ) 54 | print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 55 | print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 56 | input() 57 | 58 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 59 | 60 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 61 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include "group_points.h" 5 | #include "utils.h" 6 | 7 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 8 | const float *points, const int *idx, 9 | float *out); 10 | 11 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 12 | int nsample, const float *grad_out, 13 | const int *idx, float *grad_points); 14 | 15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), idx.size(2), points.data(), 32 | idx.data(), output.data()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 41 | CHECK_CONTIGUOUS(grad_out); 42 | CHECK_CONTIGUOUS(idx); 43 | CHECK_IS_FLOAT(grad_out); 44 | CHECK_IS_INT(idx); 45 | 46 | if (grad_out.is_cuda()) { 47 | CHECK_CUDA(idx); 48 | } 49 | 50 | at::Tensor output = 51 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 52 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 53 | 54 | if (grad_out.is_cuda()) { 55 | group_points_grad_kernel_wrapper( 56 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 57 | grad_out.data(), idx.data(), output.data()); 58 | } else { 59 | AT_ASSERT(false, "CPU not supported"); 60 | } 61 | 62 | return output; 63 | } 64 | -------------------------------------------------------------------------------- /utils/pointops2/src/grouping/grouping_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "grouping_cuda_kernel.h" 3 | 4 | 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { 6 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= m * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int m_idx = index / nsample / c; 12 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 13 | output[index] = input[input_idx]; 14 | } 15 | 16 | __global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { 17 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 18 | int index = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (index >= m * nsample * c) return; 20 | const int c_idx = index % c; 21 | const int nsample_idx = (index / c) % nsample; 22 | const int m_idx = index / nsample / c; 23 | const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; 24 | atomicAdd(grad_input + input_idx, grad_output[index]); 25 | } 26 | 27 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { 28 | // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) 29 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 30 | dim3 threads(THREADS_PER_BLOCK); 31 | grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); 32 | } 33 | 34 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) 35 | { 36 | // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) 37 | dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); 40 | } 41 | -------------------------------------------------------------------------------- /utils/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_CUDA_KERNEL 2 | #define _RPE_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); 9 | 10 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); 18 | void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); 19 | 20 | void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); 21 | void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | #endif 27 | -------------------------------------------------------------------------------- /utils/pointops2/src/interpolation/interpolation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "interpolation_cuda_kernel.h" 3 | 4 | 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) 6 | { 7 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 8 | int index = blockIdx.x * blockDim.x + threadIdx.x; 9 | if (index >= n * c) return; 10 | int c_idx = index % c; 11 | int n_idx = index / c; 12 | for (int i = 0; i < k; i++) 13 | { 14 | int idx_idx = n_idx * k + i; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | output[index] += input[input_idx] * weight[idx_idx]; 17 | } 18 | } 19 | 20 | __global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) 21 | { 22 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 23 | int index = blockIdx.x * blockDim.x + threadIdx.x; 24 | if (index >= n * c) return; 25 | int c_idx = index % c; 26 | int n_idx = index / c; 27 | for (int i = 0; i < k; i++) 28 | { 29 | int idx_idx = n_idx * k + i; 30 | int input_idx = idx[idx_idx] * c + c_idx; 31 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); 32 | } 33 | } 34 | 35 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { 36 | // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) 37 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 38 | dim3 threads(THREADS_PER_BLOCK); 39 | interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); 40 | } 41 | 42 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { 43 | // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) 44 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 45 | dim3 threads(THREADS_PER_BLOCK); 46 | interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); 47 | } 48 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 80000 7 | N = 3500 8 | hdim = 16 9 | h = 6 10 | L = 31 11 | query = torch.rand(N, h, hdim).cuda() 12 | table_q = torch.rand(L, h, hdim, 3).cuda() 13 | key = torch.rand(N, h, hdim).cuda() 14 | table_k = torch.rand(L, h, hdim, 3).cuda() 15 | 16 | index_q = torch.rand(M) 17 | index_q[index_q < 0] = 0 18 | index_q = (index_q * N).long().cuda() 19 | 20 | index_k = torch.rand(M) 21 | index_k[index_k < 0] = 0 22 | index_k = (index_k * N).long().cuda() 23 | 24 | rel_index = torch.rand(M, 3) 25 | rel_index[rel_index < 0] = 0 26 | rel_index = (rel_index * L).long().cuda() 27 | 28 | query.requires_grad = True 29 | table_q.requires_grad = True 30 | key.requires_grad = True 31 | table_k.requires_grad = True 32 | 33 | output1 = pointops.dot_prod_with_idx( 34 | query, index_q.int(), table_q, rel_index.int() 35 | ) 36 | output2 = pointops.dot_prod_with_idx( 37 | key, index_k.int(), table_k, rel_index.int() 38 | ) 39 | output = output1 + output2 40 | # loss = output.mean() 41 | # loss.backward() 42 | 43 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 44 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 45 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 46 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 47 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 48 | # input() 49 | 50 | # print("query.is_contiguous(): ", query.is_contiguous()) 51 | # print("key.is_contiguous(): ", key.is_contiguous()) 52 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 53 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 54 | 55 | output_v2 = pointops.dot_prod_with_idx_v2( 56 | query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int() 57 | ) 58 | loss = output_v2.mean() 59 | loss.backward() 60 | 61 | print( 62 | "output_v2.shape: {}, output_v2[:5,:10]: {}".format( 63 | output_v2.shape, output_v2[:5, :10] 64 | ) 65 | ) 66 | print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 67 | print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 68 | print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 69 | print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 70 | # input() 71 | 72 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 73 | -------------------------------------------------------------------------------- /utils/gradflow_check.py: -------------------------------------------------------------------------------- 1 | """ https://github.com/alwynmathew/gradflow-check """ 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from matplotlib.lines import Line2D 5 | 6 | 7 | def plot_grad_flow(named_parameters): 8 | ave_grads = [] 9 | layers = [] 10 | for n, p in named_parameters: 11 | if (p.requires_grad) and ("bias" not in n): 12 | if p.grad: 13 | layers.append(n) 14 | ave_grads.append(p.grad.abs().mean()) 15 | else: 16 | print(f"{n} - doesn't have gradient computed") 17 | 18 | plt.plot(ave_grads, alpha=0.3, color="b") 19 | plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k") 20 | plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical") 21 | plt.xlim(xmin=0, xmax=len(ave_grads)) 22 | plt.xlabel("Layers") 23 | plt.ylabel("average gradient") 24 | plt.title("Gradient flow") 25 | plt.grid(True) 26 | 27 | 28 | def plot_grad_flow_v2(named_parameters): 29 | """Plots the gradients flowing through different layers in the net during training. 30 | Can be used for checking for possible gradient vanishing / exploding problems. 31 | 32 | Usage: Plug this function in Trainer class after loss.backwards() as 33 | "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow""" 34 | ave_grads = [] 35 | max_grads = [] 36 | layers = [] 37 | for n, p in named_parameters: 38 | if (p.requires_grad) and ("bias" not in n): 39 | layers.append(n) 40 | if p.grad: 41 | ave_grads.append(p.grad.abs().mean()) 42 | max_grads.append(p.grad.abs().max()) 43 | else: 44 | print(f"{n} - doesn't have gradient computed") 45 | plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c") 46 | plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b") 47 | plt.hlines(0, 0, len(ave_grads) + 1, lw=2, color="k") 48 | plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical") 49 | plt.xlim(left=0, right=len(ave_grads)) 50 | plt.ylim(bottom=-0.001, top=0.02) # zoom in on the lower gradient regions 51 | plt.xlabel("Layers") 52 | plt.ylabel("average gradient") 53 | plt.title("Gradient flow") 54 | plt.grid(True) 55 | plt.legend( 56 | [ 57 | Line2D([0], [0], color="c", lw=4), 58 | Line2D([0], [0], color="b", lw=4), 59 | Line2D([0], [0], color="k", lw=4), 60 | ], 61 | ["max-gradient", "mean-gradient", "zero-gradient"], 62 | ) 63 | -------------------------------------------------------------------------------- /utils/pointops2/src/subtraction/subtraction_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "subtraction_cuda_kernel.h" 3 | 4 | 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 6 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * nsample * c) return; 9 | const int c_idx = index % c; 10 | const int nsample_idx = (index / c) % nsample; 11 | const int n_idx = index / nsample / c; 12 | const int idx_idx = n_idx * nsample + nsample_idx; 13 | const int input1_idx = n_idx * c + c_idx; 14 | const int input2_idx = idx[idx_idx] * c + c_idx; 15 | output[index] = input1[input1_idx] - input2[input2_idx]; 16 | } 17 | 18 | __global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 19 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if (index >= n * nsample * c) return; 22 | const int c_idx = index % c; 23 | const int nsample_idx = (index / c) % nsample; 24 | const int n_idx = index / nsample / c; 25 | const int idx_idx = n_idx * nsample + nsample_idx; 26 | const int input1_idx = n_idx * c + c_idx; 27 | const int input2_idx = idx[idx_idx] * c + c_idx; 28 | atomicAdd(grad_input1 + input1_idx, grad_output[index]); 29 | atomicAdd(grad_input2 + input2_idx, -grad_output[index]); 30 | } 31 | 32 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { 33 | // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) 34 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 35 | dim3 threads(THREADS_PER_BLOCK); 36 | subtraction_forward_cuda_kernel<<>>(n, nsample, c, input1, input2, idx, output); 37 | } 38 | 39 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { 40 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 41 | dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); 42 | dim3 threads(THREADS_PER_BLOCK); 43 | subtraction_backward_cuda_kernel<<>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); 44 | } 45 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_relative_pos_encoding_op_step2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_scatter import ( 3 | scatter_sum, 4 | ) 5 | 6 | torch.manual_seed(1) 7 | 8 | M = 80000 9 | N = 3500 10 | hdim = 16 11 | h = 6 12 | L = 31 13 | attn = torch.rand(M, h).cuda() 14 | v = torch.rand(N, h, hdim).cuda() 15 | table = torch.rand(L, h, hdim, 3).cuda() 16 | 17 | index_0 = torch.rand(M) 18 | index_0[index_0 < 0] = 0 19 | index_0 = (index_0 * N).long().cuda() 20 | 21 | index_1 = torch.rand(M) 22 | index_1[index_1 < 0] = 0 23 | index_1 = (index_1 * N).long().cuda() 24 | 25 | rel_index = torch.rand(M, 3) 26 | rel_index[rel_index < 0] = 0 27 | rel_index = (rel_index * L).long().cuda() 28 | 29 | attn.requires_grad = True 30 | v.requires_grad = True 31 | table.requires_grad = True 32 | 33 | v_flat = v[index_1] # [M, h, hdim] 34 | table_x, table_y, table_z = ( 35 | table[:, :, :, 0], 36 | table[:, :, :, 1], 37 | table[:, :, :, 2], 38 | ) # [L, h, hdim] 39 | rel_index_x, rel_index_y, rel_index_z = ( 40 | rel_index[:, 0], 41 | rel_index[:, 1], 42 | rel_index[:, 2], 43 | ) # [M] 44 | rel_pos_encoding = ( 45 | table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] 46 | ) # [M, h, hdim] 47 | v_flat_new = v_flat + rel_pos_encoding # [M, h, hdim] 48 | output = attn.unsqueeze(-1) * v_flat_new # [M, h, hdim] 49 | output = scatter_sum( 50 | src=output, index=index_0, dim=0, dim_size=N 51 | ) # [N, h, hdim] 52 | loss = output.mean() 53 | loss.backward() 54 | 55 | print( 56 | "output.shape: {}, output[:5,:10,:5]: {}".format( 57 | output.shape, output[:5, :10, :5] 58 | ) 59 | ) 60 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 61 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 62 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 63 | input() 64 | 65 | # print("query.is_contiguous(): ", query.is_contiguous()) 66 | # print("key.is_contiguous(): ", key.is_contiguous()) 67 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 68 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 69 | 70 | # output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) 71 | # loss = output_v2.mean() 72 | # loss.backward() 73 | 74 | # print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5])) 75 | # print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 76 | # print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 77 | # print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 78 | # input() 79 | 80 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) 81 | 82 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 83 | -------------------------------------------------------------------------------- /utils/point_cloud_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional, Tuple 3 | 4 | import numpy as np 5 | import open3d 6 | from plyfile import PlyData, PlyElement 7 | 8 | 9 | def load_ply(filepath): 10 | with open(filepath, "rb") as f: 11 | plydata = PlyData.read(f) 12 | data = plydata.elements[0].data 13 | coords = np.array([data["x"], data["y"], data["z"]], dtype=np.float32).T 14 | feats = None 15 | labels = None 16 | if ({"red", "green", "blue"} - set(data.dtype.names)) == set(): 17 | feats = np.array( 18 | [data["red"], data["green"], data["blue"]], dtype=np.uint8 19 | ).T 20 | if "label" in data.dtype.names: 21 | labels = np.array(data["label"], dtype=np.uint32) 22 | return coords, feats, labels 23 | 24 | 25 | def load_ply_with_normals(filepath): 26 | mesh = open3d.io.read_triangle_mesh(str(filepath)) 27 | if not mesh.has_vertex_normals(): 28 | mesh.compute_vertex_normals() 29 | vertices = np.asarray(mesh.vertices) 30 | normals = np.asarray(mesh.vertex_normals) 31 | 32 | coords, feats, labels = load_ply(filepath) 33 | assert np.allclose(coords, vertices), "different coordinates" 34 | feats = np.hstack((feats, normals)) 35 | 36 | return coords, feats, labels 37 | 38 | 39 | def load_obj_with_normals(filepath): 40 | mesh = open3d.io.read_triangle_mesh(str(filepath)) 41 | if not mesh.has_vertex_normals(): 42 | mesh.compute_vertex_normals() 43 | coords = np.asarray(mesh.vertices) 44 | normals = np.asarray(mesh.vertex_normals) 45 | colors = np.asarray(mesh.vertex_colors) 46 | feats = np.hstack((colors, normals)) 47 | 48 | return coords, feats 49 | 50 | 51 | def write_point_cloud_in_ply( 52 | filepath: Path, 53 | coords: np.ndarray, 54 | feats: Optional[np.ndarray] = None, 55 | labels: Optional[np.ndarray] = None, 56 | dtypes: Optional[List[Tuple[str, str]]] = [ 57 | ("x", " 2 | #include 3 | #include 4 | #include 5 | #include "attention_cuda_kernel.h" 6 | 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 8 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *k = k_tensor.data_ptr(); 12 | const int *index0 = index0_tensor.data_ptr(); 13 | const int *index1 = index1_tensor.data_ptr(); 14 | float *attn = attn_tensor.data_ptr(); 15 | attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); 16 | } 17 | 18 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 19 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const int *index0 = index0_tensor.data_ptr(); 24 | const int *index1 = index1_tensor.data_ptr(); 25 | const float *q = q_tensor.data_ptr(); 26 | const float *k = k_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_k = grad_k_tensor.data_ptr(); 29 | attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 30 | } 31 | 32 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | float *output = output_tensor.data_ptr(); 40 | attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); 41 | } 42 | 43 | 44 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 46 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | float *grad_attn = grad_attn_tensor.data_ptr(); 54 | float *grad_v = grad_v_tensor.data_ptr(); 55 | attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 56 | } 57 | -------------------------------------------------------------------------------- /utils/pointops2/src/attention_v2/attention_cuda_v2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "attention_cuda_kernel_v2.h" 6 | 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 8 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *k = k_tensor.data_ptr(); 12 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 13 | const int *index1 = index1_tensor.data_ptr(); 14 | float *attn = attn_tensor.data_ptr(); 15 | attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); 16 | } 17 | 18 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 19 | at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const int *index0_offsets = index0_tensor_offsets.data_ptr(); 24 | const int *index1 = index1_tensor.data_ptr(); 25 | const float *q = q_tensor.data_ptr(); 26 | const float *k = k_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_k = grad_k_tensor.data_ptr(); 29 | attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); 30 | } 31 | 32 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | float *output = output_tensor.data_ptr(); 40 | attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); 41 | } 42 | 43 | 44 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 45 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 46 | at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) 47 | { 48 | const float *grad_out = grad_out_tensor.data_ptr(); 49 | const int *index0 = index0_tensor.data_ptr(); 50 | const int *index1 = index1_tensor.data_ptr(); 51 | const float *attn = attn_tensor.data_ptr(); 52 | const float *v = v_tensor.data_ptr(); 53 | float *grad_attn = grad_attn_tensor.data_ptr(); 54 | float *grad_v = grad_v_tensor.data_ptr(); 55 | attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 56 | } 57 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_attention_op_step1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 800000 7 | N = 35000 8 | C = 96 9 | h = 6 10 | query = torch.rand(N, h, C // h).cuda() 11 | key = torch.rand(N, h, C // h).cuda() 12 | 13 | index_0 = torch.rand(M) 14 | index_0[index_0 < 0] = 0 15 | index_0 = (index_0 * N).long().cuda() 16 | 17 | index_1 = torch.rand(M) 18 | index_1[index_1 < 0] = 0 19 | index_1 = (index_1 * N).long().cuda() 20 | 21 | query.requires_grad = True 22 | key.requires_grad = True 23 | 24 | # rearrange index for acceleration 25 | index_0, indices = torch.sort(index_0) # [M,] 26 | index_1 = index_1[indices] # [M,] 27 | index_0_counts = index_0.bincount() 28 | 29 | print("index_0_counts.shape: ", index_0_counts.shape) 30 | 31 | n_max = index_0_counts.max() 32 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 33 | 34 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 35 | 36 | index_0_offsets = torch.cat( 37 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 38 | ) # [N+1] 39 | 40 | # print("index_0[:100]: ", index_0[:100]) 41 | print("n_max: ", n_max) 42 | print("index_0_offsets.shape: ", index_0_offsets.shape) 43 | # input() 44 | 45 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 46 | print("index_1[300:320]: ", index_1[300:320]) 47 | 48 | 49 | attn_flat = pointops.attention_step1( 50 | query.float(), key.float(), index_0.int(), index_1.int() 51 | ) 52 | # loss = attn_flat.sum() 53 | # loss.backward() 54 | print( 55 | "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format( 56 | attn_flat.shape, attn_flat[300:320, :10] 57 | ) 58 | ) 59 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 60 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 61 | # input() 62 | 63 | print("query.is_contiguous(): ", query.is_contiguous()) 64 | print("key.is_contiguous(): ", key.is_contiguous()) 65 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 66 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 67 | 68 | attn_flat_v2 = pointops.attention_step1_v2( 69 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 70 | ) 71 | # loss = attn_flat_v2.sum() 72 | # loss.backward() 73 | print( 74 | "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format( 75 | attn_flat_v2.shape, attn_flat_v2[300:320, :10] 76 | ) 77 | ) 78 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 79 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 80 | # input() 81 | 82 | mask = attn_flat_v2.sum(-1) != 0 83 | print("mask.sum(): ", mask.sum()) 84 | print( 85 | "attn_flat_v2[mask] - attn_flat[mask]: ", 86 | ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(), 87 | ) 88 | 89 | 90 | print( 91 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 92 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 93 | ) 94 | 95 | selected = 10000 96 | print( 97 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 98 | torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), 99 | ) 100 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "sampling.h" 4 | #include "utils.h" 5 | 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 7 | const float *points, const int *idx, 8 | float *out); 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *grad_out, const int *idx, 11 | float *grad_points); 12 | 13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 14 | const float *dataset, float *temp, 15 | int *idxs); 16 | 17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 43 | const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 59 | idx.size(1), grad_out.data(), 60 | idx.data(), output.data()); 61 | } else { 62 | AT_ASSERT(false, "CPU not supported"); 63 | } 64 | 65 | return output; 66 | } 67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 68 | CHECK_CONTIGUOUS(points); 69 | CHECK_IS_FLOAT(points); 70 | 71 | at::Tensor output = 72 | torch::zeros({points.size(0), nsamples}, 73 | at::device(points.device()).dtype(at::ScalarType::Int)); 74 | 75 | at::Tensor tmp = 76 | torch::full({points.size(0), points.size(1)}, 1e10, 77 | at::device(points.device()).dtype(at::ScalarType::Float)); 78 | 79 | if (points.is_cuda()) { 80 | furthest_point_sampling_kernel_wrapper( 81 | points.size(0), points.size(1), nsamples, points.data(), 82 | tmp.data(), output.data()); 83 | } else { 84 | AT_ASSERT(false, "CPU not supported"); 85 | } 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include 5 | #include 6 | 7 | #include "cuda_utils.h" 8 | 9 | // input: points(b, c, n) idx(b, npoints, nsample) 10 | // output: out(b, c, npoints, nsample) 11 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 12 | int nsample, 13 | const float *__restrict__ points, 14 | const int *__restrict__ idx, 15 | float *__restrict__ out) { 16 | int batch_index = blockIdx.x; 17 | points += batch_index * n * c; 18 | idx += batch_index * npoints * nsample; 19 | out += batch_index * npoints * nsample * c; 20 | 21 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 22 | const int stride = blockDim.y * blockDim.x; 23 | for (int i = index; i < c * npoints; i += stride) { 24 | const int l = i / npoints; 25 | const int j = i % npoints; 26 | for (int k = 0; k < nsample; ++k) { 27 | int ii = idx[j * nsample + k]; 28 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 29 | } 30 | } 31 | } 32 | 33 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 34 | const float *points, const int *idx, 35 | float *out) { 36 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 37 | 38 | group_points_kernel<<>>( 39 | b, c, n, npoints, nsample, points, idx, out); 40 | 41 | CUDA_CHECK_ERRORS(); 42 | } 43 | 44 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 45 | // output: grad_points(b, c, n) 46 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 47 | int nsample, 48 | const float *__restrict__ grad_out, 49 | const int *__restrict__ idx, 50 | float *__restrict__ grad_points) { 51 | int batch_index = blockIdx.x; 52 | grad_out += batch_index * npoints * nsample * c; 53 | idx += batch_index * npoints * nsample; 54 | grad_points += batch_index * n * c; 55 | 56 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 57 | const int stride = blockDim.y * blockDim.x; 58 | for (int i = index; i < c * npoints; i += stride) { 59 | const int l = i / npoints; 60 | const int j = i % npoints; 61 | for (int k = 0; k < nsample; ++k) { 62 | int ii = idx[j * nsample + k]; 63 | atomicAdd(grad_points + l * n + ii, 64 | grad_out[(l * npoints + j) * nsample + k]); 65 | } 66 | } 67 | } 68 | 69 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 70 | int nsample, const float *grad_out, 71 | const int *idx, float *grad_points) { 72 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 73 | 74 | group_points_grad_kernel<<>>( 75 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 76 | 77 | CUDA_CHECK_ERRORS(); 78 | } 79 | -------------------------------------------------------------------------------- /utils/votenet_utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. Qi 8 | """ 9 | 10 | import torch 11 | import numpy as np 12 | 13 | 14 | def huber_loss(error, delta=1.0): 15 | """ 16 | Args: 17 | error: Torch tensor (d1,d2,...,dk) 18 | Returns: 19 | loss: Torch tensor (d1,d2,...,dk) 20 | 21 | x = error = pred - gt or dist(pred,gt) 22 | 0.5 * |x|^2 if |x|<=d 23 | 0.5 * d^2 + d * (|x|-d) if |x|>d 24 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 25 | """ 26 | abs_error = torch.abs(error) 27 | # quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 28 | quadratic = torch.clamp(abs_error, max=delta) 29 | linear = abs_error - quadratic 30 | loss = 0.5 * quadratic**2 + delta * linear 31 | return loss 32 | 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1, 1, M, 1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1, N, 1, 1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | 64 | def demo_nn_distance(): 65 | np.random.seed(0) 66 | pc1arr = np.random.random((1, 5, 3)) 67 | pc2arr = np.random.random((1, 6, 3)) 68 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 69 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 70 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2) 71 | print(dist1) 72 | print(idx1) 73 | dist = np.zeros((5, 6)) 74 | for i in range(5): 75 | for j in range(6): 76 | dist[i, j] = np.sum((pc1arr[0, i, :] - pc2arr[0, j, :]) ** 2) 77 | print(dist) 78 | print("-" * 30) 79 | print("L1smooth dists:") 80 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 81 | print(dist1) 82 | print(idx1) 83 | dist = np.zeros((5, 6)) 84 | for i in range(5): 85 | for j in range(6): 86 | error = np.abs(pc1arr[0, i, :] - pc2arr[0, j, :]) 87 | quad = np.minimum(error, 1.0) 88 | linear = error - quad 89 | loss = 0.5 * quad**2 + 1.0 * linear 90 | dist[i, j] = np.sum(loss) 91 | print(dist) 92 | 93 | 94 | if __name__ == "__main__": 95 | demo_nn_distance() 96 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 80000 7 | N = 3500 8 | # M = 80 9 | # N = 5 10 | hdim = 16 11 | h = 6 12 | L = 31 13 | query = torch.rand(N, h, hdim).cuda() 14 | table_q = torch.rand(L, h, hdim, 3).cuda() 15 | key = torch.rand(N, h, hdim).cuda() 16 | table_k = torch.rand(L, h, hdim, 3).cuda() 17 | 18 | index_q = torch.rand(M) 19 | index_q[index_q < 0] = 0 20 | index_q = (index_q * N).long().cuda() 21 | 22 | index_k = torch.rand(M) 23 | index_k[index_k < 0] = 0 24 | index_k = (index_k * N).long().cuda() 25 | 26 | rel_index = torch.rand(M, 3) 27 | rel_index[rel_index < 0] = 0 28 | rel_index = (rel_index * L).long().cuda() 29 | 30 | 31 | # rearrange index for acceleration 32 | index_q, indices = torch.sort(index_q) # [M,] 33 | index_k = index_k[indices] # [M,] 34 | rel_index = rel_index[indices] 35 | index_q_counts = index_q.bincount() 36 | 37 | print("index_q_counts.shape: ", index_q_counts.shape) 38 | 39 | n_max = index_q_counts.max() 40 | index_q_offsets = index_q_counts.cumsum(dim=-1) # [N] 41 | 42 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape) 43 | 44 | index_q_offsets = torch.cat( 45 | [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0 46 | ) # [N+1] 47 | 48 | # print("index_q[:100]: ", index_q[:100]) 49 | print("n_max: ", n_max) 50 | print("index_q_offsets.shape: ", index_q_offsets.shape) 51 | # input() 52 | 53 | print("index_q_offsets[:100]: ", index_q_offsets[:100]) 54 | print("index_k[:20]: ", index_k[:20]) 55 | 56 | query.requires_grad = True 57 | table_q.requires_grad = True 58 | key.requires_grad = True 59 | table_k.requires_grad = True 60 | 61 | output1 = pointops.dot_prod_with_idx( 62 | query, index_q.int(), table_q, rel_index.int() 63 | ) 64 | output2 = pointops.dot_prod_with_idx( 65 | key, index_k.int(), table_k, rel_index.int() 66 | ) 67 | output = output1 + output2 68 | loss = output.mean() 69 | loss.backward() 70 | 71 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) 72 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 73 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 74 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 75 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 76 | # input() 77 | 78 | # print("query.is_contiguous(): ", query.is_contiguous()) 79 | # print("key.is_contiguous(): ", key.is_contiguous()) 80 | # print("index_q.is_contiguous(): ", index_q.is_contiguous()) 81 | # print("index_k.is_contiguous(): ", index_k.is_contiguous()) 82 | 83 | output_v2 = pointops.dot_prod_with_idx_v3( 84 | query, 85 | index_q_offsets.int(), 86 | n_max, 87 | key, 88 | index_k.int(), 89 | table_q, 90 | table_k, 91 | rel_index.int(), 92 | ) 93 | # loss = output_v2.mean() 94 | # loss.backward() 95 | 96 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) 97 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 98 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) 99 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 100 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) 101 | # input() 102 | 103 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 104 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 80000 7 | N = 3500 8 | hdim = 16 9 | h = 6 10 | L = 31 11 | attn = torch.rand(M, h).cuda() 12 | v = torch.rand(N, h, hdim).cuda() 13 | table = torch.rand(L, h, hdim, 3).cuda() 14 | 15 | index_0 = torch.rand(M) 16 | index_0[index_0 < 0] = 0 17 | index_0 = (index_0 * N).long().cuda() 18 | 19 | index_1 = torch.rand(M) 20 | index_1[index_1 < 0] = 0 21 | index_1 = (index_1 * N).long().cuda() 22 | 23 | rel_index = torch.rand(M, 3) 24 | rel_index[rel_index < 0] = 0 25 | rel_index = (rel_index * L).long().cuda() 26 | 27 | 28 | # rearrange index for acceleration 29 | index_0, indices = torch.sort(index_0) # [M,] 30 | index_1 = index_1[indices] # [M,] 31 | rel_index = rel_index[indices] 32 | index_0_counts = index_0.bincount() 33 | 34 | print("index_0_counts.shape: ", index_0_counts.shape) 35 | 36 | n_max = index_0_counts.max() 37 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 38 | 39 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 40 | 41 | index_0_offsets = torch.cat( 42 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 43 | ) # [N+1] 44 | 45 | 46 | attn.requires_grad = True 47 | v.requires_grad = True 48 | table.requires_grad = True 49 | 50 | 51 | output = pointops.attention_step2_with_rel_pos_value( 52 | attn, v, index_0.int(), index_1.int(), table, rel_index.int() 53 | ) 54 | loss = output.mean() 55 | loss.backward() 56 | 57 | print( 58 | "output.shape: {}, output[:5,:10,:5]: {}".format( 59 | output.shape, output[:5, :10, :5] 60 | ) 61 | ) 62 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) 63 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 64 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 65 | # input() 66 | 67 | attn_grad = attn.grad.clone() 68 | v_grad = v.grad.clone() 69 | table_grad = table.grad.clone() 70 | 71 | attn.grad.zero_() 72 | v.grad.zero_() 73 | table.grad.zero_() 74 | 75 | # print("query.is_contiguous(): ", query.is_contiguous()) 76 | # print("key.is_contiguous(): ", key.is_contiguous()) 77 | # print("index_0.is_contiguous(): ", index_0.is_contiguous()) 78 | # print("index_1.is_contiguous(): ", index_1.is_contiguous()) 79 | 80 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2( 81 | attn, 82 | v, 83 | index_0_offsets.int(), 84 | n_max, 85 | index_1.int(), 86 | table, 87 | rel_index.int(), 88 | ) 89 | loss = output_v2.mean() 90 | loss.backward() 91 | 92 | print( 93 | "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format( 94 | output_v2.shape, output_v2[:5, :10, :5] 95 | ) 96 | ) 97 | print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) 98 | print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) 99 | print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) 100 | # input() 101 | 102 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) 103 | 104 | print( 105 | "((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max() 106 | ) 107 | 108 | print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max()) 109 | 110 | print( 111 | "((table_grad-table.grad)**2).max(): ", 112 | ((table_grad - table.grad) ** 2).max(), 113 | ) 114 | 115 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) 116 | -------------------------------------------------------------------------------- /utils/kfold.py: -------------------------------------------------------------------------------- 1 | """ Author: https://github.com/yk-szk/stratified_group_kfold """ 2 | import random 3 | 4 | import numpy as np 5 | 6 | 7 | class StratifiedGroupKFold: 8 | """ 9 | Stratified Group K-fold with sklearn.model_selection.KFold compabitility. 10 | 11 | Split dataset into k folds with balanced label distribution (stratified) and non-overlapping group. 12 | 13 | Args: 14 | n_splits (int): # of splits 15 | shuffle (bool): Shuffle 16 | seed (int): Seed value for random number generator 17 | """ 18 | 19 | def __init__(self, n_splits, shuffle=True, random_state=None): 20 | self.n_splits = n_splits 21 | self.shuffle = shuffle 22 | self.seed = random_state 23 | 24 | def split(self, X, labels, groups): 25 | assert len(X) == len(labels) == len(groups), "Invalid input length" 26 | assert ( 27 | len(set(groups)) >= self.n_splits 28 | ), "The number of groups needs to be larger than n_splits" 29 | 30 | def encode(v): 31 | s = set(v) 32 | d = {l: i for i, l in enumerate(s)} 33 | return [d[e] for e in v] 34 | 35 | labels, groups = encode(labels), encode(groups) 36 | num_labels, num_groups = max(labels) + 1, max(groups) + 1 37 | label_counts_per_group = np.zeros((num_groups, num_labels), dtype=int) 38 | global_label_dist = np.bincount(labels) 39 | for label, g in zip(labels, groups): 40 | label_counts_per_group[g][label] += 1 41 | 42 | label_counts_per_fold = np.zeros( 43 | (self.n_splits, num_labels), dtype=int 44 | ) 45 | groups_per_fold = [set() for _ in range(self.n_splits)] 46 | 47 | def eval_label_counts_per_fold(y_counts, fold): 48 | fold += y_counts 49 | std_per_label = ( 50 | np.std(label_counts_per_fold, axis=0) / global_label_dist 51 | ) 52 | fold -= y_counts 53 | return np.mean(std_per_label) 54 | 55 | groups_and_label_counts = list(enumerate(label_counts_per_group)) 56 | if self.shuffle: 57 | rng = random.Random(self.seed) 58 | mean_std = np.mean(np.std(label_counts_per_group, axis=1)) 59 | groups_and_label_counts.sort( 60 | key=lambda g_counts: -np.std(g_counts[1]) 61 | + rng.gauss(0, mean_std) 62 | ) # add rng.gauss to increase the randomness 63 | else: 64 | groups_and_label_counts.sort( 65 | key=lambda g_counts: -np.std(g_counts[1]) 66 | ) 67 | 68 | for g, label_counts in groups_and_label_counts: 69 | evals = [ 70 | eval_label_counts_per_fold( 71 | label_counts, label_counts_per_fold[i] 72 | ) 73 | for i in range(self.n_splits) 74 | ] 75 | best_fold = np.argmin(evals) 76 | label_counts_per_fold[best_fold] += label_counts 77 | groups_per_fold[best_fold].add(g) 78 | 79 | all_groups = set(groups) 80 | for test_groups in groups_per_fold: 81 | train_groups = all_groups - test_groups 82 | 83 | train_indices = [ 84 | i for i, g in enumerate(groups) if g in train_groups 85 | ] 86 | test_indices = [ 87 | i for i, g in enumerate(groups) if g in test_groups 88 | ] 89 | 90 | yield train_indices, test_indices 91 | -------------------------------------------------------------------------------- /utils/pointops2/src/aggregation/aggregation_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "aggregation_cuda_kernel.h" 3 | 4 | 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 6 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 7 | int index = blockIdx.x * blockDim.x + threadIdx.x; 8 | if (index >= n * c) return; 9 | const int c_idx = index % c; 10 | const int n_idx = index / c; 11 | const int w_c_idx = c_idx % w_c; 12 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 13 | { 14 | int idx_idx = n_idx * nsample + nsample_idx; 15 | int input_idx = idx[idx_idx] * c + c_idx; 16 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 17 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 18 | output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; 19 | } 20 | } 21 | 22 | __global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 23 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 24 | int index = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (index >= n * c) return; 26 | const int c_idx = index % c; 27 | const int n_idx = index / c; 28 | const int w_c_idx = c_idx % w_c; 29 | for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) 30 | { 31 | int idx_idx = n_idx * nsample + nsample_idx; 32 | int input_idx = idx[idx_idx] * c + c_idx; 33 | int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; 34 | int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; 35 | atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); 36 | grad_position[position_idx] = grad_output[index] * weight[weight_idx]; 37 | atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); 38 | } 39 | } 40 | 41 | void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { 42 | // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) 43 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 44 | dim3 threads(THREADS_PER_BLOCK); 45 | aggregation_forward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, output); 46 | } 47 | 48 | void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { 49 | // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) 50 | dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); 51 | dim3 threads(THREADS_PER_BLOCK); 52 | aggregation_backward_cuda_kernel<<>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); 53 | } 54 | -------------------------------------------------------------------------------- /datasets/random_cuboid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | 4 | 5 | def check_aspect(crop_range, aspect_min): 6 | xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2]) 7 | xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]]) 8 | yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:]) 9 | return ( 10 | (xy_aspect >= aspect_min) 11 | or (xz_aspect >= aspect_min) 12 | or (yz_aspect >= aspect_min) 13 | ) 14 | 15 | 16 | class RandomCuboid(object): 17 | """ 18 | RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691] 19 | We slightly modify this operation to account for object detection. 20 | This augmentation randomly crops a cuboid from the input and 21 | ensures that the cropped cuboid contains at least one bounding box 22 | """ 23 | 24 | def __init__( 25 | self, 26 | min_points, 27 | # aspect=0.8, 28 | crop_length=6.0, 29 | version1=True, 30 | ): 31 | # self.aspect = aspect 32 | self.crop_length = crop_length 33 | self.min_points = min_points 34 | self.version1 = version1 35 | 36 | def __call__(self, point_cloud): 37 | if point_cloud.shape[0] < self.min_points: 38 | print("too small pcd") 39 | return np.ones(point_cloud.shape[0], dtype=np.bool) 40 | 41 | range_xyz = np.max(point_cloud[:, :2], axis=0) - np.min( 42 | point_cloud[:, :2], axis=0 43 | ) 44 | 45 | for _ in range(100): 46 | # crop_range = self.min_crop + np.random.rand(3) * ( 47 | # self.max_crop - self.min_crop 48 | # ) 49 | # crop_range[-1] = 999. 50 | # if not check_aspect(crop_range, self.aspect): 51 | # continue 52 | 53 | sample_center = point_cloud[:, :2].min(axis=0) + range_xyz / 2 54 | 55 | if self.version1: 56 | offset_x = np.random.uniform( 57 | -range_xyz[0] / 4, range_xyz[0] / 4 58 | ) 59 | offset_y = np.random.uniform( 60 | -range_xyz[1] / 4, range_xyz[1] / 4 61 | ) 62 | else: 63 | offset_x = np.random.uniform( 64 | -(range_xyz[0] / 2) + self.crop_length / 4, 65 | +(range_xyz[0] / 2) - self.crop_length / 4, 66 | ) 67 | offset_y = np.random.uniform( 68 | -(range_xyz[1] / 2) + self.crop_length / 4, 69 | +(range_xyz[1] / 2) - self.crop_length / 4, 70 | ) 71 | 72 | sample_center[0] = sample_center[0] + offset_x 73 | sample_center[1] = sample_center[1] + offset_y 74 | 75 | min_xy = sample_center - self.crop_length / 2 76 | max_xy = sample_center + self.crop_length / 2 77 | 78 | upper_idx = ( 79 | np.sum((point_cloud[:, :2] <= max_xy).astype(np.int32), 1) == 2 80 | ) 81 | lower_idx = ( 82 | np.sum((point_cloud[:, :2] >= min_xy).astype(np.int32), 1) == 2 83 | ) 84 | 85 | new_pointidx = (upper_idx) & (lower_idx) 86 | 87 | if np.sum(new_pointidx) < self.min_points: 88 | print("TOO SMALL") 89 | continue 90 | 91 | return new_pointidx 92 | 93 | # fallback 94 | print("FALLBACK") 95 | return np.ones(point_cloud.shape[0], dtype=np.bool) 96 | -------------------------------------------------------------------------------- /utils/pointops2/src/rpe/relative_pos_encoding_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "relative_pos_encoding_cuda_kernel.h" 6 | 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 8 | at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 9 | { 10 | const float *q = q_tensor.data_ptr(); 11 | const float *table = table_tensor.data_ptr(); 12 | const int *index = index_tensor.data_ptr(); 13 | const int *rel_idx = rel_idx_tensor.data_ptr(); 14 | float *output = output_tensor.data_ptr(); 15 | dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); 16 | } 17 | 18 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 19 | at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 20 | at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) 21 | { 22 | const float *grad_out = grad_out_tensor.data_ptr(); 23 | const float *q = q_tensor.data_ptr(); 24 | const int *index = index_tensor.data_ptr(); 25 | const float *table = table_tensor.data_ptr(); 26 | const int *rel_idx = rel_idx_tensor.data_ptr(); 27 | float *grad_q = grad_q_tensor.data_ptr(); 28 | float *grad_table = grad_table_tensor.data_ptr(); 29 | dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); 30 | } 31 | 32 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 33 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) 34 | { 35 | const float *attn = attn_tensor.data_ptr(); 36 | const float *v = v_tensor.data_ptr(); 37 | const int *index0 = index0_tensor.data_ptr(); 38 | const int *index1 = index1_tensor.data_ptr(); 39 | const float *table = table_tensor.data_ptr(); 40 | const int *rel_idx = rel_idx_tensor.data_ptr(); 41 | float *output = output_tensor.data_ptr(); 42 | attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); 43 | } 44 | 45 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 46 | at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, 47 | at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor) 48 | { 49 | const float *grad_out = grad_out_tensor.data_ptr(); 50 | const int *index0 = index0_tensor.data_ptr(); 51 | const int *index1 = index1_tensor.data_ptr(); 52 | const float *attn = attn_tensor.data_ptr(); 53 | const float *v = v_tensor.data_ptr(); 54 | const float *table = table_tensor.data_ptr(); 55 | const int *rel_idx = rel_idx_tensor.data_ptr(); 56 | float *grad_attn = grad_attn_tensor.data_ptr(); 57 | float *grad_v = grad_v_tensor.data_ptr(); 58 | float *grad_table = grad_table_tensor.data_ptr(); 59 | attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); 60 | } 61 | -------------------------------------------------------------------------------- /utils/pointops2/src/knnquery/knnquery_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "knnquery_cuda_kernel.h" 3 | 4 | 5 | __device__ void swap_float(float *x, float *y) 6 | { 7 | float tmp = *x; 8 | *x = *y; 9 | *y = tmp; 10 | } 11 | 12 | 13 | __device__ void swap_int(int *x, int *y) 14 | { 15 | int tmp = *x; 16 | *x = *y; 17 | *y = tmp; 18 | } 19 | 20 | 21 | __device__ void reheap(float *dist, int *idx, int k) 22 | { 23 | int root = 0; 24 | int child = root * 2 + 1; 25 | while (child < k) 26 | { 27 | if(child + 1 < k && dist[child+1] > dist[child]) 28 | child++; 29 | if(dist[root] > dist[child]) 30 | return; 31 | swap_float(&dist[root], &dist[child]); 32 | swap_int(&idx[root], &idx[child]); 33 | root = child; 34 | child = root * 2 + 1; 35 | } 36 | } 37 | 38 | 39 | __device__ void heap_sort(float *dist, int *idx, int k) 40 | { 41 | int i; 42 | for (i = k - 1; i > 0; i--) 43 | { 44 | swap_float(&dist[0], &dist[i]); 45 | swap_int(&idx[0], &idx[i]); 46 | reheap(dist, idx, i); 47 | } 48 | } 49 | 50 | 51 | __device__ int get_bt_idx(int idx, const int *offset) 52 | { 53 | int i = 0; 54 | while (1) 55 | { 56 | if (idx < offset[i]) 57 | break; 58 | else 59 | i++; 60 | } 61 | return i; 62 | } 63 | 64 | 65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { 66 | // input: xyz (n, 3) new_xyz (m, 3) 67 | // output: idx (m, nsample) dist2 (m, nsample) 68 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 69 | if (pt_idx >= m) return; 70 | 71 | new_xyz += pt_idx * 3; 72 | idx += pt_idx * nsample; 73 | dist2 += pt_idx * nsample; 74 | int bt_idx = get_bt_idx(pt_idx, new_offset); 75 | int start; 76 | if (bt_idx == 0) 77 | start = 0; 78 | else 79 | start = offset[bt_idx - 1]; 80 | int end = offset[bt_idx]; 81 | 82 | float new_x = new_xyz[0]; 83 | float new_y = new_xyz[1]; 84 | float new_z = new_xyz[2]; 85 | 86 | float best_dist[100]; 87 | int best_idx[100]; 88 | for(int i = 0; i < nsample; i++){ 89 | best_dist[i] = 1e10; 90 | best_idx[i] = start; 91 | } 92 | for(int i = start; i < end; i++){ 93 | float x = xyz[i * 3 + 0]; 94 | float y = xyz[i * 3 + 1]; 95 | float z = xyz[i * 3 + 2]; 96 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 97 | if (d2 < best_dist[0]){ 98 | best_dist[0] = d2; 99 | best_idx[0] = i; 100 | reheap(best_dist, best_idx, nsample); 101 | } 102 | } 103 | heap_sort(best_dist, best_idx, nsample); 104 | for(int i = 0; i < nsample; i++){ 105 | idx[i] = best_idx[i]; 106 | dist2[i] = best_dist[i]; 107 | } 108 | } 109 | 110 | 111 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { 112 | // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) 113 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); 114 | dim3 threads(THREADS_PER_BLOCK); 115 | knnquery_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); 116 | } 117 | -------------------------------------------------------------------------------- /models/modules/3detr_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | from functools import partial 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class BatchNormDim1Swap(nn.BatchNorm1d): 9 | """ 10 | Used for nn.Transformer that uses a HW x N x C rep 11 | """ 12 | 13 | def forward(self, x): 14 | """ 15 | x: HW x N x C 16 | permute to N x C x HW 17 | Apply BN on C 18 | permute back 19 | """ 20 | hw, n, c = x.shape 21 | x = x.permute(1, 2, 0) 22 | x = super(BatchNormDim1Swap, self).forward(x) 23 | # x: n x c x hw -> hw x n x c 24 | x = x.permute(2, 0, 1) 25 | return x 26 | 27 | 28 | NORM_DICT = { 29 | "bn": BatchNormDim1Swap, 30 | "bn1d": nn.BatchNorm1d, 31 | "id": nn.Identity, 32 | "ln": nn.LayerNorm, 33 | } 34 | 35 | ACTIVATION_DICT = { 36 | "relu": nn.ReLU, 37 | "gelu": nn.GELU, 38 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 39 | } 40 | 41 | WEIGHT_INIT_DICT = { 42 | "xavier_uniform": nn.init.xavier_uniform_, 43 | } 44 | 45 | 46 | class GenericMLP(nn.Module): 47 | def __init__( 48 | self, 49 | input_dim, 50 | hidden_dims, 51 | output_dim, 52 | norm_fn_name=None, 53 | activation="relu", 54 | use_conv=False, 55 | dropout=None, 56 | hidden_use_bias=False, 57 | output_use_bias=True, 58 | output_use_activation=False, 59 | output_use_norm=False, 60 | weight_init_name=None, 61 | ): 62 | super().__init__() 63 | activation = ACTIVATION_DICT[activation] 64 | norm = None 65 | if norm_fn_name is not None: 66 | norm = NORM_DICT[norm_fn_name] 67 | if norm_fn_name == "ln" and use_conv: 68 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 69 | 70 | if dropout is not None: 71 | if not isinstance(dropout, list): 72 | dropout = [dropout for _ in range(len(hidden_dims))] 73 | 74 | layers = [] 75 | prev_dim = input_dim 76 | for idx, x in enumerate(hidden_dims): 77 | if use_conv: 78 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 79 | else: 80 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 81 | layers.append(layer) 82 | if norm: 83 | layers.append(norm(x)) 84 | layers.append(activation()) 85 | if dropout is not None: 86 | layers.append(nn.Dropout(p=dropout[idx])) 87 | prev_dim = x 88 | if use_conv: 89 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 90 | else: 91 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 92 | layers.append(layer) 93 | 94 | if output_use_norm: 95 | layers.append(norm(output_dim)) 96 | 97 | if output_use_activation: 98 | layers.append(activation()) 99 | 100 | self.layers = nn.Sequential(*layers) 101 | 102 | if weight_init_name is not None: 103 | self.do_weight_init(weight_init_name) 104 | 105 | def do_weight_init(self, weight_init_name): 106 | func = WEIGHT_INIT_DICT[weight_init_name] 107 | for (_, param) in self.named_parameters(): 108 | if param.dim() > 1: # skips batchnorm/layernorm 109 | func(param) 110 | 111 | def forward(self, x): 112 | output = self.layers(x) 113 | return output 114 | 115 | 116 | def get_clones(module, N): 117 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 118 | -------------------------------------------------------------------------------- /models/modules/helpers_3detr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import copy 3 | from functools import partial 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class BatchNormDim1Swap(nn.BatchNorm1d): 9 | """ 10 | Used for nn.Transformer that uses a HW x N x C rep 11 | """ 12 | 13 | def forward(self, x): 14 | """ 15 | x: HW x N x C 16 | permute to N x C x HW 17 | Apply BN on C 18 | permute back 19 | """ 20 | hw, n, c = x.shape 21 | x = x.permute(1, 2, 0) 22 | x = super(BatchNormDim1Swap, self).forward(x) 23 | # x: n x c x hw -> hw x n x c 24 | x = x.permute(2, 0, 1) 25 | return x 26 | 27 | 28 | NORM_DICT = { 29 | "bn": BatchNormDim1Swap, 30 | "bn1d": nn.BatchNorm1d, 31 | "id": nn.Identity, 32 | "ln": nn.LayerNorm, 33 | } 34 | 35 | ACTIVATION_DICT = { 36 | "relu": nn.ReLU, 37 | "gelu": nn.GELU, 38 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 39 | } 40 | 41 | WEIGHT_INIT_DICT = { 42 | "xavier_uniform": nn.init.xavier_uniform_, 43 | } 44 | 45 | 46 | class GenericMLP(nn.Module): 47 | def __init__( 48 | self, 49 | input_dim, 50 | hidden_dims, 51 | output_dim, 52 | norm_fn_name=None, 53 | activation="relu", 54 | use_conv=False, 55 | dropout=None, 56 | hidden_use_bias=False, 57 | output_use_bias=True, 58 | output_use_activation=False, 59 | output_use_norm=False, 60 | weight_init_name=None, 61 | ): 62 | super().__init__() 63 | activation = ACTIVATION_DICT[activation] 64 | norm = None 65 | if norm_fn_name is not None: 66 | norm = NORM_DICT[norm_fn_name] 67 | if norm_fn_name == "ln" and use_conv: 68 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 69 | 70 | if dropout is not None: 71 | if not isinstance(dropout, list): 72 | dropout = [dropout for _ in range(len(hidden_dims))] 73 | 74 | layers = [] 75 | prev_dim = input_dim 76 | for idx, x in enumerate(hidden_dims): 77 | if use_conv: 78 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 79 | else: 80 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 81 | layers.append(layer) 82 | if norm: 83 | layers.append(norm(x)) 84 | layers.append(activation()) 85 | if dropout is not None: 86 | layers.append(nn.Dropout(p=dropout[idx])) 87 | prev_dim = x 88 | if use_conv: 89 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 90 | else: 91 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 92 | layers.append(layer) 93 | 94 | if output_use_norm: 95 | layers.append(norm(output_dim)) 96 | 97 | if output_use_activation: 98 | layers.append(activation()) 99 | 100 | self.layers = nn.Sequential(*layers) 101 | 102 | if weight_init_name is not None: 103 | self.do_weight_init(weight_init_name) 104 | 105 | def do_weight_init(self, weight_init_name): 106 | func = WEIGHT_INIT_DICT[weight_init_name] 107 | for (_, param) in self.named_parameters(): 108 | if param.dim() > 1: # skips batchnorm/layernorm 109 | func(param) 110 | 111 | def forward(self, x): 112 | output = self.layers(x) 113 | return output 114 | 115 | 116 | def get_clones(module, N): 117 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 118 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import hydra 5 | from dotenv import load_dotenv 6 | from omegaconf import DictConfig, OmegaConf 7 | from pytorch_lightning import Trainer, seed_everything 8 | 9 | from trainer.trainer import InstanceSegmentation, RegularCheckpointing 10 | from utils.utils import ( 11 | flatten_dict, 12 | load_backbone_checkpoint_with_missing_or_exsessive_keys, 13 | load_checkpoint_with_missing_or_exsessive_keys, 14 | ) 15 | 16 | 17 | def get_parameters(cfg: DictConfig): 18 | logger = logging.getLogger(__name__) 19 | load_dotenv(".env") 20 | 21 | # parsing input parameters 22 | seed_everything(cfg.general.seed) 23 | 24 | # getting basic configuration 25 | if cfg.general.get("gpus", None) is None: 26 | cfg.general.gpus = os.environ.get("CUDA_VISIBLE_DEVICES", None) 27 | loggers = [] 28 | 29 | # cfg.general.experiment_id = "0" # str(Repo("./").commit())[:8] 30 | # params = flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 31 | 32 | # create unique id for experiments that are run locally 33 | # unique_id = "_" + str(uuid4())[:4] 34 | # cfg.general.version = md5(str(params).encode("utf-8")).hexdigest()[:8] + unique_id 35 | 36 | if not os.path.exists(cfg.general.save_dir): 37 | os.makedirs(cfg.general.save_dir) 38 | else: 39 | print("EXPERIMENT ALREADY EXIST") 40 | cfg["trainer"][ 41 | "resume_from_checkpoint" 42 | ] = f"{cfg.general.save_dir}/last-epoch.ckpt" 43 | 44 | for log in cfg.logging: 45 | print(log) 46 | loggers.append(hydra.utils.instantiate(log)) 47 | loggers[-1].log_hyperparams( 48 | flatten_dict(OmegaConf.to_container(cfg, resolve=True)) 49 | ) 50 | 51 | model = InstanceSegmentation(cfg) 52 | if cfg.general.backbone_checkpoint is not None: 53 | cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys( 54 | cfg, model 55 | ) 56 | if cfg.general.checkpoint is not None: 57 | cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model) 58 | 59 | logger.info(flatten_dict(OmegaConf.to_container(cfg, resolve=True))) 60 | return cfg, model, loggers 61 | 62 | 63 | @hydra.main( 64 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 65 | ) 66 | def train(cfg: DictConfig): 67 | os.chdir(hydra.utils.get_original_cwd()) 68 | cfg, model, loggers = get_parameters(cfg) 69 | callbacks = [] 70 | for cb in cfg.callbacks: 71 | callbacks.append(hydra.utils.instantiate(cb)) 72 | 73 | callbacks.append(RegularCheckpointing()) 74 | 75 | runner = Trainer( 76 | logger=loggers, 77 | gpus=cfg.general.gpus, 78 | callbacks=callbacks, 79 | weights_save_path=str(cfg.general.save_dir), 80 | **cfg.trainer, 81 | ) 82 | runner.fit(model) 83 | 84 | 85 | @hydra.main( 86 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 87 | ) 88 | def test(cfg: DictConfig): 89 | # because hydra wants to change dir for some reason 90 | os.chdir(hydra.utils.get_original_cwd()) 91 | cfg, model, loggers = get_parameters(cfg) 92 | runner = Trainer( 93 | gpus=cfg.general.gpus, 94 | logger=loggers, 95 | weights_save_path=str(cfg.general.save_dir), 96 | **cfg.trainer, 97 | ) 98 | runner.test(model) 99 | 100 | 101 | @hydra.main( 102 | config_path="conf", config_name="config_base_instance_segmentation.yaml" 103 | ) 104 | def main(cfg: DictConfig): 105 | if cfg["general"]["train_mode"]: 106 | train(cfg) 107 | else: 108 | test(cfg) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "interpolate.h" 4 | #include "utils.h" 5 | 6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 7 | const float *known, float *dist2, int *idx); 8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 9 | const float *points, const int *idx, 10 | const float *weight, float *out); 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 12 | const float *grad_out, 13 | const int *idx, const float *weight, 14 | float *grad_points); 15 | 16 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 17 | CHECK_CONTIGUOUS(unknowns); 18 | CHECK_CONTIGUOUS(knows); 19 | CHECK_IS_FLOAT(unknowns); 20 | CHECK_IS_FLOAT(knows); 21 | 22 | if (unknowns.is_cuda()) { 23 | CHECK_CUDA(knows); 24 | } 25 | 26 | at::Tensor idx = 27 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 28 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 29 | at::Tensor dist2 = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 32 | 33 | if (unknowns.is_cuda()) { 34 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 35 | unknowns.data(), knows.data(), 36 | dist2.data(), idx.data()); 37 | } else { 38 | AT_ASSERT(false, "CPU not supported"); 39 | } 40 | 41 | return {dist2, idx}; 42 | } 43 | 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 45 | at::Tensor weight) { 46 | CHECK_CONTIGUOUS(points); 47 | CHECK_CONTIGUOUS(idx); 48 | CHECK_CONTIGUOUS(weight); 49 | CHECK_IS_FLOAT(points); 50 | CHECK_IS_INT(idx); 51 | CHECK_IS_FLOAT(weight); 52 | 53 | if (points.is_cuda()) { 54 | CHECK_CUDA(idx); 55 | CHECK_CUDA(weight); 56 | } 57 | 58 | at::Tensor output = 59 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 60 | at::device(points.device()).dtype(at::ScalarType::Float)); 61 | 62 | if (points.is_cuda()) { 63 | three_interpolate_kernel_wrapper( 64 | points.size(0), points.size(1), points.size(2), idx.size(1), 65 | points.data(), idx.data(), weight.data(), 66 | output.data()); 67 | } else { 68 | AT_ASSERT(false, "CPU not supported"); 69 | } 70 | 71 | return output; 72 | } 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 74 | at::Tensor weight, const int m) { 75 | CHECK_CONTIGUOUS(grad_out); 76 | CHECK_CONTIGUOUS(idx); 77 | CHECK_CONTIGUOUS(weight); 78 | CHECK_IS_FLOAT(grad_out); 79 | CHECK_IS_INT(idx); 80 | CHECK_IS_FLOAT(weight); 81 | 82 | if (grad_out.is_cuda()) { 83 | CHECK_CUDA(idx); 84 | CHECK_CUDA(weight); 85 | } 86 | 87 | at::Tensor output = 88 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 89 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 90 | 91 | if (grad_out.is_cuda()) { 92 | three_interpolate_grad_kernel_wrapper( 93 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 94 | grad_out.data(), idx.data(), weight.data(), 95 | output.data()); 96 | } else { 97 | AT_ASSERT(false, "CPU not supported"); 98 | } 99 | 100 | return output; 101 | } 102 | -------------------------------------------------------------------------------- /utils/pointops2/functions/test_attention_op_step1_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pointops 3 | 4 | torch.manual_seed(1) 5 | 6 | M = 800000 7 | N = 35000 8 | C = 96 9 | h = 6 10 | query = torch.rand(N, h, C // h).cuda() 11 | key = torch.rand(N, h, C // h).cuda() 12 | 13 | index_0 = torch.rand(M) 14 | index_0[index_0 < 0] = 0 15 | index_0 = (index_0 * N).long().cuda() 16 | 17 | index_1 = torch.rand(M) 18 | index_1[index_1 < 0] = 0 19 | index_1 = (index_1 * N).long().cuda() 20 | 21 | query.requires_grad = True 22 | key.requires_grad = True 23 | 24 | 25 | attn_flat = pointops.attention_step1( 26 | query.float(), key.float(), index_0.int(), index_1.int() 27 | ) 28 | loss = attn_flat.sum() 29 | loss.backward() 30 | print( 31 | "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format( 32 | attn_flat.shape, attn_flat[:20, :10] 33 | ) 34 | ) 35 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 36 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 37 | input() 38 | 39 | 40 | # rearrange index for acceleration 41 | index_0, indices = torch.sort(index_0) # [M,] 42 | index_1 = index_1[indices] # [M,] 43 | index_0_counts = index_0.bincount() 44 | 45 | print("index_0_counts.shape: ", index_0_counts.shape) 46 | 47 | n_max = index_0_counts.max() 48 | index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] 49 | 50 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape) 51 | 52 | index_0_offsets = torch.cat( 53 | [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 54 | ) # [N+1] 55 | 56 | # print("index_0[:100]: ", index_0[:100]) 57 | print("n_max: ", n_max) 58 | print("index_0_offsets.shape: ", index_0_offsets.shape) 59 | # input() 60 | 61 | print("index_0_offsets[:100]: ", index_0_offsets[:100]) 62 | print("index_1[:20]: ", index_1[:20]) 63 | 64 | 65 | attn_flat = pointops.attention_step1( 66 | query.float(), key.float(), index_0.int(), index_1.int() 67 | ) 68 | # loss = attn_flat.sum() 69 | # loss.backward() 70 | # # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) 71 | # # loss = attn_flat.sum() 72 | # # loss.backward() 73 | # print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) 74 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 75 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 76 | # input() 77 | 78 | print("query.is_contiguous(): ", query.is_contiguous()) 79 | print("key.is_contiguous(): ", key.is_contiguous()) 80 | print("index_0.is_contiguous(): ", index_0.is_contiguous()) 81 | print("index_1.is_contiguous(): ", index_1.is_contiguous()) 82 | 83 | attn_flat_v2 = pointops.attention_step1_v2( 84 | query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max 85 | ) 86 | loss = attn_flat_v2.sum() 87 | loss.backward() 88 | 89 | # attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) 90 | # loss = attn_flat_v2.sum() 91 | # loss.backward() 92 | 93 | print( 94 | "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format( 95 | attn_flat_v2.shape, attn_flat_v2[:20, :10] 96 | ) 97 | ) 98 | print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) 99 | print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 100 | # input() 101 | 102 | # mask = attn_flat_v2.sum(-1) != 0 103 | # print("mask.sum(): ", mask.sum()) 104 | # print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) 105 | 106 | 107 | print( 108 | "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", 109 | ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), 110 | ) 111 | 112 | selected = 10000 113 | print( 114 | "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", 115 | torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), 116 | ) 117 | -------------------------------------------------------------------------------- /conf/data/datasets/egobody.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | train_dataset: 3 | _target_: datasets.semseg.SemanticSegmentationDataset 4 | dataset_name: "human_segmentation" 5 | data_dir: data/processed/egobody 6 | image_augmentations_path: conf/augmentation/albumentations_aug.yaml 7 | volume_augmentations_path: conf/augmentation/volumentations_aug.yaml 8 | label_db_filepath: data/processed/egobody/part_database.yaml 9 | color_mean_std: data/processed/egobody/color_mean_std.yaml 10 | data_percent: 1.0 11 | mode: ${data.train_mode} 12 | ignore_label: ${data.ignore_label} 13 | num_labels: ${data.num_labels} 14 | add_raw_coordinates: ${data.add_raw_coordinates} 15 | add_colors: ${data.add_colors} 16 | add_normals: ${data.add_normals} 17 | add_instance: ${data.add_instance} 18 | cache_data: ${data.cache_data} 19 | # different augs experiments 20 | instance_oversampling: 0.0 21 | place_around_existing: False 22 | point_per_cut: 0 23 | max_cut_region: 0 24 | flip_in_center: false 25 | noise_rate: 0 26 | resample_points: 0 27 | cropping: ${data.cropping} 28 | cropping_args: ${data.cropping_args} 29 | is_tta: false 30 | crop_min_size: ${data.crop_min_size} 31 | crop_length: ${data.crop_length} 32 | cropping_v1: ${data.cropping_v1} 33 | area: ${general.area} 34 | reps_per_epoch: ${general.reps_per_epoch} 35 | eval_inner_core: ${general.eval_inner_core} 36 | filter_out_classes: [0] 37 | label_offset: 1 38 | is_elastic_distortion: true 39 | color_drop: 0.0 40 | is_mirroring: ${data.is_mirroring} 41 | part2human: ${data.part2human} 42 | broken_mirror_version: ${data.broken_mirror_version} 43 | 44 | validation_dataset: 45 | _target_: datasets.semseg.SemanticSegmentationDataset 46 | dataset_name: "human_segmentation" 47 | data_dir: 48 | - data/processed/egobody 49 | image_augmentations_path: null 50 | volume_augmentations_path: null 51 | label_db_filepath: data/processed/egobody/part_database.yaml 52 | color_mean_std: data/processed/egobody/color_mean_std.yaml 53 | data_percent: 1.0 54 | mode: ${data.validation_mode} 55 | ignore_label: ${data.ignore_label} 56 | num_labels: ${data.num_labels} 57 | add_raw_coordinates: ${data.add_raw_coordinates} 58 | add_colors: ${data.add_colors} 59 | add_normals: ${data.add_normals} 60 | add_instance: ${data.add_instance} 61 | cache_data: ${data.cache_data} 62 | cropping: false 63 | is_tta: false 64 | crop_min_size: ${data.crop_min_size} 65 | crop_length: ${data.crop_length} 66 | cropping_v1: ${data.cropping_v1} 67 | area: ${general.area} 68 | on_crops: ${general.on_crops} 69 | eval_inner_core: ${general.eval_inner_core} 70 | filter_out_classes: [0] 71 | label_offset: 1 72 | part2human: ${data.part2human} 73 | 74 | test_dataset: 75 | _target_: datasets.semseg.SemanticSegmentationDataset 76 | dataset_name: "human_segmentation" 77 | data_dir: data/processed/egobody 78 | image_augmentations_path: null 79 | volume_augmentations_path: null 80 | label_db_filepath: data/processed/egobody/part_database.yaml 81 | color_mean_std: data/processed/egobody/color_mean_std.yaml 82 | data_percent: 1.0 83 | mode: ${data.test_mode} 84 | ignore_label: ${data.ignore_label} 85 | num_labels: ${data.num_labels} 86 | add_raw_coordinates: ${data.add_raw_coordinates} 87 | add_colors: ${data.add_colors} 88 | add_normals: ${data.add_normals} 89 | add_instance: ${data.add_instance} 90 | cache_data: ${data.cache_data} 91 | cropping: false 92 | is_tta: false 93 | crop_min_size: ${data.crop_min_size} 94 | crop_length: ${data.crop_length} 95 | cropping_v1: ${data.cropping_v1} 96 | area: ${general.area} 97 | on_crops: ${general.on_crops} 98 | eval_inner_core: ${general.eval_inner_core} 99 | filter_out_classes: [0] 100 | label_offset: 1 101 | part2human: ${data.part2human} 102 | -------------------------------------------------------------------------------- /utils/pointops2/src/pointops_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "knnquery/knnquery_cuda_kernel.h" 5 | #include "sampling/sampling_cuda_kernel.h" 6 | #include "grouping/grouping_cuda_kernel.h" 7 | #include "interpolation/interpolation_cuda_kernel.h" 8 | #include "aggregation/aggregation_cuda_kernel.h" 9 | #include "subtraction/subtraction_cuda_kernel.h" 10 | #include "attention/attention_cuda_kernel.h" 11 | #include "rpe/relative_pos_encoding_cuda_kernel.h" 12 | #include "attention_v2/attention_cuda_kernel_v2.h" 13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" 14 | 15 | 16 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 17 | m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); 18 | m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); 19 | m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); 20 | m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); 21 | m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); 22 | m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); 23 | m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); 24 | m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); 25 | m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); 26 | m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); 27 | m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); 28 | m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); 29 | m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); 30 | m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); 31 | m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); 32 | m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); 33 | m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); 34 | m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); 35 | m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); 36 | m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); 37 | m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); 38 | m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); 39 | m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); 40 | m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); 41 | m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); 42 | m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); 43 | m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); 44 | m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); 45 | } 46 | -------------------------------------------------------------------------------- /conf/data/datasets/synthetic_humans.yaml: -------------------------------------------------------------------------------- 1 | # @package data 2 | train_dataset: 3 | _target_: datasets.semseg.SemanticSegmentationDataset 4 | dataset_name: "human_segmentation" 5 | data_dir: 6 | - data/processed/synthetic_humans 7 | image_augmentations_path: conf/augmentation/albumentations_aug.yaml 8 | volume_augmentations_path: conf/augmentation/volumentations_aug.yaml 9 | label_db_filepath: data/processed/synthetic_humans/part_database.yaml 10 | color_mean_std: data/processed/synthetic_humans/color_mean_std.yaml 11 | data_percent: 1.0 12 | mode: ${data.train_mode} 13 | ignore_label: ${data.ignore_label} 14 | num_labels: ${data.num_labels} 15 | add_raw_coordinates: ${data.add_raw_coordinates} 16 | add_colors: ${data.add_colors} 17 | add_normals: ${data.add_normals} 18 | add_instance: ${data.add_instance} 19 | cache_data: ${data.cache_data} 20 | # different augs experiments 21 | instance_oversampling: 0.0 22 | place_around_existing: False 23 | point_per_cut: 0 24 | max_cut_region: 0 25 | flip_in_center: false 26 | noise_rate: 0 27 | resample_points: 0 28 | cropping: ${data.cropping} 29 | cropping_args: ${data.cropping_args} 30 | is_tta: false 31 | crop_min_size: ${data.crop_min_size} 32 | crop_length: ${data.crop_length} 33 | cropping_v1: ${data.cropping_v1} 34 | area: ${general.area} 35 | reps_per_epoch: ${general.reps_per_epoch} 36 | eval_inner_core: ${general.eval_inner_core} 37 | filter_out_classes: [0] 38 | label_offset: 1 39 | is_elastic_distortion: true 40 | color_drop: 0.0 41 | part2human: ${data.part2human} 42 | is_mirroring: ${data.is_mirroring} 43 | broken_mirror_version: ${data.broken_mirror_version} 44 | 45 | validation_dataset: 46 | _target_: datasets.semseg.SemanticSegmentationDataset 47 | dataset_name: "human_segmentation" 48 | data_dir: 49 | - data/processed/egobody 50 | image_augmentations_path: null 51 | volume_augmentations_path: null 52 | label_db_filepath: data/processed/egobody/part_database.yaml 53 | color_mean_std: data/processed/egobody/color_mean_std.yaml 54 | data_percent: 1.0 55 | mode: ${data.validation_mode} 56 | ignore_label: ${data.ignore_label} 57 | num_labels: ${data.num_labels} 58 | add_raw_coordinates: ${data.add_raw_coordinates} 59 | add_colors: ${data.add_colors} 60 | add_normals: ${data.add_normals} 61 | add_instance: ${data.add_instance} 62 | cache_data: ${data.cache_data} 63 | cropping: false 64 | is_tta: false 65 | crop_min_size: ${data.crop_min_size} 66 | crop_length: ${data.crop_length} 67 | cropping_v1: ${data.cropping_v1} 68 | area: ${general.area} 69 | on_crops: ${general.on_crops} 70 | eval_inner_core: ${general.eval_inner_core} 71 | filter_out_classes: [0] 72 | label_offset: 1 73 | part2human: ${data.part2human} 74 | 75 | test_dataset: 76 | _target_: datasets.semseg.SemanticSegmentationDataset 77 | dataset_name: "human_segmentation" 78 | data_dir: data/processed/egobody 79 | image_augmentations_path: null 80 | volume_augmentations_path: null 81 | label_db_filepath: data/processed/egobody/part_database.yaml 82 | color_mean_std: data/processed/egobody/color_mean_std.yaml 83 | data_percent: 1.0 84 | mode: ${data.test_mode} 85 | ignore_label: ${data.ignore_label} 86 | num_labels: ${data.num_labels} 87 | add_raw_coordinates: ${data.add_raw_coordinates} 88 | add_colors: ${data.add_colors} 89 | add_normals: ${data.add_normals} 90 | add_instance: ${data.add_instance} 91 | cache_data: ${data.cache_data} 92 | cropping: false 93 | is_tta: false 94 | crop_min_size: ${data.crop_min_size} 95 | crop_length: ${data.crop_length} 96 | cropping_v1: ${data.cropping_v1} 97 | area: ${general.area} 98 | on_crops: ${general.on_crops} 99 | eval_inner_core: ${general.eval_inner_core} 100 | filter_out_classes: [0] 101 | label_offset: 1 102 | part2human: ${data.part2human} 103 | -------------------------------------------------------------------------------- /models/modules/senet_block.py: -------------------------------------------------------------------------------- 1 | import MinkowskiEngine as ME 2 | import torch.nn as nn 3 | from mix3d.models.modules.common import ConvType, NormType 4 | from mix3d.models.modules.resnet_block import BasicBlock, Bottleneck 5 | 6 | 7 | class SELayer(nn.Module): 8 | def __init__(self, channel, reduction=16, D=-1): 9 | # Global coords does not require coords_key 10 | super().__init__() 11 | self.fc = nn.Sequential( 12 | ME.MinkowskiLinear(channel, channel // reduction), 13 | ME.MinkowskiReLU(inplace=True), 14 | ME.MinkowskiLinear(channel // reduction, channel), 15 | ME.MinkowskiSigmoid(), 16 | ) 17 | self.pooling = ME.MinkowskiGlobalPooling(dimension=D) 18 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D) 19 | 20 | def forward(self, x): 21 | y = self.pooling(x) 22 | y = self.fc(y) 23 | return self.broadcast_mul(x, y) 24 | 25 | 26 | class SEBasicBlock(BasicBlock): 27 | def __init__( 28 | self, 29 | inplanes, 30 | planes, 31 | stride=1, 32 | dilation=1, 33 | downsample=None, 34 | conv_type=ConvType.HYPERCUBE, 35 | reduction=16, 36 | D=-1, 37 | ): 38 | super().__init__( 39 | inplanes, 40 | planes, 41 | stride=stride, 42 | dilation=dilation, 43 | downsample=downsample, 44 | conv_type=conv_type, 45 | D=D, 46 | ) 47 | self.se = SELayer(planes, reduction=reduction, D=D) 48 | 49 | def forward(self, x): 50 | residual = x 51 | 52 | out = self.conv1(x) 53 | out = self.norm1(out) 54 | out = self.relu(out) 55 | 56 | out = self.conv2(out) 57 | out = self.norm2(out) 58 | out = self.se(out) 59 | 60 | if self.downsample is not None: 61 | residual = self.downsample(x) 62 | 63 | out += residual 64 | out = self.relu(out) 65 | 66 | return out 67 | 68 | 69 | class SEBasicBlockSN(SEBasicBlock): 70 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 71 | 72 | 73 | class SEBasicBlockIN(SEBasicBlock): 74 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 75 | 76 | 77 | class SEBasicBlockLN(SEBasicBlock): 78 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 79 | 80 | 81 | class SEBottleneck(Bottleneck): 82 | def __init__( 83 | self, 84 | inplanes, 85 | planes, 86 | stride=1, 87 | dilation=1, 88 | downsample=None, 89 | conv_type=ConvType.HYPERCUBE, 90 | D=3, 91 | reduction=16, 92 | ): 93 | super().__init__( 94 | inplanes, 95 | planes, 96 | stride=stride, 97 | dilation=dilation, 98 | downsample=downsample, 99 | conv_type=conv_type, 100 | D=D, 101 | ) 102 | self.se = SELayer(planes * self.expansion, reduction=reduction, D=D) 103 | 104 | def forward(self, x): 105 | residual = x 106 | 107 | out = self.conv1(x) 108 | out = self.norm1(out) 109 | out = self.relu(out) 110 | 111 | out = self.conv2(out) 112 | out = self.norm2(out) 113 | out = self.relu(out) 114 | 115 | out = self.conv3(out) 116 | out = self.norm3(out) 117 | out = self.se(out) 118 | 119 | if self.downsample is not None: 120 | residual = self.downsample(x) 121 | 122 | out += residual 123 | out = self.relu(out) 124 | 125 | return out 126 | 127 | 128 | class SEBottleneckSN(SEBottleneck): 129 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 130 | 131 | 132 | class SEBottleneckIN(SEBottleneck): 133 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 134 | 135 | 136 | class SEBottleneckLN(SEBottleneck): 137 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /saved 2 | /logs 3 | /data 4 | third_party 5 | *.out 6 | checkpoints/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | cover/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | .pybuilder/ 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 116 | # https://pdm.fming.dev/#use-with-ide 117 | .pdm.toml 118 | 119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 120 | __pypackages__/ 121 | 122 | # Celery stuff 123 | celerybeat-schedule 124 | celerybeat.pid 125 | 126 | # SageMath parsed files 127 | *.sage.py 128 | 129 | # Environments 130 | .env 131 | .venv 132 | env/ 133 | venv/ 134 | ENV/ 135 | env.bak/ 136 | venv.bak/ 137 | 138 | # Spyder project settings 139 | .spyderproject 140 | .spyproject 141 | 142 | # Rope project settings 143 | .ropeproject 144 | 145 | # mkdocs documentation 146 | /site 147 | 148 | # mypy 149 | .mypy_cache/ 150 | .dmypy.json 151 | dmypy.json 152 | 153 | # Pyre type checker 154 | .pyre/ 155 | 156 | # pytype static type analyzer 157 | .pytype/ 158 | 159 | # Cython debug symbols 160 | cython_debug/ 161 | 162 | # PyCharm 163 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 164 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 165 | # and can be added to the global gitignore or merged into this file. For a more nuclear 166 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 167 | .idea/ 168 | -------------------------------------------------------------------------------- /benchmark/util.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sys 4 | 5 | try: 6 | import numpy as np 7 | except: 8 | print("Failed to import numpy package.") 9 | sys.exit(-1) 10 | try: 11 | import imageio 12 | except: 13 | print("Please install the module 'imageio' for image processing, e.g.") 14 | print("pip install imageio") 15 | sys.exit(-1) 16 | 17 | # print an error message and quit 18 | def print_error(message, user_fault=False): 19 | sys.stderr.write("ERROR: " + str(message) + "\n") 20 | if user_fault: 21 | sys.exit(2) 22 | sys.exit(-1) 23 | 24 | 25 | # if string s represents an int 26 | def represents_int(s): 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping( 35 | filename, label_from="raw_category", label_to="nyu40id" 36 | ): 37 | assert os.path.isfile(filename) 38 | mapping = dict() 39 | with open(filename) as csvfile: 40 | reader = csv.DictReader(csvfile, delimiter="\t") 41 | for row in reader: 42 | mapping[row[label_from]] = int(row[label_to]) 43 | # if ints convert 44 | if represents_int(list(mapping.keys())[0]): 45 | mapping = {int(k): v for k, v in mapping.items()} 46 | return mapping 47 | 48 | 49 | # input: scene_types.txt or scene_types_all.txt 50 | def read_scene_types_mapping(filename, remove_spaces=True): 51 | assert os.path.isfile(filename) 52 | mapping = dict() 53 | lines = open(filename).read().splitlines() 54 | lines = [line.split("\t") for line in lines] 55 | if remove_spaces: 56 | mapping = {x[1].strip(): int(x[0]) for x in lines} 57 | else: 58 | mapping = {x[1]: int(x[0]) for x in lines} 59 | return mapping 60 | 61 | 62 | # color by label 63 | def visualize_label_image(filename, image): 64 | height = image.shape[0] 65 | width = image.shape[1] 66 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 67 | color_palette = create_color_palette() 68 | for idx, color in enumerate(color_palette): 69 | vis_image[image == idx] = color 70 | imageio.imwrite(filename, vis_image) 71 | 72 | 73 | # color by different instances (mod length of color palette) 74 | def visualize_instance_image(filename, image): 75 | height = image.shape[0] 76 | width = image.shape[1] 77 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 78 | color_palette = create_color_palette() 79 | instances = np.unique(image) 80 | for idx, inst in enumerate(instances): 81 | vis_image[image == inst] = color_palette[inst % len(color_palette)] 82 | imageio.imwrite(filename, vis_image) 83 | 84 | 85 | # color palette for nyu40 labels 86 | def create_color_palette(): 87 | return [ 88 | (0, 0, 0), 89 | (174, 199, 232), # wall 90 | (152, 223, 138), # floor 91 | (31, 119, 180), # cabinet 92 | (255, 187, 120), # bed 93 | (188, 189, 34), # chair 94 | (140, 86, 75), # sofa 95 | (255, 152, 150), # table 96 | (214, 39, 40), # door 97 | (197, 176, 213), # window 98 | (148, 103, 189), # bookshelf 99 | (196, 156, 148), # picture 100 | (23, 190, 207), # counter 101 | (178, 76, 76), 102 | (247, 182, 210), # desk 103 | (66, 188, 102), 104 | (219, 219, 141), # curtain 105 | (140, 57, 197), 106 | (202, 185, 52), 107 | (51, 176, 203), 108 | (200, 54, 131), 109 | (92, 193, 61), 110 | (78, 71, 183), 111 | (172, 114, 82), 112 | (255, 127, 14), # refrigerator 113 | (91, 163, 138), 114 | (153, 98, 156), 115 | (140, 153, 101), 116 | (158, 218, 229), # shower curtain 117 | (100, 125, 154), 118 | (178, 127, 135), 119 | (120, 185, 128), 120 | (146, 111, 194), 121 | (44, 160, 44), # toilet 122 | (112, 128, 144), # sink 123 | (96, 207, 209), 124 | (227, 119, 194), # bathtub 125 | (213, 92, 176), 126 | (94, 106, 211), 127 | (82, 84, 163), # otherfurn 128 | (100, 85, 144), 129 | ] 130 | -------------------------------------------------------------------------------- /utils/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h: -------------------------------------------------------------------------------- 1 | #ifndef _RPE_V2_CUDA_KERNEL 2 | #define _RPE_V2_CUDA_KERNEL 3 | #include 4 | #include 5 | #include 6 | 7 | void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor); 8 | void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 9 | 10 | void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 11 | void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor); 12 | 13 | void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); 14 | void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output); 21 | void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k); 22 | 23 | void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output); 24 | void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k); 25 | 26 | void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output); 27 | void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | #endif 33 | -------------------------------------------------------------------------------- /occlusion_subsets/split_test_occlusion_high.txt: -------------------------------------------------------------------------------- 1 | recording_20210910_S06_S05_02_scene_main_02741.ply 2 | recording_20210911_S07_S06_02_scene_sub_2_01231.ply 3 | recording_20210918_S05_S06_01_scene_sub_2_02241.ply 4 | recording_20210918_S05_S06_02_scene_sub_2_00751.ply 5 | recording_20210918_S05_S06_02_scene_sub_2_00961.ply 6 | recording_20210918_S05_S06_02_scene_sub_2_03031.ply 7 | recording_20210918_S05_S06_02_scene_sub_2_03511.ply 8 | recording_20210918_S05_S06_02_scene_sub_2_03901.ply 9 | recording_20210918_S05_S06_03_scene_sub_2_02321.ply 10 | recording_20210918_S05_S06_03_scene_sub_2_02531.ply 11 | recording_20210918_S05_S06_04_scene_sub_2_03361.ply 12 | recording_20210918_S05_S06_04_scene_sub_2_04021.ply 13 | recording_20210918_S05_S06_05_scene_main_01021.ply 14 | recording_20210918_S05_S06_05_scene_sub_2_01051.ply 15 | recording_20210918_S05_S06_05_scene_sub_2_01231.ply 16 | recording_20210918_S05_S06_05_scene_sub_2_01591.ply 17 | recording_20210918_S05_S06_05_scene_sub_2_03451.ply 18 | recording_20210918_S05_S09_01_scene_sub_2_01541.ply 19 | recording_20210918_S05_S09_01_scene_sub_2_01601.ply 20 | recording_20210918_S05_S09_01_scene_sub_2_02591.ply 21 | recording_20210918_S06_S05_01_scene_sub_2_02831.ply 22 | recording_20210918_S06_S05_01_scene_sub_2_02891.ply 23 | recording_20210918_S06_S05_01_scene_sub_2_02981.ply 24 | recording_20210918_S06_S05_01_scene_sub_2_03071.ply 25 | recording_20210918_S06_S05_02_scene_sub_2_03721.ply 26 | recording_20210918_S06_S05_02_scene_sub_2_03781.ply 27 | recording_20210918_S06_S05_02_scene_sub_2_03841.ply 28 | recording_20210918_S06_S05_03_scene_main_01301.ply 29 | recording_20210918_S06_S05_03_scene_sub_2_00971.ply 30 | recording_20210918_S06_S05_03_scene_sub_2_01271.ply 31 | recording_20210918_S06_S05_03_scene_sub_2_01331.ply 32 | recording_20210918_S06_S05_03_scene_sub_2_01451.ply 33 | recording_20210918_S09_S05_01_scene_sub_2_01881.ply 34 | recording_20210918_S09_S05_01_scene_sub_2_02001.ply 35 | recording_20210918_S09_S05_01_scene_sub_2_02421.ply 36 | recording_20210918_S09_S05_01_scene_sub_2_02481.ply 37 | recording_20210918_S09_S05_02_scene_sub_2_03091.ply 38 | recording_20210918_S09_S05_02_scene_sub_2_03181.ply 39 | recording_20210918_S09_S05_02_scene_sub_2_03781.ply 40 | recording_20210918_S09_S05_02_scene_sub_2_03931.ply 41 | recording_20210918_S09_S05_02_scene_sub_2_04231.ply 42 | recording_20210918_S09_S05_03_scene_sub_2_01751.ply 43 | recording_20210918_S09_S05_03_scene_sub_2_02291.ply 44 | recording_20210918_S09_S05_03_scene_sub_2_02441.ply 45 | recording_20210918_S09_S05_03_scene_sub_2_02501.ply 46 | recording_20210923_S05_S13_01_scene_sub_1_03521.ply 47 | recording_20210923_S05_S13_01_scene_sub_1_03551.ply 48 | recording_20210923_S13_S05_01_scene_sub_1_02811.ply 49 | recording_20210923_S13_S05_01_scene_sub_1_03471.ply 50 | recording_20210923_S13_S05_01_scene_sub_1_03501.ply 51 | recording_20210929_S05_S16_02_scene_sub_1_02831.ply 52 | recording_20210929_S05_S16_02_scene_sub_1_03041.ply 53 | recording_20210929_S05_S16_04_scene_sub_1_02771.ply 54 | recording_20210929_S16_S05_01_scene_sub_1_03301.ply 55 | recording_20211004_S19_S06_01_scene_sub_1_03161.ply 56 | recording_20211004_S19_S06_01_scene_sub_1_03671.ply 57 | recording_20211004_S19_S06_01_scene_sub_1_03821.ply 58 | recording_20211004_S19_S06_01_scene_sub_1_04661.ply 59 | recording_20211004_S19_S06_02_scene_sub_1_03001.ply 60 | recording_20211004_S19_S06_02_scene_sub_1_03421.ply 61 | recording_20211004_S19_S06_03_scene_main_03781.ply 62 | recording_20211004_S19_S06_03_scene_sub_1_04171.ply 63 | recording_20211004_S19_S06_04_scene_sub_1_02181.ply 64 | recording_20211004_S19_S06_04_scene_sub_1_02211.ply 65 | recording_20211004_S19_S06_04_scene_sub_1_02931.ply 66 | recording_20220415_S35_S36_01_scene_sub_3_02391.ply 67 | recording_20220415_S35_S36_01_scene_sub_3_03291.ply 68 | recording_20220415_S35_S36_02_scene_sub_4_02351.ply 69 | recording_20220415_S35_S36_02_scene_sub_4_03011.ply 70 | recording_20220415_S36_S35_01_scene_sub_1_03681.ply 71 | recording_20220415_S36_S35_01_scene_sub_2_02151.ply 72 | recording_20220415_S36_S35_01_scene_sub_2_02241.ply 73 | recording_20220415_S36_S35_01_scene_sub_3_01851.ply 74 | recording_20220415_S36_S35_01_scene_sub_3_03711.ply 75 | recording_20220415_S36_S35_01_scene_sub_4_01401.ply 76 | recording_20220415_S36_S35_01_scene_sub_4_01791.ply 77 | recording_20220415_S36_S35_01_scene_sub_4_02511.ply 78 | recording_20220415_S36_S35_02_scene_sub_3_02101.ply -------------------------------------------------------------------------------- /models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from MinkowskiEngine import MinkowskiReLU 3 | 4 | from models.modules.common import ConvType, NormType, conv, get_norm 5 | 6 | 7 | class BasicBlockBase(nn.Module): 8 | expansion = 1 9 | NORM_TYPE = NormType.BATCH_NORM 10 | 11 | def __init__( 12 | self, 13 | inplanes, 14 | planes, 15 | stride=1, 16 | dilation=1, 17 | downsample=None, 18 | conv_type=ConvType.HYPERCUBE, 19 | bn_momentum=0.1, 20 | D=3, 21 | ): 22 | super().__init__() 23 | 24 | self.conv1 = conv( 25 | inplanes, 26 | planes, 27 | kernel_size=3, 28 | stride=stride, 29 | dilation=dilation, 30 | conv_type=conv_type, 31 | D=D, 32 | ) 33 | self.norm1 = get_norm( 34 | self.NORM_TYPE, planes, D, bn_momentum=bn_momentum 35 | ) 36 | self.conv2 = conv( 37 | planes, 38 | planes, 39 | kernel_size=3, 40 | stride=1, 41 | dilation=dilation, 42 | bias=False, 43 | conv_type=conv_type, 44 | D=D, 45 | ) 46 | self.norm2 = get_norm( 47 | self.NORM_TYPE, planes, D, bn_momentum=bn_momentum 48 | ) 49 | self.relu = MinkowskiReLU(inplace=True) 50 | self.downsample = downsample 51 | 52 | def forward(self, x): 53 | residual = x 54 | 55 | out = self.conv1(x) 56 | out = self.norm1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.norm2(out) 61 | 62 | if self.downsample is not None: 63 | residual = self.downsample(x) 64 | 65 | out += residual 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | 71 | class BasicBlock(BasicBlockBase): 72 | NORM_TYPE = NormType.BATCH_NORM 73 | 74 | 75 | class BasicBlockIN(BasicBlockBase): 76 | NORM_TYPE = NormType.INSTANCE_NORM 77 | 78 | 79 | class BasicBlockINBN(BasicBlockBase): 80 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 81 | 82 | 83 | class BottleneckBase(nn.Module): 84 | expansion = 4 85 | NORM_TYPE = NormType.BATCH_NORM 86 | 87 | def __init__( 88 | self, 89 | inplanes, 90 | planes, 91 | stride=1, 92 | dilation=1, 93 | downsample=None, 94 | conv_type=ConvType.HYPERCUBE, 95 | bn_momentum=0.1, 96 | D=3, 97 | ): 98 | super().__init__() 99 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 100 | self.norm1 = get_norm( 101 | self.NORM_TYPE, planes, D, bn_momentum=bn_momentum 102 | ) 103 | 104 | self.conv2 = conv( 105 | planes, 106 | planes, 107 | kernel_size=3, 108 | stride=stride, 109 | dilation=dilation, 110 | conv_type=conv_type, 111 | D=D, 112 | ) 113 | self.norm2 = get_norm( 114 | self.NORM_TYPE, planes, D, bn_momentum=bn_momentum 115 | ) 116 | 117 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 118 | self.norm3 = get_norm( 119 | self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum 120 | ) 121 | 122 | self.relu = MinkowskiReLU(inplace=True) 123 | self.downsample = downsample 124 | 125 | def forward(self, x): 126 | residual = x 127 | 128 | out = self.conv1(x) 129 | out = self.norm1(out) 130 | out = self.relu(out) 131 | 132 | out = self.conv2(out) 133 | out = self.norm2(out) 134 | out = self.relu(out) 135 | 136 | out = self.conv3(out) 137 | out = self.norm3(out) 138 | 139 | if self.downsample is not None: 140 | residual = self.downsample(x) 141 | 142 | out += residual 143 | out = self.relu(out) 144 | 145 | return out 146 | 147 | 148 | class Bottleneck(BottleneckBase): 149 | NORM_TYPE = NormType.BATCH_NORM 150 | 151 | 152 | class BottleneckIN(BottleneckBase): 153 | NORM_TYPE = NormType.INSTANCE_NORM 154 | 155 | 156 | class BottleneckINBN(BottleneckBase): 157 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 158 | -------------------------------------------------------------------------------- /models/metrics/confusionmatrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class ConfusionMatrix: 6 | """Constructs a confusion matrix for a multi-class classification problems. 7 | 8 | Does not support multi-label, multi-class problems. 9 | 10 | Keyword arguments: 11 | - num_classes (int): number of classes in the classification problem. 12 | - normalized (boolean, optional): Determines whether or not the confusion 13 | matrix is normalized or not. Default: False. 14 | 15 | Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter/confusionmeter.py 16 | """ 17 | 18 | def __init__(self, num_classes, ignore_label): 19 | super().__init__() 20 | 21 | self.conf = np.ndarray((num_classes, num_classes), dtype=np.int32) 22 | self.ignore_label = ignore_label 23 | self.num_classes = num_classes 24 | self.reset() 25 | 26 | def reset(self): 27 | self.conf.fill(0) 28 | 29 | def add(self, predicted, target): 30 | """Computes the confusion matrix 31 | 32 | The shape of the confusion matrix is K x K, where K is the number 33 | of classes. 34 | 35 | Keyword arguments: 36 | - predicted (Tensor or numpy.ndarray): Can be an N x K tensor/array of 37 | predicted scores obtained from the model for N examples and K classes, 38 | or an N-tensor/array of integer values between 0 and K-1. 39 | - target (Tensor or numpy.ndarray): Can be an N x K tensor/array of 40 | ground-truth classes for N examples and K classes, or an N-tensor/array 41 | of integer values between 0 and K-1. 42 | 43 | """ 44 | # _, predicted = predicted.max(1) 45 | 46 | # predicted = predicted.view(-1) 47 | # target = target.view(-1) 48 | 49 | # If target and/or predicted are tensors, convert them to numpy arrays 50 | if torch.is_tensor(predicted): 51 | predicted = predicted.cpu().numpy() 52 | if torch.is_tensor(target): 53 | target = target.cpu().numpy() 54 | ind = ~np.isin(target, self.ignore_label) 55 | predicted, target = predicted[ind], target[ind] 56 | 57 | assert ( 58 | predicted.shape[0] == target.shape[0] 59 | ), "number of targets and predicted outputs do not match" 60 | 61 | if np.ndim(predicted) != 1: 62 | assert ( 63 | predicted.shape[1] == self.num_classes 64 | ), "number of predictions does not match size of confusion matrix" 65 | predicted = np.argmax(predicted, 1) 66 | else: 67 | assert (predicted.max() < self.num_classes) and ( 68 | predicted.min() >= 0 69 | ), "predicted values are not between 0 and k-1" 70 | 71 | if np.ndim(target) != 1: 72 | assert ( 73 | target.shape[1] == self.num_classes 74 | ), "Onehot target does not match size of confusion matrix" 75 | assert (target >= 0).all() and ( 76 | target <= 1 77 | ).all(), "in one-hot encoding, target values should be 0 or 1" 78 | assert ( 79 | target.sum(1) == 1 80 | ).all(), "multi-label setting is not supported" 81 | target = np.argmax(target, 1) 82 | else: 83 | assert (target.max() < self.num_classes) and ( 84 | target.min() >= 0 85 | ), "target values are not between 0 and k-1" 86 | 87 | # hack for bincounting 2 arrays together 88 | x = predicted + self.num_classes * target 89 | bincount_2d = np.bincount( 90 | x.astype(np.int32), minlength=self.num_classes**2 91 | ) 92 | assert bincount_2d.size == self.num_classes**2 93 | conf = bincount_2d.reshape((self.num_classes, self.num_classes)) 94 | 95 | self.conf += conf 96 | 97 | def value(self, normalized=False): 98 | """ 99 | Returns: 100 | Confustion matrix of K rows and K columns, where rows corresponds 101 | to ground-truth targets and columns corresponds to predicted 102 | targets. 103 | """ 104 | if normalized: 105 | conf = self.conf.astype(np.float32) 106 | return conf / conf.sum(1).clip(min=1e-12)[:, None] 107 | return self.conf 108 | -------------------------------------------------------------------------------- /models/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py 3 | """ 4 | Misc functions, including distributed helpers. 5 | 6 | Mostly copy-paste from torchvision references. 7 | """ 8 | from typing import List, Optional 9 | 10 | import torch 11 | import torch.distributed as dist 12 | import torchvision 13 | from torch import Tensor 14 | 15 | 16 | def _max_by_axis(the_list): 17 | # type: (List[List[int]]) -> List[int] 18 | maxes = the_list[0] 19 | for sublist in the_list[1:]: 20 | for index, item in enumerate(sublist): 21 | maxes[index] = max(maxes[index], item) 22 | return maxes 23 | 24 | 25 | class NestedTensor(object): 26 | def __init__(self, tensors, mask: Optional[Tensor]): 27 | self.tensors = tensors 28 | self.mask = mask 29 | 30 | def to(self, device): 31 | # type: (Device) -> NestedTensor # noqa 32 | cast_tensor = self.tensors.to(device) 33 | mask = self.mask 34 | if mask is not None: 35 | assert mask is not None 36 | cast_mask = mask.to(device) 37 | else: 38 | cast_mask = None 39 | return NestedTensor(cast_tensor, cast_mask) 40 | 41 | def decompose(self): 42 | return self.tensors, self.mask 43 | 44 | def __repr__(self): 45 | return str(self.tensors) 46 | 47 | 48 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 49 | # TODO make this more general 50 | if tensor_list[0].ndim == 3: 51 | if torchvision._is_tracing(): 52 | # nested_tensor_from_tensor_list() does not export well to ONNX 53 | # call _onnx_nested_tensor_from_tensor_list() instead 54 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 55 | 56 | # TODO make it support different-sized images 57 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 58 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 59 | batch_shape = [len(tensor_list)] + max_size 60 | b, c, h, w = batch_shape 61 | dtype = tensor_list[0].dtype 62 | device = tensor_list[0].device 63 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 64 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 65 | for img, pad_img, m in zip(tensor_list, tensor, mask): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | m[: img.shape[1], : img.shape[2]] = False 68 | else: 69 | raise ValueError("not supported") 70 | return NestedTensor(tensor, mask) 71 | 72 | 73 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 74 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 75 | @torch.jit.unused 76 | def _onnx_nested_tensor_from_tensor_list( 77 | tensor_list: List[Tensor], 78 | ) -> NestedTensor: 79 | max_size = [] 80 | for i in range(tensor_list[0].dim()): 81 | max_size_i = torch.max( 82 | torch.stack([img.shape[i] for img in tensor_list]).to( 83 | torch.float32 84 | ) 85 | ).to(torch.int64) 86 | max_size.append(max_size_i) 87 | max_size = tuple(max_size) 88 | 89 | # work around for 90 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 91 | # m[: img.shape[1], :img.shape[2]] = False 92 | # which is not yet supported in onnx 93 | padded_imgs = [] 94 | padded_masks = [] 95 | for img in tensor_list: 96 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 97 | padded_img = torch.nn.functional.pad( 98 | img, (0, padding[2], 0, padding[1], 0, padding[0]) 99 | ) 100 | padded_imgs.append(padded_img) 101 | 102 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 103 | padded_mask = torch.nn.functional.pad( 104 | m, (0, padding[2], 0, padding[1]), "constant", 1 105 | ) 106 | padded_masks.append(padded_mask.to(torch.bool)) 107 | 108 | tensor = torch.stack(padded_imgs) 109 | mask = torch.stack(padded_masks) 110 | 111 | return NestedTensor(tensor, mask=mask) 112 | 113 | 114 | def is_dist_avail_and_initialized(): 115 | if not dist.is_available(): 116 | return False 117 | if not dist.is_initialized(): 118 | return False 119 | return True 120 | -------------------------------------------------------------------------------- /utils/pointops2/src/attention/attention_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include "../cuda_utils.h" 2 | #include "attention_cuda_kernel.h" 3 | 4 | 5 | __global__ void attention_step1_forward_cuda_kernel( // M, h, C//h 6 | int N, int M, int h, int C, const float *q, const float *k, 7 | const int *index0, const int *index1, float *attn) { 8 | 9 | int c_idx = blockIdx.z; 10 | int h_idx = blockIdx.y; 11 | int m_idx = blockIdx.x * blockDim.x + threadIdx.x; 12 | if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; 13 | 14 | int idx0 = index0[m_idx]; 15 | int idx1 = index1[m_idx]; 16 | float val = q[idx0*C+h_idx*C/h+c_idx] * k[idx1*C+h_idx*C/h+c_idx]; 17 | atomicAdd(attn+m_idx*h+h_idx, val); 18 | } 19 | 20 | __global__ void attention_step1_backward_cuda_kernel( // M, h, C//h 21 | int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, 22 | float *grad_q, float *grad_k) { 23 | 24 | int c_idx = blockIdx.z; 25 | int h_idx = blockIdx.y; 26 | int m_idx = blockIdx.x * blockDim.x + threadIdx.x; 27 | if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; 28 | 29 | int idx0 = index0[m_idx]; 30 | int idx1 = index1[m_idx]; 31 | int grad_out_idx = m_idx*h+h_idx; 32 | int q_idx = idx0*C+h_idx*C/h+c_idx; 33 | int k_idx = idx1*C+h_idx*C/h+c_idx; 34 | atomicAdd(grad_q+q_idx, grad_out[grad_out_idx] * k[k_idx]); 35 | atomicAdd(grad_k+k_idx, grad_out[grad_out_idx] * q[q_idx]); 36 | } 37 | 38 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, 39 | const int *index0, const int *index1, float *attn) { 40 | // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) 41 | //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); 42 | dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); 43 | dim3 threads(THREADS_PER_BLOCK); 44 | attention_step1_forward_cuda_kernel<<>>(N, M, h, C, q, k, index0, index1, attn); 45 | } 46 | 47 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, 48 | const float *q, const float *k, float *grad_q, float *grad_k) { 49 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 50 | //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); 51 | dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); 52 | dim3 threads(THREADS_PER_BLOCK); 53 | attention_step1_backward_cuda_kernel<<>>(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); 54 | } 55 | 56 | __global__ void attention_step2_forward_cuda_kernel( // M, h, C//h 57 | int N, int M, int h, int C, const float *attn, const float *v, 58 | const int *index0, const int *index1, float *output) { 59 | 60 | int c_idx = blockIdx.z; 61 | int h_idx = blockIdx.y; 62 | int m_idx = blockIdx.x * blockDim.x + threadIdx.x; 63 | if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; 64 | 65 | int idx1 = index1[m_idx]; 66 | float val = attn[m_idx*h+h_idx] * v[idx1*C+h_idx*C/h+c_idx]; 67 | int idx0 = index0[m_idx]; 68 | atomicAdd(output+idx0*C+h_idx*C/h+c_idx, val); 69 | } 70 | 71 | __global__ void attention_step2_backward_cuda_kernel( // M, h, C//h 72 | int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, 73 | float *grad_attn, float *grad_v) { 74 | 75 | int c_idx = blockIdx.z; 76 | int h_idx = blockIdx.y; 77 | int m_idx = blockIdx.x * blockDim.x + threadIdx.x; 78 | if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; 79 | 80 | int idx0 = index0[m_idx]; 81 | int idx1 = index1[m_idx]; 82 | int grad_out_idx = idx0*C+h_idx*C/h+c_idx; 83 | atomicAdd(grad_attn+m_idx*h+h_idx, grad_out[grad_out_idx] * v[idx1*C+h_idx*C/h+c_idx]); 84 | atomicAdd(grad_v+idx1*C+h_idx*C/h+c_idx, grad_out[grad_out_idx] * attn[m_idx*h+h_idx]); 85 | } 86 | 87 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, 88 | const int *index0, const int *index1, float *output) { 89 | // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) 90 | //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); 91 | dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); 92 | dim3 threads(THREADS_PER_BLOCK); 93 | attention_step2_forward_cuda_kernel<<>>(N, M, h, C, attn, v, index0, index1, output); 94 | } 95 | 96 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, 97 | const float *attn, const float *v, float *grad_attn, float *grad_v) { 98 | // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) 99 | //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); 100 | dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); 101 | dim3 threads(THREADS_PER_BLOCK); 102 | attention_step2_backward_cuda_kernel<<>>(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); 103 | } 104 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info[:2] >= (3, 8): 4 | from collections.abc import MutableMapping 5 | else: 6 | from collections import MutableMapping 7 | 8 | import torch 9 | from loguru import logger 10 | 11 | 12 | def flatten_dict(d, parent_key="", sep="_"): 13 | """ 14 | https://stackoverflow.com/questions/6027558/flatten-nested-dictionaries-compressing-keys 15 | """ 16 | items = [] 17 | for k, v in d.items(): 18 | new_key = parent_key + sep + k if parent_key else k 19 | if isinstance(v, MutableMapping): 20 | items.extend(flatten_dict(v, new_key, sep=sep).items()) 21 | else: 22 | items.append((new_key, v)) 23 | return dict(items) 24 | 25 | 26 | def load_baseline_model(cfg, model): 27 | # if it is Minkoski weights 28 | cfg.model.in_channels = 3 29 | cfg.model.config.conv1_kernel_size = 5 30 | cfg.data.add_normals = False 31 | cfg.data.train_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)] 32 | cfg.data.validation_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)] 33 | cfg.data.test_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)] 34 | cfg.data.voxel_size = 0.02 35 | model = model(cfg) 36 | state_dict = torch.load(cfg.general.checkpoint)["state_dict"] 37 | model.model.load_state_dict(state_dict) 38 | return cfg, model 39 | 40 | 41 | def load_backbone_checkpoint_with_missing_or_exsessive_keys(cfg, model): 42 | state_dict = torch.load(cfg.general.backbone_checkpoint)["state_dict"] 43 | correct_dict = dict(model.state_dict()) 44 | 45 | # if parametrs not found in checkpoint they will be randomly initialized 46 | for key in state_dict.keys(): 47 | if correct_dict.pop(f"model.backbone.{key}", None) is None: 48 | logger.warning( 49 | f"Key not found, it will be initialized randomly: {key}" 50 | ) 51 | 52 | # if parametrs have different shape, it will randomly initialize 53 | state_dict = torch.load(cfg.general.backbone_checkpoint)["state_dict"] 54 | correct_dict = dict(model.state_dict()) 55 | for key in correct_dict.keys(): 56 | if key.replace("model.backbone.", "") not in state_dict: 57 | logger.warning(f"{key} not in loaded checkpoint") 58 | state_dict.update( 59 | {key.replace("model.backbone.", ""): correct_dict[key]} 60 | ) 61 | elif ( 62 | state_dict[key.replace("model.backbone.", "")].shape 63 | != correct_dict[key].shape 64 | ): 65 | logger.warning( 66 | f"incorrect shape {key}:{state_dict[key.replace('model.backbone.', '')].shape} vs {correct_dict[key].shape}" 67 | ) 68 | state_dict.update({key: correct_dict[key]}) 69 | 70 | # if we have more keys just discard them 71 | correct_dict = dict(model.state_dict()) 72 | new_state_dict = dict() 73 | for key in state_dict.keys(): 74 | if f"model.backbone.{key}" in correct_dict.keys(): 75 | new_state_dict.update({f"model.backbone.{key}": state_dict[key]}) 76 | elif key in correct_dict.keys(): 77 | new_state_dict.update({key: correct_dict[key]}) 78 | else: 79 | logger.warning(f"excessive key: {key}") 80 | model.load_state_dict(new_state_dict) 81 | return cfg, model 82 | 83 | 84 | def load_checkpoint_with_missing_or_exsessive_keys(cfg, model): 85 | state_dict = torch.load(cfg.general.checkpoint)["state_dict"] 86 | correct_dict = dict(model.state_dict()) 87 | 88 | # if parametrs not found in checkpoint they will be randomly initialized 89 | for key in state_dict.keys(): 90 | if correct_dict.pop(key, None) is None: 91 | logger.warning( 92 | f"Key not found, it will be initialized randomly: {key}" 93 | ) 94 | 95 | # if parametrs have different shape, it will randomly initialize 96 | state_dict = torch.load(cfg.general.checkpoint)["state_dict"] 97 | correct_dict = dict(model.state_dict()) 98 | for key in correct_dict.keys(): 99 | if key not in state_dict: 100 | logger.warning(f"{key} not in loaded checkpoint") 101 | state_dict.update({key: correct_dict[key]}) 102 | elif state_dict[key].shape != correct_dict[key].shape: 103 | logger.warning( 104 | f"incorrect shape {key}:{state_dict[key].shape} vs {correct_dict[key].shape}" 105 | ) 106 | state_dict.update({key: correct_dict[key]}) 107 | 108 | # if we have more keys just discard them 109 | correct_dict = dict(model.state_dict()) 110 | new_state_dict = dict() 111 | for key in state_dict.keys(): 112 | if key in correct_dict.keys(): 113 | new_state_dict.update({key: state_dict[key]}) 114 | else: 115 | logger.warning(f"excessive key: {key}") 116 | model.load_state_dict(new_state_dict) 117 | return cfg, model 118 | 119 | 120 | def freeze_until(net, param_name: str = None): 121 | """ 122 | Freeze net until param_name 123 | https://opendatascience.slack.com/archives/CGK4KQBHD/p1588373239292300?thread_ts=1588105223.275700&cid=CGK4KQBHD 124 | Args: 125 | net: 126 | param_name: 127 | Returns: 128 | """ 129 | found_name = False 130 | for name, params in net.named_parameters(): 131 | if name == param_name: 132 | found_name = True 133 | params.requires_grad = found_name 134 | -------------------------------------------------------------------------------- /occlusion_subsets/split_test_occlusion_mid.txt: -------------------------------------------------------------------------------- 1 | recording_20210911_S07_S06_01_scene_main_02001.ply 2 | recording_20210918_S05_S06_01_scene_sub_1_03561.ply 3 | recording_20210918_S05_S06_01_scene_sub_2_02421.ply 4 | recording_20210918_S05_S06_01_scene_sub_2_02511.ply 5 | recording_20210918_S05_S06_01_scene_sub_2_02601.ply 6 | recording_20210918_S05_S06_01_scene_sub_2_03351.ply 7 | recording_20210918_S05_S06_02_scene_main_00991.ply 8 | recording_20210918_S05_S06_03_scene_main_02651.ply 9 | recording_20210918_S05_S06_03_scene_sub_1_01751.ply 10 | recording_20210918_S05_S06_04_scene_main_03271.ply 11 | recording_20210918_S05_S06_04_scene_main_04051.ply 12 | recording_20210918_S05_S06_04_scene_sub_1_03511.ply 13 | recording_20210918_S05_S06_04_scene_sub_1_03931.ply 14 | recording_20210918_S05_S06_04_scene_sub_2_03451.ply 15 | recording_20210918_S05_S06_04_scene_sub_2_03811.ply 16 | recording_20210918_S05_S06_05_scene_sub_1_03751.ply 17 | recording_20210918_S05_S06_05_scene_sub_2_01801.ply 18 | recording_20210918_S05_S06_05_scene_sub_2_02101.ply 19 | recording_20210918_S05_S09_01_scene_main_01571.ply 20 | recording_20210918_S05_S09_01_scene_sub_1_01391.ply 21 | recording_20210918_S05_S09_01_scene_sub_2_01421.ply 22 | recording_20210918_S05_S09_01_scene_sub_2_01451.ply 23 | recording_20210918_S06_S05_01_scene_sub_1_01811.ply 24 | recording_20210918_S06_S05_01_scene_sub_1_03011.ply 25 | recording_20210918_S06_S05_01_scene_sub_2_02021.ply 26 | recording_20210918_S06_S05_01_scene_sub_2_02081.ply 27 | recording_20210918_S06_S05_02_scene_main_03421.ply 28 | recording_20210918_S06_S05_02_scene_sub_1_03361.ply 29 | recording_20210918_S06_S05_02_scene_sub_2_03391.ply 30 | recording_20210918_S06_S05_02_scene_sub_2_03661.ply 31 | recording_20210918_S06_S05_03_scene_sub_1_00941.ply 32 | recording_20210918_S06_S05_03_scene_sub_2_00911.ply 33 | recording_20210918_S09_S05_01_scene_main_01761.ply 34 | recording_20210918_S09_S05_01_scene_sub_1_01581.ply 35 | recording_20210918_S09_S05_01_scene_sub_2_01611.ply 36 | recording_20210918_S09_S05_02_scene_main_04261.ply 37 | recording_20210918_S09_S05_02_scene_sub_1_04081.ply 38 | recording_20210918_S09_S05_02_scene_sub_1_04201.ply 39 | recording_20210918_S09_S05_03_scene_main_01961.ply 40 | recording_20210918_S09_S05_03_scene_main_02351.ply 41 | recording_20210918_S09_S05_03_scene_sub_1_01901.ply 42 | recording_20210918_S09_S05_03_scene_sub_1_01931.ply 43 | recording_20210923_S05_S13_01_scene_main_03161.ply 44 | recording_20210923_S05_S13_01_scene_main_03371.ply 45 | recording_20210923_S05_S13_01_scene_sub_2_03101.ply 46 | recording_20210923_S05_S13_01_scene_sub_2_03911.ply 47 | recording_20210923_S13_S05_01_scene_main_01701.ply 48 | recording_20210923_S13_S05_01_scene_main_03471.ply 49 | recording_20210923_S13_S05_01_scene_sub_1_02061.ply 50 | recording_20210929_S05_S16_01_scene_sub_1_01181.ply 51 | recording_20210929_S05_S16_01_scene_sub_1_02321.ply 52 | recording_20210929_S05_S16_03_scene_sub_1_00991.ply 53 | recording_20210929_S05_S16_03_scene_sub_1_01261.ply 54 | recording_20210929_S05_S16_03_scene_sub_2_01081.ply 55 | recording_20210929_S05_S16_04_scene_sub_1_02381.ply 56 | recording_20210929_S05_S16_04_scene_sub_1_02471.ply 57 | recording_20210929_S16_S05_01_scene_sub_1_02131.ply 58 | recording_20210929_S16_S05_01_scene_sub_2_01201.ply 59 | recording_20210929_S16_S05_01_scene_sub_2_02251.ply 60 | recording_20210929_S16_S05_01_scene_sub_2_03841.ply 61 | recording_20210929_S16_S05_01_scene_sub_2_04651.ply 62 | recording_20211004_S19_S06_01_scene_main_04301.ply 63 | recording_20211004_S19_S06_01_scene_sub_1_04271.ply 64 | recording_20211004_S19_S06_01_scene_sub_2_04391.ply 65 | recording_20211004_S19_S06_01_scene_sub_2_04541.ply 66 | recording_20211004_S19_S06_02_scene_sub_1_02911.ply 67 | recording_20211004_S19_S06_02_scene_sub_2_02461.ply 68 | recording_20211004_S19_S06_02_scene_sub_2_02611.ply 69 | recording_20211004_S19_S06_02_scene_sub_2_03421.ply 70 | recording_20211004_S19_S06_02_scene_sub_2_03481.ply 71 | recording_20211004_S19_S06_03_scene_main_04111.ply 72 | recording_20211004_S19_S06_03_scene_main_04171.ply 73 | recording_20211004_S19_S06_03_scene_sub_1_03781.ply 74 | recording_20211004_S19_S06_03_scene_sub_1_04021.ply 75 | recording_20211004_S19_S06_03_scene_sub_1_04051.ply 76 | recording_20211004_S19_S06_03_scene_sub_2_03781.ply 77 | recording_20211004_S19_S06_04_scene_main_02481.ply 78 | recording_20211004_S19_S06_04_scene_main_02901.ply 79 | recording_20211004_S19_S06_04_scene_sub_1_02061.ply 80 | recording_20211004_S19_S06_04_scene_sub_1_02511.ply 81 | recording_20211004_S19_S06_04_scene_sub_1_02661.ply 82 | recording_20211004_S19_S06_05_scene_main_03111.ply 83 | recording_20211004_S19_S06_05_scene_main_03141.ply 84 | recording_20211004_S19_S06_05_scene_sub_1_03441.ply 85 | recording_20211004_S19_S06_05_scene_sub_1_03501.ply 86 | recording_20211004_S19_S06_05_scene_sub_1_03531.ply 87 | recording_20211004_S19_S06_05_scene_sub_2_03531.ply 88 | recording_20211004_S19_S06_05_scene_sub_2_03741.ply 89 | recording_20220415_S35_S36_01_scene_sub_4_01461.ply 90 | recording_20220415_S35_S36_01_scene_sub_4_01941.ply 91 | recording_20220415_S35_S36_01_scene_sub_4_03411.ply 92 | recording_20220415_S35_S36_02_scene_sub_1_02201.ply 93 | recording_20220415_S35_S36_02_scene_sub_1_02531.ply 94 | recording_20220415_S35_S36_02_scene_sub_3_02771.ply 95 | recording_20220415_S35_S36_02_scene_sub_3_02801.ply 96 | recording_20220415_S35_S36_02_scene_sub_3_03101.ply 97 | recording_20220415_S35_S36_02_scene_sub_4_02321.ply 98 | recording_20220415_S36_S35_02_scene_main_01801.ply 99 | recording_20220415_S36_S35_02_scene_main_02551.ply 100 | recording_20220415_S36_S35_02_scene_main_02881.ply 101 | recording_20220415_S36_S35_02_scene_sub_1_02161.ply 102 | recording_20220415_S36_S35_02_scene_sub_1_03571.ply 103 | recording_20220415_S36_S35_02_scene_sub_2_03751.ply 104 | recording_20220415_S36_S35_02_scene_sub_4_03751.ply -------------------------------------------------------------------------------- /third_party/pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: unknown(b, n, 3) known(b, m, 3) 11 | // output: dist2(b, n, 3), idx(b, n, 3) 12 | __global__ void three_nn_kernel(int b, int n, int m, 13 | const float *__restrict__ unknown, 14 | const float *__restrict__ known, 15 | float *__restrict__ dist2, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | unknown += batch_index * n * 3; 19 | known += batch_index * m * 3; 20 | dist2 += batch_index * n * 3; 21 | idx += batch_index * n * 3; 22 | 23 | int index = threadIdx.x; 24 | int stride = blockDim.x; 25 | for (int j = index; j < n; j += stride) { 26 | float ux = unknown[j * 3 + 0]; 27 | float uy = unknown[j * 3 + 1]; 28 | float uz = unknown[j * 3 + 2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; 39 | besti3 = besti2; 40 | best2 = best1; 41 | besti2 = besti1; 42 | best1 = d; 43 | besti1 = k; 44 | } else if (d < best2) { 45 | best3 = best2; 46 | besti3 = besti2; 47 | best2 = d; 48 | besti2 = k; 49 | } else if (d < best3) { 50 | best3 = d; 51 | besti3 = k; 52 | } 53 | } 54 | dist2[j * 3 + 0] = best1; 55 | dist2[j * 3 + 1] = best2; 56 | dist2[j * 3 + 2] = best3; 57 | 58 | idx[j * 3 + 0] = besti1; 59 | idx[j * 3 + 1] = besti2; 60 | idx[j * 3 + 2] = besti3; 61 | } 62 | } 63 | 64 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 65 | const float *known, float *dist2, int *idx) { 66 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 67 | three_nn_kernel<<>>(b, n, m, unknown, known, 68 | dist2, idx); 69 | 70 | CUDA_CHECK_ERRORS(); 71 | } 72 | 73 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 74 | // output: out(b, c, n) 75 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 76 | const float *__restrict__ points, 77 | const int *__restrict__ idx, 78 | const float *__restrict__ weight, 79 | float *__restrict__ out) { 80 | int batch_index = blockIdx.x; 81 | points += batch_index * m * c; 82 | 83 | idx += batch_index * n * 3; 84 | weight += batch_index * n * 3; 85 | 86 | out += batch_index * n * c; 87 | 88 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 89 | const int stride = blockDim.y * blockDim.x; 90 | for (int i = index; i < c * n; i += stride) { 91 | const int l = i / n; 92 | const int j = i % n; 93 | float w1 = weight[j * 3 + 0]; 94 | float w2 = weight[j * 3 + 1]; 95 | float w3 = weight[j * 3 + 2]; 96 | 97 | int i1 = idx[j * 3 + 0]; 98 | int i2 = idx[j * 3 + 1]; 99 | int i3 = idx[j * 3 + 2]; 100 | 101 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 102 | points[l * m + i3] * w3; 103 | } 104 | } 105 | 106 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 107 | const float *points, const int *idx, 108 | const float *weight, float *out) { 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | three_interpolate_kernel<<>>( 111 | b, c, m, n, points, idx, weight, out); 112 | 113 | CUDA_CHECK_ERRORS(); 114 | } 115 | 116 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 117 | // output: grad_points(b, c, m) 118 | 119 | __global__ void three_interpolate_grad_kernel( 120 | int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, 122 | float *__restrict__ grad_points) { 123 | int batch_index = blockIdx.x; 124 | grad_out += batch_index * n * c; 125 | idx += batch_index * n * 3; 126 | weight += batch_index * n * 3; 127 | grad_points += batch_index * m * c; 128 | 129 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 130 | const int stride = blockDim.y * blockDim.x; 131 | for (int i = index; i < c * n; i += stride) { 132 | const int l = i / n; 133 | const int j = i % n; 134 | float w1 = weight[j * 3 + 0]; 135 | float w2 = weight[j * 3 + 1]; 136 | float w3 = weight[j * 3 + 2]; 137 | 138 | int i1 = idx[j * 3 + 0]; 139 | int i2 = idx[j * 3 + 1]; 140 | int i3 = idx[j * 3 + 2]; 141 | 142 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 143 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 144 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 145 | } 146 | } 147 | 148 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 149 | const float *grad_out, 150 | const int *idx, const float *weight, 151 | float *grad_points) { 152 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 153 | three_interpolate_grad_kernel<<>>( 154 | b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | CUDA_CHECK_ERRORS(); 157 | } 158 | --------------------------------------------------------------------------------