├── conf
    ├── __init__.py
    ├── optimizer
    │   └── adamw.yaml
    ├── metrics
    │   └── miou.yaml
    ├── matcher
    │   └── hungarian_matcher.yaml
    ├── trainer
    │   └── trainer600.yaml
    ├── logging
    │   └── full.yaml
    ├── loss
    │   ├── set_criterion.yaml
    │   └── set_criterion_hp.yaml
    ├── scheduler
    │   └── onecyclelr.yaml
    ├── callbacks
    │   ├── callbacks_instance_segmentation.yaml
    │   └── callbacks_instance_segmentation_human.yaml
    ├── data
    │   ├── data_loaders
    │   │   └── simple_loader.yaml
    │   ├── indoor.yaml
    │   ├── collation_functions
    │   │   └── voxelize_collate.yaml
    │   └── datasets
    │   │   ├── egobody.yaml
    │   │   └── synthetic_humans.yaml
    ├── augmentation
    │   ├── albumentations_aug.yaml
    │   └── volumentations_aug.yaml
    ├── model
    │   ├── mask3d.yaml
    │   └── mask3d_hp.yaml
    └── config_base_instance_segmentation.yaml
├── utils
    ├── __init__.py
    ├── pointops2
    │   ├── __init__.py
    │   ├── src
    │   │   ├── __init__.py
    │   │   ├── sampling
    │   │   │   ├── sampling_cuda_kernel.h
    │   │   │   └── sampling_cuda.cpp
    │   │   ├── knnquery
    │   │   │   ├── knnquery_cuda_kernel.h
    │   │   │   ├── knnquery_cuda.cpp
    │   │   │   └── knnquery_cuda_kernel.cu
    │   │   ├── cuda_utils.h
    │   │   ├── grouping
    │   │   │   ├── grouping_cuda_kernel.h
    │   │   │   ├── grouping_cuda.cpp
    │   │   │   └── grouping_cuda_kernel.cu
    │   │   ├── interpolation
    │   │   │   ├── interpolation_cuda_kernel.h
    │   │   │   ├── interpolation_cuda.cpp
    │   │   │   └── interpolation_cuda_kernel.cu
    │   │   ├── subtraction
    │   │   │   ├── subtraction_cuda_kernel.h
    │   │   │   ├── subtraction_cuda.cpp
    │   │   │   └── subtraction_cuda_kernel.cu
    │   │   ├── aggregation
    │   │   │   ├── aggregation_cuda_kernel.h
    │   │   │   ├── aggregation_cuda.cpp
    │   │   │   └── aggregation_cuda_kernel.cu
    │   │   ├── attention
    │   │   │   ├── attention_cuda_kernel.h
    │   │   │   ├── attention_cuda.cpp
    │   │   │   └── attention_cuda_kernel.cu
    │   │   ├── attention_v2
    │   │   │   ├── attention_cuda_kernel_v2.h
    │   │   │   └── attention_cuda_v2.cpp
    │   │   ├── rpe
    │   │   │   ├── relative_pos_encoding_cuda_kernel.h
    │   │   │   └── relative_pos_encoding_cuda.cpp
    │   │   ├── pointops_api.cpp
    │   │   └── rpe_v2
    │   │   │   └── relative_pos_encoding_cuda_kernel_v2.h
    │   ├── functions
    │   │   ├── __init__.py
    │   │   ├── test_attention_op_step2.py
    │   │   ├── test_relative_pos_encoding_op_step1.py
    │   │   ├── test_relative_pos_encoding_op_step1_v2.py
    │   │   ├── test_relative_pos_encoding_op_step2.py
    │   │   ├── test_attention_op_step1.py
    │   │   ├── test_relative_pos_encoding_op_step1_v3.py
    │   │   ├── test_relative_pos_encoding_op_step2_v2.py
    │   │   └── test_attention_op_step1_v2.py
    │   └── setup.py
    ├── votenet_utils
    │   ├── tf_visualizer.py
    │   ├── tf_logger.py
    │   └── nn_distance.py
    ├── gradflow_check.py
    ├── point_cloud_utils.py
    ├── kfold.py
    └── utils.py
├── benchmark
    ├── __init__.py
    └── util.py
├── trainer
    └── __init__.py
├── models
    ├── modules
    │   ├── __init__.py
    │   ├── 3detr_helpers.py
    │   ├── helpers_3detr.py
    │   ├── senet_block.py
    │   └── resnet_block.py
    ├── metrics
    │   ├── __init__.py
    │   ├── metrics.py
    │   └── confusionmatrix.py
    ├── model.py
    ├── wrapper.py
    ├── __init__.py
    └── misc.py
├── docs
    └── assets
    │   └── teaser.jpg
├── third_party
    └── pointnet2
    │   ├── _ext_src
    │       ├── include
    │       │   ├── ball_query.h
    │       │   ├── group_points.h
    │       │   ├── sampling.h
    │       │   ├── interpolate.h
    │       │   ├── utils.h
    │       │   └── cuda_utils.h
    │       └── src
    │       │   ├── bindings.cpp
    │       │   ├── ball_query.cpp
    │       │   ├── ball_query_gpu.cu
    │       │   ├── group_points.cpp
    │       │   ├── sampling.cpp
    │       │   ├── group_points_gpu.cu
    │       │   ├── interpolate.cpp
    │       │   └── interpolate_gpu.cu
    │   ├── pointnet2_test.py
    │   └── setup.py
├── scripts
    ├── eval
    │   ├── eval_human3d.sh
    │   └── eval_mask3d.sh
    └── train
    │   ├── train_human3d.sh
    │   └── train_mask3d.sh
├── download_checkpoints.sh
├── datasets
    └── random_cuboid.py
├── main.py
├── .gitignore
└── occlusion_subsets
    ├── split_test_occlusion_high.txt
    └── split_test_occlusion_mid.txt


/conf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/trainer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/pointops2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/conf/optimizer/adamw.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | _target_: torch.optim.AdamW
3 | lr: 0.0001


--------------------------------------------------------------------------------
/docs/assets/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/human-3d/Human3D/HEAD/docs/assets/teaser.jpg


--------------------------------------------------------------------------------
/models/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .confusionmatrix import ConfusionMatrix
2 | from .metrics import IoU
3 | 
4 | __all__ = ["ConfusionMatrix", "IoU"]
5 | 


--------------------------------------------------------------------------------
/conf/metrics/miou.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | _target_: models.metrics.ConfusionMatrix
3 | num_classes: ${data.num_labels}
4 | ignore_label: ${data.ignore_label}
5 | 


--------------------------------------------------------------------------------
/conf/matcher/hungarian_matcher.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | _target_: models.matcher.HungarianMatcher
3 | cost_class: 2.
4 | cost_mask: 5.
5 | cost_dice: 2.
6 | num_points: -1
7 | 


--------------------------------------------------------------------------------
/conf/trainer/trainer600.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | deterministic: false
3 | max_epochs: 601
4 | min_epochs: 1
5 | resume_from_checkpoint: null
6 | check_val_every_n_epoch: 50
7 | num_sanity_val_steps: -1
8 | 


--------------------------------------------------------------------------------
/conf/logging/full.yaml:
--------------------------------------------------------------------------------
1 | # @package _group_
2 | - _target_: pytorch_lightning.loggers.WandbLogger
3 |   project: ${general.project_name}
4 |   name: ${general.experiment_name}
5 |   save_dir: ${general.save_dir}
6 |   entity: "schult"
7 |   resume: "allow"
8 |   id: ${general.experiment_name}
9 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/ball_query.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #pragma once
4 | #include <torch/extension.h>
5 | 
6 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
7 |                       const int nsample);
8 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/group_points.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <torch/extension.h>
6 | 
7 | at::Tensor group_points(at::Tensor points, at::Tensor idx);
8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
9 | 


--------------------------------------------------------------------------------
/conf/loss/set_criterion.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | _target_: models.criterion.SetCriterion
 3 | num_classes: ${general.num_targets}
 4 | eos_coef: 0.1
 5 | losses:
 6 |   - "labels"
 7 |   - "masks"
 8 | num_points: ${matcher.num_points}
 9 | oversample_ratio: 3.0
10 | importance_sample_ratio: 0.75
11 | class_weights: -1
12 | 


--------------------------------------------------------------------------------
/conf/scheduler/onecyclelr.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | 
 3 | scheduler:
 4 |   _target_: torch.optim.lr_scheduler.OneCycleLR
 5 |   max_lr: ${optimizer.lr}
 6 |   epochs: ${trainer.max_epochs}
 7 |   # need to set to number because of tensorboard logger
 8 |   steps_per_epoch: -1
 9 | 
10 | pytorch_lightning_params:
11 |   interval: step
12 | 


--------------------------------------------------------------------------------
/conf/callbacks/callbacks_instance_segmentation.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | - _target_: pytorch_lightning.callbacks.ModelCheckpoint
 3 |   monitor: val_AP_50_parts
 4 |   save_last: true
 5 |   save_top_k: 1
 6 |   mode: max
 7 |   dirpath: ${general.save_dir}
 8 |   filename: "{epoch}-{val_AP_50_parts:.3f}"
 9 |   every_n_epochs: 1
10 | 
11 | - _target_: pytorch_lightning.callbacks.LearningRateMonitor
12 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/sampling.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #pragma once
 5 | #include <torch/extension.h>
 6 | 
 7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx);
 8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
 9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
10 | 


--------------------------------------------------------------------------------
/conf/callbacks/callbacks_instance_segmentation_human.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | - _target_: pytorch_lightning.callbacks.ModelCheckpoint
 3 |   monitor: val_AP_50_human
 4 |   save_last: true
 5 |   save_top_k: 1
 6 |   mode: max
 7 |   dirpath: ${general.save_dir}
 8 |   filename: "{epoch}-{val_AP_50_parts:.3f}"
 9 |   every_n_epochs: 1
10 | 
11 | - _target_: pytorch_lightning.callbacks.LearningRateMonitor
12 | 


--------------------------------------------------------------------------------
/conf/loss/set_criterion_hp.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | _target_: models.criterion_hp.SetCriterionHumanParts
 3 | num_classes: 2 # ${general.num_targets}
 4 | eos_coef: 0.1
 5 | losses:
 6 |   - "labels"
 7 |   - "masks"
 8 | num_points: ${matcher.num_points}
 9 | oversample_ratio: 3.0
10 | importance_sample_ratio: 0.75
11 | class_weights: -1
12 | num_human_queries: ${model.num_human_queries}
13 | num_parts_per_human_queries: ${model.num_parts_per_human_queries}
14 | num_parts: ${data.num_labels}
15 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/interpolate.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <torch/extension.h>
 6 | #include <vector>
 7 | 
 8 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
 9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
10 |                              at::Tensor weight);
11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
12 |                                   at::Tensor weight, const int m);
13 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/sampling/sampling_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SAMPLING_CUDA_KERNEL
 2 | #define _SAMPLING_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
 7 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 | 


--------------------------------------------------------------------------------
/conf/data/data_loaders/simple_loader.yaml:
--------------------------------------------------------------------------------
 1 | # @package data
 2 | 
 3 | train_dataloader:
 4 |   _target_: torch.utils.data.DataLoader
 5 |   shuffle: true
 6 |   pin_memory: ${data.pin_memory}
 7 |   num_workers: ${data.num_workers}
 8 |   batch_size: ${data.batch_size}
 9 | 
10 | validation_dataloader:
11 |   _target_: torch.utils.data.DataLoader
12 |   shuffle: false
13 |   pin_memory: ${data.pin_memory}
14 |   num_workers: ${data.num_workers}
15 |   batch_size: ${data.test_batch_size}
16 | 
17 | test_dataloader:
18 |   _target_: torch.utils.data.DataLoader
19 |   shuffle: false
20 |   pin_memory: ${data.pin_memory}
21 |   num_workers: ${data.num_workers}
22 |   batch_size: ${data.test_batch_size}
23 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KNNQUERY_CUDA_KERNEL
 2 | #define _KNNQUERY_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
 7 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | #endif
19 | 


--------------------------------------------------------------------------------
/scripts/eval/eval_human3d.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | python main.py \
 4 | general.experiment_name="Human3D_eval" \
 5 | general.project_name="human3d" \
 6 | data/datasets=egobody \
 7 | general.num_targets=16 \
 8 | data.num_labels=16 \
 9 | model=mask3d_hp \
10 | loss=set_criterion_hp \
11 | model.num_human_queries=5 \
12 | model.num_parts_per_human_queries=16 \
13 | trainer.check_val_every_n_epoch=1 \
14 | general.topk_per_image=-1 \
15 | model.non_parametric_queries=false \
16 | trainer.max_epochs=36 \
17 | data.batch_size=4 \
18 | data.num_workers=10 \
19 | general.reps_per_epoch=1 \
20 | model.config.backbone._target_=models.Res16UNet18B \
21 | general.checkpoint="checkpoints/human3d.ckpt" \
22 | general.train_mode=false \
23 | general.save_visualizations=false
24 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/bindings.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #include "ball_query.h"
 5 | #include "group_points.h"
 6 | #include "interpolate.h"
 7 | #include "sampling.h"
 8 | 
 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
10 |   m.def("gather_points", &gather_points);
11 |   m.def("gather_points_grad", &gather_points_grad);
12 |   m.def("furthest_point_sampling", &furthest_point_sampling);
13 | 
14 |   m.def("three_nn", &three_nn);
15 |   m.def("three_interpolate", &three_interpolate);
16 |   m.def("three_interpolate_grad", &three_interpolate_grad);
17 | 
18 |   m.def("ball_query", &ball_query);
19 | 
20 |   m.def("group_points", &group_points);
21 |   m.def("group_points_grad", &group_points_grad);
22 | }
23 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/sampling/sampling_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "sampling_cuda_kernel.h"
 6 | 
 7 | 
 8 | void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
 9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const int *offset = offset_tensor.data_ptr<int>();
12 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
13 |     float *tmp = tmp_tensor.data_ptr<float>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 |     furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
16 | }
17 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <cmath>
 5 | #include <algorithm>
 6 | 
 7 | #define TOTAL_THREADS 1024
 8 | #define THREADS_PER_BLOCK 256
 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 | 
11 | inline int opt_n_threads(int work_size) {
12 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 |     return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 | 
16 | inline dim3 opt_block_config(int x, int y) {
17 |     const int x_threads = opt_n_threads(x);
18 |     const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
19 |     dim3 block_config(x_threads, y_threads, 1);
20 |     return block_config;
21 | }
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "knnquery_cuda_kernel.h"
 6 | 
 7 | 
 8 | void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
 9 | {
10 |     const float *xyz = xyz_tensor.data_ptr<float>();
11 |     const float *new_xyz = new_xyz_tensor.data_ptr<float>();
12 |     const int *offset = offset_tensor.data_ptr<int>();
13 |     const int *new_offset = new_offset_tensor.data_ptr<int>();
14 |     int *idx = idx_tensor.data_ptr<int>();
15 |     float *dist2 = dist2_tensor.data_ptr<float>();
16 |     knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
17 | }
18 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _GROUPING_CUDA_KERNEL
 2 | #define _GROUPING_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
 7 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
 8 | void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);
 9 | 
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 | 
14 | void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
15 | void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 | 


--------------------------------------------------------------------------------
/conf/augmentation/albumentations_aug.yaml:
--------------------------------------------------------------------------------
 1 | __version__: 0.4.5
 2 | transform:
 3 |   __class_fullname__: albumentations.core.composition.Compose
 4 |   additional_targets: {}
 5 |   bbox_params: null
 6 |   keypoint_params: null
 7 |   p: 1.0
 8 |   transforms:
 9 |     - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast
10 |       always_apply: true
11 |       brightness_by_max: true
12 |       brightness_limit:
13 |         - -0.2
14 |         - 0.2
15 |       contrast_limit:
16 |         - -0.2
17 |         - 0.2
18 |       p: 0.5
19 |     - __class_fullname__: albumentations.augmentations.transforms.RGBShift
20 |       always_apply: true
21 |       b_shift_limit:
22 |         - -20
23 |         - 20
24 |       g_shift_limit:
25 |         - -20
26 |         - 20
27 |       p: 0.5
28 |       r_shift_limit:
29 |         - -20
30 |         - 20
31 | 


--------------------------------------------------------------------------------
/models/model.py:
--------------------------------------------------------------------------------
 1 | from MinkowskiEngine import MinkowskiNetwork
 2 | 
 3 | 
 4 | class Model(MinkowskiNetwork):
 5 |     """
 6 |     Base network for all sparse convnet
 7 | 
 8 |     By default, all networks are segmentation networks.
 9 |     """
10 | 
11 |     OUT_PIXEL_DIST = -1
12 | 
13 |     def __init__(self, in_channels, out_channels, config, D, **kwargs):
14 |         super().__init__(D)
15 |         self.in_channels = in_channels
16 |         self.out_channels = out_channels
17 |         self.config = config
18 | 
19 | 
class HighDimensionalModel(Model):
    """Base class for spatio (temporal) chromatic sparse convnets.

    Construction is only allowed with more than four dimensions.
    """

    def __init__(self, in_channels, out_channels, config, D, **kwargs):
        """Check that ``D`` exceeds 4, then defer to ``Model.__init__``.

        Raises:
            AssertionError: if ``D`` is 4 or less (when assertions are on).
        """
        assert D > 4, "Num dimension smaller than 5"
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            config=config,
            D=D,
            **kwargs,
        )


--------------------------------------------------------------------------------
/scripts/eval/eval_mask3d.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | python main.py \
 4 | general.experiment_name="Mask3D_eval" \
 5 | general.project_name="mask3d_humanseg" \
 6 | data/datasets=egobody \
 7 | general.num_targets=16 \
 8 | data.num_labels=16 \
 9 | model=mask3d \
10 | loss=set_criterion \
11 | model.num_queries=5 \
12 | trainer.check_val_every_n_epoch=1 \
13 | general.topk_per_image=-1 \
14 | model.non_parametric_queries=false \
15 | trainer.max_epochs=36 \
16 | data.batch_size=4 \
17 | data.num_workers=10 \
18 | general.reps_per_epoch=1 \
19 | general.save_visualizations=false \
20 | model.config.backbone._target_=models.Res16UNet18B \
21 | data.part2human=true \
22 | loss.num_classes=2 \
23 | model.num_classes=2 \
24 | callbacks=callbacks_instance_segmentation_human \
25 | general.checkpoint="checkpoints/mask3d.ckpt" \
26 | general.train_mode=false \
27 | general.save_visualizations=false
28 | 


--------------------------------------------------------------------------------
/download_checkpoints.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Check if wget is installed
 4 | if ! command -v wget &> /dev/null; then
 5 |     echo "Error: wget is not installed. Please install wget and try again."
 6 |     exit 1
 7 | fi
 8 | 
 9 | # Directory to store the downloaded files
10 | DIR="checkpoints"
11 | 
12 | # Create the directory if it doesn't exist
13 | mkdir -p "$DIR"
14 | 
15 | # URLs of the files to be downloaded
16 | URL1="https://omnomnom.vision.rwth-aachen.de/data/human3d/checkpoints/mask3d.ckpt"
17 | URL2="https://omnomnom.vision.rwth-aachen.de/data/human3d/checkpoints/human3d.ckpt"
18 | 
19 | # Download the files using wget
20 | wget -P "$DIR" "$URL1"
21 | wget -P "$DIR" "$URL2"
22 | 
23 | # Print a success message if both files are downloaded successfully
24 | if [ $? -eq 0 ]; then
25 |     echo "Files downloaded successfully to $DIR/"
26 | else
27 |     echo "There was an error downloading the files."
28 | fi
29 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INTERPOLATION_CUDA_KERNEL
 2 | #define _INTERPOLATION_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
 7 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
 8 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);
 9 | 
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 | 
14 | void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
15 | void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SUBTRACTION_CUDA_KERNEL
 2 | #define _SUBTRACTION_CUDA_KERNEL
 3 | #include <vector>
 4 | #include <torch/serialize/tensor.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | 
 7 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
 8 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);
 9 | 
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 | 
14 | void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
15 | void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | #endif
21 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "grouping_cuda_kernel.h"
 6 | 
 7 | 
 8 | void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     float *output = output_tensor.data_ptr<float>();
13 |     grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
14 | }
15 | 
// Tensor-level entry point for the backward pass of grouping.
//
// Pulls the raw data pointers out of the tensors and hands them, together
// with m, nsample and c, to grouping_backward_cuda_launcher.
// grad_output_tensor and grad_input_tensor are accessed as float, idx_tensor
// as int. Only grad_input is passed as a non-const pointer.
// NOTE(review): m, nsample and c are forwarded untouched; confirm their
// meaning against the kernel.
void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
{
    const float *grad_output_ptr = grad_output_tensor.data_ptr<float>();
    const int *idx_ptr = idx_tensor.data_ptr<int>();
    float *grad_input_ptr = grad_input_tensor.data_ptr<float>();

    grouping_backward_cuda_launcher(m, nsample, c, grad_output_ptr, idx_ptr, grad_input_ptr);
}
23 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/pointnet2_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | """ Testing customized ops. """
 4 | 
 5 | import torch
 6 | from torch.autograd import gradcheck
 7 | import numpy as np
 8 | 
 9 | import os
10 | import sys
11 | 
12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
13 | sys.path.append(BASE_DIR)
14 | import pointnet2_utils
15 | 
16 | 
def test_interpolation_grad():
    """Gradient-check ``pointnet2_utils.three_interpolate`` on a tiny input.

    A random (1, 2, 4) float feature tensor on the GPU is interpolated with
    fixed indices and weights, and ``gradcheck`` is run with loose (1e-1)
    absolute and relative tolerances.
    """
    n_batch, n_channels, n_points = 1, 2, 4
    feats = torch.randn(n_batch, n_channels, n_points, requires_grad=True)
    feats = feats.float().cuda()

    def interpolate_func(inputs):
        # Fixed neighbour indices and weights, rebuilt on every call.
        neighbour_idx = torch.tensor(
            [[[0, 1, 2], [1, 2, 3]]], dtype=torch.int32
        ).cuda()
        neighbour_weight = torch.tensor(
            [[[1, 1, 1], [2, 2, 2]]], dtype=torch.float32
        ).cuda()
        return pointnet2_utils.three_interpolate(
            inputs, neighbour_idx, neighbour_weight
        )

    assert gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     test_interpolation_grad()
40 | 


--------------------------------------------------------------------------------
/conf/data/indoor.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | 
 3 | # these parameters are inherited by datasets, data_loaders and collators
 4 | # but they might be overwritten
 5 | 
 6 | # splits
 7 | train_mode: train
 8 | validation_mode: validation
 9 | test_mode: validation # test  # validation
10 | 
11 | part2human: false
12 | 
13 | # dataset
14 | ignore_label: 255
15 | add_raw_coordinates: true # 3dim
16 | add_colors: true # 3dim
17 | add_normals: false # 3dim
18 | in_channels: 3 # in_channels = 3 * (add_normals + add_colors + add_raw_coordinates)
19 | num_labels: 20
20 | # num_labels: 41
21 | add_instance: ${general.add_instance}
22 | task: ${general.task}
23 | add_clip: ${general.add_clip}
24 | 
25 | # data loader
26 | pin_memory: false
27 | num_workers: 4
28 | batch_size: 5
29 | test_batch_size: 1
30 | cache_data: false
31 | 
32 | # collation
33 | voxel_size: 0.02
34 | 
35 | reps_per_epoch: ${general.reps_per_epoch}
36 | 
37 | is_mirroring: true
38 | broken_mirror_version: false
39 | 
40 | cropping: false
41 | cropping_args:
42 |   min_points: 30000
43 |   aspect: 0.8
44 |   min_crop: 0.5
45 |   max_crop: 1.0
46 | 
47 | crop_min_size: 20000
48 | crop_length: 6.0
49 | cropping_v1: true


--------------------------------------------------------------------------------
/models/wrapper.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from MinkowskiEngine import SparseTensor
 4 | from torch.nn import Module
 5 | 
 6 | 
 7 | class Wrapper(Module):
 8 |     """
 9 |     Wrapper for the segmentation networks.
10 |     """
11 | 
12 |     OUT_PIXEL_DIST = -1
13 | 
14 |     def __init__(self, NetClass, in_nchannel, out_nchannel, config):
15 |         super().__init__()
16 |         self.initialize_filter(NetClass, in_nchannel, out_nchannel, config)
17 | 
18 |     def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config):
19 |         raise NotImplementedError("Must initialize a model and a filter")
20 | 
21 |     def forward(self, x, coords, colors=None):
22 |         soutput = self.model(x)
23 | 
24 |         # During training, make the network invariant to the filter
25 |         if not self.training or random.random() < 0.5:
26 |             # Filter requires the model to finish the forward pass
27 |             wrapper_coords = self.filter.initialize_coords(
28 |                 self.model, coords, colors
29 |             )
30 |             finput = SparseTensor(soutput.F, wrapper_coords)
31 |             soutput = self.filter(finput)
32 |         return soutput
33 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #pragma once
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | #include <torch/extension.h>
 7 | 
 8 | #define CHECK_CUDA(x)                                          \
 9 |   do {                                                         \
10 |     AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \
11 |   } while (0)
12 | 
13 | #define CHECK_CONTIGUOUS(x)                                         \
14 |   do {                                                              \
15 |     AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \
16 |   } while (0)
17 | 
18 | #define CHECK_IS_INT(x)                              \
19 |   do {                                               \
20 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \
21 |              #x " must be an int tensor");           \
22 |   } while (0)
23 | 
24 | #define CHECK_IS_FLOAT(x)                              \
25 |   do {                                                 \
26 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \
27 |              #x " must be a float tensor");            \
28 |   } while (0)
29 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "interpolation_cuda_kernel.h"
 6 | 
 7 | 
 8 | void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
 9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const int *idx = idx_tensor.data_ptr<int>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     float *output = output_tensor.data_ptr<float>();
14 |     interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
15 | }
16 | 
17 | void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
18 | {
19 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
20 |     const int *idx = idx_tensor.data_ptr<int>();
21 |     const float *weight = weight_tensor.data_ptr<float>();
22 |     float *grad_input = grad_input_tensor.data_ptr<float>();
23 |     interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
24 | }
25 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the aggregation op: tensor-level entry points plus the
// raw-pointer CUDA launchers they call.
#ifndef _AGGREGATION_CUDA_KERNEL
#define _AGGREGATION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level wrappers (defined in aggregation_cuda.cpp). They extract the
// tensors' data pointers and forward them to the launchers below.
void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);

#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers with C linkage; presumably implemented in
// aggregation_cuda_kernel.cu — confirm there for the expected buffer layouts.
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);

#ifdef __cplusplus
}
#endif
#endif
21 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/ball_query.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #include "ball_query.h"
 5 | #include "utils.h"
 6 | 
 7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
 8 |                                      int nsample, const float *new_xyz,
 9 |                                      const float *xyz, int *idx);
10 | 
11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
12 |                       const int nsample) {
13 |   CHECK_CONTIGUOUS(new_xyz);
14 |   CHECK_CONTIGUOUS(xyz);
15 |   CHECK_IS_FLOAT(new_xyz);
16 |   CHECK_IS_FLOAT(xyz);
17 | 
18 |   if (new_xyz.is_cuda()) {
19 |     CHECK_CUDA(xyz);
20 |   }
21 | 
22 |   at::Tensor idx =
23 |       torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
24 |                    at::device(new_xyz.device()).dtype(at::ScalarType::Int));
25 | 
26 |   if (new_xyz.is_cuda()) {
27 |     query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
28 |                                     radius, nsample, new_xyz.data<float>(),
29 |                                     xyz.data<float>(), idx.data<int>());
30 |   } else {
31 |     AT_ASSERT(false, "CPU not supported");
32 |   }
33 | 
34 |   return idx;
35 | }
36 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "subtraction_cuda_kernel.h"
 6 | 
 7 | 
 8 | void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 9 | {
10 |     const float *input1 = input1_tensor.data_ptr<float>();
11 |     const float *input2 = input2_tensor.data_ptr<float>();
12 |     const int *idx = idx_tensor.data_ptr<int>();
13 |     float *output = output_tensor.data_ptr<float>();
14 |     subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
15 | }
16 | 
17 | void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
18 | {
19 |     const int *idx = idx_tensor.data_ptr<int>();
20 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
21 |     float *grad_input1 = grad_input1_tensor.data_ptr<float>();
22 |     float *grad_input2 = grad_input2_tensor.data_ptr<float>();
23 |     subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
24 | }
25 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
 1 | import models.res16unet as res16unet
 2 | import models.resunet as resunet
 3 | from models.mask3d import Mask3D
 4 | from models.mask3d_hp import Mask3DHumanParts
 5 | from models.res16unet import (
 6 |     Custom30M,
 7 |     Res16UNet14A,
 8 |     Res16UNet18B,
 9 |     Res16UNet18D,
10 |     Res16UNet34A,
11 |     Res16UNet34C,
12 |     Res16UNet34D,
13 | )
14 | 
# Registry filled by add_models(); holds the collected attribute objects.
MODELS = []


def add_models(module):
    """Register every attribute of ``module`` whose name contains "Net".

    Matching attributes are appended to ``MODELS`` in ``dir(module)`` order.
    """
    selected = []
    for attr_name in dir(module):
        if "Net" in attr_name:
            selected.append(getattr(module, attr_name))
    MODELS.extend(selected)
20 | 
21 | 
22 | add_models(resunet)
23 | add_models(res16unet)
24 | add_models(mask3d)
25 | add_models(mask3d_hp)
26 | 
27 | 
def get_models():
    """Return the module-level ``MODELS`` list (the same list object, not a copy)."""
    return MODELS
31 | 
32 | 
def load_model(name):
    """Look up a registered model class by its class name.

    Returns the class itself (it is not instantiated here). When ``name`` is
    not registered, the valid names are printed and ``None`` is returned.
    """
    candidates = get_models()

    by_name = {}
    for candidate in candidates:
        by_name[candidate.__name__] = candidate

    if name in by_name:
        return by_name[name]

    print("Invalid model index. Options are:")
    # Show every registered name so the caller can correct the request
    for candidate in candidates:
        print(f"\t* {candidate.__name__}")
    return None
47 | 


--------------------------------------------------------------------------------
/conf/model/mask3d.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | _target_: models.Mask3D
 3 | 
 4 | # transformer parameters
 5 | hidden_dim: 128
 6 | dim_feedforward: 1024
 7 | num_queries: 100
 8 | num_heads: 8
 9 | num_decoders: 3
10 | dropout: 0.0
11 | pre_norm: false
12 | use_level_embed: false
13 | normalize_pos_enc: true
14 | positional_encoding_type: "fourier"
15 | gauss_scale: 1.0
16 | hlevels: [0,1,2,3]
17 | 
18 | # queries
19 | non_parametric_queries: true
20 | random_query_both: false
21 | random_normal: false
22 | random_queries: false
23 | use_np_features: false
24 | 
25 | query_init: None # ['clip_init', ...] TODO
26 | clip_proj_dropout: 0.0
27 | 
28 | # sampling
29 | sample_sizes: [200, 800, 3200, 12800, 51200]
30 | max_sample_size: false # false means sampling is activated
31 | 
32 | shared_decoder: true
33 | num_classes: ${general.num_targets}
34 | train_on_segments: ${general.train_on_segments}
35 | scatter_type: "mean"
36 | 
37 | voxel_size: ${data.voxel_size}
38 | 
39 | config:
40 |   backbone:
41 |     _target_: models.Res16UNet34C
42 |     config:
43 |       dialations: [ 1, 1, 1, 1 ]
44 |       conv1_kernel_size: 5
45 |       bn_momentum: 0.02
46 |     # depends on normals, color, raw_coordinates
47 |     # varies from 3 to 9
48 |     in_channels: ${data.in_channels}
49 |     out_channels: ${data.num_labels}
50 |     out_fpn: true
51 | 
52 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | from setuptools import setup
 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 8 | import glob
 9 | import os.path as osp
10 | 
# Absolute directory of this setup script; used for the include path below.
this_dir = osp.dirname(osp.abspath(__file__))

# Source discovery is relative to the current working directory, so the build
# is expected to be launched from the directory containing this file.
_ext_src_root = "_ext_src"
_ext_sources = [
    path
    for pattern in ("src/*.cpp", "src/*.cu")
    for path in glob.glob(f"{_ext_src_root}/{pattern}")
]
_ext_headers = glob.glob(f"{_ext_src_root}/include/*")

setup(
    name="pointnet2",
    ext_modules=[
        CUDAExtension(
            name="pointnet2._ext",
            sources=_ext_sources,
            # The host compiler and nvcc receive identical flags.
            extra_compile_args={
                compiler: ["-O2", f"-I{_ext_src_root}/include"]
                for compiler in ("cxx", "nvcc")
            },
            include_dirs=[osp.join(this_dir, _ext_src_root, "include")],
        )
    ],
    cmdclass={"build_ext": BuildExtension},
)
40 | 


--------------------------------------------------------------------------------
/conf/model/mask3d_hp.yaml:
--------------------------------------------------------------------------------
 1 | # @package _group_
 2 | _target_: models.Mask3DHumanParts
 3 | 
 4 | restricted_cross_attention: true
 5 | 
 6 | # transformer parameters
 7 | hidden_dim: 128
 8 | dim_feedforward: 1024
 9 | #num_queries: 100
10 | 
11 | num_human_queries: 5
12 | num_parts_per_human_queries: 16
13 | 
14 | num_heads: 8
15 | num_decoders: 3
16 | dropout: 0.0
17 | pre_norm: false
18 | use_level_embed: false
19 | normalize_pos_enc: true
20 | positional_encoding_type: "fourier"
21 | gauss_scale: 1.0
22 | hlevels: [0,1,2,3]
23 | 
24 | # queries
25 | non_parametric_queries: true
26 | random_query_both: false
27 | random_normal: false
28 | random_queries: false
29 | use_np_features: false
30 | 
31 | query_init: None # ['clip_init', ...] TODO
32 | clip_proj_dropout: 0.0
33 | 
34 | # sampling
35 | sample_sizes: [200, 800, 3200, 12800, 51200]
36 | max_sample_size: false # false means sampling is activated
37 | 
38 | shared_decoder: true
39 | num_classes: ${general.num_targets}
40 | train_on_segments: ${general.train_on_segments}
41 | scatter_type: "mean"
42 | 
43 | voxel_size: ${data.voxel_size}
44 | 
45 | config:
46 |   backbone:
47 |     _target_: models.Res16UNet34C
48 |     config:
49 |       dialations: [ 1, 1, 1, 1 ]
50 |       conv1_kernel_size: 5
51 |       bn_momentum: 0.02
52 |     # depends on normals, color, raw_coordinates
53 |     # varies from 3 to 9
54 |     in_channels: ${data.in_channels}
55 |     out_channels: ${data.num_labels}
56 |     out_fpn: true
57 | 
58 | 


--------------------------------------------------------------------------------
/conf/augmentation/volumentations_aug.yaml:
--------------------------------------------------------------------------------
 1 | # pi   = 3.14159265358979
 2 | # pi/2 = 1.57079632679489
 3 | # pi/3 = 1.04719755119659
 4 | # pi/6 = 0.52359877559829
 5 | # pi/12 = 0.26179938779914
 6 | # pi/24 = 0.13089969389957
 7 | #
 8 | __version__: 0.1.6
 9 | transform:
10 |   __class_fullname__: volumentations.core.composition.Compose
11 |   additional_targets: {}
12 |   p: 1.0
13 |   transforms:
14 |     - __class_fullname__: volumentations.augmentations.transforms.Scale3d
15 |       always_apply: true
16 |       p: 0.5
17 |       scale_limit:
18 |         - - -0.1
19 |           - 0.1
20 |         - - -0.1
21 |           - 0.1
22 |         - - -0.1
23 |           - 0.1
24 |     - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
25 |       always_apply: true
26 |       axis:
27 |         - 0
28 |         - 0
29 |         - 1
30 |       p: 0.5
31 |       rotation_limit:
32 |         - -3.141592653589793
33 |         - 3.141592653589793
34 |     - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
35 |       always_apply: true
36 |       axis:
37 |         - 0
38 |         - 1
39 |         - 0
40 |       p: 0.5
41 |       rotation_limit:
42 |         - -0.13089969389957
43 |         - 0.13089969389957
44 |     - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
45 |       always_apply: true
46 |       axis:
47 |         - 1
48 |         - 0
49 |         - 0
50 |       p: 0.5
51 |       rotation_limit:
52 |         - -0.13089969389957
53 |         - 0.13089969389957
54 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/include/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | #ifndef _CUDA_UTILS_H
 4 | #define _CUDA_UTILS_H
 5 | 
 6 | #include <ATen/ATen.h>
 7 | #include <ATen/cuda/CUDAContext.h>
 8 | #include <cmath>
 9 | 
10 | #include <cuda.h>
11 | #include <cuda_runtime.h>
12 | 
13 | #include <vector>
14 | 
// Upper bound on the number of threads per block used by the helpers below.
#define TOTAL_THREADS 512

// Returns the largest power of two that is <= work_size, clamped to the range
// [1, TOTAL_THREADS].
//
// Computed with integer doubling instead of log(work_size) / log(2): the
// floating-point ratio can land just below an exact integer and truncate to
// one power too low, and its cast to int is undefined for work_size <= 0
// (log(0) is -inf, log of a negative is NaN). Non-positive sizes yield 1.
inline int opt_n_threads(int work_size) {
  int threads = 1;
  // threads never exceeds TOTAL_THREADS, so threads * 2 cannot overflow.
  while (threads * 2 <= work_size && threads * 2 <= TOTAL_THREADS) {
    threads *= 2;
  }

  return threads;
}
22 | 
23 | inline dim3 opt_block_config(int x, int y) {
24 |   const int x_threads = opt_n_threads(x);
25 |   const int y_threads =
26 |       max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
27 |   dim3 block_config(x_threads, y_threads, 1);
28 | 
29 |   return block_config;
30 | }
31 | 
// Queries cudaGetLastError(); if an error is pending, prints its description
// together with the enclosing function, line and file to stderr and then
// terminates the whole process with exit(-1). Wrapped in do { } while (0) so
// it behaves as a single statement.
#define CUDA_CHECK_ERRORS()                                           \
  do {                                                                \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
      fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
              cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
              __FILE__);                                              \
      exit(-1);                                                       \
    }                                                                 \
  } while (0)
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/scripts/train/train_human3d.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ### 1) FIRST TRAIN THE MODEL ON SYNTHETIC DATA
 4 | python main.py \
 5 | general.experiment_name="Human3D_on_synthetic_data" \
 6 | general.project_name="human3d_humanseg" \
 7 | data/datasets=synthetic_humans \
 8 | general.num_targets=16 \
 9 | data.num_labels=16 \
10 | model=mask3d_hp \
11 | loss=set_criterion_hp \
12 | model.num_human_queries=5 \
13 | model.num_parts_per_human_queries=16 \
14 | trainer.check_val_every_n_epoch=1 \
15 | general.topk_per_image=-1 \
16 | model.non_parametric_queries=false \
17 | trainer.max_epochs=36 \
18 | data.batch_size=4 \
19 | data.num_workers=10 \
20 | general.reps_per_epoch=1 \
21 | model.config.backbone._target_=models.Res16UNet18B \
22 | general.train_mode=true \
23 | general.save_visualizations=false
24 | 
25 | ### 2) THEN FINETUNE WITH EGOBODY DATA
26 | python main.py \
27 | general.experiment_name="Human3D_finetuned_on_egobody_data" \
28 | general.project_name="human3d_humanseg" \
29 | data/datasets=synthetic_humans \
30 | general.num_targets=16 \
31 | data.num_labels=16 \
32 | model=mask3d_hp \
33 | loss=set_criterion_hp \
34 | model.num_human_queries=5 \
35 | model.num_parts_per_human_queries=16 \
36 | trainer.check_val_every_n_epoch=1 \
37 | general.topk_per_image=-1 \
38 | model.non_parametric_queries=false \
39 | trainer.max_epochs=36 \
40 | data.batch_size=4 \
41 | data.num_workers=10 \
42 | general.reps_per_epoch=1 \
43 | model.config.backbone._target_=models.Res16UNet18B \
44 | general.checkpoint='saved/Human3D_on_synthetic_data/last.ckpt' \
45 | general.train_mode=true \
46 | general.save_visualizations=false
47 | 


--------------------------------------------------------------------------------
/conf/data/collation_functions/voxelize_collate.yaml:
--------------------------------------------------------------------------------
 1 | # @package data
 2 | 
 3 | train_collation:
 4 |   _target_: datasets.utils.VoxelizeCollate
 5 |   ignore_label: ${data.ignore_label}
 6 |   voxel_size: ${data.voxel_size}
 7 |   mode: ${data.train_mode}
 8 |   small_crops: false
 9 |   very_small_crops: false
10 |   batch_instance: false
11 |   probing: ${general.linear_probing_backbone}
12 |   task: ${general.task}
13 |   ignore_class_threshold: ${general.ignore_class_threshold}
14 |   filter_out_classes: ${data.train_dataset.filter_out_classes}
15 |   label_offset: ${data.train_dataset.label_offset}
16 |   num_queries: 0 # ${model.num_queries}
17 | 
18 | validation_collation:
19 |   _target_: datasets.utils.VoxelizeCollate
20 |   ignore_label: ${data.ignore_label}
21 |   voxel_size: ${data.voxel_size}
22 |   mode: ${data.validation_mode}
23 |   batch_instance: false
24 |   probing: ${general.linear_probing_backbone}
25 |   task: ${general.task}
26 |   ignore_class_threshold: ${general.ignore_class_threshold}
27 |   filter_out_classes: ${data.validation_dataset.filter_out_classes}
28 |   label_offset: ${data.validation_dataset.label_offset}
29 |   num_queries: 0 # ${model.num_queries}
30 | 
31 | test_collation:
32 |   _target_: datasets.utils.VoxelizeCollate
33 |   ignore_label: ${data.ignore_label}
34 |   voxel_size: ${data.voxel_size}
35 |   mode: ${data.test_mode}
36 |   batch_instance: false
37 |   probing: ${general.linear_probing_backbone}
38 |   task: ${general.task}
39 |   ignore_class_threshold: ${general.ignore_class_threshold}
40 |   filter_out_classes: ${data.test_dataset.filter_out_classes}
41 |   label_offset: ${data.test_dataset.label_offset}
42 |   num_queries: 0 # ${model.num_queries}


--------------------------------------------------------------------------------
/scripts/train/train_mask3d.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ### 1) FIRST TRAIN THE MODEL ON SYNTHETIC DATA
 4 | python main.py \
 5 | general.experiment_name="Mask3D_on_synthetic_data" \
 6 | general.project_name="mask3d_humanseg" \
 7 | data/datasets=synthetic_humans \
 8 | general.num_targets=16 \
 9 | data.num_labels=16 \
10 | model=mask3d \
11 | loss=set_criterion \
12 | model.num_queries=5 \
13 | trainer.check_val_every_n_epoch=1 \
14 | general.topk_per_image=-1 \
15 | model.non_parametric_queries=false \
16 | trainer.max_epochs=36 \
17 | data.batch_size=4 \
18 | data.num_workers=10 \
19 | general.reps_per_epoch=1 \
20 | general.save_visualizations=false \
21 | model.config.backbone._target_=models.Res16UNet18B \
22 | data.part2human=true \
23 | loss.num_classes=2 \
24 | model.num_classes=2 \
25 | callbacks=callbacks_instance_segmentation_human \
26 | general.train_mode=true
27 | 
28 | 
29 | ### 2) THEN FINETUNE WITH EGOBODY DATA
30 | python main.py \
31 | general.experiment_name="Mask3D_finetuned_on_egobody_data" \
32 | general.project_name="mask3d_humanseg" \
33 | data/datasets=egobody \
34 | general.num_targets=16 \
35 | data.num_labels=16 \
36 | model=mask3d \
37 | loss=set_criterion \
38 | model.num_queries=5 \
39 | trainer.check_val_every_n_epoch=1 \
40 | general.topk_per_image=-1 \
41 | model.non_parametric_queries=false \
42 | trainer.max_epochs=36 \
43 | data.batch_size=4 \
44 | data.num_workers=10 \
45 | general.reps_per_epoch=1 \
46 | general.save_visualizations=false \
47 | model.config.backbone._target_=models.Res16UNet18B \
48 | data.part2human=true \
49 | loss.num_classes=2 \
50 | model.num_classes=2 \
51 | callbacks=callbacks_instance_segmentation_human \
52 | general.checkpoint='saved/Mask3D_on_synthetic_data/last.ckpt' \
53 | general.train_mode=true
54 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "aggregation_cuda_kernel.h"
 6 | 
 7 | 
 8 | void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
 9 | {
10 |     const float *input = input_tensor.data_ptr<float>();
11 |     const float *position = position_tensor.data_ptr<float>();
12 |     const float *weight = weight_tensor.data_ptr<float>();
13 |     const int *idx = idx_tensor.data_ptr<int>();
14 |     float *output = output_tensor.data_ptr<float>();
15 |     aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
16 | }
17 | 
18 | void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
19 | {
20 | 	const float *input = input_tensor.data_ptr<float>();
21 |     const float *position = position_tensor.data_ptr<float>();
22 |     const float *weight = weight_tensor.data_ptr<float>();
23 |     const int *idx = idx_tensor.data_ptr<int>();
24 |     const float *grad_output = grad_output_tensor.data_ptr<float>();
25 |     float *grad_input = grad_input_tensor.data_ptr<float>();
26 |     float *grad_position = grad_position_tensor.data_ptr<float>();
27 |     float *grad_weight = grad_weight_tensor.data_ptr<float>();
28 |     aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
29 | }
30 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_attention_op_step2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import pointops
 3 | 
 4 | torch.manual_seed(1)
 5 | 
 6 | M = 800000
 7 | N = 35000
 8 | C = 96
 9 | h = 6
10 | softmax_attn_flat = torch.rand(M, h).cuda()
11 | value = torch.rand(N, h, C // h).cuda()
12 | 
13 | index_0 = torch.rand(M)
14 | index_0[index_0 < 0] = 0
15 | index_0 = (index_0 * N).long().cuda()
16 | 
17 | index_1 = torch.rand(M)
18 | index_1[index_1 < 0] = 0
19 | index_1 = (index_1 * N).long().cuda()
20 | 
21 | softmax_attn_flat.requires_grad = True
22 | value.requires_grad = True
23 | 
24 | # value_flat = value[index_1] #[M, num_heads, C // num_heads]
25 | # x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C)
26 | # x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C]
27 | # loss = x.sum()
28 | # loss.backward()
29 | 
30 | # print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10]))
31 | # print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
32 | # print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
33 | # input()
34 | 
35 | print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous())
36 | print("value.is_contiguous(): ", value.is_contiguous())
37 | print("index_0.is_contiguous(): ", index_0.is_contiguous())
38 | print("index_1.is_contiguous(): ", index_1.is_contiguous())
39 | 
40 | x_v2 = pointops.attention_step2(
41 |     softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int()
42 | )
43 | x_v2 = x_v2.view(N, C)
44 | loss = x_v2.sum()
45 | loss.backward()
46 | 
47 | print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10]))
48 | 
49 | print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
50 | print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
51 | input()
52 | 
53 | print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all())
54 | 
55 | print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2))
56 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/attention/attention_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the two-step attention op: tensor-level entry points plus
// the raw-pointer CUDA launchers with C linkage.
#ifndef _ATTENTION_CUDA_KERNEL
#define _ATTENTION_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Step 1: takes q/k tensors with two index tensors and fills attn_tensor;
// the backward variant fills grad_q_tensor and grad_k_tensor.
// NOTE(review): exact tensor shapes are not visible here — confirm in the
// kernel implementation.
void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

// Step 2: takes attn/v tensors with two index tensors and fills
// output_tensor; the backward variant fills grad_attn_tensor and
// grad_v_tensor.
void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

#ifdef __cplusplus
extern "C" {
#endif

// Raw-pointer launchers mirroring the entry points above, parameter for
// parameter.
void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/models/metrics/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class IoU:
 5 |     """Computes the intersection over union (IoU) per class and corresponding
 6 |     mean (mIoU).
 7 | 
 8 |     Intersection over union (IoU) is a common evaluation metric for semantic
 9 |     segmentation. The predictions are first accumulated in a confusion matrix
10 |     and the IoU is computed from it as follows:
11 | 
12 |         IoU = true_positive / (true_positive + false_positive + false_negative).
13 | 
14 |     Keyword arguments:
15 |     - num_classes (int): number of classes in the classification problem
16 |     - normalized (boolean, optional): Determines whether or not the confusion
17 |     matrix is normalized or not. Default: False.
18 |     - ignore_index (int or iterable, optional): Index of the classes to ignore
19 |     when computing the IoU. Can be an int, or any iterable of ints.
20 | 
21 |     Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter
22 | 
23 |     """
24 | 
25 |     def __init__(self):
26 |         super().__init__()
27 | 
28 |     def value(self, conf_matrix):
29 |         """Computes the IoU and mean IoU.
30 | 
31 |         The mean computation ignores NaN elements of the IoU array.
32 | 
33 |         Returns:
34 |             Tuple: (IoU, mIoU). The first output is the per class IoU,
35 |             for K classes it's numpy.ndarray with K elements. The second output,
36 |             is the mean IoU.
37 |         """
38 |         true_positive = np.diag(conf_matrix)
39 |         false_positive = np.sum(conf_matrix, 0) - true_positive
40 |         false_negative = np.sum(conf_matrix, 1) - true_positive
41 | 
42 |         # Just in case we get a division by 0, ignore/hide the error
43 |         with np.errstate(divide="ignore", invalid="ignore"):
44 |             iou = true_positive / (
45 |                 true_positive + false_positive + false_negative
46 |             )
47 | 
48 |         return iou
49 | 


--------------------------------------------------------------------------------
/utils/pointops2/setup.py:
--------------------------------------------------------------------------------
 1 | # python3 setup.py install
 2 | from setuptools import setup
 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 4 | import os
 5 | from distutils.sysconfig import get_config_vars
 6 | 
 7 | (opt,) = get_config_vars("OPT")
 8 | os.environ["OPT"] = " ".join(
 9 |     flag for flag in opt.split() if flag != "-Wstrict-prototypes"
10 | )
11 | 
12 | setup(
13 |     name="pointops2",
14 |     ext_modules=[
15 |         CUDAExtension(
16 |             "pointops2_cuda",
17 |             [
18 |                 "src/pointops_api.cpp",
19 |                 "src/knnquery/knnquery_cuda.cpp",
20 |                 "src/knnquery/knnquery_cuda_kernel.cu",
21 |                 "src/sampling/sampling_cuda.cpp",
22 |                 "src/sampling/sampling_cuda_kernel.cu",
23 |                 "src/grouping/grouping_cuda.cpp",
24 |                 "src/grouping/grouping_cuda_kernel.cu",
25 |                 "src/interpolation/interpolation_cuda.cpp",
26 |                 "src/interpolation/interpolation_cuda_kernel.cu",
27 |                 "src/subtraction/subtraction_cuda.cpp",
28 |                 "src/subtraction/subtraction_cuda_kernel.cu",
29 |                 "src/aggregation/aggregation_cuda.cpp",
30 |                 "src/aggregation/aggregation_cuda_kernel.cu",
31 |                 "src/attention/attention_cuda.cpp",
32 |                 "src/attention/attention_cuda_kernel.cu",
33 |                 "src/rpe/relative_pos_encoding_cuda.cpp",
34 |                 "src/rpe/relative_pos_encoding_cuda_kernel.cu",
35 |                 "src/attention_v2/attention_cuda_v2.cpp",
36 |                 "src/attention_v2/attention_cuda_kernel_v2.cu",
37 |                 "src/rpe_v2/relative_pos_encoding_cuda_v2.cpp",
38 |                 "src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.cu",
39 |             ],
40 |             extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
41 |         )
42 |     ],
43 |     cmdclass={"build_ext": BuildExtension},
44 | )
45 | 


--------------------------------------------------------------------------------
/conf/config_base_instance_segmentation.yaml:
--------------------------------------------------------------------------------
# Run-wide settings. Other config values can reference these through
# ${general.<key>} interpolation (save_dir below does so itself).
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1

  add_clip: false

  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  body_part_segmentation: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  # NOTE(review): presumably a sentinel meaning "no limit" - confirm against
  # the code that reads it.
  max_batch_size: 99999999

  # Output directory, derived from experiment_name above.
  save_dir: saved/${general.experiment_name}
  # time/commit/md5(config)_uuid
  # time/experiment_id/version_uuid
  # experiment_id: 1 # commit[:8], or unique from logger
  # version: 1 # md5[:8] of config

  gpus: 1
56 | 
# Hydra defaults list: each "<group>: <name>" entry composes
# conf/<group>/<name>.yaml into this config.
# NOTE(review): the conf/data/datasets directory listing only shows
# egobody.yaml and synthetic_humans.yaml; confirm that a scannet.yaml exists
# or that data/datasets is always overridden on the command line.
defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer600
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion
71 | 
# Hydra output locations. Single runs and sweeps both use a timestamped
# directory under saved/hydra_logs; each sweep job additionally gets its own
# <job num>_<job id> subdirectory.
hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}
79 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/attention_v2/attention_cuda_kernel_v2.h:
--------------------------------------------------------------------------------
// Declarations for the "v2" attention operators: tensor-level entry points
// plus the raw-pointer launchers they pair with.
#ifndef _ATTENTION_V2_CUDA_KERNEL
#define _ATTENTION_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Step 1 (q, k -> attn) and its backward pass. Unlike the step-2 functions
// below, these take an extra n_max argument and an "index0 offsets" tensor
// instead of a plain index0 tensor.
// NOTE(review): presumably N/M/h/C are the number of points, number of index
// pairs, number of heads and channel count - confirm against the .cu file.
void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor);
void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

// Step 2 (attn, v -> output) and its backward pass; results are written into
// the output / grad_* tensors supplied by the caller.
void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor);
void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor);

// The launchers are given C linkage and mirror the functions above with raw
// float/int pointers in place of tensors.
#ifdef __cplusplus
extern "C" {
#endif

void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn);
void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k);

void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output);
void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/utils/votenet_utils/tf_visualizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | """Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix"""
 7 | import os
 8 | import time
 9 | 
10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
11 | import sys
12 | 
13 | sys.path.append(BASE_DIR)
14 | import tf_logger
15 | 
16 | 
class Visualizer:
    """Sends training images/scalars to a tf_logger and mirrors scalars to a text log."""

    def __init__(self, opt, name="train"):
        """Create the logger under ``opt.log_dir/name`` and start the text log.

        A header line carrying the current time is appended to
        ``opt.log_dir/tf_visualizer_log.txt``.
        """
        log_dir = opt.log_dir
        self.logger = tf_logger.Logger(os.path.join(log_dir, name))
        self.log_name = os.path.join(log_dir, "tf_visualizer_log.txt")
        with open(self.log_name, "a") as log_file:
            started_at = time.strftime("%c")
            header = (
                "================ Training Loss (%s) ================\n"
                % started_at
            )
            log_file.write(header)

    def log_images(self, visuals, step):
        """Log every image in the ``visuals`` dict (label -> image) at ``step``."""
        for tag in visuals:
            self.logger.image_summary(tag, [visuals[tag]], step)

    def log_scalars(self, scalars, step):
        """Log every value in the ``scalars`` dict (label -> value) at ``step``."""
        for tag in scalars:
            self.logger.scalar_summary(tag, scalars[tag], step)

    def plot_current_points(self, points, disp_offset=10):
        """Scatter-plot hook; intentionally does nothing."""
        return None

    def print_current_scalars(self, epoch, i, scalars):
        """Print one line of ``scalars`` for (epoch, i) and append it to the text log."""
        prefix = "(epoch: %d, iters: %d) " % (epoch, i)
        fields = "".join(
            "%s: %.3f " % (key, value) for key, value in scalars.items()
        )
        message = prefix + fields

        print(message)
        with open(self.log_name, "a") as log_file:
            log_file.write("%s\n" % message)
53 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/ball_query_gpu.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #include <math.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #include "cuda_utils.h"
 9 | 
// For every query point, collects indices of points lying strictly inside a
// ball of the given radius around it.
// input: new_xyz(b, m, 3) xyz(b, n, 3)
// output: idx(b, m, nsample)
__global__ void query_ball_point_kernel(int b, int n, int m, float radius,
                                        int nsample,
                                        const float *__restrict__ new_xyz,
                                        const float *__restrict__ xyz,
                                        int *__restrict__ idx) {
  // One block per batch element: move all pointers to this batch's slice.
  int batch_index = blockIdx.x;
  xyz += batch_index * n * 3;
  new_xyz += batch_index * m * 3;
  idx += m * nsample * batch_index;

  // Threads of the block share the m query points, stepping by blockDim.x.
  int index = threadIdx.x;
  int stride = blockDim.x;

  // Compare squared distances so no square root is needed.
  float radius2 = radius * radius;
  for (int j = index; j < m; j += stride) {
    float new_x = new_xyz[j * 3 + 0];
    float new_y = new_xyz[j * 3 + 1];
    float new_z = new_xyz[j * 3 + 2];
    // Scan candidates in index order; stop once nsample hits are recorded.
    for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
      float x = xyz[k * 3 + 0];
      float y = xyz[k * 3 + 1];
      float z = xyz[k * 3 + 2];
      float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
                 (new_z - z) * (new_z - z);
      if (d2 < radius2) {
        // On the first hit, pre-fill the whole row with it so that slots left
        // unused (fewer than nsample neighbours) still hold a valid index.
        if (cnt == 0) {
          for (int l = 0; l < nsample; ++l) {
            idx[j * nsample + l] = k;
          }
        }
        idx[j * nsample + cnt] = k;
        ++cnt;
      }
    }
    // If no point fell inside the ball, this row of idx is never written.
  }
}
48 | 
// Host-side launcher for query_ball_point_kernel: enqueues it on the current
// ATen CUDA stream with one block per batch element (b blocks).
// NOTE(review): opt_n_threads and CUDA_CHECK_ERRORS come from cuda_utils.h;
// presumably they pick a block size for m work items and check the launch for
// errors - confirm there.
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
                                     int nsample, const float *new_xyz,
                                     const float *xyz, int *idx) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
      b, n, m, radius, nsample, new_xyz, xyz, idx);

  CUDA_CHECK_ERRORS();
}
58 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_relative_pos_encoding_op_step1.py:
--------------------------------------------------------------------------------
# Manual smoke test for pointops.dot_prod_with_idx: runs the op forward and
# backward on random CUDA inputs and prints slices of the output and of the
# gradients. Nothing is asserted. The pure-PyTorch reference it was compared
# against is kept commented out below.
import torch
import pointops

torch.manual_seed(1)

M = 80000
N = 3500
hdim = 16
h = 6
L = 31
query = torch.rand(N, h, hdim).cuda()
table = torch.rand(L, h, hdim, 3).cuda()

# Random row indices into `query`. torch.rand draws from [0, 1), so the clamp
# below never changes anything and the scaled values stay within [0, N).
index = torch.rand(M)
index[index < 0] = 0
index = (index * N).long().cuda()

# Random indices into the first axis of `table`, three per row; same
# construction as above, scaled to [0, L).
rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index * L).long().cuda()

query.requires_grad = True
table.requires_grad = True

# query_flat = query[index] #[M, h, hdim]
# table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim]
# rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M]
# rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim]
# output = (query_flat * rel_pos_encoding).sum(-1) #[M, h]
# loss = output.mean()
# loss.backward()

# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# input()

# print("query.is_contiguous(): ", query.is_contiguous())
# print("key.is_contiguous(): ", key.is_contiguous())
# print("index_0.is_contiguous(): ", index_0.is_contiguous())
# print("index_1.is_contiguous(): ", index_1.is_contiguous())

# The indices are cast to int32 before being handed to the op.
output_v2 = pointops.dot_prod_with_idx(
    query, index.int(), table, rel_index.int()
)
loss = output_v2.mean()
loss.backward()

print(
    "output_v2.shape: {}, output_v2[:5,:10]: {}".format(
        output_v2.shape, output_v2[:5, :10]
    )
)
print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# Blocks until Enter is pressed, so this script cannot run unattended.
input()

# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())

# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
61 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/group_points.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #include "group_points.h"
 5 | #include "utils.h"
 6 | 
// Forward declarations of the kernel launchers called below; they are not
// defined in this file.
// NOTE(review): presumably implemented in the matching .cu source - confirm.
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
                                 const float *points, const int *idx,
                                 float *out);

void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                      int nsample, const float *grad_out,
                                      const int *idx, float *grad_points);
14 | 
15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) {
16 |   CHECK_CONTIGUOUS(points);
17 |   CHECK_CONTIGUOUS(idx);
18 |   CHECK_IS_FLOAT(points);
19 |   CHECK_IS_INT(idx);
20 | 
21 |   if (points.is_cuda()) {
22 |     CHECK_CUDA(idx);
23 |   }
24 | 
25 |   at::Tensor output =
26 |       torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},
27 |                    at::device(points.device()).dtype(at::ScalarType::Float));
28 | 
29 |   if (points.is_cuda()) {
30 |     group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
31 |                                 idx.size(1), idx.size(2), points.data<float>(),
32 |                                 idx.data<int>(), output.data<float>());
33 |   } else {
34 |     AT_ASSERT(false, "CPU not supported");
35 |   }
36 | 
37 |   return output;
38 | }
39 | 
40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {
41 |   CHECK_CONTIGUOUS(grad_out);
42 |   CHECK_CONTIGUOUS(idx);
43 |   CHECK_IS_FLOAT(grad_out);
44 |   CHECK_IS_INT(idx);
45 | 
46 |   if (grad_out.is_cuda()) {
47 |     CHECK_CUDA(idx);
48 |   }
49 | 
50 |   at::Tensor output =
51 |       torch::zeros({grad_out.size(0), grad_out.size(1), n},
52 |                    at::device(grad_out.device()).dtype(at::ScalarType::Float));
53 | 
54 |   if (grad_out.is_cuda()) {
55 |     group_points_grad_kernel_wrapper(
56 |         grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),
57 |         grad_out.data<float>(), idx.data<int>(), output.data<float>());
58 |   } else {
59 |     AT_ASSERT(false, "CPU not supported");
60 |   }
61 | 
62 |   return output;
63 | }
64 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/grouping/grouping_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "grouping_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
 6 |     // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= m * nsample * c) return;
 9 |     const int c_idx = index % c;
10 |     const int nsample_idx = (index / c) % nsample;
11 |     const int m_idx = index / nsample / c;
12 |     const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
13 |     output[index] = input[input_idx];
14 | }
15 | 
// Scatter-add kernel: one thread per grad_output element.
// grad_output: (m, nsample, c), idx: (m, nsample), grad_input: (n, c)
__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int total = m * nsample * c;
    if (tid < total) {
        // tid / c is the flat (m_idx, nsample_idx) position inside idx.
        const int pair = tid / c;
        const int channel = tid % c;
        const int dst = idx[pair] * c + channel;
        // Several threads can target the same input row, hence the atomic.
        atomicAdd(grad_input + dst, grad_output[tid]);
    }
}
26 | 
// Launches grouping_forward_cuda_kernel with one thread per output element.
// input: (n, c), idx: (m, nsample), output: (m, nsample, c)
void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
    const dim3 grid(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    grouping_forward_cuda_kernel<<<grid, block, 0>>>(m, nsample, c, input, idx, output);
}
33 | 
// Launches grouping_backward_cuda_kernel with one thread per grad_output
// element.
// grad_output: (m, nsample, c), idx: (m, nsample), grad_input: (n, c)
void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) {
    const dim3 grid(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    grouping_backward_cuda_kernel<<<grid, block, 0>>>(m, nsample, c, grad_output, idx, grad_input);
}
41 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declarations for the relative-position-encoding operators: tensor-level
// entry points plus the raw-pointer launchers they pair with.
#ifndef _RPE_CUDA_KERNEL
#define _RPE_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Dot product of q with table entries selected through index / rel_idx, and
// its backward pass; results go into the output / grad_* tensors supplied by
// the caller.
// NOTE(review): presumably N/M/h/hdim are the number of points, number of
// index pairs, number of heads and per-head width - confirm against the .cu.
void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor);

// Attention step 2 that additionally takes a table and rel_idx, and its
// backward pass (which also produces a gradient for the table).
void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

// The launchers are given C linkage and mirror the functions above with raw
// float/int pointers in place of tensors.
#ifdef __cplusplus
extern "C" {
#endif

void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table);

void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
#endif
27 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/interpolation/interpolation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "interpolation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
 6 | {
 7 |     // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
 8 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 9 |     if (index >= n * c) return;
10 |     int c_idx = index % c;
11 |     int n_idx = index / c;
12 |     for (int i = 0; i < k; i++)
13 |     {
14 |         int idx_idx = n_idx * k + i;
15 |         int input_idx = idx[idx_idx] * c + c_idx;
16 |         output[index] += input[input_idx] * weight[idx_idx];
17 |     }
18 | }
19 | 
// Backward of the weighted gather: one thread per grad_output element,
// scattering its weighted value to the k input rows it was built from.
// grad_output: (n, c), idx: (n, k), weight: (n, k), grad_input: (m, c)
__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;
    const int channel = tid % c;
    const int row_base = (tid / c) * k;  // start of this row in idx / weight
    for (int j = 0; j < k; ++j)
    {
        const int slot = row_base + j;
        const int dst = idx[slot] * c + channel;
        // Several threads can target the same input row, hence the atomic.
        atomicAdd(grad_input + dst, grad_output[tid] * weight[slot]);
    }
}
34 | 
// Launches interpolation_forward_cuda_kernel with one thread per output
// element.
// input: (m, c), idx: (n, k), weight: (n, k), output: (n, c)
void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
    const dim3 grid(DIVUP(n * c, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    interpolation_forward_cuda_kernel<<<grid, block, 0>>>(n, c, k, input, idx, weight, output);
}
41 | 
// Launches interpolation_backward_cuda_kernel with one thread per grad_output
// element.
// grad_output: (n, c), idx: (n, k), weight: (n, k), grad_input: (m, c)
void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
    const dim3 grid(DIVUP(n * c, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    interpolation_backward_cuda_kernel<<<grid, block, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
}
48 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py:
--------------------------------------------------------------------------------
# Manual comparison of pointops.dot_prod_with_idx_v2 against the sum of two
# pointops.dot_prod_with_idx calls (one for the query side, one for the key
# side) on random CUDA inputs. It prints gradient slices and the maximum
# squared difference between the two outputs; nothing is asserted.
import torch
import pointops

torch.manual_seed(1)

M = 80000
N = 3500
hdim = 16
h = 6
L = 31
query = torch.rand(N, h, hdim).cuda()
table_q = torch.rand(L, h, hdim, 3).cuda()
key = torch.rand(N, h, hdim).cuda()
table_k = torch.rand(L, h, hdim, 3).cuda()

# Random row indices into `query`. torch.rand draws from [0, 1), so the clamp
# below never changes anything and the scaled values stay within [0, N).
index_q = torch.rand(M)
index_q[index_q < 0] = 0
index_q = (index_q * N).long().cuda()

# Random row indices into `key`, built the same way.
index_k = torch.rand(M)
index_k[index_k < 0] = 0
index_k = (index_k * N).long().cuda()

# Random indices into the first axis of the tables, three per row, in [0, L).
rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index * L).long().cuda()

query.requires_grad = True
table_q.requires_grad = True
key.requires_grad = True
table_k.requires_grad = True

# Reference result: the v1 op applied separately to query and key, summed.
# Its backward pass is left commented out, so the gradients printed further
# down come from the v2 op only.
output1 = pointops.dot_prod_with_idx(
    query, index_q.int(), table_q, rel_index.int()
)
output2 = pointops.dot_prod_with_idx(
    key, index_k.int(), table_k, rel_index.int()
)
output = output1 + output2
# loss = output.mean()
# loss.backward()

# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
# input()

# print("query.is_contiguous(): ", query.is_contiguous())
# print("key.is_contiguous(): ", key.is_contiguous())
# print("index_0.is_contiguous(): ", index_0.is_contiguous())
# print("index_1.is_contiguous(): ", index_1.is_contiguous())

# Op under test: takes both sides in a single call.
output_v2 = pointops.dot_prod_with_idx_v2(
    query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int()
)
loss = output_v2.mean()
loss.backward()

print(
    "output_v2.shape: {}, output_v2[:5,:10]: {}".format(
        output_v2.shape, output_v2[:5, :10]
    )
)
print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
# input()

# Maximum squared difference between the reference and the v2 output.
print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
73 | 


--------------------------------------------------------------------------------
/utils/gradflow_check.py:
--------------------------------------------------------------------------------
 1 | """ https://github.com/alwynmathew/gradflow-check """
 2 | import matplotlib.pyplot as plt
 3 | import numpy as np
 4 | from matplotlib.lines import Line2D
 5 | 
 6 | 
 7 | def plot_grad_flow(named_parameters):
 8 |     ave_grads = []
 9 |     layers = []
10 |     for n, p in named_parameters:
11 |         if (p.requires_grad) and ("bias" not in n):
12 |             if p.grad:
13 |                 layers.append(n)
14 |                 ave_grads.append(p.grad.abs().mean())
15 |             else:
16 |                 print(f"{n} - doesn't have gradient computed")
17 | 
18 |     plt.plot(ave_grads, alpha=0.3, color="b")
19 |     plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
20 |     plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
21 |     plt.xlim(xmin=0, xmax=len(ave_grads))
22 |     plt.xlabel("Layers")
23 |     plt.ylabel("average gradient")
24 |     plt.title("Gradient flow")
25 |     plt.grid(True)
26 | 
27 | 
def plot_grad_flow_v2(named_parameters):
    """Plots the gradients flowing through different layers in the net during training.
    Can be used for checking for possible gradient vanishing / exploding problems.

    Draws the max and mean absolute gradient of every non-bias trainable
    parameter as bars on the current matplotlib figure. Parameters that
    require a gradient but have none yet are reported on stdout and left out
    of the plot.

    Usage: Plug this function in Trainer class after loss.backwards() as
    "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow"""
    ave_grads = []
    max_grads = []
    layers = []
    for n, p in named_parameters:
        if not p.requires_grad or "bias" in n:
            continue
        # `if p.grad:` would evaluate the truth value of a tensor, which
        # raises for any gradient with more than one element.
        if p.grad is None:
            print(f"{n} - doesn't have gradient computed")
            continue
        # Record the name only when values are recorded too, so tick labels
        # stay aligned with the bars.
        layers.append(n)
        abs_grad = p.grad.abs()
        # .item() yields plain floats, so matplotlib never receives a
        # (possibly CUDA) tensor.
        ave_grads.append(abs_grad.mean().item())
        max_grads.append(abs_grad.max().item())
    plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, lw=2, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.ylim(bottom=-0.001, top=0.02)  # zoom in on the lower gradient regions
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.legend(
        [
            Line2D([0], [0], color="c", lw=4),
            Line2D([0], [0], color="b", lw=4),
            Line2D([0], [0], color="k", lw=4),
        ],
        ["max-gradient", "mean-gradient", "zero-gradient"],
    )
63 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/subtraction/subtraction_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "subtraction_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
 6 |     // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= n * nsample * c) return;
 9 |     const int c_idx = index % c;
10 |     const int nsample_idx = (index / c) % nsample;
11 |     const int n_idx = index / nsample / c;
12 |     const int idx_idx = n_idx * nsample + nsample_idx;
13 |     const int input1_idx = n_idx * c + c_idx;
14 |     const int input2_idx = idx[idx_idx] * c + c_idx;
15 |     output[index] = input1[input1_idx] - input2[input2_idx];
16 | }
17 | 
__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // Layouts: grad_output (n, nsample, c); grad_input1 and grad_input2 are (n, c).
    // Each thread scatters one element of grad_output:
    //   grad_input1[i, ch]         += grad_output[i, s, ch]
    //   grad_input2[idx[i, s], ch] -= grad_output[i, s, ch]
    // The kernel only accumulates, so the gradient buffers are presumably
    // zero-initialised by the caller -- TODO confirm.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int total = n * nsample * c;
    if (tid >= total) return;

    const int ch = tid % c;
    const int pair = tid / c;        // flat (i, s) offset into idx
    const int row = pair / nsample;  // i

    const int lhs_offset = row * c + ch;
    const int rhs_offset = idx[pair] * c + ch;
    const float g = grad_output[tid];

    // Atomics are required: every sample s of the same row targets the same
    // grad_input1 slot, and idx may send several threads to one grad_input2 slot.
    atomicAdd(grad_input1 + lhs_offset, g);
    atomicAdd(grad_input2 + rhs_offset, -g);
}
31 | 
void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
    // Layouts: input1 (n, c), input2 (n, c), idx (n, nsample), output (n, nsample, c).
    // Launches one thread per output element, THREADS_PER_BLOCK threads per block.
    // No stream argument is given, so the launch does not select a stream explicitly.
    const int total = n * nsample * c;
    dim3 threads(THREADS_PER_BLOCK);
    dim3 blocks(DIVUP(total, THREADS_PER_BLOCK));
    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
}
38 | 
void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
    // Layouts: grad_output (n, nsample, c); grad_input1 and grad_input2 are (n, c).
    // Launches one thread per grad_output element, THREADS_PER_BLOCK threads per block.
    const int total = n * nsample * c;
    dim3 threads(THREADS_PER_BLOCK);
    dim3 blocks(DIVUP(total, THREADS_PER_BLOCK));
    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
}
45 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_relative_pos_encoding_op_step2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_scatter import (
 3 |     scatter_sum,
 4 | )
 5 | 
# Reference (pure PyTorch + torch_scatter) computation of the "step 2"
# attention aggregation with a relative positional encoding added to the
# values. It prints the output and the gradients so they can be compared by
# eye against a fused implementation (see the commented-out code below).
# Requires a CUDA device.
torch.manual_seed(1)  # fixed seed so the printed numbers are reproducible

# Sizes of the random problem: attn is (M, h), v is (N, h, hdim) and
# table is (L, h, hdim, 3).
M = 80000
N = 3500
hdim = 16
h = 6
L = 31
attn = torch.rand(M, h).cuda()
v = torch.rand(N, h, hdim).cuda()
table = torch.rand(L, h, hdim, 3).cuda()

# Random integer indices in [0, N). torch.rand draws from [0, 1), so the
# "< 0" mask below never selects anything; it is kept as-is.
index_0 = torch.rand(M)
index_0[index_0 < 0] = 0
index_0 = (index_0 * N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1 * N).long().cuda()

# Random integer table rows in [0, L), one per axis (last dim of size 3).
rel_index = torch.rand(M, 3)
rel_index[rel_index < 0] = 0
rel_index = (rel_index * L).long().cuda()

# Track gradients for everything that is printed after backward().
attn.requires_grad = True
v.requires_grad = True
table.requires_grad = True

v_flat = v[index_1]  # [M, h, hdim]
# Split the table along its last dimension into one lookup table per axis.
table_x, table_y, table_z = (
    table[:, :, :, 0],
    table[:, :, :, 1],
    table[:, :, :, 2],
)  # [L, h, hdim]
rel_index_x, rel_index_y, rel_index_z = (
    rel_index[:, 0],
    rel_index[:, 1],
    rel_index[:, 2],
)  # [M]
# The encoding is the sum of the three per-axis table lookups.
rel_pos_encoding = (
    table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z]
)  # [M, h, hdim]
v_flat_new = v_flat + rel_pos_encoding  # [M, h, hdim]
output = attn.unsqueeze(-1) * v_flat_new  # [M, h, hdim]
# Sum the M weighted rows into the N slots selected by index_0.
output = scatter_sum(
    src=output, index=index_0, dim=0, dim_size=N
)  # [N, h, hdim]
loss = output.mean()
loss.backward()

print(
    "output.shape: {}, output[:5,:10,:5]: {}".format(
        output.shape, output[:5, :10, :5]
    )
)
print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
# Waits for Enter on stdin before continuing; this blocks when the script is
# run non-interactively.
input()
64 | 
65 | # print("query.is_contiguous(): ", query.is_contiguous())
66 | # print("key.is_contiguous(): ", key.is_contiguous())
67 | # print("index_0.is_contiguous(): ", index_0.is_contiguous())
68 | # print("index_1.is_contiguous(): ", index_1.is_contiguous())
69 | 
70 | # output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int())
71 | # loss = output_v2.mean()
72 | # loss.backward()
73 | 
74 | # print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5]))
75 | # print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
76 | # print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
77 | # print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
78 | # input()
79 | 
80 | # print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
81 | 
82 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
83 | 


--------------------------------------------------------------------------------
/utils/point_cloud_utils.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List, Optional, Tuple
 3 | 
 4 | import numpy as np
 5 | import open3d
 6 | from plyfile import PlyData, PlyElement
 7 | 
 8 | 
 9 | def load_ply(filepath):
10 |     with open(filepath, "rb") as f:
11 |         plydata = PlyData.read(f)
12 |     data = plydata.elements[0].data
13 |     coords = np.array([data["x"], data["y"], data["z"]], dtype=np.float32).T
14 |     feats = None
15 |     labels = None
16 |     if ({"red", "green", "blue"} - set(data.dtype.names)) == set():
17 |         feats = np.array(
18 |             [data["red"], data["green"], data["blue"]], dtype=np.uint8
19 |         ).T
20 |     if "label" in data.dtype.names:
21 |         labels = np.array(data["label"], dtype=np.uint32)
22 |     return coords, feats, labels
23 | 
24 | 
def load_ply_with_normals(filepath):
    """Load a PLY mesh and append per-vertex normals to its features.

    The file is read twice: once with open3d to obtain (or compute) the
    vertex normals, and once with ``load_ply`` for coordinates, colors and
    labels.

    Args:
        filepath: path of the PLY file.

    Returns:
        A ``(coords, feats, labels)`` tuple. ``feats`` holds the colors
        followed by the normals, column-wise; when the file has no color
        fields (``load_ply`` returns ``feats=None``) it holds the normals
        only. ``labels`` is ``None`` when the file has no ``label`` field.

    Raises:
        AssertionError: if the coordinates read by the two loaders differ.
    """
    mesh = open3d.io.read_triangle_mesh(str(filepath))
    if not mesh.has_vertex_normals():
        mesh.compute_vertex_normals()
    vertices = np.asarray(mesh.vertices)
    normals = np.asarray(mesh.vertex_normals)

    coords, feats, labels = load_ply(filepath)
    assert np.allclose(coords, vertices), "different coordinates"

    # load_ply yields feats=None for color-less files; np.hstack((None, ...))
    # would fail with an unrelated shape error, so fall back to normals only.
    feats = normals if feats is None else np.hstack((feats, normals))

    return coords, feats, labels
37 | 
38 | 
def load_obj_with_normals(filepath):
    """Load a mesh with open3d and return its vertices and color+normal features.

    Vertex normals are computed when the mesh does not already carry them.

    Args:
        filepath: path of the mesh file; it is converted to ``str`` for open3d.

    Returns:
        A ``(coords, feats)`` tuple, where ``coords`` are the mesh vertices
        and ``feats`` is the column-wise stack of vertex colors and vertex
        normals.
    """
    mesh = open3d.io.read_triangle_mesh(str(filepath))
    if not mesh.has_vertex_normals():
        mesh.compute_vertex_normals()

    vertex_colors = np.asarray(mesh.vertex_colors)
    vertex_normals = np.asarray(mesh.vertex_normals)
    return (
        np.asarray(mesh.vertices),
        np.hstack((vertex_colors, vertex_normals)),
    )
49 | 
50 | 
def write_point_cloud_in_ply(
    filepath: Path,
    coords: np.ndarray,
    feats: Optional[np.ndarray] = None,
    labels: Optional[np.ndarray] = None,
    dtypes: Optional[List[Tuple[str, str]]] = None,
    comments: Optional[List[str]] = None,
):
    """Write a point cloud to ``filepath`` as a PLY ``vertex`` element.

    Args:
        filepath: destination passed to ``PlyData.write``.
        coords: 2-D array whose columns fill the first three ``dtypes`` fields.
        feats: optional 2-D array whose columns fill the fields between the
            coordinates and the label. When omitted, those fields are dropped.
        labels: optional 1-D array filling the last ``dtypes`` field. When
            omitted, that field is dropped.
        dtypes: ``(name, numpy dtype string)`` pairs laid out as three
            coordinate fields, then the feature fields, then one label field.
            Defaults to ``x, y, z`` (``<f4``), ``red, green, blue`` (``u1``)
            and ``label`` (``<u2``).
        comments: comment lines handed to ``PlyElement.describe``. Defaults
            to ``[""]``.
    """
    # Defaults are created per call instead of being shared mutable objects;
    # passing None explicitly now selects the defaults as well.
    if dtypes is None:
        dtypes = [
            ("x", "<f4"),
            ("y", "<f4"),
            ("z", "<f4"),
            ("red", "u1"),
            ("green", "u1"),
            ("blue", "u1"),
            ("label", "<u2"),
        ]
    if comments is None:
        comments = [""]

    columns = [coords]
    if feats is not None:
        columns.append(feats)
    else:
        # No features: keep only the coordinate fields and the label field.
        dtypes = dtypes[:3] + dtypes[-1:]
    if labels is not None:
        columns.append(labels[:, np.newaxis])
    else:
        dtypes = dtypes[:-1]

    # Column i of the stacked table feeds field i of the structured array.
    table = np.hstack(columns)
    vertex_records = np.empty(len(coords), dtype=dtypes)
    for column, (field_name, _) in enumerate(dtypes):
        vertex_records[field_name] = table[:, column]

    ply = PlyData(
        [PlyElement.describe(vertex_records, "vertex", comments=comments)]
    )
    ply.write(filepath)
84 | 


--------------------------------------------------------------------------------
/utils/votenet_utils/tf_logger.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | import tensorflow as tf
 7 | import numpy as np
 8 | import scipy.misc
 9 | 
10 | try:
11 |     from StringIO import StringIO  # Python 2.7
12 | except ImportError:
13 |     from io import BytesIO  # Python 3.x
14 | 
15 | 
class Logger(object):
    """Write scalar, image and histogram summaries to a TensorFlow event file.

    NOTE(review): the class uses ``tf.summary.FileWriter``, ``tf.Summary``
    and ``tf.HistogramProto``; presumably this requires a TensorFlow 1.x
    style API -- confirm against the installed version.
    """

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable.

        Args:
            tag: name of the summary.
            value: scalar stored as ``simple_value``.
            step: global step the value is recorded at.
        """
        summary = tf.Summary(
            value=[tf.Summary.Value(tag=tag, simple_value=value)]
        )
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded and stored under the tag ``"<tag>/<index>"``.

        Args:
            tag: common prefix of the summary tags.
            images: iterable of arrays; ``shape[0]`` and ``shape[1]`` are
                recorded as height and width.
            step: global step the images are recorded at.
        """
        # io.BytesIO exists on Python 2.7 and 3.x alike. The previous code
        # tried StringIO first and relied on a bare ``except`` swallowing the
        # resulting NameError on Python 3.
        from io import BytesIO

        img_summaries = []
        for i, img in enumerate(images):
            # Write the image to an in-memory PNG buffer
            s = BytesIO()
            # NOTE(review): scipy.misc.toimage is no longer available in
            # current SciPy releases, so this call needs an older SciPy.
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(
                encoded_image_string=s.getvalue(),
                height=img.shape[0],
                width=img.shape[1],
            )
            # Create a Summary value
            img_summaries.append(
                tf.Summary.Value(tag="%s/%d" % (tag, i), image=img_sum)
            )

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        Args:
            tag: name of the summary.
            values: array-like with a ``shape`` attribute, supporting ``**``.
            step: global step the histogram is recorded at.
            bins: forwarded to ``np.histogram``.
        """

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Drop the start of the first bin, leaving one upper limit per bucket
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary; this is the only method that flushes
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
82 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/attention/attention_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "attention_cuda_kernel.h"
 6 | 
 7 | void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, 
 8 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor)
 9 | {
10 |     const float *q = q_tensor.data_ptr<float>();
11 |     const float *k = k_tensor.data_ptr<float>();
12 |     const int *index0 = index0_tensor.data_ptr<int>();
13 |     const int *index1 = index1_tensor.data_ptr<int>();
14 |     float *attn = attn_tensor.data_ptr<float>();
15 |     attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn);
16 | }
17 | 
18 | void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
19 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 
20 |     at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
21 | {
22 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
23 |     const int *index0 = index0_tensor.data_ptr<int>();
24 |     const int *index1 = index1_tensor.data_ptr<int>();
25 |     const float *q = q_tensor.data_ptr<float>();
26 |     const float *k = k_tensor.data_ptr<float>();
27 |     float *grad_q = grad_q_tensor.data_ptr<float>();
28 |     float *grad_k = grad_k_tensor.data_ptr<float>();
29 |     attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
30 | }
31 | 
32 | void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 
33 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
34 | {
35 |     const float *attn = attn_tensor.data_ptr<float>();
36 |     const float *v = v_tensor.data_ptr<float>();
37 |     const int *index0 = index0_tensor.data_ptr<int>();
38 |     const int *index1 = index1_tensor.data_ptr<int>();
39 |     float *output = output_tensor.data_ptr<float>();
40 |     attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output);
41 | }
42 | 
43 | 
44 | void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
45 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 
46 |     at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
47 | {
48 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
49 |     const int *index0 = index0_tensor.data_ptr<int>();
50 |     const int *index1 = index1_tensor.data_ptr<int>();
51 |     const float *attn = attn_tensor.data_ptr<float>();
52 |     const float *v = v_tensor.data_ptr<float>();
53 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
54 |     float *grad_v = grad_v_tensor.data_ptr<float>();
55 |     attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
56 | }
57 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/attention_v2/attention_cuda_v2.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "attention_cuda_kernel_v2.h"
 6 | 
 7 | void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, 
 8 |     at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor)
 9 | {
10 |     const float *q = q_tensor.data_ptr<float>();
11 |     const float *k = k_tensor.data_ptr<float>();
12 |     const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
13 |     const int *index1 = index1_tensor.data_ptr<int>();
14 |     float *attn = attn_tensor.data_ptr<float>();
15 |     attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn);
16 | }
17 | 
18 | void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, 
19 |     at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, 
20 |     at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
21 | {
22 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
23 |     const int *index0_offsets = index0_tensor_offsets.data_ptr<int>();
24 |     const int *index1 = index1_tensor.data_ptr<int>();
25 |     const float *q = q_tensor.data_ptr<float>();
26 |     const float *k = k_tensor.data_ptr<float>();
27 |     float *grad_q = grad_q_tensor.data_ptr<float>();
28 |     float *grad_k = grad_k_tensor.data_ptr<float>();
29 |     attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k);
30 | }
31 | 
32 | void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, 
33 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
34 | {
35 |     const float *attn = attn_tensor.data_ptr<float>();
36 |     const float *v = v_tensor.data_ptr<float>();
37 |     const int *index0 = index0_tensor.data_ptr<int>();
38 |     const int *index1 = index1_tensor.data_ptr<int>();
39 |     float *output = output_tensor.data_ptr<float>();
40 |     attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output);
41 | }
42 | 
43 | 
44 | void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, 
45 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, 
46 |     at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor)
47 | {
48 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
49 |     const int *index0 = index0_tensor.data_ptr<int>();
50 |     const int *index1 = index1_tensor.data_ptr<int>();
51 |     const float *attn = attn_tensor.data_ptr<float>();
52 |     const float *v = v_tensor.data_ptr<float>();
53 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
54 |     float *grad_v = grad_v_tensor.data_ptr<float>();
55 |     attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
56 | }
57 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_attention_op_step1.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import pointops
  3 | 
# Manual comparison of pointops.attention_step1 against
# pointops.attention_step1_v2 on random inputs. Nothing is asserted: the
# differences are printed for inspection. Requires a CUDA device.
torch.manual_seed(1)  # fixed seed so the printed numbers are reproducible

# Sizes of the random problem: query and key are (N, h, C // h) and both
# index vectors have M entries.
M = 800000
N = 35000
C = 96
h = 6
query = torch.rand(N, h, C // h).cuda()
key = torch.rand(N, h, C // h).cuda()

# Random integer indices in [0, N). torch.rand draws from [0, 1), so the
# "< 0" mask below never selects anything; it is kept as-is.
index_0 = torch.rand(M)
index_0[index_0 < 0] = 0
index_0 = (index_0 * N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1 * N).long().cuda()

query.requires_grad = True
key.requires_grad = True

# rearrange index for acceleration
# Sort the pairs by index_0 so that equal values of index_0 are adjacent;
# index_1 is permuted the same way to keep the pairs intact.
index_0, indices = torch.sort(index_0)  # [M,]
index_1 = index_1[indices]  # [M,]
index_0_counts = index_0.bincount()

print("index_0_counts.shape: ", index_0_counts.shape)

# n_max is a 0-dim tensor holding the largest number of pairs that share one
# index_0 value; the cumulative sum gives the end offset of every group.
n_max = index_0_counts.max()
index_0_offsets = index_0_counts.cumsum(dim=-1)  # [N]

print("v1 index_0_offsets.shape: ", index_0_offsets.shape)

# Prepend 0 so that entry i is the start offset of group i.
index_0_offsets = torch.cat(
    [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
)  # [N+1]

# print("index_0[:100]: ", index_0[:100])
print("n_max: ", n_max)
print("index_0_offsets.shape: ", index_0_offsets.shape)
# input()

print("index_0_offsets[:100]: ", index_0_offsets[:100])
print("index_1[300:320]: ", index_1[300:320])


# First implementation: takes the explicit (index_0, index_1) pairs.
attn_flat = pointops.attention_step1(
    query.float(), key.float(), index_0.int(), index_1.int()
)
# loss = attn_flat.sum()
# loss.backward()
print(
    "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format(
        attn_flat.shape, attn_flat[300:320, :10]
    )
)
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# input()

print("query.is_contiguous(): ", query.is_contiguous())
print("key.is_contiguous(): ", key.is_contiguous())
print("index_0.is_contiguous(): ", index_0.is_contiguous())
print("index_1.is_contiguous(): ", index_1.is_contiguous())

# Second implementation: takes index_1 plus the group offsets and n_max
# instead of the explicit index_0 vector.
attn_flat_v2 = pointops.attention_step1_v2(
    query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max
)
# loss = attn_flat_v2.sum()
# loss.backward()
print(
    "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format(
        attn_flat_v2.shape, attn_flat_v2[300:320, :10]
    )
)
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# input()

# Restrict the first comparison to rows of the v2 output whose sum is non-zero.
mask = attn_flat_v2.sum(-1) != 0
print("mask.sum(): ", mask.sum())
print(
    "attn_flat_v2[mask] - attn_flat[mask]: ",
    ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(),
)


# Element-wise agreement over the full outputs (squared error below 1e-8).
print(
    "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ",
    ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(),
)

# Largest squared error per column over the first `selected` rows.
selected = 10000
print(
    "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ",
    torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0),
)
100 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/sampling.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | #include "sampling.h"
 4 | #include "utils.h"
 5 | 
 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
 7 |                                   const float *points, const int *idx,
 8 |                                   float *out);
 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
10 |                                        const float *grad_out, const int *idx,
11 |                                        float *grad_points);
12 | 
13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
14 |                                             const float *dataset, float *temp,
15 |                                             int *idxs);
16 | 
17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) {
18 |   CHECK_CONTIGUOUS(points);
19 |   CHECK_CONTIGUOUS(idx);
20 |   CHECK_IS_FLOAT(points);
21 |   CHECK_IS_INT(idx);
22 | 
23 |   if (points.is_cuda()) {
24 |     CHECK_CUDA(idx);
25 |   }
26 | 
27 |   at::Tensor output =
28 |       torch::zeros({points.size(0), points.size(1), idx.size(1)},
29 |                    at::device(points.device()).dtype(at::ScalarType::Float));
30 | 
31 |   if (points.is_cuda()) {
32 |     gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
33 |                                  idx.size(1), points.data<float>(),
34 |                                  idx.data<int>(), output.data<float>());
35 |   } else {
36 |     AT_ASSERT(false, "CPU not supported");
37 |   }
38 | 
39 |   return output;
40 | }
41 | 
42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,
43 |                               const int n) {
44 |   CHECK_CONTIGUOUS(grad_out);
45 |   CHECK_CONTIGUOUS(idx);
46 |   CHECK_IS_FLOAT(grad_out);
47 |   CHECK_IS_INT(idx);
48 | 
49 |   if (grad_out.is_cuda()) {
50 |     CHECK_CUDA(idx);
51 |   }
52 | 
53 |   at::Tensor output =
54 |       torch::zeros({grad_out.size(0), grad_out.size(1), n},
55 |                    at::device(grad_out.device()).dtype(at::ScalarType::Float));
56 | 
57 |   if (grad_out.is_cuda()) {
58 |     gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,
59 |                                       idx.size(1), grad_out.data<float>(),
60 |                                       idx.data<int>(), output.data<float>());
61 |   } else {
62 |     AT_ASSERT(false, "CPU not supported");
63 |   }
64 | 
65 |   return output;
66 | }
67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {
68 |   CHECK_CONTIGUOUS(points);
69 |   CHECK_IS_FLOAT(points);
70 | 
71 |   at::Tensor output =
72 |       torch::zeros({points.size(0), nsamples},
73 |                    at::device(points.device()).dtype(at::ScalarType::Int));
74 | 
75 |   at::Tensor tmp =
76 |       torch::full({points.size(0), points.size(1)}, 1e10,
77 |                   at::device(points.device()).dtype(at::ScalarType::Float));
78 | 
79 |   if (points.is_cuda()) {
80 |     furthest_point_sampling_kernel_wrapper(
81 |         points.size(0), points.size(1), nsamples, points.data<float>(),
82 |         tmp.data<float>(), output.data<int>());
83 |   } else {
84 |     AT_ASSERT(false, "CPU not supported");
85 |   }
86 | 
87 |   return output;
88 | }
89 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/group_points_gpu.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates.
 2 | 
 3 | 
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | 
 7 | #include "cuda_utils.h"
 8 | 
 9 | // input: points(b, c, n) idx(b, npoints, nsample)
10 | // output: out(b, c, npoints, nsample)
11 | __global__ void group_points_kernel(int b, int c, int n, int npoints,
12 |                                     int nsample,
13 |                                     const float *__restrict__ points,
14 |                                     const int *__restrict__ idx,
15 |                                     float *__restrict__ out) {
16 |   int batch_index = blockIdx.x;
17 |   points += batch_index * n * c;
18 |   idx += batch_index * npoints * nsample;
19 |   out += batch_index * npoints * nsample * c;
20 | 
21 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
22 |   const int stride = blockDim.y * blockDim.x;
23 |   for (int i = index; i < c * npoints; i += stride) {
24 |     const int l = i / npoints;
25 |     const int j = i % npoints;
26 |     for (int k = 0; k < nsample; ++k) {
27 |       int ii = idx[j * nsample + k];
28 |       out[(l * npoints + j) * nsample + k] = points[l * n + ii];
29 |     }
30 |   }
31 | }
32 | 
// Launches group_points_kernel on the current CUDA stream with a grid of b
// blocks (the kernel uses blockIdx.x as the batch index) and a block shape
// chosen by opt_block_config(npoints, c), then checks for CUDA errors.
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
                                 const float *points, const int *idx,
                                 float *out) {
  cudaStream_t current_stream = at::cuda::getCurrentCUDAStream();
  const auto block_shape = opt_block_config(npoints, c);

  group_points_kernel<<<b, block_shape, 0, current_stream>>>(
      b, c, n, npoints, nsample, points, idx, out);

  CUDA_CHECK_ERRORS();
}
43 | 
// Backward pass of group_points_kernel: scatters every output gradient back
// to the point it was gathered from.
// input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
// output: grad_points(b, c, n)
//   grad_points[batch, ch, idx[batch, p, k]] += grad_out[batch, ch, p, k]
// The kernel only accumulates, so grad_points is presumably zero-initialised
// by the caller -- TODO confirm.
__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
                                         int nsample,
                                         const float *__restrict__ grad_out,
                                         const int *__restrict__ idx,
                                         float *__restrict__ grad_points) {
  // Advance all three pointers to this block's batch element.
  const int batch = blockIdx.x;
  grad_out += batch * npoints * nsample * c;
  idx += batch * npoints * nsample;
  grad_points += batch * n * c;

  // The threads of the block stride together over the (channel, point) pairs.
  const int tid = threadIdx.y * blockDim.x + threadIdx.x;
  const int nthreads = blockDim.y * blockDim.x;
  for (int pair = tid; pair < c * npoints; pair += nthreads) {
    const int ch = pair / npoints;
    const int pt = pair % npoints;

    const int *neighbours = idx + pt * nsample;
    const float *src = grad_out + (ch * npoints + pt) * nsample;
    float *channel_row = grad_points + ch * n;
    for (int k = 0; k < nsample; ++k) {
      // Atomic because the same point index can occur for several
      // (point, sample) combinations handled by different threads.
      atomicAdd(channel_row + neighbours[k], src[k]);
    }
  }
}
68 | 
// Launches group_points_grad_kernel on the current CUDA stream with a grid of
// b blocks (the kernel uses blockIdx.x as the batch index) and a block shape
// chosen by opt_block_config(npoints, c), then checks for CUDA errors.
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                      int nsample, const float *grad_out,
                                      const int *idx, float *grad_points) {
  cudaStream_t current_stream = at::cuda::getCurrentCUDAStream();
  const auto block_shape = opt_block_config(npoints, c);

  group_points_grad_kernel<<<b, block_shape, 0, current_stream>>>(
      b, c, n, npoints, nsample, grad_out, idx, grad_points);

  CUDA_CHECK_ERRORS();
}
79 | 


--------------------------------------------------------------------------------
/utils/votenet_utils/nn_distance.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | """ Chamfer distance in Pytorch.
 7 | Author: Charles R. Qi
 8 | """
 9 | 
10 | import torch
11 | import numpy as np
12 | 
13 | 
def huber_loss(error, delta=1.0):
    """Element-wise Huber loss.

    With ``x = error`` (``pred - gt`` or ``dist(pred, gt)``) and ``d = delta``:

        0.5 * |x|^2                 if |x| <= d
        0.5 * d^2 + d * (|x| - d)   if |x| >  d

    Args:
        error: Torch tensor (d1,d2,...,dk)
        delta: threshold between the quadratic and the linear regime.
    Returns:
        loss: Torch tensor (d1,d2,...,dk)

    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    magnitude = error.abs()
    # The part of |x| up to delta is penalised quadratically, the remainder
    # linearly; adding both terms reproduces the piecewise definition.
    within_delta = magnitude.clamp(max=delta)
    beyond_delta = magnitude - within_delta
    return 0.5 * within_delta.pow(2) + delta * beyond_delta
32 | 
33 | 
def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False):
    """Nearest-neighbour distances between two batched point sets.

    Input:
        pc1: (B,N,C) torch tensor
        pc2: (B,M,C) torch tensor
        l1smooth: bool, whether to use l1smooth loss
        delta: scalar, the delta used in l1smooth loss
        l1: bool, whether to use the L1 distance; only consulted when
            l1smooth is False. With both flags False the squared L2
            distance is used.
    Output:
        dist1: (B,N) torch float32 tensor
        idx1: (B,N) torch int64 tensor
        dist2: (B,M) torch float32 tensor
        idx2: (B,M) torch int64 tensor
    """
    # Broadcasting (B,N,1,C) against (B,1,M,C) yields the same (B,N,M,C)
    # differences as tiling both inputs with repeat(), without materialising
    # the two tiled copies.
    pc_diff = pc1.unsqueeze(2) - pc2.unsqueeze(1)

    if l1smooth:
        pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1)  # (B,N,M)
    elif l1:
        pc_dist = torch.sum(torch.abs(pc_diff), dim=-1)  # (B,N,M)
    else:
        pc_dist = torch.sum(pc_diff**2, dim=-1)  # (B,N,M)
    dist1, idx1 = torch.min(pc_dist, dim=2)  # (B,N)
    dist2, idx2 = torch.min(pc_dist, dim=1)  # (B,M)
    return dist1, idx1, dist2, idx2
62 | 
63 | 
def demo_nn_distance():
    """Print nn_distance results next to NumPy reference distance matrices.

    Two random point sets of 5 and 6 points (seeded with 0) are compared,
    first with the default squared-L2 distance and then with the smooth-L1
    (Huber, delta 1.0) distance. Nothing is asserted; the values are printed
    for visual comparison.
    """
    np.random.seed(0)
    pc1arr = np.random.random((1, 5, 3))
    pc2arr = np.random.random((1, 6, 3))
    pc1 = torch.from_numpy(pc1arr.astype(np.float32))
    pc2 = torch.from_numpy(pc2arr.astype(np.float32))

    # Squared L2: torch result, then the full reference matrix.
    nearest_dist, nearest_idx, _, _ = nn_distance(pc1, pc2)
    print(nearest_dist)
    print(nearest_idx)
    reference = np.array(
        [
            [np.sum((pc1arr[0, i, :] - pc2arr[0, j, :]) ** 2) for j in range(6)]
            for i in range(5)
        ]
    )
    print(reference)

    print("-" * 30)
    print("L1smooth dists:")

    # Smooth L1: torch result, then the reference matrix built entry by entry.
    nearest_dist, nearest_idx, _, _ = nn_distance(pc1, pc2, True)
    print(nearest_dist)
    print(nearest_idx)
    reference = np.zeros((5, 6))
    for i in range(5):
        for j in range(6):
            abs_diff = np.abs(pc1arr[0, i, :] - pc2arr[0, j, :])
            capped = np.minimum(abs_diff, 1.0)
            remainder = abs_diff - capped
            reference[i, j] = np.sum(0.5 * capped**2 + 1.0 * remainder)
    print(reference)


if __name__ == "__main__":
    demo_nn_distance()
96 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import pointops
  3 | 
  4 | torch.manual_seed(1)
  5 | 
  6 | M = 80000
  7 | N = 3500
  8 | # M = 80
  9 | # N = 5
 10 | hdim = 16
 11 | h = 6
 12 | L = 31
 13 | query = torch.rand(N, h, hdim).cuda()
 14 | table_q = torch.rand(L, h, hdim, 3).cuda()
 15 | key = torch.rand(N, h, hdim).cuda()
 16 | table_k = torch.rand(L, h, hdim, 3).cuda()
 17 | 
 18 | index_q = torch.rand(M)
 19 | index_q[index_q < 0] = 0
 20 | index_q = (index_q * N).long().cuda()
 21 | 
 22 | index_k = torch.rand(M)
 23 | index_k[index_k < 0] = 0
 24 | index_k = (index_k * N).long().cuda()
 25 | 
 26 | rel_index = torch.rand(M, 3)
 27 | rel_index[rel_index < 0] = 0
 28 | rel_index = (rel_index * L).long().cuda()
 29 | 
 30 | 
 31 | # rearrange index for acceleration
 32 | index_q, indices = torch.sort(index_q)  # [M,]
 33 | index_k = index_k[indices]  # [M,]
 34 | rel_index = rel_index[indices]
 35 | index_q_counts = index_q.bincount()
 36 | 
 37 | print("index_q_counts.shape: ", index_q_counts.shape)
 38 | 
 39 | n_max = index_q_counts.max()
 40 | index_q_offsets = index_q_counts.cumsum(dim=-1)  # [N]
 41 | 
 42 | print("v1 index_q_offsets.shape: ", index_q_offsets.shape)
 43 | 
 44 | index_q_offsets = torch.cat(
 45 |     [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0
 46 | )  # [N+1]
 47 | 
 48 | # print("index_q[:100]: ", index_q[:100])
 49 | print("n_max: ", n_max)
 50 | print("index_q_offsets.shape: ", index_q_offsets.shape)
 51 | # input()
 52 | 
 53 | print("index_q_offsets[:100]: ", index_q_offsets[:100])
 54 | print("index_k[:20]: ", index_k[:20])
 55 | 
 56 | query.requires_grad = True
 57 | table_q.requires_grad = True
 58 | key.requires_grad = True
 59 | table_k.requires_grad = True
 60 | 
 61 | output1 = pointops.dot_prod_with_idx(
 62 |     query, index_q.int(), table_q, rel_index.int()
 63 | )
 64 | output2 = pointops.dot_prod_with_idx(
 65 |     key, index_k.int(), table_k, rel_index.int()
 66 | )
 67 | output = output1 + output2
 68 | loss = output.mean()
 69 | loss.backward()
 70 | 
 71 | # print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
 72 | # print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
 73 | # print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
 74 | # print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
 75 | # print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
 76 | # input()
 77 | 
 78 | # print("query.is_contiguous(): ", query.is_contiguous())
 79 | # print("key.is_contiguous(): ", key.is_contiguous())
 80 | # print("index_q.is_contiguous(): ", index_q.is_contiguous())
 81 | # print("index_k.is_contiguous(): ", index_k.is_contiguous())
 82 | 
 83 | output_v2 = pointops.dot_prod_with_idx_v3(
 84 |     query,
 85 |     index_q_offsets.int(),
 86 |     n_max,
 87 |     key,
 88 |     index_k.int(),
 89 |     table_q,
 90 |     table_k,
 91 |     rel_index.int(),
 92 | )
 93 | # loss = output_v2.mean()
 94 | # loss.backward()
 95 | 
 96 | # print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10]))
 97 | # print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
 98 | # print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
 99 | # print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
100 | # print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
101 | # input()
102 | 
103 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
104 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import pointops
  3 | 
  4 | torch.manual_seed(1)
  5 | 
  6 | M = 80000
  7 | N = 3500
  8 | hdim = 16
  9 | h = 6
 10 | L = 31
 11 | attn = torch.rand(M, h).cuda()
 12 | v = torch.rand(N, h, hdim).cuda()
 13 | table = torch.rand(L, h, hdim, 3).cuda()
 14 | 
 15 | index_0 = torch.rand(M)
 16 | index_0[index_0 < 0] = 0
 17 | index_0 = (index_0 * N).long().cuda()
 18 | 
 19 | index_1 = torch.rand(M)
 20 | index_1[index_1 < 0] = 0
 21 | index_1 = (index_1 * N).long().cuda()
 22 | 
 23 | rel_index = torch.rand(M, 3)
 24 | rel_index[rel_index < 0] = 0
 25 | rel_index = (rel_index * L).long().cuda()
 26 | 
 27 | 
 28 | # rearrange index for acceleration
 29 | index_0, indices = torch.sort(index_0)  # [M,]
 30 | index_1 = index_1[indices]  # [M,]
 31 | rel_index = rel_index[indices]
 32 | index_0_counts = index_0.bincount()
 33 | 
 34 | print("index_0_counts.shape: ", index_0_counts.shape)
 35 | 
 36 | n_max = index_0_counts.max()
 37 | index_0_offsets = index_0_counts.cumsum(dim=-1)  # [N]
 38 | 
 39 | print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
 40 | 
 41 | index_0_offsets = torch.cat(
 42 |     [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
 43 | )  # [N+1]
 44 | 
 45 | 
 46 | attn.requires_grad = True
 47 | v.requires_grad = True
 48 | table.requires_grad = True
 49 | 
 50 | 
 51 | output = pointops.attention_step2_with_rel_pos_value(
 52 |     attn, v, index_0.int(), index_1.int(), table, rel_index.int()
 53 | )
 54 | loss = output.mean()
 55 | loss.backward()
 56 | 
 57 | print(
 58 |     "output.shape: {}, output[:5,:10,:5]: {}".format(
 59 |         output.shape, output[:5, :10, :5]
 60 |     )
 61 | )
 62 | print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
 63 | print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
 64 | print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
 65 | # input()
 66 | 
 67 | attn_grad = attn.grad.clone()
 68 | v_grad = v.grad.clone()
 69 | table_grad = table.grad.clone()
 70 | 
 71 | attn.grad.zero_()
 72 | v.grad.zero_()
 73 | table.grad.zero_()
 74 | 
 75 | # print("query.is_contiguous(): ", query.is_contiguous())
 76 | # print("key.is_contiguous(): ", key.is_contiguous())
 77 | # print("index_0.is_contiguous(): ", index_0.is_contiguous())
 78 | # print("index_1.is_contiguous(): ", index_1.is_contiguous())
 79 | 
 80 | output_v2 = pointops.attention_step2_with_rel_pos_value_v2(
 81 |     attn,
 82 |     v,
 83 |     index_0_offsets.int(),
 84 |     n_max,
 85 |     index_1.int(),
 86 |     table,
 87 |     rel_index.int(),
 88 | )
 89 | loss = output_v2.mean()
 90 | loss.backward()
 91 | 
 92 | print(
 93 |     "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(
 94 |         output_v2.shape, output_v2[:5, :10, :5]
 95 |     )
 96 | )
 97 | print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
 98 | print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
 99 | print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
100 | # input()
101 | 
102 | print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
103 | 
104 | print(
105 |     "((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max()
106 | )
107 | 
108 | print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max())
109 | 
110 | print(
111 |     "((table_grad-table.grad)**2).max(): ",
112 |     ((table_grad - table.grad) ** 2).max(),
113 | )
114 | 
115 | # print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
116 | 


--------------------------------------------------------------------------------
/utils/kfold.py:
--------------------------------------------------------------------------------
 1 | """ Author: https://github.com/yk-szk/stratified_group_kfold """
 2 | import random
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | class StratifiedGroupKFold:
 8 |     """
 9 |     Stratified Group K-fold with sklearn.model_selection.KFold compabitility.
10 | 
11 |     Split dataset into k folds with balanced label distribution (stratified) and non-overlapping group.
12 | 
13 |     Args:
14 |         n_splits (int): # of splits
15 |         shuffle (bool): Shuffle
16 |         seed (int): Seed value for random number generator
17 |     """
18 | 
19 |     def __init__(self, n_splits, shuffle=True, random_state=None):
20 |         self.n_splits = n_splits
21 |         self.shuffle = shuffle
22 |         self.seed = random_state
23 | 
24 |     def split(self, X, labels, groups):
25 |         assert len(X) == len(labels) == len(groups), "Invalid input length"
26 |         assert (
27 |             len(set(groups)) >= self.n_splits
28 |         ), "The number of groups needs to be larger than n_splits"
29 | 
30 |         def encode(v):
31 |             s = set(v)
32 |             d = {l: i for i, l in enumerate(s)}
33 |             return [d[e] for e in v]
34 | 
35 |         labels, groups = encode(labels), encode(groups)
36 |         num_labels, num_groups = max(labels) + 1, max(groups) + 1
37 |         label_counts_per_group = np.zeros((num_groups, num_labels), dtype=int)
38 |         global_label_dist = np.bincount(labels)
39 |         for label, g in zip(labels, groups):
40 |             label_counts_per_group[g][label] += 1
41 | 
42 |         label_counts_per_fold = np.zeros(
43 |             (self.n_splits, num_labels), dtype=int
44 |         )
45 |         groups_per_fold = [set() for _ in range(self.n_splits)]
46 | 
47 |         def eval_label_counts_per_fold(y_counts, fold):
48 |             fold += y_counts
49 |             std_per_label = (
50 |                 np.std(label_counts_per_fold, axis=0) / global_label_dist
51 |             )
52 |             fold -= y_counts
53 |             return np.mean(std_per_label)
54 | 
55 |         groups_and_label_counts = list(enumerate(label_counts_per_group))
56 |         if self.shuffle:
57 |             rng = random.Random(self.seed)
58 |             mean_std = np.mean(np.std(label_counts_per_group, axis=1))
59 |             groups_and_label_counts.sort(
60 |                 key=lambda g_counts: -np.std(g_counts[1])
61 |                 + rng.gauss(0, mean_std)
62 |             )  # add rng.gauss to increase the randomness
63 |         else:
64 |             groups_and_label_counts.sort(
65 |                 key=lambda g_counts: -np.std(g_counts[1])
66 |             )
67 | 
68 |         for g, label_counts in groups_and_label_counts:
69 |             evals = [
70 |                 eval_label_counts_per_fold(
71 |                     label_counts, label_counts_per_fold[i]
72 |                 )
73 |                 for i in range(self.n_splits)
74 |             ]
75 |             best_fold = np.argmin(evals)
76 |             label_counts_per_fold[best_fold] += label_counts
77 |             groups_per_fold[best_fold].add(g)
78 | 
79 |         all_groups = set(groups)
80 |         for test_groups in groups_per_fold:
81 |             train_groups = all_groups - test_groups
82 | 
83 |             train_indices = [
84 |                 i for i, g in enumerate(groups) if g in train_groups
85 |             ]
86 |             test_indices = [
87 |                 i for i, g in enumerate(groups) if g in test_groups
88 |             ]
89 | 
90 |             yield train_indices, test_indices
91 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/aggregation/aggregation_cuda_kernel.cu:
--------------------------------------------------------------------------------
 1 | #include "../cuda_utils.h"
 2 | #include "aggregation_cuda_kernel.h"
 3 | 
 4 | 
 5 | __global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
 6 |     // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
 7 |     int index = blockIdx.x * blockDim.x + threadIdx.x;
 8 |     if (index >= n * c) return;
 9 |     const int c_idx = index % c;
10 |     const int n_idx = index / c;
11 |     const int w_c_idx = c_idx % w_c;
12 |     for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
13 |     {   
14 |         int idx_idx = n_idx * nsample + nsample_idx;
15 |         int input_idx = idx[idx_idx] * c + c_idx;
16 |         int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
17 |         int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
18 |         output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
19 |     }
20 | }
21 | 
__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    // Each thread handles one (point, channel) element of grad_output and
    // scatters its contribution to the three gradient buffers.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= n * c) return;
    const int channel = tid % c;
    const int point = tid / c;
    const int w_channel = channel % w_c;
    const float grad = grad_output[tid];
    for (int s = 0; s < nsample; ++s)
    {
        const int slot = point * nsample + s;
        const int src = idx[slot] * c + channel;
        const int pos = slot * c + channel;
        const int w = slot * w_c + w_channel;
        const float scaled = grad * weight[w];
        // Different threads can reach the same input row through idx, hence
        // the atomic accumulation.
        atomicAdd(grad_input + src, scaled);
        // Each (slot, channel) position entry is visited by exactly one
        // thread, so a plain store is used.
        grad_position[pos] = scaled;
        // Channels that share the same channel % w_c land on the same weight
        // entry, hence the atomic accumulation.
        atomicAdd(grad_weight + w, grad * (input[src] + position[pos]));
    }
}
40 | 
void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
    // input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
    // Launch enough THREADS_PER_BLOCK-sized blocks to give every one of the
    // n * c output elements its own thread.
    const int total = n * c;
    const dim3 grid(DIVUP(total, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    aggregation_forward_cuda_kernel<<<grid, block, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
}
47 | 
void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
    // Launch enough THREADS_PER_BLOCK-sized blocks to give every one of the
    // n * c grad_output elements its own thread.
    const int total = n * c;
    const dim3 grid(DIVUP(total, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    aggregation_backward_cuda_kernel<<<grid, block, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
}
54 | 


--------------------------------------------------------------------------------
/datasets/random_cuboid.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | import numpy as np
 3 | 
 4 | 
 5 | def check_aspect(crop_range, aspect_min):
 6 |     xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2])
 7 |     xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]])
 8 |     yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:])
 9 |     return (
10 |         (xy_aspect >= aspect_min)
11 |         or (xz_aspect >= aspect_min)
12 |         or (yz_aspect >= aspect_min)
13 |     )
14 | 
15 | 
class RandomCuboid(object):
    """
    RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691]
    We slightly modify this operation to account for object detection.

    As implemented here, the crop is a square window of side ``crop_length``
    in the first two coordinates (x, y) only; the remaining columns of the
    point cloud are not used to select points.

    Args:
        min_points: Minimum number of points a crop must keep. Clouds with
            fewer points than this are returned uncropped.
        crop_length: Side length of the square crop window.
        version1: If True the window centre is jittered by up to a quarter
            of the cloud's xy extent around the cloud centre. If False the
            jitter range is half the extent minus ``crop_length / 4``.
    """

    def __init__(
        self,
        min_points,
        # aspect=0.8,
        crop_length=6.0,
        version1=True,
    ):
        # self.aspect = aspect
        self.crop_length = crop_length
        self.min_points = min_points
        self.version1 = version1

    def __call__(self, point_cloud):
        """Return a boolean mask selecting the cropped points.

        Up to 100 random window positions are tried; the first one that
        keeps at least ``min_points`` points is returned. If the cloud is
        too small to begin with, or no attempt succeeds, an all-True mask
        is returned.

        Args:
            point_cloud: Array of shape (num_points, >=2); columns 0 and 1
                are used as x and y.

        Returns:
            np.ndarray: Boolean mask of shape (num_points,).
        """
        num_points = point_cloud.shape[0]
        if num_points < self.min_points:
            print("too small pcd")
            # np.bool was removed in NumPy 1.24; the builtin bool is the
            # equivalent dtype.
            return np.ones(num_points, dtype=bool)

        # Loop-invariant geometry of the cloud in the xy plane.
        xy = point_cloud[:, :2]
        xy_min = np.min(xy, axis=0)
        range_xyz = np.max(xy, axis=0) - xy_min
        cloud_center = xy_min + range_xyz / 2
        half_crop = self.crop_length / 2

        for _ in range(100):
            # crop_range = self.min_crop + np.random.rand(3) * (
            #    self.max_crop - self.min_crop
            # )
            # crop_range[-1] = 999.
            # if not check_aspect(crop_range, self.aspect):
            #     continue

            sample_center = cloud_center.copy()

            if self.version1:
                offset_x = np.random.uniform(
                    -range_xyz[0] / 4, range_xyz[0] / 4
                )
                offset_y = np.random.uniform(
                    -range_xyz[1] / 4, range_xyz[1] / 4
                )
            else:
                offset_x = np.random.uniform(
                    -(range_xyz[0] / 2) + self.crop_length / 4,
                    +(range_xyz[0] / 2) - self.crop_length / 4,
                )
                offset_y = np.random.uniform(
                    -(range_xyz[1] / 2) + self.crop_length / 4,
                    +(range_xyz[1] / 2) - self.crop_length / 4,
                )

            sample_center[0] = sample_center[0] + offset_x
            sample_center[1] = sample_center[1] + offset_y

            min_xy = sample_center - half_crop
            max_xy = sample_center + half_crop

            # A point is kept when both of its xy coordinates lie inside
            # the window (bounds inclusive).
            upper_idx = np.all(xy <= max_xy, axis=1)
            lower_idx = np.all(xy >= min_xy, axis=1)

            new_pointidx = upper_idx & lower_idx

            if np.count_nonzero(new_pointidx) < self.min_points:
                print("TOO SMALL")
                continue

            return new_pointidx

        # fallback
        print("FALLBACK")
        return np.ones(num_points, dtype=bool)
96 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/rpe/relative_pos_encoding_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | #include <THC/THC.h>
 3 | #include <torch/serialize/tensor.h>
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include "relative_pos_encoding_cuda_kernel.h"
 6 | 
 7 | void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, 
 8 |     at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
 9 | {
10 |     const float *q = q_tensor.data_ptr<float>();
11 |     const float *table = table_tensor.data_ptr<float>();
12 |     const int *index = index_tensor.data_ptr<int>();
13 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
14 |     float *output = output_tensor.data_ptr<float>();
15 |     dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output);
16 | }
17 | 
18 | void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 
19 |     at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, 
20 |     at::Tensor grad_q_tensor, at::Tensor grad_table_tensor)
21 | {
22 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
23 |     const float *q = q_tensor.data_ptr<float>();
24 |     const int *index = index_tensor.data_ptr<int>();
25 |     const float *table = table_tensor.data_ptr<float>();
26 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
27 |     float *grad_q = grad_q_tensor.data_ptr<float>();
28 |     float *grad_table = grad_table_tensor.data_ptr<float>();
29 |     dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table);
30 | }
31 | 
32 | void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, 
33 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
34 | {
35 |     const float *attn = attn_tensor.data_ptr<float>();
36 |     const float *v = v_tensor.data_ptr<float>();
37 |     const int *index0 = index0_tensor.data_ptr<int>();
38 |     const int *index1 = index1_tensor.data_ptr<int>();
39 |     const float *table = table_tensor.data_ptr<float>();
40 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
41 |     float *output = output_tensor.data_ptr<float>();
42 |     attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output);
43 | }
44 | 
45 | void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, 
46 |     at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor,
47 |     at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor)
48 | {
49 |     const float *grad_out = grad_out_tensor.data_ptr<float>();
50 |     const int *index0 = index0_tensor.data_ptr<int>();
51 |     const int *index1 = index1_tensor.data_ptr<int>();
52 |     const float *attn = attn_tensor.data_ptr<float>();
53 |     const float *v = v_tensor.data_ptr<float>();
54 |     const float *table = table_tensor.data_ptr<float>();
55 |     const int *rel_idx = rel_idx_tensor.data_ptr<int>();
56 |     float *grad_attn = grad_attn_tensor.data_ptr<float>();
57 |     float *grad_v = grad_v_tensor.data_ptr<float>();
58 |     float *grad_table = grad_table_tensor.data_ptr<float>();
59 |     attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
60 | }
61 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/knnquery/knnquery_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "../cuda_utils.h"
  2 | #include "knnquery_cuda_kernel.h"
  3 | 
  4 | 
  5 | __device__ void swap_float(float *x, float *y)
  6 | {
  7 |     float tmp = *x;
  8 |     *x = *y;
  9 |     *y = tmp;
 10 | }
 11 | 
 12 | 
 13 | __device__ void swap_int(int *x, int *y)
 14 | {
 15 |     int tmp = *x;
 16 |     *x = *y;
 17 |     *y = tmp;
 18 | }
 19 | 
 20 | 
 21 | __device__ void reheap(float *dist, int *idx, int k)
 22 | {
 23 |     int root = 0;
 24 |     int child = root * 2 + 1;
 25 |     while (child < k)
 26 |     {
 27 |         if(child + 1 < k && dist[child+1] > dist[child])
 28 |             child++;
 29 |         if(dist[root] > dist[child])
 30 |             return;
 31 |         swap_float(&dist[root], &dist[child]);
 32 |         swap_int(&idx[root], &idx[child]);
 33 |         root = child;
 34 |         child = root * 2 + 1;
 35 |     }
 36 | }
 37 | 
 38 | 
 39 | __device__ void heap_sort(float *dist, int *idx, int k)
 40 | {
 41 |     int i;
 42 |     for (i = k - 1; i > 0; i--)
 43 |     {
 44 |         swap_float(&dist[0], &dist[i]);
 45 |         swap_int(&idx[0], &idx[i]);
 46 |         reheap(dist, idx, i);
 47 |     }
 48 | }
 49 | 
 50 | 
 51 | __device__ int get_bt_idx(int idx, const int *offset)
 52 | {
 53 |     int i = 0;
 54 |     while (1)
 55 |     {
 56 |         if (idx < offset[i])
 57 |             break;
 58 |         else
 59 |             i++;
 60 |     }
 61 |     return i;
 62 | }
 63 | 
 64 | 
 65 | __global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
 66 |     // input: xyz (n, 3) new_xyz (m, 3)
 67 |     // output: idx (m, nsample) dist2 (m, nsample)
 68 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
 69 |     if (pt_idx >= m) return;
 70 | 
 71 |     new_xyz += pt_idx * 3;
 72 |     idx += pt_idx * nsample;
 73 |     dist2 += pt_idx * nsample;
 74 |     int bt_idx = get_bt_idx(pt_idx, new_offset);
 75 |     int start;
 76 |     if (bt_idx == 0)
 77 |         start = 0;
 78 |     else
 79 |         start = offset[bt_idx - 1];
 80 |     int end = offset[bt_idx];
 81 | 
 82 |     float new_x = new_xyz[0];
 83 |     float new_y = new_xyz[1];
 84 |     float new_z = new_xyz[2];
 85 | 
 86 |     float best_dist[100];
 87 |     int best_idx[100];
 88 |     for(int i = 0; i < nsample; i++){
 89 |         best_dist[i] = 1e10;
 90 |         best_idx[i] = start;
 91 |     }
 92 |     for(int i = start; i < end; i++){
 93 |         float x = xyz[i * 3 + 0];
 94 |         float y = xyz[i * 3 + 1];
 95 |         float z = xyz[i * 3 + 2];
 96 |         float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
 97 |         if (d2 < best_dist[0]){
 98 |             best_dist[0] = d2;
 99 |             best_idx[0] = i;
100 |             reheap(best_dist, best_idx, nsample);
101 |         }
102 |     }
103 |     heap_sort(best_dist, best_idx, nsample);
104 |     for(int i = 0; i < nsample; i++){
105 |         idx[i] = best_idx[i];
106 |         dist2[i] = best_dist[i];
107 |     }
108 | }
109 | 
110 | 
void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
    // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
    // Launch enough THREADS_PER_BLOCK-sized blocks to give each of the m
    // query points its own thread.
    const dim3 grid(DIVUP(m, THREADS_PER_BLOCK));
    const dim3 block(THREADS_PER_BLOCK);
    knnquery_cuda_kernel<<<grid, block, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
}
117 | 


--------------------------------------------------------------------------------
/models/modules/3detr_helpers.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import copy
  3 | from functools import partial
  4 | 
  5 | import torch.nn as nn
  6 | 
  7 | 
  8 | class BatchNormDim1Swap(nn.BatchNorm1d):
  9 |     """
 10 |     Used for nn.Transformer that uses a HW x N x C rep
 11 |     """
 12 | 
 13 |     def forward(self, x):
 14 |         """
 15 |         x: HW x N x C
 16 |         permute to N x C x HW
 17 |         Apply BN on C
 18 |         permute back
 19 |         """
 20 |         hw, n, c = x.shape
 21 |         x = x.permute(1, 2, 0)
 22 |         x = super(BatchNormDim1Swap, self).forward(x)
 23 |         # x: n x c x hw -> hw x n x c
 24 |         x = x.permute(2, 0, 1)
 25 |         return x
 26 | 
 27 | 
# Normalisation layer constructors, keyed by the short name that GenericMLP
# receives as ``norm_fn_name``.
NORM_DICT = {
    "bn": BatchNormDim1Swap,
    "bn1d": nn.BatchNorm1d,
    "id": nn.Identity,
    "ln": nn.LayerNorm,
}

# Activation constructors, keyed by the short name that GenericMLP receives
# as ``activation``. Each value is called with no arguments to build a layer.
ACTIVATION_DICT = {
    "relu": nn.ReLU,
    "gelu": nn.GELU,
    "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1),
}

# In-place weight initialisers, keyed by the short name that GenericMLP
# receives as ``weight_init_name``.
WEIGHT_INIT_DICT = {
    "xavier_uniform": nn.init.xavier_uniform_,
}
 44 | 
 45 | 
 46 | class GenericMLP(nn.Module):
 47 |     def __init__(
 48 |         self,
 49 |         input_dim,
 50 |         hidden_dims,
 51 |         output_dim,
 52 |         norm_fn_name=None,
 53 |         activation="relu",
 54 |         use_conv=False,
 55 |         dropout=None,
 56 |         hidden_use_bias=False,
 57 |         output_use_bias=True,
 58 |         output_use_activation=False,
 59 |         output_use_norm=False,
 60 |         weight_init_name=None,
 61 |     ):
 62 |         super().__init__()
 63 |         activation = ACTIVATION_DICT[activation]
 64 |         norm = None
 65 |         if norm_fn_name is not None:
 66 |             norm = NORM_DICT[norm_fn_name]
 67 |         if norm_fn_name == "ln" and use_conv:
 68 |             norm = lambda x: nn.GroupNorm(1, x)  # easier way to use LayerNorm
 69 | 
 70 |         if dropout is not None:
 71 |             if not isinstance(dropout, list):
 72 |                 dropout = [dropout for _ in range(len(hidden_dims))]
 73 | 
 74 |         layers = []
 75 |         prev_dim = input_dim
 76 |         for idx, x in enumerate(hidden_dims):
 77 |             if use_conv:
 78 |                 layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias)
 79 |             else:
 80 |                 layer = nn.Linear(prev_dim, x, bias=hidden_use_bias)
 81 |             layers.append(layer)
 82 |             if norm:
 83 |                 layers.append(norm(x))
 84 |             layers.append(activation())
 85 |             if dropout is not None:
 86 |                 layers.append(nn.Dropout(p=dropout[idx]))
 87 |             prev_dim = x
 88 |         if use_conv:
 89 |             layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias)
 90 |         else:
 91 |             layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias)
 92 |         layers.append(layer)
 93 | 
 94 |         if output_use_norm:
 95 |             layers.append(norm(output_dim))
 96 | 
 97 |         if output_use_activation:
 98 |             layers.append(activation())
 99 | 
100 |         self.layers = nn.Sequential(*layers)
101 | 
102 |         if weight_init_name is not None:
103 |             self.do_weight_init(weight_init_name)
104 | 
105 |     def do_weight_init(self, weight_init_name):
106 |         func = WEIGHT_INIT_DICT[weight_init_name]
107 |         for (_, param) in self.named_parameters():
108 |             if param.dim() > 1:  # skips batchnorm/layernorm
109 |                 func(param)
110 | 
111 |     def forward(self, x):
112 |         output = self.layers(x)
113 |         return output
114 | 
115 | 
def get_clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    clones = []
    for _ in range(N):
        clones.append(copy.deepcopy(module))
    return nn.ModuleList(clones)
118 | 


--------------------------------------------------------------------------------
/models/modules/helpers_3detr.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | import copy
  3 | from functools import partial
  4 | 
  5 | import torch.nn as nn
  6 | 
  7 | 
  8 | class BatchNormDim1Swap(nn.BatchNorm1d):
  9 |     """
 10 |     Used for nn.Transformer that uses a HW x N x C rep
 11 |     """
 12 | 
 13 |     def forward(self, x):
 14 |         """
 15 |         x: HW x N x C
 16 |         permute to N x C x HW
 17 |         Apply BN on C
 18 |         permute back
 19 |         """
 20 |         hw, n, c = x.shape
 21 |         x = x.permute(1, 2, 0)
 22 |         x = super(BatchNormDim1Swap, self).forward(x)
 23 |         # x: n x c x hw -> hw x n x c
 24 |         x = x.permute(2, 0, 1)
 25 |         return x
 26 | 
 27 | 
# Normalisation layer constructors, keyed by the short name that GenericMLP
# receives as ``norm_fn_name``.
NORM_DICT = {
    "bn": BatchNormDim1Swap,
    "bn1d": nn.BatchNorm1d,
    "id": nn.Identity,
    "ln": nn.LayerNorm,
}

# Activation constructors, keyed by the short name that GenericMLP receives
# as ``activation``.
ACTIVATION_DICT = {
    "relu": nn.ReLU,
    "gelu": nn.GELU,
    "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1),
}

# In-place weight initialisers, keyed by short name.
WEIGHT_INIT_DICT = {
    "xavier_uniform": nn.init.xavier_uniform_,
}
 44 | 
 45 | 
 46 | class GenericMLP(nn.Module):
 47 |     def __init__(
 48 |         self,
 49 |         input_dim,
 50 |         hidden_dims,
 51 |         output_dim,
 52 |         norm_fn_name=None,
 53 |         activation="relu",
 54 |         use_conv=False,
 55 |         dropout=None,
 56 |         hidden_use_bias=False,
 57 |         output_use_bias=True,
 58 |         output_use_activation=False,
 59 |         output_use_norm=False,
 60 |         weight_init_name=None,
 61 |     ):
 62 |         super().__init__()
 63 |         activation = ACTIVATION_DICT[activation]
 64 |         norm = None
 65 |         if norm_fn_name is not None:
 66 |             norm = NORM_DICT[norm_fn_name]
 67 |         if norm_fn_name == "ln" and use_conv:
 68 |             norm = lambda x: nn.GroupNorm(1, x)  # easier way to use LayerNorm
 69 | 
 70 |         if dropout is not None:
 71 |             if not isinstance(dropout, list):
 72 |                 dropout = [dropout for _ in range(len(hidden_dims))]
 73 | 
 74 |         layers = []
 75 |         prev_dim = input_dim
 76 |         for idx, x in enumerate(hidden_dims):
 77 |             if use_conv:
 78 |                 layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias)
 79 |             else:
 80 |                 layer = nn.Linear(prev_dim, x, bias=hidden_use_bias)
 81 |             layers.append(layer)
 82 |             if norm:
 83 |                 layers.append(norm(x))
 84 |             layers.append(activation())
 85 |             if dropout is not None:
 86 |                 layers.append(nn.Dropout(p=dropout[idx]))
 87 |             prev_dim = x
 88 |         if use_conv:
 89 |             layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias)
 90 |         else:
 91 |             layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias)
 92 |         layers.append(layer)
 93 | 
 94 |         if output_use_norm:
 95 |             layers.append(norm(output_dim))
 96 | 
 97 |         if output_use_activation:
 98 |             layers.append(activation())
 99 | 
100 |         self.layers = nn.Sequential(*layers)
101 | 
102 |         if weight_init_name is not None:
103 |             self.do_weight_init(weight_init_name)
104 | 
105 |     def do_weight_init(self, weight_init_name):
106 |         func = WEIGHT_INIT_DICT[weight_init_name]
107 |         for (_, param) in self.named_parameters():
108 |             if param.dim() > 1:  # skips batchnorm/layernorm
109 |                 func(param)
110 | 
111 |     def forward(self, x):
112 |         output = self.layers(x)
113 |         return output
114 | 
115 | 
def get_clones(module, N):
    """Build an ``nn.ModuleList`` containing ``N`` deep copies of ``module``."""
    copies = []
    for _ in range(N):
        copies.append(copy.deepcopy(module))
    return nn.ModuleList(copies)
118 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | 
  4 | import hydra
  5 | from dotenv import load_dotenv
  6 | from omegaconf import DictConfig, OmegaConf
  7 | from pytorch_lightning import Trainer, seed_everything
  8 | 
  9 | from trainer.trainer import InstanceSegmentation, RegularCheckpointing
 10 | from utils.utils import (
 11 |     flatten_dict,
 12 |     load_backbone_checkpoint_with_missing_or_exsessive_keys,
 13 |     load_checkpoint_with_missing_or_exsessive_keys,
 14 | )
 15 | 
 16 | 
 17 | def get_parameters(cfg: DictConfig):
 18 |     logger = logging.getLogger(__name__)
 19 |     load_dotenv(".env")
 20 | 
 21 |     # parsing input parameters
 22 |     seed_everything(cfg.general.seed)
 23 | 
 24 |     # getting basic configuration
 25 |     if cfg.general.get("gpus", None) is None:
 26 |         cfg.general.gpus = os.environ.get("CUDA_VISIBLE_DEVICES", None)
 27 |     loggers = []
 28 | 
 29 |     # cfg.general.experiment_id = "0" # str(Repo("./").commit())[:8]
 30 |     # params = flatten_dict(OmegaConf.to_container(cfg, resolve=True))
 31 | 
 32 |     # create unique id for experiments that are run locally
 33 |     # unique_id = "_" + str(uuid4())[:4]
 34 |     # cfg.general.version = md5(str(params).encode("utf-8")).hexdigest()[:8] + unique_id
 35 | 
 36 |     if not os.path.exists(cfg.general.save_dir):
 37 |         os.makedirs(cfg.general.save_dir)
 38 |     else:
 39 |         print("EXPERIMENT ALREADY EXIST")
 40 |         cfg["trainer"][
 41 |             "resume_from_checkpoint"
 42 |         ] = f"{cfg.general.save_dir}/last-epoch.ckpt"
 43 | 
 44 |     for log in cfg.logging:
 45 |         print(log)
 46 |         loggers.append(hydra.utils.instantiate(log))
 47 |         loggers[-1].log_hyperparams(
 48 |             flatten_dict(OmegaConf.to_container(cfg, resolve=True))
 49 |         )
 50 | 
 51 |     model = InstanceSegmentation(cfg)
 52 |     if cfg.general.backbone_checkpoint is not None:
 53 |         cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys(
 54 |             cfg, model
 55 |         )
 56 |     if cfg.general.checkpoint is not None:
 57 |         cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model)
 58 | 
 59 |     logger.info(flatten_dict(OmegaConf.to_container(cfg, resolve=True)))
 60 |     return cfg, model, loggers
 61 | 
 62 | 
 63 | @hydra.main(
 64 |     config_path="conf", config_name="config_base_instance_segmentation.yaml"
 65 | )
 66 | def train(cfg: DictConfig):
 67 |     os.chdir(hydra.utils.get_original_cwd())
 68 |     cfg, model, loggers = get_parameters(cfg)
 69 |     callbacks = []
 70 |     for cb in cfg.callbacks:
 71 |         callbacks.append(hydra.utils.instantiate(cb))
 72 | 
 73 |     callbacks.append(RegularCheckpointing())
 74 | 
 75 |     runner = Trainer(
 76 |         logger=loggers,
 77 |         gpus=cfg.general.gpus,
 78 |         callbacks=callbacks,
 79 |         weights_save_path=str(cfg.general.save_dir),
 80 |         **cfg.trainer,
 81 |     )
 82 |     runner.fit(model)
 83 | 
 84 | 
 85 | @hydra.main(
 86 |     config_path="conf", config_name="config_base_instance_segmentation.yaml"
 87 | )
 88 | def test(cfg: DictConfig):
 89 |     # because hydra wants to change dir for some reason
 90 |     os.chdir(hydra.utils.get_original_cwd())
 91 |     cfg, model, loggers = get_parameters(cfg)
 92 |     runner = Trainer(
 93 |         gpus=cfg.general.gpus,
 94 |         logger=loggers,
 95 |         weights_save_path=str(cfg.general.save_dir),
 96 |         **cfg.trainer,
 97 |     )
 98 |     runner.test(model)
 99 | 
100 | 
@hydra.main(
    config_path="conf", config_name="config_base_instance_segmentation.yaml"
)
def main(cfg: DictConfig):
    """Dispatch to ``train`` or ``test`` based on ``cfg["general"]["train_mode"]``."""
    entry_point = train if cfg["general"]["train_mode"] else test
    entry_point(cfg)


if __name__ == "__main__":
    main()
113 | 


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/interpolate.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | #include "interpolate.h"
  4 | #include "utils.h"
  5 | 
  6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
  7 |                              const float *known, float *dist2, int *idx);
  8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
  9 |                                       const float *points, const int *idx,
 10 |                                       const float *weight, float *out);
 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
 12 |                                            const float *grad_out,
 13 |                                            const int *idx, const float *weight,
 14 |                                            float *grad_points);
 15 | 
 16 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {
 17 |   CHECK_CONTIGUOUS(unknowns);
 18 |   CHECK_CONTIGUOUS(knows);
 19 |   CHECK_IS_FLOAT(unknowns);
 20 |   CHECK_IS_FLOAT(knows);
 21 | 
 22 |   if (unknowns.is_cuda()) {
 23 |     CHECK_CUDA(knows);
 24 |   }
 25 | 
 26 |   at::Tensor idx =
 27 |       torch::zeros({unknowns.size(0), unknowns.size(1), 3},
 28 |                    at::device(unknowns.device()).dtype(at::ScalarType::Int));
 29 |   at::Tensor dist2 =
 30 |       torch::zeros({unknowns.size(0), unknowns.size(1), 3},
 31 |                    at::device(unknowns.device()).dtype(at::ScalarType::Float));
 32 | 
 33 |   if (unknowns.is_cuda()) {
 34 |     three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),
 35 |                             unknowns.data<float>(), knows.data<float>(),
 36 |                             dist2.data<float>(), idx.data<int>());
 37 |   } else {
 38 |     AT_ASSERT(false, "CPU not supported");
 39 |   }
 40 | 
 41 |   return {dist2, idx};
 42 | }
 43 | 
 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
 45 |                              at::Tensor weight) {
 46 |   CHECK_CONTIGUOUS(points);
 47 |   CHECK_CONTIGUOUS(idx);
 48 |   CHECK_CONTIGUOUS(weight);
 49 |   CHECK_IS_FLOAT(points);
 50 |   CHECK_IS_INT(idx);
 51 |   CHECK_IS_FLOAT(weight);
 52 | 
 53 |   if (points.is_cuda()) {
 54 |     CHECK_CUDA(idx);
 55 |     CHECK_CUDA(weight);
 56 |   }
 57 | 
 58 |   at::Tensor output =
 59 |       torch::zeros({points.size(0), points.size(1), idx.size(1)},
 60 |                    at::device(points.device()).dtype(at::ScalarType::Float));
 61 | 
 62 |   if (points.is_cuda()) {
 63 |     three_interpolate_kernel_wrapper(
 64 |         points.size(0), points.size(1), points.size(2), idx.size(1),
 65 |         points.data<float>(), idx.data<int>(), weight.data<float>(),
 66 |         output.data<float>());
 67 |   } else {
 68 |     AT_ASSERT(false, "CPU not supported");
 69 |   }
 70 | 
 71 |   return output;
 72 | }
 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
 74 |                                   at::Tensor weight, const int m) {
 75 |   CHECK_CONTIGUOUS(grad_out);
 76 |   CHECK_CONTIGUOUS(idx);
 77 |   CHECK_CONTIGUOUS(weight);
 78 |   CHECK_IS_FLOAT(grad_out);
 79 |   CHECK_IS_INT(idx);
 80 |   CHECK_IS_FLOAT(weight);
 81 | 
 82 |   if (grad_out.is_cuda()) {
 83 |     CHECK_CUDA(idx);
 84 |     CHECK_CUDA(weight);
 85 |   }
 86 | 
 87 |   at::Tensor output =
 88 |       torch::zeros({grad_out.size(0), grad_out.size(1), m},
 89 |                    at::device(grad_out.device()).dtype(at::ScalarType::Float));
 90 | 
 91 |   if (grad_out.is_cuda()) {
 92 |     three_interpolate_grad_kernel_wrapper(
 93 |         grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
 94 |         grad_out.data<float>(), idx.data<int>(), weight.data<float>(),
 95 |         output.data<float>());
 96 |   } else {
 97 |     AT_ASSERT(false, "CPU not supported");
 98 |   }
 99 | 
100 |   return output;
101 | }
102 | 


--------------------------------------------------------------------------------
/utils/pointops2/functions/test_attention_op_step1_v2.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import pointops
  3 | 
# Manual comparison script: runs pointops.attention_step1 and
# pointops.attention_step1_v2 on the same random inputs, prints slices of the
# outputs and gradients, and reports whether the two outputs agree.
# Requires a CUDA device and the compiled `pointops` extension.
torch.manual_seed(1)

# Problem sizes: M index pairs over N points, C channels split across h heads.
M = 800000
N = 35000
C = 96
h = 6
query = torch.rand(N, h, C // h).cuda()
key = torch.rand(N, h, C // h).cuda()

# Random integer indices obtained by scaling uniform samples by N.
index_0 = torch.rand(M)
# NOTE(review): torch.rand is documented to sample from [0, 1), so this mask
# should never select anything -- confirm whether the clamp is still needed.
index_0[index_0 < 0] = 0
index_0 = (index_0 * N).long().cuda()

index_1 = torch.rand(M)
index_1[index_1 < 0] = 0
index_1 = (index_1 * N).long().cuda()

query.requires_grad = True
key.requires_grad = True


# Reference run of the original op on the unsorted indices.
attn_flat = pointops.attention_step1(
    query.float(), key.float(), index_0.int(), index_1.int()
)
loss = attn_flat.sum()
loss.backward()
print(
    "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(
        attn_flat.shape, attn_flat[:20, :10]
    )
)
print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# Blocks until Enter is pressed so the printed values can be inspected.
input()


# rearrange index for acceleration
index_0, indices = torch.sort(index_0)  # [M,]
index_1 = index_1[indices]  # [M,]
index_0_counts = index_0.bincount()

print("index_0_counts.shape: ", index_0_counts.shape)

# Largest number of entries sharing one index_0 value; note it stays a tensor.
n_max = index_0_counts.max()
index_0_offsets = index_0_counts.cumsum(dim=-1)  # [N]

print("v1 index_0_offsets.shape: ", index_0_offsets.shape)

# Prepend 0 so the cumulative counts become start offsets.
index_0_offsets = torch.cat(
    [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
)  # [N+1]

# print("index_0[:100]: ", index_0[:100])
print("n_max: ", n_max)
print("index_0_offsets.shape: ", index_0_offsets.shape)
# input()

print("index_0_offsets[:100]: ", index_0_offsets[:100])
print("index_1[:20]: ", index_1[:20])


# Recompute the original op on the sorted indices so that its rows line up
# with the v2 output compared below.
attn_flat = pointops.attention_step1(
    query.float(), key.float(), index_0.int(), index_1.int()
)
# loss = attn_flat.sum()
# loss.backward()
# # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
# # loss = attn_flat.sum()
# # loss.backward()
# print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10]))
# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# input()

print("query.is_contiguous(): ", query.is_contiguous())
print("key.is_contiguous(): ", key.is_contiguous())
print("index_0.is_contiguous(): ", index_0.is_contiguous())
print("index_1.is_contiguous(): ", index_1.is_contiguous())

# v2 op: takes index_1 plus the index_0 start offsets and n_max.
attn_flat_v2 = pointops.attention_step1_v2(
    query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max
)
loss = attn_flat_v2.sum()
# NOTE: gradients were never zeroed after the first backward() above, so the
# .grad values printed below are the sum of both backward passes.
loss.backward()

# attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max)
# loss = attn_flat_v2.sum()
# loss.backward()

print(
    "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format(
        attn_flat_v2.shape, attn_flat_v2[:20, :10]
    )
)
print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
# input()

# mask = attn_flat_v2.sum(-1) != 0
# print("mask.sum(): ", mask.sum())
# print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max())


# Element-wise agreement check between the two implementations.
print(
    "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ",
    ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(),
)

# Largest squared difference per column over the first `selected` rows.
selected = 10000
print(
    "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ",
    torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0),
)
117 | 


--------------------------------------------------------------------------------
/conf/data/datasets/egobody.yaml:
--------------------------------------------------------------------------------
  1 | # @package data
  2 | train_dataset:
  3 |   _target_: datasets.semseg.SemanticSegmentationDataset
  4 |   dataset_name: "human_segmentation"
  5 |   data_dir: data/processed/egobody
  6 |   image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  7 |   volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  8 |   label_db_filepath: data/processed/egobody/part_database.yaml
  9 |   color_mean_std: data/processed/egobody/color_mean_std.yaml
 10 |   data_percent: 1.0
 11 |   mode: ${data.train_mode}
 12 |   ignore_label: ${data.ignore_label}
 13 |   num_labels: ${data.num_labels}
 14 |   add_raw_coordinates: ${data.add_raw_coordinates}
 15 |   add_colors: ${data.add_colors}
 16 |   add_normals: ${data.add_normals}
 17 |   add_instance: ${data.add_instance}
 18 |   cache_data: ${data.cache_data}
 19 |   # different augs experiments
 20 |   instance_oversampling: 0.0
 21 |   place_around_existing: False
 22 |   point_per_cut: 0
 23 |   max_cut_region: 0
 24 |   flip_in_center: false
 25 |   noise_rate: 0
 26 |   resample_points: 0
 27 |   cropping: ${data.cropping}
 28 |   cropping_args: ${data.cropping_args}
 29 |   is_tta: false
 30 |   crop_min_size: ${data.crop_min_size}
 31 |   crop_length: ${data.crop_length}
 32 |   cropping_v1: ${data.cropping_v1}
 33 |   area: ${general.area}
 34 |   reps_per_epoch: ${general.reps_per_epoch}
 35 |   eval_inner_core: ${general.eval_inner_core}
 36 |   filter_out_classes: [0]
 37 |   label_offset: 1
 38 |   is_elastic_distortion: true
 39 |   color_drop: 0.0
 40 |   is_mirroring: ${data.is_mirroring}
 41 |   part2human: ${data.part2human}
 42 |   broken_mirror_version: ${data.broken_mirror_version}
 43 | 
 44 | validation_dataset:
 45 |   _target_: datasets.semseg.SemanticSegmentationDataset
 46 |   dataset_name: "human_segmentation"
 47 |   data_dir:
 48 |   - data/processed/egobody
 49 |   image_augmentations_path: null
 50 |   volume_augmentations_path: null
 51 |   label_db_filepath: data/processed/egobody/part_database.yaml
 52 |   color_mean_std: data/processed/egobody/color_mean_std.yaml
 53 |   data_percent: 1.0
 54 |   mode: ${data.validation_mode}
 55 |   ignore_label: ${data.ignore_label}
 56 |   num_labels: ${data.num_labels}
 57 |   add_raw_coordinates: ${data.add_raw_coordinates}
 58 |   add_colors: ${data.add_colors}
 59 |   add_normals: ${data.add_normals}
 60 |   add_instance: ${data.add_instance}
 61 |   cache_data: ${data.cache_data}
 62 |   cropping: false
 63 |   is_tta: false
 64 |   crop_min_size: ${data.crop_min_size}
 65 |   crop_length: ${data.crop_length}
 66 |   cropping_v1: ${data.cropping_v1}
 67 |   area: ${general.area}
 68 |   on_crops: ${general.on_crops}
 69 |   eval_inner_core: ${general.eval_inner_core}
 70 |   filter_out_classes: [0]
 71 |   label_offset: 1
 72 |   part2human: ${data.part2human}
 73 | 
 74 | test_dataset:
 75 |   _target_: datasets.semseg.SemanticSegmentationDataset
 76 |   dataset_name: "human_segmentation"
 77 |   data_dir: data/processed/egobody
 78 |   image_augmentations_path: null
 79 |   volume_augmentations_path: null
 80 |   label_db_filepath: data/processed/egobody/part_database.yaml
 81 |   color_mean_std: data/processed/egobody/color_mean_std.yaml
 82 |   data_percent: 1.0
 83 |   mode: ${data.test_mode}
 84 |   ignore_label: ${data.ignore_label}
 85 |   num_labels: ${data.num_labels}
 86 |   add_raw_coordinates: ${data.add_raw_coordinates}
 87 |   add_colors: ${data.add_colors}
 88 |   add_normals: ${data.add_normals}
 89 |   add_instance: ${data.add_instance}
 90 |   cache_data: ${data.cache_data}
 91 |   cropping: false
 92 |   is_tta: false
 93 |   crop_min_size: ${data.crop_min_size}
 94 |   crop_length: ${data.crop_length}
 95 |   cropping_v1: ${data.cropping_v1}
 96 |   area: ${general.area}
 97 |   on_crops: ${general.on_crops}
 98 |   eval_inner_core: ${general.eval_inner_core}
 99 |   filter_out_classes: [0]
100 |   label_offset: 1
101 |   part2human: ${data.part2human}
102 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/pointops_api.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/serialize/tensor.h>
 2 | #include <torch/extension.h>
 3 | 
 4 | #include "knnquery/knnquery_cuda_kernel.h"
 5 | #include "sampling/sampling_cuda_kernel.h"
 6 | #include "grouping/grouping_cuda_kernel.h"
 7 | #include "interpolation/interpolation_cuda_kernel.h"
 8 | #include "aggregation/aggregation_cuda_kernel.h"
 9 | #include "subtraction/subtraction_cuda_kernel.h"
10 | #include "attention/attention_cuda_kernel.h"
11 | #include "rpe/relative_pos_encoding_cuda_kernel.h"
12 | #include "attention_v2/attention_cuda_kernel_v2.h"
13 | #include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h"
14 | 
15 | 
// Python bindings for the pointops CUDA entry points. Every function is
// exported under the same name as its C++ symbol, with that name as docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    // k-NN query and furthest-point sampling
    m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda");
    m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda");
    // grouping, interpolation, subtraction, aggregation (forward/backward pairs)
    m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
    m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
    m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
    m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
    m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
    m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
    m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
    m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
    // attention and relative-position ops, first version
    m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda");
    m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda");
    m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda");
    m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda");
    m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda");
    m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda");
    m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda");
    m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda");
    // attention and relative-position ops, _v2 variants
    m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2");
    m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2");
    m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2");
    m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2");
    m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2");
    m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2");
    m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2");
    m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2");
    // dot product with index, _v3 variants
    m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3");
    m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3");
    }
46 | 


--------------------------------------------------------------------------------
/conf/data/datasets/synthetic_humans.yaml:
--------------------------------------------------------------------------------
  1 | # @package data
  2 | train_dataset:
  3 |   _target_: datasets.semseg.SemanticSegmentationDataset
  4 |   dataset_name: "human_segmentation"
  5 |   data_dir:
  6 |   - data/processed/synthetic_humans
  7 |   image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  8 |   volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  9 |   label_db_filepath: data/processed/synthetic_humans/part_database.yaml
 10 |   color_mean_std: data/processed/synthetic_humans/color_mean_std.yaml
 11 |   data_percent: 1.0
 12 |   mode: ${data.train_mode}
 13 |   ignore_label: ${data.ignore_label}
 14 |   num_labels: ${data.num_labels}
 15 |   add_raw_coordinates: ${data.add_raw_coordinates}
 16 |   add_colors: ${data.add_colors}
 17 |   add_normals: ${data.add_normals}
 18 |   add_instance: ${data.add_instance}
 19 |   cache_data: ${data.cache_data}
 20 |   # different augs experiments
 21 |   instance_oversampling: 0.0
 22 |   place_around_existing: False
 23 |   point_per_cut: 0
 24 |   max_cut_region: 0
 25 |   flip_in_center: false
 26 |   noise_rate: 0
 27 |   resample_points: 0
 28 |   cropping: ${data.cropping}
 29 |   cropping_args: ${data.cropping_args}
 30 |   is_tta: false
 31 |   crop_min_size: ${data.crop_min_size}
 32 |   crop_length: ${data.crop_length}
 33 |   cropping_v1: ${data.cropping_v1}
 34 |   area: ${general.area}
 35 |   reps_per_epoch: ${general.reps_per_epoch}
 36 |   eval_inner_core: ${general.eval_inner_core}
 37 |   filter_out_classes: [0]
 38 |   label_offset: 1
 39 |   is_elastic_distortion: true
 40 |   color_drop: 0.0
 41 |   part2human: ${data.part2human}
 42 |   is_mirroring: ${data.is_mirroring}
 43 |   broken_mirror_version: ${data.broken_mirror_version}
 44 | 
 45 | validation_dataset:
 46 |   _target_: datasets.semseg.SemanticSegmentationDataset
 47 |   dataset_name: "human_segmentation"
 48 |   data_dir:
 49 |   - data/processed/egobody
 50 |   image_augmentations_path: null
 51 |   volume_augmentations_path: null
 52 |   label_db_filepath: data/processed/egobody/part_database.yaml
 53 |   color_mean_std: data/processed/egobody/color_mean_std.yaml
 54 |   data_percent: 1.0
 55 |   mode: ${data.validation_mode}
 56 |   ignore_label: ${data.ignore_label}
 57 |   num_labels: ${data.num_labels}
 58 |   add_raw_coordinates: ${data.add_raw_coordinates}
 59 |   add_colors: ${data.add_colors}
 60 |   add_normals: ${data.add_normals}
 61 |   add_instance: ${data.add_instance}
 62 |   cache_data: ${data.cache_data}
 63 |   cropping: false
 64 |   is_tta: false
 65 |   crop_min_size: ${data.crop_min_size}
 66 |   crop_length: ${data.crop_length}
 67 |   cropping_v1: ${data.cropping_v1}
 68 |   area: ${general.area}
 69 |   on_crops: ${general.on_crops}
 70 |   eval_inner_core: ${general.eval_inner_core}
 71 |   filter_out_classes: [0]
 72 |   label_offset: 1
 73 |   part2human: ${data.part2human}
 74 | 
 75 | test_dataset:
 76 |   _target_: datasets.semseg.SemanticSegmentationDataset
 77 |   dataset_name: "human_segmentation"
 78 |   data_dir: data/processed/egobody
 79 |   image_augmentations_path: null
 80 |   volume_augmentations_path: null
 81 |   label_db_filepath: data/processed/egobody/part_database.yaml
 82 |   color_mean_std: data/processed/egobody/color_mean_std.yaml
 83 |   data_percent: 1.0
 84 |   mode: ${data.test_mode}
 85 |   ignore_label: ${data.ignore_label}
 86 |   num_labels: ${data.num_labels}
 87 |   add_raw_coordinates: ${data.add_raw_coordinates}
 88 |   add_colors: ${data.add_colors}
 89 |   add_normals: ${data.add_normals}
 90 |   add_instance: ${data.add_instance}
 91 |   cache_data: ${data.cache_data}
 92 |   cropping: false
 93 |   is_tta: false
 94 |   crop_min_size: ${data.crop_min_size}
 95 |   crop_length: ${data.crop_length}
 96 |   cropping_v1: ${data.cropping_v1}
 97 |   area: ${general.area}
 98 |   on_crops: ${general.on_crops}
 99 |   eval_inner_core: ${general.eval_inner_core}
100 |   filter_out_classes: [0]
101 |   label_offset: 1
102 |   part2human: ${data.part2human}
103 | 


--------------------------------------------------------------------------------
/models/modules/senet_block.py:
--------------------------------------------------------------------------------
  1 | import MinkowskiEngine as ME
  2 | import torch.nn as nn
  3 | from mix3d.models.modules.common import ConvType, NormType
  4 | from mix3d.models.modules.resnet_block import BasicBlock, Bottleneck
  5 | 
  6 | 
  7 | class SELayer(nn.Module):
  8 |     def __init__(self, channel, reduction=16, D=-1):
  9 |         # Global coords does not require coords_key
 10 |         super().__init__()
 11 |         self.fc = nn.Sequential(
 12 |             ME.MinkowskiLinear(channel, channel // reduction),
 13 |             ME.MinkowskiReLU(inplace=True),
 14 |             ME.MinkowskiLinear(channel // reduction, channel),
 15 |             ME.MinkowskiSigmoid(),
 16 |         )
 17 |         self.pooling = ME.MinkowskiGlobalPooling(dimension=D)
 18 |         self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D)
 19 | 
 20 |     def forward(self, x):
 21 |         y = self.pooling(x)
 22 |         y = self.fc(y)
 23 |         return self.broadcast_mul(x, y)
 24 | 
 25 | 
 26 | class SEBasicBlock(BasicBlock):
 27 |     def __init__(
 28 |         self,
 29 |         inplanes,
 30 |         planes,
 31 |         stride=1,
 32 |         dilation=1,
 33 |         downsample=None,
 34 |         conv_type=ConvType.HYPERCUBE,
 35 |         reduction=16,
 36 |         D=-1,
 37 |     ):
 38 |         super().__init__(
 39 |             inplanes,
 40 |             planes,
 41 |             stride=stride,
 42 |             dilation=dilation,
 43 |             downsample=downsample,
 44 |             conv_type=conv_type,
 45 |             D=D,
 46 |         )
 47 |         self.se = SELayer(planes, reduction=reduction, D=D)
 48 | 
 49 |     def forward(self, x):
 50 |         residual = x
 51 | 
 52 |         out = self.conv1(x)
 53 |         out = self.norm1(out)
 54 |         out = self.relu(out)
 55 | 
 56 |         out = self.conv2(out)
 57 |         out = self.norm2(out)
 58 |         out = self.se(out)
 59 | 
 60 |         if self.downsample is not None:
 61 |             residual = self.downsample(x)
 62 | 
 63 |         out += residual
 64 |         out = self.relu(out)
 65 | 
 66 |         return out
 67 | 
 68 | 
class SEBasicBlockSN(SEBasicBlock):
    """``SEBasicBlock`` with ``NORM_TYPE`` set to ``NormType.SPARSE_SWITCH_NORM``."""

    NORM_TYPE = NormType.SPARSE_SWITCH_NORM


class SEBasicBlockIN(SEBasicBlock):
    """``SEBasicBlock`` with ``NORM_TYPE`` set to ``NormType.SPARSE_INSTANCE_NORM``."""

    NORM_TYPE = NormType.SPARSE_INSTANCE_NORM


class SEBasicBlockLN(SEBasicBlock):
    """``SEBasicBlock`` with ``NORM_TYPE`` set to ``NormType.SPARSE_LAYER_NORM``."""

    NORM_TYPE = NormType.SPARSE_LAYER_NORM
 79 | 
 80 | 
 81 | class SEBottleneck(Bottleneck):
 82 |     def __init__(
 83 |         self,
 84 |         inplanes,
 85 |         planes,
 86 |         stride=1,
 87 |         dilation=1,
 88 |         downsample=None,
 89 |         conv_type=ConvType.HYPERCUBE,
 90 |         D=3,
 91 |         reduction=16,
 92 |     ):
 93 |         super().__init__(
 94 |             inplanes,
 95 |             planes,
 96 |             stride=stride,
 97 |             dilation=dilation,
 98 |             downsample=downsample,
 99 |             conv_type=conv_type,
100 |             D=D,
101 |         )
102 |         self.se = SELayer(planes * self.expansion, reduction=reduction, D=D)
103 | 
104 |     def forward(self, x):
105 |         residual = x
106 | 
107 |         out = self.conv1(x)
108 |         out = self.norm1(out)
109 |         out = self.relu(out)
110 | 
111 |         out = self.conv2(out)
112 |         out = self.norm2(out)
113 |         out = self.relu(out)
114 | 
115 |         out = self.conv3(out)
116 |         out = self.norm3(out)
117 |         out = self.se(out)
118 | 
119 |         if self.downsample is not None:
120 |             residual = self.downsample(x)
121 | 
122 |         out += residual
123 |         out = self.relu(out)
124 | 
125 |         return out
126 | 
127 | 
class SEBottleneckSN(SEBottleneck):
    """``SEBottleneck`` with ``NORM_TYPE`` set to ``NormType.SPARSE_SWITCH_NORM``."""

    NORM_TYPE = NormType.SPARSE_SWITCH_NORM


class SEBottleneckIN(SEBottleneck):
    """``SEBottleneck`` with ``NORM_TYPE`` set to ``NormType.SPARSE_INSTANCE_NORM``."""

    NORM_TYPE = NormType.SPARSE_INSTANCE_NORM


class SEBottleneckLN(SEBottleneck):
    """``SEBottleneck`` with ``NORM_TYPE`` set to ``NormType.SPARSE_LAYER_NORM``."""

    NORM_TYPE = NormType.SPARSE_LAYER_NORM
138 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /saved
  2 | /logs
  3 | /data
  4 | third_party
  5 | *.out
  6 | checkpoints/
  7 | 
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | share/python-wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | *.manifest
 40 | *.spec
 41 | 
 42 | # Installer logs
 43 | pip-log.txt
 44 | pip-delete-this-directory.txt
 45 | 
 46 | # Unit test / coverage reports
 47 | htmlcov/
 48 | .tox/
 49 | .nox/
 50 | .coverage
 51 | .coverage.*
 52 | .cache
 53 | nosetests.xml
 54 | coverage.xml
 55 | *.cover
 56 | *.py,cover
 57 | .hypothesis/
 58 | .pytest_cache/
 59 | cover/
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | db.sqlite3
 69 | db.sqlite3-journal
 70 | 
 71 | # Flask stuff:
 72 | instance/
 73 | .webassets-cache
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | .pybuilder/
 83 | target/
 84 | 
 85 | # Jupyter Notebook
 86 | .ipynb_checkpoints
 87 | 
 88 | # IPython
 89 | profile_default/
 90 | ipython_config.py
 91 | 
 92 | # pyenv
 93 | #   For a library or package, you might want to ignore these files since the code is
 94 | #   intended to run in multiple environments; otherwise, check them in:
 95 | # .python-version
 96 | 
 97 | # pipenv
 98 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 99 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | #   install all needed dependencies.
102 | #Pipfile.lock
103 | 
104 | # poetry
105 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
107 | #   commonly ignored for libraries.
108 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109 | #poetry.lock
110 | 
111 | # pdm
112 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | #pdm.lock
114 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
115 | #   in version control.
116 | #   https://pdm.fming.dev/#use-with-ide
117 | .pdm.toml
118 | 
119 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
120 | __pypackages__/
121 | 
122 | # Celery stuff
123 | celerybeat-schedule
124 | celerybeat.pid
125 | 
126 | # SageMath parsed files
127 | *.sage.py
128 | 
129 | # Environments
130 | .env
131 | .venv
132 | env/
133 | venv/
134 | ENV/
135 | env.bak/
136 | venv.bak/
137 | 
138 | # Spyder project settings
139 | .spyderproject
140 | .spyproject
141 | 
142 | # Rope project settings
143 | .ropeproject
144 | 
145 | # mkdocs documentation
146 | /site
147 | 
148 | # mypy
149 | .mypy_cache/
150 | .dmypy.json
151 | dmypy.json
152 | 
153 | # Pyre type checker
154 | .pyre/
155 | 
156 | # pytype static type analyzer
157 | .pytype/
158 | 
159 | # Cython debug symbols
160 | cython_debug/
161 | 
162 | # PyCharm
163 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
164 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
165 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
166 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
167 | .idea/
168 | 


--------------------------------------------------------------------------------
/benchmark/util.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import os
  3 | import sys
  4 | 
  5 | try:
  6 |     import numpy as np
  7 | except:
  8 |     print("Failed to import numpy package.")
  9 |     sys.exit(-1)
 10 | try:
 11 |     import imageio
 12 | except:
 13 |     print("Please install the module 'imageio' for image processing, e.g.")
 14 |     print("pip install imageio")
 15 |     sys.exit(-1)
 16 | 
 17 | # print an error message and quit
 18 | def print_error(message, user_fault=False):
 19 |     sys.stderr.write("ERROR: " + str(message) + "\n")
 20 |     if user_fault:
 21 |         sys.exit(2)
 22 |     sys.exit(-1)
 23 | 
 24 | 
 25 | # if string s represents an int
 26 | def represents_int(s):
 27 |     try:
 28 |         int(s)
 29 |         return True
 30 |     except ValueError:
 31 |         return False
 32 | 
 33 | 
 34 | def read_label_mapping(
 35 |     filename, label_from="raw_category", label_to="nyu40id"
 36 | ):
 37 |     assert os.path.isfile(filename)
 38 |     mapping = dict()
 39 |     with open(filename) as csvfile:
 40 |         reader = csv.DictReader(csvfile, delimiter="\t")
 41 |         for row in reader:
 42 |             mapping[row[label_from]] = int(row[label_to])
 43 |     # if ints convert
 44 |     if represents_int(list(mapping.keys())[0]):
 45 |         mapping = {int(k): v for k, v in mapping.items()}
 46 |     return mapping
 47 | 
 48 | 
 49 | # input: scene_types.txt or scene_types_all.txt
 50 | def read_scene_types_mapping(filename, remove_spaces=True):
 51 |     assert os.path.isfile(filename)
 52 |     mapping = dict()
 53 |     lines = open(filename).read().splitlines()
 54 |     lines = [line.split("\t") for line in lines]
 55 |     if remove_spaces:
 56 |         mapping = {x[1].strip(): int(x[0]) for x in lines}
 57 |     else:
 58 |         mapping = {x[1]: int(x[0]) for x in lines}
 59 |     return mapping
 60 | 
 61 | 
 62 | # color by label
 63 | def visualize_label_image(filename, image):
 64 |     height = image.shape[0]
 65 |     width = image.shape[1]
 66 |     vis_image = np.zeros([height, width, 3], dtype=np.uint8)
 67 |     color_palette = create_color_palette()
 68 |     for idx, color in enumerate(color_palette):
 69 |         vis_image[image == idx] = color
 70 |     imageio.imwrite(filename, vis_image)
 71 | 
 72 | 
 73 | # color by different instances (mod length of color palette)
 74 | def visualize_instance_image(filename, image):
 75 |     height = image.shape[0]
 76 |     width = image.shape[1]
 77 |     vis_image = np.zeros([height, width, 3], dtype=np.uint8)
 78 |     color_palette = create_color_palette()
 79 |     instances = np.unique(image)
 80 |     for idx, inst in enumerate(instances):
 81 |         vis_image[image == inst] = color_palette[inst % len(color_palette)]
 82 |     imageio.imwrite(filename, vis_image)
 83 | 
 84 | 
 85 | # color palette for nyu40 labels
 86 | def create_color_palette():
 87 |     return [
 88 |         (0, 0, 0),
 89 |         (174, 199, 232),  # wall
 90 |         (152, 223, 138),  # floor
 91 |         (31, 119, 180),  # cabinet
 92 |         (255, 187, 120),  # bed
 93 |         (188, 189, 34),  # chair
 94 |         (140, 86, 75),  # sofa
 95 |         (255, 152, 150),  # table
 96 |         (214, 39, 40),  # door
 97 |         (197, 176, 213),  # window
 98 |         (148, 103, 189),  # bookshelf
 99 |         (196, 156, 148),  # picture
100 |         (23, 190, 207),  # counter
101 |         (178, 76, 76),
102 |         (247, 182, 210),  # desk
103 |         (66, 188, 102),
104 |         (219, 219, 141),  # curtain
105 |         (140, 57, 197),
106 |         (202, 185, 52),
107 |         (51, 176, 203),
108 |         (200, 54, 131),
109 |         (92, 193, 61),
110 |         (78, 71, 183),
111 |         (172, 114, 82),
112 |         (255, 127, 14),  # refrigerator
113 |         (91, 163, 138),
114 |         (153, 98, 156),
115 |         (140, 153, 101),
116 |         (158, 218, 229),  # shower curtain
117 |         (100, 125, 154),
118 |         (178, 127, 135),
119 |         (120, 185, 128),
120 |         (146, 111, 194),
121 |         (44, 160, 44),  # toilet
122 |         (112, 128, 144),  # sink
123 |         (96, 207, 209),
124 |         (227, 119, 194),  # bathtub
125 |         (213, 92, 176),
126 |         (94, 106, 211),
127 |         (82, 84, 163),  # otherfurn
128 |         (100, 85, 144),
129 |     ]
130 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h:
--------------------------------------------------------------------------------
// Declarations for the "v2"/"v3" relative-position-encoding CUDA operations.
// Each operation is declared twice: once as an at::Tensor-based entry point
// and once as a raw-pointer "launcher" with C linkage.
#ifndef _RPE_V2_CUDA_KERNEL
#define _RPE_V2_CUDA_KERNEL
#include <vector>
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>

// Tensor-level entry points: dot product with index tables (v2), forward and backward.
void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

// v3 variant: takes index_q offsets instead of index_q, and drops the T,
// rel_idx_offsets and sort_indices arguments of v2.
void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);

// Attention "step 2" with a relative-position value table, forward and backward.
void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);

// Raw-pointer launchers, declared with C linkage when compiled as C++.
// Parameter lists mirror the tensor-level functions above, with each tensor
// replaced by a const float* / const int* input or a float* output pointer.
#ifdef __cplusplus
extern "C" {
#endif

void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output);
void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);

void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output);
void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);

#ifdef __cplusplus
}
#endif
#endif
33 | 


--------------------------------------------------------------------------------
/occlusion_subsets/split_test_occlusion_high.txt:
--------------------------------------------------------------------------------
 1 | recording_20210910_S06_S05_02_scene_main_02741.ply
 2 | recording_20210911_S07_S06_02_scene_sub_2_01231.ply
 3 | recording_20210918_S05_S06_01_scene_sub_2_02241.ply
 4 | recording_20210918_S05_S06_02_scene_sub_2_00751.ply
 5 | recording_20210918_S05_S06_02_scene_sub_2_00961.ply
 6 | recording_20210918_S05_S06_02_scene_sub_2_03031.ply
 7 | recording_20210918_S05_S06_02_scene_sub_2_03511.ply
 8 | recording_20210918_S05_S06_02_scene_sub_2_03901.ply
 9 | recording_20210918_S05_S06_03_scene_sub_2_02321.ply
10 | recording_20210918_S05_S06_03_scene_sub_2_02531.ply
11 | recording_20210918_S05_S06_04_scene_sub_2_03361.ply
12 | recording_20210918_S05_S06_04_scene_sub_2_04021.ply
13 | recording_20210918_S05_S06_05_scene_main_01021.ply
14 | recording_20210918_S05_S06_05_scene_sub_2_01051.ply
15 | recording_20210918_S05_S06_05_scene_sub_2_01231.ply
16 | recording_20210918_S05_S06_05_scene_sub_2_01591.ply
17 | recording_20210918_S05_S06_05_scene_sub_2_03451.ply
18 | recording_20210918_S05_S09_01_scene_sub_2_01541.ply
19 | recording_20210918_S05_S09_01_scene_sub_2_01601.ply
20 | recording_20210918_S05_S09_01_scene_sub_2_02591.ply
21 | recording_20210918_S06_S05_01_scene_sub_2_02831.ply
22 | recording_20210918_S06_S05_01_scene_sub_2_02891.ply
23 | recording_20210918_S06_S05_01_scene_sub_2_02981.ply
24 | recording_20210918_S06_S05_01_scene_sub_2_03071.ply
25 | recording_20210918_S06_S05_02_scene_sub_2_03721.ply
26 | recording_20210918_S06_S05_02_scene_sub_2_03781.ply
27 | recording_20210918_S06_S05_02_scene_sub_2_03841.ply
28 | recording_20210918_S06_S05_03_scene_main_01301.ply
29 | recording_20210918_S06_S05_03_scene_sub_2_00971.ply
30 | recording_20210918_S06_S05_03_scene_sub_2_01271.ply
31 | recording_20210918_S06_S05_03_scene_sub_2_01331.ply
32 | recording_20210918_S06_S05_03_scene_sub_2_01451.ply
33 | recording_20210918_S09_S05_01_scene_sub_2_01881.ply
34 | recording_20210918_S09_S05_01_scene_sub_2_02001.ply
35 | recording_20210918_S09_S05_01_scene_sub_2_02421.ply
36 | recording_20210918_S09_S05_01_scene_sub_2_02481.ply
37 | recording_20210918_S09_S05_02_scene_sub_2_03091.ply
38 | recording_20210918_S09_S05_02_scene_sub_2_03181.ply
39 | recording_20210918_S09_S05_02_scene_sub_2_03781.ply
40 | recording_20210918_S09_S05_02_scene_sub_2_03931.ply
41 | recording_20210918_S09_S05_02_scene_sub_2_04231.ply
42 | recording_20210918_S09_S05_03_scene_sub_2_01751.ply
43 | recording_20210918_S09_S05_03_scene_sub_2_02291.ply
44 | recording_20210918_S09_S05_03_scene_sub_2_02441.ply
45 | recording_20210918_S09_S05_03_scene_sub_2_02501.ply
46 | recording_20210923_S05_S13_01_scene_sub_1_03521.ply
47 | recording_20210923_S05_S13_01_scene_sub_1_03551.ply
48 | recording_20210923_S13_S05_01_scene_sub_1_02811.ply
49 | recording_20210923_S13_S05_01_scene_sub_1_03471.ply
50 | recording_20210923_S13_S05_01_scene_sub_1_03501.ply
51 | recording_20210929_S05_S16_02_scene_sub_1_02831.ply
52 | recording_20210929_S05_S16_02_scene_sub_1_03041.ply
53 | recording_20210929_S05_S16_04_scene_sub_1_02771.ply
54 | recording_20210929_S16_S05_01_scene_sub_1_03301.ply
55 | recording_20211004_S19_S06_01_scene_sub_1_03161.ply
56 | recording_20211004_S19_S06_01_scene_sub_1_03671.ply
57 | recording_20211004_S19_S06_01_scene_sub_1_03821.ply
58 | recording_20211004_S19_S06_01_scene_sub_1_04661.ply
59 | recording_20211004_S19_S06_02_scene_sub_1_03001.ply
60 | recording_20211004_S19_S06_02_scene_sub_1_03421.ply
61 | recording_20211004_S19_S06_03_scene_main_03781.ply
62 | recording_20211004_S19_S06_03_scene_sub_1_04171.ply
63 | recording_20211004_S19_S06_04_scene_sub_1_02181.ply
64 | recording_20211004_S19_S06_04_scene_sub_1_02211.ply
65 | recording_20211004_S19_S06_04_scene_sub_1_02931.ply
66 | recording_20220415_S35_S36_01_scene_sub_3_02391.ply
67 | recording_20220415_S35_S36_01_scene_sub_3_03291.ply
68 | recording_20220415_S35_S36_02_scene_sub_4_02351.ply
69 | recording_20220415_S35_S36_02_scene_sub_4_03011.ply
70 | recording_20220415_S36_S35_01_scene_sub_1_03681.ply
71 | recording_20220415_S36_S35_01_scene_sub_2_02151.ply
72 | recording_20220415_S36_S35_01_scene_sub_2_02241.ply
73 | recording_20220415_S36_S35_01_scene_sub_3_01851.ply
74 | recording_20220415_S36_S35_01_scene_sub_3_03711.ply
75 | recording_20220415_S36_S35_01_scene_sub_4_01401.ply
76 | recording_20220415_S36_S35_01_scene_sub_4_01791.ply
77 | recording_20220415_S36_S35_01_scene_sub_4_02511.ply
78 | recording_20220415_S36_S35_02_scene_sub_3_02101.ply


--------------------------------------------------------------------------------
/models/modules/resnet_block.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | from MinkowskiEngine import MinkowskiReLU
  3 | 
  4 | from models.modules.common import ConvType, NormType, conv, get_norm
  5 | 
  6 | 
  7 | class BasicBlockBase(nn.Module):
  8 |     expansion = 1
  9 |     NORM_TYPE = NormType.BATCH_NORM
 10 | 
 11 |     def __init__(
 12 |         self,
 13 |         inplanes,
 14 |         planes,
 15 |         stride=1,
 16 |         dilation=1,
 17 |         downsample=None,
 18 |         conv_type=ConvType.HYPERCUBE,
 19 |         bn_momentum=0.1,
 20 |         D=3,
 21 |     ):
 22 |         super().__init__()
 23 | 
 24 |         self.conv1 = conv(
 25 |             inplanes,
 26 |             planes,
 27 |             kernel_size=3,
 28 |             stride=stride,
 29 |             dilation=dilation,
 30 |             conv_type=conv_type,
 31 |             D=D,
 32 |         )
 33 |         self.norm1 = get_norm(
 34 |             self.NORM_TYPE, planes, D, bn_momentum=bn_momentum
 35 |         )
 36 |         self.conv2 = conv(
 37 |             planes,
 38 |             planes,
 39 |             kernel_size=3,
 40 |             stride=1,
 41 |             dilation=dilation,
 42 |             bias=False,
 43 |             conv_type=conv_type,
 44 |             D=D,
 45 |         )
 46 |         self.norm2 = get_norm(
 47 |             self.NORM_TYPE, planes, D, bn_momentum=bn_momentum
 48 |         )
 49 |         self.relu = MinkowskiReLU(inplace=True)
 50 |         self.downsample = downsample
 51 | 
 52 |     def forward(self, x):
 53 |         residual = x
 54 | 
 55 |         out = self.conv1(x)
 56 |         out = self.norm1(out)
 57 |         out = self.relu(out)
 58 | 
 59 |         out = self.conv2(out)
 60 |         out = self.norm2(out)
 61 | 
 62 |         if self.downsample is not None:
 63 |             residual = self.downsample(x)
 64 | 
 65 |         out += residual
 66 |         out = self.relu(out)
 67 | 
 68 |         return out
 69 | 
 70 | 
class BasicBlock(BasicBlockBase):
    """BasicBlockBase whose norm layers are built with NormType.BATCH_NORM."""

    NORM_TYPE = NormType.BATCH_NORM
 73 | 
 74 | 
class BasicBlockIN(BasicBlockBase):
    """BasicBlockBase whose norm layers are built with NormType.INSTANCE_NORM."""

    NORM_TYPE = NormType.INSTANCE_NORM
 77 | 
 78 | 
class BasicBlockINBN(BasicBlockBase):
    """BasicBlockBase whose norm layers are built with NormType.INSTANCE_BATCH_NORM."""

    NORM_TYPE = NormType.INSTANCE_BATCH_NORM
 81 | 
 82 | 
 83 | class BottleneckBase(nn.Module):
 84 |     expansion = 4
 85 |     NORM_TYPE = NormType.BATCH_NORM
 86 | 
 87 |     def __init__(
 88 |         self,
 89 |         inplanes,
 90 |         planes,
 91 |         stride=1,
 92 |         dilation=1,
 93 |         downsample=None,
 94 |         conv_type=ConvType.HYPERCUBE,
 95 |         bn_momentum=0.1,
 96 |         D=3,
 97 |     ):
 98 |         super().__init__()
 99 |         self.conv1 = conv(inplanes, planes, kernel_size=1, D=D)
100 |         self.norm1 = get_norm(
101 |             self.NORM_TYPE, planes, D, bn_momentum=bn_momentum
102 |         )
103 | 
104 |         self.conv2 = conv(
105 |             planes,
106 |             planes,
107 |             kernel_size=3,
108 |             stride=stride,
109 |             dilation=dilation,
110 |             conv_type=conv_type,
111 |             D=D,
112 |         )
113 |         self.norm2 = get_norm(
114 |             self.NORM_TYPE, planes, D, bn_momentum=bn_momentum
115 |         )
116 | 
117 |         self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D)
118 |         self.norm3 = get_norm(
119 |             self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum
120 |         )
121 | 
122 |         self.relu = MinkowskiReLU(inplace=True)
123 |         self.downsample = downsample
124 | 
125 |     def forward(self, x):
126 |         residual = x
127 | 
128 |         out = self.conv1(x)
129 |         out = self.norm1(out)
130 |         out = self.relu(out)
131 | 
132 |         out = self.conv2(out)
133 |         out = self.norm2(out)
134 |         out = self.relu(out)
135 | 
136 |         out = self.conv3(out)
137 |         out = self.norm3(out)
138 | 
139 |         if self.downsample is not None:
140 |             residual = self.downsample(x)
141 | 
142 |         out += residual
143 |         out = self.relu(out)
144 | 
145 |         return out
146 | 
147 | 
class Bottleneck(BottleneckBase):
    """BottleneckBase whose norm layers are built with NormType.BATCH_NORM."""

    NORM_TYPE = NormType.BATCH_NORM
150 | 
151 | 
class BottleneckIN(BottleneckBase):
    """BottleneckBase whose norm layers are built with NormType.INSTANCE_NORM."""

    NORM_TYPE = NormType.INSTANCE_NORM
154 | 
155 | 
class BottleneckINBN(BottleneckBase):
    """BottleneckBase whose norm layers are built with NormType.INSTANCE_BATCH_NORM."""

    NORM_TYPE = NormType.INSTANCE_BATCH_NORM
158 | 


--------------------------------------------------------------------------------
/models/metrics/confusionmatrix.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | 
  4 | 
  5 | class ConfusionMatrix:
  6 |     """Constructs a confusion matrix for a multi-class classification problems.
  7 | 
  8 |     Does not support multi-label, multi-class problems.
  9 | 
 10 |     Keyword arguments:
 11 |     - num_classes (int): number of classes in the classification problem.
 12 |     - normalized (boolean, optional): Determines whether or not the confusion
 13 |     matrix is normalized or not. Default: False.
 14 | 
 15 |     Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter/confusionmeter.py
 16 |     """
 17 | 
 18 |     def __init__(self, num_classes, ignore_label):
 19 |         super().__init__()
 20 | 
 21 |         self.conf = np.ndarray((num_classes, num_classes), dtype=np.int32)
 22 |         self.ignore_label = ignore_label
 23 |         self.num_classes = num_classes
 24 |         self.reset()
 25 | 
 26 |     def reset(self):
 27 |         self.conf.fill(0)
 28 | 
 29 |     def add(self, predicted, target):
 30 |         """Computes the confusion matrix
 31 | 
 32 |         The shape of the confusion matrix is K x K, where K is the number
 33 |         of classes.
 34 | 
 35 |         Keyword arguments:
 36 |         - predicted (Tensor or numpy.ndarray): Can be an N x K tensor/array of
 37 |         predicted scores obtained from the model for N examples and K classes,
 38 |         or an N-tensor/array of integer values between 0 and K-1.
 39 |         - target (Tensor or numpy.ndarray): Can be an N x K tensor/array of
 40 |         ground-truth classes for N examples and K classes, or an N-tensor/array
 41 |         of integer values between 0 and K-1.
 42 | 
 43 |         """
 44 |         # _, predicted = predicted.max(1)
 45 | 
 46 |         # predicted = predicted.view(-1)
 47 |         # target = target.view(-1)
 48 | 
 49 |         # If target and/or predicted are tensors, convert them to numpy arrays
 50 |         if torch.is_tensor(predicted):
 51 |             predicted = predicted.cpu().numpy()
 52 |         if torch.is_tensor(target):
 53 |             target = target.cpu().numpy()
 54 |         ind = ~np.isin(target, self.ignore_label)
 55 |         predicted, target = predicted[ind], target[ind]
 56 | 
 57 |         assert (
 58 |             predicted.shape[0] == target.shape[0]
 59 |         ), "number of targets and predicted outputs do not match"
 60 | 
 61 |         if np.ndim(predicted) != 1:
 62 |             assert (
 63 |                 predicted.shape[1] == self.num_classes
 64 |             ), "number of predictions does not match size of confusion matrix"
 65 |             predicted = np.argmax(predicted, 1)
 66 |         else:
 67 |             assert (predicted.max() < self.num_classes) and (
 68 |                 predicted.min() >= 0
 69 |             ), "predicted values are not between 0 and k-1"
 70 | 
 71 |         if np.ndim(target) != 1:
 72 |             assert (
 73 |                 target.shape[1] == self.num_classes
 74 |             ), "Onehot target does not match size of confusion matrix"
 75 |             assert (target >= 0).all() and (
 76 |                 target <= 1
 77 |             ).all(), "in one-hot encoding, target values should be 0 or 1"
 78 |             assert (
 79 |                 target.sum(1) == 1
 80 |             ).all(), "multi-label setting is not supported"
 81 |             target = np.argmax(target, 1)
 82 |         else:
 83 |             assert (target.max() < self.num_classes) and (
 84 |                 target.min() >= 0
 85 |             ), "target values are not between 0 and k-1"
 86 | 
 87 |         # hack for bincounting 2 arrays together
 88 |         x = predicted + self.num_classes * target
 89 |         bincount_2d = np.bincount(
 90 |             x.astype(np.int32), minlength=self.num_classes**2
 91 |         )
 92 |         assert bincount_2d.size == self.num_classes**2
 93 |         conf = bincount_2d.reshape((self.num_classes, self.num_classes))
 94 | 
 95 |         self.conf += conf
 96 | 
 97 |     def value(self, normalized=False):
 98 |         """
 99 |         Returns:
100 |             Confustion matrix of K rows and K columns, where rows corresponds
101 |             to ground-truth targets and columns corresponds to predicted
102 |             targets.
103 |         """
104 |         if normalized:
105 |             conf = self.conf.astype(np.float32)
106 |             return conf / conf.sum(1).clip(min=1e-12)[:, None]
107 |         return self.conf
108 | 


--------------------------------------------------------------------------------
/models/misc.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Facebook, Inc. and its affiliates.
  2 | # Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/util/misc.py
  3 | """
  4 | Misc functions, including distributed helpers.
  5 | 
  6 | Mostly copy-paste from torchvision references.
  7 | """
  8 | from typing import List, Optional
  9 | 
 10 | import torch
 11 | import torch.distributed as dist
 12 | import torchvision
 13 | from torch import Tensor
 14 | 
 15 | 
 16 | def _max_by_axis(the_list):
 17 |     # type: (List[List[int]]) -> List[int]
 18 |     maxes = the_list[0]
 19 |     for sublist in the_list[1:]:
 20 |         for index, item in enumerate(sublist):
 21 |             maxes[index] = max(maxes[index], item)
 22 |     return maxes
 23 | 
 24 | 
 25 | class NestedTensor(object):
 26 |     def __init__(self, tensors, mask: Optional[Tensor]):
 27 |         self.tensors = tensors
 28 |         self.mask = mask
 29 | 
 30 |     def to(self, device):
 31 |         # type: (Device) -> NestedTensor # noqa
 32 |         cast_tensor = self.tensors.to(device)
 33 |         mask = self.mask
 34 |         if mask is not None:
 35 |             assert mask is not None
 36 |             cast_mask = mask.to(device)
 37 |         else:
 38 |             cast_mask = None
 39 |         return NestedTensor(cast_tensor, cast_mask)
 40 | 
 41 |     def decompose(self):
 42 |         return self.tensors, self.mask
 43 | 
 44 |     def __repr__(self):
 45 |         return str(self.tensors)
 46 | 
 47 | 
 48 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
 49 |     # TODO make this more general
 50 |     if tensor_list[0].ndim == 3:
 51 |         if torchvision._is_tracing():
 52 |             # nested_tensor_from_tensor_list() does not export well to ONNX
 53 |             # call _onnx_nested_tensor_from_tensor_list() instead
 54 |             return _onnx_nested_tensor_from_tensor_list(tensor_list)
 55 | 
 56 |         # TODO make it support different-sized images
 57 |         max_size = _max_by_axis([list(img.shape) for img in tensor_list])
 58 |         # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list]))
 59 |         batch_shape = [len(tensor_list)] + max_size
 60 |         b, c, h, w = batch_shape
 61 |         dtype = tensor_list[0].dtype
 62 |         device = tensor_list[0].device
 63 |         tensor = torch.zeros(batch_shape, dtype=dtype, device=device)
 64 |         mask = torch.ones((b, h, w), dtype=torch.bool, device=device)
 65 |         for img, pad_img, m in zip(tensor_list, tensor, mask):
 66 |             pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
 67 |             m[: img.shape[1], : img.shape[2]] = False
 68 |     else:
 69 |         raise ValueError("not supported")
 70 |     return NestedTensor(tensor, mask)
 71 | 
 72 | 
 73 | # _onnx_nested_tensor_from_tensor_list() is an implementation of
 74 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing.
@torch.jit.unused
def _onnx_nested_tensor_from_tensor_list(
    tensor_list: List[Tensor],
) -> NestedTensor:
    """Build a padded NestedTensor using only pad/stack operations.

    Used by ``nested_tensor_from_tensor_list`` when torchvision reports that
    tracing is active, because slice-assignment is not supported by ONNX
    export (see the comment in the body).

    Args:
        tensor_list: Tensors to batch. The padding below indexes three
            dimensions, so 3-D tensors are expected.

    Returns:
        NestedTensor whose ``tensors`` is the stack of zero-padded inputs and
        whose ``mask`` is a bool stack that is True in the padded region.
    """
    # Per-dimension maximum size across all tensors.
    # NOTE(review): torch.stack over img.shape[i] presumably relies on shapes
    # being tensors while tracing - confirm before calling in eager mode.
    max_size = []
    for i in range(tensor_list[0].dim()):
        max_size_i = torch.max(
            torch.stack([img.shape[i] for img in tensor_list]).to(
                torch.float32
            )
        ).to(torch.int64)
        max_size.append(max_size_i)
    max_size = tuple(max_size)

    # work around for
    # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
    # m[: img.shape[1], :img.shape[2]] = False
    # which is not yet supported in onnx
    padded_imgs = []
    padded_masks = []
    for img in tensor_list:
        # Amount of padding needed at the end of each dimension.
        padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
        # F.pad takes (before, after) pairs starting from the last dimension.
        padded_img = torch.nn.functional.pad(
            img, (0, padding[2], 0, padding[1], 0, padding[0])
        )
        padded_imgs.append(padded_img)

        # Mask starts as zeros over the image extent and is padded with 1,
        # so after the bool cast the padded area is True.
        m = torch.zeros_like(img[0], dtype=torch.int, device=img.device)
        padded_mask = torch.nn.functional.pad(
            m, (0, padding[2], 0, padding[1]), "constant", 1
        )
        padded_masks.append(padded_mask.to(torch.bool))

    tensor = torch.stack(padded_imgs)
    mask = torch.stack(padded_masks)

    return NestedTensor(tensor, mask=mask)
112 | 
113 | 
def is_dist_avail_and_initialized():
    """Return True only if ``dist`` is both available and initialized.

    ``dist.is_initialized()`` is only consulted when ``dist.is_available()``
    is truthy.
    """
    return bool(dist.is_available() and dist.is_initialized())
120 | 


--------------------------------------------------------------------------------
/utils/pointops2/src/attention/attention_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include "../cuda_utils.h"
  2 | #include "attention_cuda_kernel.h"
  3 | 
  4 | 
  5 | __global__ void attention_step1_forward_cuda_kernel( // M, h, C//h
  6 |     int N, int M, int h, int C, const float *q, const float *k,
  7 |     const int *index0, const int *index1, float *attn) {
  8 | 
  9 |     int c_idx = blockIdx.z;
 10 |     int h_idx = blockIdx.y;
 11 |     int m_idx = blockIdx.x * blockDim.x + threadIdx.x;
 12 |     if (m_idx >= M || h_idx >= h || c_idx >= C / h) return;
 13 | 
 14 |     int idx0 = index0[m_idx];
 15 |     int idx1 = index1[m_idx];
 16 |     float val = q[idx0*C+h_idx*C/h+c_idx] * k[idx1*C+h_idx*C/h+c_idx];
 17 |     atomicAdd(attn+m_idx*h+h_idx, val);
 18 | }
 19 | 
 20 | __global__ void attention_step1_backward_cuda_kernel( // M, h, C//h
 21 |     int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k,
 22 |     float *grad_q, float *grad_k) {
 23 |     
 24 |     int c_idx = blockIdx.z;
 25 |     int h_idx = blockIdx.y;
 26 |     int m_idx = blockIdx.x * blockDim.x + threadIdx.x;
 27 |     if (m_idx >= M || h_idx >= h || c_idx >= C / h) return;
 28 | 
 29 |     int idx0 = index0[m_idx];
 30 |     int idx1 = index1[m_idx];
 31 |     int grad_out_idx = m_idx*h+h_idx;
 32 |     int q_idx = idx0*C+h_idx*C/h+c_idx;
 33 |     int k_idx = idx1*C+h_idx*C/h+c_idx;
 34 |     atomicAdd(grad_q+q_idx, grad_out[grad_out_idx] * k[k_idx]);
 35 |     atomicAdd(grad_k+k_idx, grad_out[grad_out_idx] * q[q_idx]);
 36 | }
 37 | 
 38 | void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k,
 39 |     const int *index0, const int *index1, float *attn) {
 40 |     // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, )
 41 |     //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M);
 42 |     dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h);
 43 |     dim3 threads(THREADS_PER_BLOCK);
 44 |     attention_step1_forward_cuda_kernel<<<blocks, threads, 0>>>(N, M, h, C, q, k, index0, index1, attn);
 45 | }
 46 | 
 47 | void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, 
 48 |     const float *q, const float *k, float *grad_q, float *grad_k) {  
 49 |     // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
 50 |     //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M);
 51 |     dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h);
 52 |     dim3 threads(THREADS_PER_BLOCK);
 53 |     attention_step1_backward_cuda_kernel<<<blocks, threads, 0>>>(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
 54 | }
 55 | 
 56 | __global__ void attention_step2_forward_cuda_kernel( // M, h, C//h
 57 |     int N, int M, int h, int C, const float *attn, const float *v,
 58 |     const int *index0, const int *index1, float *output) {
 59 | 
 60 |     int c_idx = blockIdx.z;
 61 |     int h_idx = blockIdx.y;
 62 |     int m_idx = blockIdx.x * blockDim.x + threadIdx.x;
 63 |     if (m_idx >= M || h_idx >= h || c_idx >= C / h) return;
 64 | 
 65 |     int idx1 = index1[m_idx];
 66 |     float val = attn[m_idx*h+h_idx] * v[idx1*C+h_idx*C/h+c_idx];
 67 |     int idx0 = index0[m_idx];
 68 |     atomicAdd(output+idx0*C+h_idx*C/h+c_idx, val);
 69 | }
 70 | 
 71 | __global__ void attention_step2_backward_cuda_kernel( // M, h, C//h
 72 |     int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v,
 73 |     float *grad_attn, float *grad_v) {
 74 |     
 75 |     int c_idx = blockIdx.z;
 76 |     int h_idx = blockIdx.y;
 77 |     int m_idx = blockIdx.x * blockDim.x + threadIdx.x;
 78 |     if (m_idx >= M || h_idx >= h || c_idx >= C / h) return;
 79 | 
 80 |     int idx0 = index0[m_idx];
 81 |     int idx1 = index1[m_idx];
 82 |     int grad_out_idx = idx0*C+h_idx*C/h+c_idx;
 83 |     atomicAdd(grad_attn+m_idx*h+h_idx, grad_out[grad_out_idx] * v[idx1*C+h_idx*C/h+c_idx]);
 84 |     atomicAdd(grad_v+idx1*C+h_idx*C/h+c_idx, grad_out[grad_out_idx] * attn[m_idx*h+h_idx]);
 85 | }
 86 | 
 87 | void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v,
 88 |     const int *index0, const int *index1, float *output) {
 89 |     // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, )
 90 |     //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M);
 91 |     dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h);
 92 |     dim3 threads(THREADS_PER_BLOCK);
 93 |     attention_step2_forward_cuda_kernel<<<blocks, threads, 0>>>(N, M, h, C, attn, v, index0, index1, output);
 94 | }
 95 | 
 96 | void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, 
 97 |     const float *attn, const float *v, float *grad_attn, float *grad_v) {  
 98 |     // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
 99 |     //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M);
100 |     dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h);
101 |     dim3 threads(THREADS_PER_BLOCK);
102 |     attention_step2_backward_cuda_kernel<<<blocks, threads, 0>>>(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v);
103 | }
104 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | if sys.version_info[:2] >= (3, 8):
  4 |     from collections.abc import MutableMapping
  5 | else:
  6 |     from collections import MutableMapping
  7 | 
  8 | import torch
  9 | from loguru import logger
 10 | 
 11 | 
 12 | def flatten_dict(d, parent_key="", sep="_"):
 13 |     """
 14 |     https://stackoverflow.com/questions/6027558/flatten-nested-dictionaries-compressing-keys
 15 |     """
 16 |     items = []
 17 |     for k, v in d.items():
 18 |         new_key = parent_key + sep + k if parent_key else k
 19 |         if isinstance(v, MutableMapping):
 20 |             items.extend(flatten_dict(v, new_key, sep=sep).items())
 21 |         else:
 22 |             items.append((new_key, v))
 23 |     return dict(items)
 24 | 
 25 | 
 26 | def load_baseline_model(cfg, model):
 27 |     # if it is Minkoski weights
 28 |     cfg.model.in_channels = 3
 29 |     cfg.model.config.conv1_kernel_size = 5
 30 |     cfg.data.add_normals = False
 31 |     cfg.data.train_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)]
 32 |     cfg.data.validation_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)]
 33 |     cfg.data.test_dataset.color_mean_std = [(0.5, 0.5, 0.5), (1, 1, 1)]
 34 |     cfg.data.voxel_size = 0.02
 35 |     model = model(cfg)
 36 |     state_dict = torch.load(cfg.general.checkpoint)["state_dict"]
 37 |     model.model.load_state_dict(state_dict)
 38 |     return cfg, model
 39 | 
 40 | 
 41 | def load_backbone_checkpoint_with_missing_or_exsessive_keys(cfg, model):
 42 |     state_dict = torch.load(cfg.general.backbone_checkpoint)["state_dict"]
 43 |     correct_dict = dict(model.state_dict())
 44 | 
 45 |     # if parametrs not found in checkpoint they will be randomly initialized
 46 |     for key in state_dict.keys():
 47 |         if correct_dict.pop(f"model.backbone.{key}", None) is None:
 48 |             logger.warning(
 49 |                 f"Key not found, it will be initialized randomly: {key}"
 50 |             )
 51 | 
 52 |     # if parametrs have different shape, it will randomly initialize
 53 |     state_dict = torch.load(cfg.general.backbone_checkpoint)["state_dict"]
 54 |     correct_dict = dict(model.state_dict())
 55 |     for key in correct_dict.keys():
 56 |         if key.replace("model.backbone.", "") not in state_dict:
 57 |             logger.warning(f"{key} not in loaded checkpoint")
 58 |             state_dict.update(
 59 |                 {key.replace("model.backbone.", ""): correct_dict[key]}
 60 |             )
 61 |         elif (
 62 |             state_dict[key.replace("model.backbone.", "")].shape
 63 |             != correct_dict[key].shape
 64 |         ):
 65 |             logger.warning(
 66 |                 f"incorrect shape {key}:{state_dict[key.replace('model.backbone.', '')].shape} vs {correct_dict[key].shape}"
 67 |             )
 68 |             state_dict.update({key: correct_dict[key]})
 69 | 
 70 |     # if we have more keys just discard them
 71 |     correct_dict = dict(model.state_dict())
 72 |     new_state_dict = dict()
 73 |     for key in state_dict.keys():
 74 |         if f"model.backbone.{key}" in correct_dict.keys():
 75 |             new_state_dict.update({f"model.backbone.{key}": state_dict[key]})
 76 |         elif key in correct_dict.keys():
 77 |             new_state_dict.update({key: correct_dict[key]})
 78 |         else:
 79 |             logger.warning(f"excessive key: {key}")
 80 |     model.load_state_dict(new_state_dict)
 81 |     return cfg, model
 82 | 
 83 | 
 84 | def load_checkpoint_with_missing_or_exsessive_keys(cfg, model):
 85 |     state_dict = torch.load(cfg.general.checkpoint)["state_dict"]
 86 |     correct_dict = dict(model.state_dict())
 87 | 
 88 |     # if parametrs not found in checkpoint they will be randomly initialized
 89 |     for key in state_dict.keys():
 90 |         if correct_dict.pop(key, None) is None:
 91 |             logger.warning(
 92 |                 f"Key not found, it will be initialized randomly: {key}"
 93 |             )
 94 | 
 95 |     # if parametrs have different shape, it will randomly initialize
 96 |     state_dict = torch.load(cfg.general.checkpoint)["state_dict"]
 97 |     correct_dict = dict(model.state_dict())
 98 |     for key in correct_dict.keys():
 99 |         if key not in state_dict:
100 |             logger.warning(f"{key} not in loaded checkpoint")
101 |             state_dict.update({key: correct_dict[key]})
102 |         elif state_dict[key].shape != correct_dict[key].shape:
103 |             logger.warning(
104 |                 f"incorrect shape {key}:{state_dict[key].shape} vs {correct_dict[key].shape}"
105 |             )
106 |             state_dict.update({key: correct_dict[key]})
107 | 
108 |     # if we have more keys just discard them
109 |     correct_dict = dict(model.state_dict())
110 |     new_state_dict = dict()
111 |     for key in state_dict.keys():
112 |         if key in correct_dict.keys():
113 |             new_state_dict.update({key: state_dict[key]})
114 |         else:
115 |             logger.warning(f"excessive key: {key}")
116 |     model.load_state_dict(new_state_dict)
117 |     return cfg, model
118 | 
119 | 
def freeze_until(net, param_name: str = None):
    """
    Freeze every parameter of ``net`` that precedes ``param_name``.

    Parameters are visited in ``net.named_parameters()`` order. Those seen
    before ``param_name`` get ``requires_grad = False``; ``param_name`` itself
    and everything after it get ``requires_grad = True``. When no parameter
    carries that name (including the default ``None``) all of them are frozen.
    https://opendatascience.slack.com/archives/CGK4KQBHD/p1588373239292300?thread_ts=1588105223.275700&cid=CGK4KQBHD
    Args:
        net: object exposing ``named_parameters()``.
        param_name: name of the first parameter to leave trainable.
    Returns:
        None; ``net`` is modified in place.
    """
    reached = False
    for name, param in net.named_parameters():
        # Once the named parameter is hit the flag stays on for the rest.
        reached = reached or name == param_name
        param.requires_grad = reached
134 | 


--------------------------------------------------------------------------------
/occlusion_subsets/split_test_occlusion_mid.txt:
--------------------------------------------------------------------------------
  1 | recording_20210911_S07_S06_01_scene_main_02001.ply
  2 | recording_20210918_S05_S06_01_scene_sub_1_03561.ply
  3 | recording_20210918_S05_S06_01_scene_sub_2_02421.ply
  4 | recording_20210918_S05_S06_01_scene_sub_2_02511.ply
  5 | recording_20210918_S05_S06_01_scene_sub_2_02601.ply
  6 | recording_20210918_S05_S06_01_scene_sub_2_03351.ply
  7 | recording_20210918_S05_S06_02_scene_main_00991.ply
  8 | recording_20210918_S05_S06_03_scene_main_02651.ply
  9 | recording_20210918_S05_S06_03_scene_sub_1_01751.ply
 10 | recording_20210918_S05_S06_04_scene_main_03271.ply
 11 | recording_20210918_S05_S06_04_scene_main_04051.ply
 12 | recording_20210918_S05_S06_04_scene_sub_1_03511.ply
 13 | recording_20210918_S05_S06_04_scene_sub_1_03931.ply
 14 | recording_20210918_S05_S06_04_scene_sub_2_03451.ply
 15 | recording_20210918_S05_S06_04_scene_sub_2_03811.ply
 16 | recording_20210918_S05_S06_05_scene_sub_1_03751.ply
 17 | recording_20210918_S05_S06_05_scene_sub_2_01801.ply
 18 | recording_20210918_S05_S06_05_scene_sub_2_02101.ply
 19 | recording_20210918_S05_S09_01_scene_main_01571.ply
 20 | recording_20210918_S05_S09_01_scene_sub_1_01391.ply
 21 | recording_20210918_S05_S09_01_scene_sub_2_01421.ply
 22 | recording_20210918_S05_S09_01_scene_sub_2_01451.ply
 23 | recording_20210918_S06_S05_01_scene_sub_1_01811.ply
 24 | recording_20210918_S06_S05_01_scene_sub_1_03011.ply
 25 | recording_20210918_S06_S05_01_scene_sub_2_02021.ply
 26 | recording_20210918_S06_S05_01_scene_sub_2_02081.ply
 27 | recording_20210918_S06_S05_02_scene_main_03421.ply
 28 | recording_20210918_S06_S05_02_scene_sub_1_03361.ply
 29 | recording_20210918_S06_S05_02_scene_sub_2_03391.ply
 30 | recording_20210918_S06_S05_02_scene_sub_2_03661.ply
 31 | recording_20210918_S06_S05_03_scene_sub_1_00941.ply
 32 | recording_20210918_S06_S05_03_scene_sub_2_00911.ply
 33 | recording_20210918_S09_S05_01_scene_main_01761.ply
 34 | recording_20210918_S09_S05_01_scene_sub_1_01581.ply
 35 | recording_20210918_S09_S05_01_scene_sub_2_01611.ply
 36 | recording_20210918_S09_S05_02_scene_main_04261.ply
 37 | recording_20210918_S09_S05_02_scene_sub_1_04081.ply
 38 | recording_20210918_S09_S05_02_scene_sub_1_04201.ply
 39 | recording_20210918_S09_S05_03_scene_main_01961.ply
 40 | recording_20210918_S09_S05_03_scene_main_02351.ply
 41 | recording_20210918_S09_S05_03_scene_sub_1_01901.ply
 42 | recording_20210918_S09_S05_03_scene_sub_1_01931.ply
 43 | recording_20210923_S05_S13_01_scene_main_03161.ply
 44 | recording_20210923_S05_S13_01_scene_main_03371.ply
 45 | recording_20210923_S05_S13_01_scene_sub_2_03101.ply
 46 | recording_20210923_S05_S13_01_scene_sub_2_03911.ply
 47 | recording_20210923_S13_S05_01_scene_main_01701.ply
 48 | recording_20210923_S13_S05_01_scene_main_03471.ply
 49 | recording_20210923_S13_S05_01_scene_sub_1_02061.ply
 50 | recording_20210929_S05_S16_01_scene_sub_1_01181.ply
 51 | recording_20210929_S05_S16_01_scene_sub_1_02321.ply
 52 | recording_20210929_S05_S16_03_scene_sub_1_00991.ply
 53 | recording_20210929_S05_S16_03_scene_sub_1_01261.ply
 54 | recording_20210929_S05_S16_03_scene_sub_2_01081.ply
 55 | recording_20210929_S05_S16_04_scene_sub_1_02381.ply
 56 | recording_20210929_S05_S16_04_scene_sub_1_02471.ply
 57 | recording_20210929_S16_S05_01_scene_sub_1_02131.ply
 58 | recording_20210929_S16_S05_01_scene_sub_2_01201.ply
 59 | recording_20210929_S16_S05_01_scene_sub_2_02251.ply
 60 | recording_20210929_S16_S05_01_scene_sub_2_03841.ply
 61 | recording_20210929_S16_S05_01_scene_sub_2_04651.ply
 62 | recording_20211004_S19_S06_01_scene_main_04301.ply
 63 | recording_20211004_S19_S06_01_scene_sub_1_04271.ply
 64 | recording_20211004_S19_S06_01_scene_sub_2_04391.ply
 65 | recording_20211004_S19_S06_01_scene_sub_2_04541.ply
 66 | recording_20211004_S19_S06_02_scene_sub_1_02911.ply
 67 | recording_20211004_S19_S06_02_scene_sub_2_02461.ply
 68 | recording_20211004_S19_S06_02_scene_sub_2_02611.ply
 69 | recording_20211004_S19_S06_02_scene_sub_2_03421.ply
 70 | recording_20211004_S19_S06_02_scene_sub_2_03481.ply
 71 | recording_20211004_S19_S06_03_scene_main_04111.ply
 72 | recording_20211004_S19_S06_03_scene_main_04171.ply
 73 | recording_20211004_S19_S06_03_scene_sub_1_03781.ply
 74 | recording_20211004_S19_S06_03_scene_sub_1_04021.ply
 75 | recording_20211004_S19_S06_03_scene_sub_1_04051.ply
 76 | recording_20211004_S19_S06_03_scene_sub_2_03781.ply
 77 | recording_20211004_S19_S06_04_scene_main_02481.ply
 78 | recording_20211004_S19_S06_04_scene_main_02901.ply
 79 | recording_20211004_S19_S06_04_scene_sub_1_02061.ply
 80 | recording_20211004_S19_S06_04_scene_sub_1_02511.ply
 81 | recording_20211004_S19_S06_04_scene_sub_1_02661.ply
 82 | recording_20211004_S19_S06_05_scene_main_03111.ply
 83 | recording_20211004_S19_S06_05_scene_main_03141.ply
 84 | recording_20211004_S19_S06_05_scene_sub_1_03441.ply
 85 | recording_20211004_S19_S06_05_scene_sub_1_03501.ply
 86 | recording_20211004_S19_S06_05_scene_sub_1_03531.ply
 87 | recording_20211004_S19_S06_05_scene_sub_2_03531.ply
 88 | recording_20211004_S19_S06_05_scene_sub_2_03741.ply
 89 | recording_20220415_S35_S36_01_scene_sub_4_01461.ply
 90 | recording_20220415_S35_S36_01_scene_sub_4_01941.ply
 91 | recording_20220415_S35_S36_01_scene_sub_4_03411.ply
 92 | recording_20220415_S35_S36_02_scene_sub_1_02201.ply
 93 | recording_20220415_S35_S36_02_scene_sub_1_02531.ply
 94 | recording_20220415_S35_S36_02_scene_sub_3_02771.ply
 95 | recording_20220415_S35_S36_02_scene_sub_3_02801.ply
 96 | recording_20220415_S35_S36_02_scene_sub_3_03101.ply
 97 | recording_20220415_S35_S36_02_scene_sub_4_02321.ply
 98 | recording_20220415_S36_S35_02_scene_main_01801.ply
 99 | recording_20220415_S36_S35_02_scene_main_02551.ply
100 | recording_20220415_S36_S35_02_scene_main_02881.ply
101 | recording_20220415_S36_S35_02_scene_sub_1_02161.ply
102 | recording_20220415_S36_S35_02_scene_sub_1_03571.ply
103 | recording_20220415_S36_S35_02_scene_sub_2_03751.ply
104 | recording_20220415_S36_S35_02_scene_sub_4_03751.ply


--------------------------------------------------------------------------------
/third_party/pointnet2/_ext_src/src/interpolate_gpu.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Facebook, Inc. and its affiliates.
  2 | 
  3 | 
  4 | #include <math.h>
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | 
  8 | #include "cuda_utils.h"
  9 | 
 10 | // input: unknown(b, n, 3) known(b, m, 3)
 11 | // output: dist2(b, n, 3), idx(b, n, 3)
 12 | __global__ void three_nn_kernel(int b, int n, int m,
 13 |                                 const float *__restrict__ unknown,
 14 |                                 const float *__restrict__ known,
 15 |                                 float *__restrict__ dist2,
 16 |                                 int *__restrict__ idx) {
 17 |   int batch_index = blockIdx.x;
 18 |   unknown += batch_index * n * 3;
 19 |   known += batch_index * m * 3;
 20 |   dist2 += batch_index * n * 3;
 21 |   idx += batch_index * n * 3;
 22 | 
 23 |   int index = threadIdx.x;
 24 |   int stride = blockDim.x;
 25 |   for (int j = index; j < n; j += stride) {
 26 |     float ux = unknown[j * 3 + 0];
 27 |     float uy = unknown[j * 3 + 1];
 28 |     float uz = unknown[j * 3 + 2];
 29 | 
 30 |     double best1 = 1e40, best2 = 1e40, best3 = 1e40;
 31 |     int besti1 = 0, besti2 = 0, besti3 = 0;
 32 |     for (int k = 0; k < m; ++k) {
 33 |       float x = known[k * 3 + 0];
 34 |       float y = known[k * 3 + 1];
 35 |       float z = known[k * 3 + 2];
 36 |       float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
 37 |       if (d < best1) {
 38 |         best3 = best2;
 39 |         besti3 = besti2;
 40 |         best2 = best1;
 41 |         besti2 = besti1;
 42 |         best1 = d;
 43 |         besti1 = k;
 44 |       } else if (d < best2) {
 45 |         best3 = best2;
 46 |         besti3 = besti2;
 47 |         best2 = d;
 48 |         besti2 = k;
 49 |       } else if (d < best3) {
 50 |         best3 = d;
 51 |         besti3 = k;
 52 |       }
 53 |     }
 54 |     dist2[j * 3 + 0] = best1;
 55 |     dist2[j * 3 + 1] = best2;
 56 |     dist2[j * 3 + 2] = best3;
 57 | 
 58 |     idx[j * 3 + 0] = besti1;
 59 |     idx[j * 3 + 1] = besti2;
 60 |     idx[j * 3 + 2] = besti3;
 61 |   }
 62 | }
 63 | 
 64 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
 65 |                              const float *known, float *dist2, int *idx) {
 66 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 67 |   three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
 68 |                                                       dist2, idx);
 69 | 
 70 |   CUDA_CHECK_ERRORS();
 71 | }
 72 | 
 73 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
 74 | // output: out(b, c, n)
 75 | __global__ void three_interpolate_kernel(int b, int c, int m, int n,
 76 |                                          const float *__restrict__ points,
 77 |                                          const int *__restrict__ idx,
 78 |                                          const float *__restrict__ weight,
 79 |                                          float *__restrict__ out) {
 80 |   int batch_index = blockIdx.x;
 81 |   points += batch_index * m * c;
 82 | 
 83 |   idx += batch_index * n * 3;
 84 |   weight += batch_index * n * 3;
 85 | 
 86 |   out += batch_index * n * c;
 87 | 
 88 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
 89 |   const int stride = blockDim.y * blockDim.x;
 90 |   for (int i = index; i < c * n; i += stride) {
 91 |     const int l = i / n;
 92 |     const int j = i % n;
 93 |     float w1 = weight[j * 3 + 0];
 94 |     float w2 = weight[j * 3 + 1];
 95 |     float w3 = weight[j * 3 + 2];
 96 | 
 97 |     int i1 = idx[j * 3 + 0];
 98 |     int i2 = idx[j * 3 + 1];
 99 |     int i3 = idx[j * 3 + 2];
100 | 
101 |     out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
102 |              points[l * m + i3] * w3;
103 |   }
104 | }
105 | 
// Host entry point: launches three_interpolate_kernel with b blocks on the
// stream returned by at::cuda::getCurrentCUDAStream(), then runs
// CUDA_CHECK_ERRORS().
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
                                      const float *points, const int *idx,
                                      const float *weight, float *out) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  // block shape is picked by opt_block_config from (n, c)
  const auto block = opt_block_config(n, c);
  three_interpolate_kernel<<<b, block, 0, stream>>>(b, c, m, n, points, idx,
                                                    weight, out);

  CUDA_CHECK_ERRORS();
}
115 | 
// input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
// output: grad_points(b, c, m)
// Each output gradient (channel, target) is added, scaled by the three
// interpolation weights, to the three source points listed in idx. Several
// targets may share a source point, hence atomicAdd; grad_points is only
// accumulated into here, never cleared.
__global__ void three_interpolate_grad_kernel(
    int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, const float *__restrict__ weight,
    float *__restrict__ grad_points) {
  // blockIdx.x selects the batch element; move every pointer to its slice.
  const int batch = blockIdx.x;
  grad_out += batch * n * c;
  idx += batch * n * 3;
  weight += batch * n * 3;
  grad_points += batch * m * c;

  // Flatten the 2D thread block and walk the c * n gradients with that step.
  const int tid = threadIdx.y * blockDim.x + threadIdx.x;
  const int step = blockDim.y * blockDim.x;
  for (int flat = tid; flat < c * n; flat += step) {
    const int channel = flat / n;
    const int target = flat % n;

    const float *w = weight + target * 3;
    const int *nbr = idx + target * 3;
    float *grad_row = grad_points + channel * m;
    const float g = grad_out[flat];

    atomicAdd(grad_row + nbr[0], g * w[0]);
    atomicAdd(grad_row + nbr[1], g * w[1]);
    atomicAdd(grad_row + nbr[2], g * w[2]);
  }
}
147 | 
// Host entry point: launches three_interpolate_grad_kernel with b blocks on
// the stream returned by at::cuda::getCurrentCUDAStream(), then runs
// CUDA_CHECK_ERRORS().
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
                                           const float *grad_out,
                                           const int *idx, const float *weight,
                                           float *grad_points) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  // block shape is picked by opt_block_config from (n, c)
  const auto block = opt_block_config(n, c);
  three_interpolate_grad_kernel<<<b, block, 0, stream>>>(
      b, c, n, m, grad_out, idx, weight, grad_points);

  CUDA_CHECK_ERRORS();
}
158 | 


--------------------------------------------------------------------------------