├── lib ├── __init__.py ├── constants │ └── __init__.py ├── losses │ ├── __init__.py │ ├── SoftIoULoss.py │ ├── RecallCrossEntropy.py │ ├── SupervisedSimiam.py │ ├── FocalLoss.py │ ├── utils.py │ └── PointSupConLoss.py ├── train_test │ ├── __init__.py │ └── pl_ClassifierTrainer.py ├── datasets │ ├── preprocessing │ │ ├── __init__.py │ │ ├── scannet200_insseg.py │ │ ├── utils.py │ │ └── scannet_long.py │ ├── __init__.py │ ├── prior_info.py │ └── stanford.py ├── ext │ └── pointnet2 │ │ ├── .gitignore │ │ ├── _ext_src │ │ ├── include │ │ │ ├── ball_query.h │ │ │ ├── group_points.h │ │ │ ├── sampling.h │ │ │ ├── interpolate.h │ │ │ ├── utils.h │ │ │ └── cuda_utils.h │ │ └── src │ │ │ ├── bindings.cpp │ │ │ ├── ball_query.cpp │ │ │ ├── ball_query_gpu.cu │ │ │ ├── group_points.cpp │ │ │ ├── group_points_gpu.cu │ │ │ ├── sampling.cpp │ │ │ ├── interpolate.cpp │ │ │ └── interpolate_gpu.cu │ │ ├── setup.py │ │ └── pointnet2_test.py ├── dataloader.py ├── math_functions.py └── solvers.py ├── models ├── modules │ ├── __init__.py │ ├── senet_block.py │ └── resnet_block.py ├── classifier_models.py ├── projection_models.py ├── model.py ├── wrapper.py ├── __init__.py ├── layers.py ├── resnet.py └── conditional_random_fields.py ├── downstream └── insseg │ ├── lib │ ├── bfs │ │ ├── __init__.py │ │ ├── ops │ │ │ ├── __init__.py │ │ │ ├── .gitignore │ │ │ ├── ops.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── bfs_cluster_kernel.cu │ │ │ │ └── bfs_cluster.cpp │ │ └── bfs.py │ ├── __init__.py │ ├── losses │ │ ├── utils.py │ │ └── FocalLoss.py │ ├── io3d.py │ ├── math_functions.py │ ├── layers.py │ └── solvers.py │ ├── datasets │ ├── evaluation │ │ ├── __init__.py │ │ ├── scannet_benchmark_utils │ │ │ ├── __init__.py │ │ │ ├── scripts │ │ │ │ ├── __init__.py │ │ │ │ ├── util.py │ │ │ │ ├── util_3d.py │ │ │ │ └── evaluate_semantic_label.py │ │ │ ├── util.py │ │ │ └── util_3d.py │ │ └── evaluate_semantic_label.py │ ├── __init__.py │ └── dataloader.py │ ├── .style.yapf │ ├── .gitignore │ ├── scripts │ ├── test_scannet.sh │ ├── test_scannet_benchmark.sh │ ├── test_stanford3d.sh │ ├── train_scannet_benchmark.sh │ ├── data_efficient │ │ ├── by_scenes.sh │ │ └── by_points.sh │ ├── train_scannet.sh │ ├── train_stanford3d.sh │ └── train_scannet_slurm.sh │ ├── insseg_models │ └── __init__.py │ ├── eval_all.sh │ ├── config │ └── default.yaml │ └── ddp_main.py ├── docs └── teaser.jpg ├── .gitignore └── scripts ├── train_models.sh ├── text_representation_train.sh └── fine_tune_classifier.sh /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/constants/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/losses/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/train_test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /downstream/insseg/lib/bfs/__init__.py: 
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/datasets/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/downstream/insseg/lib/bfs/ops/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/downstream/insseg/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/downstream/insseg/datasets/evaluation/scannet_benchmark_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/downstream/insseg/lib/bfs/ops/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | PG_OP.egg-info
--------------------------------------------------------------------------------
/downstream/insseg/datasets/evaluation/scannet_benchmark_utils/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | pointnet2.egg-info/
4 | */.idea/
--------------------------------------------------------------------------------
/docs/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RozDavid/LanguageGroundedSemseg/HEAD/docs/teaser.jpg
--------------------------------------------------------------------------------
/downstream/insseg/lib/bfs/ops/ops.py:
--------------------------------------------------------------------------------
1 | '''
2 | PointGroup operations
3 | Written by Li Jiang
4 | '''
5 | 
6 | 
--------------------------------------------------------------------------------
/downstream/insseg/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = chromium
3 | column_limit = 100
4 | indent_width = 2
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | **/__pycache__/
3 | **/*.pyc
4 | output/
5 | outputs/
6 | wandb/
7 | notebooks/
8 | output/*.csv
9 | .DS_Store
--------------------------------------------------------------------------------
/downstream/insseg/lib/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/downstream/insseg/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized
2 | __pycache__/
3 | *.py[cod]
4 | *.o
5 | *.so
6 | 
7 | # Text edits
8 | *.swp
9 | *.swo
10 | *.orig
11 | 
12 | # Training
13 | outputs/
14 | 
15 | # Profiling
16 | callgrind.out*
17 | *.dSYM
18 | 
19 | # Misc
20 | .DS_Store
21 | 
22 | 
--------------------------------------------------------------------------------
/downstream/insseg/lib/bfs/ops/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 | 
4 | setup(
5 |     name='PG_OP',
6 |     ext_modules=[
7 |         CUDAExtension('PG_OP', [
8 |             'src/bfs_cluster.cpp',
9 |             'src/bfs_cluster_kernel.cu',
10 |         ])
11 |     ],
12 |     cmdclass={'build_ext': BuildExtension}
13 | )
14 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/include/ball_query.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #pragma once
7 | #include <torch/extension.h>
8 | 
9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
10 |                       const int nsample);
11 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/include/group_points.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #pragma once
7 | #include <torch/extension.h>
8 | 
9 | at::Tensor group_points(at::Tensor points, at::Tensor idx);
10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
11 | 
--------------------------------------------------------------------------------
/downstream/insseg/lib/losses/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from lib.losses.FocalLoss import FocalLoss
4 | 
5 | def loss_by_name(loss_name, ignore_index=0, alpha=0.5, gamma=2.0, reduction='mean', weight=None):
6 |   if loss_name == 'focal':
7 |     return FocalLoss(alpha, gamma, reduction=reduction, ignore_index=ignore_index)
8 |   elif loss_name == 'cross_entropy':
9 |     return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction)
10 |   else:
11 |     return None
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/include/sampling.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
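//
// Declarations below cover FPS and gathering: furthest_point_sampling greedily
// picks `nsamples` indices spread across the cloud, while gather_points /
// gather_points_grad index point features by such indices (CUDA impls in src/).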
5 | 
6 | #pragma once
7 | #include <torch/extension.h>
8 | 
9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx);
10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
12 | 
--------------------------------------------------------------------------------
/models/classifier_models.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | 
4 | class ClassifierNet(nn.Module):
5 |   def __init__(self, num_in_channel, num_labels, config):
6 |     super().__init__()
7 | 
8 |     self.config = config
9 |     self.input_dim = num_in_channel
10 |     self.output_dim = num_labels
11 | 
12 |     self.classifier = nn.Linear(self.input_dim, self.output_dim, bias=True)
13 | 
14 |   def forward(self, x):
15 | 
16 |     out = self.classifier(x)
17 |     return out, x
18 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/include/interpolate.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #pragma once
7 | 
8 | #include <torch/extension.h>
9 | #include <vector>
10 | 
11 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
13 |                              at::Tensor weight);
14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
15 |                                   at::Tensor weight, const int m);
16 | 
--------------------------------------------------------------------------------
/downstream/insseg/scripts/test_scannet.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | #
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
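#
# Usage sketch (paths are placeholders, not repo defaults): this script reads
# DATAPATH, LOG_DIR and PRETRAIN from the environment, e.g.
#   DATAPATH=/path/to/scannet LOG_DIR=./outputs PRETRAIN=weights.pth bash test_scannet.sh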
6 | 
7 | MODEL=Res16UNet34C
8 | 
9 | python ddp_main.py \
10 | train.is_train=False \
11 | train.lenient_weight_loading=True \
12 | net.model=${MODEL} \
13 | net.conv1_kernel_size=3 \
14 | augmentation.normalize_color=True \
15 | data.dataset=ScannetVoxelization2cmDataset \
16 | data.num_workers=1 \
17 | data.scannet_path=${DATAPATH} \
18 | data.return_transformation=False \
19 | test.test_phase=val \
20 | test.evaluate_benchmark=False \
21 | misc.log_dir=${LOG_DIR} \
22 | misc.num_gpus=1 \
23 | net.weights=$PRETRAIN \
24 | 
25 | 
--------------------------------------------------------------------------------
/models/projection_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | class AttributeFittingModel(nn.Module):
5 |   def __init__(self, inputSize, outputSize, num_attributes):
6 |     super(AttributeFittingModel, self).__init__()
7 | 
8 |     self.input_size = inputSize
9 |     self.output_size = outputSize
10 |     self.num_attributes = num_attributes
11 | 
12 |     self.attr_linears = nn.ModuleList([nn.Linear(inputSize, outputSize) for i in range(num_attributes)])
13 | 
14 |   def forward(self, x):
15 |     # x = num_cats x num_attrs x num_dims
16 |     out = torch.cuda.FloatTensor(x.shape[0], self.num_attributes, self.output_size).fill_(0)  # type: torch.FloatTensor
17 |     for i in range(len(self.attr_linears)):
18 |       out[:, i, :] = self.attr_linears[i](x[:, i, :])  # each head maps its own attribute slice
19 | 
20 |     return out
--------------------------------------------------------------------------------
/models/model.py:
--------------------------------------------------------------------------------
1 | from MinkowskiEngine import MinkowskiNetwork
2 | 
3 | 
4 | class Model(MinkowskiNetwork):
5 |   """
6 |   Base network for all sparse convnets
7 | 
8 |   By default, all networks are segmentation networks.
9 |   """
10 |   OUT_PIXEL_DIST = -1
11 | 
12 |   def __init__(self, in_channels, out_channels, config, D, **kwargs):
13 |     super(Model, self).__init__(D)
14 |     self.in_channels = in_channels
15 |     self.out_channels = out_channels
16 |     self.config = config
17 | 
18 | 
19 | class HighDimensionalModel(Model):
20 |   """
21 |   Base network for all spatio (temporal) chromatic sparse convnets
22 |   """
23 | 
24 |   def __init__(self, in_channels, out_channels, config, D, **kwargs):
25 |     assert D > 4, "Num dimension smaller than 5"
26 |     super(HighDimensionalModel, self).__init__(in_channels, out_channels, config, D, **kwargs)
27 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/src/bindings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
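//
// Pybind11 glue that registers the CUDA ops below; setup.py in this extension
// builds them into the Python module `pointnet2._ext`.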
5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /downstream/insseg/scripts/test_scannet_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | MODEL=Res16UNet34C 8 | 9 | python ddp_main.py \ 10 | train.is_train=False \ 11 | train.val_freq=5 \ 12 | train.lenient_weight_loading=True \ 13 | net.model=${MODEL} \ 14 | net.conv1_kernel_size=3 \ 15 | augmentation.normalize_color=True \ 16 | data.dataset=ScannetVoxelization2cmDataset \ 17 | data.num_workers=1 \ 18 | data.scannet_path=${DATAPATH} \ 19 | data.return_transformation=True \ 20 | test.test_phase=test \ 21 | test.evaluate_benchmark=True \ 22 | test.dual_set_cluster=True \ 23 | misc.log_dir=${LOG_DIR} \ 24 | misc.num_gpus=1 \ 25 | net.weights=$PRETRAIN \ 26 | 27 | -------------------------------------------------------------------------------- /downstream/insseg/scripts/test_stanford3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | MODEL=Res16UNet34C 9 | 10 | python ddp_main.py \ 11 | train.is_train=False \ 12 | train.lenient_weight_loading=True \ 13 | net.model=${MODEL} \ 14 | net.conv1_kernel_size=3 \ 15 | net.weights=$PRETRAIN \ 16 | data.dataset=StanfordArea5Dataset \ 17 | data.voxel_size=0.05 \ 18 | data.num_workers=1 \ 19 | data.stanford3d_path=${DATAPATH} \ 20 | augmentation.data_aug_color_trans_ratio=0.05 \ 21 | augmentation.data_aug_color_jitter_std=0.005 \ 22 | misc.log_dir=${LOG_DIR} \ 23 | misc.train_stuff=True \ 24 | hydra.launcher.partition=learnfair \ 25 | hydra.launcher.comment=CVPR_Deadline \ 26 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import lib.datasets.stanford as stanford 2 | import lib.datasets.scannet as scannet 3 | import lib.datasets.prior_info as prior_info 4 | 5 | DATASETS = [] 6 | 7 | def add_datasets(module): 8 | DATASETS.extend([getattr(module, a) for a in dir(module) if 'Dataset' in a]) 9 | 10 | 11 | add_datasets(stanford) 12 | add_datasets(scannet) 13 | add_datasets(prior_info) 14 | 15 | 16 | def load_dataset(name): 17 | '''Creates and returns an instance of the datasets given its name. 18 | ''' 19 | # Find the model class from its name 20 | mdict = {dataset.__name__: dataset for dataset in DATASETS} 21 | if name not in mdict: 22 | print('Invalid dataset index. 
Options are:')
23 |     # Display a list of valid dataset names
24 |     for dataset in DATASETS:
25 |       print('\t* {}'.format(dataset.__name__))
26 |     raise ValueError(f'Dataset {name} not defined')
27 |   DatasetClass = mdict[name]
28 | 
29 |   return DatasetClass
30 | 
--------------------------------------------------------------------------------
/lib/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import DistributedSampler
3 | from torch.utils.data.sampler import Sampler
4 | 
5 | 
6 | class InfSampler(Sampler):
7 |   """Samples elements randomly, without replacement.
8 | 
9 |   Arguments:
10 |     data_source (Dataset): dataset to sample from
11 |   """
12 | 
13 |   def __init__(self, data_source, shuffle=False):
14 |     self.data_source = data_source
15 |     self.shuffle = shuffle
16 |     self.reset_permutation()
17 | 
18 |   def reset_permutation(self):
19 |     perm = torch.arange(len(self.data_source))  # identity order unless shuffled
20 |     if self.shuffle:
21 |       perm = torch.randperm(len(self.data_source))
22 |     self._perm = perm.tolist()
23 | 
24 |   def __iter__(self):
25 |     return self
26 | 
27 |   def __next__(self):
28 |     if len(self._perm) == 0:
29 |       self.reset_permutation()
30 |     return self._perm.pop()
31 | 
32 |   def __len__(self):
33 |     return len(self.data_source)
34 | 
35 |   next = __next__  # Python 2 compatibility
36 | 
--------------------------------------------------------------------------------
/downstream/insseg/scripts/train_scannet_benchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | #
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | export MODEL=Res16UNet34C
8 | export DATAPATH='/mnt/data/Datasets/scannet_200_insseg'
9 | export LOG_DIR='./outputs'
10 | export PRETRAIN=''
11 | 
12 | 
13 | python ddp_main.py \
14 | train.is_train=True \
15 | train.lenient_weight_loading=True \
16 | train.stat_freq=5 \
17 | train.val_freq=500 \
18 | net.model=${MODEL} \
19 | data.dataset=Scannet200Voxelization2cmDataset \
20 | data.batch_size=8 \
21 | data.num_workers=2 \
22 | data.scannet_path=${DATAPATH} \
23 | data.return_transformation=True \
24 | optimizer.lr=0.05 \
25 | optimizer.scheduler=PolyLR \
26 | optimizer.max_iter=20000 \
27 | misc.log_dir=${LOG_DIR} \
28 | misc.num_gpus=1 \
29 | hydra.launcher.comment=ECCV_supplemental \
30 | net.weights=$PRETRAIN \
31 | 
32 | 
--------------------------------------------------------------------------------
/downstream/insseg/scripts/data_efficient/by_scenes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Facebook, Inc. and its affiliates.
3 | #
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
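#
# Data-efficient training on a limited list of scenes. Usage sketch (file name
# below is hypothetical): export TRAIN_FILE=splits/train_scenes.txt together
# with DATAPATH, LOG_DIR and PRETRAIN; all four are read from the environment.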
6 | 7 | 8 | MODEL=Res16UNet34C 9 | 10 | python ddp_main.py \ 11 | train.train_phase=train \ 12 | train.is_train=True \ 13 | train.lenient_weight_loading=True \ 14 | train.stat_freq=2 \ 15 | train.val_freq=250 \ 16 | net.model=${MODEL} \ 17 | net.conv1_kernel_size=3 \ 18 | augmentation.normalize_color=True \ 19 | data.dataset=ScannetVoxelization2cmDataset \ 20 | data.train_file=$TRAIN_FILE \ 21 | data.batch_size=48 \ 22 | data.num_workers=2 \ 23 | data.scannet_path=${DATAPATH} \ 24 | optimizer.lr=0.1 \ 25 | optimizer.scheduler=PolyLR \ 26 | optimizer.max_iter=5000 \ 27 | misc.log_dir=${LOG_DIR} \ 28 | misc.num_gpus=8 \ 29 | hydra.launcher.partition=learnfair \ 30 | hydra.launcher.comment=CVPR_Deadline \ 31 | net.weights=$PRETRAIN \ 32 | 33 | -------------------------------------------------------------------------------- /downstream/insseg/scripts/data_efficient/by_points.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | MODEL=Res16UNet34C 9 | 10 | python ddp_main.py -m \ 11 | train.train_phase=train \ 12 | train.is_train=True \ 13 | train.lenient_weight_loading=True \ 14 | train.stat_freq=2 \ 15 | train.val_freq=250 \ 16 | net.model=${MODEL} \ 17 | net.conv1_kernel_size=3 \ 18 | augmentation.normalize_color=True \ 19 | data.dataset=ScannetVoxelization2cmDataset \ 20 | data.batch_size=48 \ 21 | data.sampled_inds=$SAMPLED_INDS \ 22 | data.num_workers=2 \ 23 | data.scannet_path=${DATAPATH} \ 24 | optimizer.lr=0.1 \ 25 | optimizer.scheduler=PolyLR \ 26 | optimizer.max_iter=10000 \ 27 | misc.log_dir=${LOG_DIR} \ 28 | misc.num_gpus=8 \ 29 | hydra.launcher.partition=priority \ 30 | hydra.launcher.comment=CVPR_Deadline \ 31 | net.weights=$PRETRAIN \ 32 | 33 | -------------------------------------------------------------------------------- /models/wrapper.py: -------------------------------------------------------------------------------- 1 | import random 2 | from torch.nn import Module 3 | 4 | from MinkowskiEngine import SparseTensor 5 | 6 | 7 | class Wrapper(Module): 8 | """ 9 | Wrapper for the segmentation networks. 10 | """ 11 | OUT_PIXEL_DIST = -1 12 | 13 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 14 | super(Wrapper, self).__init__() 15 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 16 | 17 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 18 | raise NotImplementedError('Must initialize a model and a filter') 19 | 20 | def forward(self, x, coords, colors=None): 21 | soutput = self.model(x) 22 | 23 | # During training, make the network invariant to the filter 24 | if not self.training or random.random() < 0.5: 25 | # Filter requires the model to finish the forward pass 26 | wrapper_coords = self.filter.initialize_coords(self.model, coords, colors) 27 | finput = SparseTensor(soutput.F, wrapper_coords) 28 | soutput = self.filter(finput) 29 | 30 | return soutput 31 | -------------------------------------------------------------------------------- /downstream/insseg/insseg_models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
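# Usage sketch (constructor arguments are an assumption; see the model classes
# for the exact signature):
#   NetClass = load_model('Res16UNet34C')
#   model = NetClass(in_channels, out_channels, config)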
5 | 6 | import insseg_models.insseg_res16unet as res16unet 7 | 8 | MODELS = [] 9 | 10 | def add_models(module): 11 | MODELS.extend([getattr(module, a) for a in dir(module) if 'Net' in a]) 12 | 13 | add_models(res16unet) 14 | 15 | def get_models(): 16 | '''Returns a tuple of sample models.''' 17 | return MODELS 18 | 19 | def load_model(name): 20 | '''Creates and returns an instance of the model given its class name. 21 | ''' 22 | # Find the model class from its name 23 | all_models = get_models() 24 | mdict = {model.__name__: model for model in all_models} 25 | if name not in mdict: 26 | print('Invalid model index. Options are:') 27 | # Display a list of valid model names 28 | for model in all_models: 29 | print('\t* {}'.format(model.__name__)) 30 | return None 31 | NetClass = mdict[name] 32 | 33 | return NetClass 34 | -------------------------------------------------------------------------------- /downstream/insseg/scripts/train_scannet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | MODEL=Res16UNet34C 8 | 9 | python ddp_main.py \ 10 | train.train_phase=train \ 11 | train.val_phase=val \ 12 | train.is_train=True \ 13 | train.lenient_weight_loading=True \ 14 | train.stat_freq=5 \ 15 | train.val_freq=250 \ 16 | net.model=${MODEL} \ 17 | net.conv1_kernel_size=3 \ 18 | augmentation.normalize_color=True \ 19 | data.dataset=ScannetVoxelization2cmDataset \ 20 | data.batch_size=48 \ 21 | data.num_workers=2 \ 22 | data.scannet_path=${DATAPATH} \ 23 | data.return_transformation=True \ 24 | optimizer.lr=0.1 \ 25 | optimizer.scheduler=PolyLR \ 26 | optimizer.max_iter=20000 \ 27 | misc.log_dir=${LOG_DIR} \ 28 | misc.num_gpus=8 \ 29 | hydra.launcher.partition=dev \ 30 | hydra.launcher.comment=CVPR_rebuttal \ 31 | net.weights=$PRETRAIN \ 32 | 33 | -------------------------------------------------------------------------------- /lib/ext/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
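# Build sketch (assumes a CUDA toolkit matching the installed PyTorch):
#   cd lib/ext/pointnet2 && python setup.py install
# then sanity-check the compiled ops with pointnet2_test.py in this directory.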
5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | import os 10 | 11 | _ext_src_root = "_ext_src" 12 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 13 | "{}/src/*.cu".format(_ext_src_root) 14 | ) 15 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 16 | 17 | headers = "-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), '_ext_src', 'include') 18 | 19 | setup( 20 | name='pointnet2', 21 | ext_modules=[ 22 | CUDAExtension( 23 | name='pointnet2._ext', 24 | sources=_ext_sources, 25 | extra_compile_args={ 26 | "cxx": ["-O2", headers], 27 | "nvcc": ["-O2", headers] 28 | }, 29 | ) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) -------------------------------------------------------------------------------- /downstream/insseg/eval_all.sh: -------------------------------------------------------------------------------- 1 | conda activate 3dsemseg 2 | 3 | # Add project root to pythonpath 4 | insseg_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 5 | downstream_dir="$(dirname "$insseg_dir")" 6 | project_dir="$(dirname "$downstream_dir")" 7 | export PYTHONPATH="${PYTHONPATH}:${project_dir}" 8 | 9 | echo "${project_dir}" 10 | 11 | 12 | outputs_base=/home/drozenberszki/dev/LongTailSemseg/output/instseg 13 | for d in "$outputs_base"/*/ ; do 14 | echo "$d" 15 | 16 | if [[ "$d" == *"34D"* ]] 17 | then 18 | model=Res16UNet34D 19 | else 20 | model=Res16UNet34C 21 | fi 22 | 23 | python ddp_main.py train.is_train=False \ 24 | train.lenient_weight_loading=True \ 25 | net.model="$model" \ 26 | data.dataset=Scannet200Voxelization2cmDataset \ 27 | data.scannet_path=/mnt/data/Datasets/scannet_200_insseg \ 28 | data.return_transformation=True \ 29 | misc.log_dir=./outputs \ 30 | train.resume="$d" \ 31 | test.visualize=True \ 32 | test.visualize_path="$d"visualize 33 | 34 | done -------------------------------------------------------------------------------- /downstream/insseg/scripts/train_stanford3d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | MODEL=Res16UNet34C 8 | 9 | python ddp_main.py \ 10 | train.train_phase=train \ 11 | train.is_train=True \ 12 | train.lenient_weight_loading=True \ 13 | train.stat_freq=5 \ 14 | train.val_freq=200 \ 15 | train.overwrite_weights=False \ 16 | net.model=${MODEL} \ 17 | net.conv1_kernel_size=3 \ 18 | data.dataset=StanfordArea5Dataset \ 19 | data.batch_size=48 \ 20 | data.voxel_size=0.05 \ 21 | data.num_workers=2 \ 22 | data.stanford3d_path=${DATAPATH} \ 23 | augmentation.data_aug_color_trans_ratio=0.05 \ 24 | augmentation.data_aug_color_jitter_std=0.005 \ 25 | optimizer.lr=0.1 \ 26 | optimizer.scheduler=PolyLR \ 27 | optimizer.max_iter=15000 \ 28 | misc.log_dir=${LOG_DIR} \ 29 | misc.num_gpus=8 \ 30 | misc.train_stuff=True \ 31 | hydra.launcher.partition=priority \ 32 | hydra.launcher.comment=CriticalEXP \ 33 | net.weights=$PRETRAIN \ 34 | -------------------------------------------------------------------------------- /lib/ext/pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. ''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | #from lib.datasets import synthia 8 | from datasets import stanford 9 | from datasets import scannet 10 | #from lib.datasets import shapenet 11 | 12 | DATASETS = [] 13 | 14 | 15 | def add_datasets(module): 16 | DATASETS.extend([getattr(module, a) for a in dir(module) if 'Dataset' in a]) 17 | 18 | 19 | add_datasets(stanford) 20 | #add_datasets(synthia) 21 | add_datasets(scannet) 22 | #add_datasets(shapenet) 23 | 24 | def load_dataset(name): 25 | '''Creates and returns an instance of the datasets given its name. 26 | ''' 27 | # Find the model class from its name 28 | mdict = {dataset.__name__: dataset for dataset in DATASETS} 29 | if name not in mdict: 30 | print('Invalid dataset index. 
Options are:') 31 | # Display a list of valid dataset names 32 | for dataset in DATASETS: 33 | print('\t* {}'.format(dataset.__name__)) 34 | raise ValueError(f'Dataset {name} not defined') 35 | DatasetClass = mdict[name] 36 | 37 | return DatasetClass 38 | -------------------------------------------------------------------------------- /scripts/train_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PYTHONUNBUFFERED="True" 4 | 5 | export DATASET=Scannet200Voxelization2cmDataset 6 | 7 | export MODEL=$1 #Res16UNet34C, Res16UNet34D 8 | export BATCH_SIZE=$2 9 | export SUFFIX=$3 10 | export ARGS=$4 11 | 12 | export WEIGHTS_SUFFIX=$5 13 | 14 | export DATA_ROOT="/mnt/Data/ScanNet/scannet_200" 15 | export PRETRAINED_WEIGHTS="/mnt/Data//weights/"$WEIGHTS_SUFFIX 16 | export OUTPUT_DIR_ROOT="/home/drozenberszki/Downloads/tmp" 17 | 18 | export TIME=$(date +"%Y-%m-%d_%H-%M-%S") 19 | export LOG_DIR=$OUTPUT_DIR_ROOT/$DATASET/$MODEL-$SUFFIX 20 | 21 | # Save the experiment detail and dir to the common log file 22 | mkdir -p $LOG_DIR 23 | 24 | LOG="$LOG_DIR/$TIME.txt" 25 | 26 | python -m main \ 27 | --log_dir $LOG_DIR \ 28 | --dataset $DATASET \ 29 | --model $MODEL \ 30 | --batch_size $BATCH_SIZE \ 31 | --val_batch_size $BATCH_SIZE \ 32 | --scannet_path $DATA_ROOT \ 33 | --stat_freq 100 \ 34 | --visualize False \ 35 | --visualize_path $LOG_DIR/visualize \ 36 | --num_gpu 2 \ 37 | --balanced_category_sampling True \ 38 | $ARGS \ 39 | 2>&1 | tee -a "$LOG" 40 | 41 | # --resume $LOG_DIR \ 42 | # --weights $PRETRAINED_WEIGHTS \ -------------------------------------------------------------------------------- /scripts/text_representation_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit script when a command returns nonzero state 4 | export PYTHONUNBUFFERED="True" 5 | 6 | export BATCH_SIZE=$1 7 | export MODEL=Res16UNet34D 8 | export DATASET=Scannet200Textual2cmDataset 9 | 10 | export POSTFIX=$2 11 | export ARGS=$3 12 | 13 | # export DATA_ROOT="/mnt/Data/ScanNet/scannet_200" 14 | # export LIMITED_DATA_ROOT="/mnt/Data/ScanNet/limited/"$DATASET_FOLDER 15 | # export OUTPUT_DIR_ROOT="/mnt/Data/output" 16 | # export PRETRAINED_WEIGHTS="/mnt/Data/weights/CLIP/Res16UNet34D.ckpt" 17 | 18 | export TIME=$(date +"%Y-%m-%d_%H-%M-%S") 19 | 20 | export LOG_DIR=$OUTPUT_DIR_ROOT/$DATASET/$MODEL-$POSTFIX 21 | 22 | # Save the experiment detail and dir to the common log file 23 | mkdir -p $LOG_DIR 24 | 25 | LOG="$LOG_DIR/$TIME.txt" 26 | 27 | python -m main \ 28 | --log_dir $LOG_DIR \ 29 | --dataset $DATASET \ 30 | --model $MODEL \ 31 | --batch_size $BATCH_SIZE \ 32 | --val_batch_size $BATCH_SIZE \ 33 | --train_limit_numpoints 1400000 \ 34 | --scannet_path $DATA_ROOT \ 35 | --stat_freq 100 \ 36 | --num_gpu 2 \ 37 | --balanced_category_sampling False \ 38 | --use_embedding_loss True \ 39 | $ARGS \ 40 | 2>&1 | tee -a "$LOG" 41 | 42 | # --resume $LOG_DIR \ 43 | # --weights $PRETRAINED_WEIGHTS \ -------------------------------------------------------------------------------- /lib/ext/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
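//
// Shared input-validation macros for the extension sources, used as e.g.
//   CHECK_CONTIGUOUS(points); CHECK_IS_FLOAT(points);
// Each TORCH_CHECK aborts with the quoted message when its condition fails.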
5 | 
6 | #pragma once
7 | #include <ATen/cuda/CUDAContext.h>
8 | #include <torch/extension.h>
9 | 
10 | #define CHECK_CUDA(x)                                             \
11 |   do {                                                            \
12 |     TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \
13 |   } while (0)
14 | 
15 | #define CHECK_CONTIGUOUS(x)                                          \
16 |   do {                                                               \
17 |     TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \
18 |   } while (0)
19 | 
20 | #define CHECK_IS_INT(x)                                 \
21 |   do {                                                  \
22 |     TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \
23 |                 #x " must be an int tensor");           \
24 |   } while (0)
25 | 
26 | #define CHECK_IS_FLOAT(x)                                 \
27 |   do {                                                    \
28 |     TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \
29 |                 #x " must be a float tensor");            \
30 |   } while (0)
31 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/src/ball_query.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #include "ball_query.h"
7 | #include "utils.h"
8 | 
9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
10 |                                      int nsample, const float *new_xyz,
11 |                                      const float *xyz, int *idx);
12 | 
13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
14 |                       const int nsample) {
15 |   CHECK_CONTIGUOUS(new_xyz);
16 |   CHECK_CONTIGUOUS(xyz);
17 |   CHECK_IS_FLOAT(new_xyz);
18 |   CHECK_IS_FLOAT(xyz);
19 | 
20 |   if (new_xyz.type().is_cuda()) {
21 |     CHECK_CUDA(xyz);
22 |   }
23 | 
24 |   at::Tensor idx =
25 |       torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
26 |                    at::device(new_xyz.device()).dtype(at::ScalarType::Int));
27 | 
28 |   if (new_xyz.type().is_cuda()) {
29 |     query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
30 |                                     radius, nsample, new_xyz.data<float>(),
31 |                                     xyz.data<float>(), idx.data<int>());
32 |   } else {
33 |     TORCH_CHECK(false, "CPU not supported");
34 |   }
35 | 
36 |   return idx;
37 | }
38 | 
--------------------------------------------------------------------------------
/downstream/insseg/scripts/train_scannet_slurm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Add project root to pythonpath
4 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
5 | insseg_dir="$(dirname "$SCRIPT_DIR")"
6 | downstream_dir="$(dirname "$insseg_dir")"
7 | project_dir="$(dirname "$downstream_dir")"
8 | export PYTHONPATH="${PYTHONPATH}:${project_dir}"
9 | 
10 | export BATCH_SIZE=$1
11 | export MODEL=$2
12 | export POSTFIX=$3
13 | export PRETRAINED_CHECKPOINT=$4
14 | export DATASET=Scannet200Voxelization2cmDataset
15 | 
16 | export DATA_ROOT="/mnt/Data/ScanNet/scannet_200_insseg"
17 | export OUTPUT_DIR_ROOT="/mnt/Data/outputs"
18 | export PRETRAINED_WEIGHTS="/mnt/Data/ScanNet/weights/"$PRETRAINED_CHECKPOINT
19 | 
20 | export TIME=$(date +"%Y-%m-%d_%H-%M-%S")
21 | export LOG_DIR=$OUTPUT_DIR_ROOT/$DATASET/$MODEL-$POSTFIX
22 | 
23 | # Save the experiment detail and dir to the common log file
24 | mkdir -p $LOG_DIR
25 | LOG="$LOG_DIR/$TIME.txt"
26 | 
27 | python ddp_main.py \
28 | train.is_train=True \
29 | train.lenient_weight_loading=True \
30 | train.stat_freq=20 \
31 | net.model=${MODEL} \
32 | data.dataset=${DATASET} \
33 | data.batch_size=${BATCH_SIZE} \
34 | data.num_workers=4 \
35 | data.scannet_path=${DATA_ROOT} \
36 | data.return_transformation=True \
37 | optimizer.lr=0.02 \
38 | misc.log_dir=${LOG_DIR} \
39 | misc.num_gpus=8 \
40 | net.weights=${PRETRAINED_WEIGHTS} \ 41 | 2>&1 | tee -a "$LOG" -------------------------------------------------------------------------------- /scripts/fine_tune_classifier.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PYTHONUNBUFFERED="True" 4 | 5 | export BATCH_SIZE=$1 6 | export LOSS_TYPE=$2 7 | export MODEL=$3 8 | export ARGS=$4 9 | export POSTFIX=$5 10 | 11 | export DATASET=Scannet200Textual2cmDataset # Scannet200Voxelization2cmDataset 12 | 13 | # export DATA_ROOT="/mnt/Data/ScanNet/scannet_200" 14 | # export LIMITED_DATA_ROOT="/mnt/Data/ScanNet/limited/"$DATASET_FOLDER 15 | # export OUTPUT_DIR_ROOT="/mnt/Data/output" 16 | # export PRETRAINED_WEIGHTS="/mnt/Data/weights/CLIP/Res16UNet34D.ckpt" 17 | 18 | export TIME=$(date +"%Y-%m-%d_%H-%M-%S") 19 | 20 | export LOG_DIR=$OUTPUT_DIR_ROOT/$DATASET/$MODEL-finetune-$POSTFIX 21 | 22 | # Save the experiment detail and dir to the common log file 23 | mkdir -p $LOG_DIR 24 | 25 | LOG="$LOG_DIR/$TIME.txt" 26 | 27 | python -m lightning_main \ 28 | --log_dir $LOG_DIR \ 29 | --dataset $DATASET \ 30 | --model $MODEL \ 31 | --batch_size $BATCH_SIZE \ 32 | --val_batch_size $BATCH_SIZE \ 33 | --train_phase train \ 34 | --scannet_path $DATA_ROOT \ 35 | --stat_freq 40 \ 36 | --visualize False \ 37 | --visualize_freq 150 \ 38 | --visualize_path $LOG_DIR/visualize \ 39 | --num_gpu 4 \ 40 | --use_embedding_loss both \ 41 | --loss_type $LOSS_TYPE \ 42 | --classifier_only True \ 43 | --resume $LOG_DIR \ 44 | $ARGS \ 45 | 2>&1 | tee -a "$LOG" 46 | 47 | # --resume $LOG_DIR \ 48 | # --classifier_only True \ 49 | # --weights $PRETRAINED_WEIGHTS \ -------------------------------------------------------------------------------- /lib/losses/SoftIoULoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SoftIoULoss(nn.Module): 7 | def __init__(self, n_classes, ignore_index=255): 8 | super(SoftIoULoss, self).__init__() 9 | self.n_classes = n_classes 10 | self.ignore_index = ignore_index 11 | 12 | def to_one_hot(self, tensor, n_classes): 13 | n, h, w = tensor.size() 14 | tensor[tensor == self.ignore_index] = n_classes 15 | one_hot = torch.zeros(n, n_classes+1, h, w).scatter_(1, tensor.view(n, 1, h, w).cpu(), 1) 16 | return one_hot 17 | 18 | def forward(self, input, target): 19 | # logit => N x Classes x H x W 20 | # target => N x H x W 21 | input = input[target != self.ignore_index] 22 | target = target[target != self.ignore_index] 23 | 24 | N = len(input) 25 | 26 | pred = F.softmax(input, dim=1) 27 | target_onehot = torch.nn.functional.one_hot(target, num_classes=self.n_classes) 28 | # Numerator Product 29 | inter = pred * target_onehot 30 | # Sum over all pixels N x C x H x W => N x C 31 | inter = inter.view(N, self.n_classes, -1).sum(2) 32 | 33 | # Denominator 34 | union = pred + target_onehot - (pred * target_onehot) 35 | # Sum over all pixels N x C x H x W => N x C 36 | union = union.view(N, self.n_classes, -1).sum(2) 37 | 38 | loss = inter / (union + 1e-16) 39 | 40 | # Return average loss over classes and batch 41 | return 1 - loss.mean() 42 | -------------------------------------------------------------------------------- /lib/ext/pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #ifndef _CUDA_UTILS_H
7 | #define _CUDA_UTILS_H
8 | 
9 | #include <cmath>
10 | #include <cstdio>
11 | #include <cstdlib>
12 | 
13 | #include <cuda.h>
14 | #include <cuda_runtime.h>
15 | 
16 | #include <vector>
17 | 
18 | #define TOTAL_THREADS 512
19 | 
20 | inline int opt_n_threads(int work_size) {
21 |   const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
22 | 
23 |   return max(min(1 << pow_2, TOTAL_THREADS), 1);
24 | }
25 | 
26 | inline dim3 opt_block_config(int x, int y) {
27 |   const int x_threads = opt_n_threads(x);
28 |   const int y_threads =
29 |       max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
30 |   dim3 block_config(x_threads, y_threads, 1);
31 | 
32 |   return block_config;
33 | }
34 | 
35 | #define CUDA_CHECK_ERRORS()                                           \
36 |   do {                                                                \
37 |     cudaError_t err = cudaGetLastError();                             \
38 |     if (cudaSuccess != err) {                                         \
39 |       fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
40 |               cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
41 |               __FILE__);                                              \
42 |       exit(-1);                                                       \
43 |     }                                                                 \
44 |   } while (0)
45 | 
46 | #endif
47 | 
--------------------------------------------------------------------------------
/lib/losses/RecallCrossEntropy.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | 
4 | class RecallCrossEntropy(torch.nn.Module):
5 |   def __init__(self, n_classes=19, ignore_index=255, reduction='mean'):
6 |     super(RecallCrossEntropy, self).__init__()
7 |     self.n_classes = n_classes
8 |     self.ignore_index = ignore_index
9 |     self.reduction = reduction
10 | 
11 |   def forward(self, input, target):
12 |     # input (batch,n_classes,H,W)
13 |     # target (batch,H,W)
14 |     pred = input.argmax(1)
15 |     idex = (pred != target).view(-1)
16 | 
17 |     # calculate ground truth counts
18 |     gt_counter = torch.ones((self.n_classes,)).to(target.device)
19 |     gt_idx, gt_count = torch.unique(target, return_counts=True)
20 | 
21 |     # map ignored label to an existing one
22 |     gt_count[gt_idx == self.ignore_index] = gt_count[1].clone()
23 |     gt_idx[gt_idx == self.ignore_index] = 1
24 |     gt_counter[gt_idx] = gt_count.float()
25 | 
26 |     # calculate false negative counts
27 |     fn_counter = torch.ones((self.n_classes)).to(target.device)
28 |     fn = target.view(-1)[idex]
29 |     fn_idx, fn_count = torch.unique(fn, return_counts=True)
30 | 
31 |     # map ignored label to an existing one
32 |     fn_count[fn_idx == self.ignore_index] = fn_count[1].clone()
33 |     fn_idx[fn_idx == self.ignore_index] = 1
34 |     fn_counter[fn_idx] = fn_count.float()
35 | 
36 |     weight = fn_counter / gt_counter
37 | 
38 |     CE = F.cross_entropy(input, target, reduction='none', ignore_index=self.ignore_index)
39 |     loss = weight[target] * CE
40 | 
41 |     if self.reduction == 'mean':
42 |       loss = loss.mean()
43 |     elif self.reduction == 'sum':
44 |       loss = loss.sum()
45 | 
46 |     return loss
47 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | import models.resunet as resunet
2 | import models.res16unet as res16unet
3 | import models.clip_models as clip_models
4 | import models.classifier_models as classifier_models
5 | 
6 | # from models.trilateral_crf import TrilateralCRF
7 | from models.conditional_random_fields import BilateralCRF, TrilateralCRF
8 | 
9 | MODELS = []
10 | 
11 | 
12 | def add_models(module):
13 |   MODELS.extend([getattr(module, a) for a in dir(module) if 
'Net' in a]) 14 | 15 | 16 | add_models(resunet) 17 | add_models(res16unet) 18 | add_models(clip_models) 19 | add_models(classifier_models) 20 | 21 | WRAPPERS = [BilateralCRF, TrilateralCRF] 22 | 23 | 24 | def get_models(): 25 | '''Returns a tuple of sample models.''' 26 | return MODELS 27 | 28 | 29 | def get_wrappers(): 30 | return WRAPPERS 31 | 32 | 33 | def load_model(name): 34 | '''Creates and returns an instance of the model given its class name. 35 | ''' 36 | # Find the model class from its name 37 | all_models = get_models() 38 | mdict = {model.__name__: model for model in all_models} 39 | if name not in mdict: 40 | print('Invalid model index. Options are:') 41 | # Display a list of valid model names 42 | for model in all_models: 43 | print('\t* {}'.format(model.__name__)) 44 | return None 45 | NetClass = mdict[name] 46 | 47 | return NetClass 48 | 49 | 50 | def load_wrapper(name): 51 | '''Creates and returns an instance of the model given its class name. 52 | ''' 53 | # Find the model class from its name 54 | all_wrappers = get_wrappers() 55 | mdict = {wrapper.__name__: wrapper for wrapper in all_wrappers} 56 | if name not in mdict: 57 | print('Invalid wrapper index. Options are:') 58 | # Display a list of valid model names 59 | for wrapper in all_wrappers: 60 | print('\t* {}'.format(wrapper.__name__)) 61 | return None 62 | WrapperClass = mdict[name] 63 | 64 | return WrapperClass 65 | -------------------------------------------------------------------------------- /lib/ext/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
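//
// Launch layout: one thread block per batch element; each thread strides over
// the m query points and records up to nsample neighbor indices within
// `radius`, pre-filling all slots with the first hit so the buffer stays valid.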
5 | 
6 | #include <math.h>
7 | #include <stdio.h>
8 | #include <stdlib.h>
9 | 
10 | #include "cuda_utils.h"
11 | 
12 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
13 | // output: idx(b, m, nsample)
14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
15 |                                         int nsample,
16 |                                         const float *__restrict__ new_xyz,
17 |                                         const float *__restrict__ xyz,
18 |                                         int *__restrict__ idx) {
19 |   int batch_index = blockIdx.x;
20 |   xyz += batch_index * n * 3;
21 |   new_xyz += batch_index * m * 3;
22 |   idx += m * nsample * batch_index;
23 | 
24 |   int index = threadIdx.x;
25 |   int stride = blockDim.x;
26 | 
27 |   float radius2 = radius * radius;
28 |   for (int j = index; j < m; j += stride) {
29 |     float new_x = new_xyz[j * 3 + 0];
30 |     float new_y = new_xyz[j * 3 + 1];
31 |     float new_z = new_xyz[j * 3 + 2];
32 |     for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
33 |       float x = xyz[k * 3 + 0];
34 |       float y = xyz[k * 3 + 1];
35 |       float z = xyz[k * 3 + 2];
36 |       float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
37 |                  (new_z - z) * (new_z - z);
38 |       if (d2 < radius2) {
39 |         if (cnt == 0) {
40 |           for (int l = 0; l < nsample; ++l) {
41 |             idx[j * nsample + l] = k;
42 |           }
43 |         }
44 |         idx[j * nsample + cnt] = k;
45 |         ++cnt;
46 |       }
47 |     }
48 |   }
49 | }
50 | 
51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
52 |                                      int nsample, const float *new_xyz,
53 |                                      const float *xyz, int *idx) {
54 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
55 |   query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
56 |       b, n, m, radius, nsample, new_xyz, xyz, idx);
57 | 
58 |   CUDA_CHECK_ERRORS();
59 | }
60 | 
--------------------------------------------------------------------------------
/lib/math_functions.py:
--------------------------------------------------------------------------------
1 | from scipy.sparse import csr_matrix
2 | import torch
3 | 
4 | 
5 | class SparseMM(torch.autograd.Function):
6 |   """
7 |   Sparse x dense matrix multiplication with autograd support.
8 |   Implementation by Soumith Chintala:
9 |   https://discuss.pytorch.org/t/
10 |   does-pytorch-support-autograd-on-sparse-matrix/6156/7
11 |   """
12 | 
13 |   def forward(self, matrix1, matrix2):
14 |     self.save_for_backward(matrix1, matrix2)
15 |     return torch.mm(matrix1, matrix2)
16 | 
17 |   def backward(self, grad_output):
18 |     matrix1, matrix2 = self.saved_tensors
19 |     grad_matrix1 = grad_matrix2 = None
20 | 
21 |     if self.needs_input_grad[0]:
22 |       grad_matrix1 = torch.mm(grad_output, matrix2.t())
23 | 
24 |     if self.needs_input_grad[1]:
25 |       grad_matrix2 = torch.mm(matrix1.t(), grad_output)
26 | 
27 |     return grad_matrix1, grad_matrix2
28 | 
29 | 
30 | def sparse_float_tensor(values, indices, size=None):
31 |   """
32 |   Return a torch sparse matrix given values and indices (row_ind, col_ind).
33 |   If the size is an integer, return a square matrix with side size.
34 |   If the size is a torch.Size, use it to initialize the out tensor.
35 |   If none, the size is inferred. 
36 | """ 37 | indices = torch.stack(indices).int() 38 | sargs = [indices, values.float()] 39 | if size is not None: 40 | # Use the provided size 41 | if isinstance(size, int): 42 | size = torch.Size((size, size)) 43 | sargs.append(size) 44 | if values.is_cuda: 45 | return torch.cuda.sparse.FloatTensor(*sargs) 46 | else: 47 | return torch.sparse.FloatTensor(*sargs) 48 | 49 | 50 | def diags(values, size=None): 51 | values = values.view(-1) 52 | n = values.nelement() 53 | size = torch.Size((n, n)) 54 | indices = (torch.arange(0, n), torch.arange(0, n)) 55 | return sparse_float_tensor(values, indices, size) 56 | 57 | 58 | def sparse_to_csr_matrix(tensor): 59 | tensor = tensor.cpu() 60 | inds = tensor._indices().numpy() 61 | vals = tensor._values().numpy() 62 | return csr_matrix((vals, (inds[0], inds[1])), shape=[s for s in tensor.shape]) 63 | 64 | 65 | def csr_matrix_to_sparse(mat): 66 | row_ind, col_ind = mat.nonzero() 67 | return sparse_float_tensor( 68 | torch.from_numpy(mat.data), 69 | (torch.from_numpy(row_ind), torch.from_numpy(col_ind)), 70 | size=torch.Size(mat.shape)) 71 | -------------------------------------------------------------------------------- /downstream/insseg/lib/io3d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import numpy as np 8 | import trimesh 9 | 10 | # color palette for nyu40 labels 11 | def create_color_palette(): 12 | return [ 13 | (0, 0, 0), 14 | (174, 199, 232), # wall 15 | (152, 223, 138), # floor 16 | (31, 119, 180), # cabinet 17 | (255, 187, 120), # bed 18 | (188, 189, 34), # chair 19 | (140, 86, 75), # sofa 20 | (255, 152, 150), # table 21 | (214, 39, 40), # door 22 | (197, 176, 213), # window 23 | (148, 103, 189), # bookshelf 24 | (196, 156, 148), # picture 25 | (23, 190, 207), # counter 26 | (178, 76, 76), 27 | (247, 182, 210), # desk 28 | (66, 188, 102), 29 | (219, 219, 141), # curtain 30 | (140, 57, 197), 31 | (202, 185, 52), 32 | (51, 176, 203), 33 | (200, 54, 131), 34 | (92, 193, 61), 35 | (78, 71, 183), 36 | (172, 114, 82), 37 | (255, 127, 14), # refrigerator 38 | (91, 163, 138), 39 | (153, 98, 156), 40 | (140, 153, 101), 41 | (158, 218, 229), # shower curtain 42 | (100, 125, 154), 43 | (178, 127, 135), 44 | (120, 185, 128), 45 | (146, 111, 194), 46 | (44, 160, 44), # toilet 47 | (112, 128, 144), # sink 48 | (96, 207, 209), 49 | (227, 119, 194), # bathtub 50 | (213, 92, 176), 51 | (94, 106, 211), 52 | (82, 84, 163), # otherfurn 53 | (100, 85, 144), 54 | ] 55 | 56 | def write_triangle_mesh(vertices, colors, faces, outputFile): 57 | mesh = trimesh.Trimesh(vertices=vertices, vertex_colors=colors, faces=faces, process=False) 58 | mesh.export(outputFile) 59 | 60 | def read_triangle_mesh(filename): 61 | mesh = trimesh.load_mesh(filename, process=False) 62 | if isinstance(mesh, trimesh.PointCloud): 63 | vertices = mesh.vertices 64 | colors = mesh.colors 65 | faces = None 66 | elif isinstance(mesh, trimesh.Trimesh): 67 | vertices = mesh.vertices 68 | colors = mesh.visual.vertex_colors 69 | faces = mesh.faces 70 | 71 | return vertices, colors, faces -------------------------------------------------------------------------------- /lib/ext/pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | //
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #include "group_points.h"
7 | #include "utils.h"
8 | 
9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
10 |                                  const float *points, const int *idx,
11 |                                  float *out);
12 | 
13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
14 |                                       int nsample, const float *grad_out,
15 |                                       const int *idx, float *grad_points);
16 | 
17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) {
18 |   CHECK_CONTIGUOUS(points);
19 |   CHECK_CONTIGUOUS(idx);
20 |   CHECK_IS_FLOAT(points);
21 |   CHECK_IS_INT(idx);
22 | 
23 |   if (points.type().is_cuda()) {
24 |     CHECK_CUDA(idx);
25 |   }
26 | 
27 |   at::Tensor output =
28 |       torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},
29 |                    at::device(points.device()).dtype(at::ScalarType::Float));
30 | 
31 |   if (points.type().is_cuda()) {
32 |     group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
33 |                                 idx.size(1), idx.size(2), points.data<float>(),
34 |                                 idx.data<int>(), output.data<float>());
35 |   } else {
36 |     TORCH_CHECK(false, "CPU not supported");
37 |   }
38 | 
39 |   return output;
40 | }
41 | 
42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {
43 |   CHECK_CONTIGUOUS(grad_out);
44 |   CHECK_CONTIGUOUS(idx);
45 |   CHECK_IS_FLOAT(grad_out);
46 |   CHECK_IS_INT(idx);
47 | 
48 |   if (grad_out.type().is_cuda()) {
49 |     CHECK_CUDA(idx);
50 |   }
51 | 
52 |   at::Tensor output =
53 |       torch::zeros({grad_out.size(0), grad_out.size(1), n},
54 |                    at::device(grad_out.device()).dtype(at::ScalarType::Float));
55 | 
56 |   if (grad_out.type().is_cuda()) {
57 |     group_points_grad_kernel_wrapper(
58 |         grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),
59 |         grad_out.data<float>(), idx.data<int>(), output.data<float>());
60 |   } else {
61 |     TORCH_CHECK(false, "CPU not supported");
62 |   }
63 | 
64 |   return output;
65 | }
66 | 
--------------------------------------------------------------------------------
/downstream/insseg/datasets/dataloader.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | import math
7 | import torch
8 | import torch.distributed as dist
9 | from torch.utils.data.sampler import Sampler
10 | 
11 | 
12 | class InfSampler(Sampler):
13 |   """Samples elements randomly, without replacement. 
/downstream/insseg/datasets/dataloader.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | import math
7 | import torch
8 | import torch.distributed as dist
9 | from torch.utils.data.sampler import Sampler
10 | 
11 | 
12 | class InfSampler(Sampler):
13 | """Samples elements without replacement (randomly when shuffle=True), restarting the pass indefinitely.
14 | 
15 | Arguments:
16 | data_source (Dataset): dataset to sample from
17 | """
18 | 
19 | def __init__(self, data_source, shuffle=False):
20 | self.data_source = data_source
21 | self.shuffle = shuffle
22 | self.reset_permutation()
23 | 
24 | def reset_permutation(self):
25 | n = len(self.data_source)
26 | # torch.randperm only applies to the shuffled case; a bare int has no
27 | # tolist(), so fall back to sequential order when shuffle is False.
28 | self._perm = (torch.randperm(n) if self.shuffle else torch.arange(n)).tolist()
29 | 
30 | def __iter__(self):
31 | return self
32 | 
33 | def __next__(self):
34 | if len(self._perm) == 0:
35 | self.reset_permutation()
36 | 
37 | return self._perm.pop()
38 | 
39 | def __len__(self):
40 | return len(self.data_source)
41 | 
42 | next = __next__  # Python 2 compatibility
43 | 
44 | 
45 | class DistributedInfSampler(InfSampler):
46 | def __init__(self, data_source, num_replicas=None, rank=None, shuffle=True):
47 | if num_replicas is None:
48 | if not dist.is_available():
49 | raise RuntimeError("Requires distributed package to be available")
50 | num_replicas = dist.get_world_size()
51 | if rank is None:
52 | if not dist.is_available():
53 | raise RuntimeError("Requires distributed package to be available")
54 | rank = dist.get_rank()
55 | 
56 | self.data_source = data_source
57 | self.num_replicas = num_replicas
58 | self.rank = rank
59 | self.epoch = 0
60 | self.it = 0
61 | self.num_samples = int(math.ceil(len(self.data_source) * 1.0 / self.num_replicas))
62 | self.total_size = self.num_samples * self.num_replicas
63 | self.shuffle = shuffle
64 | self.reset_permutation()
65 | 
66 | def __next__(self):
67 | it = self.it * self.num_replicas + self.rank
68 | value = self._perm[it % len(self._perm)]
69 | self.it = self.it + 1
70 | 
71 | if (self.it * self.num_replicas) >= len(self._perm):
72 | self.reset_permutation()
73 | self.it = 0
74 | return value
75 | 
76 | def __len__(self):
77 | return self.num_samples
--------------------------------------------------------------------------------
/downstream/insseg/lib/math_functions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | 
7 | from scipy.sparse import csr_matrix
8 | import torch
9 | 
10 | 
11 | class SparseMM(torch.autograd.Function):
12 | """
13 | Sparse x dense matrix multiplication with autograd support.
14 | Implementation by Soumith Chintala:
15 | https://discuss.pytorch.org/t/
16 | does-pytorch-support-autograd-on-sparse-matrix/6156/7
17 | """
18 | 
19 | def forward(self, matrix1, matrix2):
20 | self.save_for_backward(matrix1, matrix2)
21 | return torch.mm(matrix1, matrix2)
22 | 
23 | def backward(self, grad_output):
24 | matrix1, matrix2 = self.saved_tensors
25 | grad_matrix1 = grad_matrix2 = None
26 | 
27 | if self.needs_input_grad[0]:
28 | grad_matrix1 = torch.mm(grad_output, matrix2.t())
29 | 
30 | if self.needs_input_grad[1]:
31 | grad_matrix2 = torch.mm(matrix1.t(), grad_output)
32 | 
33 | return grad_matrix1, grad_matrix2
34 | 
35 | 
36 | def sparse_float_tensor(values, indices, size=None):
37 | """
38 | Return a torch sparse matrix given values and indices (row_ind, col_ind).
39 | If the size is an integer, return a square matrix with side size.
40 | If the size is a torch.Size, use it to initialize the out tensor.
41 | If none, the size is inferred.
42 | """ 43 | indices = torch.stack(indices).int() 44 | sargs = [indices, values.float()] 45 | if size is not None: 46 | # Use the provided size 47 | if isinstance(size, int): 48 | size = torch.Size((size, size)) 49 | sargs.append(size) 50 | if values.is_cuda: 51 | return torch.cuda.sparse.FloatTensor(*sargs) 52 | else: 53 | return torch.sparse.FloatTensor(*sargs) 54 | 55 | 56 | def diags(values, size=None): 57 | values = values.view(-1) 58 | n = values.nelement() 59 | size = torch.Size((n, n)) 60 | indices = (torch.arange(0, n), torch.arange(0, n)) 61 | return sparse_float_tensor(values, indices, size) 62 | 63 | 64 | def sparse_to_csr_matrix(tensor): 65 | tensor = tensor.cpu() 66 | inds = tensor._indices().numpy() 67 | vals = tensor._values().numpy() 68 | return csr_matrix((vals, (inds[0], inds[1])), shape=[s for s in tensor.shape]) 69 | 70 | 71 | def csr_matrix_to_sparse(mat): 72 | row_ind, col_ind = mat.nonzero() 73 | return sparse_float_tensor( 74 | torch.from_numpy(mat.data), 75 | (torch.from_numpy(row_ind), torch.from_numpy(col_ind)), 76 | size=torch.Size(mat.shape)) 77 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/evaluation/scannet_benchmark_utils/util.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import csv 3 | try: 4 | import numpy as np 5 | except: 6 | print("Failed to import numpy package.") 7 | sys.exit(-1) 8 | try: 9 | import imageio 10 | except: 11 | print("Please install the module 'imageio' for image processing, e.g.") 12 | print("pip install imageio") 13 | sys.exit(-1) 14 | 15 | # print an error message and quit 16 | def print_error(message, user_fault=False): 17 | sys.stderr.write('ERROR: ' + str(message) + '\n') 18 | if user_fault: 19 | sys.exit(2) 20 | sys.exit(-1) 21 | 22 | 23 | # if string s represents an int 24 | def represents_int(s): 25 | try: 26 | int(s) 27 | return True 28 | except ValueError: 29 | return False 30 | 31 | 32 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 33 | assert os.path.isfile(filename) 34 | mapping = dict() 35 | with open(filename) as csvfile: 36 | reader = csv.DictReader(csvfile, delimiter='\t') 37 | for row in reader: 38 | mapping[row[label_from]] = int(row[label_to]) 39 | # if ints convert 40 | if represents_int([key for key in mapping.keys()][0]): 41 | mapping = {int(k):v for k,v in mapping.items()} 42 | return mapping 43 | 44 | 45 | # input: scene_types.txt or scene_types_all.txt 46 | def read_scene_types_mapping(filename, remove_spaces=True): 47 | assert os.path.isfile(filename) 48 | mapping = dict() 49 | lines = open(filename).read().splitlines() 50 | lines = [line.split('\t') for line in lines] 51 | if remove_spaces: 52 | mapping = { x[1].strip():int(x[0]) for x in lines } 53 | else: 54 | mapping = { x[1]:int(x[0]) for x in lines } 55 | return mapping 56 | 57 | 58 | # color by label 59 | def visualize_label_image(filename, image): 60 | height = image.shape[0] 61 | width = image.shape[1] 62 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 63 | color_palette = create_color_palette() 64 | for idx, color in enumerate(color_palette): 65 | vis_image[image==idx] = color 66 | imageio.imwrite(filename, vis_image) 67 | 68 | 69 | # color by different instances (mod length of color palette) 70 | def visualize_instance_image(filename, image): 71 | height = image.shape[0] 72 | width = image.shape[1] 73 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 74 | 
color_palette = create_color_palette()
75 | instances = np.unique(image)
76 | for idx, inst in enumerate(instances):
77 | vis_image[image==inst] = color_palette[inst%len(color_palette)]
78 | imageio.imwrite(filename, vis_image)
79 | 
80 | 
--------------------------------------------------------------------------------
/lib/datasets/prior_info.py:
--------------------------------------------------------------------------------
1 | from lib.datasets.scannet import *
2 | 
3 | class Scannet200Textual2cmDataset(Scannet200Voxelization2cmDataset):
4 | 
5 | def __init__(self,
6 | config,
7 | prevoxel_transform=None,
8 | input_transform=None,
9 | target_transform=None,
10 | augment_data=True,
11 | elastic_distortion=False,
12 | cache=False,
13 | phase=DatasetPhase.Train):
14 | 
15 | super().__init__(
16 | prevoxel_transform=prevoxel_transform,
17 | input_transform=input_transform,
18 | target_transform=target_transform,
19 | augment_data=augment_data,
20 | elastic_distortion=elastic_distortion,
21 | config=config,
22 | cache=cache,
23 | phase=phase)
24 | 
25 | language_features_path = config.scannet_path + '/' + config.language_features_path
26 | if os.path.isfile(language_features_path):
27 | with open(language_features_path, 'rb') as f:
28 | self.loaded_text_features = pickle.load(f)
29 | 
30 | logging.info(f"Loaded file {config.language_features_path}")
31 | else:
32 | logging.info(f"Can't find file {config.language_features_path}")
33 | 
34 | 
35 | class ScannetTextual2cmDataset(ScannetVoxelization2cmDataset):
36 | 
37 | def __init__(self,
38 | config,
39 | prevoxel_transform=None,
40 | input_transform=None,
41 | target_transform=None,
42 | augment_data=True,
43 | elastic_distortion=False,
44 | cache=False,
45 | phase=DatasetPhase.Train):
46 | 
47 | super().__init__(
48 | prevoxel_transform=prevoxel_transform,
49 | input_transform=input_transform,
50 | target_transform=target_transform,
51 | augment_data=augment_data,
52 | elastic_distortion=elastic_distortion,
53 | config=config,
54 | cache=cache,
55 | phase=phase)
56 | 
57 | language_features_path = config.scannet_path + '/' + config.language_features_path
58 | if os.path.isfile(language_features_path):
59 | with open(language_features_path, 'rb') as f:
60 | self.loaded_text_features = pickle.load(f)
61 | 
62 | logging.info(f"Loaded file {config.language_features_path}")
63 | else:
64 | logging.info(f"Can't find file {config.language_features_path}")
65 | 
66 | 
67 | class Scannet200TextualDataset(Scannet200Textual2cmDataset):
68 | VOXEL_SIZE = 0.05
69 | 
70 | 
--------------------------------------------------------------------------------
/downstream/insseg/datasets/evaluation/scannet_benchmark_utils/scripts/util.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import csv
3 | try:
4 | import numpy as np
5 | except:
6 | print("Failed to import numpy package.")
7 | sys.exit(-1)
8 | try:
9 | import imageio
10 | except:
11 | print("Please install the module 'imageio' for image processing, e.g.")
12 | print("pip install imageio")
13 | sys.exit(-1)
14 | 
15 | # print an error message and quit
16 | def print_error(message, user_fault=False):
17 | sys.stderr.write('ERROR: ' + str(message) + '\n')
18 | if user_fault:
19 | sys.exit(2)
20 | sys.exit(-1)
21 | 
22 | 
23 | # if string s represents an int
24 | def represents_int(s):
25 | try:
26 | int(s)
27 | return True
28 | except ValueError:
29 | return False
30 | 
31 | 
32 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'):
33 | 
assert os.path.isfile(filename) 34 | mapping = dict() 35 | with open(filename) as csvfile: 36 | reader = csv.DictReader(csvfile, delimiter='\t') 37 | for row in reader: 38 | mapping[row[label_from]] = int(row[label_to]) 39 | # if ints convert 40 | if represents_int([key for key in mapping.keys()][0]): 41 | mapping = {int(k):v for k,v in mapping.items()} 42 | return mapping 43 | 44 | 45 | # input: scene_types.txt or scene_types_all.txt 46 | def read_scene_types_mapping(filename, remove_spaces=True): 47 | assert os.path.isfile(filename) 48 | mapping = dict() 49 | lines = open(filename).read().splitlines() 50 | lines = [line.split('\t') for line in lines] 51 | if remove_spaces: 52 | mapping = { x[1].strip():int(x[0]) for x in lines } 53 | else: 54 | mapping = { x[1]:int(x[0]) for x in lines } 55 | return mapping 56 | 57 | 58 | # color by label 59 | def visualize_label_image(filename, image): 60 | height = image.shape[0] 61 | width = image.shape[1] 62 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 63 | color_palette = create_color_palette() 64 | for idx, color in enumerate(color_palette): 65 | vis_image[image==idx] = color 66 | imageio.imwrite(filename, vis_image) 67 | 68 | 69 | # color by different instances (mod length of color palette) 70 | def visualize_instance_image(filename, image): 71 | height = image.shape[0] 72 | width = image.shape[1] 73 | vis_image = np.zeros([height, width, 3], dtype=np.uint8) 74 | color_palette = create_color_palette() 75 | instances = np.unique(image) 76 | for idx, inst in enumerate(instances): 77 | vis_image[image==inst] = color_palette[inst%len(color_palette)] 78 | imageio.imwrite(filename, vis_image) 79 | 80 | -------------------------------------------------------------------------------- /lib/losses/SupervisedSimiam.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: David Rozenberszki (david.rozenberszki@tum.de) 3 | Date: Jan 29, 2022 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import multiprocessing 10 | 11 | from lib.losses.utils import sample_categories_for_balancing 12 | 13 | 14 | class SupervisedSimSiam(nn.Module): 15 | 16 | def __init__(self, config, anchor_features, dataset): 17 | super(SupervisedSimSiam, self).__init__() 18 | 19 | # general global vars 20 | self.ignore_label = config.ignore_label 21 | self.config = config 22 | self.eps = 10e-5 23 | 24 | # For multiprocessing 25 | self.num_cores = multiprocessing.cpu_count() 26 | 27 | # Probably CLIP features to drive the representations 28 | self.anchor_features = anchor_features 29 | 30 | # Save dataset where it runs 31 | self.dataset = dataset 32 | 33 | def cosine_loss(self, A, B): 34 | An = F.normalize(A, p=2, dim=1) 35 | Bn = F.normalize(B, p=2, dim=1) 36 | return 1 - (An * Bn).sum(1) 37 | 38 | 39 | def forward(self, p1, p2, z1, z2, corrs1, corrs2, labels1, labels2): 40 | 41 | # Push to correct device if not already there 42 | device = p1.device 43 | if self.anchor_features.device != device: 44 | self.anchor_features = self.anchor_features.to(device) 45 | 46 | valid1 = labels1 != self.ignore_label 47 | valid2 = labels2 != self.ignore_label 48 | 49 | simsiam_loss1 = self.cosine_loss(p1[valid1], z2[corrs1][valid1]) 50 | simsiam_loss2 = self.cosine_loss(p2[valid2], z1[corrs2][valid2]) 51 | 52 | target_features1 = self.anchor_features[labels1[valid1]] 53 | target_features2 = self.anchor_features[labels2[valid2]] 54 | 55 | anchor_loss1 = self.cosine_loss(p1[valid1], target_features1) 
56 | anchor_loss2 = self.cosine_loss(p2[valid2], target_features2)
57 | 
58 | loss1 = (anchor_loss1) / 4.
59 | loss2 = (anchor_loss2) / 4.
60 | 
61 | loss1, split_losses1, split_items1 = sample_categories_for_balancing(loss1, self.config, self.dataset, targets=labels1[valid1])
62 | loss2, split_losses2, split_items2 = sample_categories_for_balancing(loss2, self.config, self.dataset, targets=labels2[valid2])
63 | 
64 | return loss1 + loss2, split_losses1, split_losses2, split_items1, split_items2
65 | 
66 | 
67 | class PointSimSiamLoss(nn.Module):
68 | 
69 | def __init__(self, config):
70 | super(PointSimSiamLoss, self).__init__()
71 | 
72 | self.config = config
73 | self.eps = 10e-5
74 | 
75 | # For multiprocessing
76 | self.num_cores = multiprocessing.cpu_count()
77 | 
78 | def cosine_loss(self, A, B):
79 | An = F.normalize(A, p=2, dim=1)
80 | Bn = F.normalize(B, p=2, dim=1)
81 | return 1 - (An * Bn).sum(1)
82 | 
83 | 
84 | def forward(self, z1, z2, corrs1, corrs2):
85 | 
86 | simsiam_loss = self.cosine_loss(z1, z2[corrs1])
87 | 
88 | return simsiam_loss.mean()
89 | 
--------------------------------------------------------------------------------
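A quick usage sketch for the point-to-point variant above. The shapes and the meaning of corrs1 (for each point of view 1, the index of its corresponding point in view 2) are assumptions inferred from the forward() signature:

import torch

loss_fn = PointSimSiamLoss(config=None)          # config is stored but not used by this class
z1 = torch.randn(2048, 96, requires_grad=True)   # view-1 point features (N x C)
z2 = torch.randn(2048, 96, requires_grad=True)   # view-2 point features (N x C)
corrs1 = torch.randint(0, 2048, (2048,))         # assumed view-1 -> view-2 correspondences
loss = loss_fn(z1, z2, corrs1, corrs2=None)      # corrs2 is unused in this variant
loss.backward()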
/downstream/insseg/lib/bfs/ops/src/bfs_cluster_kernel.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Ball Query with BatchIdx
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 | #include <stdio.h>
7 | #include <stdlib.h>
8 | #include <assert.h>
9 | 
10 | #define TOTAL_THREADS 1024
11 | #define THREADS_PER_BLOCK 512
12 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
13 | 
14 | 
15 | /* ================================== ballquery_batch_p ================================== */
16 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) {
17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
18 | if (pt_idx >= n) return;
19 | 
20 | start_len += (pt_idx * 2);
21 | int idx_temp[1000];
22 | 
23 | float radius2 = radius * radius;
24 | float o_x = xyz[pt_idx * 3 + 0];
25 | float o_y = xyz[pt_idx * 3 + 1];
26 | float o_z = xyz[pt_idx * 3 + 2];
27 | 
28 | int batch_idx = batch_idxs[pt_idx];
29 | int start = batch_offsets[batch_idx];
30 | int end = batch_offsets[batch_idx + 1];
31 | 
32 | int cnt = 0;
33 | for(int k = start; k < end; k++){
34 | float x = xyz[k * 3 + 0];
35 | float y = xyz[k * 3 + 1];
36 | float z = xyz[k * 3 + 2];
37 | float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z);
38 | if(d2 < radius2){
39 | if(cnt < 1000){
40 | idx_temp[cnt] = k;
41 | }
42 | else{
43 | break;
44 | }
45 | ++cnt;
46 | }
47 | }
48 | 
49 | start_len[0] = atomicAdd(cumsum, cnt);
50 | start_len[1] = cnt;
51 | 
52 | int thre = n * meanActive;
53 | if(start_len[0] >= thre) return;
54 | 
55 | idx += start_len[0];
56 | if(start_len[0] + cnt >= thre) cnt = thre - start_len[0];
57 | 
58 | for(int k = 0; k < cnt; k++){
59 | idx[k] = idx_temp[k];
60 | }
61 | }
62 | 
63 | 
64 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) {
65 | // param xyz: (n, 3)
66 | // param batch_idxs: (n)
67 | // param batch_offsets: (B + 1)
68 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
69 | // output start_len: (n, 2), int
70 | 
71 | cudaError_t err;
72 | 
73 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK));
74 | dim3 threads(THREADS_PER_BLOCK);
75 | 
76 | int cumsum = 0;
77 | int* p_cumsum;
78 | cudaMalloc((void**)&p_cumsum, sizeof(int));
79 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice);
80 | 
81 | ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum);
82 | 
83 | err = cudaGetLastError();
84 | if (cudaSuccess != err) {
85 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
86 | exit(-1);
87 | }
88 | 
89 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost);
90 | return cumsum;
91 | }
92 | 
--------------------------------------------------------------------------------
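Per the comments in the wrapper, the kernel fills a flat neighbor buffer plus per-point (start, length) pairs. A sketch of decoding one point's ball-query result on the Python side; the tensor names here are assumptions mirroring the kernel's outputs, not the repo's actual wrapper API:

import torch

def neighbors_of(point_id, idx, start_len):
    # idx: flat 1-D neighbor index buffer; start_len: (n, 2) rows of [start, length]
    start, length = start_len[point_id].tolist()
    # Slice this point's neighbor ids out of the flat buffer
    return idx[start:start + length]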
/models/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from MinkowskiEngine import MinkowskiGlobalPooling, MinkowskiBroadcastAddition, MinkowskiBroadcastMultiplication
5 | 
6 | 
7 | class MinkowskiLayerNorm(nn.Module):
8 | 
9 | def __init__(self, num_features, eps=1e-5, D=-1):
10 | super(MinkowskiLayerNorm, self).__init__()
11 | self.num_features = num_features
12 | self.eps = eps
13 | self.weight = nn.Parameter(torch.ones(1, num_features))
14 | self.bias = nn.Parameter(torch.zeros(1, num_features))
15 | 
16 | self.mean_in = MinkowskiGlobalPooling(dimension=D)
17 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D)
18 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D)
19 | self.glob_mean = MinkowskiGlobalPooling(dimension=D)
20 | self.glob_times = MinkowskiBroadcastMultiplication(dimension=D)
21 | self.D = D
22 | self.reset_parameters()
23 | 
24 | def __repr__(self):
25 | s = f'(D={self.D})'
26 | return self.__class__.__name__ + s
27 | 
28 | def reset_parameters(self):
29 | self.weight.data.fill_(1)
30 | self.bias.data.zero_()
31 | 
32 | def _check_input_dim(self, input):
33 | if input.F.dim() != 2:
34 | raise ValueError('expected 2D input (got {}D input)'.format(input.dim()))
35 | 
36 | def forward(self, x):
37 | self._check_input_dim(x)
38 | mean = self.mean_in(x).F.mean(-1, keepdim=True)
39 | mean = mean + torch.zeros(mean.size(0), self.num_features).type_as(mean)
40 | temp = self.glob_sum(x.F, -mean)**2
41 | var = self.glob_mean(temp.data).mean(-1, keepdim=True)
42 | var = var + torch.zeros(var.size(0), self.num_features).type_as(var)
43 | instd = 1 / (var + self.eps).sqrt()
44 | 
45 | x = self.glob_times(self.glob_sum2(x, -mean), instd)
46 | return x * self.weight + self.bias
47 | 
48 | 
49 | class MinkowskiInstanceNorm(nn.Module):
50 | 
51 | def __init__(self, num_features, eps=1e-5, D=-1):
52 | super(MinkowskiInstanceNorm, self).__init__()
53 | self.eps = eps
54 | self.weight = nn.Parameter(torch.ones(1, num_features))
55 | self.bias = nn.Parameter(torch.zeros(1, num_features))
56 | 
57 | self.mean_in = MinkowskiGlobalPooling(dimension=D)
58 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D)
59 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D)
60 | self.glob_mean = MinkowskiGlobalPooling(dimension=D)
61 | self.glob_times = MinkowskiBroadcastMultiplication(dimension=D)
62 | self.D = D
63 | self.reset_parameters()
64 | 
65 | def __repr__(self):
66 | s = f'(pixel_dist={self.pixel_dist}, D={self.D})'
67 | return self.__class__.__name__ + s
68 | 
69 | def reset_parameters(self):
70 | self.weight.data.fill_(1)
71 | self.bias.data.zero_()
72 | 
73 | def _check_input_dim(self, input):
74 | if input.dim() != 2:
75 | raise ValueError('expected 2D input (got {}D input)'.format(input.dim()))
76 | 
77 | def forward(self, x):
78 | self._check_input_dim(x)
79 | mean_in = self.mean_in(x)
80 | temp = self.glob_sum(x, -mean_in)**2
81 | var_in = self.glob_mean(temp.data)
82 | instd_in = 1 / (var_in + self.eps).sqrt()
83 | 
84 | x = self.glob_times(self.glob_sum2(x, -mean_in), instd_in)
85 | return x * self.weight + self.bias
86 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/src/group_points_gpu.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | // 
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #include <stdio.h>
7 | #include <stdlib.h>
8 | 
9 | #include "cuda_utils.h"
10 | 
11 | // input: points(b, c, n) idx(b, npoints, nsample)
12 | // output: out(b, c, npoints, nsample)
13 | __global__ void group_points_kernel(int b, int c, int n, int npoints,
14 | int nsample,
15 | const float *__restrict__ points,
16 | const int *__restrict__ idx,
17 | float *__restrict__ out) {
18 | int batch_index = blockIdx.x;
19 | points += batch_index * n * c;
20 | idx += batch_index * npoints * nsample;
21 | out += batch_index * npoints * nsample * c;
22 | 
23 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
24 | const int stride = blockDim.y * blockDim.x;
25 | for (int i = index; i < c * npoints; i += stride) {
26 | const int l = i / npoints;
27 | const int j = i % npoints;
28 | for (int k = 0; k < nsample; ++k) {
29 | int ii = idx[j * nsample + k];
30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii];
31 | }
32 | }
33 | }
34 | 
35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
36 | const float *points, const int *idx,
37 | float *out) {
38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
39 | 
40 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
41 | b, c, n, npoints, nsample, points, idx, out);
42 | 
43 | CUDA_CHECK_ERRORS();
44 | }
45 | 
46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
47 | // output: grad_points(b, c, n)
48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
49 | int nsample,
50 | const float *__restrict__ grad_out,
51 | const int *__restrict__ idx,
52 | float *__restrict__ grad_points) {
53 | int batch_index = blockIdx.x;
54 | grad_out += batch_index * npoints * nsample * c;
55 | idx += batch_index * npoints * nsample;
56 | grad_points += batch_index * n * c;
57 | 
58 | const int index = threadIdx.y * blockDim.x + threadIdx.x;
59 | const int stride = blockDim.y * blockDim.x;
60 | for (int i = index; i < c * npoints; i += stride) {
61 | const int l = i / npoints;
62 | const int j = i % npoints;
63 | for (int k = 0; k < nsample; ++k) {
64 | int ii = idx[j * nsample + k];
65 | atomicAdd(grad_points + l * n + ii,
66 | grad_out[(l * npoints + j) * nsample + k]);
67 | }
68 | }
69 | }
70 | 
71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
72 | int nsample, const float *grad_out,
73 | const int *idx, float *grad_points) {
74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
75 | 
76 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
77 | b, c, n, npoints, nsample, grad_out, idx, grad_points);
78 | 
79 | CUDA_CHECK_ERRORS();
80 | }
81 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/src/sampling.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | // 
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #include "sampling.h"
7 | #include "utils.h"
8 | 
9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
10 | const float *points, const int *idx,
11 | float *out);
12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
13 | const float *grad_out, const int *idx,
14 | float *grad_points);
15 | 
16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
17 | const float *dataset, float *temp,
18 | int *idxs);
19 | 
20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) {
21 | CHECK_CONTIGUOUS(points);
22 | CHECK_CONTIGUOUS(idx);
23 | CHECK_IS_FLOAT(points);
24 | CHECK_IS_INT(idx);
25 | 
26 | if (points.type().is_cuda()) {
27 | CHECK_CUDA(idx);
28 | }
29 | 
30 | at::Tensor output =
31 | torch::zeros({points.size(0), points.size(1), idx.size(1)},
32 | at::device(points.device()).dtype(at::ScalarType::Float));
33 | 
34 | if (points.type().is_cuda()) {
35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
36 | idx.size(1), points.data<float>(),
37 | idx.data<int>(), output.data<float>());
38 | } else {
39 | TORCH_CHECK(false, "CPU not supported");
40 | }
41 | 
42 | return output;
43 | }
44 | 
45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,
46 | const int n) {
47 | CHECK_CONTIGUOUS(grad_out);
48 | CHECK_CONTIGUOUS(idx);
49 | CHECK_IS_FLOAT(grad_out);
50 | CHECK_IS_INT(idx);
51 | 
52 | if (grad_out.type().is_cuda()) {
53 | CHECK_CUDA(idx);
54 | }
55 | 
56 | at::Tensor output =
57 | torch::zeros({grad_out.size(0), grad_out.size(1), n},
58 | at::device(grad_out.device()).dtype(at::ScalarType::Float));
59 | 
60 | if (grad_out.type().is_cuda()) {
61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,
62 | idx.size(1), grad_out.data<float>(),
63 | idx.data<int>(), output.data<float>());
64 | } else {
65 | TORCH_CHECK(false, "CPU not supported");
66 | }
67 | 
68 | return output;
69 | }
70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {
71 | CHECK_CONTIGUOUS(points);
72 | CHECK_IS_FLOAT(points);
73 | 
74 | at::Tensor output =
75 | torch::zeros({points.size(0), nsamples},
76 | at::device(points.device()).dtype(at::ScalarType::Int));
77 | 
78 | at::Tensor tmp =
79 | torch::full({points.size(0), points.size(1)}, 1e10,
80 | at::device(points.device()).dtype(at::ScalarType::Float));
81 | 
82 | if (points.type().is_cuda()) {
83 | furthest_point_sampling_kernel_wrapper(
84 | points.size(0), points.size(1), nsamples, points.data<float>(),
85 | tmp.data<float>(), output.data<int>());
86 | } else {
87 | TORCH_CHECK(false, "CPU not supported");
88 | }
89 | 
90 | return output;
91 | }
92 | 
--------------------------------------------------------------------------------
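For reference, furthest point sampling has a simple (if slow) pure-PyTorch equivalent; this is a sketch for illustration only, not the repo's CUDA implementation:

import torch

def furthest_point_sampling_torch(xyz, nsamples):
    """xyz: (N, 3) points; returns indices of nsamples spread-out points."""
    n = xyz.size(0)
    idxs = torch.zeros(nsamples, dtype=torch.long)
    dist = torch.full((n,), float('inf'))
    farthest = 0
    for i in range(nsamples):
        idxs[i] = farthest
        # Track each point's distance to the closest already-picked sample
        d = ((xyz - xyz[farthest]) ** 2).sum(dim=1)
        dist = torch.minimum(dist, d)
        farthest = int(dist.argmax())
    return idxs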
/downstream/insseg/lib/layers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from MinkowskiEngine import MinkowskiGlobalPooling, MinkowskiBroadcastAddition, MinkowskiBroadcastMultiplication 11 | 12 | 13 | class MinkowskiLayerNorm(nn.Module): 14 | 15 | def __init__(self, num_features, eps=1e-5, D=-1): 16 | super(MinkowskiLayerNorm, self).__init__() 17 | self.num_features = num_features 18 | self.eps = eps 19 | self.weight = nn.Parameter(torch.ones(1, num_features)) 20 | self.bias = nn.Parameter(torch.zeros(1, num_features)) 21 | 22 | self.mean_in = MinkowskiGlobalPooling(dimension=D) 23 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D) 24 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D) 25 | self.glob_mean = MinkowskiGlobalPooling(dimension=D) 26 | self.glob_times = MinkowskiBroadcastMultiplication(dimension=D) 27 | self.D = D 28 | self.reset_parameters() 29 | 30 | def __repr__(self): 31 | s = f'(D={self.D})' 32 | return self.__class__.__name__ + s 33 | 34 | def reset_parameters(self): 35 | self.weight.data.fill_(1) 36 | self.bias.data.zero_() 37 | 38 | def _check_input_dim(self, input): 39 | if input.F.dim() != 2: 40 | raise ValueError('expected 2D input (got {}D input)'.format(input.dim())) 41 | 42 | def forward(self, x): 43 | self._check_input_dim(x) 44 | mean = self.mean_in(x).F.mean(-1, keepdim=True) 45 | mean = mean + torch.zeros(mean.size(0), self.num_features).type_as(mean) 46 | temp = self.glob_sum(x.F, -mean)**2 47 | var = self.glob_mean(temp.data).mean(-1, keepdim=True) 48 | var = var + torch.zeros(var.size(0), self.num_features).type_as(var) 49 | instd = 1 / (var + self.eps).sqrt() 50 | 51 | x = self.glob_times(self.glob_sum2(x, -mean), instd) 52 | return x * self.weight + self.bias 53 | 54 | 55 | class MinkowskiInstanceNorm(nn.Module): 56 | 57 | def __init__(self, num_features, eps=1e-5, D=-1): 58 | super(MinkowskiInstanceNorm, self).__init__() 59 | self.eps = eps 60 | self.weight = nn.Parameter(torch.ones(1, num_features)) 61 | self.bias = nn.Parameter(torch.zeros(1, num_features)) 62 | 63 | self.mean_in = MinkowskiGlobalPooling(dimension=D) 64 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D) 65 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D) 66 | self.glob_mean = MinkowskiGlobalPooling(dimension=D) 67 | self.glob_times = MinkowskiBroadcastMultiplication(dimension=D) 68 | self.D = D 69 | self.reset_parameters() 70 | 71 | def __repr__(self): 72 | s = f'(pixel_dist={self.pixel_dist}, D={self.D})' 73 | return self.__class__.__name__ + s 74 | 75 | def reset_parameters(self): 76 | self.weight.data.fill_(1) 77 | self.bias.data.zero_() 78 | 79 | def _check_input_dim(self, input): 80 | if input.dim() != 2: 81 | raise ValueError('expected 2D input (got {}D input)'.format(input.dim())) 82 | 83 | def forward(self, x): 84 | self._check_input_dim(x) 85 | mean_in = self.mean_in(x) 86 | temp = self.glob_sum(x, -mean_in)**2 87 | var_in = self.glob_mean(temp.data) 88 | instd_in = 1 / (var_in + self.eps).sqrt() 89 | 90 | x = self.glob_times(self.glob_sum2(x, -mean_in), instd_in) 91 | return x * self.weight + self.bias 92 | -------------------------------------------------------------------------------- /lib/datasets/preprocessing/scannet200_insseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import plyfile 3 | import json 4 | import torch 5 | import argparse 6 | import numpy as np 7 | import pandas as pd 8 | from lib.constants.scannet_constants import * 9 | from concurrent.futures import ProcessPoolExecutor 10 | 
import itertools 11 | 12 | # Load labels table 13 | labels_pd = pd.read_csv('scannetv2-labels.combined.tsv', sep='\t', header=0) 14 | labels_pd.loc[labels_pd.raw_category == 'stick', ['category']] = 'object' 15 | labels_pd.loc[labels_pd.category == 'wardrobe ', ['category']] = 'wardrobe' 16 | category_label_names = labels_pd['category'].unique() 17 | valid_raw_cats = np.unique(labels_pd['raw_category'].to_numpy()) 18 | 19 | def RAW2SCANNET(label): 20 | if label not in valid_raw_cats: 21 | return 0 22 | 23 | label_id = int(labels_pd[labels_pd['raw_category'] == label]['id'].iloc[0]) 24 | if not label_id in VALID_CLASS_IDS_LONG: 25 | label_id = 0 26 | return label_id 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--input', default='/mnt/data/ScanNet/scans/') 31 | parser.add_argument('--output', default='./output') 32 | parser.add_argument('--num_threads', type=int, default=8) 33 | opt = parser.parse_args() 34 | return opt 35 | 36 | 37 | def main(config, scene_name): 38 | 39 | print(scene_name) 40 | # Over-segmented segments: maps from segment to vertex/point IDs 41 | segid_to_pointid = {} 42 | segfile = os.path.join(config.input, scene_name, '%s_vh_clean_2.0.010000.segs.json' % (scene_name)) 43 | 44 | if not os.path.exists(segfile): # test scene 45 | return 46 | 47 | with open(segfile) as jsondata: 48 | d = json.load(jsondata) 49 | seg = d['segIndices'] 50 | for i in range(len(seg)): 51 | if seg[i] not in segid_to_pointid: 52 | segid_to_pointid[seg[i]] = [] 53 | segid_to_pointid[seg[i]].append(i) 54 | 55 | # Raw points in XYZRGBA 56 | ply_filename = os.path.join(config.input, scene_name, '%s_vh_clean_2.ply' % (scene_name)) 57 | f = plyfile.PlyData().read(ply_filename) 58 | points = np.array([list(x) for x in f.elements[0]]) 59 | 60 | # Instances over-segmented segment IDs: annotation on segments 61 | instance_segids = [] 62 | labels = [] 63 | annotation_filename = os.path.join(config.input, scene_name, '%s.aggregation.json' % (scene_name)) 64 | with open(annotation_filename) as jsondata: 65 | d = json.load(jsondata) 66 | for x in d['segGroups']: 67 | instance_segids.append(x['segments']) 68 | labels.append(x['label']) 69 | 70 | # Each instance's points 71 | instance_labels = np.zeros(points.shape[0]) 72 | semantic_labels = np.zeros(points.shape[0]) 73 | for i in range(len(instance_segids)): 74 | segids = instance_segids[i] 75 | pointids = [] 76 | for segid in segids: 77 | pointids += segid_to_pointid[segid] 78 | pointids = np.array(pointids) 79 | instance_labels[pointids] = i + 1 80 | semantic_labels[pointids] = RAW2SCANNET(labels[i]) 81 | 82 | colors = points[:, 3:6] 83 | points = points[:, 0:3] # XYZ+RGB+NORMAL 84 | torch.save((points, colors, semantic_labels, instance_labels), os.path.join(config.output, scene_name + '.pth')) 85 | 86 | 87 | if __name__ == '__main__': 88 | config = parse_args() 89 | os.makedirs(config.output, exist_ok=True) 90 | 91 | pool = ProcessPoolExecutor(max_workers=config.num_threads) 92 | result = list(pool.map(main, itertools.repeat(config), os.listdir(config.input))) 93 | -------------------------------------------------------------------------------- /models/modules/senet_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | import MinkowskiEngine as ME 4 | 5 | from models.modules.common import ConvType, NormType 6 | from models.modules.resnet_block import BasicBlock, Bottleneck 7 | 8 | 9 | class SELayer(nn.Module): 10 | 11 | def __init__(self, 
channel, reduction=16, D=-1): 12 | # Global coords does not require coords_key 13 | super(SELayer, self).__init__() 14 | self.fc = nn.Sequential( 15 | ME.MinkowskiLinear(channel, channel // reduction), ME.MinkowskiReLU(inplace=True), 16 | ME.MinkowskiLinear(channel // reduction, channel), ME.MinkowskiSigmoid()) 17 | self.pooling = ME.MinkowskiGlobalPooling(dimension=D) 18 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D) 19 | 20 | def forward(self, x): 21 | y = self.pooling(x) 22 | y = self.fc(y) 23 | return self.broadcast_mul(x, y) 24 | 25 | 26 | class SEBasicBlock(BasicBlock): 27 | 28 | def __init__(self, 29 | inplanes, 30 | planes, 31 | stride=1, 32 | dilation=1, 33 | downsample=None, 34 | conv_type=ConvType.HYPERCUBE, 35 | reduction=16, 36 | D=-1): 37 | super(SEBasicBlock, self).__init__( 38 | inplanes, 39 | planes, 40 | stride=stride, 41 | dilation=dilation, 42 | downsample=downsample, 43 | conv_type=conv_type, 44 | D=D) 45 | self.se = SELayer(planes, reduction=reduction, D=D) 46 | 47 | def forward(self, x): 48 | residual = x 49 | 50 | out = self.conv1(x) 51 | out = self.norm1(out) 52 | out = self.relu(out) 53 | 54 | out = self.conv2(out) 55 | out = self.norm2(out) 56 | out = self.se(out) 57 | 58 | if self.downsample is not None: 59 | residual = self.downsample(x) 60 | 61 | out += residual 62 | out = self.relu(out) 63 | 64 | return out 65 | 66 | 67 | class SEBasicBlockSN(SEBasicBlock): 68 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 69 | 70 | 71 | class SEBasicBlockIN(SEBasicBlock): 72 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 73 | 74 | 75 | class SEBasicBlockLN(SEBasicBlock): 76 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 77 | 78 | 79 | class SEBottleneck(Bottleneck): 80 | 81 | def __init__(self, 82 | inplanes, 83 | planes, 84 | stride=1, 85 | dilation=1, 86 | downsample=None, 87 | conv_type=ConvType.HYPERCUBE, 88 | D=3, 89 | reduction=16): 90 | super(SEBottleneck, self).__init__( 91 | inplanes, 92 | planes, 93 | stride=stride, 94 | dilation=dilation, 95 | downsample=downsample, 96 | conv_type=conv_type, 97 | D=D) 98 | self.se = SELayer(planes * self.expansion, reduction=reduction, D=D) 99 | 100 | def forward(self, x): 101 | residual = x 102 | 103 | out = self.conv1(x) 104 | out = self.norm1(out) 105 | out = self.relu(out) 106 | 107 | out = self.conv2(out) 108 | out = self.norm2(out) 109 | out = self.relu(out) 110 | 111 | out = self.conv3(out) 112 | out = self.norm3(out) 113 | out = self.se(out) 114 | 115 | if self.downsample is not None: 116 | residual = self.downsample(x) 117 | 118 | out += residual 119 | out = self.relu(out) 120 | 121 | return out 122 | 123 | 124 | class SEBottleneckSN(SEBottleneck): 125 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 126 | 127 | 128 | class SEBottleneckIN(SEBottleneck): 129 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 130 | 131 | 132 | class SEBottleneckLN(SEBottleneck): 133 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 134 | -------------------------------------------------------------------------------- /downstream/insseg/lib/solvers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | 7 | import logging 8 | 9 | from torch.optim import SGD, Adam 10 | from torch.optim.lr_scheduler import LambdaLR, StepLR, ReduceLROnPlateau, MultiStepLR 11 | 12 | 13 | class LambdaStepLR(LambdaLR): 14 | 15 | def __init__(self, optimizer, lr_lambda, last_step=-1): 16 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step) 17 | 18 | @property 19 | def last_step(self): 20 | """Use last_epoch for the step counter""" 21 | return self.last_epoch 22 | 23 | @last_step.setter 24 | def last_step(self, v): 25 | self.last_epoch = v 26 | 27 | 28 | class PolyLR(LambdaStepLR): 29 | """DeepLab learning rate policy""" 30 | 31 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1): 32 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step) 33 | 34 | 35 | class SquaredLR(LambdaStepLR): 36 | """ Used for SGD Lars""" 37 | 38 | def __init__(self, optimizer, max_iter, last_step=-1): 39 | super(SquaredLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**2, last_step) 40 | 41 | 42 | class ExpLR(LambdaStepLR): 43 | 44 | def __init__(self, optimizer, step_size, gamma=0.9, last_step=-1): 45 | # (0.9 ** 21.854) = 0.1, (0.95 ** 44.8906) = 0.1 46 | # To get 0.1 every N using gamma 0.9, N * log(0.9)/log(0.1) = 0.04575749 N 47 | # To get 0.1 every N using gamma g, g ** N = 0.1 -> N * log(g) = log(0.1) -> g = np.exp(log(0.1) / N) 48 | super(ExpLR, self).__init__(optimizer, lambda s: gamma**(s / step_size), last_step) 49 | 50 | 51 | def initialize_optimizer(params, config): 52 | assert config.optimizer in ['SGD', 'Adagrad', 'Adam', 'RMSProp', 'Rprop', 'SGDLars'] 53 | 54 | if config.optimizer == 'SGD': 55 | return SGD( 56 | params, 57 | lr=config.lr, 58 | momentum=config.sgd_momentum, 59 | dampening=config.sgd_dampening, 60 | weight_decay=config.weight_decay) 61 | elif config.optimizer == 'Adam': 62 | return Adam( 63 | params, 64 | lr=config.lr, 65 | betas=(config.adam_beta1, config.adam_beta2), 66 | weight_decay=config.weight_decay) 67 | else: 68 | logging.error('Optimizer type not supported') 69 | raise ValueError('Optimizer type not supported') 70 | 71 | 72 | def initialize_scheduler(optimizer, config, last_step=-1): 73 | if config.scheduler == 'StepLR': 74 | return StepLR( 75 | optimizer, step_size=config.step_size, gamma=config.step_gamma, last_epoch=last_step) 76 | if config.scheduler == 'MultiStepLR': 77 | return MultiStepLR(optimizer, milestones=config.multi_step_milestones, gamma=config.step_gamma) 78 | elif config.scheduler == 'PolyLR': 79 | return PolyLR(optimizer, max_iter=config.max_epoch, power=config.poly_power, last_step=last_step) 80 | elif config.scheduler == 'SquaredLR': 81 | return SquaredLR(optimizer, max_iter=config.max_epoch, last_step=last_step) 82 | elif config.scheduler == 'ExpLR': 83 | return ExpLR( 84 | optimizer, step_size=config.exp_step_size, gamma=config.exp_gamma, last_step=last_step) 85 | elif config.scheduler == 'ReduceLROnPlateau': 86 | 87 | lr_scheduler = ReduceLROnPlateau( 88 | optimizer, mode='max', verbose=True, 89 | factor=config.step_gamma, patience=config.reduce_patience, 90 | min_lr=config.scheduler_min_lr) 91 | 92 | scheduler = { 93 | 'scheduler': lr_scheduler, 94 | 'reduce_on_plateau': True, 95 | 'monitor': config.scheadule_monitor 96 | } 97 | 98 | return scheduler 99 | else: 100 | logging.error('Scheduler not supported') 101 | -------------------------------------------------------------------------------- /lib/datasets/preprocessing/utils.py: 
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from lib.pc_utils import read_plyfile, save_point_cloud
4 | 
5 | import json
6 | import pandas as pd
7 | from plyfile import PlyData
8 | import open3d as o3d
9 | from lib.constants.dataset_sets import *
10 | 
11 | def point_indices_from_group(points, seg_indices, group, labels_pd, CLASS_IDs):
12 | group_segments = np.array(group['segments'])
13 | label = group['label']
14 | 
15 | label_ids = labels_pd[labels_pd['raw_category'] == label]['id']
16 | label_id = int(label_ids.iloc[0]) if len(label_ids) > 0 else 0
17 | 
18 | if not label_id in CLASS_IDs:
19 | label_id = 0
20 | 
21 | # get the points whose seg_indices (per-point over-segmentation ids) fall in the group's segment list
22 | point_IDs = np.where(np.isin(seg_indices, group_segments))
23 | 
24 | return points[point_IDs], point_IDs[0], label_id
25 | 
26 | def save_instance(segment_points, label_id, cat_name, scene_id, base_path, segment_faces=None, limited_annotation_points=None):
27 | 
28 | if scene_id in TRAIN_SCENES:
29 | path = base_path + f'/train/train_instances/{cat_name}'
30 | elif scene_id in VAL_SCENES:
31 | path = base_path + f'/train/val_instances/{cat_name}'
32 | else:
33 | return
34 | 
35 | if not os.path.exists(path):
36 | os.makedirs(path)
37 | 
38 | path, dirs, files = next(os.walk(path))
39 | file_count = len(files)
40 | 
41 | if limited_annotation_points is None:
42 | labels = np.ones((segment_points.shape[0], 1), dtype=int) * label_id
43 | else:
44 | labels = np.zeros((segment_points.shape[0], 1), dtype=int)
45 | labels[limited_annotation_points] = label_id
46 | 
47 | labelled_instance_points = np.append(segment_points[:, :6], labels, axis=1)
48 | 
49 | # Push to origin
50 | centroid = np.mean(segment_points[:, :3], axis=0)
51 | labelled_instance_points[:, :3] -= centroid
52 | 
53 | # Save
54 | out_file = path + f'/{cat_name}_{file_count}.ply'
55 | save_point_cloud(labelled_instance_points, out_file, with_label=True, verbose=False, faces=segment_faces)
56 | 
57 | 
58 | def load_pcd(path):
59 | filepath = path
60 | plydata = PlyData.read(str(filepath))
61 | data = plydata.elements[0].data
62 | coords = np.array([data['x'], data['y'], data['z']], dtype=np.float32).T
63 | feats = np.array([data['red'], data['green'], data['blue']], dtype=np.float32).T
64 | labels = np.array([data['label']], dtype=np.float32).T
65 | 
66 | pcd = o3d.geometry.PointCloud()
67 | pcd.points = o3d.utility.Vector3dVector(coords[:, :3])
68 | pcd.colors = o3d.utility.Vector3dVector(feats / 255.)
69 | 70 | return pcd, feats, labels 71 | 72 | 73 | def box_intersect(box_a, box_b): 74 | return (box_a[0, 0] <= box_b[1, 0] and box_a[1, 0] >= box_b[0, 0]) and ( 75 | box_a[0, 1] <= box_b[1, 1] and box_a[1, 1] >= box_b[0, 1]) and ( 76 | box_a[0, 2] <= box_b[1, 2] and box_a[1, 2] >= box_b[0, 2]) 77 | 78 | 79 | def box_contains(parent_box, child_box, inflate_size = 0.): 80 | 81 | parent_min = parent_box[0,:] - inflate_size 82 | parent_max = parent_box[1,:] + inflate_size 83 | 84 | child_min = child_box[0,:] 85 | child_max = child_box[1, :] 86 | 87 | return np.all(np.greater(child_min, parent_min)) and np.all(np.less(child_max, parent_max)) 88 | 89 | def box_contains_percentage_inflate(parent_box, child_box, inflate_size = 0.): 90 | 91 | inflate = np.abs(parent_box[0, :] - parent_box[1, :]) * inflate_size 92 | 93 | parent_min = parent_box[0, :] - inflate 94 | parent_max = parent_box[1, :] + inflate 95 | 96 | child_min = child_box[0,:] 97 | child_max = child_box[1, :] 98 | 99 | return np.all(np.greater(child_min, parent_min)) and np.all(np.less(child_max, parent_max)) 100 | -------------------------------------------------------------------------------- /lib/solvers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from torch.optim import SGD, Adam 4 | from torch.optim.lr_scheduler import LambdaLR, StepLR, ReduceLROnPlateau, MultiStepLR 5 | 6 | 7 | class LambdaStepLR(LambdaLR): 8 | 9 | def __init__(self, optimizer, lr_lambda, last_step=-1): 10 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step) 11 | 12 | @property 13 | def last_step(self): 14 | """Use last_epoch for the step counter""" 15 | return self.last_epoch 16 | 17 | @last_step.setter 18 | def last_step(self, v): 19 | self.last_epoch = v 20 | 21 | 22 | class PolyLR(LambdaStepLR): 23 | """DeepLab learning rate policy""" 24 | 25 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1): 26 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step) 27 | 28 | 29 | class SquaredLR(LambdaStepLR): 30 | """ Used for SGD Lars""" 31 | 32 | def __init__(self, optimizer, max_iter, last_step=-1): 33 | super(SquaredLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**2, last_step) 34 | 35 | 36 | class ExpLR(LambdaStepLR): 37 | 38 | def __init__(self, optimizer, step_size, gamma=0.9, last_step=-1): 39 | # (0.9 ** 21.854) = 0.1, (0.95 ** 44.8906) = 0.1 40 | # To get 0.1 every N using gamma 0.9, N * log(0.9)/log(0.1) = 0.04575749 N 41 | # To get 0.1 every N using gamma g, g ** N = 0.1 -> N * log(g) = log(0.1) -> g = np.exp(log(0.1) / N) 42 | super(ExpLR, self).__init__(optimizer, lambda s: gamma**(s / step_size), last_step) 43 | 44 | 45 | def initialize_optimizer(model, config, lr=None): 46 | 47 | assert config.optimizer in ['SGD', 'Adagrad', 'Adam', 'RMSProp', 'Rprop', 'SGDLars'] 48 | 49 | params = model.parameters() 50 | model_name = type(model).__name__ 51 | 52 | if lr is not None: 53 | learning_rate = lr 54 | else: 55 | learning_rate = config.lr 56 | 57 | if config.optimizer == 'SGD': 58 | return SGD( 59 | params, 60 | lr=learning_rate, 61 | momentum=config.sgd_momentum, 62 | dampening=config.sgd_dampening, 63 | weight_decay=config.weight_decay) 64 | elif config.optimizer == 'Adam': 65 | return Adam( 66 | params, 67 | lr=learning_rate, 68 | betas=(config.adam_beta1, config.adam_beta2), 69 | weight_decay=config.weight_decay) 70 | else: 71 | logging.error('Optimizer type not supported') 72 | raise 
ValueError('Optimizer type not supported')
73 | 
74 | 
75 | def initialize_scheduler(optimizer, config, last_step=-1):
76 | if config.scheduler == 'StepLR':
77 | return StepLR(optimizer, step_size=config.step_size, gamma=config.step_gamma, last_epoch=last_step)
78 | if config.scheduler == 'MultiStepLR':
79 | return MultiStepLR(optimizer, milestones=config.multi_step_milestones, gamma=config.step_gamma)
80 | elif config.scheduler == 'PolyLR':
81 | return PolyLR(optimizer, max_iter=config.max_epoch, power=config.poly_power, last_step=last_step)
82 | elif config.scheduler == 'SquaredLR':
83 | return SquaredLR(optimizer, max_iter=config.max_iter, last_step=last_step)
84 | elif config.scheduler == 'ExpLR':
85 | return ExpLR(
86 | optimizer, step_size=config.exp_step_size, gamma=config.exp_gamma, last_step=last_step)
87 | elif config.scheduler == 'ReduceLROnPlateau':
88 | 
89 | lr_scheduler = ReduceLROnPlateau(
90 | optimizer, mode='max', verbose=True,
91 | factor=config.step_gamma, patience=config.reduce_patience,
92 | min_lr=config.scheduler_min_lr)
93 | 
94 | scheduler = {
95 | 'scheduler': lr_scheduler,
96 | 'reduce_on_plateau': True,
97 | 'monitor': config.scheadule_monitor
98 | }
99 | 
100 | return scheduler
101 | else:
102 | logging.error('Scheduler not supported')
103 | 
--------------------------------------------------------------------------------
/lib/ext/pointnet2/_ext_src/src/interpolate.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | // 
3 | // This source code is licensed under the MIT license found in the
4 | // LICENSE file in the root directory of this source tree.
5 | 
6 | #include "interpolate.h"
7 | #include "utils.h"
8 | 
9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
10 | const float *known, float *dist2, int *idx);
11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
12 | const float *points, const int *idx,
13 | const float *weight, float *out);
14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
15 | const float *grad_out,
16 | const int *idx, const float *weight,
17 | float *grad_points);
18 | 
19 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {
20 | CHECK_CONTIGUOUS(unknowns);
21 | CHECK_CONTIGUOUS(knows);
22 | CHECK_IS_FLOAT(unknowns);
23 | CHECK_IS_FLOAT(knows);
24 | 
25 | if (unknowns.type().is_cuda()) {
26 | CHECK_CUDA(knows);
27 | }
28 | 
29 | at::Tensor idx =
30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3},
31 | at::device(unknowns.device()).dtype(at::ScalarType::Int));
32 | at::Tensor dist2 =
33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3},
34 | at::device(unknowns.device()).dtype(at::ScalarType::Float));
35 | 
36 | if (unknowns.type().is_cuda()) {
37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),
38 | unknowns.data<float>(), knows.data<float>(),
39 | dist2.data<float>(), idx.data<int>());
40 | } else {
41 | TORCH_CHECK(false, "CPU not supported");
42 | }
43 | 
44 | return {dist2, idx};
45 | }
46 | 
47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
48 | at::Tensor weight) {
49 | CHECK_CONTIGUOUS(points);
50 | CHECK_CONTIGUOUS(idx);
51 | CHECK_CONTIGUOUS(weight);
52 | CHECK_IS_FLOAT(points);
53 | CHECK_IS_INT(idx);
54 | CHECK_IS_FLOAT(weight);
55 | 
56 | if (points.type().is_cuda()) {
57 | CHECK_CUDA(idx);
58 | CHECK_CUDA(weight);
59 | }
60 | 
61 | at::Tensor output =
62 | torch::zeros({points.size(0), points.size(1), idx.size(1)},
63 | at::device(points.device()).dtype(at::ScalarType::Float));
64 | 
65 | if (points.type().is_cuda()) {
66 | three_interpolate_kernel_wrapper(
67 | points.size(0), points.size(1), points.size(2), idx.size(1),
68 | points.data<float>(), idx.data<int>(), weight.data<float>(),
69 | output.data<float>());
70 | } else {
71 | TORCH_CHECK(false, "CPU not supported");
72 | }
73 | 
74 | return output;
75 | }
76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
77 | at::Tensor weight, const int m) {
78 | CHECK_CONTIGUOUS(grad_out);
79 | CHECK_CONTIGUOUS(idx);
80 | CHECK_CONTIGUOUS(weight);
81 | CHECK_IS_FLOAT(grad_out);
82 | CHECK_IS_INT(idx);
83 | CHECK_IS_FLOAT(weight);
84 | 
85 | if (grad_out.type().is_cuda()) {
86 | CHECK_CUDA(idx);
87 | CHECK_CUDA(weight);
88 | }
89 | 
90 | at::Tensor output =
91 | torch::zeros({grad_out.size(0), grad_out.size(1), m},
92 | at::device(grad_out.device()).dtype(at::ScalarType::Float));
93 | 
94 | if (grad_out.type().is_cuda()) {
95 | three_interpolate_grad_kernel_wrapper(
96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
97 | grad_out.data<float>(), idx.data<int>(), weight.data<float>(),
98 | output.data<float>());
99 | } else {
100 | TORCH_CHECK(false, "CPU not supported");
101 | }
102 | 
103 | return output;
104 | }
105 | 
--------------------------------------------------------------------------------
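The three_nn / three_interpolate pair implements inverse-distance-weighted feature upsampling from m known points to n unknown points. A pure-PyTorch sketch of the same math (slow, for illustration only, unbatched):

import torch

def three_interpolate_torch(unknown, known, feats):
    """unknown: (n, 3), known: (m, 3), feats: (m, C) -> (n, C)."""
    # Squared distance from every unknown point to every known point
    d2 = torch.cdist(unknown, known) ** 2
    dist2, idx = d2.topk(3, dim=1, largest=False)      # three nearest neighbors
    weight = 1.0 / (dist2 + 1e-8)
    weight = weight / weight.sum(dim=1, keepdim=True)  # normalized inverse-distance weights
    return (feats[idx] * weight.unsqueeze(-1)).sum(dim=1)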
/downstream/insseg/config/default.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | # 
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | net:
7 | model: ResUNet14
8 | conv1_kernel_size: 3
9 | weights: 
10 | weights_for_inner_model: False
11 | dilations: [1,1,1,1]
12 | 
13 | # Wrappers
14 | wrapper_type: 
15 | wrapper_region_type: 1
16 | wrapper_kernel_size: 3
17 | wrapper_lr: 0.1
18 | 
19 | # Meanfield arguments
20 | meanfield_iterations: 10
21 | crf_spatial_sigma: 1
22 | crf_chromatic_sigma: 12
23 | 
24 | optimizer:
25 | optimizer: SGD
26 | lr: 0.05
27 | sgd_momentum: 0.9
28 | sgd_dampening: 0.1
29 | adam_beta1: 0.9
30 | adam_beta2: 0.999
31 | weight_decay: 0.0001
32 | param_histogram_freq: 100
33 | save_param_histogram: False
34 | iter_size: 1
35 | bn_momentum: 0.02
36 | 
37 | # Criterion
38 | loss_type: cross_entropy # cross_entropy, focal
39 | 
40 | # Scheduler
41 | scheduler: MultiStepLR
42 | max_iter: 600000
43 | max_epoch: 400
44 | step_size: 2e4
45 | step_gamma: 0.3
46 | poly_power: 0.9
47 | exp_gamma: 0.95
48 | exp_step_size: 445
49 | multi_step_milestones: [150,200]
50 | 
51 | data:
52 | dataset: ScannetVoxelization2cmDataset
53 | train_file: 
54 | voxel_size: 0.05
55 | data_dir: data
56 | sampled_inds: 
57 | temporal_dilation: 30
58 | temporal_numseq: 3
59 | point_lim: -1
60 | pre_point_lim: -1
61 | batch_size: 16
62 | val_batch_size: 1
63 | test_batch_size: 1
64 | cache_data: False
65 | num_workers: 0
66 | ignore_label: -1
67 | return_transformation: False
68 | ignore_duplicate_class: False
69 | partial_crop: 0
70 | train_limit_numpoints: 0
71 | 
72 | # Point Cloud Dataset
73 | synthia_path: /home/chrischoy/datasets/Synthia/Synthia4D
74 | # For temporal sequences
75 | synthia_camera_path: /home/chrischoy/datasets/Synthia/%s/CameraParams/
76 | synthia_camera_intrinsic_file: intrinsics.txt
77 | synthia_camera_extrinsics_file: Stereo_Right/Omni_F/%s.txt
78 | temporal_rand_dilation: False
79 | temporal_rand_numseq: False
80 | 
81 | scannet_path: /home/chrischoy/datasets/scannet/scannet_preprocessed
82 | stanford3d_path: /home/chrischoy/datasets/Stanford3D
83 | 
84 | # For weighted criterions
85 | category_weights: feature_data/scannet200_category_weights.pkl
86 | 
87 | train:
88 | # Training / test parameters
89 | is_train: True
90 | stat_freq: 40
91 | val_freq: 1000
92 | empty_cache_freq: 1
93 | train_phase: train
94 | val_phase: val
95 | overwrite_weights: True
96 | resume: ''
97 | resume_optimizer: True
98 | eval_upsample: False
99 | lenient_weight_loading: False
100 | 
101 | # Distributed Training configurations
102 | distributed:
103 | distributed_world_size: 8
104 | distributed_rank: 0
105 | distributed_backend: nccl
106 | distributed_init_method: 
107 | distributed_port: 10010
108 | device_id: 0
109 | distributed_no_spawn: True
110 | ddp_backend: c10d #['c10d', 'no_c10d']
111 | bucket_cap_mb: 25
112 | 
113 | # Data augmentation
114 | augmentation:
115 | use_feat_aug: True
116 | data_aug_color_trans_ratio: 0.10
117 | data_aug_color_jitter_std: 0.05
118 | normalize_color: True
119 | data_aug_scale_min: 0.9
120 | data_aug_scale_max: 1.1
121 | data_aug_hue_max: 0.5
122 | data_aug_saturation_max: 0.2
123 | 
124 | # Test
125 | test:
126 | test_phase: test
127 | test_stat_freq: 100
128 | evaluate_benchmark: False
129 | dual_set_cluster: False
130 | visualize: False
131 | visualize_path: './visualize'
132 | 
133 | # Misc
134 | misc:
135 | is_cuda: True
136 | load_path: 
137 | log_step: 50
138 | log_level: INFO #['INFO', 'DEBUG', 'WARN']
139 | num_gpus: 1
140 | seed: 123
141 | log_dir: outputs/default
142 | # New configs for experimental sweeps
143 | load_bn: all_bn
144 | resume_config: 
145 | train_stuff: False
146 | wandb_id: ''
147 | 
148 | defaults:
149 | - hydra/launcher: submitit_slurm
150 | - hydra/hydra_logging: colorlog
151 | 
152 | hydra:
153 | run:
154 | dir: ${misc.log_dir}
155 | sweep:
156 | dir: ${misc.log_dir}
157 | launcher:
158 | partition: priority
159 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j
160 | name: ${hydra.job.name}
161 | timeout_min: 1440
162 | cpus_per_task: 80
163 | gpus_per_node: 8
164 | tasks_per_node: 1
165 | mem_gb: 64
166 | nodes: 1
167 | comment: CriticalExp
168 | max_num_timeout: 3
169 | 
--------------------------------------------------------------------------------
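A hedged sketch of inspecting and overriding this Hydra-style config with OmegaConf; the file path follows the repo layout, but the project's actual entry point (ddp_main.py) may wire this up differently:

from omegaconf import OmegaConf

cfg = OmegaConf.load('downstream/insseg/config/default.yaml')
# Dotted access mirrors the YAML nesting; CLI-style overrides merge the same way
print(cfg.optimizer.lr)  # 0.05
cfg = OmegaConf.merge(cfg, OmegaConf.from_dotlist(['optimizer.lr=0.01']))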
/models/modules/resnet_block.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | from models.modules.common import ConvType, NormType, get_norm, conv
4 | 
5 | from MinkowskiEngine import MinkowskiReLU
6 | 
7 | 
8 | class BasicBlockBase(nn.Module):
9 | expansion = 1
10 | NORM_TYPE = NormType.BATCH_NORM
11 | 
12 | def __init__(self,
13 | inplanes,
14 | planes,
15 | stride=1,
16 | dilation=1,
17 | downsample=None,
18 | conv_type=ConvType.HYPERCUBE,
19 | bn_momentum=0.1,
20 | D=3):
21 | super(BasicBlockBase, self).__init__()
22 | 
23 | self.conv1 = conv(
24 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D)
25 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum)
26 | self.conv2 = conv(
27 | planes,
28 | planes,
29 | kernel_size=3,
30 | stride=1,
31 | dilation=dilation,
32 | bias=False,
33 | conv_type=conv_type,
34 | D=D)
35 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum)
36 | self.relu = MinkowskiReLU(inplace=True)
37 | self.downsample = downsample
38 | self.inplanes = inplanes
39 | self.planes = planes
40 | 
41 | def forward(self, x):
42 | residual = x
43 | 
44 | out = self.conv1(x)
45 | out = self.norm1(out)
46 | out = self.relu(out)
47 | 
48 | 
out = self.conv2(out) 49 | out = self.norm2(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out += residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class BasicBlock(BasicBlockBase): 61 | NORM_TYPE = NormType.BATCH_NORM 62 | 63 | 64 | class BasicBlockIN(BasicBlockBase): 65 | NORM_TYPE = NormType.INSTANCE_NORM 66 | 67 | 68 | class BasicBlockINBN(BasicBlockBase): 69 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 70 | 71 | 72 | class BottleneckBase(nn.Module): 73 | expansion = 4 74 | NORM_TYPE = NormType.BATCH_NORM 75 | 76 | def __init__(self, 77 | inplanes, 78 | planes, 79 | stride=1, 80 | dilation=1, 81 | downsample=None, 82 | conv_type=ConvType.HYPERCUBE, 83 | bn_momentum=0.1, 84 | D=3): 85 | super(BottleneckBase, self).__init__() 86 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 87 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 88 | 89 | self.conv2 = conv( 90 | planes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 91 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 92 | 93 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 94 | self.norm3 = get_norm(self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum) 95 | 96 | self.relu = MinkowskiReLU(inplace=True) 97 | self.downsample = downsample 98 | 99 | def forward(self, x): 100 | residual = x 101 | 102 | out = self.conv1(x) 103 | out = self.norm1(out) 104 | out = self.relu(out) 105 | 106 | out = self.conv2(out) 107 | out = self.norm2(out) 108 | out = self.relu(out) 109 | 110 | out = self.conv3(out) 111 | out = self.norm3(out) 112 | 113 | if self.downsample is not None: 114 | residual = self.downsample(x) 115 | 116 | out += residual 117 | out = self.relu(out) 118 | 119 | return out 120 | 121 | 122 | class Bottleneck(BottleneckBase): 123 | NORM_TYPE = NormType.BATCH_NORM 124 | 125 | 126 | class BottleneckIN(BottleneckBase): 127 | NORM_TYPE = NormType.INSTANCE_NORM 128 | 129 | 130 | class BottleneckINBN(BottleneckBase): 131 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 132 | 133 | 134 | class NoReluBlock(BasicBlockBase): 135 | 136 | def __init__(self, source_block): 137 | 138 | super(NoReluBlock, self).__init__(inplanes=source_block.inplanes, planes=source_block.planes) 139 | 140 | self.conv1 = source_block.conv1 141 | self.norm1 = source_block.norm1 142 | 143 | self.conv2 = source_block.conv2 144 | self.norm2 = source_block.norm2 145 | 146 | def forward(self, x): 147 | residual = x 148 | 149 | out = self.conv1(x) 150 | out = self.norm1(out) 151 | out = self.relu(out) 152 | 153 | out = self.conv2(out) 154 | out = self.norm2(out) 155 | 156 | if self.downsample is not None: 157 | residual = self.downsample(x) 158 | 159 | out += residual 160 | 161 | return out 162 | -------------------------------------------------------------------------------- /lib/losses/FocalLoss.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | 9 | class FocalLoss(nn.Module): 10 | """ Focal Loss, as described in https://arxiv.org/abs/1708.02002. 11 | 12 | It is essentially an enhancement to cross entropy loss and is 13 | useful for classification tasks when there is a large class imbalance. 14 | x is expected to contain raw, unnormalized scores for each class. 
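In other words, x holds logits: log-softmax is applied internally, so no softmax should be applied beforehand.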
15 | y is expected to contain class labels. 16 | 17 | Shape: 18 | - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0. 19 | - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0. 20 | """ 21 | 22 | def __init__(self, 23 | alpha: Optional[Tensor] = None, 24 | gamma: float = 0., 25 | reduction: str = 'mean', 26 | ignore_index: int = -100): 27 | """Constructor. 28 | 29 | Args: 30 | alpha (Tensor, optional): Weights for each class. Defaults to None. 31 | gamma (float, optional): A constant, as described in the paper. 32 | Defaults to 0. 33 | reduction (str, optional): 'mean', 'sum' or 'none'. 34 | Defaults to 'mean'. 35 | ignore_index (int, optional): class label to ignore. 36 | Defaults to -100. 37 | """ 38 | if reduction not in ('mean', 'sum', 'none'): 39 | raise ValueError( 40 | 'Reduction must be one of: "mean", "sum", "none".') 41 | 42 | super().__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.ignore_index = ignore_index 46 | self.reduction = reduction 47 | 48 | self.nll_loss = nn.NLLLoss( 49 | weight=alpha, reduction='none', ignore_index=ignore_index) 50 | 51 | def __repr__(self): 52 | arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction'] 53 | arg_vals = [self.__dict__[k] for k in arg_keys] 54 | arg_strs = [f'{k}={v}' for k, v in zip(arg_keys, arg_vals)] 55 | arg_str = ', '.join(arg_strs) 56 | return f'{type(self).__name__}({arg_str})' 57 | 58 | def forward(self, x: Tensor, y: Tensor) -> Tensor: 59 | if x.ndim > 2: 60 | # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C) 61 | c = x.shape[1] 62 | x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c) 63 | # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,) 64 | y = y.view(-1) 65 | 66 | unignored_mask = y != self.ignore_index 67 | y = y[unignored_mask] 68 | if len(y) == 0: 69 | return 0. 70 | x = x[unignored_mask] 71 | 72 | # compute weighted cross entropy term: -alpha * log(pt) 73 | # (alpha is already part of self.nll_loss) 74 | log_p = F.log_softmax(x, dim=-1) 75 | ce = self.nll_loss(log_p, y) 76 | 77 | # get true class column from each row 78 | all_rows = torch.arange(len(x)) 79 | log_pt = log_p[all_rows, y] 80 | 81 | # compute focal term: (1 - pt)^gamma 82 | pt = log_pt.exp() 83 | focal_term = (1 - pt)**self.gamma 84 | 85 | # the full loss: -alpha * ((1 - pt)^gamma) * log(pt) 86 | loss = focal_term * ce 87 | 88 | if self.reduction == 'mean': 89 | loss = loss.mean() 90 | elif self.reduction == 'sum': 91 | loss = loss.sum() 92 | 93 | return loss 94 | 95 | 96 | def focal_loss(alpha: Optional[Sequence] = None, 97 | gamma: float = 0., 98 | reduction: str = 'mean', 99 | ignore_index: int = -100, 100 | device='cpu', 101 | dtype=torch.float32) -> FocalLoss: 102 | """Factory function for FocalLoss. 103 | 104 | Args: 105 | alpha (Sequence, optional): Weights for each class. Will be converted 106 | to a Tensor if not None. Defaults to None. 107 | gamma (float, optional): A constant, as described in the paper. 108 | Defaults to 0. 109 | reduction (str, optional): 'mean', 'sum' or 'none'. 110 | Defaults to 'mean'. 111 | ignore_index (int, optional): class label to ignore. 112 | Defaults to -100. 113 | device (str, optional): Device to move alpha to. Defaults to 'cpu'. 114 | dtype (torch.dtype, optional): dtype to cast alpha to. 115 | Defaults to torch.float32. 
116 | 117 | Returns: 118 | A FocalLoss object 119 | """ 120 | if alpha is not None: 121 | if not isinstance(alpha, Tensor): 122 | alpha = torch.tensor(alpha) 123 | alpha = alpha.to(device=device, dtype=dtype) 124 | 125 | fl = FocalLoss( 126 | alpha=alpha, 127 | gamma=gamma, 128 | reduction=reduction, 129 | ignore_index=ignore_index) 130 | return fl -------------------------------------------------------------------------------- /downstream/insseg/lib/losses/FocalLoss.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | 9 | class FocalLoss(nn.Module): 10 | """ Focal Loss, as described in https://arxiv.org/abs/1708.02002. 11 | 12 | It is essentially an enhancement to cross entropy loss and is 13 | useful for classification tasks when there is a large class imbalance. 14 | x is expected to contain raw, unnormalized scores for each class. 15 | y is expected to contain class labels. 16 | 17 | Shape: 18 | - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0. 19 | - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0. 20 | """ 21 | 22 | def __init__(self, 23 | alpha: Optional[Tensor] = None, 24 | gamma: float = 0., 25 | reduction: str = 'mean', 26 | ignore_index: int = -100): 27 | """Constructor. 28 | 29 | Args: 30 | alpha (Tensor, optional): Weights for each class. Defaults to None. 31 | gamma (float, optional): A constant, as described in the paper. 32 | Defaults to 0. 33 | reduction (str, optional): 'mean', 'sum' or 'none'. 34 | Defaults to 'mean'. 35 | ignore_index (int, optional): class label to ignore. 36 | Defaults to -100. 37 | """ 38 | if reduction not in ('mean', 'sum', 'none'): 39 | raise ValueError( 40 | 'Reduction must be one of: "mean", "sum", "none".') 41 | 42 | super().__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.ignore_index = ignore_index 46 | self.reduction = reduction 47 | 48 | self.nll_loss = nn.NLLLoss( 49 | weight=alpha, reduction='none', ignore_index=ignore_index) 50 | 51 | def __repr__(self): 52 | arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction'] 53 | arg_vals = [self.__dict__[k] for k in arg_keys] 54 | arg_strs = [f'{k}={v}' for k, v in zip(arg_keys, arg_vals)] 55 | arg_str = ', '.join(arg_strs) 56 | return f'{type(self).__name__}({arg_str})' 57 | 58 | def forward(self, x: Tensor, y: Tensor) -> Tensor: 59 | if x.ndim > 2: 60 | # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C) 61 | c = x.shape[1] 62 | x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c) 63 | # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,) 64 | y = y.view(-1) 65 | 66 | unignored_mask = y != self.ignore_index 67 | y = y[unignored_mask] 68 | if len(y) == 0: 69 | return 0. 
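# NOTE: reached only when every label equals ignore_index; a plain Python float 0. is returned, so no gradient flows for this batch.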
70 | x = x[unignored_mask] 71 | 72 | # compute weighted cross entropy term: -alpha * log(pt) 73 | # (alpha is already part of self.nll_loss) 74 | log_p = F.log_softmax(x, dim=-1) 75 | ce = self.nll_loss(log_p, y) 76 | 77 | # get true class column from each row 78 | all_rows = torch.arange(len(x)) 79 | log_pt = log_p[all_rows, y] 80 | 81 | # compute focal term: (1 - pt)^gamma 82 | pt = log_pt.exp() 83 | focal_term = (1 - pt)**self.gamma 84 | 85 | # the full loss: -alpha * ((1 - pt)^gamma) * log(pt) 86 | loss = focal_term * ce 87 | 88 | if self.reduction == 'mean': 89 | loss = loss.mean() 90 | elif self.reduction == 'sum': 91 | loss = loss.sum() 92 | 93 | return loss 94 | 95 | 96 | def focal_loss(alpha: Optional[Sequence] = None, 97 | gamma: float = 0., 98 | reduction: str = 'mean', 99 | ignore_index: int = -100, 100 | device='cpu', 101 | dtype=torch.float32) -> FocalLoss: 102 | """Factory function for FocalLoss. 103 | 104 | Args: 105 | alpha (Sequence, optional): Weights for each class. Will be converted 106 | to a Tensor if not None. Defaults to None. 107 | gamma (float, optional): A constant, as described in the paper. 108 | Defaults to 0. 109 | reduction (str, optional): 'mean', 'sum' or 'none'. 110 | Defaults to 'mean'. 111 | ignore_index (int, optional): class label to ignore. 112 | Defaults to -100. 113 | device (str, optional): Device to move alpha to. Defaults to 'cpu'. 114 | dtype (torch.dtype, optional): dtype to cast alpha to. 115 | Defaults to torch.float32. 116 | 117 | Returns: 118 | A FocalLoss object 119 | """ 120 | if alpha is not None: 121 | if not isinstance(alpha, Tensor): 122 | alpha = torch.tensor(alpha) 123 | alpha = alpha.to(device=device, dtype=dtype) 124 | 125 | fl = FocalLoss( 126 | alpha=alpha, 127 | gamma=gamma, 128 | reduction=reduction, 129 | ignore_index=ignore_index) 130 | return fl -------------------------------------------------------------------------------- /lib/train_test/pl_ClassifierTrainer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | warnings.simplefilter("ignore", RuntimeWarning) 4 | 5 | from torch.utils.data import DataLoader 6 | from lib.train_test.pl_BaselineTrainer import * 7 | 8 | 9 | # This one is only for training a classifier on balanced categories 10 | class ClassifierTrainerModule(BaselineTrainerModule): 11 | 12 | def __init__(self, model, config, dataset): 13 | 14 | super().__init__(model, config, dataset) 15 | 16 | self.anchor_feats = None 17 | self.init_criterions() 18 | 19 | for name, value in list(vars().items()): 20 | if name != "self": 21 | setattr(self, name, value) 22 | 23 | def train_dataloader(self): 24 | 25 | train_dataset = self.DatasetClass(self.config, phase=self.config.train_phase) 26 | 27 | train_data_loader = DataLoader(train_dataset, 28 | num_workers=1, 29 | shuffle=True, 30 | batch_size=self.config.batch_size,) 31 | 32 | self.m_train_dataloader = train_data_loader 33 | 34 | return train_data_loader 35 | 36 | def val_dataloader(self): 37 | 38 | val_dataset = self.DatasetClass(self.config, phase=self.config.val_phase) 39 | 40 | val_data_loader = DataLoader(val_dataset, 41 | num_workers=1, 42 | shuffle=True, 43 | batch_size=self.config.batch_size, ) 44 | self.dataset = val_dataset 45 | self.validation_max_iter = len(val_data_loader) 46 | 47 | self.m_val_dataloader = val_data_loader 48 | 49 | return val_data_loader 50 | 51 | def model_step(self, batch, batch_idx, mode='training'): 52 | 53 | # Feed forward 54 | 
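# batch arrives from the balanced classifier dataloader as a (features, target) pair; the wrapped model returns (class logits, intermediate feature maps)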
features, target = batch 55 | outputs, feature_maps = self(features) 56 | 57 | return {'feature_maps': feature_maps, 58 | 'outputs': outputs, 59 | 'target': target, 60 | 'mode': mode} 61 | 62 | def eval_step(self, model_step_outputs): 63 | 64 | outputs = model_step_outputs['outputs'] 65 | target = model_step_outputs['target'] 66 | valid_mask = target != self.config.ignore_label 67 | 68 | pred = outputs.max(1)[1].detach() 69 | prob = torch.nn.functional.softmax(outputs, dim=1).detach() 70 | loss = self.criterion(outputs, target) 71 | 72 | head_inds, common_inds, tail_inds = self.calculate_split_items(targets=target, dataset=self.dataset) 73 | # Evaluate prediction 74 | if head_inds.sum() > 0: 75 | self.head_losses(nanmean_t(loss[head_inds]), head_inds.sum()) 76 | self.head_scores(pred[head_inds], target[head_inds]) 77 | self.head_accuracy(pred[head_inds], target[head_inds]) 78 | if common_inds.sum() > 0: 79 | self.common_losses(nanmean_t(loss[common_inds]), common_inds.sum()) 80 | self.common_scores(pred[common_inds], target[common_inds]) 81 | self.common_accuracy(pred[common_inds], target[common_inds]) 82 | if tail_inds.sum() > 0: 83 | self.tail_scores(pred[tail_inds], target[tail_inds]) 84 | self.tail_accuracy(pred[tail_inds], target[tail_inds]) 85 | self.tail_losses(nanmean_t(loss[tail_inds]), tail_inds.sum()) 86 | 87 | loss = loss.mean() 88 | self.losses(loss.clone().detach(), target.size(0)) 89 | self.scores(pred[valid_mask], target[valid_mask]) 90 | self.accuracy(pred[valid_mask], target[valid_mask]) 91 | self.iou_scores(pred[valid_mask], target[valid_mask]) 92 | 93 | prediction_dict = {'final_pred': pred, 'final_target': target, 94 | 'output_features': prob} 95 | 96 | loss_dict = {'loss': loss} 97 | 98 | return {**prediction_dict, **loss_dict} 99 | 100 | def calculate_split_items(self, targets, dataset): 101 | 102 | u_values = targets.unique() 103 | 104 | head_inds = torch.zeros(targets.shape, dtype=bool, device=targets.device) 105 | common_inds = torch.zeros(targets.shape, dtype=bool, device=targets.device) 106 | tail_inds = torch.zeros(targets.shape, dtype=bool, device=targets.device) 107 | 108 | # Iterate over unique and update indexer arrays 109 | for unique_target in u_values: 110 | if unique_target.item() in dataset.head_ids: 111 | head_inds[targets == unique_target] = True 112 | elif unique_target.item() in dataset.common_ids: 113 | common_inds[targets == unique_target] = True 114 | if unique_target.item() in dataset.tail_ids: 115 | tail_inds[targets == unique_target] = True 116 | 117 | return head_inds, common_inds, tail_inds 118 | 119 | def on_train_epoch_end(self): 120 | self.m_train_dataloader.dataset.resample_features() 121 | -------------------------------------------------------------------------------- /lib/losses/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from torchmetrics import Metric 5 | 6 | def embedding_loss(embedding, target, feature_clusters, criterion, config): 7 | # Or triplet loss with semi-hard negative sampling 8 | embed_target = feature_clusters.to(target.device)[target] 9 | loss = criterion(embedding, embed_target) 10 | return torch.mean(loss[target != config.ignore_label], dim=1) 11 | 12 | 13 | def sample_categories_for_balancing(loss, config, dataset, targets, outputs=None): 14 | 15 | # This means only the valid inds are kept 16 | 17 | if loss.size(0) != targets.size(0): 18 | valid_mask = targets != config.ignore_label 19 | 
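# the per-point loss was computed on valid points only, so the targets are filtered the same way to keep them aligned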
targets = targets[valid_mask] 20 | 21 | all_inds = torch.arange(loss.size(0)).to(loss.device) 22 | category_inds = torch.arange(dataset.NUM_LABELS).to(targets.device) 23 | 24 | # Skip if ignore label 25 | targets_valid = targets[targets != config.ignore_label] 26 | 27 | head_inds = category_inds[dataset.frequency_organized_cats[:, 0]] 28 | common_inds = category_inds[dataset.frequency_organized_cats[:, 1]] 29 | tail_inds = category_inds[dataset.frequency_organized_cats[:, 2]] 30 | 31 | head_sample_ratio = config.balanced_sample_head_ratio 32 | common_sample_ratio = config.balanced_sample_common_ratio 33 | point_loss_mask = np.zeros(loss.size(0)).astype(bool) 34 | 35 | loss_items = torch.zeros((loss.size(0), 3)).bool().to(loss.device) 36 | 37 | u_values, u_counts = targets_valid.unique(return_counts=True) 38 | for unique_target, unique_count in zip(u_values, u_counts): 39 | # Sample if head or common: 40 | if unique_target in head_inds: 41 | target_inds = all_inds[targets == unique_target] 42 | if head_sample_ratio > 0.: 43 | sampled_targets = np.random.choice(target_inds.cpu().numpy(), 44 | round(head_sample_ratio * unique_count.item()), 45 | replace=False) 46 | else: 47 | sampled_targets = all_inds[targets == unique_target].cpu().numpy() 48 | 49 | point_loss_mask[sampled_targets] = True 50 | loss_items[target_inds, 0] = True 51 | 52 | elif unique_target in common_inds: 53 | target_inds = all_inds[targets == unique_target] 54 | if common_sample_ratio > 0.: 55 | sampled_targets = np.random.choice(target_inds.cpu().numpy(), 56 | round(common_sample_ratio * unique_count.item()), 57 | replace=False) 58 | else: 59 | sampled_targets = all_inds[targets == unique_target].cpu().numpy() 60 | 61 | point_loss_mask[sampled_targets] = True 62 | loss_items[target_inds, 1] = True 63 | else: 64 | # Keep all samples for tail cats 65 | target_inds = all_inds[targets == unique_target].cpu().numpy() 66 | point_loss_mask[target_inds] = True 67 | loss_items[target_inds, 2] = True 68 | 69 | # Calculate split losses 70 | head_loss = loss[loss_items[:, 0]].detach() 71 | common_loss = loss[loss_items[:, 1]].detach() 72 | tail_loss = loss[loss_items[:, 2]].detach() 73 | 74 | # Finally, mask the loss 75 | loss = loss * torch.Tensor(point_loss_mask).to(loss.device) 76 | 77 | return loss.mean(), (head_loss, common_loss, tail_loss), loss_items[targets != config.ignore_label, :] 78 | 79 | 80 | def feature_sim(output_feats, anchor_feats, config): 81 | 82 | # Take only non attributed features if multiple is available 83 | if anchor_feats.dim() == 3: 84 | anchor_feats = anchor_feats[:, 0, :].squeeze() 85 | 86 | if config.representation_distance_type == 'l2': 87 | # need this for memory constraint 88 | D2 = torch.zeros(output_feats.shape[0], anchor_feats.shape[0]).to(output_feats.device) # dist for every point to the anchor category 89 | for i in range(anchor_feats.shape[0]): # iterate over categories 90 | D2[:, i] = (output_feats - anchor_feats[i, :]).pow(2).sum(dim=-1) 91 | return -D2 # we need to return the inverse to find the most similar with argmax 92 | elif config.representation_distance_type == 'l1': 93 | # need this for memory constraint 94 | D1 = torch.zeros(output_feats.shape[0], anchor_feats.shape[0]).to(output_feats.device) # dist for every point to the anchor category 95 | for i in range(anchor_feats.shape[0]): # iterate over categories 96 | d1 = (output_feats - anchor_feats[i, :]).sum(dim=-1) 97 | D1[:, i] = d1 98 | return -D1 # we need to return the inverse to find the most similar with argmax 99 | else: # 
cosine 100 | An = F.normalize(output_feats, p=2, dim=1) 101 | Bn = F.normalize(anchor_feats, p=2, dim=1).unsqueeze(0).expand(An.shape[0], anchor_feats.shape[0], anchor_feats.shape[1]) 102 | Dcos = torch.bmm(An.unsqueeze(1), Bn.transpose(1, 2)) 103 | return Dcos.squeeze() 104 | 105 | 106 | class MetricAverageMeter(Metric): 107 | def __init__(self, ignore_index=-1, dist_sync_on_step=False): 108 | super().__init__(dist_sync_on_step=dist_sync_on_step) 109 | 110 | self.add_state("value", default=torch.tensor(0).float(), dist_reduce_fx="sum") 111 | self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") 112 | self.ignore_index = ignore_index 113 | 114 | def update(self, value, count: int = None): 115 | self.total += count 116 | self.value += torch.tensor(value * count).to(self.device) 117 | 118 | def compute(self): 119 | return self.value / self.total 120 | -------------------------------------------------------------------------------- /downstream/insseg/lib/bfs/ops/src/bfs_cluster.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include <torch/serialize/tensor.h> 8 | #include <torch/extension.h> 9 | #include <ATen/cuda/CUDAContext.h> 10 | #include <c10/cuda/CUDAGuard.h> 11 | #include <vector> 12 | #include <queue> 13 | #include <cassert> 14 | #include <cstdio> 15 | #include <cstdlib> 16 | #include <cstring> 17 | 18 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream); 19 | 20 | 21 | using Int = int32_t; 22 | class ConnectedComponent{ 23 | public: 24 | std::vector<Int> pt_idxs {}; 25 | 26 | ConnectedComponent(){}; 27 | void addPoint(Int pt_idx) 28 | { 29 | pt_idxs.push_back(pt_idx); 30 | 31 | } 32 | }; 33 | using ConnectedComponents = std::vector<ConnectedComponent>; 34 | 35 | /* ================================== ballquery_batch_p ================================== */ 36 | // input xyz: (n, 3) float 37 | // input batch_idxs: (n) int 38 | // input batch_offsets: (B+1) int, batch_offsets[-1] 39 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 40 | // output start_len: (n, 2), int 41 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, at::Tensor start_len_tensor, int n, int meanActive, float radius){ 42 | const float *xyz = xyz_tensor.data<float>(); 43 | const int *batch_idxs = batch_idxs_tensor.data<int>(); 44 | const int *batch_offsets = batch_offsets_tensor.data<int>(); 45 | int *idx = idx_tensor.data<int>(); 46 | int *start_len = start_len_tensor.data<int>(); 47 | 48 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 49 | int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, stream); 50 | return cumsum; 51 | } 52 | 53 | /* ================================== bfs_cluster ================================== */ 54 | ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs, int *start_len, int *visited){ 55 | ConnectedComponent cc; 56 | cc.addPoint(idx); 57 | visited[idx] = 1; 58 | 59 | std::queue<Int> Q; 60 | assert(Q.empty()); 61 | Q.push(idx); 62 | 63 | while(!Q.empty()){ 64 | Int cur = Q.front(); Q.pop(); 65 | int start = start_len[cur * 2]; 66 | int len = start_len[cur * 2 + 1]; 67 | int label_cur = semantic_label[cur]; 68 | for(Int i = start; i < start + len; i++){ 69 | Int idx_i = ball_query_idxs[i]; 70 | if(semantic_label[idx_i] != label_cur) continue; 71 | if(visited[idx_i] == 1) continue; 72 | 73 |
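// neighbour is unvisited and shares the current semantic label: absorb it into the component and extend the BFS frontier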
cc.addPoint(idx_i); 74 | visited[idx_i] = 1; 75 | 76 | Q.push(idx_i); 77 | } 78 | } 79 | return cc; 80 | } 81 | 82 | //input: semantic_label, int, N 83 | //input: ball_query_idxs, Int, (nActive) 84 | //input: start_len, int, (N, 2) 85 | //output: clusters, CCs 86 | int get_clusters(int *semantic_label, Int *ball_query_idxs, int *start_len, const Int nPoint, int threshold, ConnectedComponents &clusters){ 87 | int visited[nPoint] = {0}; 88 | 89 | int sumNPoint = 0; 90 | for(Int i = 0; i < nPoint; i++){ 91 | if(visited[i] == 0){ 92 | ConnectedComponent CC = find_cc(i, semantic_label, ball_query_idxs, start_len, visited); 93 | if((int)CC.pt_idxs.size() >= threshold){ 94 | clusters.push_back(CC); 95 | sumNPoint += (int)CC.pt_idxs.size(); 96 | } 97 | } 98 | } 99 | 100 | return sumNPoint; 101 | } 102 | 103 | void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, int *cluster_offsets){ 104 | for(int i = 0; i < (int)CCs.size(); i++){ 105 | cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size(); 106 | for(int j = 0; j < (int)CCs[i].pt_idxs.size(); j++){ 107 | int idx = CCs[i].pt_idxs[j]; 108 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i; 109 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx; 110 | } 111 | } 112 | } 113 | 114 | //input: semantic_label, int, N 115 | //input: ball_query_idxs, int, (nActive) 116 | //input: start_len, int, (N, 2) 117 | //output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N 118 | //output: cluster_offsets, int (nCluster + 1) 119 | void bfs_cluster(at::Tensor semantic_label_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, 120 | at::Tensor cluster_idxs_tensor, at::Tensor cluster_offsets_tensor, const int N, int threshold){ 121 | int *semantic_label = semantic_label_tensor.data<int>(); 122 | Int *ball_query_idxs = ball_query_idxs_tensor.data<Int>(); 123 | int *start_len = start_len_tensor.data<int>(); 124 | 125 | ConnectedComponents CCs; 126 | int sumNPoint = get_clusters(semantic_label, ball_query_idxs, start_len, N, threshold, CCs); 127 | 128 | int nCluster = (int)CCs.size(); 129 | cluster_idxs_tensor.resize_({sumNPoint, 2}); 130 | cluster_offsets_tensor.resize_({nCluster + 1}); 131 | cluster_idxs_tensor.zero_(); 132 | cluster_offsets_tensor.zero_(); 133 | 134 | int *cluster_idxs = cluster_idxs_tensor.data<int>(); 135 | int *cluster_offsets = cluster_offsets_tensor.data<int>(); 136 | 137 | fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets); 138 | } 139 | 140 | //------------------------------------API------------------------------------------ 141 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 142 | 143 | m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p"); 144 | m.def("bfs_cluster", &bfs_cluster, "bfs_cluster"); 145 | 146 | } 147 | -------------------------------------------------------------------------------- /lib/ext/pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree.
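// Implements three-nearest-neighbour search (three_nn) and inverse-distance-weighted feature interpolation (three_interpolate) plus its gradient, as used by the PointNet++ feature-propagation layers.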
5 | 6 | #include <math.h> 7 | #include <stdio.h> 8 | #include <stdlib.h> 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: unknown(b, n, 3) known(b, m, 3) 13 | // output: dist2(b, n, 3), idx(b, n, 3) 14 | __global__ void three_nn_kernel(int b, int n, int m, 15 | const float *__restrict__ unknown, 16 | const float *__restrict__ known, 17 | float *__restrict__ dist2, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | unknown += batch_index * n * 3; 21 | known += batch_index * m * 3; 22 | dist2 += batch_index * n * 3; 23 | idx += batch_index * n * 3; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | for (int j = index; j < n; j += stride) { 28 | float ux = unknown[j * 3 + 0]; 29 | float uy = unknown[j * 3 + 1]; 30 | float uz = unknown[j * 3 + 2]; 31 | 32 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 33 | int besti1 = 0, besti2 = 0, besti3 = 0; 34 | for (int k = 0; k < m; ++k) { 35 | float x = known[k * 3 + 0]; 36 | float y = known[k * 3 + 1]; 37 | float z = known[k * 3 + 2]; 38 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 39 | if (d < best1) { 40 | best3 = best2; 41 | besti3 = besti2; 42 | best2 = best1; 43 | besti2 = besti1; 44 | best1 = d; 45 | besti1 = k; 46 | } else if (d < best2) { 47 | best3 = best2; 48 | besti3 = besti2; 49 | best2 = d; 50 | besti2 = k; 51 | } else if (d < best3) { 52 | best3 = d; 53 | besti3 = k; 54 | } 55 | } 56 | dist2[j * 3 + 0] = best1; 57 | dist2[j * 3 + 1] = best2; 58 | dist2[j * 3 + 2] = best3; 59 | 60 | idx[j * 3 + 0] = besti1; 61 | idx[j * 3 + 1] = besti2; 62 | idx[j * 3 + 2] = besti3; 63 | } 64 | } 65 | 66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 67 | const float *known, float *dist2, int *idx) { 68 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 69 | three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known, 70 | dist2, idx); 71 | 72 | CUDA_CHECK_ERRORS(); 73 | } 74 | 75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 76 | // output: out(b, c, n) 77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 78 | const float *__restrict__ points, 79 | const int *__restrict__ idx, 80 | const float *__restrict__ weight, 81 | float *__restrict__ out) { 82 | int batch_index = blockIdx.x; 83 | points += batch_index * m * c; 84 | 85 | idx += batch_index * n * 3; 86 | weight += batch_index * n * 3; 87 | 88 | out += batch_index * n * c; 89 | 90 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 91 | const int stride = blockDim.y * blockDim.x; 92 | for (int i = index; i < c * n; i += stride) { 93 | const int l = i / n; 94 | const int j = i % n; 95 | float w1 = weight[j * 3 + 0]; 96 | float w2 = weight[j * 3 + 1]; 97 | float w3 = weight[j * 3 + 2]; 98 | 99 | int i1 = idx[j * 3 + 0]; 100 | int i2 = idx[j * 3 + 1]; 101 | int i3 = idx[j * 3 + 2]; 102 | 103 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 104 | points[l * m + i3] * w3; 105 | } 106 | } 107 | 108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 109 | const float *points, const int *idx, 110 | const float *weight, float *out) { 111 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 112 | three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 113 | b, c, m, n, points, idx, weight, out); 114 | 115 | CUDA_CHECK_ERRORS(); 116 | } 117 | 118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 119 | // output: grad_points(b, c, m) 120 | 121 | __global__ void three_interpolate_grad_kernel( 122 | int b, int c, int n, int m, const float *__restrict__ grad_out, 123 | const int
*__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /downstream/insseg/ddp_main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | import glob 6 | import os 7 | import sys 8 | import hydra 9 | import logging 10 | from omegaconf import OmegaConf 11 | import numpy as np 12 | import torch 13 | from pytorch_lightning.loggers.tensorboard import TensorBoardLogger 14 | from pytorch_lightning.loggers.wandb import WandbLogger 15 | from torch.serialization import default_restore_location 16 | import MinkowskiEngine as ME 17 | 18 | from datasets import load_dataset 19 | from datasets.dataset import initialize_data_loader 20 | from insseg_models import load_model 21 | 22 | from lib.utils import load_state_with_same_shape, count_parameters, randStr 23 | 24 | import pytorch_lightning as pl 25 | from pytorch_lightning.plugins import DDPPlugin, DataParallelPlugin 26 | from pytorch_lightning import Trainer, Callback 27 | from lib.pl_Trainer import SegmentationTrainerModule as TrainerModule 28 | 29 | @hydra.main(config_path='config', config_name='default.yaml') 30 | def main(config): 31 | # Load the configurations 32 | # if os.path.exists('config.yaml'): 33 | # logging.info('===> Loading existing config file') 34 | # config = OmegaConf.load('config.yaml') 35 | # logging.info('===> Loaded existing config file') 36 | logging.info('===> Configurations') 37 | logging.info(config) 38 | 39 | # Dataloader 40 | DatasetClass = load_dataset(config.data.dataset) 41 | logging.info('===> Initializing dataloader') 42 | train_data_loader = initialize_data_loader( 43 | DatasetClass, config, phase=config.train.train_phase, 44 | num_workers=config.data.num_workers, augment_data=True, 45 | shuffle=True, repeat=True, batch_size=config.data.batch_size, 46 | limit_numpoints=config.data.train_limit_numpoints) 47 | 48 | # Model initialization 49 | logging.info('===> Building model') 50 | num_in_channel = train_data_loader.dataset.NUM_IN_CHANNEL 51 | num_labels =
train_data_loader.dataset.NUM_LABELS 52 | NetClass = load_model(config.net.model) 53 | model = NetClass(num_in_channel, num_labels, config) 54 | logging.info('===> Number of trainable parameters: {}: {}'.format(NetClass.__name__, count_parameters(model))) 55 | logging.info(model) 56 | 57 | # Load weights if specified by the parameter. 58 | if config.net.weights != '' and config.net.weights is not None: 59 | 60 | if not os.path.isfile(config.net.weights): 61 | print(f'Weight file {config.net.weights} does not exist') 62 | else: 63 | logging.info('===> Loading weights: ' + config.net.weights) 64 | state = torch.load(config.net.weights, map_location=lambda s, l: default_restore_location(s, 'cpu')) 65 | matched_weights = load_state_with_same_shape(model, state['state_dict']) 66 | model_dict = model.state_dict() 67 | model_dict.update(matched_weights) 68 | model.load_state_dict(model_dict) 69 | 70 | # Use max GPU number 71 | config.misc.num_gpus = min(config.misc.num_gpus, torch.cuda.device_count()) 72 | if config.misc.num_gpus > 1: 73 | model = ME.MinkowskiSyncBatchNorm.convert_sync_batchnorm(model) 74 | 75 | checkpoint_callbacks = [ 76 | pl.callbacks.ModelCheckpoint(dirpath=config.misc.log_dir, monitor="val_miou", mode='max', filename='checkpoint-{val_miou:.2f}-{step}', save_top_k=1, every_n_epochs=1), 77 | pl.callbacks.ModelCheckpoint(dirpath=config.misc.log_dir, monitor="val_map05", mode='max', filename='checkpoint-{val_map05:.2f}-{step}', save_top_k=1, every_n_epochs=1), 78 | ] 79 | 80 | # Setup Resuming 81 | version_num = None 82 | config.misc.wandb_id = randStr() 83 | if config.train.resume != '': 84 | # Remove trailing slash 85 | config.train.resume = config.train.resume[:-1] if config.train.resume[-1] == '/' else config.train.resume 86 | 87 | directories = glob.glob(config.train.resume + '/default/*') 88 | versions = [int(dir.split('_')[-1]) for dir in directories] 89 | list_of_ckpts = glob.glob(config.train.resume + '/*.ckpt') 90 | 91 | if len(list_of_ckpts) > 0: 92 | version_num = max(versions) if len(versions) > 0 else 0 93 | ckpt_steps = np.array([int(ckpt.split('=')[1].split('.')[0]) for ckpt in list_of_ckpts]) 94 | latest_ckpt = list_of_ckpts[np.argmax(ckpt_steps)] 95 | config.train.resume = latest_ckpt 96 | state_params = torch.load(config.train.resume)['hyper_parameters'] 97 | 98 | print('Resuming: ', config.train.resume) 99 | 100 | if 'wandb_id' in state_params and state_params['wandb_id'] != '': 101 | config.misc.wandb_id = state_params['wandb_id'] 102 | else: 103 | config.train.resume = None 104 | else: 105 | config.train.resume = None 106 | 107 | # Init Loggers 108 | run_name = config.net.model + '-' + config.data.dataset if config.train.is_train else config.net.model + "_test" 109 | tensorboard_logger = TensorBoardLogger(config.misc.log_dir, default_hp_metric=False, version=version_num) 110 | #wandb_logger = WandbLogger(project="3DInsseg", name=run_name, log_model=False, id=config.misc.wandb_id) 111 | loggers = [tensorboard_logger] # , wandb_logger 112 | 113 | # Init PL and start 114 | pl_module = TrainerModule(model, config, train_data_loader.dataset) 115 | trainer = Trainer(max_epochs=config.optimizer.max_iter // len(train_data_loader), logger=loggers, 116 | devices=config.misc.num_gpus, accelerator="gpu", strategy=DDPPlugin(find_unused_parameters=True), 117 | num_sanity_val_steps=0, accumulate_grad_batches=1, 118 | callbacks=[*checkpoint_callbacks]) 119 | if config.train.is_train: 120 | trainer.fit(pl_module, ckpt_path=config.train.resume) 121 | else: 122 |
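# evaluation-only run: restore the resumed checkpoint and execute the test loop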
trainer.test(pl_module, ckpt_path=config.train.resume) 123 | 124 | 125 | if __name__ == '__main__': 126 | __spec__ = None 127 | main() 128 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | import MinkowskiEngine as ME 4 | 5 | from models.model import Model 6 | from models.modules.common import ConvType, NormType, get_norm, conv, sum_pool 7 | from models.modules.resnet_block import BasicBlock, Bottleneck 8 | 9 | 10 | class ResNetBase(Model): 11 | BLOCK = None 12 | LAYERS = () 13 | INIT_DIM = 64 14 | PLANES = (64, 128, 256, 512) 15 | OUT_PIXEL_DIST = 32 16 | HAS_LAST_BLOCK = False 17 | CONV_TYPE = ConvType.HYPERCUBE 18 | 19 | def __init__(self, in_channels, out_channels, config, D=3, **kwargs): 20 | assert self.BLOCK is not None 21 | assert self.OUT_PIXEL_DIST > 0 22 | 23 | super(ResNetBase, self).__init__(in_channels, out_channels, config, D, **kwargs) 24 | 25 | self.network_initialization(in_channels, out_channels, config, D) 26 | self.weight_initialization() 27 | 28 | def network_initialization(self, in_channels, out_channels, config, D): 29 | 30 | def space_n_time_m(n, m): 31 | return n if D == 3 else [n, n, n, m] 32 | 33 | if D == 4: 34 | self.OUT_PIXEL_DIST = space_n_time_m(self.OUT_PIXEL_DIST, 1) 35 | 36 | dilations = config.dilations 37 | bn_momentum = config.bn_momentum 38 | self.inplanes = self.INIT_DIM 39 | self.conv1 = conv( 40 | in_channels, 41 | self.inplanes, 42 | kernel_size=space_n_time_m(config.conv1_kernel_size, 1), 43 | stride=1, 44 | D=D) 45 | 46 | self.bn1 = get_norm(NormType.BATCH_NORM, self.inplanes, D=self.D, bn_momentum=bn_momentum) 47 | self.relu = ME.MinkowskiReLU(inplace=True) 48 | self.pool = sum_pool(kernel_size=space_n_time_m(2, 1), stride=space_n_time_m(2, 1), D=D) 49 | 50 | self.layer1 = self._make_layer( 51 | self.BLOCK, 52 | self.PLANES[0], 53 | self.LAYERS[0], 54 | stride=space_n_time_m(2, 1), 55 | dilation=space_n_time_m(dilations[0], 1)) 56 | self.layer2 = self._make_layer( 57 | self.BLOCK, 58 | self.PLANES[1], 59 | self.LAYERS[1], 60 | stride=space_n_time_m(2, 1), 61 | dilation=space_n_time_m(dilations[1], 1)) 62 | self.layer3 = self._make_layer( 63 | self.BLOCK, 64 | self.PLANES[2], 65 | self.LAYERS[2], 66 | stride=space_n_time_m(2, 1), 67 | dilation=space_n_time_m(dilations[2], 1)) 68 | self.layer4 = self._make_layer( 69 | self.BLOCK, 70 | self.PLANES[3], 71 | self.LAYERS[3], 72 | stride=space_n_time_m(2, 1), 73 | dilation=space_n_time_m(dilations[3], 1)) 74 | 75 | self.final = conv( 76 | self.PLANES[3] * self.BLOCK.expansion, out_channels, kernel_size=1, bias=True, D=D) 77 | 78 | def weight_initialization(self): 79 | for m in self.modules(): 80 | if isinstance(m, ME.MinkowskiBatchNorm): 81 | nn.init.constant_(m.bn.weight, 1) 82 | nn.init.constant_(m.bn.bias, 0) 83 | 84 | def _make_layer(self, 85 | block, 86 | planes, 87 | blocks, 88 | stride=1, 89 | dilation=1, 90 | norm_type=NormType.BATCH_NORM, 91 | bn_momentum=0.1): 92 | downsample = None 93 | if stride != 1 or self.inplanes != planes * block.expansion: 94 | downsample = nn.Sequential( 95 | conv( 96 | self.inplanes, 97 | planes * block.expansion, 98 | kernel_size=1, 99 | stride=stride, 100 | bias=False, 101 | D=self.D), 102 | get_norm(norm_type, planes * block.expansion, D=self.D, bn_momentum=bn_momentum), 103 | ) 104 | layers = [] 105 | layers.append( 106 | block( 107 | self.inplanes, 108 | planes, 109 | stride=stride, 110 | 
dilation=dilation, 111 | downsample=downsample, 112 | conv_type=self.CONV_TYPE, 113 | D=self.D)) 114 | self.inplanes = planes * block.expansion 115 | for i in range(1, blocks): 116 | layers.append( 117 | block( 118 | self.inplanes, 119 | planes, 120 | stride=1, 121 | dilation=dilation, 122 | conv_type=self.CONV_TYPE, 123 | D=self.D)) 124 | 125 | return nn.Sequential(*layers) 126 | 127 | def forward(self, x): 128 | x = self.conv1(x) 129 | x = self.bn1(x) 130 | x = self.relu(x) 131 | x = self.pool(x) 132 | 133 | x = self.layer1(x) 134 | x = self.layer2(x) 135 | x = self.layer3(x) 136 | x = self.layer4(x) 137 | 138 | x = self.final(x) 139 | return x 140 | 141 | 142 | class ResNet14(ResNetBase): 143 | BLOCK = BasicBlock 144 | LAYERS = (1, 1, 1, 1) 145 | 146 | 147 | class ResNet18(ResNetBase): 148 | BLOCK = BasicBlock 149 | LAYERS = (2, 2, 2, 2) 150 | 151 | 152 | class ResNet34(ResNetBase): 153 | BLOCK = BasicBlock 154 | LAYERS = (3, 4, 6, 3) 155 | 156 | 157 | class ResNet50(ResNetBase): 158 | BLOCK = Bottleneck 159 | LAYERS = (3, 4, 6, 3) 160 | 161 | 162 | class ResNet101(ResNetBase): 163 | BLOCK = Bottleneck 164 | LAYERS = (3, 4, 23, 3) 165 | 166 | 167 | class STResNetBase(ResNetBase): 168 | 169 | CONV_TYPE = ConvType.SPATIAL_HYPERCUBE_TEMPORAL_HYPERCROSS 170 | 171 | def __init__(self, in_channels, out_channels, config, D=4, **kwargs): 172 | super(STResNetBase, self).__init__(in_channels, out_channels, config, D, **kwargs) 173 | 174 | 175 | class STResNet14(STResNetBase, ResNet14): 176 | pass 177 | 178 | 179 | class STResNet18(STResNetBase, ResNet18): 180 | pass 181 | 182 | 183 | class STResNet34(STResNetBase, ResNet34): 184 | pass 185 | 186 | 187 | class STResNet50(STResNetBase, ResNet50): 188 | pass 189 | 190 | 191 | class STResNet101(STResNetBase, ResNet101): 192 | pass 193 | 194 | 195 | class STResTesseractNetBase(STResNetBase): 196 | CONV_TYPE = ConvType.HYPERCUBE 197 | 198 | 199 | class STResTesseractNet14(STResTesseractNetBase, STResNet14): 200 | pass 201 | 202 | 203 | class STResTesseractNet18(STResTesseractNetBase, STResNet18): 204 | pass 205 | 206 | 207 | class STResTesseractNet34(STResTesseractNetBase, STResNet34): 208 | pass 209 | 210 | 211 | class STResTesseractNet50(STResTesseractNetBase, STResNet50): 212 | pass 213 | 214 | 215 | class STResTesseractNet101(STResTesseractNetBase, STResNet101): 216 | pass 217 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/evaluation/scannet_benchmark_utils/scripts/util_3d.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import json 3 | 4 | try: 5 | import numpy as np 6 | except: 7 | print("Failed to import numpy package.") 8 | sys.exit(-1) 9 | 10 | try: 11 | from plyfile import PlyData, PlyElement 12 | except: 13 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 14 | print("pip install plyfile") 15 | sys.exit(-1) 16 | 17 | import util 18 | 19 | 20 | # matrix: 4x4 np array 21 | # points Nx3 np array 22 | def transform_points(matrix, points): 23 | assert len(points.shape) == 2 and points.shape[1] == 3 24 | num_points = points.shape[0] 25 | p = np.concatenate([points, np.ones((num_points, 1))], axis=1) 26 | p = np.matmul(matrix, np.transpose(p)) 27 | p = np.transpose(p) 28 | p[:,:3] /= p[:,3,None] 29 | return p[:,:3] 30 | 31 | 32 | def export_ids(filename, ids): 33 | with open(filename, 'w') as f: 34 | for id in ids: 35 | f.write('%d\n' % id) 36 | 37 | 38 | def load_ids(filename): 39 | ids = 
open(filename).read().splitlines() 40 | ids = np.array(ids, dtype=np.int64) 41 | return ids 42 | 43 | 44 | def read_mesh_vertices(filename): 45 | assert os.path.isfile(filename) 46 | with open(filename, 'rb') as f: 47 | plydata = PlyData.read(f) 48 | num_verts = plydata['vertex'].count 49 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 50 | vertices[:,0] = plydata['vertex'].data['x'] 51 | vertices[:,1] = plydata['vertex'].data['y'] 52 | vertices[:,2] = plydata['vertex'].data['z'] 53 | return vertices 54 | 55 | 56 | # export 3d instance labels for instance evaluation 57 | def export_instance_ids_for_eval(filename, label_ids, instance_ids): 58 | assert label_ids.shape[0] == instance_ids.shape[0] 59 | output_mask_path_relative = 'predicted_masks' 60 | name = os.path.splitext(os.path.basename(filename))[0] 61 | output_mask_path = os.path.join(os.path.dirname(filename), output_mask_path_relative) 62 | if not os.path.isdir(output_mask_path): 63 | os.mkdir(output_mask_path) 64 | insts = np.unique(instance_ids) 65 | zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32) 66 | with open(filename, 'w') as f: 67 | for idx, inst_id in enumerate(insts): 68 | if inst_id == 0: # 0 -> no instance for this vertex 69 | continue 70 | 71 | loc = np.where(instance_ids == inst_id) 72 | label_id = label_ids[loc[0][0]] 73 | 74 | # write mask indexing 75 | output_mask_file_relavtive = os.path.join(output_mask_path_relative, name + '_' + str(idx) + '.txt') 76 | f.write('%s %d %f\n' % (output_mask_file_relavtive, label_id, 1.0)) 77 | 78 | # write mask 79 | mask = np.copy(zero_mask) 80 | mask[loc[0]] = 1 81 | output_mask_file = os.path.join(output_mask_path, name + '_' + str(idx) + '.txt') 82 | export_ids(output_mask_file, mask) 83 | 84 | 85 | 86 | # ------------ Instance Utils ------------ # 87 | 88 | class Instance(object): 89 | instance_id = 0 90 | label_id = 0 91 | vert_count = 0 92 | med_dist = -1 93 | dist_conf = 0.0 94 | 95 | def __init__(self, mesh_vert_instances, instance_id): 96 | if (instance_id == -1): 97 | return 98 | self.instance_id = int(instance_id) 99 | self.label_id = int(self.get_label_id(instance_id)) 100 | self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id)) 101 | 102 | def get_label_id(self, instance_id): 103 | return int(instance_id // 1000) 104 | 105 | def get_instance_verts(self, mesh_vert_instances, instance_id): 106 | return (mesh_vert_instances == instance_id).sum() 107 | 108 | def to_json(self): 109 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 110 | 111 | def to_dict(self): 112 | dict = {} 113 | dict["instance_id"] = self.instance_id 114 | dict["label_id"] = self.label_id 115 | dict["vert_count"] = self.vert_count 116 | dict["med_dist"] = self.med_dist 117 | dict["dist_conf"] = self.dist_conf 118 | return dict 119 | 120 | def from_json(self, data): 121 | self.instance_id = int(data["instance_id"]) 122 | self.label_id = int(data["label_id"]) 123 | self.vert_count = int(data["vert_count"]) 124 | if ("med_dist" in data): 125 | self.med_dist = float(data["med_dist"]) 126 | self.dist_conf = float(data["dist_conf"]) 127 | 128 | def __str__(self): 129 | return "("+str(self.instance_id)+")" 130 | 131 | def read_instance_prediction_file(filename, pred_path): 132 | lines = open(filename).read().splitlines() 133 | instance_info = {} 134 | abs_pred_path = os.path.abspath(pred_path) 135 | for line in lines: 136 | parts = line.split(' ') 137 | if len(parts) != 3: 138 | util.print_error('invalid instance 
prediction file. Expected (per line): [rel path prediction] [label id prediction] [confidence prediction]') 139 | if os.path.isabs(parts[0]): 140 | util.print_error('invalid instance prediction file. First entry in line must be a relative path') 141 | mask_file = os.path.join(os.path.dirname(filename), parts[0]) 142 | mask_file = os.path.abspath(mask_file) 143 | # check that mask_file lives inside prediction path 144 | if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path: 145 | util.print_error('predicted mask {} in prediction text file {} points outside of prediction path.'.format(mask_file, filename)) 146 | 147 | info = {} 148 | info["label_id"] = int(float(parts[1])) 149 | info["conf"] = float(parts[2]) 150 | instance_info[mask_file] = info 151 | return instance_info 152 | 153 | 154 | def get_instances(ids, class_ids, class_labels, id2label): 155 | instances = {} 156 | for label in class_labels: 157 | instances[label] = [] 158 | instance_ids = np.unique(ids) 159 | for id in instance_ids: 160 | if id == 0: 161 | continue 162 | inst = Instance(ids, id) 163 | if inst.label_id in class_ids: 164 | instances[id2label[inst.label_id]].append(inst.to_dict()) 165 | return instances 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/evaluation/scannet_benchmark_utils/util_3d.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import json 3 | 4 | try: 5 | import numpy as np 6 | except: 7 | print("Failed to import numpy package.") 8 | sys.exit(-1) 9 | 10 | try: 11 | from plyfile import PlyData, PlyElement 12 | except: 13 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 14 | print("pip install plyfile") 15 | sys.exit(-1) 16 | 17 | from . 
import util 18 | 19 | 20 | # matrix: 4x4 np array 21 | # points Nx3 np array 22 | def transform_points(matrix, points): 23 | assert len(points.shape) == 2 and points.shape[1] == 3 24 | num_points = points.shape[0] 25 | p = np.concatenate([points, np.ones((num_points, 1))], axis=1) 26 | p = np.matmul(matrix, np.transpose(p)) 27 | p = np.transpose(p) 28 | p[:,:3] /= p[:,3,None] 29 | return p[:,:3] 30 | 31 | 32 | def export_ids(filename, ids): 33 | with open(filename, 'w') as f: 34 | for id in ids: 35 | f.write('%d\n' % id) 36 | 37 | 38 | def load_ids(filename): 39 | ids = open(filename).read().splitlines() 40 | ids = np.array(ids, dtype=np.int64) 41 | return ids 42 | 43 | 44 | def read_mesh_vertices(filename): 45 | assert os.path.isfile(filename) 46 | with open(filename, 'rb') as f: 47 | plydata = PlyData.read(f) 48 | num_verts = plydata['vertex'].count 49 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 50 | vertices[:,0] = plydata['vertex'].data['x'] 51 | vertices[:,1] = plydata['vertex'].data['y'] 52 | vertices[:,2] = plydata['vertex'].data['z'] 53 | return vertices 54 | 55 | 56 | # export 3d instance labels for instance evaluation 57 | def export_instance_ids_for_eval(filename, label_ids, instance_ids): 58 | assert label_ids.shape[0] == instance_ids.shape[0] 59 | output_mask_path_relative = 'predicted_masks' 60 | name = os.path.splitext(os.path.basename(filename))[0] 61 | output_mask_path = os.path.join(os.path.dirname(filename), output_mask_path_relative) 62 | if not os.path.isdir(output_mask_path): 63 | os.mkdir(output_mask_path) 64 | insts = np.unique(instance_ids) 65 | zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32) 66 | with open(filename, 'w') as f: 67 | for idx, inst_id in enumerate(insts): 68 | if inst_id == 0: # 0 -> no instance for this vertex 69 | continue 70 | 71 | loc = np.where(instance_ids == inst_id) 72 | label_id = label_ids[loc[0][0]] 73 | 74 | # write mask indexing 75 | output_mask_file_relavtive = os.path.join(output_mask_path_relative, name + '_' + str(idx) + '.txt') 76 | f.write('%s %d %f\n' % (output_mask_file_relavtive, label_id, 1.0)) 77 | 78 | # write mask 79 | mask = np.copy(zero_mask) 80 | mask[loc[0]] = 1 81 | output_mask_file = os.path.join(output_mask_path, name + '_' + str(idx) + '.txt') 82 | export_ids(output_mask_file, mask) 83 | 84 | 85 | 86 | # ------------ Instance Utils ------------ # 87 | 88 | class Instance(object): 89 | instance_id = 0 90 | label_id = 0 91 | vert_count = 0 92 | med_dist = -1 93 | dist_conf = 0.0 94 | 95 | def __init__(self, mesh_vert_instances, instance_id): 96 | if (instance_id == -1): 97 | return 98 | self.instance_id = int(instance_id) 99 | self.label_id = int(self.get_label_id(instance_id)) 100 | self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id)) 101 | 102 | def get_label_id(self, instance_id): 103 | return int(instance_id // 1000) 104 | 105 | def get_instance_verts(self, mesh_vert_instances, instance_id): 106 | return (mesh_vert_instances == instance_id).sum() 107 | 108 | def to_json(self): 109 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 110 | 111 | def to_dict(self): 112 | dict = {} 113 | dict["instance_id"] = self.instance_id 114 | dict["label_id"] = self.label_id 115 | dict["vert_count"] = self.vert_count 116 | dict["med_dist"] = self.med_dist 117 | dict["dist_conf"] = self.dist_conf 118 | return dict 119 | 120 | def from_json(self, data): 121 | self.instance_id = int(data["instance_id"]) 122 | self.label_id = 
int(data["label_id"]) 123 | self.vert_count = int(data["vert_count"]) 124 | if ("med_dist" in data): 125 | self.med_dist = float(data["med_dist"]) 126 | self.dist_conf = float(data["dist_conf"]) 127 | 128 | def __str__(self): 129 | return "("+str(self.instance_id)+")" 130 | 131 | def read_instance_prediction_file(filename, pred_path): 132 | lines = open(filename).read().splitlines() 133 | instance_info = {} 134 | abs_pred_path = os.path.abspath(pred_path) 135 | for line in lines: 136 | parts = line.split(' ') 137 | if len(parts) != 3: 138 | util.print_error('invalid instance prediction file. Expected (per line): [rel path prediction] [label id prediction] [confidence prediction]') 139 | if os.path.isabs(parts[0]): 140 | util.print_error('invalid instance prediction file. First entry in line must be a relative path') 141 | mask_file = os.path.join(os.path.dirname(filename), parts[0]) 142 | mask_file = os.path.abspath(mask_file) 143 | # check that mask_file lives inside prediction path 144 | if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path: 145 | util.print_error('predicted mask {} in prediction text file {} points outside of prediction path.'.format(mask_file, filename)) 146 | 147 | info = {} 148 | info["label_id"] = int(float(parts[1])) 149 | info["conf"] = float(parts[2]) 150 | instance_info[mask_file] = info 151 | return instance_info 152 | 153 | 154 | def get_instances(ids, class_ids, class_labels, id2label): 155 | instances = {} 156 | for label in class_labels: 157 | instances[label] = [] 158 | instance_ids = np.unique(ids) 159 | for id in instance_ids: 160 | if id == 0: 161 | continue 162 | inst = Instance(ids, id) 163 | if inst.label_id in class_ids: 164 | instances[id2label[inst.label_id]].append(inst.to_dict()) 165 | return instances 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /downstream/insseg/lib/bfs/bfs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch.autograd import Function 5 | import argparse 6 | #from lib.datasets.scannet.datagen.export_ids_per_vertex import read_segmentation, write_triangle_mesh 7 | #from lib.utils.io import read_triangle_mesh, create_color_palette, write_triangle_mesh 8 | #from lib.utils.scannet_benchmark_utils import util_3d 9 | import PG_OP 10 | 11 | 12 | class BallQueryBatchP(Function): 13 | @staticmethod 14 | def forward(ctx, coords, batch_idxs, batch_offsets, radius, meanActive): 15 | ''' 16 | :param ctx: 17 | :param coords: (n, 3) float 18 | :param batch_idxs: (n) int 19 | :param batch_offsets: (B+1) int 20 | :param radius: float 21 | :param meanActive: int 22 | :return: idx (nActive), int 23 | :return: start_len (n, 2), int 24 | ''' 25 | 26 | n = coords.size(0) 27 | 28 | assert coords.is_contiguous() and coords.is_cuda 29 | assert batch_idxs.is_contiguous() and batch_idxs.is_cuda 30 | assert batch_offsets.is_contiguous() and batch_offsets.is_cuda 31 | 32 | while True: 33 | idx = torch.cuda.IntTensor(n * meanActive).zero_() 34 | start_len = torch.cuda.IntTensor(n, 2).zero_() 35 | nActive = PG_OP.ballquery_batch_p(coords, batch_idxs, batch_offsets, idx, start_len, n, meanActive, radius) 36 | if nActive <= n * meanActive: 37 | break 38 | meanActive = int(nActive // n + 1) 39 | idx = idx[:nActive] 40 | 41 | return idx, start_len 42 | 43 | @staticmethod 44 | def backward(ctx, a=None, b=None): 45 | return None, None, None 46 | 47 | ballquery_batch_p = 
BallQueryBatchP.apply 48 | 49 | 50 | class BFSCluster(Function): 51 | @staticmethod 52 | def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold): 53 | ''' 54 | :param ctx: 55 | :param semantic_label: (N), int 56 | :param ball_query_idxs: (nActive), int 57 | :param start_len: (N, 2), int 58 | :return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N 59 | :return: cluster_offsets: int (nCluster + 1) 60 | ''' 61 | 62 | N = start_len.size(0) 63 | 64 | assert semantic_label.is_contiguous() 65 | assert ball_query_idxs.is_contiguous() 66 | assert start_len.is_contiguous() 67 | 68 | cluster_idxs = semantic_label.new() 69 | cluster_offsets = semantic_label.new() 70 | 71 | PG_OP.bfs_cluster(semantic_label, ball_query_idxs, start_len, cluster_idxs, cluster_offsets, N, threshold) 72 | 73 | return cluster_idxs, cluster_offsets 74 | 75 | @staticmethod 76 | def backward(ctx, a=None): 77 | return None 78 | 79 | bfs_cluster = BFSCluster.apply 80 | 81 | 82 | 83 | class Clustering: 84 | def __init__(self, ignored_labels, class_mapping, thresh=0.03, 85 | closed_points=300, 86 | min_points=50, 87 | propose_points=100, 88 | score_func=torch.max) -> None: 89 | self.ignored_labels = ignored_labels 90 | self.thresh = thresh 91 | self.closed_points = closed_points 92 | self.min_points = min_points 93 | self.class_mapping = class_mapping.cuda() 94 | self.propose_points = propose_points 95 | self.score_func=score_func 96 | 97 | def cluster(self, vertices, scores): 98 | labels = torch.max(scores, 1)[1] # (N) long, cuda 99 | proposals_idx, proposals_offset = self.cluster_(vertices, labels.cuda()) 100 | 101 | ## debug 102 | #import ipdb; ipdb.set_trace() 103 | #colors = np.array(create_color_palette())[labels.cpu()] 104 | #write_triangle_mesh(vertices, colors, None, 'semantics.ply') 105 | 106 | # scatter 107 | proposals_pred = torch.zeros((proposals_offset.shape[0] - 1, vertices.shape[0]), dtype=torch.int) # (nProposal, N), int, cuda 108 | proposals_pred[proposals_idx[:, 0].long(), proposals_idx[:, 1].long()] = 1 109 | labels = labels[proposals_idx[:, 1][proposals_offset[:-1].long()].long()] 110 | 111 | proposals_pointnum = proposals_pred.sum(1) 112 | npoint_mask = (proposals_pointnum > self.propose_points) 113 | 114 | proposals_pred = proposals_pred[npoint_mask] 115 | labels = labels[npoint_mask] 116 | return proposals_pred, labels 117 | 118 | def cluster_(self, vertices, labels): 119 | ''' 120 | :param batch_idxs: (N), int, cuda 121 | :labels: 0-19 122 | ''' 123 | batch_idxs = torch.zeros_like(labels) 124 | 125 | 126 | mask_non_ignored = torch.ones_like(labels).bool() 127 | for ignored_label in self.ignored_labels: 128 | mask_non_ignored = mask_non_ignored & (self.class_mapping[labels] != ignored_label) 129 | object_idxs = mask_non_ignored.nonzero().view(-1) 130 | 131 | vertices_ = torch.from_numpy(vertices)[object_idxs].float().cuda() 132 | labels_ = labels[object_idxs].int().cuda() 133 | 134 | if vertices_.numel() == 0: 135 | return torch.zeros((0,2)).int(), torch.zeros(1).int() 136 | 137 | batch_idxs_ = batch_idxs[object_idxs].int().cuda() 138 | batch_offsets_ = torch.FloatTensor([0, object_idxs.shape[0]]).int().cuda() 139 | 140 | idx, start_len = ballquery_batch_p(vertices_, batch_idxs_, batch_offsets_, self.thresh, self.closed_points) 141 | proposals_idx, proposals_offset = bfs_cluster(labels_.cpu(), idx.cpu(), start_len.cpu(), self.min_points) 142 | proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int() 143 | 144 | return proposals_idx, 
proposals_offset 145 | 146 | def get_instances(self, vertices, scores): 147 | proposals_pred, labels = self.cluster(vertices, scores) 148 | instances = {} 149 | for proposal_id in range(len(proposals_pred)): 150 | clusters_i = proposals_pred[proposal_id] 151 | score = scores[clusters_i.bool(), labels[proposal_id]] 152 | score = self.score_func(score) 153 | instances[proposal_id] = {} 154 | instances[proposal_id]['conf'] = score.cpu().numpy() 155 | instances[proposal_id]['label_id'] = self.class_mapping.cpu()[labels[proposal_id]] 156 | instances[proposal_id]['pred_mask'] = clusters_i.cpu().numpy() 157 | return instances 158 | -------------------------------------------------------------------------------- /models/conditional_random_fields.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from MinkowskiEngine import SparseTensor, MinkowskiConvolution, MinkowskiConvolutionFunction, convert_to_int_tensor 6 | from MinkowskiEngine import convert_region_type as me_convert_region_type 7 | 8 | from models.model import HighDimensionalModel 9 | from models.wrapper import Wrapper 10 | from lib.math_functions import SparseMM 11 | from models.modules.common import convert_region_type 12 | 13 | 14 | class MeanField(HighDimensionalModel): 15 | """ 16 | Abstract class for the bilateral and trilateral meanfield 17 | """ 18 | OUT_PIXEL_DIST = 1 19 | 20 | # To use the model, must call initialize_coords before forward pass. 21 | # Once data is processed, call clear to reset the model before calling 22 | # initialize_coords 23 | def __init__(self, nchannels, spatial_sigma, chromatic_sigma, meanfield_iterations, is_temporal, 24 | config, **kwargs): 25 | D = 7 if is_temporal else 6 26 | self.is_temporal = is_temporal 27 | # Setup metadata 28 | super(MeanField, self).__init__(nchannels, nchannels, config, D=D) 29 | 30 | self.spatial_sigma = spatial_sigma 31 | self.chromatic_sigma = chromatic_sigma 32 | # temporal sigma is 1 33 | self.meanfield_iterations = meanfield_iterations 34 | 35 | self.pixel_dist = 1 36 | self.stride = 1 37 | self.dilation = 1 38 | 39 | conv = MinkowskiConvolution( 40 | nchannels, 41 | nchannels, 42 | kernel_size=config.wrapper_kernel_size, 43 | bias=False, 44 | region_type=convert_region_type(config.wrapper_region_type), 45 | dimension=D) 46 | 47 | # Create a region_offset 48 | self.region_type_, self.region_offset_, _ = me_convert_region_type( 49 | conv.region_type, 1, conv.kernel_size, conv.up_stride, conv.dilation, conv.region_offset, 50 | conv.axis_types, conv.dimension) 51 | 52 | # Check whether the mapping is required 53 | self.requires_mapping = False 54 | self.conv = conv 55 | self.kernel = conv.kernel 56 | self.convs = {} 57 | self.softmaxes = {} 58 | for i in range(self.meanfield_iterations): 59 | self.softmaxes[i] = nn.Softmax(dim=1) 60 | self.convs[i] = MinkowskiConvolutionFunction() 61 | 62 | def initialize_coords(self, model, in_coords, in_color): 63 | if torch.prod(convert_to_int_tensor(model.OUT_PIXEL_DIST, model.D)) != 1: 64 | self.requires_mapping = True 65 | 66 | out_coords = model.get_coords(model.OUT_PIXEL_DIST) 67 | out_color = model.permute_feature(in_color, model.OUT_PIXEL_DIST).int() 68 | 69 | # Tri/Bi-lateral grid 70 | out_tri_coords = torch.cat( 71 | [ 72 | (torch.floor(out_coords[:, :3].float() / self.spatial_sigma)).int(), 73 | (torch.floor(out_color.float() / self.chromatic_sigma)).int(), 74 | out_coords[:, 3:] # (time and) batch 75 | ], 
76 | dim=1) 77 | orig_tri_coords = torch.cat( 78 | [ 79 | (torch.floor(in_coords[:, :3].float() / self.spatial_sigma)).int(), 80 | (torch.floor(in_color.float() / self.chromatic_sigma)).int(), 81 | in_coords[:, 3:] # (time and) batch 82 | ], 83 | dim=1) 84 | 85 | crf_tri_coords = torch.cat((out_tri_coords, orig_tri_coords), dim=0) 86 | 87 | # Create a trilateral Grid 88 | # super(MeanField, self).initialize_coords_with_duplicates(crf_tri_coords) 89 | 90 | # Create Sparse matrix mappings to/from the CRF coords 91 | in_cols = self.get_index_map(out_tri_coords, 1) 92 | self.in_mapping = torch.sparse.FloatTensor( 93 | torch.stack((in_cols.long(), torch.arange(in_cols.size(0), out=torch.LongTensor()))), 94 | torch.ones(in_cols.size(0)), torch.Size((self.n_rows, in_cols.size(0)))) 95 | 96 | out_cols = self.get_index_map(orig_tri_coords, 1) 97 | self.out_mapping = torch.sparse.FloatTensor( 98 | torch.stack((torch.arange(out_cols.size(0), out=torch.LongTensor()), out_cols.long())), 99 | torch.ones(out_cols.size(0)), torch.Size((out_cols.size(0), self.n_rows))) 100 | 101 | if self.config.is_cuda: 102 | self.in_mapping, self.out_mapping = self.in_mapping.cuda(), self.out_mapping.cuda() 103 | 104 | else: 105 | self.requires_mapping = False 106 | 107 | out_coords = in_coords 108 | out_color = in_color 109 | crf_tri_coords = torch.cat( 110 | [ 111 | (torch.floor(in_coords[:, :3].float() / self.spatial_sigma)).int(), 112 | (torch.floor(in_color.float() / self.chromatic_sigma)).int(), 113 | in_coords[:, 3:], # (time and) batch 114 | ], 115 | dim=1) 116 | 117 | return crf_tri_coords 118 | 119 | def forward(self, x): 120 | xf = x.F 121 | if self.requires_mapping: 122 | # Map the network output to CRF input 123 | xf = SparseMM()(Variable(self.in_mapping), xf) 124 | 125 | out = xf 126 | for i in range(self.meanfield_iterations): # Meanfield iteration 127 | # Normalization 128 | out = self.softmaxes[i](out) 129 | # Pairwise potential 130 | out = self.convs[i].apply(out, self.conv.kernel, x.pixel_dist, self.conv.stride, 131 | self.conv.kernel_size, self.conv.dilation, self.region_type_, 132 | self.region_offset_, x.coordinate_map_key, x.coords_key, x.coordinate_manager) 133 | # Add unary 134 | out += xf 135 | 136 | if self.requires_mapping: 137 | # Map the CRF output to the original space 138 | out = SparseMM()(Variable(self.out_mapping), out) 139 | 140 | return SparseTensor(out, coordinate_map_key=x.coordinate_map_key, coordinate_manager=x.coords_man) 141 | 142 | 143 | class BilateralCRF(Wrapper): 144 | OUT_PIXEL_DIST = 1 145 | 146 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 147 | self.model = NetClass(in_nchannel, out_nchannel, config) 148 | self.filter = MeanField( 149 | out_nchannel, 150 | spatial_sigma=config.crf_spatial_sigma, 151 | chromatic_sigma=config.crf_chromatic_sigma, 152 | meanfield_iterations=config.meanfield_iterations, 153 | is_temporal=False, 154 | config=config) 155 | 156 | 157 | class TrilateralCRF(Wrapper): 158 | OUT_PIXEL_DIST = 1 159 | 160 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 161 | self.model = NetClass(in_nchannel, out_nchannel, config) 162 | self.filter = MeanField( 163 | out_nchannel, 164 | spatial_sigma=config.crf_spatial_sigma, 165 | chromatic_sigma=config.crf_chromatic_sigma, 166 | meanfield_iterations=config.meanfield_iterations, 167 | is_temporal=True, 168 | config=config) 169 | -------------------------------------------------------------------------------- /lib/datasets/preprocessing/scannet_long.py: 
-------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings('ignore') 3 | 4 | from pathlib import Path 5 | import torch 6 | from concurrent.futures import ProcessPoolExecutor 7 | 8 | from lib.constants.scannet_constants import * 9 | from utils import * 10 | from lib.ext.pointnet2.pointnet2_utils import furthest_point_sample 11 | 12 | # Modify path to point where ScanNet data lives 13 | SCANNET_RAW_PATH = Path('/mnt/data/ScanNet') 14 | SCANNET_OUT_PATH = Path('/mnt/data/Datasets/limited_annotation/scannet_200') 15 | COMBINED_LABEL_NAMES_FILE = 'scannetv2-labels.combined.tsv' 16 | 17 | 18 | in_path = 'scans' 19 | POINTCLOUD_FILE = '_vh_clean_2.ply' 20 | 21 | TRAIN_DEST = 'train' 22 | TEST_DEST = 'test' 23 | SUBSETS = {TRAIN_DEST: 'scans', TEST_DEST: 'scans_test'} 24 | 25 | CLASS_IDs = VALID_CLASS_IDS_LONG 26 | num_threads = 8 27 | 28 | # Limited annotations 29 | min_points_in_instance = 5 30 | ratio_of_annotated = -1 31 | 32 | # Save instances independently 33 | save_instances = False 34 | 35 | # Load categories 36 | print('Load Label map') 37 | labels_pd = pd.read_csv(str(SCANNET_RAW_PATH) + '/' + COMBINED_LABEL_NAMES_FILE, sep='\t', header=0) 38 | labels_pd.loc[labels_pd.raw_category == 'stick', ['category']] = 'object' 39 | labels_pd.loc[labels_pd.category == 'wardrobe ', ['category']] = 'wardrobe' 40 | category_label_names = labels_pd['category'].unique() 41 | 42 | # Preprocess data. 43 | print('Start Preprocess') 44 | def handle_process(path): 45 | 46 | cloud_file = Path(path.split(',')[0]) 47 | segments_file = cloud_file.parent / (cloud_file.stem + '.0.010000.segs.json') 48 | aggregations_file = cloud_file.parent / (cloud_file.stem[:-len('_vh_clean_2')] + '.aggregation.json') 49 | info_file = cloud_file.parent / (cloud_file.stem[:-len('_vh_clean_2')] + '.txt') 50 | phase_out_path = Path(path.split(',')[1]) 51 | 52 | scene_id = cloud_file.stem[:-(len(POINTCLOUD_FILE) - len(cloud_file.suffix))] 53 | print('Processing: ', scene_id, 'in', phase_out_path.name) 54 | 55 | info_dict = {} 56 | with open(info_file) as f: 57 | for line in f: 58 | (key, val) = line.split(" = ") 59 | info_dict[key] = np.fromstring(val, sep=' ') 60 | 61 | if 'axisAlignment' not in info_dict: 62 | rot_matrix = np.identity(4) 63 | else: 64 | rot_matrix = info_dict['axisAlignment'].reshape(4, 4) 65 | 66 | mesh = o3d.io.read_triangle_mesh(str(cloud_file)) 67 | points = np.array(mesh.vertices) 68 | colors = np.round(np.array(mesh.vertex_colors) * 255.) 69 | alphas = (np.ones(points.shape[0]) * 255).reshape(-1, 1) 70 | pointcloud = np.hstack((points, colors, alphas)) 71 | faces_array = np.array(mesh.triangles) 72 | 73 | # Rotate PC to axis aligned 74 | r_points = pointcloud[:, :3].transpose() 75 | r_points = np.append(r_points, np.ones((1, r_points.shape[1])), axis=0) 76 | r_points = np.dot(rot_matrix, r_points) 77 | pointcloud = np.append(r_points.transpose()[:, :3], pointcloud[:, 3:], axis=1) 78 | 79 | # Load segments file 80 | with open(segments_file) as f: 81 | segments = json.load(f) 82 | seg_indices = np.array(segments['segIndices']) 83 | 84 | # Load Aggregations file 85 | with open(aggregations_file) as f: 86 | aggregation = json.load(f) 87 | seg_groups = np.array(aggregation['segGroups']) 88 | 89 | # Make sure alpha value is meaningless. 
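# (A constant alpha of 255 was appended to every vertex above, so exactly one unique value is expected here; anything else would indicate a channel-layout mistake.)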
90 | assert np.unique(pointcloud[:, -1]).size == 1 91 | 92 | # Generate new labels 93 | labelled_pc = np.zeros((pointcloud.shape[0], 1)) 94 | instance_ids = np.zeros((pointcloud.shape[0], 1)) 95 | for group in seg_groups: 96 | segment_points, p_inds, label_id = point_indices_from_group(pointcloud, seg_indices, group, labels_pd, CLASS_IDs) 97 | 98 | # Apply limited annotation if necessary 99 | if ratio_of_annotated != -1 and scene_id in train_scenes: 100 | coords = pointcloud[p_inds, :3] 101 | t_coords = torch.Tensor(coords).cuda().unsqueeze(0) 102 | points_to_sample = max(min_points_in_instance, round(ratio_of_annotated * p_inds.shape[0])) 103 | sampled_inds = furthest_point_sample(t_coords, points_to_sample).squeeze(0).long().cpu().numpy() 104 | p_inds = np.vectorize(lambda p: p_inds[p])(sampled_inds) 105 | else: 106 | sampled_inds = None 107 | 108 | labelled_pc[p_inds] = label_id 109 | instance_ids[p_inds] = group['id'] 110 | 111 | cat_name = labels_pd[labels_pd['id'] == label_id]['category'].iloc[0] if label_id > 0 else 'invalid' 112 | if save_instances and cat_name in TAIL_CATS_SCANNET_200: 113 | # Save segment points as instance 114 | # calculate segment faces instances 115 | # uncomment if saving meshes instead of point clouds 116 | #face_node_mapper = lambda n: np.where(p_inds == n)[0][0] 117 | #inst_face_mask = np.all(np.isin(faces_array, p_inds), axis=1) 118 | #inst_faces = faces_array[inst_face_mask] 119 | #inst_faces = np.vectorize(face_node_mapper)(inst_faces) 120 | 121 | save_instance(segment_points, label_id, cat_name, scene_id, str(SCANNET_OUT_PATH), limited_annotation_points=sampled_inds) # , segment_faces=inst_faces 122 | 123 | labelled_pc = labelled_pc.astype(int) 124 | instance_ids = instance_ids.astype(int) 125 | 126 | # Concatenate with original cloud 127 | processed = np.hstack((pointcloud[:, :6], labelled_pc, instance_ids)) 128 | 129 | if (np.any(np.isnan(processed)) or not np.all(np.isfinite(processed))): 130 | raise ValueError('nan') 131 | 132 | # Save processed cloud 133 | out_file = phase_out_path / (cloud_file.name[:-len(POINTCLOUD_FILE)] + cloud_file.suffix) 134 | save_point_cloud(processed, out_file, with_label=True, verbose=False) 135 | 136 | 137 | train_scenes_file = open(SCANNET_RAW_PATH / "scans.txt", 'r') 138 | train_scenes = train_scenes_file.readlines() 139 | 140 | # Strips the newline character 141 | for i, line in enumerate(train_scenes): 142 | train_scenes[i] = line.rstrip() 143 | 144 | test_scenes_file = open(SCANNET_RAW_PATH / 'scans_test.txt', 'r') 145 | test_scenes = test_scenes_file.readlines() 146 | for i, line in enumerate(test_scenes): 147 | test_scenes[i] = line.rstrip() 148 | 149 | path_list = [] 150 | train_pc_files = list((SCANNET_RAW_PATH / in_path).glob('*/*' + POINTCLOUD_FILE)) 151 | for f in train_pc_files: 152 | 153 | scene_id = f.name[:-len(POINTCLOUD_FILE)] 154 | 155 | if scene_id in train_scenes: 156 | out_path = TRAIN_DEST 157 | elif scene_id in test_scenes: 158 | out_path = TEST_DEST 159 | else: 160 | out_path = '' 161 | print('ERROR: no matching scene id') 162 | 163 | phase_out_path = SCANNET_OUT_PATH / out_path 164 | phase_out_path.mkdir(parents=True, exist_ok=True) 165 | 166 | path_list.append(str(f) + ',' + str(phase_out_path)) 167 | 168 | pool = ProcessPoolExecutor(max_workers=num_threads) 169 | result = list(pool.map(handle_process, path_list)) 170 | -------------------------------------------------------------------------------- /lib/losses/PointSupConLoss.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Author: David Rozenberszki (david.rozenberszki@tum.de) 3 | Date: Jan 07, 2022 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import time 11 | import multiprocessing 12 | from joblib import Parallel, delayed 13 | 14 | 15 | class PointSupConLoss(nn.Module): 16 | 17 | def __init__(self, config, num_labels, temperature=0.07, base_temperature=0.07, reduction='mean'): 18 | 19 | super(PointSupConLoss, self).__init__() 20 | self.temperature = temperature 21 | self.base_temperature = base_temperature 22 | self.ignore_label = config.ignore_label 23 | self.config = config 24 | self.eps = 10e-5 25 | self.num_labels = num_labels 26 | 27 | 28 | self.register_buffer('confusion_hist', torch.zeros((num_labels, num_labels)).long()) 29 | self.num_pos_samples = config.num_pos_samples 30 | self.num_negative_samples = config.num_negative_samples 31 | 32 | self.neg_thresh = config.contrast_neg_thresh 33 | self.pos_thresh = config.contrast_pos_thresh 34 | self.neg_weight = config.contrast_neg_weight 35 | 36 | # For multiprocessing 37 | self.num_cores = multiprocessing.cpu_count() 38 | 39 | self.reduction = reduction 40 | 41 | def update_confusion_hist(self, new_confusion_hist): 42 | self.confusion_hist = new_confusion_hist + 1 # +1 to avoid summing up to 0 in the end 43 | 44 | def feat_dist(self, A, B, target): 45 | # takes two feature tensors and computes the point-wise distances 46 | # A = (n_points, feat_dim) 47 | # B = (n_points, num_samples, feat_dim) 48 | # Add zero loss to ignore labels 49 | 50 | # L2 dist 51 | distance = None 52 | if self.config.representation_distance_type == 'l2': 53 | if B.shape[1] > 0: 54 | D2 = (A.unsqueeze(1) - B).pow(2) 55 | D2 = torch.sum(D2, dim=-1) 56 | distance = torch.sqrt(D2 + 1e-7).mean(1) 57 | else: 58 | distance = torch.zeros(A.shape[0], device=A.device) 59 | # Cos/Dot product dist 60 | elif self.config.representation_distance_type == 'cos': 61 | # Multiply for batches as points 62 | An = F.normalize(A, p=2, dim=1) 63 | Bn = F.normalize(B, p=2, dim=2) 64 | Dcos = torch.bmm(An.unsqueeze(1), Bn.transpose(1, 2)) # (n_points, 1, feat_dim) @ (n_points, feat_dim, num_samples) = (n_points, 1, num_samples) 65 | Dcos = Dcos.mean(-1).squeeze() # Average out over samples and remove unnecessary dims 66 | distance = 1 - Dcos # Convert similarity to distance: the more similar, the smaller the value 67 | else: 68 | return None # Will throw error as it is not allowed 69 | 70 | distance[target == self.ignore_label] = 0. 
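# The zeroing above keeps the output aligned one-to-one with the input points, so forward() can apply its hinge thresholds without re-indexing.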
71 | return distance 72 | 73 | 74 | def forward(self, features: torch.Tensor, labels: torch.Tensor, anchor_feats=None, preds: torch.Tensor = None): 75 | 76 | device = features.device 77 | comp_feats = features.clone().detach() 78 | 79 | if len(features.shape) != 2: 80 | raise ValueError('`features` needs to be [n_points, feat_dim]') 81 | 82 | point_num = features.shape[0] 83 | feat_dim = features.shape[1] 84 | labels = labels.contiguous() 85 | np_labels = labels.cpu().numpy() 86 | 87 | # Iterate over unique cats in feature tensor and remove ignore 88 | unique_targets = labels.unique() 89 | unique_targets = unique_targets[unique_targets != self.ignore_label] 90 | 91 | # Calculate valid inds for batch 92 | false_freq_mask = torch.zeros(self.num_labels).bool().to(device) 93 | false_freq_mask[unique_targets] = True 94 | 95 | # Init container for contrast tensors 96 | pos_samples = torch.zeros((point_num, self.num_pos_samples, feat_dim)).to(device) 97 | neg_samples = torch.zeros((point_num, self.num_negative_samples, feat_dim)).to(device) 98 | 99 | # Take into account only correct preds 100 | if preds is not None: 101 | correct_inds = (labels == preds) 102 | 103 | # Find feature samples for all labels 104 | #for ut in unique_targets: 105 | def target_samples(ut): 106 | 107 | # Find inds where we want to have contrasts 108 | ut_inds = labels == ut # type: torch.Tensor 109 | ut_point_num = ut_inds.sum().item() 110 | ut_index_values = np.arange(point_num)[ut_inds.cpu().numpy()] 111 | 112 | # #Filter for correct positives 113 | # if preds is None: 114 | # ut_index_values = np.arange(point_num)[ut_inds.cpu().numpy()] 115 | # else: 116 | # inds = torch.logical_and(correct_inds, ut_inds).cpu().numpy() 117 | # ut_index_values = np.arange(point_num)[inds] 118 | 119 | # Pick pos samples 120 | pos_inds = torch.from_numpy(np.random.choice(ut_index_values, (ut_point_num, self.num_pos_samples))).to(device) 121 | pos_samples[ut_inds] = comp_feats[pos_inds.view(-1)].view(ut_point_num, self.num_pos_samples, feat_dim) 122 | 123 | # Calculate probs for sampling negative cats and only with ones present in scene, while masking out self 124 | ut_mask = false_freq_mask.clone() 125 | ut_mask[ut] = False 126 | false_freqs_prob = self.confusion_hist[ut].float() * ut_mask.float() 127 | false_freqs_prob /= false_freqs_prob.sum() + self.eps 128 | 129 | # map label to sample prob 130 | neg_probs = false_freqs_prob[labels] 131 | neg_probs[labels == self.ignore_label] = 0. 132 | 133 | # Take into account only correct preds for falses 134 | if preds is not None: 135 | neg_probs[~correct_inds] = 0. 136 | neg_probs = (neg_probs / (neg_probs.sum() + self.eps)).cpu().numpy() 137 | 138 | # Sample from all inds as ones with pos or self are given 0. prob 139 | sampled_neg_inds = torch.from_numpy(np.random.choice(np.arange(point_num), (ut_point_num, self.num_negative_samples), p=neg_probs)).to(device) 140 | neg_samples[ut_inds] = comp_feats[sampled_neg_inds.view(-1)].view(ut_point_num, self.num_negative_samples, feat_dim) 141 | 142 | _ = Parallel(n_jobs=self.num_cores, backend="threading")(map(delayed(target_samples), unique_targets)) 143 | 144 | # Use relu where error under threshold and only take losses for non ignored points 145 | pos_loss = F.relu(self.feat_dist(features, pos_samples, labels) - self.pos_thresh) 146 | neg_loss = F.relu(self.neg_thresh - self.feat_dist(features, neg_samples, labels)) 147 | 148 | # Return weighted means 149 | if self.reduction == 'mean': 150 | loss = pos_loss.mean() + neg_loss.mean() * self.neg_weight 151 | else: 152 | loss = pos_loss + neg_loss * self.neg_weight 153 | 154 | return loss, pos_loss, neg_loss 155 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/evaluation/scannet_benchmark_utils/scripts/evaluate_semantic_label.py: -------------------------------------------------------------------------------- 1 | # Evaluates semantic label task 2 | # Input: 3 | # - path to .txt prediction files 4 | # - path to .txt ground truth files 5 | # - output file to write results to 6 | # Note that only the valid classes are used for evaluation, 7 | # i.e., any ground truth label not in the valid label set 8 | # is ignored in the evaluation. 9 | # 10 | # example usage: evaluate_semantic_label.py --pred_path [path to predictions] --gt_path [path to ground truth] --output_file [output file] 11 | 12 | # python imports 13 | import logging 14 | import os, sys, argparse 15 | 16 | try: 17 | import numpy as np 18 | except: 19 | print("Failed to import numpy package.") 20 | sys.exit(-1) 21 | try: 22 | from itertools import izip 23 | except ImportError: 24 | izip = zip 25 | 26 | #currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 27 | #parentdir = os.path.dirname(currentdir) 28 | #sys.path.insert(0,parentdir) 29 | import util_3d 30 | import util 31 | 32 | 33 | class Evaluator: 34 | def __init__(self, CLASS_LABELS, VALID_CLASS_IDS): 35 | self.CLASS_LABELS = CLASS_LABELS 36 | self.VALID_CLASS_IDS = VALID_CLASS_IDS 37 | self.UNKNOWN_ID = np.max(VALID_CLASS_IDS) + 1 38 | self.gt = {} 39 | self.pred = {} 40 | 41 | max_id = self.UNKNOWN_ID 42 | self.confusion = np.zeros((max_id+1, max_id+1), dtype=np.ulonglong) 43 | 44 | def update_confusion(self, pred_ids, gt_ids, sceneId=None): 45 | # sanity checks 46 | if not pred_ids.shape == gt_ids.shape: 47 | util.print_error('%s: number of predicted values does not match number of vertices' % sceneId, user_fault=True) 48 | 49 | n = self.confusion.shape[0] 50 | k = (gt_ids >= 0) & (gt_ids < n) 51 | temporal = np.bincount(n * gt_ids[k].astype(int) + pred_ids[k], minlength=n**2).reshape(n, n) 52 | 53 | for valid_class_row in self.VALID_CLASS_IDS: 54 | for valid_class_col in self.VALID_CLASS_IDS: 55 | self.confusion[valid_class_row][valid_class_col] += temporal[valid_class_row][valid_class_col] 56 | 57 | @staticmethod 58 | def write_to_benchmark(base='benchmark_segmentation', sceneId=None, pred_ids=None): 59 | os.makedirs(base, exist_ok=True) 60 | util_3d.export_ids('{}.txt'.format(os.path.join(base, sceneId)), pred_ids) 61 | 62 | def get_iou(self, label_id, confusion): 63 | if not label_id in self.VALID_CLASS_IDS: 64 | return float('nan') 65 | # #true positives 66 | tp = np.longlong(confusion[label_id, label_id]) 67 | # #false 
negatives 68 | fn = np.longlong(confusion[label_id, :].sum()) - tp 69 | # #false positives 70 | not_ignored = [l for l in self.VALID_CLASS_IDS if not l == label_id] 71 | fp = np.longlong(confusion[not_ignored, label_id].sum()) 72 | 73 | denom = (tp + fp + fn) 74 | if denom == 0: 75 | return float('nan') 76 | return (float(tp) / denom, tp, denom) 77 | 78 | def write_result_file(self, confusion, ious, filename): 79 | with open(filename, 'w') as f: 80 | f.write('iou scores\n') 81 | for i in range(len(self.VALID_CLASS_IDS)): 82 | label_id = self.VALID_CLASS_IDS[i] 83 | label_name = self.CLASS_LABELS[i] 84 | iou = ious[label_name][0] 85 | f.write('{0:<14s}({1:<2d}): {2:>5.3f}\n'.format(label_name, label_id, iou)) 86 | f.write("{0:<14s}: {1:>5.3f}".format('mean', np.array([ious[k][0] for k in ious]).mean())) 87 | 88 | f.write('\nconfusion matrix\n') 89 | f.write('\t\t\t') 90 | for i in range(len(self.VALID_CLASS_IDS)): 91 | #f.write('\t{0:<14s}({1:<2d})'.format(CLASS_LABELS[i], VALID_CLASS_IDS[i])) 92 | f.write('{0:<8d}'.format(self.VALID_CLASS_IDS[i])) 93 | f.write('\n') 94 | for r in range(len(self.VALID_CLASS_IDS)): 95 | f.write('{0:<14s}({1:<2d})'.format(self.CLASS_LABELS[r], self.VALID_CLASS_IDS[r])) 96 | for c in range(len(self.VALID_CLASS_IDS)): 97 | f.write('\t{0:>5.3f}'.format(confusion[self.VALID_CLASS_IDS[r],self.VALID_CLASS_IDS[c]])) 98 | f.write('\n') 99 | print('wrote results to', filename) 100 | 101 | def evaluate_confusion(self, output_file=None): 102 | class_ious = {} 103 | counter = 0 104 | summation = 0 105 | 106 | for i in range(len(self.VALID_CLASS_IDS)): 107 | label_name = self.CLASS_LABELS[i] 108 | label_id = self.VALID_CLASS_IDS[i] 109 | class_ious[label_name] = self.get_iou(label_id, self.confusion) 110 | # print 111 | logging.info('classes IoU') 112 | logging.info('----------------------------') 113 | for i in range(len(self.VALID_CLASS_IDS)): 114 | label_name = self.CLASS_LABELS[i] 115 | try: 116 | logging.info('{0:<14s}: {1:>5.3f} ({2:>6d}/{3:<6d})'.format(label_name, class_ious[label_name][0], class_ious[label_name][1], class_ious[label_name][2])) 117 | summation += class_ious[label_name][0] 118 | counter += 1 119 | except: 120 | logging.info('{0:<14s}: nan ( nan/nan )'.format(label_name)) 121 | 122 | logging.info("{0:<14s}: {1:>5.3f}".format('mean', summation / counter)) 123 | 124 | if output_file: 125 | self.write_result_file(self.confusion, class_ious, output_file) 126 | 127 | return summation / counter 128 | 129 | def config(): 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument('--pred_path', required=True, help='path to directory of predicted .txt files') 132 | parser.add_argument('--gt_path', required=True, help='path to gt files') 133 | parser.add_argument('--output_file', type=str, default='./semantic_label_evaluation.txt') 134 | opt = parser.parse_args() 135 | return opt 136 | 137 | def main(): 138 | opt = config() 139 | 140 | 141 | ch = logging.StreamHandler(sys.stdout) 142 | logging.getLogger().setLevel(logging.INFO) 143 | logging.basicConfig( 144 | format=os.uname()[1].split('.')[0] + ' %(asctime)s %(message)s', 145 | datefmt='%m/%d %H:%M:%S', 146 | handlers=[ch]) 147 | 148 | #------------------------- ScanNet -------------------------- 149 | CLASS_LABELS = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 150 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 151 | 'curtain', 'refrigerator', 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'] 152 | VALID_CLASS_IDS = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]) 153 | evaluator = Evaluator(CLASS_LABELS=CLASS_LABELS, VALID_CLASS_IDS=VALID_CLASS_IDS) 154 | 155 | print('reading', len(os.listdir(opt.pred_path))-1, 'scans...') 156 | for i, pred_file in enumerate(os.listdir(opt.pred_path)): 157 | if pred_file == 'semantic_label_evaluation.txt': 158 | continue 159 | 160 | gt_file = os.path.join(opt.gt_path, pred_file) 161 | if not os.path.isfile(gt_file): 162 | util.print_error('Result file {} does not match any gt file'.format(pred_file), user_fault=True) 163 | gt_ids = util_3d.load_ids(gt_file) 164 | 165 | pred_file = os.path.join(opt.pred_path, pred_file) 166 | pred_ids = util_3d.load_ids(pred_file) 167 | 168 | evaluator.update_confusion(pred_ids, gt_ids, pred_file.split('.')[0]) 169 | sys.stdout.write("\rscans processed: {}".format(i+1)) 170 | sys.stdout.flush() 171 | 172 | # evaluate 173 | evaluator.evaluate_confusion(opt.output_file) 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /downstream/insseg/datasets/evaluation/evaluate_semantic_label.py: -------------------------------------------------------------------------------- 1 | # Evaluates semantic label task 2 | # Input: 3 | # - path to .txt prediction files 4 | # - path to .txt ground truth files 5 | # - output file to write results to 6 | # Note that only the valid classes are used for evaluation, 7 | # i.e., any ground truth label not in the valid label set 8 | # is ignored in the evaluation. 9 | # 10 | # example usage: evaluate_semantic_label.py --pred_path [path to predictions] --gt_path [path to ground truth] --output_file [output file] 11 | 12 | # python imports 13 | import logging 14 | import os, sys, argparse 15 | 16 | try: 17 | import numpy as np 18 | except: 19 | print("Failed to import numpy package.") 20 | sys.exit(-1) 21 | try: 22 | from itertools import izip 23 | except ImportError: 24 | izip = zip 25 | 26 | #currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 27 | #parentdir = os.path.dirname(currentdir) 28 | #sys.path.insert(0,parentdir) 29 | from datasets.evaluation.scannet_benchmark_utils import util_3d, util 30 | 31 | 32 | class Evaluator: 33 | def __init__(self, CLASS_LABELS, VALID_CLASS_IDS): 34 | #CLASS_LABELS = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 35 | # 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 36 | # 'curtain', 'refrigerator', 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'] 37 | #VALID_CLASS_IDS = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]) 38 | self.CLASS_LABELS = CLASS_LABELS 39 | self.VALID_CLASS_IDS = VALID_CLASS_IDS 40 | self.UNKNOWN_ID = np.max(VALID_CLASS_IDS) + 1 41 | self.gt = {} 42 | self.pred = {} 43 | 44 | max_id = self.UNKNOWN_ID 45 | self.confusion = np.zeros((max_id+1, max_id+1), dtype=np.ulonglong) 46 | 47 | def update_confusion(self, pred_ids, gt_ids, sceneId=None): 48 | # sanity checks 49 | if not pred_ids.shape == gt_ids.shape: 50 | util.print_error('%s: number of predicted values does not match number of vertices' % sceneId, user_fault=True) 51 | 52 | n = self.confusion.shape[0] 53 | k = (gt_ids >= 0) & (gt_ids < n) 54 | temporal = np.bincount(n * gt_ids[k].astype(int) + pred_ids[k], minlength=n**2).reshape(n, n) 55 | 56 | for valid_class_row in self.VALID_CLASS_IDS: 57 | for valid_class_col in self.VALID_CLASS_IDS: 58 | self.confusion[valid_class_row][valid_class_col] += 
temporal[valid_class_row][valid_class_col] 59 | 60 | @staticmethod 61 | def write_to_benchmark(base='benchmark_segmentation', sceneId=None, pred_ids=None): 62 | os.makedirs(base, exist_ok=True) 63 | util_3d.export_ids('{}.txt'.format(os.path.join(base, sceneId)), pred_ids) 64 | 65 | def get_iou(self, label_id, confusion): 66 | if not label_id in self.VALID_CLASS_IDS: 67 | return float('nan') 68 | # #true positives 69 | tp = np.longlong(confusion[label_id, label_id]) 70 | # #false negatives 71 | fn = np.longlong(confusion[label_id, :].sum()) - tp 72 | # #false positives 73 | not_ignored = [l for l in self.VALID_CLASS_IDS if not l == label_id] 74 | fp = np.longlong(confusion[not_ignored, label_id].sum()) 75 | 76 | denom = (tp + fp + fn) 77 | if denom == 0: 78 | return float('nan') 79 | return (float(tp) / denom, tp, denom) 80 | 81 | def write_result_file(self, confusion, ious, filename): 82 | with open(filename, 'w') as f: 83 | f.write('iou scores\n') 84 | for i in range(len(self.VALID_CLASS_IDS)): 85 | label_id = self.VALID_CLASS_IDS[i] 86 | label_name = self.CLASS_LABELS[i] 87 | iou = ious[label_name][0] 88 | f.write('{0:<14s}({1:<2d}): {2:>5.3f}\n'.format(label_name, label_id, iou)) 89 | f.write("{0:<14s}: {1:>5.3f}".format('mean', np.array([ious[k][0] for k in ious]).mean())) 90 | 91 | f.write('\nconfusion matrix\n') 92 | f.write('\t\t\t') 93 | for i in range(len(self.VALID_CLASS_IDS)): 94 | #f.write('\t{0:<14s}({1:<2d})'.format(CLASS_LABELS[i], VALID_CLASS_IDS[i])) 95 | f.write('{0:<8d}'.format(self.VALID_CLASS_IDS[i])) 96 | f.write('\n') 97 | for r in range(len(self.VALID_CLASS_IDS)): 98 | f.write('{0:<14s}({1:<2d})'.format(self.CLASS_LABELS[r], self.VALID_CLASS_IDS[r])) 99 | for c in range(len(self.VALID_CLASS_IDS)): 100 | f.write('\t{0:>5.3f}'.format(confusion[self.VALID_CLASS_IDS[r],self.VALID_CLASS_IDS[c]])) 101 | f.write('\n') 102 | print('wrote results to', filename) 103 | 104 | def evaluate_confusion(self, output_file=None): 105 | class_ious = {} 106 | counter = 0 107 | summation = 0 108 | 109 | for i in range(len(self.VALID_CLASS_IDS)): 110 | label_name = self.CLASS_LABELS[i] 111 | label_id = self.VALID_CLASS_IDS[i] 112 | class_ious[label_name] = self.get_iou(label_id, self.confusion) 113 | # print 114 | logging.info('classes IoU') 115 | logging.info('----------------------------') 116 | for i in range(len(self.VALID_CLASS_IDS)): 117 | label_name = self.CLASS_LABELS[i] 118 | try: 119 | logging.info('{0:<14s}: {1:>5.3f} ({2:>6d}/{3:<6d})'.format(label_name, class_ious[label_name][0], class_ious[label_name][1], class_ious[label_name][2])) 120 | summation += class_ious[label_name][0] 121 | counter += 1 122 | except: 123 | logging.info('{0:<14s}: nan ( nan/nan )'.format(label_name)) 124 | 125 | logging.info("{0:<14s}: {1:>5.3f}".format('mean', summation / counter)) 126 | 127 | if output_file: 128 | self.write_result_file(self.confusion, class_ious, output_file) 129 | 130 | return summation / counter 131 | 132 | def config(): 133 | parser = argparse.ArgumentParser() 134 | parser.add_argument('--pred_path', required=True, help='path to directory of predicted .txt files') 135 | parser.add_argument('--gt_path', required=True, help='path to gt files') 136 | parser.add_argument('--output_file', type=str, default='./semantic_label_evaluation.txt') 137 | opt = parser.parse_args() 138 | return opt 139 | 140 | def main(): 141 | opt = config() 142 | 143 | #------------------------- ScanNet -------------------------- 144 | CLASS_LABELS = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 
'table', 145 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 146 | 'curtain', 'refrigerator', 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'] 147 | VALID_CLASS_IDS = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]) 148 | evaluator = Evaluator(CLASS_LABELS=CLASS_LABELS, VALID_CLASS_IDS=VALID_CLASS_IDS) 149 | 150 | print('reading', len(os.listdir(opt.pred_path))-1, 'scans...') 151 | for i, pred_file in enumerate(os.listdir(opt.pred_path)): 152 | if pred_file == 'semantic_label_evaluation.txt': 153 | continue 154 | 155 | gt_file = os.path.join(opt.gt_path, pred_file) 156 | if not os.path.isfile(gt_file): 157 | util.print_error('Result file {} does not match any gt file'.format(pred_file), user_fault=True) 158 | gt_ids = util_3d.load_ids(gt_file) 159 | 160 | pred_file = os.path.join(opt.pred_path, pred_file) 161 | pred_ids = util_3d.load_ids(pred_file) 162 | 163 | evaluator.update_confusion(pred_ids, gt_ids, pred_file.split('.')[0]) 164 | sys.stdout.write("\rscans processed: {}".format(i+1)) 165 | sys.stdout.flush() 166 | 167 | # evaluate 168 | evaluator.evaluate_confusion(opt.output_file) 169 | 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /lib/datasets/stanford.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import numpy as np 5 | from collections import defaultdict 6 | from scipy import spatial 7 | from plyfile import PlyData 8 | 9 | from lib.utils import read_txt, fast_hist, per_class_iu 10 | from lib.dataset import VoxelizationDataset, DatasetPhase, str2datasetphase_type, cache 11 | import lib.transforms as t 12 | 13 | 14 | class StanfordVoxelizationDatasetBase: 15 | CLIP_SIZE = None 16 | CLIP_BOUND = None 17 | LOCFEAT_IDX = 2 18 | ROTATION_AXIS = 'z' 19 | NUM_LABELS = 14 20 | IGNORE_LABELS = (10,) # remove stairs, following SegCloud 21 | 22 | # CLASSES = [ 23 | # 'clutter', 'beam', 'board', 'bookcase', 'ceiling', 'chair', 'column', 'door', 'floor', 'sofa', 24 | # 'table', 'wall', 'window' 25 | # ] 26 | 27 | IS_FULL_POINTCLOUD_EVAL = True 28 | 29 | DATA_PATH_FILE = { 30 | DatasetPhase.Train: 'train.txt', 31 | DatasetPhase.Val: 'val.txt', 32 | DatasetPhase.TrainVal: 'trainval.txt', 33 | DatasetPhase.Test: 'test.txt' 34 | } 35 | 36 | def test_pointcloud(self, pred_dir, num_labels): 37 | print('Running full pointcloud evaluation.') 38 | # Join room by their area and room id. 39 | room_dict = defaultdict(list) 40 | for i, data_path in enumerate(self.data_paths): 41 | area, room = data_path.split(os.sep) 42 | room, _ = os.path.splitext(room) 43 | room_id = '_'.join(room.split('_')[:-1]) 44 | room_dict[(area, room_id)].append(i) 45 | # Test independently for each room. 46 | sys.setrecursionlimit(100000) # Increase recursion limit for k-d tree. 47 | pred_list = sorted(os.listdir(pred_dir)) 48 | hist = np.zeros((num_labels, num_labels)) 49 | for room_idx, room_list in enumerate(room_dict.values()): 50 | print(f'Evaluating room {room_idx} / {len(room_dict)}.') 51 | # Join all predictions and query pointclouds of split data. 52 | pred = np.zeros((0, 4)) 53 | pointcloud = np.zeros((0, 7)) 54 | for i in room_list: 55 | pred = np.vstack((pred, np.load(os.path.join(pred_dir, pred_list[i])))) 56 | pointcloud = np.vstack((pointcloud, self.load_ply(i)[0])) 57 | # Deduplicate all query pointclouds of split data. 
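# Crops of the same room can repeat identical vertices where clips overlap, so rows are deduplicated as exact tuples before querying the prediction k-d tree.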
58 | pointcloud = np.array(list(set(tuple(l) for l in pointcloud.tolist()))) 59 | # Run test for each room. 60 | pred_tree = spatial.KDTree(pred[:, :3], leafsize=500) 61 | _, result = pred_tree.query(pointcloud[:, :3]) 62 | ptc_pred = pred[result, 3].astype(int) 63 | ptc_gt = pointcloud[:, -1].astype(int) 64 | if self.IGNORE_LABELS: 65 | ptc_pred = self.label2masked[ptc_pred] 66 | ptc_gt = self.label2masked[ptc_gt] 67 | hist += fast_hist(ptc_pred, ptc_gt, num_labels) 68 | # Print results. 69 | ious = [] 70 | print('Per class IoU:') 71 | for i, iou in enumerate(per_class_iu(hist) * 100): 72 | result_str = '' 73 | if hist.sum(1)[i]: 74 | result_str += f'{iou}' 75 | ious.append(iou) 76 | else: 77 | result_str += 'N/A' # Do not print if data not in ground truth. 78 | print(result_str) 79 | print(f'Average IoU: {np.nanmean(ious)}') 80 | 81 | def _augment_coords_to_feats(self, coords, feats, labels=None): 82 | # Center x,y 83 | coords_center = coords.mean(0, keepdims=True) 84 | coords_center[0, 2] = 0 85 | norm_coords = coords - coords_center 86 | feats = np.concatenate((feats, norm_coords), 1) 87 | return coords, feats, labels 88 | 89 | 90 | class StanfordDataset(StanfordVoxelizationDatasetBase, VoxelizationDataset): 91 | 92 | # Voxelization arguments 93 | VOXEL_SIZE = 0.05 # 5cm 94 | 95 | CLIP_BOUND = 4 # [-N, N] 96 | TEST_CLIP_BOUND = None 97 | 98 | # Augmentation arguments 99 | ROTATION_AUGMENTATION_BOUND = \ 100 | ((-np.pi / 32, np.pi / 32), (-np.pi / 32, np.pi / 32), (-np.pi, np.pi)) 101 | TRANSLATION_AUGMENTATION_RATIO_BOUND = ((-0.2, 0.2), (-0.2, 0.2), (-0.05, 0.05)) 102 | 103 | AUGMENT_COORDS_TO_FEATS = True 104 | NUM_IN_CHANNEL = 6 105 | 106 | def __init__(self, 107 | config, 108 | prevoxel_transform=None, 109 | input_transform=None, 110 | target_transform=None, 111 | cache=False, 112 | augment_data=True, 113 | elastic_distortion=False, 114 | phase=DatasetPhase.Train): 115 | if isinstance(phase, str): 116 | phase = str2datasetphase_type(phase) 117 | if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]: 118 | self.CLIP_BOUND = self.TEST_CLIP_BOUND 119 | data_root = config.stanford3d_path 120 | if isinstance(self.DATA_PATH_FILE[phase], (list, tuple)): 121 | data_paths = [] 122 | for split in self.DATA_PATH_FILE[phase]: 123 | data_paths += read_txt(os.path.join('splits/stanford', split)) 124 | else: 125 | data_paths = read_txt(os.path.join('splits/stanford', self.DATA_PATH_FILE[phase])) 126 | 127 | logging.info('Loading {} {}: {}'.format(self.__class__.__name__, phase, 128 | self.DATA_PATH_FILE[phase])) 129 | 130 | VoxelizationDataset.__init__( 131 | self, 132 | data_paths, 133 | data_root=data_root, 134 | prevoxel_transform=prevoxel_transform, 135 | input_transform=input_transform, 136 | target_transform=target_transform, 137 | ignore_label=config.ignore_label, 138 | return_transformation=config.return_transformation, 139 | augment_data=augment_data, 140 | elastic_distortion=elastic_distortion, 141 | config=config) 142 | 143 | @cache 144 | def load_ply(self, index): 145 | filepath = self.data_root / self.data_paths[index] 146 | scene_name = self.data_paths[index] 147 | plydata = PlyData.read(filepath) 148 | data = plydata.elements[0].data 149 | coords = np.array([data['x'], data['y'], data['z']], dtype=np.float32).T 150 | feats = np.array([data['red'], data['green'], data['blue']], dtype=np.float32).T 151 | labels = np.array(data['label'], dtype=np.int32) 152 | return coords, feats, labels, None, scene_name 153 | 154 | 155 | class StanfordArea5Dataset(StanfordDataset): 156 | 157 | 
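# Standard S3DIS protocol: train on Areas 1-4 and 6, hold out Area 5 for validation and test.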
DATA_PATH_FILE = { 158 | DatasetPhase.Train: ['area1.txt', 'area2.txt', 'area3.txt', 'area4.txt', 'area6.txt'], 159 | DatasetPhase.Val: 'area5.txt', 160 | DatasetPhase.Test: 'area5.txt' 161 | } 162 | 163 | 164 | def test(config): 165 | """Test point cloud data loader. 166 | """ 167 | from torch.utils.data import DataLoader 168 | from lib.utils import Timer 169 | import open3d as o3d 170 | 171 | def make_pcd(coords, feats): 172 | pcd = o3d.geometry.PointCloud() 173 | pcd.points = o3d.utility.Vector3dVector(coords[:, :3].float().numpy()) 174 | pcd.colors = o3d.utility.Vector3dVector(feats[:, :3].numpy() / 255) 175 | return pcd 176 | 177 | timer = Timer() 178 | DatasetClass = StanfordArea5Dataset 179 | transformations = [ 180 | t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS, DatasetClass.IS_TEMPORAL), 181 | t.ChromaticAutoContrast(), 182 | t.ChromaticTranslation(config.data_aug_color_trans_ratio), 183 | t.ChromaticJitter(config.data_aug_color_jitter_std), 184 | ] 185 | 186 | dataset = DatasetClass( 187 | config, 188 | prevoxel_transform=t.ElasticDistortion(DatasetClass.ELASTIC_DISTORT_PARAMS), 189 | input_transform=t.Compose(transformations), 190 | augment_data=True, 191 | cache=True, 192 | elastic_distortion=True) 193 | 194 | data_loader = DataLoader( 195 | dataset=dataset, 196 | collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False), 197 | batch_size=1, 198 | shuffle=True) 199 | 200 | # Visualize the first 100 batches 201 | data_iter = iter(data_loader) 202 | for i in range(100): 203 | timer.tic() 204 | coords, feats, labels = next(data_iter) 205 | pcd = make_pcd(coords, feats) 206 | o3d.visualization.draw_geometries([pcd]) 207 | print(timer.toc()) 208 | 209 | 210 | if __name__ == '__main__': 211 | from config import get_config 212 | config = get_config() 213 | 214 | test(config) 215 | --------------------------------------------------------------------------------
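A minimal usage sketch (not part of the repository): the snippet below drives the Evaluator from downstream/insseg/datasets/evaluation/evaluate_semantic_label.py programmatically instead of through its argparse CLI. The class labels and ids mirror the ScanNet defaults hard-coded in that file's main(); the synthetic gt_ids/pred_ids arrays stand in for util_3d.load_ids() output, and the import path assumes the script runs from the downstream/insseg directory. Those specifics are illustrative assumptions, not repository code.

import numpy as np

# Assumes the working directory is downstream/insseg so the evaluator's own
# package-relative imports resolve (an assumption of this sketch).
from datasets.evaluation.evaluate_semantic_label import Evaluator

# Same 20-class ScanNet benchmark setup as in the file's main().
CLASS_LABELS = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
                'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
                'curtain', 'refrigerator', 'shower curtain', 'toilet', 'sink',
                'bathtub', 'otherfurniture']
VALID_CLASS_IDS = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
                            24, 28, 33, 34, 36, 39])

evaluator = Evaluator(CLASS_LABELS=CLASS_LABELS, VALID_CLASS_IDS=VALID_CLASS_IDS)

# Synthetic per-vertex label ids standing in for util_3d.load_ids() output.
rng = np.random.default_rng(0)
gt_ids = rng.choice(VALID_CLASS_IDS, size=10000)
pred_ids = rng.choice(VALID_CLASS_IDS, size=10000)

# One call per scene; the confusion matrix accumulates across calls.
evaluator.update_confusion(pred_ids, gt_ids, sceneId='scene0000_00')

# Returns the mean IoU over valid classes (per-class lines go to logging).
miou = evaluator.evaluate_confusion(output_file=None)
print('mean IoU: {:.3f}'.format(miou))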