├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── point_contrast_pipeline.png └── point_contrast_title.png ├── downstream ├── semseg │ ├── .gitignore │ ├── .style.yapf │ ├── config │ │ ├── default.yaml │ │ └── distributed │ │ │ ├── local.yaml │ │ │ └── slurm.yaml │ ├── ddp_main.py │ ├── lib │ │ ├── __init__.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── preprocessing │ │ │ │ ├── scannet.py │ │ │ │ └── stanford.py │ │ │ ├── scannet.py │ │ │ └── stanford.py │ │ ├── distributed_utils.py │ │ ├── layers.py │ │ ├── math_functions.py │ │ ├── pc_utils.py │ │ ├── solvers.py │ │ ├── test.py │ │ ├── train.py │ │ ├── transforms.py │ │ ├── utils.py │ │ └── voxelizer.py │ ├── models │ │ ├── __init__.py │ │ ├── common.py │ │ ├── model.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ └── resnet_block.py │ │ ├── res16unet.py │ │ ├── residual_block.py │ │ ├── resnet.py │ │ ├── resunet.py │ │ └── wrapper.py │ ├── scripts │ │ ├── train_scannet.sh │ │ └── train_stanford3d.sh │ └── splits │ │ ├── scannet │ │ ├── scannetv2_test.txt │ │ ├── scannetv2_train.txt │ │ ├── scannetv2_trainval.txt │ │ └── scannetv2_val.txt │ │ ├── scannet_old │ │ ├── scannetv2_test.txt │ │ ├── scannetv2_train.txt │ │ ├── scannetv2_trainval.txt │ │ └── scannetv2_val.txt │ │ └── stanford │ │ ├── area1.txt │ │ ├── area2.txt │ │ ├── area3.txt │ │ ├── area4.txt │ │ ├── area5.txt │ │ └── area6.txt └── votenet_det_new │ ├── config │ └── default.yaml │ ├── ddp_main.py │ ├── lib │ ├── datasets │ │ ├── scannet │ │ │ ├── README.md │ │ │ ├── batch_load_scannet_data.py │ │ │ ├── data_viz.py │ │ │ ├── load_scannet_data.py │ │ │ ├── meta_data │ │ │ │ ├── scannet_means.npz │ │ │ │ ├── scannet_train.txt │ │ │ │ ├── scannetv2-labels.combined.tsv │ │ │ │ ├── scannetv2_test.txt │ │ │ │ ├── scannetv2_train.txt │ │ │ │ └── scannetv2_val.txt │ │ │ ├── model_util_scannet.py │ │ │ ├── scannet_detection_dataset.py │ │ │ └── scannet_utils.py │ │ └── sunrgbd │ │ │ ├── OFFICIAL_SUNRGBD │ │ │ ├── SUNRGBD │ │ │ ├── SUNRGBDMeta2DBB_v2.mat │ │ │ ├── SUNRGBDMeta3DBB_v2.mat │ │ │ ├── SUNRGBDtoolbox │ │ │ └── __MACOSX │ │ │ ├── README.md │ │ │ ├── matlab │ │ │ ├── extract_rgbd_data_v1.m │ │ │ ├── extract_rgbd_data_v2.m │ │ │ └── extract_split.m │ │ │ ├── model_util_sunrgbd.py │ │ │ ├── sunrgbd_data.py │ │ │ ├── sunrgbd_detection_dataset.py │ │ │ ├── sunrgbd_trainval │ │ │ ├── calib │ │ │ ├── depth │ │ │ ├── eulerangles.py │ │ │ ├── image │ │ │ ├── label │ │ │ ├── label_v1 │ │ │ ├── pc_util.py │ │ │ ├── pcrgb.obj │ │ │ ├── plyfile.py │ │ │ ├── seg_label │ │ │ ├── tmp.py │ │ │ ├── train_data_idx.txt │ │ │ ├── training │ │ │ └── val_data_idx.txt │ │ │ └── sunrgbd_utils.py │ ├── test.py │ ├── train.py │ └── utils │ │ ├── box_util.py │ │ ├── distributed_utils.py │ │ ├── eval_det.py │ │ ├── metric_util.py │ │ ├── nms.py │ │ ├── nn_distance.py │ │ ├── pc_util.py │ │ ├── tf_logger.py │ │ └── tf_visualizer.py │ ├── models │ ├── ap_helper.py │ ├── backbone │ │ ├── pointnet2 │ │ │ ├── _ext_src │ │ │ │ ├── include │ │ │ │ │ ├── ball_query.h │ │ │ │ │ ├── cuda_utils.h │ │ │ │ │ ├── group_points.h │ │ │ │ │ ├── interpolate.h │ │ │ │ │ ├── sampling.h │ │ │ │ │ └── utils.h │ │ │ │ └── src │ │ │ │ │ ├── ball_query.cpp │ │ │ │ │ ├── ball_query_gpu.cu │ │ │ │ │ ├── bindings.cpp │ │ │ │ │ ├── group_points.cpp │ │ │ │ │ ├── group_points_gpu.cu │ │ │ │ │ ├── interpolate.cpp │ │ │ │ │ ├── interpolate_gpu.cu │ │ │ │ │ ├── sampling.cpp │ │ │ │ │ └── sampling_gpu.cu │ │ │ ├── pointnet2_modules.py │ │ 
│ ├── pointnet2_test.py │ │ │ ├── pointnet2_utils.py │ │ │ ├── pytorch_utils.py │ │ │ └── setup.py │ │ └── sparseconv │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── lib │ │ │ ├── __init__.py │ │ │ └── math_functions.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── conditional_random_fields.py │ │ │ ├── model.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── common.py │ │ │ │ ├── resnet_block.py │ │ │ │ └── senet_block.py │ │ │ ├── res16unet.py │ │ │ ├── resnet.py │ │ │ ├── resunet.py │ │ │ └── wrapper.py │ │ │ ├── voxelized_dataset.py │ │ │ └── voxelizer.py │ ├── backbone_module.py │ ├── boxnet.py │ ├── dump_helper.py │ ├── loss_helper.py │ ├── loss_helper_boxnet.py │ ├── proposal_module.py │ ├── votenet.py │ └── voting_module.py │ └── scripts │ ├── train_scannet.sh │ └── train_sunrgbd.sh ├── pretrain ├── data_preprocess │ ├── README.md │ └── scannet_pair │ │ ├── SensorData.py │ │ ├── compute_full_overlapping.py │ │ ├── generate_list.py │ │ ├── plyfile.py │ │ ├── point_cloud_extractor.py │ │ └── reader.py └── pointcontrast │ ├── .gitignore │ ├── config │ └── defaults.yaml │ ├── ddp_train.py │ ├── example_dataset │ └── overlap-30-50p-subset.txt │ ├── lib │ ├── __init__.py │ ├── criterion.py │ ├── data_sampler.py │ ├── ddp_data_loaders.py │ ├── ddp_trainer.py │ ├── distributed.py │ ├── error_handler.py │ ├── multiprocessing.py │ ├── timer.py │ └── transforms.py │ ├── model │ ├── __init__.py │ ├── modules │ │ ├── __init__.py │ │ ├── common.py │ │ └── resnet_block.py │ ├── res16unet.py │ └── resnet.py │ └── scripts │ ├── ddp_launch.sh │ └── ddp_local.sh └── requirements.txt /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PointContrast 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to PointContrast, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /assets/point_contrast_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/assets/point_contrast_pipeline.png -------------------------------------------------------------------------------- /assets/point_contrast_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/assets/point_contrast_title.png -------------------------------------------------------------------------------- /downstream/semseg/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized 2 | __pycache__/ 3 | *.py[cod] 4 | *.o 5 | *.so 6 | 7 | # Text edits 8 | *.swp 9 | *.swo 10 | *.orig 11 | 12 | # Training 13 | outputs/ 14 | 15 | # Profiling 16 | callgrind.out* 17 | *.dSYM 18 | 19 | # Misc 20 | .DS_Store 21 | 22 | # folder 23 | tmp 24 | tmp_scratch 25 | 26 | -------------------------------------------------------------------------------- /downstream/semseg/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = chromium 3 | column_limit = 100 4 | indent_width = 2 5 | -------------------------------------------------------------------------------- /downstream/semseg/config/default.yaml: -------------------------------------------------------------------------------- 1 | net: 2 | model: ResUNet14 3 | conv1_kernel_size: 3 4 | weights: 5 | weights_for_inner_model: False 6 | dilations: [1,1,1,1] 7 | 8 | # Wrappers 9 | wrapper_type: 10 | wrapper_region_type: 1 11 | wrapper_kernel_size: 3 12 | wrapper_lr: 0.1 13 | 14 | optimizer: 15 | optimizer: SGD 16 | lr: 0.01 17 | sgd_momentum: 0.9 18 | sgd_dampening: 0.1 19 | adam_beta1: 0.9 20 | adam_beta2: 0.999 21 | weight_decay: 0.0001 22 | param_histogram_freq: 100 23 | save_param_histogram: False 24 | iter_size: 1 25 | bn_momentum: 0.02 26 | 27 | # Scheduler 
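# Supported `scheduler` values (implemented in lib/solvers.py): StepLR, PolyLR,
# SquaredLR, ExpLR. step_size/step_gamma apply to StepLR, poly_power to PolyLR,
# exp_gamma/exp_step_size to ExpLR; max_iter is used by PolyLR and SquaredLR.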
28 | scheduler: StepLR 29 | max_iter: 60000 30 | step_size: 2e4 31 | step_gamma: 0.1 32 | poly_power: 0.9 33 | exp_gamma: 0.95 34 | exp_step_size: 445 35 | 36 | data: 37 | dataset: ScannetVoxelization2cmDataset 38 | voxel_size: 0.05 39 | data_dir: data 40 | point_lim: -1 41 | pre_point_lim: -1 42 | batch_size: 16 43 | val_batch_size: 1 44 | test_batch_size: 1 45 | cache_data: False 46 | num_workers: 1 47 | num_val_workers: 1 48 | ignore_label: 255 49 | return_transformation: False 50 | ignore_duplicate_class: False 51 | partial_crop: 0 52 | train_limit_numpoints: 0 53 | 54 | # Point Cloud Dataset 55 | scannet_path: /home/datasets/scannet/scannet_preprocessed 56 | stanford3d_path: /home/datasets/Stanford3D 57 | 58 | train: 59 | # Training / test parameters 60 | is_train: True 61 | stat_freq: 40 62 | save_freq: 1000 63 | val_freq: 1000 64 | empty_cache_freq: 1 65 | train_phase: train 66 | val_phase: val 67 | overwrite_weights: True 68 | resume: 69 | resume_optimizer: True, 70 | eval_upsample: False 71 | lenient_weight_loading: False, 72 | 73 | # Distributed Training configurations 74 | distributed: 75 | distributed_world_size: 8 76 | distributed_rank: 0 77 | distributed_backend: nccl 78 | distributed_init_method: 79 | distributed_port: 10010 80 | device_id: 0 81 | distributed_no_spawn: True 82 | ddp_backend: c10d #['c10d', 'no_c10d'] 83 | bucket_cap_mb: 25 84 | 85 | # Data augmentation 86 | augmentation: 87 | use_feat_aug: True 88 | data_aug_color_trans_ratio: 0.10 89 | data_aug_color_jitter_std: 0.05 90 | normalize_color: True 91 | data_aug_scale_min: 0.9 92 | data_aug_scale_max: 1.1 93 | data_aug_hue_max: 0.5 94 | data_aug_saturation_max: 0.2 95 | 96 | # Test 97 | test: 98 | visualize: False 99 | test_temporal_average: False 100 | visualize_path: outputs/visualize 101 | save_prediction: False 102 | save_pred_dir: outputs/pred 103 | test_phase: test 104 | evaluate_original_pointcloud: False 105 | test_original_pointcloud: False 106 | test_stat_freq: 100 107 | 108 | # Misc 109 | misc: 110 | is_cuda: True 111 | load_path: 112 | log_step: 50 113 | log_level: INFO #['INFO', 'DEBUG', 'WARN'] 114 | num_gpu: 1 115 | seed: 123 116 | log_dir: outputs/default 117 | 118 | # hydra submitit launcher 119 | 120 | defaults: 121 | - hydra/launcher: submitit_slurm 122 | - hydra/hydra_logging: colorlog 123 | - distributed: slurm 124 | 125 | # hydra: 126 | # run: 127 | # dir: ${misc.log_dir} 128 | # sweep: 129 | # dir: ${misc.log_dir} 130 | # launcher: 131 | # partition: dev 132 | # submitit_folder: ${hydra.sweep.dir}/.submitit/%j 133 | # name: ${hydra.job.name} 134 | # timeout_min: 2880 135 | # cpus_per_task: 10 136 | # gpus_per_node: 8 137 | # tasks_per_node: 8 138 | # mem_gb: 480 139 | # nodes: 1 140 | # constraint: volta32gb 141 | # max_num_timeout: 3 142 | # signal_delay_s: 300 143 | -------------------------------------------------------------------------------- /downstream/semseg/config/distributed/local.yaml: -------------------------------------------------------------------------------- 1 | distributed: 2 | distributed_no_spawn: True 3 | distributed_world_size: 1 4 | distributed_backend: nccl 5 | ddp_backend: c10d 6 | device_id: 0 7 | distributed_rank: 0 8 | bucket_cap_mb: 25 9 | distributed_port: -1 10 | distributed_init_method: 11 | 12 | hydra: 13 | run: 14 | dir: ${misc.log_dir} 15 | sweep: 16 | dir: ${misc.log_dir} 17 | -------------------------------------------------------------------------------- /downstream/semseg/config/distributed/slurm.yaml: 
-------------------------------------------------------------------------------- 1 | distributed: 2 | distributed_no_spawn: True 3 | distributed_world_size: 8 4 | distributed_backend: nccl 5 | ddp_backend: c10d 6 | device_id: 0 7 | distributed_rank: 0 8 | bucket_cap_mb: 25 9 | distributed_port: 10100 10 | distributed_init_method: 11 | 12 | hydra: 13 | run: 14 | dir: ${misc.log_dir} 15 | sweep: 16 | dir: ${misc.log_dir} 17 | launcher: 18 | partition: priority 19 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 20 | name: ${hydra.job.name} 21 | timeout_min: 1440 22 | cpus_per_task: 10 23 | gpus_per_node: 8 24 | tasks_per_node: 8 25 | mem_gb: 480 26 | nodes: 1 27 | constraint: volta32gb 28 | comment: ICLR Deadline 29 | max_num_timeout: 3 30 | -------------------------------------------------------------------------------- /downstream/semseg/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import open3d as o3d 7 | -------------------------------------------------------------------------------- /downstream/semseg/lib/dataloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import math 7 | 8 | import torch 9 | import torch.distributed as dist 10 | from torch.utils.data.sampler import Sampler 11 | 12 | 13 | class InfSampler(Sampler): 14 | """Samples elements randomly, without replacement. 
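  The permutation is regenerated whenever it runs out, so iteration never
  terminates (hence "Inf"). Indices are popped from the back of the current
  permutation; pass shuffle=True to randomize the order each time it is rebuilt.
  A minimal usage sketch: `loader = DataLoader(dataset, sampler=InfSampler(dataset, shuffle=True))`.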
15 | 16 | Arguments: 17 | data_source (Dataset): dataset to sample from 18 | """ 19 | 20 | def __init__(self, data_source, shuffle=False): 21 | self.data_source = data_source 22 | self.shuffle = shuffle 23 | self.reset_permutation() 24 | 25 | def reset_permutation(self): 26 | perm = torch.arange(len(self.data_source)) 27 | if self.shuffle: 28 | perm = torch.randperm(len(self.data_source)) 29 | self._perm = perm.tolist() 30 | 31 | def __iter__(self): 32 | return self 33 | 34 | def __next__(self): 35 | if len(self._perm) == 0: 36 | self.reset_permutation() 37 | 38 | return self._perm.pop() 39 | 40 | def __len__(self): 41 | return len(self.data_source) 42 | 43 | next = __next__ # Python 2 compatibility 44 | 45 | 46 | class DistributedInfSampler(InfSampler): 47 | def __init__(self, data_source, num_replicas=None, rank=None, shuffle=True): 48 | if num_replicas is None: 49 | if not dist.is_available(): 50 | raise RuntimeError("Requires distributed package to be available") 51 | num_replicas = dist.get_world_size() 52 | if rank is None: 53 | if not dist.is_available(): 54 | raise RuntimeError("Requires distributed package to be available") 55 | rank = dist.get_rank() 56 | 57 | self.data_source = data_source 58 | self.num_replicas = num_replicas 59 | self.rank = rank 60 | self.epoch = 0 61 | self.it = 0 62 | self.num_samples = int(math.ceil(len(self.data_source) * 1.0 / self.num_replicas)) 63 | self.total_size = self.num_samples * self.num_replicas 64 | self.shuffle = shuffle 65 | self.reset_permutation() 66 | 67 | def __next__(self): 68 | it = self.it * self.num_replicas + self.rank 69 | value = self._perm[it % len(self._perm)] 70 | self.it = self.it + 1 71 | 72 | if (self.it * self.num_replicas) >= len(self._perm): 73 | self.reset_permutation() 74 | self.it = 0 75 | return value 76 | 77 | def __len__(self): 78 | return self.num_samples -------------------------------------------------------------------------------- /downstream/semseg/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from lib.datasets import stanford 7 | from lib.datasets import scannet 8 | 9 | DATASETS = [] 10 | 11 | 12 | def add_datasets(module): 13 | DATASETS.extend([getattr(module, a) for a in dir(module) if 'Dataset' in a]) 14 | 15 | 16 | add_datasets(stanford) 17 | add_datasets(scannet) 18 | 19 | 20 | def load_dataset(name): 21 | '''Returns the dataset class given its name. 22 | ''' 23 | # Find the dataset class from its name 24 | mdict = {dataset.__name__: dataset for dataset in DATASETS} 25 | if name not in mdict: 26 | print('Invalid dataset name. Options are:') 27 | # Display a list of valid dataset names 28 | for dataset in DATASETS: 29 | print('\t* {}'.format(dataset.__name__)) 30 | raise ValueError(f'Dataset {name} not defined') 31 | DatasetClass = mdict[name] 32 | 33 | return DatasetClass 34 | -------------------------------------------------------------------------------- /downstream/semseg/lib/datasets/preprocessing/scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
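#
# Usage notes: SCANNET_RAW_PATH and SCANNET_OUT_PATH below are placeholders and
# must be edited before running. Point SCANNET_RAW_PATH at the raw ScanNet
# release (containing scans/ and scans_test/) and SCANNET_OUT_PATH at an output
# directory, then run this file as a script, e.g.
# `python -m lib.datasets.preprocessing.scannet` from downstream/semseg/ so the
# `lib` imports resolve. It converts every *_vh_clean_2.ply scene, attaches
# labels from the matching .labels.ply when present, and finally zeroes out the
# corrupted label ids listed in BUGS.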
5 | 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | from lib.pc_utils import read_plyfile, save_point_cloud 10 | from concurrent.futures import ProcessPoolExecutor 11 | SCANNET_RAW_PATH = Path('/path/ScanNet_data/') 12 | SCANNET_OUT_PATH = Path('/path/scans_processed/') 13 | TRAIN_DEST = 'train' 14 | TEST_DEST = 'test' 15 | SUBSETS = {TRAIN_DEST: 'scans', TEST_DEST: 'scans_test'} 16 | POINTCLOUD_FILE = '_vh_clean_2.ply' 17 | BUGS = { 18 | 'train/scene0270_00.ply': 50, 19 | 'train/scene0270_02.ply': 50, 20 | 'train/scene0384_00.ply': 149, 21 | } 22 | print('start preprocess') 23 | # Preprocess data. 24 | 25 | 26 | def handle_process(path): 27 | f = Path(path.split(',')[0]) 28 | phase_out_path = Path(path.split(',')[1]) 29 | pointcloud = read_plyfile(f) 30 | # Make sure alpha value is meaningless. 31 | assert np.unique(pointcloud[:, -1]).size == 1 32 | # Load label file. 33 | label_f = f.parent / (f.stem + '.labels' + f.suffix) 34 | if label_f.is_file(): 35 | label = read_plyfile(label_f) 36 | # Sanity check that the pointcloud and its label has same vertices. 37 | assert pointcloud.shape[0] == label.shape[0] 38 | assert np.allclose(pointcloud[:, :3], label[:, :3]) 39 | else: # Label may not exist in test case. 40 | label = np.zeros_like(pointcloud) 41 | out_f = phase_out_path / (f.name[:-len(POINTCLOUD_FILE)] + f.suffix) 42 | processed = np.hstack((pointcloud[:, :6], np.array([label[:, -1]]).T)) 43 | save_point_cloud(processed, out_f, with_label=True, verbose=False) 44 | 45 | 46 | path_list = [] 47 | for out_path, in_path in SUBSETS.items(): 48 | phase_out_path = SCANNET_OUT_PATH / out_path 49 | phase_out_path.mkdir(parents=True, exist_ok=True) 50 | for f in (SCANNET_RAW_PATH / in_path).glob('*/*' + POINTCLOUD_FILE): 51 | path_list.append(str(f) + ',' + str(phase_out_path)) 52 | 53 | pool = ProcessPoolExecutor(max_workers=20) 54 | result = list(pool.map(handle_process, path_list)) 55 | 56 | # Fix bug in the data. 57 | for files, bug_index in BUGS.items(): 58 | print(files) 59 | 60 | for f in SCANNET_OUT_PATH.glob(files): 61 | pointcloud = read_plyfile(f) 62 | bug_mask = pointcloud[:, -1] == bug_index 63 | print(f'Fixing {f} bugged label {bug_index} x {bug_mask.sum()}') 64 | pointcloud[bug_mask, -1] = 0 65 | save_point_cloud(pointcloud, f, with_label=True, verbose=False) 66 | -------------------------------------------------------------------------------- /downstream/semseg/lib/datasets/preprocessing/stanford.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
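#
# Usage notes: STANFORD_3D_IN_PATH and STANFORD_3D_OUT_PATH below are
# placeholders and must be set before running. The converter walks the S3DIS
# Area_*/room_*/Annotations/*.txt instance clouds, merges them per room,
# quantizes to a 1cm grid with MinkowskiEngine, and writes one labeled PLY per
# room; generate_splits() then rewrites splits/stanford/area{1..6}.txt. Run this
# file as a script (see __main__ at the bottom), e.g.
# `python -m lib.datasets.preprocessing.stanford` from downstream/semseg/.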
5 | 6 | import glob 7 | import numpy as np 8 | import os 9 | 10 | from tqdm import tqdm 11 | 12 | from lib.utils import mkdir_p 13 | from lib.pc_utils import save_point_cloud 14 | 15 | import MinkowskiEngine as ME 16 | 17 | STANFORD_3D_IN_PATH = 'YOUR_DATASET_PATH_HERE' 18 | STANFORD_3D_OUT_PATH = 'YOUR_PREPROCESSED_DATASET_PATH_HERE' 19 | 20 | STANFORD_3D_TO_SEGCLOUD_LABEL = { 21 | 4: 0, 22 | 8: 1, 23 | 12: 2, 24 | 1: 3, 25 | 6: 4, 26 | 13: 5, 27 | 7: 6, 28 | 5: 7, 29 | 11: 8, 30 | 3: 9, 31 | 9: 10, 32 | 2: 11, 33 | 0: 12, 34 | } 35 | 36 | 37 | class Stanford3DDatasetConverter: 38 | 39 | CLASSES = [ 40 | 'clutter', 'beam', 'board', 'bookcase', 'ceiling', 'chair', 'column', 'door', 'floor', 'sofa', 41 | 'stairs', 'table', 'wall', 'window' 42 | ] 43 | TRAIN_TEXT = 'train' 44 | VAL_TEXT = 'val' 45 | TEST_TEXT = 'test' 46 | 47 | @classmethod 48 | def read_txt(cls, txtfile): 49 | # Read txt file and parse its content. 50 | with open(txtfile) as f: 51 | pointcloud = [] 52 | for l in f: 53 | try: 54 | pointcloud += [[float(li) for li in l.split()]] 55 | except Exception as e: 56 | print(e, txtfile) 57 | continue 58 | # pointcloud = [l.split() for l in f] 59 | 60 | # Load point cloud to named numpy array. 61 | pointcloud = np.array(pointcloud).astype(np.float32) 62 | assert pointcloud.shape[1] == 6 63 | xyz = pointcloud[:, :3].astype(np.float32) 64 | rgb = pointcloud[:, 3:].astype(np.uint8) 65 | return xyz, rgb 66 | 67 | @classmethod 68 | def convert_to_ply(cls, root_path, out_path): 69 | """Convert Stanford3DDataset to PLY format that is compatible with 70 | Synthia dataset. Assumes file structure as given by the dataset. 71 | Outputs the processed PLY files to `STANFORD_3D_OUT_PATH`. 72 | """ 73 | 74 | txtfiles = glob.glob(os.path.join(root_path, '*/*/*.txt')) 75 | for txtfile in tqdm(txtfiles): 76 | file_sp = os.path.normpath(txtfile).split(os.path.sep) 77 | target_path = os.path.join(out_path, file_sp[-3]) 78 | out_file = os.path.join(target_path, file_sp[-2] + '.ply') 79 | 80 | if os.path.exists(out_file): 81 | print(out_file, ' exists') 82 | continue 83 | 84 | annotation, _ = os.path.split(txtfile) 85 | subclouds = glob.glob(os.path.join(annotation, 'Annotations/*.txt')) 86 | coords, feats, labels = [], [], [] 87 | for inst, subcloud in enumerate(subclouds): 88 | # Read ply file and parse its rgb values. 89 | xyz, rgb = cls.read_txt(subcloud) 90 | _, annotation_subfile = os.path.split(subcloud) 91 | clsidx = cls.CLASSES.index(annotation_subfile.split('_')[0]) 92 | 93 | coords.append(xyz) 94 | feats.append(rgb) 95 | labels.append(np.ones((len(xyz), 1), dtype=np.int32) * clsidx) 96 | 97 | if len(coords) == 0: 98 | print(txtfile, ' has 0 files.') 99 | else: 100 | # Concat 101 | coords = np.concatenate(coords, 0) 102 | feats = np.concatenate(feats, 0) 103 | labels = np.concatenate(labels, 0) 104 | inds, collabels = ME.utils.sparse_quantize( 105 | coords, 106 | feats, 107 | labels, 108 | return_index=True, 109 | ignore_label=255, 110 | quantization_size=0.01 # 1cm 111 | ) 112 | pointcloud = np.concatenate((coords[inds], feats[inds], collabels[:, None]), axis=1) 113 | 114 | # Write ply file. 
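# Each output row is one representative point per occupied ~1cm voxel
# (selected by ME.utils.sparse_quantize above), laid out as x, y, z, r, g, b, label.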
115 | mkdir_p(target_path) 116 | save_point_cloud(pointcloud, out_file, with_label=True, verbose=False) 117 | 118 | 119 | def generate_splits(stanford_out_path): 120 | """Takes preprocessed out path and generate txt files""" 121 | split_path = './splits/stanford' 122 | mkdir_p(split_path) 123 | for i in range(1, 7): 124 | curr_path = os.path.join(stanford_out_path, f'Area_{i}') 125 | files = glob.glob(os.path.join(curr_path, '*.ply')) 126 | files = [os.path.relpath(full_path, stanford_out_path) for full_path in files] 127 | out_txt = os.path.join(split_path, f'area{i}.txt') 128 | with open(out_txt, 'w') as f: 129 | f.write('\n'.join(files)) 130 | 131 | 132 | if __name__ == '__main__': 133 | Stanford3DDatasetConverter.convert_to_ply(STANFORD_3D_IN_PATH, STANFORD_3D_OUT_PATH) 134 | generate_splits(STANFORD_3D_OUT_PATH) 135 | -------------------------------------------------------------------------------- /downstream/semseg/lib/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from MinkowskiEngine import MinkowskiGlobalPooling, MinkowskiBroadcastAddition, MinkowskiBroadcastMultiplication 10 | 11 | 12 | class MinkowskiLayerNorm(nn.Module): 13 | 14 | def __init__(self, num_features, eps=1e-5, D=-1): 15 | super(MinkowskiLayerNorm, self).__init__() 16 | self.num_features = num_features 17 | self.eps = eps 18 | self.weight = nn.Parameter(torch.ones(1, num_features)) 19 | self.bias = nn.Parameter(torch.zeros(1, num_features)) 20 | 21 | self.mean_in = MinkowskiGlobalPooling(dimension=D) 22 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D) 23 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D) 24 | self.glob_mean = MinkowskiGlobalPooling(dimension=D) 25 | self.glob_times = MinkowskiBroadcastMultiplication(dimension=D) 26 | self.D = D 27 | self.reset_parameters() 28 | 29 | def __repr__(self): 30 | s = f'(D={self.D})' 31 | return self.__class__.__name__ + s 32 | 33 | def reset_parameters(self): 34 | self.weight.data.fill_(1) 35 | self.bias.data.zero_() 36 | 37 | def _check_input_dim(self, input): 38 | if input.F.dim() != 2: 39 | raise ValueError('expected 2D input (got {}D input)'.format(input.dim())) 40 | 41 | def forward(self, x): 42 | self._check_input_dim(x) 43 | mean = self.mean_in(x).F.mean(-1, keepdim=True) 44 | mean = mean + torch.zeros(mean.size(0), self.num_features).type_as(mean) 45 | temp = self.glob_sum(x.F, -mean)**2 46 | var = self.glob_mean(temp.data).mean(-1, keepdim=True) 47 | var = var + torch.zeros(var.size(0), self.num_features).type_as(var) 48 | instd = 1 / (var + self.eps).sqrt() 49 | 50 | x = self.glob_times(self.glob_sum2(x, -mean), instd) 51 | return x * self.weight + self.bias 52 | 53 | 54 | class MinkowskiInstanceNorm(nn.Module): 55 | 56 | def __init__(self, num_features, eps=1e-5, D=-1): 57 | super(MinkowskiInstanceNorm, self).__init__() 58 | self.eps = eps 59 | self.weight = nn.Parameter(torch.ones(1, num_features)) 60 | self.bias = nn.Parameter(torch.zeros(1, num_features)) 61 | 62 | self.mean_in = MinkowskiGlobalPooling(dimension=D) 63 | self.glob_sum = MinkowskiBroadcastAddition(dimension=D) 64 | self.glob_sum2 = MinkowskiBroadcastAddition(dimension=D) 65 | self.glob_mean = MinkowskiGlobalPooling(dimension=D) 66 | self.glob_times = 
MinkowskiBroadcastMultiplication(dimension=D) 67 | self.D = D 68 | self.reset_parameters() 69 | 70 | def __repr__(self): 71 | s = f'(D={self.D})' 72 | return self.__class__.__name__ + s 73 | 74 | def reset_parameters(self): 75 | self.weight.data.fill_(1) 76 | self.bias.data.zero_() 77 | 78 | def _check_input_dim(self, input): 79 | if input.F.dim() != 2: 80 | raise ValueError('expected 2D input (got {}D input)'.format(input.F.dim())) 81 | 82 | def forward(self, x): 83 | self._check_input_dim(x) 84 | mean_in = self.mean_in(x) 85 | temp = self.glob_sum(x, -mean_in)**2 86 | var_in = self.glob_mean(temp.data) 87 | instd_in = 1 / (var_in + self.eps).sqrt() 88 | 89 | x = self.glob_times(self.glob_sum2(x, -mean_in), instd_in) 90 | return x * self.weight + self.bias 91 | -------------------------------------------------------------------------------- /downstream/semseg/lib/math_functions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from scipy.sparse import csr_matrix 7 | import torch 8 | 9 | 10 | class SparseMM(torch.autograd.Function): 11 | """ 12 | Sparse x dense matrix multiplication with autograd support. 13 | Implementation by Soumith Chintala: 14 | https://discuss.pytorch.org/t/ 15 | does-pytorch-support-autograd-on-sparse-matrix/6156/7 16 | """ 17 | 18 | def forward(self, matrix1, matrix2): 19 | self.save_for_backward(matrix1, matrix2) 20 | return torch.mm(matrix1, matrix2) 21 | 22 | def backward(self, grad_output): 23 | matrix1, matrix2 = self.saved_tensors 24 | grad_matrix1 = grad_matrix2 = None 25 | 26 | if self.needs_input_grad[0]: 27 | grad_matrix1 = torch.mm(grad_output, matrix2.t()) 28 | 29 | if self.needs_input_grad[1]: 30 | grad_matrix2 = torch.mm(matrix1.t(), grad_output) 31 | 32 | return grad_matrix1, grad_matrix2 33 | 34 | 35 | def sparse_float_tensor(values, indices, size=None): 36 | """ 37 | Return a torch sparse matrix given values and indices (row_ind, col_ind). 38 | If the size is an integer, return a square matrix with side size. 39 | If the size is a torch.Size, use it to initialize the out tensor. 40 | If none, the size is inferred.
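  Example: sparse_float_tensor(torch.tensor([1., 2.]),
  (torch.tensor([0, 1]), torch.tensor([1, 0]))) describes a 2x2 sparse matrix
  with entries (0, 1) = 1.0 and (1, 0) = 2.0.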
41 | """ 42 | indices = torch.stack(indices).int() 43 | sargs = [indices, values.float()] 44 | if size is not None: 45 | # Use the provided size 46 | if isinstance(size, int): 47 | size = torch.Size((size, size)) 48 | sargs.append(size) 49 | if values.is_cuda: 50 | return torch.cuda.sparse.FloatTensor(*sargs) 51 | else: 52 | return torch.sparse.FloatTensor(*sargs) 53 | 54 | 55 | def diags(values, size=None): 56 | values = values.view(-1) 57 | n = values.nelement() 58 | size = torch.Size((n, n)) 59 | indices = (torch.arange(0, n), torch.arange(0, n)) 60 | return sparse_float_tensor(values, indices, size) 61 | 62 | 63 | def sparse_to_csr_matrix(tensor): 64 | tensor = tensor.cpu() 65 | inds = tensor._indices().numpy() 66 | vals = tensor._values().numpy() 67 | return csr_matrix((vals, (inds[0], inds[1])), shape=[s for s in tensor.shape]) 68 | 69 | 70 | def csr_matrix_to_sparse(mat): 71 | row_ind, col_ind = mat.nonzero() 72 | return sparse_float_tensor( 73 | torch.from_numpy(mat.data), 74 | (torch.from_numpy(row_ind), torch.from_numpy(col_ind)), 75 | size=torch.Size(mat.shape)) 76 | -------------------------------------------------------------------------------- /downstream/semseg/lib/solvers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import logging 7 | 8 | from torch.optim import SGD, Adam 9 | from torch.optim.lr_scheduler import LambdaLR, StepLR 10 | 11 | 12 | class LambdaStepLR(LambdaLR): 13 | 14 | def __init__(self, optimizer, lr_lambda, last_step=-1): 15 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step) 16 | 17 | @property 18 | def last_step(self): 19 | """Use last_epoch for the step counter""" 20 | return self.last_epoch 21 | 22 | @last_step.setter 23 | def last_step(self, v): 24 | self.last_epoch = v 25 | 26 | 27 | class PolyLR(LambdaStepLR): 28 | """DeepLab learning rate policy""" 29 | 30 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1): 31 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step) 32 | 33 | 34 | class SquaredLR(LambdaStepLR): 35 | """ Used for SGD Lars""" 36 | 37 | def __init__(self, optimizer, max_iter, last_step=-1): 38 | super(SquaredLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**2, last_step) 39 | 40 | 41 | class ExpLR(LambdaStepLR): 42 | 43 | def __init__(self, optimizer, step_size, gamma=0.9, last_step=-1): 44 | # (0.9 ** 21.854) = 0.1, (0.95 ** 44.8906) = 0.1 45 | # To get 0.1 every N using gamma 0.9, N * log(0.9)/log(0.1) = 0.04575749 N 46 | # To get 0.1 every N using gamma g, g ** N = 0.1 -> N * log(g) = log(0.1) -> g = np.exp(log(0.1) / N) 47 | super(ExpLR, self).__init__(optimizer, lambda s: gamma**(s / step_size), last_step) 48 | 49 | 50 | def initialize_optimizer(params, config): 51 | assert config.optimizer in ['SGD', 'Adagrad', 'Adam', 'RMSProp', 'Rprop', 'SGDLars'] 52 | 53 | if config.optimizer == 'SGD': 54 | return SGD( 55 | params, 56 | lr=config.lr, 57 | momentum=config.sgd_momentum, 58 | dampening=config.sgd_dampening, 59 | weight_decay=config.weight_decay) 60 | elif config.optimizer == 'Adam': 61 | return Adam( 62 | params, 63 | lr=config.lr, 64 | betas=(config.adam_beta1, config.adam_beta2), 65 | weight_decay=config.weight_decay) 66 | else: 67 | logging.error('Optimizer type not supported') 68 | raise 
ValueError('Optimizer type not supported') 69 | 70 | 71 | def initialize_scheduler(optimizer, config, last_step=-1): 72 | if config.scheduler == 'StepLR': 73 | return StepLR( 74 | optimizer, step_size=config.step_size, gamma=config.step_gamma, last_epoch=last_step) 75 | elif config.scheduler == 'PolyLR': 76 | return PolyLR(optimizer, max_iter=config.max_iter, power=config.poly_power, last_step=last_step) 77 | elif config.scheduler == 'SquaredLR': 78 | return SquaredLR(optimizer, max_iter=config.max_iter, last_step=last_step) 79 | elif config.scheduler == 'ExpLR': 80 | return ExpLR( 81 | optimizer, step_size=config.exp_step_size, gamma=config.exp_gamma, last_step=last_step) 82 | else: 83 | logging.error('Scheduler not supported') 84 | raise ValueError('Scheduler not supported') 85 | -------------------------------------------------------------------------------- /downstream/semseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import models.resunet as resunet 7 | import models.res16unet as res16unet 8 | 9 | MODELS = [] 10 | WRAPPERS = [] 11 | 12 | def add_models(module): 13 | MODELS.extend([getattr(module, a) for a in dir(module) if 'Net' in a]) 14 | 15 | 16 | add_models(resunet) 17 | add_models(res16unet) 18 | 19 | def get_models(): 20 | '''Returns the list of registered model classes.''' 21 | return MODELS 22 | 23 | 24 | def get_wrappers(): 25 | return WRAPPERS 26 | 27 | 28 | def load_model(name): 29 | '''Returns the model class given its class name. 30 | ''' 31 | # Find the model class from its name 32 | all_models = get_models() 33 | mdict = {model.__name__: model for model in all_models} 34 | if name not in mdict: 35 | print('Invalid model name. Options are:') 36 | # Display a list of valid model names 37 | for model in all_models: 38 | print('\t* {}'.format(model.__name__)) 39 | return None 40 | NetClass = mdict[name] 41 | 42 | return NetClass 43 | 44 | 45 | def load_wrapper(name): 46 | '''Returns the wrapper class given its class name. 47 | ''' 48 | # Find the wrapper class from its name 49 | all_wrappers = get_wrappers() 50 | mdict = {wrapper.__name__: wrapper for wrapper in all_wrappers} 51 | if name not in mdict: 52 | print('Invalid wrapper name. Options are:') 53 | # Display a list of valid wrapper names 54 | for wrapper in all_wrappers: 55 | print('\t* {}'.format(wrapper.__name__)) 56 | return None 57 | WrapperClass = mdict[name] 58 | 59 | return WrapperClass 60 | -------------------------------------------------------------------------------- /downstream/semseg/models/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
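#
# get_norm() below maps a config string to the corresponding MinkowskiEngine
# normalization layer. A minimal usage sketch (values are illustrative):
#   norm = get_norm('BN', 64, bn_momentum=0.05)  # MinkowskiBatchNorm
#   norm = get_norm('IN', 64, D=3)               # MinkowskiInstanceNorm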
5 | 6 | import MinkowskiEngine as ME 7 | 8 | 9 | def get_norm(norm_type, num_feats, bn_momentum=0.05, D=-1): 10 | if norm_type == 'BN': 11 | return ME.MinkowskiBatchNorm(num_feats, momentum=bn_momentum) 12 | elif norm_type == 'IN': 13 | return ME.MinkowskiInstanceNorm(num_feats, dimension=D) 14 | else: 15 | raise ValueError(f'Type {norm_type}, not defined') 16 | -------------------------------------------------------------------------------- /downstream/semseg/models/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from MinkowskiEngine import MinkowskiNetwork 7 | 8 | 9 | class Model(MinkowskiNetwork): 10 | """ 11 | Base network for all sparse convnet 12 | 13 | By default, all networks are segmentation networks. 14 | """ 15 | OUT_PIXEL_DIST = -1 16 | 17 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 18 | super(Model, self).__init__(D) 19 | self.in_channels = in_channels 20 | self.out_channels = out_channels 21 | self.config = config 22 | 23 | 24 | class HighDimensionalModel(Model): 25 | """ 26 | Base network for all spatio (temporal) chromatic sparse convnet 27 | """ 28 | 29 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 30 | assert D > 4, "Num dimension smaller than 5" 31 | super(HighDimensionalModel, self).__init__(in_channels, out_channels, config, D, **kwargs) 32 | -------------------------------------------------------------------------------- /downstream/semseg/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | -------------------------------------------------------------------------------- /downstream/semseg/models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
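#
# Residual blocks for the sparse (MinkowskiEngine) backbones. BasicBlockBase is
# the standard two-conv residual block (conv3 -> norm -> ReLU -> conv3 -> norm,
# plus identity/downsample skip); BottleneckBase is the 1-3-1 variant with
# channel expansion 4. The IN/INBN subclasses only swap NORM_TYPE.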
5 | 6 | import torch.nn as nn 7 | 8 | from models.modules.common import ConvType, NormType, get_norm, conv 9 | 10 | from MinkowskiEngine import MinkowskiReLU 11 | 12 | 13 | class BasicBlockBase(nn.Module): 14 | expansion = 1 15 | NORM_TYPE = NormType.BATCH_NORM 16 | 17 | def __init__(self, 18 | inplanes, 19 | planes, 20 | stride=1, 21 | dilation=1, 22 | downsample=None, 23 | conv_type=ConvType.HYPERCUBE, 24 | bn_momentum=0.1, 25 | D=3): 26 | super(BasicBlockBase, self).__init__() 27 | 28 | self.conv1 = conv( 29 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 30 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 31 | self.conv2 = conv( 32 | planes, 33 | planes, 34 | kernel_size=3, 35 | stride=1, 36 | dilation=dilation, 37 | bias=False, 38 | conv_type=conv_type, 39 | D=D) 40 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 41 | self.relu = MinkowskiReLU(inplace=True) 42 | self.downsample = downsample 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.norm1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.norm2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class BasicBlock(BasicBlockBase): 64 | NORM_TYPE = NormType.BATCH_NORM 65 | 66 | 67 | class BasicBlockIN(BasicBlockBase): 68 | NORM_TYPE = NormType.INSTANCE_NORM 69 | 70 | 71 | class BasicBlockINBN(BasicBlockBase): 72 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 73 | 74 | 75 | class BottleneckBase(nn.Module): 76 | expansion = 4 77 | NORM_TYPE = NormType.BATCH_NORM 78 | 79 | def __init__(self, 80 | inplanes, 81 | planes, 82 | stride=1, 83 | dilation=1, 84 | downsample=None, 85 | conv_type=ConvType.HYPERCUBE, 86 | bn_momentum=0.1, 87 | D=3): 88 | super(BottleneckBase, self).__init__() 89 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 90 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 91 | 92 | self.conv2 = conv( 93 | planes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 94 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 95 | 96 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 97 | self.norm3 = get_norm(self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum) 98 | 99 | self.relu = MinkowskiReLU(inplace=True) 100 | self.downsample = downsample 101 | 102 | def forward(self, x): 103 | residual = x 104 | 105 | out = self.conv1(x) 106 | out = self.norm1(out) 107 | out = self.relu(out) 108 | 109 | out = self.conv2(out) 110 | out = self.norm2(out) 111 | out = self.relu(out) 112 | 113 | out = self.conv3(out) 114 | out = self.norm3(out) 115 | 116 | if self.downsample is not None: 117 | residual = self.downsample(x) 118 | 119 | out += residual 120 | out = self.relu(out) 121 | 122 | return out 123 | 124 | 125 | class Bottleneck(BottleneckBase): 126 | NORM_TYPE = NormType.BATCH_NORM 127 | 128 | 129 | class BottleneckIN(BottleneckBase): 130 | NORM_TYPE = NormType.INSTANCE_NORM 131 | 132 | 133 | class BottleneckINBN(BottleneckBase): 134 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 135 | -------------------------------------------------------------------------------- /downstream/semseg/models/residual_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn as nn 7 | 8 | from models.common import get_norm 9 | 10 | import MinkowskiEngine as ME 11 | import MinkowskiEngine.MinkowskiFunctional as MEF 12 | 13 | 14 | class BasicBlockBase(nn.Module): 15 | expansion = 1 16 | NORM_TYPE = 'BN' 17 | 18 | def __init__(self, 19 | inplanes, 20 | planes, 21 | stride=1, 22 | dilation=1, 23 | downsample=None, 24 | bn_momentum=0.1, 25 | D=3): 26 | super(BasicBlockBase, self).__init__() 27 | 28 | self.conv1 = ME.MinkowskiConvolution( 29 | inplanes, planes, kernel_size=3, stride=stride, dimension=D) 30 | self.norm1 = get_norm(self.NORM_TYPE, planes, bn_momentum=bn_momentum, D=D) 31 | self.conv2 = ME.MinkowskiConvolution( 32 | planes, 33 | planes, 34 | kernel_size=3, 35 | stride=1, 36 | dilation=dilation, 37 | has_bias=False, 38 | dimension=D) 39 | self.norm2 = get_norm(self.NORM_TYPE, planes, bn_momentum=bn_momentum, D=D) 40 | self.downsample = downsample 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.norm1(out) 47 | out = MEF.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.norm2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = MEF.relu(out) 57 | 58 | return out 59 | 60 | 61 | class BasicBlockBN(BasicBlockBase): 62 | NORM_TYPE = 'BN' 63 | 64 | 65 | class BasicBlockIN(BasicBlockBase): 66 | NORM_TYPE = 'IN' 67 | 68 | 69 | def get_block(norm_type, 70 | inplanes, 71 | planes, 72 | stride=1, 73 | dilation=1, 74 | downsample=None, 75 | bn_momentum=0.1, 76 | D=3): 77 | if norm_type == 'BN': 78 | return BasicBlockBN(inplanes, planes, stride, dilation, downsample, bn_momentum, D) 79 | elif norm_type == 'IN': 80 | return BasicBlockIN(inplanes, planes, stride, dilation, downsample, bn_momentum, D) 81 | else: 82 | raise ValueError(f'Type {norm_type}, not defined') 83 | -------------------------------------------------------------------------------- /downstream/semseg/models/wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import random 7 | from torch.nn import Module 8 | 9 | from MinkowskiEngine import SparseTensor 10 | 11 | 12 | class Wrapper(Module): 13 | """ 14 | Wrapper for the segmentation networks. 
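  During training the filter branch is applied stochastically (skipped when
  random.random() >= 0.5) so the wrapped model also learns to predict without
  it; at evaluation time the filter is always applied.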
15 | """ 16 | OUT_PIXEL_DIST = -1 17 | 18 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 19 | super(Wrapper, self).__init__() 20 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 21 | 22 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 23 | raise NotImplementedError('Must initialize a model and a filter') 24 | 25 | def forward(self, x, coords, colors=None): 26 | soutput = self.model(x) 27 | 28 | # During training, make the network invariant to the filter 29 | if not self.training or random.random() < 0.5: 30 | # Filter requires the model to finish the forward pass 31 | wrapper_coords = self.filter.initialize_coords(self.model, coords, colors) 32 | finput = SparseTensor(soutput.F, wrapper_coords) 33 | soutput = self.filter(finput) 34 | 35 | return soutput 36 | -------------------------------------------------------------------------------- /downstream/semseg/scripts/train_scannet.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | #!/bin/bash 7 | 8 | DATAPATH=~/data/scannet # Download ScanNet segmentation dataset and change the path here 9 | PRETRAIN="none" # For finetuning, use the checkpoint path here. 10 | MODEL=Res16UNet34C 11 | BATCH_SIZE=${BATCH_SIZE:-6} 12 | LOG_DIR=./tmp_dir_scannet 13 | 14 | python ddp_main.py \ 15 | train.train_phase=train \ 16 | train.is_train=True \ 17 | train.lenient_weight_loading=True \ 18 | train.stat_freq=1 \ 19 | train.val_freq=500 \ 20 | train.save_freq=500 \ 21 | net.model=${MODEL} \ 22 | net.conv1_kernel_size=3 \ 23 | augmentation.normalize_color=True \ 24 | data.dataset=ScannetVoxelization2cmDataset \ 25 | data.batch_size=$BATCH_SIZE \ 26 | data.num_workers=1 \ 27 | data.num_val_workers=1 \ 28 | data.scannet_path=${DATAPATH} \ 29 | data.return_transformation=False \ 30 | test.test_original_pointcloud=False \ 31 | test.save_prediction=False \ 32 | optimizer.lr=0.8 \ 33 | optimizer.scheduler=PolyLR \ 34 | optimizer.max_iter=60000 \ 35 | misc.log_dir=${LOG_DIR} \ 36 | distributed=local \ 37 | net.weights=${PRETRAIN} \ 38 | 39 | -------------------------------------------------------------------------------- /downstream/semseg/scripts/train_stanford3d.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | #!/bin/bash 7 | 8 | DATAPATH=~/data/Stanford3D/ # Download Stanford3D dataset and change the path here 9 | PRETRAIN="none" 10 | 11 | MODEL=Res16UNet34C 12 | BATCH_SIZE=${BATCH_SIZE:-6} 13 | LOG_DIR=./tmp_dir_stanford 14 | 15 | python ddp_main.py \ 16 | train.train_phase=train \ 17 | train.is_train=True \ 18 | train.lenient_weight_loading=True \ 19 | train.stat_freq=1 \ 20 | train.val_freq=200 \ 21 | train.save_freq=100 \ 22 | net.model=${MODEL} \ 23 | net.conv1_kernel_size=3 \ 24 | data.dataset=StanfordArea5Dataset \ 25 | data.batch_size=$BATCH_SIZE \ 26 | data.voxel_size=0.05 \ 27 | data.num_workers=1 \ 28 | data.stanford3d_path=${DATAPATH} \ 29 | augmentation.data_aug_color_trans_ratio=0.05 \ 30 | augmentation.data_aug_color_jitter_std=0.005 \ 31 | optimizer.lr=0.1 \ 32 | optimizer.scheduler=PolyLR \ 33 | optimizer.max_iter=60000 \ 34 | misc.log_dir=${LOG_DIR} \ 35 | distributed=local \ 36 | net.weights=$PRETRAIN \ 37 | -------------------------------------------------------------------------------- /downstream/semseg/splits/scannet/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | test/scene0707_00.ply 2 | test/scene0708_00.ply 3 | test/scene0709_00.ply 4 | test/scene0710_00.ply 5 | test/scene0711_00.ply 6 | test/scene0712_00.ply 7 | test/scene0713_00.ply 8 | test/scene0714_00.ply 9 | test/scene0715_00.ply 10 | test/scene0716_00.ply 11 | test/scene0717_00.ply 12 | test/scene0718_00.ply 13 | test/scene0719_00.ply 14 | test/scene0720_00.ply 15 | test/scene0721_00.ply 16 | test/scene0722_00.ply 17 | test/scene0723_00.ply 18 | test/scene0724_00.ply 19 | test/scene0725_00.ply 20 | test/scene0726_00.ply 21 | test/scene0727_00.ply 22 | test/scene0728_00.ply 23 | test/scene0729_00.ply 24 | test/scene0730_00.ply 25 | test/scene0731_00.ply 26 | test/scene0732_00.ply 27 | test/scene0733_00.ply 28 | test/scene0734_00.ply 29 | test/scene0735_00.ply 30 | test/scene0736_00.ply 31 | test/scene0737_00.ply 32 | test/scene0738_00.ply 33 | test/scene0739_00.ply 34 | test/scene0740_00.ply 35 | test/scene0741_00.ply 36 | test/scene0742_00.ply 37 | test/scene0743_00.ply 38 | test/scene0744_00.ply 39 | test/scene0745_00.ply 40 | test/scene0746_00.ply 41 | test/scene0747_00.ply 42 | test/scene0748_00.ply 43 | test/scene0749_00.ply 44 | test/scene0750_00.ply 45 | test/scene0751_00.ply 46 | test/scene0752_00.ply 47 | test/scene0753_00.ply 48 | test/scene0754_00.ply 49 | test/scene0755_00.ply 50 | test/scene0756_00.ply 51 | test/scene0757_00.ply 52 | test/scene0758_00.ply 53 | test/scene0759_00.ply 54 | test/scene0760_00.ply 55 | test/scene0761_00.ply 56 | test/scene0762_00.ply 57 | test/scene0763_00.ply 58 | test/scene0764_00.ply 59 | test/scene0765_00.ply 60 | test/scene0766_00.ply 61 | test/scene0767_00.ply 62 | test/scene0768_00.ply 63 | test/scene0769_00.ply 64 | test/scene0770_00.ply 65 | test/scene0771_00.ply 66 | test/scene0772_00.ply 67 | test/scene0773_00.ply 68 | test/scene0774_00.ply 69 | test/scene0775_00.ply 70 | test/scene0776_00.ply 71 | test/scene0777_00.ply 72 | test/scene0778_00.ply 73 | test/scene0779_00.ply 74 | test/scene0780_00.ply 75 | test/scene0781_00.ply 76 | test/scene0782_00.ply 77 | test/scene0783_00.ply 78 | test/scene0784_00.ply 79 | test/scene0785_00.ply 80 | test/scene0786_00.ply 81 | test/scene0787_00.ply 82 | test/scene0788_00.ply 83 | test/scene0789_00.ply 84 | test/scene0790_00.ply 85 | test/scene0791_00.ply 86 | test/scene0792_00.ply 87 | test/scene0793_00.ply 88 | test/scene0794_00.ply 89 | test/scene0795_00.ply 
90 | test/scene0796_00.ply 91 | test/scene0797_00.ply 92 | test/scene0798_00.ply 93 | test/scene0799_00.ply 94 | test/scene0800_00.ply 95 | test/scene0801_00.ply 96 | test/scene0802_00.ply 97 | test/scene0803_00.ply 98 | test/scene0804_00.ply 99 | test/scene0805_00.ply 100 | test/scene0806_00.ply 101 | -------------------------------------------------------------------------------- /downstream/semseg/splits/scannet_old/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00_vh_clean_2.ply 2 | scene0708_00_vh_clean_2.ply 3 | scene0709_00_vh_clean_2.ply 4 | scene0710_00_vh_clean_2.ply 5 | scene0711_00_vh_clean_2.ply 6 | scene0712_00_vh_clean_2.ply 7 | scene0713_00_vh_clean_2.ply 8 | scene0714_00_vh_clean_2.ply 9 | scene0715_00_vh_clean_2.ply 10 | scene0716_00_vh_clean_2.ply 11 | scene0717_00_vh_clean_2.ply 12 | scene0718_00_vh_clean_2.ply 13 | scene0719_00_vh_clean_2.ply 14 | scene0720_00_vh_clean_2.ply 15 | scene0721_00_vh_clean_2.ply 16 | scene0722_00_vh_clean_2.ply 17 | scene0723_00_vh_clean_2.ply 18 | scene0724_00_vh_clean_2.ply 19 | scene0725_00_vh_clean_2.ply 20 | scene0726_00_vh_clean_2.ply 21 | scene0727_00_vh_clean_2.ply 22 | scene0728_00_vh_clean_2.ply 23 | scene0729_00_vh_clean_2.ply 24 | scene0730_00_vh_clean_2.ply 25 | scene0731_00_vh_clean_2.ply 26 | scene0732_00_vh_clean_2.ply 27 | scene0733_00_vh_clean_2.ply 28 | scene0734_00_vh_clean_2.ply 29 | scene0735_00_vh_clean_2.ply 30 | scene0736_00_vh_clean_2.ply 31 | scene0737_00_vh_clean_2.ply 32 | scene0738_00_vh_clean_2.ply 33 | scene0739_00_vh_clean_2.ply 34 | scene0740_00_vh_clean_2.ply 35 | scene0741_00_vh_clean_2.ply 36 | scene0742_00_vh_clean_2.ply 37 | scene0743_00_vh_clean_2.ply 38 | scene0744_00_vh_clean_2.ply 39 | scene0745_00_vh_clean_2.ply 40 | scene0746_00_vh_clean_2.ply 41 | scene0747_00_vh_clean_2.ply 42 | scene0748_00_vh_clean_2.ply 43 | scene0749_00_vh_clean_2.ply 44 | scene0750_00_vh_clean_2.ply 45 | scene0751_00_vh_clean_2.ply 46 | scene0752_00_vh_clean_2.ply 47 | scene0753_00_vh_clean_2.ply 48 | scene0754_00_vh_clean_2.ply 49 | scene0755_00_vh_clean_2.ply 50 | scene0756_00_vh_clean_2.ply 51 | scene0757_00_vh_clean_2.ply 52 | scene0758_00_vh_clean_2.ply 53 | scene0759_00_vh_clean_2.ply 54 | scene0760_00_vh_clean_2.ply 55 | scene0761_00_vh_clean_2.ply 56 | scene0762_00_vh_clean_2.ply 57 | scene0763_00_vh_clean_2.ply 58 | scene0764_00_vh_clean_2.ply 59 | scene0765_00_vh_clean_2.ply 60 | scene0766_00_vh_clean_2.ply 61 | scene0767_00_vh_clean_2.ply 62 | scene0768_00_vh_clean_2.ply 63 | scene0769_00_vh_clean_2.ply 64 | scene0770_00_vh_clean_2.ply 65 | scene0771_00_vh_clean_2.ply 66 | scene0772_00_vh_clean_2.ply 67 | scene0773_00_vh_clean_2.ply 68 | scene0774_00_vh_clean_2.ply 69 | scene0775_00_vh_clean_2.ply 70 | scene0776_00_vh_clean_2.ply 71 | scene0777_00_vh_clean_2.ply 72 | scene0778_00_vh_clean_2.ply 73 | scene0779_00_vh_clean_2.ply 74 | scene0780_00_vh_clean_2.ply 75 | scene0781_00_vh_clean_2.ply 76 | scene0782_00_vh_clean_2.ply 77 | scene0783_00_vh_clean_2.ply 78 | scene0784_00_vh_clean_2.ply 79 | scene0785_00_vh_clean_2.ply 80 | scene0786_00_vh_clean_2.ply 81 | scene0787_00_vh_clean_2.ply 82 | scene0788_00_vh_clean_2.ply 83 | scene0789_00_vh_clean_2.ply 84 | scene0790_00_vh_clean_2.ply 85 | scene0791_00_vh_clean_2.ply 86 | scene0792_00_vh_clean_2.ply 87 | scene0793_00_vh_clean_2.ply 88 | scene0794_00_vh_clean_2.ply 89 | scene0795_00_vh_clean_2.ply 90 | scene0796_00_vh_clean_2.ply 91 | scene0797_00_vh_clean_2.ply 92 | scene0798_00_vh_clean_2.ply 93 
| scene0799_00_vh_clean_2.ply 94 | scene0800_00_vh_clean_2.ply 95 | scene0801_00_vh_clean_2.ply 96 | scene0802_00_vh_clean_2.ply 97 | scene0803_00_vh_clean_2.ply 98 | scene0804_00_vh_clean_2.ply 99 | scene0805_00_vh_clean_2.ply 100 | scene0806_00_vh_clean_2.ply 101 | -------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area1.txt: -------------------------------------------------------------------------------- 1 | Area_1/office_8.ply 2 | Area_1/office_3.ply 3 | Area_1/office_24.ply 4 | Area_1/hallway_3.ply 5 | Area_1/office_9.ply 6 | Area_1/pantry_1.ply 7 | Area_1/office_2.ply 8 | Area_1/copyRoom_1.ply 9 | Area_1/office_30.ply 10 | Area_1/office_5.ply 11 | Area_1/office_13.ply 12 | Area_1/office_21.ply 13 | Area_1/office_25.ply 14 | Area_1/hallway_6.ply 15 | Area_1/office_29.ply 16 | Area_1/hallway_8.ply 17 | Area_1/office_4.ply 18 | Area_1/office_23.ply 19 | Area_1/office_7.ply 20 | Area_1/hallway_5.ply 21 | Area_1/office_20.ply 22 | Area_1/office_17.ply 23 | Area_1/office_18.ply 24 | Area_1/office_15.ply 25 | Area_1/hallway_1.ply 26 | Area_1/hallway_4.ply 27 | Area_1/hallway_7.ply 28 | Area_1/office_6.ply 29 | Area_1/office_14.ply 30 | Area_1/office_27.ply 31 | Area_1/office_22.ply 32 | Area_1/hallway_2.ply 33 | Area_1/WC_1.ply 34 | Area_1/office_12.ply 35 | Area_1/office_16.ply 36 | Area_1/conferenceRoom_2.ply 37 | Area_1/office_26.ply 38 | Area_1/office_11.ply 39 | Area_1/office_1.ply 40 | Area_1/office_31.ply 41 | Area_1/office_10.ply 42 | Area_1/office_19.ply 43 | Area_1/office_28.ply 44 | Area_1/conferenceRoom_1.ply -------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area2.txt: -------------------------------------------------------------------------------- 1 | Area_2/auditorium_2.ply 2 | Area_2/storage_6.ply 3 | Area_2/office_8.ply 4 | Area_2/office_3.ply 5 | Area_2/storage_8.ply 6 | Area_2/hallway_3.ply 7 | Area_2/office_9.ply 8 | Area_2/auditorium_1.ply 9 | Area_2/storage_2.ply 10 | Area_2/office_2.ply 11 | Area_2/office_5.ply 12 | Area_2/office_13.ply 13 | Area_2/hallway_9.ply 14 | Area_2/hallway_6.ply 15 | Area_2/hallway_8.ply 16 | Area_2/storage_5.ply 17 | Area_2/office_4.ply 18 | Area_2/office_7.ply 19 | Area_2/hallway_5.ply 20 | Area_2/hallway_12.ply 21 | Area_2/storage_4.ply 22 | Area_2/hallway_1.ply 23 | Area_2/hallway_11.ply 24 | Area_2/hallway_4.ply 25 | Area_2/hallway_7.ply 26 | Area_2/office_6.ply 27 | Area_2/office_14.ply 28 | Area_2/hallway_10.ply 29 | Area_2/hallway_2.ply 30 | Area_2/WC_1.ply 31 | Area_2/office_12.ply 32 | Area_2/storage_3.ply 33 | Area_2/office_11.ply 34 | Area_2/storage_7.ply 35 | Area_2/WC_2.ply 36 | Area_2/office_1.ply 37 | Area_2/office_10.ply 38 | Area_2/storage_1.ply 39 | Area_2/storage_9.ply 40 | Area_2/conferenceRoom_1.ply -------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area3.txt: -------------------------------------------------------------------------------- 1 | Area_3/office_8.ply 2 | Area_3/office_3.ply 3 | Area_3/hallway_3.ply 4 | Area_3/office_9.ply 5 | Area_3/storage_2.ply 6 | Area_3/office_2.ply 7 | Area_3/office_5.ply 8 | Area_3/lounge_2.ply 9 | Area_3/hallway_6.ply 10 | Area_3/office_4.ply 11 | Area_3/office_7.ply 12 | Area_3/hallway_5.ply 13 | Area_3/hallway_1.ply 14 | Area_3/hallway_4.ply 15 | Area_3/office_6.ply 16 | Area_3/lounge_1.ply 17 | Area_3/hallway_2.ply 18 | Area_3/WC_1.ply 19 | Area_3/WC_2.ply 20 | Area_3/office_1.ply 21 | 
Area_3/office_10.ply 22 | Area_3/storage_1.ply 23 | Area_3/conferenceRoom_1.ply -------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area4.txt: -------------------------------------------------------------------------------- 1 | Area_4/office_8.ply 2 | Area_4/WC_3.ply 3 | Area_4/office_3.ply 4 | Area_4/hallway_13.ply 5 | Area_4/hallway_3.ply 6 | Area_4/office_9.ply 7 | Area_4/storage_2.ply 8 | Area_4/office_2.ply 9 | Area_4/office_5.ply 10 | Area_4/office_13.ply 11 | Area_4/office_21.ply 12 | Area_4/hallway_9.ply 13 | Area_4/hallway_6.ply 14 | Area_4/hallway_8.ply 15 | Area_4/lobby_2.ply 16 | Area_4/office_4.ply 17 | Area_4/office_7.ply 18 | Area_4/hallway_5.ply 19 | Area_4/hallway_12.ply 20 | Area_4/office_20.ply 21 | Area_4/office_17.ply 22 | Area_4/office_18.ply 23 | Area_4/office_15.ply 24 | Area_4/storage_4.ply 25 | Area_4/hallway_1.ply 26 | Area_4/hallway_11.ply 27 | Area_4/hallway_4.ply 28 | Area_4/hallway_7.ply 29 | Area_4/office_6.ply 30 | Area_4/office_14.ply 31 | Area_4/conferenceRoom_3.ply 32 | Area_4/hallway_10.ply 33 | Area_4/office_22.ply 34 | Area_4/hallway_2.ply 35 | Area_4/WC_1.ply 36 | Area_4/office_12.ply 37 | Area_4/storage_3.ply 38 | Area_4/office_16.ply 39 | Area_4/conferenceRoom_2.ply 40 | Area_4/hallway_14.ply 41 | Area_4/WC_4.ply 42 | Area_4/office_11.ply 43 | Area_4/WC_2.ply 44 | Area_4/office_1.ply 45 | Area_4/office_10.ply 46 | Area_4/storage_1.ply 47 | Area_4/office_19.ply 48 | Area_4/conferenceRoom_1.ply 49 | Area_4/lobby_1.ply -------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area5.txt: -------------------------------------------------------------------------------- 1 | Area_5/office_8.ply 2 | Area_5/office_3.ply 3 | Area_5/hallway_13.ply 4 | Area_5/office_41.ply 5 | Area_5/office_24.ply 6 | Area_5/hallway_3.ply 7 | Area_5/office_9.ply 8 | Area_5/pantry_1.ply 9 | Area_5/storage_2.ply 10 | Area_5/office_2.ply 11 | Area_5/office_30.ply 12 | Area_5/office_39.ply 13 | Area_5/office_5.ply 14 | Area_5/office_13.ply 15 | Area_5/office_21.ply 16 | Area_5/office_25.ply 17 | Area_5/office_33.ply 18 | Area_5/hallway_9.ply 19 | Area_5/hallway_6.ply 20 | Area_5/office_29.ply 21 | Area_5/hallway_8.ply 22 | Area_5/office_4.ply 23 | Area_5/office_23.ply 24 | Area_5/office_40.ply 25 | Area_5/office_7.ply 26 | Area_5/hallway_5.ply 27 | Area_5/hallway_12.ply 28 | Area_5/hallway_15.ply 29 | Area_5/office_20.ply 30 | Area_5/office_17.ply 31 | Area_5/office_18.ply 32 | Area_5/office_15.ply 33 | Area_5/storage_4.ply 34 | Area_5/hallway_1.ply 35 | Area_5/hallway_11.ply 36 | Area_5/hallway_4.ply 37 | Area_5/office_35.ply 38 | Area_5/hallway_7.ply 39 | Area_5/office_6.ply 40 | Area_5/office_37.ply 41 | Area_5/office_14.ply 42 | Area_5/office_27.ply 43 | Area_5/office_34.ply 44 | Area_5/conferenceRoom_3.ply 45 | Area_5/hallway_10.ply 46 | Area_5/office_22.ply 47 | Area_5/office_42.ply 48 | Area_5/hallway_2.ply 49 | Area_5/WC_1.ply 50 | Area_5/office_38.ply 51 | Area_5/office_12.ply 52 | Area_5/storage_3.ply 53 | Area_5/office_16.ply 54 | Area_5/conferenceRoom_2.ply 55 | Area_5/office_36.ply 56 | Area_5/hallway_14.ply 57 | Area_5/office_26.ply 58 | Area_5/office_11.ply 59 | Area_5/WC_2.ply 60 | Area_5/office_1.ply 61 | Area_5/office_31.ply 62 | Area_5/office_10.ply 63 | Area_5/office_32.ply 64 | Area_5/storage_1.ply 65 | Area_5/office_19.ply 66 | Area_5/office_28.ply 67 | Area_5/conferenceRoom_1.ply 68 | Area_5/lobby_1.ply 
-------------------------------------------------------------------------------- /downstream/semseg/splits/stanford/area6.txt: -------------------------------------------------------------------------------- 1 | Area_6/office_8.ply 2 | Area_6/office_3.ply 3 | Area_6/office_24.ply 4 | Area_6/hallway_3.ply 5 | Area_6/office_9.ply 6 | Area_6/pantry_1.ply 7 | Area_6/office_2.ply 8 | Area_6/copyRoom_1.ply 9 | Area_6/office_30.ply 10 | Area_6/office_5.ply 11 | Area_6/office_13.ply 12 | Area_6/openspace_1.ply 13 | Area_6/office_21.ply 14 | Area_6/office_25.ply 15 | Area_6/office_33.ply 16 | Area_6/hallway_6.ply 17 | Area_6/office_29.ply 18 | Area_6/office_4.ply 19 | Area_6/office_23.ply 20 | Area_6/office_7.ply 21 | Area_6/hallway_5.ply 22 | Area_6/office_20.ply 23 | Area_6/office_17.ply 24 | Area_6/office_18.ply 25 | Area_6/office_15.ply 26 | Area_6/hallway_1.ply 27 | Area_6/hallway_4.ply 28 | Area_6/office_35.ply 29 | Area_6/office_6.ply 30 | Area_6/lounge_1.ply 31 | Area_6/office_37.ply 32 | Area_6/office_14.ply 33 | Area_6/office_27.ply 34 | Area_6/office_34.ply 35 | Area_6/office_22.ply 36 | Area_6/hallway_2.ply 37 | Area_6/office_12.ply 38 | Area_6/office_16.ply 39 | Area_6/office_36.ply 40 | Area_6/office_26.ply 41 | Area_6/office_11.ply 42 | Area_6/office_1.ply 43 | Area_6/office_31.ply 44 | Area_6/office_10.ply 45 | Area_6/office_32.ply 46 | Area_6/office_19.ply 47 | Area_6/office_28.ply 48 | Area_6/conferenceRoom_1.ply -------------------------------------------------------------------------------- /downstream/votenet_det_new/config/default.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra/launcher: submitit_slurm 3 | - hydra/hydra_logging: colorlog 4 | 5 | net: 6 | model: votenet 7 | is_train: True 8 | backbone: pointnet2 # sparseconv 9 | weights: 10 | num_target: 256 11 | vote_factor: 1 12 | cluster_sampling: vote_fps 13 | 14 | data: 15 | dataset: sunrgbd 16 | data_ratio: 1.0 17 | num_points: 20000 18 | num_workers: 4 19 | batch_size: 8 20 | no_height: False 21 | use_color: False 22 | use_sunrgbd_v2: False 23 | overwrite: False 24 | dump_results: False 25 | voxelization: False 26 | voxel_size: 0.05 27 | shuffle_datast: False 28 | 29 | optimizer: 30 | max_epoch: 180 31 | learning_rate: 0.001 32 | lr_decay_steps: [80, 120, 160] 33 | lr_decay_rates: [0.1, 0.1, 0.1] 34 | weight_decay: 0 35 | bn_decay_step: 20 36 | bn_decay_rate: 0.5 37 | 38 | test: 39 | use_3d_nms: False 40 | use_cls_nms: False 41 | use_old_type_nms: False 42 | per_class_proposal: False 43 | nms_iou: 0.25 44 | conf_thresh: 0.05 45 | faster_eval: False # Faster evaluation by skipping empty bounding box removal. 46 | ap_iou_thresholds: [0.25, 0.5] 47 | 48 | misc: 49 | log_dir: log 50 | 51 | hydra: 52 | run: 53 | dir: ${misc.log_dir} 54 | sweep: 55 | dir: ${misc.log_dir} 56 | launcher: 57 | partition: dev 58 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 59 | name: ${hydra.job.name} 60 | timeout_min: 2000 61 | cpus_per_task: 20 62 | gpus_per_node: 1 63 | tasks_per_node: 1 64 | mem_gb: 256 65 | nodes: 1 66 | constraint: volta32gb 67 | max_num_timeout: 3 68 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data 2 | 3 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet).
Move or link the `scans` folder so that `scans` directly contains the per-scene folders (e.g. `scene0001_01`). 4 | 5 | 2. Extract point clouds and annotations (semantic seg, instance seg etc.) by running `python batch_load_scannet_data.py`, which will create a folder named `scannet_train_detection_data` here. 6 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Batch-load ScanNet scenes with vertices and ground-truth labels 7 | for semantic and instance segmentation. 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | from load_scannet_data import export 16 | # import pdb  # unused debugging import, disabled 17 | 18 | SCANNET_DIR = 'scans' 19 | TRAIN_SCAN_NAMES = [line.rstrip() for line in open('meta_data/scannet_train.txt')] 20 | LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv' 21 | DONOTCARE_CLASS_IDS = np.array([]) 22 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 23 | MAX_NUM_POINT = 50000 24 | OUTPUT_FOLDER = './scannet_train_detection_data' 25 | 26 | def export_one_scan(scan_name, output_filename_prefix): 27 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 28 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 29 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 30 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans.
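# Note on array shapes (inferred from the indexing below and from data_viz.py,
# not from an explicit contract of load_scannet_data.export): mesh_vertices is
# (N, 6) with xyz followed by rgb, semantic_labels and instance_labels are (N,),
# and instance_bboxes carries the NYU40 semantic id in its last column, which
# is what the OBJ_CLASS_IDS filter below relies on.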
31 | mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = \ 32 | export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 33 | 34 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 35 | mesh_vertices = mesh_vertices[mask,:] 36 | semantic_labels = semantic_labels[mask] 37 | instance_labels = instance_labels[mask] 38 | 39 | num_instances = len(np.unique(instance_labels)) 40 | print('Num of instances: ', num_instances) 41 | 42 | bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 43 | instance_bboxes = instance_bboxes[bbox_mask,:] 44 | print('Num of care instances: ', instance_bboxes.shape[0]) 45 | 46 | N = mesh_vertices.shape[0] 47 | if N > MAX_NUM_POINT: 48 | choices = np.random.choice(N, MAX_NUM_POINT, replace=False) 49 | mesh_vertices = mesh_vertices[choices, :] 50 | semantic_labels = semantic_labels[choices] 51 | instance_labels = instance_labels[choices] 52 | 53 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 54 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 55 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 56 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 57 | 58 | def batch_export(): 59 | if not os.path.exists(OUTPUT_FOLDER): 60 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 61 | os.mkdir(OUTPUT_FOLDER) 62 | 63 | for scan_name in TRAIN_SCAN_NAMES: 64 | print('-'*20+'begin') 65 | print(datetime.datetime.now()) 66 | print(scan_name) 67 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 68 | if os.path.isfile(output_filename_prefix+'_vert.npy'): 69 | print('File already exists, skipping.') 70 | print('-'*20+'done') 71 | continue 72 | try: 73 | export_one_scan(scan_name, output_filename_prefix) 74 | except Exception as e: 75 | print('Failed to export scan %s: %s'%(scan_name, e)) 76 | print('-'*20+'done') 77 | 78 | if __name__=='__main__': 79 | batch_export() 80 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/data_viz.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
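# Quick sanity-check script for the exported detection data: it loads the
# *_vert.npy, *_ins_label.npy, *_sem_label.npy and *_bbox.npy arrays written by
# batch_load_scannet_data.py and dumps color-coded point clouds into
# data_viz_dump/ for inspection in a mesh viewer such as MeshLab.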
5 | 6 | import sys 7 | import os 8 | 9 | BASE_DIR = os.path.dirname(__file__) 10 | sys.path.append(BASE_DIR) 11 | 12 | import numpy as np 13 | import pc_util 14 | 15 | scene_name = 'scannet_train_detection_data/scene0002_00' 16 | output_folder = 'data_viz_dump' 17 | 18 | data = np.load(scene_name+'_vert.npy') 19 | scene_points = data[:,0:3] 20 | colors = data[:,3:] 21 | instance_labels = np.load(scene_name+'_ins_label.npy') 22 | semantic_labels = np.load(scene_name+'_sem_label.npy') 23 | instance_bboxes = np.load(scene_name+'_bbox.npy') 24 | 25 | print(np.unique(instance_labels)) 26 | print(np.unique(semantic_labels)) 27 | input() 28 | if not os.path.exists(output_folder): 29 | os.mkdir(output_folder) 30 | 31 | # Write scene as OBJ file for visualization 32 | pc_util.write_ply_rgb(scene_points, colors, os.path.join(output_folder, 'scene.obj')) 33 | pc_util.write_ply_color(scene_points, instance_labels, os.path.join(output_folder, 'scene_instance.obj')) 34 | pc_util.write_ply_color(scene_points, semantic_labels, os.path.join(output_folder, 'scene_semantic.obj')) 35 | 36 | from model_util_scannet import ScannetDatasetConfig 37 | DC = ScannetDatasetConfig() 38 | print(instance_bboxes.shape) 39 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/downstream/votenet_det_new/lib/datasets/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | 
scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | 
scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | from box_util import get_3d_box 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.num_class = 18 18 | self.num_heading_bin = 1 19 | self.num_size_cluster = 18 20 | 21 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 22 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 23 | 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 24 | self.class2type = {self.type2class[t]:t for t in self.type2class} 25 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 26 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 27 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 28 | self.type_mean_size = {} 29 | for i in range(self.num_size_cluster): 30 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 31 | 32 | def angle2class(self, angle): 33 | ''' Convert continuous angle to discrete class. 34 | [optional] also returns a small regression number from 35 | the class center angle to the current angle. 36 | 37 | angle is from 0-2pi (or -pi~pi), class centers at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 38 | returns an int32 class in 0,1,...,N-1 and a number such that 39 | class*(2pi/N) + number = angle 40 | 41 | NOT USED. 42 | ''' 43 | assert(False) 44 | 45 | def class2angle(self, pred_cls, residual, to_label_format=True): 46 | ''' Inverse function to angle2class. 47 | 48 | As ScanNet only has axis-aligned boxes, angles are always 0.
''' 49 | return 0 50 | 51 | def size2class(self, size, type_name): 52 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 53 | size_class = self.type2class[type_name] 54 | size_residual = size - self.type_mean_size[type_name] 55 | return size_class, size_residual 56 | 57 | def class2size(self, pred_cls, residual): 58 | ''' Inverse function to size2class ''' 59 | return self.mean_size_arr[pred_cls, :] + residual 60 | 61 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 62 | heading_angle = self.class2angle(heading_class, heading_residual) 63 | box_size = self.class2size(int(size_class), size_residual) 64 | obb = np.zeros((7,)) 65 | obb[0:3] = center 66 | obb[3:6] = box_size 67 | obb[6] = heading_angle*-1 68 | return obb 69 | 70 | def rotate_aligned_boxes(input_boxes, rot_mat): 71 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 72 | new_centers = np.dot(centers, np.transpose(rot_mat)) 73 | 74 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 75 | new_x = np.zeros((dx.shape[0], 4)) 76 | new_y = np.zeros((dx.shape[0], 4)) 77 | 78 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 79 | crnrs = np.zeros((dx.shape[0], 3)) 80 | crnrs[:,0] = crnr[0]*dx 81 | crnrs[:,1] = crnr[1]*dy 82 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 83 | new_x[:,i] = crnrs[:,0] 84 | new_y[:,i] = crnrs[:,1] 85 | 86 | 87 | new_dx = 2.0*np.max(new_x, 1) 88 | new_dy = 2.0*np.max(new_y, 1) 89 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 90 | 91 | return np.concatenate([new_centers, new_lengths], axis=1) 92 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts ''' 7 | import os 8 | import sys 9 | import json 10 | import csv 11 | 12 | try: 13 | import numpy as np 14 | except: 15 | print("Failed to import numpy package.") 16 | sys.exit(-1) 17 | 18 | try: 19 | from plyfile import PlyData, PlyElement 20 | except: 21 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 22 | print("pip install plyfile") 23 | sys.exit(-1) 24 | 25 | def represents_int(s): 26 | ''' Return True if string s represents an int. ''' 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 35 | assert os.path.isfile(filename) 36 | mapping = dict() 37 | with open(filename) as csvfile: 38 | reader = csv.DictReader(csvfile, delimiter='\t') 39 | for row in reader: 40 | mapping[row[label_from]] = int(row[label_to]) 41 | if represents_int(list(mapping.keys())[0]): 42 | mapping = {int(k):v for k,v in mapping.items()} 43 | return mapping 44 | 45 | def read_mesh_vertices(filename): 46 | """ read XYZ for each vertex.
47 | """ 48 | assert os.path.isfile(filename) 49 | with open(filename, 'rb') as f: 50 | plydata = PlyData.read(f) 51 | num_verts = plydata['vertex'].count 52 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 53 | vertices[:,0] = plydata['vertex'].data['x'] 54 | vertices[:,1] = plydata['vertex'].data['y'] 55 | vertices[:,2] = plydata['vertex'].data['z'] 56 | return vertices 57 | 58 | def read_mesh_vertices_rgb(filename): 59 | """ read XYZ RGB for each vertex. 60 | Note: RGB values are in 0-255 61 | """ 62 | assert os.path.isfile(filename) 63 | with open(filename, 'rb') as f: 64 | plydata = PlyData.read(f) 65 | num_verts = plydata['vertex'].count 66 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 67 | vertices[:,0] = plydata['vertex'].data['x'] 68 | vertices[:,1] = plydata['vertex'].data['y'] 69 | vertices[:,2] = plydata['vertex'].data['z'] 70 | vertices[:,3] = plydata['vertex'].data['red'] 71 | vertices[:,4] = plydata['vertex'].data['green'] 72 | vertices[:,5] = plydata['vertex'].data['blue'] 73 | return vertices 74 | 75 | 76 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/OFFICIAL_SUNRGBD/SUNRGBD: -------------------------------------------------------------------------------- 1 | /private/home/jgu/data/3d_ssl2/SUN_RGBD/SUNRGBD -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/OFFICIAL_SUNRGBD/SUNRGBDMeta2DBB_v2.mat: -------------------------------------------------------------------------------- 1 | /private/home/jgu/data/3d_ssl2/SUN_RGBD/SUNRGBDMeta2DBB_v2.mat -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat: -------------------------------------------------------------------------------- 1 | /private/home/jgu/data/3d_ssl2/SUN_RGBD/SUNRGBDMeta3DBB_v2.mat -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/OFFICIAL_SUNRGBD/SUNRGBDtoolbox: -------------------------------------------------------------------------------- 1 | /private/home/jgu/data/3d_ssl2/SUN_RGBD/SUNRGBDtoolbox -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/OFFICIAL_SUNRGBD/__MACOSX: -------------------------------------------------------------------------------- 1 | /private/home/jgu/data/3d_ssl2/SUN_RGBD/__MACOSX -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare SUN RGB-D Data 2 | 3 | 1. Download SUNRGBD v2 data [HERE](http://rgbd.cs.princeton.edu/data/) (SUNRGBD.zip, SUNRGBDMeta2DBB_v2.mat, SUNRGBDMeta3DBB_v2.mat) and the toolkits (SUNRGBDtoolbox.zip). Move all the downloaded files under OFFICIAL_SUNRGBD. Unzip the zip files. 4 | 5 | 2. Extract point clouds and annotations (class, v2 2D -- xmin,ymin,xmax,ymax, and 3D bounding boxes -- centroids, size, 2D heading) by running `extract_split.m`, `extract_rgbd_data_v2.m` and `extract_rgbd_data_v1.m` under the `matlab` folder. 6 | 7 | 3. 
Prepare data by running `python sunrgbd_data.py --gen_v1_data` 8 | 9 | You can also examine and visualize the data with `python sunrgbd_data.py --viz` and use MeshLab to view the generated PLY files at `data_viz_dump`. 10 | 11 | NOTE: SUNRGBDtoolbox.zip should have MD5 hash `18d22e1761d36352f37232cba102f91f` (you can check the hash with `md5 SUNRGBDtoolbox.zip` on Mac OS or `md5sum SUNRGBDtoolbox.zip` on Linux) 12 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/matlab/extract_rgbd_data_v1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Extract using V1 labels. 11 | % 12 | % Author: Charles R. Qi 13 | % 14 | clear; close all; clc; 15 | addpath(genpath('.')) 16 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 17 | %% V1 2D&3D BB and Seg masks 18 | load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/Metadata/SUNRGBDMeta.mat') 19 | % load('./Metadata/SUNRGBD2Dseg.mat') 20 | 21 | %% Create folders 22 | det_label_folder = '../sunrgbd_trainval/label_v1/'; 23 | mkdir(det_label_folder); 24 | %% Read 25 | for imageId = 1:10335 26 | imageId 27 | try 28 | data = SUNRGBDMeta(imageId); 29 | data.depthpath(1:16) = ''; 30 | data.depthpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.depthpath); 31 | data.rgbpath(1:16) = ''; 32 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.rgbpath); 33 | 34 | % MAT files are 3x smaller than TXT files. In Python we can use 35 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 36 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 37 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 38 | 39 | % Write 2D and 3D box label 40 | data2d = data; 41 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 42 | for j = 1:length(data.groundtruth3DBB) 43 | centroid = data.groundtruth3DBB(j).centroid; 44 | classname = data.groundtruth3DBB(j).classname; 45 | orientation = data.groundtruth3DBB(j).orientation; 46 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 47 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 48 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 49 | end 50 | fclose(fid); 51 | 52 | catch 53 | end 54 | 55 | end 56 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/matlab/extract_rgbd_data_v2.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Compared to extract_rgbd_data.m in frustum_pointnets, use v2 2D and 3D 11 | % bboxes. 12 | % 13 | % Author: Charles R.
Qi 14 | % 15 | clear; close all; clc; 16 | addpath(genpath('.')) 17 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/readData') 18 | %% V1 2D&3D BB and Seg masks 19 | % load('./Metadata/SUNRGBDMeta.mat') 20 | % load('./Metadata/SUNRGBD2Dseg.mat') 21 | 22 | %% V2 3DBB annotations (overwrites SUNRGBDMeta) 23 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 24 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta2DBB_v2.mat'); 25 | %% Create folders 26 | depth_folder = '../sunrgbd_trainval/depth/'; 27 | image_folder = '../sunrgbd_trainval/image/'; 28 | calib_folder = '../sunrgbd_trainval/calib/'; 29 | det_label_folder = '../sunrgbd_trainval/label/'; 30 | seg_label_folder = '../sunrgbd_trainval/seg_label/'; 31 | mkdir(depth_folder); 32 | mkdir(image_folder); 33 | mkdir(calib_folder); 34 | mkdir(det_label_folder); 35 | mkdir(seg_label_folder); 36 | %% Read 37 | parfor imageId = 1:10335 38 | imageId 39 | try 40 | data = SUNRGBDMeta(imageId); 41 | data.depthpath(1:16) = ''; 42 | data.depthpath = strcat('../OFFICIAL_SUNRGBD', data.depthpath); 43 | data.rgbpath(1:16) = ''; 44 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD', data.rgbpath); 45 | 46 | % Write point cloud in depth map 47 | [rgb,points3d,depthInpaint,imsize]=read3dPoints(data); 48 | rgb(isnan(points3d(:,1)),:) = []; 49 | points3d(isnan(points3d(:,1)),:) = []; 50 | points3d_rgb = [points3d, rgb]; 51 | 52 | % MAT files are 3x smaller than TXT files. In Python we can use 53 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 54 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 55 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 56 | parsave(strcat(depth_folder, mat_filename), points3d_rgb); 57 | 58 | % Write images 59 | copyfile(data.rgbpath, sprintf('%s/%06d.jpg', image_folder, imageId)); 60 | 61 | % Write calibration 62 | dlmwrite(strcat(calib_folder, txt_filename), data.Rtilt(:)', 'delimiter', ' '); 63 | dlmwrite(strcat(calib_folder, txt_filename), data.K(:)', 'delimiter', ' ', '-append'); 64 | 65 | % Write 2D and 3D box label 66 | data2d = SUNRGBDMeta2DBB(imageId); 67 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 68 | for j = 1:length(data.groundtruth3DBB) 69 | centroid = data.groundtruth3DBB(j).centroid; 70 | classname = data.groundtruth3DBB(j).classname; 71 | orientation = data.groundtruth3DBB(j).orientation; 72 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 73 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 74 | assert(strcmp(data2d.groundtruth2DBB(j).classname, classname)); 75 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 76 | end 77 | fclose(fid); 78 | 79 | catch 80 | end 81 | 82 | end 83 | 84 | function parsave(filename, instance) 85 | save(filename, 'instance'); 86 | end 87 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/matlab/extract_split.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump train/val split. 7 | % Author: Charles R. 
Qi 8 | 9 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 10 | 11 | %% Construct Hash Map 12 | hash_train = java.util.Hashtable; 13 | hash_val = java.util.Hashtable; 14 | 15 | split = load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat'); 16 | 17 | N_train = length(split.alltrain); 18 | N_val = length(split.alltest); 19 | 20 | for i = 1:N_train 21 | folder_path = split.alltrain{i}; 22 | folder_path(1:16) = ''; 23 | hash_train.put(folder_path,0); 24 | end 25 | for i = 1:N_val 26 | folder_path = split.alltest{i}; 27 | folder_path(1:16) = ''; 28 | hash_val.put(folder_path,0); 29 | end 30 | 31 | %% Map data to train or val set. 32 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 33 | 34 | fid_train = fopen('../sunrgbd_trainval/train_data_idx.txt', 'w'); 35 | fid_val = fopen('../sunrgbd_trainval/val_data_idx.txt', 'w'); 36 | 37 | for imageId = 1:10335 38 | data = SUNRGBDMeta(imageId); 39 | depthpath = data.depthpath; 40 | depthpath(1:16) = ''; 41 | [filepath,name,ext] = fileparts(depthpath); 42 | [filepath,name,ext] = fileparts(filepath); 43 | if hash_train.containsKey(filepath) 44 | fprintf(fid_train, '%d\n', imageId); 45 | elseif hash_val.containsKey(filepath) 46 | fprintf(fid_val, '%d\n', imageId); 47 | else 48 | a = 1; 49 | end 50 | end 51 | fclose(fid_train); 52 | fclose(fid_val); 53 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/model_util_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | class SunrgbdDatasetConfig(object): 15 | def __init__(self): 16 | self.num_class = 10 17 | self.num_heading_bin = 12 18 | self.num_size_cluster = 10 19 | 20 | self.type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | self.class2type = {self.type2class[t]:t for t in self.type2class} 22 | self.type2onehotclass={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 23 | self.type_mean_size = {'bathtub': np.array([0.765840,1.398258,0.472728]), 24 | 'bed': np.array([2.114256,1.620300,0.927272]), 25 | 'bookshelf': np.array([0.404671,1.071108,1.688889]), 26 | 'chair': np.array([0.591958,0.552978,0.827272]), 27 | 'desk': np.array([0.695190,1.346299,0.736364]), 28 | 'dresser': np.array([0.528526,1.002642,1.172878]), 29 | 'night_stand': np.array([0.500618,0.632163,0.683424]), 30 | 'sofa': np.array([0.923508,1.867419,0.845495]), 31 | 'table': np.array([0.791118,1.279516,0.718182]), 32 | 'toilet': np.array([0.699104,0.454178,0.756250])} 33 | 34 | self.mean_size_arr = np.zeros((self.num_size_cluster, 3)) 35 | for i in range(self.num_size_cluster): 36 | self.mean_size_arr[i,:] = self.type_mean_size[self.class2type[i]] 37 | 38 | def size2class(self, size, type_name): 39 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 40 | size_class = self.type2class[type_name] 41 | size_residual = size - self.type_mean_size[type_name] 42 | return size_class, size_residual 43 | 44 
| def class2size(self, pred_cls, residual): 45 | ''' Inverse function to size2class ''' 46 | mean_size = self.type_mean_size[self.class2type[pred_cls]] 47 | return mean_size + residual 48 | 49 | def angle2class(self, angle): 50 | ''' Convert continuous angle to discrete class. 51 | [optional] also returns a small regression number from 52 | the class center angle to the current angle. 53 | 54 | angle is from 0-2pi (or -pi~pi), class centers at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 55 | returns an int32 class in 0,1,...,N-1 and a number such that 56 | class*(2pi/N) + number = angle 57 | ''' 58 | num_class = self.num_heading_bin 59 | angle = angle%(2*np.pi) 60 | assert(angle>=0 and angle<=2*np.pi) 61 | angle_per_class = 2*np.pi/float(num_class) 62 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 63 | class_id = int(shifted_angle/angle_per_class) 64 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 65 | return class_id, residual_angle 66 | 67 | def class2angle(self, pred_cls, residual, to_label_format=True): 68 | ''' Inverse function to angle2class ''' 69 | num_class = self.num_heading_bin 70 | angle_per_class = 2*np.pi/float(num_class) 71 | angle_center = pred_cls * angle_per_class 72 | angle = angle_center + residual 73 | if to_label_format and angle>np.pi: 74 | angle = angle - 2*np.pi 75 | return angle 76 | 77 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 78 | heading_angle = self.class2angle(heading_class, heading_residual) 79 | box_size = self.class2size(int(size_class), size_residual) 80 | obb = np.zeros((7,)) 81 | obb[0:3] = center 82 | obb[3:6] = box_size 83 | obb[6] = heading_angle*-1 84 | return obb 85 | 86 | 87 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/calib: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/calib -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/depth: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/depth -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/eulerangles.py: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/eulerangles.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/image: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/image -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/label: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/label -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/label_v1: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/label_v1 --------------------------------------------------------------------------------
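The heading-angle binning in `model_util_sunrgbd.py` above admits a quick round-trip sanity check; a minimal sketch, assuming the module is importable (e.g. run from the `sunrgbd` dataset folder):

```python
import numpy as np
from model_util_sunrgbd import SunrgbdDatasetConfig

cfg = SunrgbdDatasetConfig()
# With num_heading_bin = 12, each bin spans 2*pi/12 (30 degrees) and bin
# centers sit at 0, 30, 60, ... degrees. An angle of 100 degrees lands in
# bin 3 (center 90 degrees) with a +10 degree residual, and class2angle
# inverts the encoding exactly.
angle = np.deg2rad(100.0)
cls_id, residual = cfg.angle2class(angle)
recovered = cfg.class2angle(cls_id, residual, to_label_format=False)
assert cls_id == 3
assert np.isclose(recovered, angle)
print(cls_id, np.rad2deg(residual))  # -> 3 10.0
```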
/downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/pc_util.py: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/pc_util.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/pcrgb.obj: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/pcrgb.obj -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/plyfile.py: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/plyfile.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/seg_label: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/seg_label -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/tmp.py: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/tmp.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/train_data_idx.txt: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/train_data_idx.txt -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/training: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/training -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/datasets/sunrgbd/sunrgbd_trainval/val_data_idx.txt: -------------------------------------------------------------------------------- 1 | /private/home/xinleic/data/sunrgbd_v2/val_data_idx.txt -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Evaluation routine for 3D object detection with SUN RGB-D and ScanNet. 
7 | """ 8 | 9 | import os 10 | import sys 11 | import logging 12 | import numpy as np 13 | from datetime import datetime 14 | import argparse 15 | import importlib 16 | import torch 17 | import torch.nn as nn 18 | import torch.optim as optim 19 | from torch.utils.data import DataLoader 20 | from models.ap_helper import APCalculator, parse_predictions, parse_groundtruths 21 | from models.dump_helper import dump_results 22 | from models.loss_helper import get_loss as criterion 23 | 24 | def test(net, test_dataloader, dataset_config, config): 25 | if config.test.use_cls_nms: 26 | assert(config.test.use_3d_nms) 27 | # Used for AP calculation 28 | CONFIG_DICT = {'remove_empty_box': (not config.test.faster_eval), 29 | 'use_3d_nms': config.test.use_3d_nms, 30 | 'nms_iou': config.test.nms_iou, 31 | 'use_old_type_nms': config.test.use_old_type_nms, 32 | 'cls_nms': config.test.use_cls_nms, 33 | 'per_class_proposal': config.test.per_class_proposal, 34 | 'conf_thresh': config.test.conf_thresh, 35 | 'dataset_config': dataset_config} 36 | 37 | AP_IOU_THRESHOLDS = config.test.ap_iou_thresholds 38 | logging.info(str(datetime.now())) 39 | # Reset numpy seed. 40 | # REF: https://github.com/pytorch/pytorch/issues/5059 41 | np.random.seed() 42 | 43 | stat_dict = {} 44 | ap_calculator_list = [APCalculator(iou_thresh, CONFIG_DICT['dataset_config'].class2type) \ 45 | for iou_thresh in AP_IOU_THRESHOLDS] 46 | net.eval() # set model to eval mode (for bn and dp) 47 | for batch_idx, batch_data_label in enumerate(test_dataloader): 48 | if batch_idx % 10 == 0: 49 | print('Eval batch: %d'%(batch_idx)) 50 | for key in batch_data_label: 51 | batch_data_label[key] = batch_data_label[key].cuda() 52 | # Forward pass 53 | inputs = {'point_clouds': batch_data_label['point_clouds']} 54 | if 'voxel_coords' in batch_data_label: 55 | inputs.update({ 56 | 'voxel_coords': batch_data_label['voxel_coords'], 57 | 'voxel_inds': batch_data_label['voxel_inds'], 58 | 'voxel_feats': batch_data_label['voxel_feats']}) 59 | with torch.no_grad(): 60 | end_points = net(inputs) 61 | 62 | # Compute loss 63 | for key in batch_data_label: 64 | assert(key not in end_points) 65 | end_points[key] = batch_data_label[key] 66 | loss, end_points = criterion(end_points, CONFIG_DICT['dataset_config']) 67 | 68 | # Accumulate statistics and print out 69 | for key in end_points: 70 | if 'loss' in key or 'acc' in key or 'ratio' in key: 71 | if key not in stat_dict: stat_dict[key] = 0 72 | stat_dict[key] += end_points[key].item() 73 | 74 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT) 75 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 76 | for ap_calculator in ap_calculator_list: 77 | ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls) 78 | 79 | # Dump evaluation results for visualization 80 | if batch_idx == 0: 81 | dump_results(end_points, 'visualization', CONFIG_DICT['dataset_config']) 82 | 83 | # Log statistics 84 | for key in sorted(stat_dict.keys()): 85 | logging.info('eval mean %s: %f'%(key, stat_dict[key]/(float(batch_idx+1)))) 86 | 87 | # Evaluate average precision 88 | for i, ap_calculator in enumerate(ap_calculator_list): 89 | logging.info('-'*10 + 'iou_thresh: %f'%(AP_IOU_THRESHOLDS[i]) + '-'*10) 90 | metrics_dict = ap_calculator.compute_metrics() 91 | for key in metrics_dict: 92 | logging.info('eval %s: %f'%(key, metrics_dict[key])) 93 | 94 | mean_loss = stat_dict['loss']/float(batch_idx+1) 95 | return mean_loss 96 | 97 | -------------------------------------------------------------------------------- 
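The `test()` routine above is driven entirely by the `test:` block of `config/default.yaml`; a minimal sketch of wiring the two together (the commented-out model/dataloader setup is a placeholder for the real entry point in `ddp_main.py`, and building the config with `OmegaConf` directly is an illustrative assumption, since Hydra normally constructs it):

```python
from omegaconf import OmegaConf

# Mirror of the test: block in config/default.yaml, here with 3D NMS enabled.
config = OmegaConf.create({'test': {
    'use_3d_nms': True,       # use nms_3d_faster* from lib/utils/nms.py
    'use_cls_nms': True,      # requires use_3d_nms (asserted in test())
    'use_old_type_nms': False,
    'per_class_proposal': True,
    'nms_iou': 0.25,
    'conf_thresh': 0.05,
    'faster_eval': False,     # keep empty-bounding-box removal
    'ap_iou_thresholds': [0.25, 0.5],
}})

# net, test_dataloader and dataset_config come from the usual model/dataset
# setup (see ddp_main.py); evaluation is then a single call:
# mean_loss = test(net, test_dataloader, dataset_config, config)
```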
/downstream/votenet_det_new/lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | from pc_util import bbox_corner_dist_measure 8 | 9 | # boxes are axis aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 10 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 11 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 12 | ''' 13 | def nms_2d(boxes, overlap_threshold): 14 | x1 = boxes[:,0] 15 | y1 = boxes[:,1] 16 | x2 = boxes[:,2] 17 | y2 = boxes[:,3] 18 | score = boxes[:,4] 19 | area = (x2-x1)*(y2-y1) 20 | 21 | I = np.argsort(score) 22 | pick = [] 23 | while (I.size!=0): 24 | last = I.size 25 | i = I[-1] 26 | pick.append(i) 27 | suppress = [last-1] 28 | for pos in range(last-1): 29 | j = I[pos] 30 | xx1 = max(x1[i],x1[j]) 31 | yy1 = max(y1[i],y1[j]) 32 | xx2 = min(x2[i],x2[j]) 33 | yy2 = min(y2[i],y2[j]) 34 | w = xx2-xx1 35 | h = yy2-yy1 36 | if (w>0 and h>0): 37 | o = w*h/area[j] 38 | # print('Overlap is', o)  # leftover debug output, disabled 39 | if (o>overlap_threshold): 40 | suppress.append(pos) 41 | I = np.delete(I,suppress) 42 | return pick 43 | 44 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 45 | x1 = boxes[:,0] 46 | y1 = boxes[:,1] 47 | x2 = boxes[:,2] 48 | y2 = boxes[:,3] 49 | score = boxes[:,4] 50 | area = (x2-x1)*(y2-y1) 51 | 52 | I = np.argsort(score) 53 | pick = [] 54 | while (I.size!=0): 55 | last = I.size 56 | i = I[-1] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 60 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 61 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 62 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 63 | 64 | w = np.maximum(0, xx2-xx1) 65 | h = np.maximum(0, yy2-yy1) 66 | 67 | if old_type: 68 | o = (w*h)/area[I[:last-1]] 69 | else: 70 | inter = w*h 71 | o = inter / (area[i] + area[I[:last-1]] - inter) 72 | 73 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 74 | 75 | return pick 76 | 77 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 78 | x1 = boxes[:,0] 79 | y1 = boxes[:,1] 80 | z1 = boxes[:,2] 81 | x2 = boxes[:,3] 82 | y2 = boxes[:,4] 83 | z2 = boxes[:,5] 84 | score = boxes[:,6] 85 | area = (x2-x1)*(y2-y1)*(z2-z1) 86 | 87 | I = np.argsort(score) 88 | pick = [] 89 | while (I.size!=0): 90 | last = I.size 91 | i = I[-1] 92 | pick.append(i) 93 | 94 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 95 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 96 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 97 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 98 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 99 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 100 | 101 | l = np.maximum(0, xx2-xx1) 102 | w = np.maximum(0, yy2-yy1) 103 | h = np.maximum(0, zz2-zz1) 104 | 105 | if old_type: 106 | o = (l*w*h)/area[I[:last-1]] 107 | else: 108 | inter = l*w*h 109 | o = inter / (area[i] + area[I[:last-1]] - inter) 110 | 111 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 112 | 113 | return pick 114 | 115 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 116 | x1 = boxes[:,0] 117 | y1 = boxes[:,1] 118 | z1 = boxes[:,2] 119 | x2 = boxes[:,3] 120 | y2 = boxes[:,4] 121 | z2 = boxes[:,5] 122 | score = boxes[:,6] 123 | cls = boxes[:,7] 124 | area = (x2-x1)*(y2-y1)*(z2-z1) 125 | 126 | I =
np.argsort(score) 127 | pick = [] 128 | while (I.size!=0): 129 | last = I.size 130 | i = I[-1] 131 | pick.append(i) 132 | 133 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 134 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 135 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 136 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 137 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 138 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 139 | cls1 = cls[i] 140 | cls2 = cls[I[:last-1]] 141 | 142 | l = np.maximum(0, xx2-xx1) 143 | w = np.maximum(0, yy2-yy1) 144 | h = np.maximum(0, zz2-zz1) 145 | 146 | if old_type: 147 | o = (l*w*h)/area[I[:last-1]] 148 | else: 149 | inter = l*w*h 150 | o = inter / (area[i] + area[I[:last-1]] - inter) 151 | o = o * (cls1==cls2) 152 | 153 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 154 | 155 | return pick 156 | 157 | 158 | def nms_crnr_dist(boxes, conf, overlap_threshold): 159 | 160 | I = np.argsort(conf) 161 | pick = [] 162 | while (I.size!=0): 163 | last = I.size 164 | i = I[-1] 165 | pick.append(i) 166 | 167 | scores = [] 168 | for ind in I[:-1]: 169 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 170 | 171 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 172 | 173 | return pick 174 | 175 | if __name__=='__main__': 176 | a = np.random.random((100,5)) 177 | print(nms_2d(a,0.9)) 178 | print(nms_2d_faster(a,0.9)) 179 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. 
Qi 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | def huber_loss(error, delta=1.0): 16 | """ 17 | Args: 18 | error: Torch tensor (d1,d2,...,dk) 19 | Returns: 20 | loss: Torch tensor (d1,d2,...,dk) 21 | 22 | x = error = pred - gt or dist(pred,gt) 23 | 0.5 * |x|^2 if |x|<=d 24 | 0.5 * d^2 + d * (|x|-d) if |x|>d 25 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 26 | """ 27 | abs_error = torch.abs(error) 28 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 29 | quadratic = torch.clamp(abs_error, max=delta) 30 | linear = (abs_error - quadratic) 31 | loss = 0.5 * quadratic**2 + delta * linear 32 | return loss 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | def demo_nn_distance(): 64 | np.random.seed(0) 65 | pc1arr = np.random.random((1,5,3)) 66 | pc2arr = np.random.random((1,6,3)) 67 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 68 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 69 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | 15 | class Logger(object): 16 | 17 | def __init__(self, log_dir): 18 | """Create a summary writer logging to log_dir.""" 19 | self.writer = tf.summary.FileWriter(log_dir) 20 | 21 | def scalar_summary(self, tag, value, step): 22 | """Log a scalar variable.""" 23 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 24 | self.writer.add_summary(summary, step) 25 | 26 | def image_summary(self, tag, images, step): 27 | """Log a list of images.""" 28 | 29 | img_summaries = [] 30 | for i, img in enumerate(images): 31 | # Write the image to a string 32 | try: 33 | s = StringIO() 34 | except: 35 | s = BytesIO() 36 | scipy.misc.toimage(img).save(s, format="png") 37 | 38 | # Create an Image object 39 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 40 | height=img.shape[0], 41 | width=img.shape[1]) 42 | # Create a Summary value 43 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 44 | 45 | # Create and write Summary 46 | summary = tf.Summary(value=img_summaries) 47 | self.writer.add_summary(summary, step) 48 | 49 | def histo_summary(self, tag, values, step, bins=1000): 50 | """Log a histogram of the tensor of values.""" 51 | 52 | # Create a histogram using numpy 53 | counts, bin_edges = np.histogram(values, bins=bins) 54 | 55 | # Fill the fields of the histogram proto 56 | hist = tf.HistogramProto() 57 | hist.min = float(np.min(values)) 58 | hist.max = float(np.max(values)) 59 | hist.num = int(np.prod(values.shape)) 60 | hist.sum = float(np.sum(values)) 61 | hist.sum_squares = float(np.sum(values**2)) 62 | 63 | # Drop the start of the first bin 64 | bin_edges = bin_edges[1:] 65 | 66 | # Add bin edges and counts 67 | for edge in bin_edges: 68 | hist.bucket_limit.append(edge) 69 | for c in counts: 70 | hist.bucket.append(c) 71 | 72 | # Create and write Summary 73 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 74 | self.writer.add_summary(summary, step) 75 | self.writer.flush() 76 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/lib/utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 7 | import os 8 | import time 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | import sys 11 | sys.path.append(BASE_DIR) 12 | import tf_logger 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, opt, name='train'): 17 | # self.opt = opt 18 | #self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 19 | #self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 20 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 21 | self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt') 22 | with open(self.log_name, "a") as log_file: 23 | now = time.strftime("%c") 24 | log_file.write('================ Training Loss (%s) ================\n' % now) 25 | 26 | # |visuals|: dictionary of images to save 27 | def log_images(self, visuals, step): 28 | for label, image_numpy in visuals.items(): 29 | self.logger.image_summary( 30 | label, [image_numpy], step) 31 | 32 | # scalars: dictionary of scalar labels and values 33 | def log_scalars(self, scalars, step): 34 | for label, val in scalars.items(): 35 | self.logger.scalar_summary(label, val, step) 36 | 37 | # scatter plots 38 | def plot_current_points(self, points, disp_offset=10): 39 | pass 40 | 41 | # scalars: same format as |scalars| of plot_current_scalars 42 | def print_current_scalars(self, epoch, i, scalars): 43 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 44 | for k, v in scalars.items(): 45 | message += '%s: %.3f ' % (k, v) 46 | 47 | print(message) 48 | with open(self.log_name, "a") as log_file: 49 | log_file.write('%s\n' % message) 50 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include <torch/extension.h> 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree.
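// NOTE: opt_n_threads() below rounds the work size down to the nearest
// power of two (capped at TOTAL_THREADS, floored at 1), so kernel launches
// use a thread count roughly proportional to the work per block;
// opt_block_config() applies the same rule to a 2D (x, y) launch.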
5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include <ATen/ATen.h> 10 | #include <ATen/cuda/CUDAContext.h> 11 | #include <cmath> 12 | 13 | #include <cuda.h> 14 | #include <cuda_runtime.h> 15 | 16 | #include <vector> 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include <torch/extension.h> 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include <torch/extension.h> 9 | #include <vector> 10 | 11 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include <torch/extension.h> 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include <ATen/cuda/CUDAContext.h> 8 | #include <torch/extension.h> 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | AT_ASSERT(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | AT_ASSERT(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data_ptr<float>(), 31 | xyz.data_ptr<float>(), idx.data_ptr<int>()); 32 | } else { 33 | AT_ASSERT(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree.
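// NOTE: the kernel below scans all n input points per query point and keeps
// the first nsample indices whose squared distance falls within radius^2.
// On the first hit (cnt == 0) it pre-fills every output slot with that index,
// so queries with fewer than nsample neighbors return the first neighbor
// repeated rather than uninitialized entries.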
5 | 6 | #include <math.h> 7 | #include <stdio.h> 8 | #include <stdlib.h> 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree.
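// NOTE: group_points gathers per-point features into local groups,
//   points (b, c, n) + idx (b, npoints, nsample) -> out (b, c, npoints, nsample),
// and group_points_grad scatters gradients back into the (b, c, n) layout;
// both dispatch to the CUDA kernels declared below.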
5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), 34 | points.data_ptr(), idx.data_ptr(), 35 | output.data_ptr()); 36 | } else { 37 | AT_ASSERT(false, "CPU not supported"); 38 | } 39 | 40 | return output; 41 | } 42 | 43 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | group_points_grad_kernel_wrapper( 59 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 60 | grad_out.data_ptr(), idx.data_ptr(), 61 | output.data_ptr()); 62 | } else { 63 | AT_ASSERT(false, "CPU not supported"); 64 | } 65 | 66 | return output; 67 | } 68 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include <stdio.h> 7 | #include <stdlib.h> 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree.
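// NOTE: three_nn returns, for each query ("unknown") point, the squared
// distances and indices of its three nearest "known" points;
// three_interpolate then blends known features with caller-supplied weights
// (in PointNet++-style feature propagation these are typically normalized
// inverse distances computed on the Python side).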
5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data_ptr(), knows.data_ptr(), 39 | dist2.data_ptr(), idx.data_ptr()); 40 | } else { 41 | AT_ASSERT(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data_ptr(), idx.data_ptr(), weight.data_ptr(), 69 | output.data_ptr()); 70 | } else { 71 | AT_ASSERT(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.is_cuda()) { 95 | three_interpolate_grad_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data_ptr(), idx.data_ptr(), 98 | weight.data_ptr(), output.data_ptr()); 99 | } else { 100 | AT_ASSERT(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data_ptr(), 37 | idx.data_ptr(), output.data_ptr()); 38 | } else { 39 | AT_ASSERT(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data_ptr(), 63 | idx.data_ptr(), 64 | output.data_ptr()); 65 | } else { 66 | AT_ASSERT(false, "CPU not supported"); 67 | } 68 | 69 | return output; 70 | } 71 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 72 | CHECK_CONTIGUOUS(points); 73 | CHECK_IS_FLOAT(points); 74 | 75 | at::Tensor output = 76 | torch::zeros({points.size(0), nsamples}, 77 | at::device(points.device()).dtype(at::ScalarType::Int)); 78 | 79 | at::Tensor tmp = 80 | torch::full({points.size(0), points.size(1)}, 1e10, 81 | at::device(points.device()).dtype(at::ScalarType::Float)); 82 | 83 | if (points.is_cuda()) { 84 | furthest_point_sampling_kernel_wrapper( 85 | points.size(0), points.size(1), nsamples, points.data_ptr(), 86 | tmp.data_ptr(), output.data_ptr()); 87 | } else { 88 | AT_ASSERT(false, "CPU not supported"); 89 | } 90 | 91 | return output; 92 | } 93 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. 
''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import glob 7 | import os 8 | 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 11 | 12 | this_dir = os.path.dirname(os.path.abspath(__file__)) 13 | _ext_src_root = "_ext_src" 14 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 15 | "{}/src/*.cu".format(_ext_src_root) 16 | ) 17 | 18 | setup( 19 | name='pointnet2', 20 | ext_modules=[ 21 | CUDAExtension( 22 | name='pointnet2._ext', 23 | sources=_ext_sources, 24 | extra_compile_args={ 25 | "cxx": ["-O3"], 26 | "nvcc": ["-O3", "-Xfatbin", "-compress-all"], 27 | }, 28 | include_dirs=[os.path.join(this_dir, _ext_src_root, "include")], 29 | ) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/downstream/votenet_det_new/models/backbone/sparseconv/__init__.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/downstream/votenet_det_new/models/backbone/sparseconv/lib/__init__.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/lib/math_functions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from scipy.sparse import csr_matrix 7 | import torch 8 | 9 | 10 | class SparseMM(torch.autograd.Function): 11 | """ 12 | Sparse x dense matrix multiplication with autograd support. 
13 | Implementation by Soumith Chintala: 14 | https://discuss.pytorch.org/t/ 15 | does-pytorch-support-autograd-on-sparse-matrix/6156/7 16 | """ 17 | 18 | def forward(self, matrix1, matrix2): 19 | self.save_for_backward(matrix1, matrix2) 20 | return torch.mm(matrix1, matrix2) 21 | 22 | def backward(self, grad_output): 23 | matrix1, matrix2 = self.saved_tensors 24 | grad_matrix1 = grad_matrix2 = None 25 | 26 | if self.needs_input_grad[0]: 27 | grad_matrix1 = torch.mm(grad_output, matrix2.t()) 28 | 29 | if self.needs_input_grad[1]: 30 | grad_matrix2 = torch.mm(matrix1.t(), grad_output) 31 | 32 | return grad_matrix1, grad_matrix2 33 | 34 | 35 | def sparse_float_tensor(values, indices, size=None): 36 | """ 37 | Return a torch sparse matrix give values and indices (row_ind, col_ind). 38 | If the size is an integer, return a square matrix with side size. 39 | If the size is a torch.Size, use it to initialize the out tensor. 40 | If none, the size is inferred. 41 | """ 42 | indices = torch.stack(indices).int() 43 | sargs = [indices, values.float()] 44 | if size is not None: 45 | # Use the provided size 46 | if isinstance(size, int): 47 | size = torch.Size((size, size)) 48 | sargs.append(size) 49 | if values.is_cuda: 50 | return torch.cuda.sparse.FloatTensor(*sargs) 51 | else: 52 | return torch.sparse.FloatTensor(*sargs) 53 | 54 | 55 | def diags(values, size=None): 56 | values = values.view(-1) 57 | n = values.nelement() 58 | size = torch.Size((n, n)) 59 | indices = (torch.arange(0, n), torch.arange(0, n)) 60 | return sparse_float_tensor(values, indices, size) 61 | 62 | 63 | def sparse_to_csr_matrix(tensor): 64 | tensor = tensor.cpu() 65 | inds = tensor._indices().numpy() 66 | vals = tensor._values().numpy() 67 | return csr_matrix((vals, (inds[0], inds[1])), shape=[s for s in tensor.shape]) 68 | 69 | 70 | def csr_matrix_to_sparse(mat): 71 | row_ind, col_ind = mat.nonzero() 72 | return sparse_float_tensor( 73 | torch.from_numpy(mat.data), 74 | (torch.from_numpy(row_ind), torch.from_numpy(col_ind)), 75 | size=torch.Size(mat.shape)) 76 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import models.backbone.sparseconv.models.resunet as resunet 8 | import models.backbone.sparseconv.models.res16unet as res16unet 9 | 10 | # from models.trilateral_crf import TrilateralCRF 11 | from models.backbone.sparseconv.models.conditional_random_fields import BilateralCRF, TrilateralCRF 12 | 13 | MODELS = [] 14 | 15 | 16 | def add_models(module): 17 | MODELS.extend([getattr(module, a) for a in dir(module) if 'Net' in a]) 18 | 19 | 20 | add_models(resunet) 21 | add_models(res16unet) 22 | 23 | WRAPPERS = [BilateralCRF, TrilateralCRF] 24 | 25 | 26 | def get_models(): 27 | '''Returns a tuple of sample models.''' 28 | return MODELS 29 | 30 | 31 | def get_wrappers(): 32 | return WRAPPERS 33 | 34 | 35 | def load_model(name): 36 | '''Creates and returns an instance of the model given its class name. 37 | ''' 38 | # Find the model class from its name 39 | all_models = get_models() 40 | mdict = {model.__name__: model for model in all_models} 41 | if name not in mdict: 42 | print('Invalid model index. 
Options are:') 43 | # Display a list of valid model names 44 | for model in all_models: 45 | print('\t* {}'.format(model.__name__)) 46 | return None 47 | NetClass = mdict[name] 48 | 49 | return NetClass 50 | 51 | 52 | def load_wrapper(name): 53 | '''Creates and returns an instance of the model given its class name. 54 | ''' 55 | # Find the model class from its name 56 | all_wrappers = get_wrappers() 57 | mdict = {wrapper.__name__: wrapper for wrapper in all_wrappers} 58 | if name not in mdict: 59 | print('Invalid wrapper index. Options are:') 60 | # Display a list of valid model names 61 | for wrapper in all_wrappers: 62 | print('\t* {}'.format(wrapper.__name__)) 63 | return None 64 | WrapperClass = mdict[name] 65 | 66 | return WrapperClass 67 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/models/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from MinkowskiEngine import MinkowskiNetwork 7 | 8 | 9 | class Model(MinkowskiNetwork): 10 | """ 11 | Base network for all sparse convnet 12 | 13 | By default, all networks are segmentation networks. 14 | """ 15 | OUT_PIXEL_DIST = -1 16 | 17 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 18 | super(Model, self).__init__(D) 19 | self.in_channels = in_channels 20 | self.out_channels = out_channels 21 | self.config = config 22 | 23 | 24 | class HighDimensionalModel(Model): 25 | """ 26 | Base network for all spatio (temporal) chromatic sparse convnet 27 | """ 28 | 29 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 30 | assert D > 4, "Num dimension smaller than 5" 31 | super(HighDimensionalModel, self).__init__(in_channels, out_channels, config, D, **kwargs) 32 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/models/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/PointContrast/47e868281d35b26e186ca41a9b6d1906b50dbcfa/downstream/votenet_det_new/models/backbone/sparseconv/models/modules/__init__.py -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
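# NOTE: BasicBlockBase and BottleneckBase below are sparse (MinkowskiEngine)
# counterparts of the torchvision ResNet blocks: two 3^D convolutions plus an
# identity (or 1x1-downsampled) skip connection for the basic block, and a
# 1-3-1 bottleneck with expansion 4; the NORM_TYPE class attribute swaps
# batch/instance normalization in the subclasses.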
5 | 6 | import torch.nn as nn 7 | 8 | from models.backbone.sparseconv.models.modules.common import ConvType, NormType, get_norm, conv 9 | 10 | from MinkowskiEngine import MinkowskiReLU 11 | 12 | 13 | class BasicBlockBase(nn.Module): 14 | expansion = 1 15 | NORM_TYPE = NormType.BATCH_NORM 16 | 17 | def __init__(self, 18 | inplanes, 19 | planes, 20 | stride=1, 21 | dilation=1, 22 | downsample=None, 23 | conv_type=ConvType.HYPERCUBE, 24 | bn_momentum=0.1, 25 | D=3): 26 | super(BasicBlockBase, self).__init__() 27 | 28 | self.conv1 = conv( 29 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 30 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 31 | self.conv2 = conv( 32 | planes, 33 | planes, 34 | kernel_size=3, 35 | stride=1, 36 | dilation=dilation, 37 | bias=False, 38 | conv_type=conv_type, 39 | D=D) 40 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 41 | self.relu = MinkowskiReLU(inplace=True) 42 | self.downsample = downsample 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.norm1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.norm2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class BasicBlock(BasicBlockBase): 64 | NORM_TYPE = NormType.BATCH_NORM 65 | 66 | 67 | class BasicBlockIN(BasicBlockBase): 68 | NORM_TYPE = NormType.INSTANCE_NORM 69 | 70 | 71 | class BasicBlockINBN(BasicBlockBase): 72 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 73 | 74 | 75 | class BottleneckBase(nn.Module): 76 | expansion = 4 77 | NORM_TYPE = NormType.BATCH_NORM 78 | 79 | def __init__(self, 80 | inplanes, 81 | planes, 82 | stride=1, 83 | dilation=1, 84 | downsample=None, 85 | conv_type=ConvType.HYPERCUBE, 86 | bn_momentum=0.1, 87 | D=3): 88 | super(BottleneckBase, self).__init__() 89 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 90 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 91 | 92 | self.conv2 = conv( 93 | planes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 94 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 95 | 96 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 97 | self.norm3 = get_norm(self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum) 98 | 99 | self.relu = MinkowskiReLU(inplace=True) 100 | self.downsample = downsample 101 | 102 | def forward(self, x): 103 | residual = x 104 | 105 | out = self.conv1(x) 106 | out = self.norm1(out) 107 | out = self.relu(out) 108 | 109 | out = self.conv2(out) 110 | out = self.norm2(out) 111 | out = self.relu(out) 112 | 113 | out = self.conv3(out) 114 | out = self.norm3(out) 115 | 116 | if self.downsample is not None: 117 | residual = self.downsample(x) 118 | 119 | out += residual 120 | out = self.relu(out) 121 | 122 | return out 123 | 124 | 125 | class Bottleneck(BottleneckBase): 126 | NORM_TYPE = NormType.BATCH_NORM 127 | 128 | 129 | class BottleneckIN(BottleneckBase): 130 | NORM_TYPE = NormType.INSTANCE_NORM 131 | 132 | 133 | class BottleneckINBN(BottleneckBase): 134 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 135 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/models/modules/senet_block.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn as nn 7 | 8 | import MinkowskiEngine as ME 9 | 10 | from models.modules.common import ConvType, NormType 11 | from models.modules.resnet_block import BasicBlock, Bottleneck 12 | 13 | 14 | class SELayer(nn.Module): 15 | 16 | def __init__(self, channel, reduction=16, D=-1): 17 | # Global coords does not require coords_key 18 | super(SELayer, self).__init__() 19 | self.fc = nn.Sequential( 20 | ME.MinkowskiLinear(channel, channel // reduction), ME.MinkowskiReLU(inplace=True), 21 | ME.MinkowskiLinear(channel // reduction, channel), ME.MinkowskiSigmoid()) 22 | self.pooling = ME.MinkowskiGlobalPooling(dimension=D) 23 | self.broadcast_mul = ME.MinkowskiBroadcastMultiplication(dimension=D) 24 | 25 | def forward(self, x): 26 | y = self.pooling(x) 27 | y = self.fc(y) 28 | return self.broadcast_mul(x, y) 29 | 30 | 31 | class SEBasicBlock(BasicBlock): 32 | 33 | def __init__(self, 34 | inplanes, 35 | planes, 36 | stride=1, 37 | dilation=1, 38 | downsample=None, 39 | conv_type=ConvType.HYPERCUBE, 40 | reduction=16, 41 | D=-1): 42 | super(SEBasicBlock, self).__init__( 43 | inplanes, 44 | planes, 45 | stride=stride, 46 | dilation=dilation, 47 | downsample=downsample, 48 | conv_type=conv_type, 49 | D=D) 50 | self.se = SELayer(planes, reduction=reduction, D=D) 51 | 52 | def forward(self, x): 53 | residual = x 54 | 55 | out = self.conv1(x) 56 | out = self.norm1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.norm2(out) 61 | out = self.se(out) 62 | 63 | if self.downsample is not None: 64 | residual = self.downsample(x) 65 | 66 | out += residual 67 | out = self.relu(out) 68 | 69 | return out 70 | 71 | 72 | class SEBasicBlockSN(SEBasicBlock): 73 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 74 | 75 | 76 | class SEBasicBlockIN(SEBasicBlock): 77 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 78 | 79 | 80 | class SEBasicBlockLN(SEBasicBlock): 81 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 82 | 83 | 84 | class SEBottleneck(Bottleneck): 85 | 86 | def __init__(self, 87 | inplanes, 88 | planes, 89 | stride=1, 90 | dilation=1, 91 | downsample=None, 92 | conv_type=ConvType.HYPERCUBE, 93 | D=3, 94 | reduction=16): 95 | super(SEBottleneck, self).__init__( 96 | inplanes, 97 | planes, 98 | stride=stride, 99 | dilation=dilation, 100 | downsample=downsample, 101 | conv_type=conv_type, 102 | D=D) 103 | self.se = SELayer(planes * self.expansion, reduction=reduction, D=D) 104 | 105 | def forward(self, x): 106 | residual = x 107 | 108 | out = self.conv1(x) 109 | out = self.norm1(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv2(out) 113 | out = self.norm2(out) 114 | out = self.relu(out) 115 | 116 | out = self.conv3(out) 117 | out = self.norm3(out) 118 | out = self.se(out) 119 | 120 | if self.downsample is not None: 121 | residual = self.downsample(x) 122 | 123 | out += residual 124 | out = self.relu(out) 125 | 126 | return out 127 | 128 | 129 | class SEBottleneckSN(SEBottleneck): 130 | NORM_TYPE = NormType.SPARSE_SWITCH_NORM 131 | 132 | 133 | class SEBottleneckIN(SEBottleneck): 134 | NORM_TYPE = NormType.SPARSE_INSTANCE_NORM 135 | 136 | 137 | class SEBottleneckLN(SEBottleneck): 138 | NORM_TYPE = NormType.SPARSE_LAYER_NORM 139 | -------------------------------------------------------------------------------- 
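The sparse SELayer above follows the standard squeeze-and-excitation recipe: globally pool to one vector per sample, squeeze it through a bottleneck MLP, and gate channels with a sigmoid. A minimal dense PyTorch analogue for intuition (an illustrative sketch, not code from this repository):

    import torch
    import torch.nn as nn

    class DenseSELayer(nn.Module):
        # Squeeze-and-excitation over the channel dim of a (B, C, N) tensor.
        def __init__(self, channel, reduction=16):
            super().__init__()
            self.fc = nn.Sequential(
                nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True),
                nn.Linear(channel // reduction, channel), nn.Sigmoid())

        def forward(self, x):            # x: (B, C, N)
            y = x.mean(dim=2)            # squeeze: global average pool -> (B, C)
            y = self.fc(y).unsqueeze(2)  # excite: per-channel gates in (0, 1)
            return x * y                 # broadcast multiply, as in SELayer

    # e.g. DenseSELayer(64)(torch.randn(2, 64, 1024)) keeps the input shape.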
/downstream/votenet_det_new/models/backbone/sparseconv/models/wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import random 7 | from torch.nn import Module 8 | 9 | from MinkowskiEngine import SparseTensor 10 | 11 | 12 | class Wrapper(Module): 13 | """ 14 | Wrapper for the segmentation networks. 15 | """ 16 | OUT_PIXEL_DIST = -1 17 | 18 | def __init__(self, NetClass, in_nchannel, out_nchannel, config): 19 | super(Wrapper, self).__init__() 20 | self.initialize_filter(NetClass, in_nchannel, out_nchannel, config) 21 | 22 | def initialize_filter(self, NetClass, in_nchannel, out_nchannel, config): 23 | raise NotImplementedError('Must initialize a model and a filter') 24 | 25 | def forward(self, x, coords, colors=None): 26 | soutput = self.model(x) 27 | 28 | # During training, make the network invariant to the filter 29 | if not self.training or random.random() < 0.5: 30 | # Filter requires the model to finish the forward pass 31 | wrapper_coords = self.filter.initialize_coords(self.model, coords, colors) 32 | finput = SparseTensor(soutput.F, wrapper_coords) 33 | soutput = self.filter(finput) 34 | 35 | return soutput 36 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/backbone/sparseconv/voxelized_dataset.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
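# NOTE: __getitem__ below snaps each point onto a grid cell via
# floor(xyz / voxel_size) and uses ME.utils.sparse_quantize to keep one
# representative point index per occupied voxel; collate_fn then prepends a
# batch-index column so voxel_coords has the (batch, x, y, z) layout that
# MinkowskiEngine sparse tensors expect, with constant ones as voxel_feats.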
6 | 7 | import os 8 | import sys 9 | import numpy as np 10 | import torch 11 | 12 | from torch.utils.data import Dataset 13 | from torch.utils.data._utils.collate import default_collate 14 | 15 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | ROOT_DIR = os.path.dirname(BASE_DIR) 17 | sys.path.append(BASE_DIR) 18 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 19 | 20 | import MinkowskiEngine as ME 21 | 22 | class VoxelizationDataset(Dataset): 23 | """ 24 | Wrapper dataset which voxelize the original point clouds 25 | """ 26 | def __init__(self, dataset, voxel_size=0.05): 27 | self.dataset = dataset 28 | self.VOXEL_SIZE = voxel_size 29 | 30 | def __len__(self): 31 | return len(self.dataset) 32 | 33 | def __getitem__(self, idx): 34 | ret_dict = self.dataset[idx] 35 | 36 | # voxelization 37 | coords = np.floor(ret_dict['point_clouds'] / self.VOXEL_SIZE) 38 | inds = ME.utils.sparse_quantize(coords, return_index=True) 39 | coords = coords[inds].astype(np.int32) 40 | 41 | ret_dict['voxel'] = (coords, np.array(inds, dtype=np.int32)) 42 | return ret_dict 43 | 44 | 45 | def collate_fn(samples): 46 | data, voxel = [], [] 47 | for sample in samples: 48 | data.append({w: sample[w] for w in sample if w != 'voxel'}) 49 | voxel.append(sample['voxel']) 50 | 51 | # for non-voxel data, use default collate 52 | data_batch = default_collate(data) 53 | 54 | batch_ids = np.array( 55 | [b for b, v in enumerate(voxel) for _ in range(v[0].shape[0])]) 56 | voxel_ids = np.concatenate([v[1] for v in voxel], 0) 57 | 58 | coords = np.concatenate([v[0] for v in voxel], 0) 59 | coords = np.concatenate([batch_ids[:, None], coords], 1) 60 | 61 | data_batch['voxel_coords'] = torch.from_numpy(coords) 62 | data_batch['voxel_inds'] = torch.from_numpy(voxel_ids) 63 | data_batch['voxel_feats'] = data_batch['point_clouds'].new_ones(batch_ids.shape[0], 3) 64 | 65 | return data_batch 66 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(BASE_DIR) 14 | from backbone_module import Pointnet2Backbone 15 | from proposal_module import ProposalModule 16 | from dump_helper import dump_results 17 | from loss_helper_boxnet import get_loss 18 | 19 | 20 | class BoxNet(nn.Module): 21 | r""" 22 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 23 | 24 | Parameters 25 | ---------- 26 | num_class: int 27 | Number of semantics classes to predict over -- size of softmax classifier 28 | num_heading_bin: int 29 | num_size_cluster: int 30 | input_feature_dim: (default: 0) 31 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 32 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 33 | num_proposal: int (default: 128) 34 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 35 | vote_factor: (default: 1) 36 | Number of votes generated from each seed point. 
37 | """ 38 | 39 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 40 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps', backbone=None): 41 | super().__init__() 42 | 43 | self.num_class = num_class 44 | self.num_heading_bin = num_heading_bin 45 | self.num_size_cluster = num_size_cluster 46 | self.mean_size_arr = mean_size_arr 47 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 48 | self.input_feature_dim = input_feature_dim 49 | self.num_proposal = num_proposal 50 | self.vote_factor = vote_factor 51 | self.sampling=sampling 52 | 53 | # Backbone point feature learning 54 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 55 | 56 | # Box proposal, aggregation and detection 57 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 58 | mean_size_arr, num_proposal, sampling) 59 | 60 | def forward(self, inputs): 61 | """ Forward pass of the network 62 | 63 | Args: 64 | inputs: dict 65 | {point_clouds} 66 | 67 | point_clouds: Variable(torch.cuda.FloatTensor) 68 | (B, N, 3 + input_channels) tensor 69 | Point cloud to run predicts on 70 | Each point in the point-cloud MUST 71 | be formated as (x, y, z, features...) 72 | Returns: 73 | end_points: dict 74 | """ 75 | end_points = {} 76 | batch_size = inputs['point_clouds'].shape[0] 77 | 78 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 79 | xyz = end_points['fp2_xyz'] 80 | features = end_points['fp2_features'] 81 | end_points['seed_inds'] = end_points['fp2_inds'] 82 | end_points['seed_xyz'] = xyz 83 | end_points['seed_features'] = features 84 | 85 | # Directly predict bounding boxes (skips voting) 86 | end_points = self.pnet(xyz, features, end_points) 87 | 88 | return end_points 89 | 90 | 91 | if __name__=='__main__': 92 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 93 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 94 | 95 | # Define dataset 96 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 97 | 98 | # Define model 99 | model = BoxNet(10,12,10,np.random.random((10,3))).cuda() 100 | 101 | # Model forward pass 102 | sample = TRAIN_DATASET[5] 103 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 104 | end_points = model(inputs) 105 | for key in end_points: 106 | print(key, end_points[key]) 107 | 108 | # Compute loss 109 | for key in sample: 110 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 111 | loss, end_points = get_loss(end_points, DC) 112 | print('loss', loss) 113 | end_points['point_clouds'] = inputs['point_clouds'] 114 | end_points['pred_mask'] = np.ones((1,128)) 115 | dump_results(end_points, 'tmp', DC) 116 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/loss_helper_boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | from nn_distance import nn_distance, huber_loss 15 | sys.path.append(BASE_DIR) 16 | from loss_helper import compute_box_and_sem_cls_loss 17 | 18 | OBJECTNESS_CLS_WEIGHTS = [0.2,0.8] # put larger weights on positive objectness 19 | 20 | def compute_objectness_loss(end_points): 21 | """ Compute objectness loss for the proposals. 22 | 23 | Args: 24 | end_points: dict (read-only) 25 | 26 | Returns: 27 | objectness_loss: scalar Tensor 28 | objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 29 | objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 30 | object_assignment: (batch_size, num_seed) Tensor with long int 31 | within [0,num_gt_object-1] 32 | """ 33 | # Associate proposal and GT objects by point-to-point distances 34 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'] 35 | gt_center = end_points['center_label'][:,:,0:3] 36 | B = gt_center.shape[0] 37 | K = aggregated_vote_xyz.shape[1] 38 | K2 = gt_center.shape[1] 39 | dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 40 | 41 | # Generate objectness label and mask 42 | # NOTE: Different from VoteNet, here we use seed label as objectness label. 43 | seed_inds = end_points['seed_inds'].long() # B,num_seed in [0,num_points-1] 44 | seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1, seed_inds) 45 | end_points['seed_labels'] = seed_gt_votes_mask 46 | aggregated_vote_inds = end_points['aggregated_vote_inds'] 47 | objectness_label = torch.gather(end_points['seed_labels'], 1, aggregated_vote_inds.long()) # select (B,K) from (B,1024) 48 | objectness_mask = torch.ones((objectness_label.shape[0], objectness_label.shape[1])).cuda() # no ignore zone anymore 49 | 50 | # Compute objectness loss 51 | objectness_scores = end_points['objectness_scores'] 52 | criterion = nn.CrossEntropyLoss(torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') 53 | objectness_loss = criterion(objectness_scores.transpose(2,1), objectness_label) 54 | objectness_loss = torch.sum(objectness_loss * objectness_mask)/(torch.sum(objectness_mask)+1e-6) 55 | 56 | # Set assignment 57 | object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 58 | 59 | return objectness_loss, objectness_label, objectness_mask, object_assignment 60 | 61 | 62 | def get_loss(end_points, config): 63 | """ Loss functions 64 | 65 | Args: 66 | end_points: dict 67 | { 68 | seed_xyz, seed_inds, 69 | center, 70 | heading_scores, heading_residuals_normalized, 71 | size_scores, size_residuals_normalized, 72 | sem_cls_scores, #seed_logits,# 73 | center_label, 74 | heading_class_label, heading_residual_label, 75 | size_class_label, size_residual_label, 76 | sem_cls_label, 77 | box_label_mask, 78 | vote_label, vote_label_mask 79 | } 80 | config: dataset config instance 81 | Returns: 82 | loss: pytorch scalar tensor 83 | end_points: dict 84 | """ 85 | 86 | # Obj loss 87 | objectness_loss, objectness_label, objectness_mask, object_assignment = \ 88 | compute_objectness_loss(end_points) 89 | end_points['objectness_loss'] = objectness_loss 90 | end_points['objectness_label'] = objectness_label 91 | end_points['objectness_mask'] = objectness_mask 92 | end_points['object_assignment'] = object_assignment 93 | total_num_proposal = 
objectness_label.shape[0]*objectness_label.shape[1] 94 | end_points['pos_ratio'] = \ 95 | torch.sum(objectness_label.float().cuda())/float(total_num_proposal) 96 | end_points['neg_ratio'] = \ 97 | torch.sum(objectness_mask.float())/float(total_num_proposal) - end_points['pos_ratio'] 98 | 99 | # Box loss and sem cls loss 100 | center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = \ 101 | compute_box_and_sem_cls_loss(end_points, config) 102 | end_points['center_loss'] = center_loss 103 | end_points['heading_cls_loss'] = heading_cls_loss 104 | end_points['heading_reg_loss'] = heading_reg_loss 105 | end_points['size_cls_loss'] = size_cls_loss 106 | end_points['size_reg_loss'] = size_reg_loss 107 | end_points['sem_cls_loss'] = sem_cls_loss 108 | box_loss = center_loss + 0.1*heading_cls_loss + heading_reg_loss + 0.1*size_cls_loss + size_reg_loss 109 | end_points['box_loss'] = box_loss 110 | 111 | # Final loss function 112 | loss = 0.5*objectness_loss + box_loss + 0.1*sem_cls_loss 113 | loss *= 10 114 | end_points['loss'] = loss 115 | 116 | # -------------------------------------------- 117 | # Some other statistics 118 | obj_pred_val = torch.argmax(end_points['objectness_scores'], 2) # B,K 119 | obj_acc = torch.sum((obj_pred_val==objectness_label.long()).float()*objectness_mask)/(torch.sum(objectness_mask)+1e-6) 120 | end_points['obj_acc'] = obj_acc 121 | 122 | return loss, end_points 123 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/models/voting_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Vote generation from seed point features. 19 | 20 | Args: 21 | vote_factor: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 35 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 36 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 37 | 38 | def forward(self, seed_xyz, seed_features): 39 | """ Forward pass.
40 | 41 | Arguments: 42 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 43 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 44 | Returns: 45 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 46 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 47 | """ 48 | batch_size = seed_xyz.shape[0] 49 | num_seed = seed_xyz.shape[1] 50 | num_vote = num_seed*self.vote_factor 51 | net = F.relu(self.bn1(self.conv1(seed_features))) 52 | net = F.relu(self.bn2(self.conv2(net))) 53 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 54 | 55 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 56 | offset = net[:,:,:,0:3] 57 | vote_xyz = seed_xyz.unsqueeze(2) + offset 58 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 59 | 60 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 61 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 62 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 63 | vote_features = vote_features.transpose(2,1).contiguous() 64 | 65 | return vote_xyz, vote_features 66 | 67 | if __name__=='__main__': 68 | net = VotingModule(2, 256).cuda() 69 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 70 | print('xyz', xyz.shape) 71 | print('features', features.shape) 72 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/scripts/train_scannet.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | #! /bin/bash 8 | export MODEL= 9 | export LOGDIR= 10 | mkdir -p $LOGDIR 11 | 12 | # main script 13 | python ddp_main.py \ 14 | net.backbone=sparseconv \ 15 | data.dataset=scannet \ 16 | data.num_workers=8 \ 17 | data.batch_size=32 \ 18 | data.num_points=40000 \ 19 | data.no_height=True \ 20 | optimizer.learning_rate=0.001 \ 21 | data.voxelization=True \ 22 | data.voxel_size=0.025 \ 23 | misc.log_dir=$LOGDIR \ 24 | net.is_train=True \ 25 | net.weights=$MODEL \ 26 | -------------------------------------------------------------------------------- /downstream/votenet_det_new/scripts/train_sunrgbd.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | #! /bin/bash 8 | export MODEL= 9 | export LOGDIR= 10 | mkdir -p $LOGDIR 11 | 12 | # main script 13 | python ddp_main.py \ 14 | net.is_train=True \ 15 | net.backbone=sparseconv \ 16 | data.dataset=sunrgbd \ 17 | data.num_workers=8 \ 18 | data.batch_size=64 \ 19 | data.no_height=True \ 20 | data.voxelization=True \ 21 | data.voxel_size=0.025 \ 22 | optimizer.learning_rate=0.001 \ 23 | misc.log_dir=$LOGDIR \ 24 | net.weights=$MODEL \ 25 | -------------------------------------------------------------------------------- /pretrain/data_preprocess/README.md: -------------------------------------------------------------------------------- 1 | Preprocessing ScanNet Pair Dataset 2 | ==== 3 | 1. Request downloading the ScanNet dataset from https://github.com/ScanNet/ScanNet and unzip to ``SCANNET_DIR``. 4 | 2. 
Extract scene data and construct the pre-training corpus to ``TARGET_DIR``. 5 | 6 | 7 | The following is an example that extracts training data from every 25th frame. 8 | 9 | ```bash 10 | export TARGET_DIR= 11 | export SCANNET_DIR= 12 | export FRAME_SKIP=25 13 | export JOBS=50 14 | 15 | reader() { 16 | filename=$1 17 | 18 | scene=$(basename -- "$filename") 19 | scene="${scene%.*}" 20 | echo "Find sens data: $filename $scene" 21 | python -u reader.py --filename $filename --output_path $TARGET_DIR/$scene --frame_skip $FRAME_SKIP --export_depth_images --export_poses --export_intrinsics 22 | echo "Extract point-cloud data" 23 | python -u point_cloud_extractor.py --input_path $TARGET_DIR/$scene --output_path $TARGET_DIR/$scene/pcd --save_npz 24 | echo "Compute partial scan overlapping" 25 | python -u compute_full_overlapping.py --input_path $TARGET_DIR/$scene/pcd 26 | } 27 | export -f reader 28 | 29 | 30 | parallel -j $JOBS --linebuffer time reader ::: `find $SCANNET_DIR/scans/scene*/*.sens` 31 | ``` 32 | 33 | Then generate the dataset list file (filtering out pairs with less than 30% overlap), which will be used in the PointContrast code: 34 | ``` 35 | python generate_list.py --target_dir $TARGET_DIR 36 | ``` 37 | 38 | ### Notes 39 | 40 | The full data generation process samples 843K point cloud pairs (you can download the full ``example_dataset/overlap-30-full.txt`` list [here](https://www.dropbox.com/s/vqvrmg0umve364n/overlap-30-full.txt?dl=0) for reference). Using *50* processes, it takes around *15* hours to fully extract and preprocess the dataset, and it can use up to 1TB of disk space. 41 | 42 | For debugging purposes, we provide a 50-pair *example* dataset that can be downloaded from [here](https://www.dropbox.com/s/9ppm0s4veow0yst/data_f25.tar?dl=0). Please extract it to ``example_dataset/`` after the download. The pair list for training is provided at ``example_dataset/overlap-30-50p-subset.txt``. This will help you walk through the training process, though the resulting model will not be useful. 43 | 44 | Given limited resources, we recommend subsampling the ScanNet dataset before sampling the pairs. In our experience, 20K pairs should be enough to provide good pretraining performance; one way to subsample is shown below.
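45 | 46 | As a rough sketch (assuming the full ``overlap-30-full.txt`` list has already been generated in ``TARGET_DIR``; the output name ``overlap-30-20k.txt`` is an arbitrary choice), you could randomly keep 20K pairs and point the ``data.scannet_match_dir`` config at the result: 47 | 48 | ```bash 49 | # Randomly sample 20K of the ~843K overlapping pairs to form a smaller pretraining list. 50 | shuf -n 20000 $TARGET_DIR/overlap-30-full.txt > $TARGET_DIR/overlap-30-20k.txt 51 | ```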
-------------------------------------------------------------------------------- /pretrain/data_preprocess/scannet_pair/compute_full_overlapping.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import copy 8 | import numpy as np 9 | import math 10 | import glob, os 11 | import argparse 12 | import open3d as o3d 13 | 14 | 15 | def make_open3d_point_cloud(xyz, color=None, voxel_size=None): 16 | if np.isnan(xyz).any(): 17 | return None 18 | 19 | pcd = o3d.geometry.PointCloud() 20 | pcd.points = o3d.utility.Vector3dVector(xyz) 21 | if color is not None: 22 | pcd.colors = o3d.utility.Vector3dVector(color) 23 | if voxel_size is not None: 24 | pcd = pcd.voxel_down_sample(voxel_size) 25 | 26 | return pcd 27 | 28 | 29 | def compute_overlap_ratio(pcd0, pcd1, voxel_size): 30 | pcd0_down = pcd0.voxel_down_sample(voxel_size) 31 | pcd1_down = pcd1.voxel_down_sample(voxel_size) 32 | matching01 = get_matching_indices(pcd0_down, o3d.geometry.KDTreeFlann(pcd1_down), voxel_size * 1.5, 1) # get_matching_indices expects a KD-tree, not a point cloud 33 | matching10 = get_matching_indices(pcd1_down, o3d.geometry.KDTreeFlann(pcd0_down), voxel_size * 1.5, 1) 34 | overlap0 = float(len(matching01)) / float(len(pcd0_down.points)) 35 | overlap1 = float(len(matching10)) / float(len(pcd1_down.points)) 36 | return max(overlap0, overlap1) 37 | 38 | 39 | def get_matching_indices(source, pcd_tree, search_voxel_size, K=None): 40 | match_inds = [] 41 | for i, point in enumerate(source.points): 42 | [_, idx, _] = pcd_tree.search_radius_vector_3d(point, search_voxel_size) 43 | if K is not None: 44 | idx = idx[:K] 45 | for j in idx: 46 | match_inds.append((i, j)) 47 | return match_inds 48 | 49 | # params 50 | parser = argparse.ArgumentParser() 51 | # data paths 52 | parser.add_argument('--input_path', required=True, help='path to the pcd folder of an extracted scene') 53 | parser.add_argument('--voxel_size', type=float, default=0.05) 54 | opt = parser.parse_args() 55 | print(opt) 56 | 57 | print('load point clouds and downsampling...') 58 | 59 | _points = [ 60 | (pcd_name, make_open3d_point_cloud(np.load(pcd_name)['pcd'], voxel_size=opt.voxel_size)) 61 | for pcd_name in glob.glob(os.path.join(opt.input_path, "*.npz")) 62 | ] 63 | points = [(pcd_name, pcd) for (pcd_name, pcd) in _points if pcd is not None] 64 | print('loaded {} point clouds ({} invalid ones were filtered), computing matching/overlapping'.format( 65 | len(points), len(_points) - len(points))) 66 | 67 | matching_matrix = np.zeros((len(points), len(points))) 68 | for i, (pcd0_name, pcd0) in enumerate(points): 69 | print('matching to...{}'.format(pcd0_name)) 70 | pcd0_tree = o3d.geometry.KDTreeFlann(copy.deepcopy(pcd0)) 71 | for j, (pcd1_name, pcd1) in enumerate(points): 72 | if i == j: 73 | continue 74 | matching_matrix[i, j] = float(len(get_matching_indices(pcd1, pcd0_tree, 1.5 * opt.voxel_size, 1))) / float(len(pcd1.points)) 75 | 76 | # write to file 77 | print('writing to file') 78 | with open(os.path.join(opt.input_path, "overlap.txt"), 'w') as f: 79 | for i, (pcd0_name, pcd0) in enumerate(points): 80 | for j, (pcd1_name, pcd1) in enumerate(points): 81 | if i < j: 82 | overlap = max(matching_matrix[i, j], matching_matrix[j, i]) 83 | f.write("{} {} {}\n".format(pcd0_name, pcd1_name, overlap)) 84 | 85 | print('done.') -------------------------------------------------------------------------------- /pretrain/data_preprocess/scannet_pair/generate_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
5 | 6 | 7 | import argparse 8 | import glob, os, sys 9 | 10 | 11 | 12 | # params 13 | parser = argparse.ArgumentParser() 14 | # data paths 15 | parser.add_argument('--target_dir', required=True, help='path to the target dir') 16 | 17 | opt = parser.parse_args() 18 | print(opt) 19 | 20 | def main(): 21 | overlaps = glob.glob(os.path.join(opt.target_dir, "*/pcd/overlap.txt")) 22 | with open(os.path.join(opt.target_dir, 'overlap-30-full.txt'), 'w') as f: 23 | for fo in overlaps: 24 | for line in open(fo): 25 | pcd0, pcd1, op = line.strip().split() 26 | if float(op) >= 0.3: 27 | print('{} {} {}'.format(pcd0, pcd1, op), file=f) 28 | print('done') 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pretrain/data_preprocess/scannet_pair/point_cloud_extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import glob, os 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | 12 | from plyfile import PlyData, PlyElement 13 | 14 | # params 15 | parser = argparse.ArgumentParser() 16 | # data paths 17 | parser.add_argument('--input_path', required=True, help='path to the extracted scene folder') 18 | parser.add_argument('--output_path', required=True, help='path to output folder') 19 | parser.add_argument('--save_npz', action='store_true') 20 | opt = parser.parse_args() 21 | print(opt) 22 | 23 | if not os.path.exists(opt.output_path): 24 | os.mkdir(opt.output_path) 25 | 26 | def write_ply(points, filename, text=True): 27 | """ input: Nx3, write points to filename as PLY format. """ 28 | points = [(points[i,0], points[i,1], points[i,2]) for i in range(points.shape[0])] 29 | vertex = np.array(points, dtype=[('x', 'f4'), ('y', 'f4'),('z', 'f4')]) 30 | el = PlyElement.describe(vertex, 'vertex', comments=['vertices']) 31 | PlyData([el], text=text).write(filename) 32 | 33 | # Load Depth Camera Intrinsic 34 | depth_intrinsic = np.loadtxt(opt.input_path + '/intrinsic/intrinsic_depth.txt') 35 | print('Depth intrinsic: ') 36 | print(depth_intrinsic) 37 | 38 | # Compute Camera Distance (just for demo, so you can choose the camera distance in frame sampling) 39 | poses = sorted(glob.glob(opt.input_path + '/pose/*.txt'), key=lambda a: int(os.path.basename(a).split('.')[0])) 40 | depths = sorted(glob.glob(opt.input_path + '/depth/*.png'), key=lambda a: int(os.path.basename(a).split('.')[0])) 41 | 42 | # Get Aligned Point Clouds.
43 | for ind, (pose, depth) in enumerate(zip(poses, depths)): 44 | name = os.path.basename(pose).split('.')[0] 45 | print('='*50, ': {}'.format(pose)) 46 | depth_img = cv2.imread(depth, -1) # read 16bit grayscale image 47 | pose = np.loadtxt(poses[ind]) 48 | print('Camera pose: ') 49 | print(pose) 50 | 51 | depth_shift = 1000.0 52 | x,y = np.meshgrid(np.linspace(0,depth_img.shape[1]-1,depth_img.shape[1]), np.linspace(0,depth_img.shape[0]-1,depth_img.shape[0])) 53 | uv_depth = np.zeros((depth_img.shape[0], depth_img.shape[1], 3)) 54 | uv_depth[:,:,0] = x 55 | uv_depth[:,:,1] = y 56 | uv_depth[:,:,2] = depth_img/depth_shift 57 | uv_depth = np.reshape(uv_depth, [-1,3]) 58 | uv_depth = uv_depth[np.where(uv_depth[:,2]!=0),:].squeeze() 59 | 60 | intrinsic_inv = np.linalg.inv(depth_intrinsic) 61 | fx = depth_intrinsic[0,0] 62 | fy = depth_intrinsic[1,1] 63 | cx = depth_intrinsic[0,2] 64 | cy = depth_intrinsic[1,2] 65 | bx = depth_intrinsic[0,3] 66 | by = depth_intrinsic[1,3] 67 | point_list = [] 68 | n = uv_depth.shape[0] 69 | points = np.ones((n,4)) 70 | X = (uv_depth[:,0]-cx)*uv_depth[:,2]/fx + bx 71 | Y = (uv_depth[:,1]-cy)*uv_depth[:,2]/fy + by 72 | points[:,0] = X 73 | points[:,1] = Y 74 | points[:,2] = uv_depth[:,2] 75 | points_world = np.dot(points, np.transpose(pose)) 76 | print(points_world.shape) 77 | 78 | if opt.save_npz: 79 | print('Saving npz file...') 80 | np.savez(opt.output_path + '/{}.npz'.format(name), pcd=points_world[:, :3]) 81 | else: 82 | print('Saving ply file...') 83 | write_ply(points_world, opt.output_path + '/{}.ply'.format(name)) 84 | -------------------------------------------------------------------------------- /pretrain/data_preprocess/scannet_pair/reader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import argparse 8 | import os, sys 9 | 10 | from SensorData import SensorData 11 | 12 | # params 13 | parser = argparse.ArgumentParser() 14 | # data paths 15 | parser.add_argument('--filename', required=True, help='path to sens file to read') 16 | parser.add_argument('--output_path', required=True, help='path to output folder') 17 | parser.add_argument('--export_depth_images', dest='export_depth_images', action='store_true') 18 | parser.add_argument('--export_color_images', dest='export_color_images', action='store_true') 19 | parser.add_argument('--export_poses', dest='export_poses', action='store_true') 20 | parser.add_argument('--export_intrinsics', dest='export_intrinsics', action='store_true') 21 | parser.add_argument('--frame_skip', type=int, default=1) 22 | parser.set_defaults(export_depth_images=False, export_color_images=False, export_poses=False, export_intrinsics=False) 23 | 24 | opt = parser.parse_args() 25 | print(opt) 26 | 27 | 28 | def main(): 29 | if not os.path.exists(opt.output_path): 30 | os.makedirs(opt.output_path) 31 | # load the data 32 | print('loading %s...' 
% opt.filename) 33 | sd = SensorData(opt.filename) 34 | print('loaded!\n') 35 | if opt.export_depth_images: 36 | sd.export_depth_images(os.path.join(opt.output_path, 'depth'), frame_skip=opt.frame_skip) 37 | if opt.export_color_images: 38 | sd.export_color_images(os.path.join(opt.output_path, 'color'), frame_skip=opt.frame_skip) 39 | if opt.export_poses: 40 | sd.export_poses(os.path.join(opt.output_path, 'pose'), frame_skip=opt.frame_skip) 41 | if opt.export_intrinsics: 42 | sd.export_intrinsics(os.path.join(opt.output_path, 'intrinsic')) 43 | 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/.gitignore: -------------------------------------------------------------------------------- 1 | # Temp files 2 | __pycache__ 3 | *.swp 4 | *.swo 5 | *.orig 6 | .idea 7 | .nfs* 8 | outputs/ 9 | outputs_dir/ 10 | *.pyc 11 | data/ 12 | debug/ 13 | launch_scripts 14 | *.npy 15 | *.ipynb 16 | *.pcd 17 | .*/ 18 | .vscode/settings.json 19 | example_dataset/data_f25* 20 | example_dataset/overlap-30-full.txt -------------------------------------------------------------------------------- /pretrain/pointcontrast/config/defaults.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra/launcher: submitit_slurm 3 | - hydra/hydra_logging: colorlog 4 | 5 | trainer: 6 | trainer: HardestContrastiveLossTrainer 7 | save_freq_epoch: 1 8 | batch_size: 4 9 | 10 | # Hard negative mining 11 | use_hard_negative: True 12 | hard_negative_max_num: 3000 13 | num_pos_per_batch: 1024 14 | num_hn_samples_per_batch: 256 15 | 16 | # Metric learning loss 17 | neg_thresh: 1.4 18 | pos_thresh: 0.1 19 | neg_weight: 1.0 20 | 21 | # Data augmentation 22 | use_random_scale: False 23 | min_scale: 0.8 24 | max_scale: 1.2 25 | use_random_rotation: True 26 | rotation_range: 360 27 | 28 | # Data loader configs 29 | stat_freq: 40 30 | lr_update_freq: 1000 31 | positive_pair_search_voxel_size_multiplier: 1.5 32 | 33 | # Network specific configurations 34 | net: 35 | model: Res16UNet34C 36 | model_n_out: 32 #Feature dimension 37 | conv1_kernel_size: 3 38 | normalize_feature: True 39 | dist_type: L2 40 | 41 | # Optimizer arguments 42 | opt: 43 | optimizer: SGD 44 | max_iter: 300000 45 | lr: 1e-1 46 | momentum: 0.8 47 | sgd_momentum: 0.9 48 | sgd_dampening: 0.1 49 | adam_beta1: 0.9 50 | adam_beta2: 0.999 51 | weight_decay: 1e-4 52 | bn_momentum: 0.05 53 | exp_gamma: 0.99 54 | scheduler: ExpLR 55 | 56 | misc: 57 | out_dir: /checkpoint/point_contrast/checkpoints/default 58 | use_gpu: True 59 | num_gpus: 1 60 | weight: 61 | config: 62 | lenient_weight_loading: False # Weights with the same size will be loaded 63 | 64 | train_num_thread: 2 65 | nn_max_n: 500 # The maximum number of features to find nearest neighbors in batch 66 | 67 | # NCE related 68 | nceT: 0.07 69 | npos: 4096 70 | 71 | # TODO(s9xie): all args for scannet training 72 | num_workers: 2 73 | train_limit_numpoints: 0 74 | data_aug_scale_min: 0.9 75 | data_aug_scale_max: 1.1 76 | 77 | cache_data: False 78 | 79 | ignore_label: 255 80 | return_transformation: False 81 | 82 | # Dataset specific configurations 83 | data: 84 | dataset: ScanNetMatchPairDataset 85 | voxel_size: 0.025 86 | dataset_root_dir: /private/home/PointContrast/pretrain/pointcontrast/example_dataset 87 | scannet_match_dir: overlap-30-50p-subset.txt 88 | 89 | hydra: 90 | run: 91 | dir: ${misc.out_dir} 92 | sweep: 93 | dir: ${misc.out_dir} 94 | launcher: 95 | 
partition: dev 96 | submitit_folder: ${hydra.sweep.dir}/.submitit/%j 97 | name: ${hydra.job.name} 98 | timeout_min: 3600 99 | cpus_per_task: 80 100 | gpus_per_node: 8 101 | tasks_per_node: 1 102 | mem_gb: 480 103 | nodes: 1 104 | constraint: volta32gb 105 | max_num_timeout: 3 106 | signal_delay_s: 300 107 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/ddp_train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import open3d as o3d # prevent loading error 7 | 8 | import sys 9 | import os 10 | import json 11 | import logging 12 | import torch 13 | from omegaconf import OmegaConf 14 | 15 | from easydict import EasyDict as edict 16 | 17 | from lib.ddp_data_loaders import make_data_loader 18 | import lib.multiprocessing as mpu 19 | import hydra 20 | 21 | from lib.ddp_trainer import HardestContrastiveLossTrainer, PointNCELossTrainer 22 | 23 | ch = logging.StreamHandler(sys.stdout) 24 | logging.getLogger().setLevel(logging.INFO) 25 | logging.basicConfig( 26 | format='%(asctime)s %(message)s', datefmt='%m/%d %H:%M:%S', handlers=[ch]) 27 | 28 | torch.manual_seed(0) 29 | torch.cuda.manual_seed(0) 30 | 31 | logging.basicConfig(level=logging.INFO, format="") 32 | 33 | def get_trainer(trainer): 34 | if trainer == 'HardestContrastiveLossTrainer': 35 | return HardestContrastiveLossTrainer 36 | elif trainer == 'PointNCELossTrainer': 37 | return PointNCELossTrainer 38 | else: 39 | raise ValueError(f'Trainer {trainer} not found') 40 | 41 | @hydra.main(config_path='config', config_name='defaults.yaml') 42 | def main(config): 43 | logger = logging.getLogger() 44 | if config.misc.config: 45 | resume_config = OmegaConf.load(config.misc.config) 46 | if config.misc.weight: 47 | weight = config.misc.weight 48 | config = resume_config 49 | config.misc.weight = weight 50 | else: 51 | config = resume_config 52 | 53 | logging.info('===> Configurations') 54 | logging.info(config.pretty()) 55 | 56 | # Convert to dict 57 | if config.misc.num_gpus > 1: 58 | mpu.multi_proc_run(config.misc.num_gpus, 59 | fun=single_proc_run, fun_args=(config,)) 60 | else: 61 | single_proc_run(config) 62 | 63 | def single_proc_run(config): 64 | train_loader = make_data_loader( 65 | config, 66 | config.trainer.batch_size, 67 | num_threads=config.misc.train_num_thread) 68 | 69 | Trainer = get_trainer(config.trainer.trainer) 70 | trainer = Trainer( 71 | config=config, 72 | data_loader=train_loader, 73 | ) 74 | trainer.train() 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/example_dataset/overlap-30-50p-subset.txt: -------------------------------------------------------------------------------- 1 | data_f25/scene0589_00/pcd/850.npz data_f25/scene0589_00/pcd/1150.npz 0.794144556267 2 | data_f25/scene0571_00/pcd/125.npz data_f25/scene0571_00/pcd/1275.npz 0.765413533835 3 | data_f25/scene0502_01/pcd/725.npz data_f25/scene0502_01/pcd/750.npz 0.545100899587 4 | data_f25/scene0599_01/pcd/725.npz data_f25/scene0599_01/pcd/1600.npz 0.52854612345 5 | data_f25/scene0330_00/pcd/0.npz data_f25/scene0330_00/pcd/75.npz 0.816279482178 6 | data_f25/scene0412_00/pcd/1525.npz data_f25/scene0412_00/pcd/1600.npz 0.798928919182 7 | 
data_f25/scene0137_02/pcd/800.npz data_f25/scene0137_02/pcd/875.npz 0.65932792584 8 | data_f25/scene0055_01/pcd/675.npz data_f25/scene0055_01/pcd/1575.npz 0.4172783738 9 | data_f25/scene0648_00/pcd/3350.npz data_f25/scene0648_00/pcd/4000.npz 0.753277711561 10 | data_f25/scene0653_01/pcd/2275.npz data_f25/scene0653_01/pcd/4500.npz 0.388599616333 11 | data_f25/scene0250_01/pcd/0.npz data_f25/scene0250_01/pcd/300.npz 0.797496318115 12 | data_f25/scene0501_00/pcd/575.npz data_f25/scene0501_00/pcd/725.npz 0.770364623739 13 | data_f25/scene0487_00/pcd/200.npz data_f25/scene0487_00/pcd/300.npz 0.547636909227 14 | data_f25/scene0465_01/pcd/1700.npz data_f25/scene0465_01/pcd/2925.npz 0.40204865557 15 | data_f25/scene0663_00/pcd/1000.npz data_f25/scene0663_00/pcd/1850.npz 0.936451169188 16 | data_f25/scene0640_01/pcd/1650.npz data_f25/scene0640_01/pcd/2575.npz 0.708106559725 17 | data_f25/scene0272_01/pcd/225.npz data_f25/scene0272_01/pcd/875.npz 0.581001472754 18 | data_f25/scene0057_00/pcd/100.npz data_f25/scene0057_00/pcd/750.npz 0.645107097858 19 | data_f25/scene0031_02/pcd/100.npz data_f25/scene0031_02/pcd/1150.npz 0.427319004525 20 | data_f25/scene0124_01/pcd/1175.npz data_f25/scene0124_01/pcd/1550.npz 0.370967741935 21 | data_f25/scene0115_01/pcd/150.npz data_f25/scene0115_01/pcd/375.npz 0.459632516704 22 | data_f25/scene0301_01/pcd/125.npz data_f25/scene0301_01/pcd/800.npz 0.305742787813 23 | data_f25/scene0608_00/pcd/150.npz data_f25/scene0608_00/pcd/3275.npz 0.606004618938 24 | data_f25/scene0270_00/pcd/75.npz data_f25/scene0270_00/pcd/150.npz 0.47983310153 25 | data_f25/scene0348_00/pcd/200.npz data_f25/scene0348_00/pcd/225.npz 0.973609802074 26 | data_f25/scene0508_02/pcd/100.npz data_f25/scene0508_02/pcd/800.npz 0.515041681769 27 | data_f25/scene0670_01/pcd/1950.npz data_f25/scene0670_01/pcd/2250.npz 0.315849486887 28 | data_f25/scene0233_00/pcd/6150.npz data_f25/scene0233_00/pcd/7200.npz 0.547153780799 29 | data_f25/scene0312_01/pcd/900.npz data_f25/scene0312_01/pcd/950.npz 0.610154327891 30 | data_f25/scene0248_02/pcd/350.npz data_f25/scene0248_02/pcd/1125.npz 0.310483870968 31 | data_f25/scene0114_02/pcd/1425.npz data_f25/scene0114_02/pcd/1500.npz 0.689078386356 32 | data_f25/scene0026_00/pcd/250.npz data_f25/scene0026_00/pcd/2200.npz 0.565703634669 33 | data_f25/scene0669_00/pcd/1000.npz data_f25/scene0669_00/pcd/1125.npz 0.673843530164 34 | data_f25/scene0656_01/pcd/525.npz data_f25/scene0656_01/pcd/925.npz 0.781538461538 35 | data_f25/scene0220_01/pcd/225.npz data_f25/scene0220_01/pcd/1775.npz 0.589433962264 36 | data_f25/scene0151_00/pcd/75.npz data_f25/scene0151_00/pcd/825.npz 0.404442313838 37 | data_f25/scene0140_00/pcd/2425.npz data_f25/scene0140_00/pcd/4100.npz 0.61125 38 | data_f25/scene0106_00/pcd/1150.npz data_f25/scene0106_00/pcd/1200.npz 0.47761589404 39 | data_f25/scene0029_01/pcd/0.npz data_f25/scene0029_01/pcd/1100.npz 0.576585365854 40 | data_f25/scene0641_00/pcd/850.npz data_f25/scene0641_00/pcd/1225.npz 0.375791930644 41 | data_f25/scene0012_01/pcd/475.npz data_f25/scene0012_01/pcd/550.npz 0.650238473768 42 | data_f25/scene0452_01/pcd/375.npz data_f25/scene0452_01/pcd/575.npz 0.3016 43 | data_f25/scene0656_02/pcd/975.npz data_f25/scene0656_02/pcd/1250.npz 0.349909584087 44 | data_f25/scene0166_00/pcd/150.npz data_f25/scene0166_00/pcd/3225.npz 0.386178861789 45 | data_f25/scene0111_02/pcd/1275.npz data_f25/scene0111_02/pcd/1450.npz 0.304035378662 46 | data_f25/scene0404_02/pcd/1225.npz data_f25/scene0404_02/pcd/2675.npz 0.698825503356 47 | 
data_f25/scene0496_00/pcd/475.npz data_f25/scene0496_00/pcd/1325.npz 0.499608457322 48 | data_f25/scene0449_02/pcd/75.npz data_f25/scene0449_02/pcd/200.npz 0.44043715847 49 | data_f25/scene0401_00/pcd/275.npz data_f25/scene0401_00/pcd/1050.npz 0.548514851485 50 | data_f25/scene0457_01/pcd/2000.npz data_f25/scene0457_01/pcd/2150.npz 0.579056865465 51 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/criterion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import torch 8 | from torch import nn 9 | 10 | class NCESoftmaxLoss(nn.Module): 11 | def __init__(self): 12 | super(NCESoftmaxLoss, self).__init__() 13 | self.criterion = nn.CrossEntropyLoss() 14 | 15 | def forward(self, x, label): 16 | bsz = x.shape[0] 17 | x = x.squeeze() 18 | loss = self.criterion(x, label) 19 | return loss 20 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/data_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | 7 | import torch 8 | from torch.utils.data.sampler import Sampler 9 | import torch.distributed as dist 10 | 11 | import math 12 | 13 | class InfSampler(Sampler): 14 | def __init__(self, data_source, shuffle=False): 15 | self.data_source = data_source 16 | self.shuffle = shuffle 17 | self.reset_permutation() 18 | 19 | def reset_permutation(self): 20 | perm = len(self.data_source) 21 | if self.shuffle: 22 | perm = torch.randperm(perm) 23 | else: 24 | perm = torch.arange(perm) 25 | self._perm = perm.tolist() 26 | 27 | def __iter__(self): 28 | return self 29 | 30 | def __next__(self): 31 | if len(self._perm) == 0: 32 | self.reset_permutation() 33 | return self._perm.pop() 34 | 35 | def __len__(self): 36 | return len(self.data_source) 37 | 38 | next = __next__ # Python 2 compatibility 39 | 40 | 41 | class DistributedInfSampler(InfSampler): 42 | def __init__(self, data_source, num_replicas=None, rank=None, shuffle=True): 43 | if num_replicas is None: 44 | if not dist.is_available(): 45 | raise RuntimeError("Requires distributed package to be available") 46 | num_replicas = dist.get_world_size() 47 | if rank is None: 48 | if not dist.is_available(): 49 | raise RuntimeError("Requires distributed package to be available") 50 | rank = dist.get_rank() 51 | 52 | self.data_source = data_source 53 | self.num_replicas = num_replicas 54 | self.rank = rank 55 | self.epoch = 0 56 | self.it = 0 57 | self.num_samples = int(math.ceil(len(self.data_source) * 1.0 / self.num_replicas)) 58 | self.total_size = self.num_samples * self.num_replicas 59 | self.shuffle = shuffle 60 | self.reset_permutation() 61 | 62 | def __next__(self): 63 | it = self.it * self.num_replicas + self.rank 64 | value = self._perm[it % len(self._perm)] 65 | self.it = self.it + 1 66 | 67 | if (self.it * self.num_replicas) >= len(self._perm): 68 | self.reset_permutation() 69 | self.it = 0 70 | return value 71 | 72 | def __len__(self): 73 | return self.num_samples 74 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/error_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | #!/usr/bin/env python3 7 | 8 | """Multiprocessing error handler.""" 9 | 10 | import os 11 | import signal 12 | import threading 13 | 14 | 15 | class ChildException(Exception): 16 | """Wraps an exception from a child process.""" 17 | 18 | def __init__(self, child_trace): 19 | super(ChildException, self).__init__(child_trace) 20 | 21 | 22 | class ErrorHandler(object): 23 | """Multiprocessing error handler (based on fairseq's). 24 | 25 | Listens for errors in child processes and 26 | propagates the tracebacks to the parent process. 
27 | """ 28 | 29 | def __init__(self, error_queue): 30 | # Shared error queue 31 | self.error_queue = error_queue 32 | # Children processes sharing the error queue 33 | self.children_pids = [] 34 | # Start a thread listening to errors 35 | self.error_listener = threading.Thread(target=self.listen, daemon=True) 36 | self.error_listener.start() 37 | # Register the signal handler 38 | signal.signal(signal.SIGUSR1, self.signal_handler) 39 | 40 | def add_child(self, pid): 41 | """Registers a child process.""" 42 | self.children_pids.append(pid) 43 | 44 | def listen(self): 45 | """Listens for errors in the error queue.""" 46 | # Wait until there is an error in the queue 47 | child_trace = self.error_queue.get() 48 | # Put the error back for the signal handler 49 | self.error_queue.put(child_trace) 50 | # Invoke the signal handler 51 | os.kill(os.getpid(), signal.SIGUSR1) 52 | 53 | def signal_handler(self, sig_num, stack_frame): 54 | """Signal handler.""" 55 | # Kill children processes 56 | for pid in self.children_pids: 57 | os.kill(pid, signal.SIGINT) 58 | # Propagate the error from the child process 59 | raise ChildException(self.error_queue.get()) 60 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/multiprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | #!/usr/bin/env python3 7 | 8 | """Multiprocessing helpers.""" 9 | 10 | import multiprocessing as mp 11 | import traceback 12 | 13 | from lib.error_handler import ErrorHandler 14 | 15 | import lib.distributed as du 16 | 17 | 18 | def run(proc_rank, world_size, error_queue, fun, fun_args, fun_kwargs): 19 | """Runs a function from a child process.""" 20 | try: 21 | # Initialize the process group 22 | du.init_process_group(proc_rank, world_size) 23 | # Run the function 24 | fun(*fun_args, **fun_kwargs) 25 | except KeyboardInterrupt: 26 | # Killed by the parent process 27 | pass 28 | except Exception: 29 | # Propagate exception to the parent process 30 | error_queue.put(traceback.format_exc()) 31 | finally: 32 | # Destroy the process group 33 | du.destroy_process_group() 34 | 35 | 36 | def multi_proc_run(num_proc, fun, fun_args=(), fun_kwargs={}): 37 | """Runs a function in a multi-proc setting.""" 38 | 39 | # Handle errors from training subprocesses 40 | error_queue = mp.SimpleQueue() 41 | error_handler = ErrorHandler(error_queue) 42 | 43 | # Run each training subprocess 44 | ps = [] 45 | for i in range(num_proc): 46 | p_i = mp.Process( 47 | target=run, 48 | args=(i, num_proc, error_queue, fun, fun_args, fun_kwargs) 49 | ) 50 | ps.append(p_i) 51 | p_i.start() 52 | error_handler.add_child(p_i.pid) 53 | 54 | # Wait for each subprocess to finish 55 | for p in ps: 56 | p.join() 57 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import time 7 | 8 | 9 | class AverageMeter(object): 10 | """Computes and stores the average and current value""" 11 | 12 | def __init__(self): 13 | self.reset() 14 | 15 | def reset(self): 16 | self.val = 0 17 | self.avg = 0 18 | self.sum = 0.0 19 | self.sq_sum = 0.0 20 | self.count = 0 21 | 22 | def update(self, val, n=1): 23 | self.val = val 24 | self.sum += val * n 25 | self.count += n 26 | self.avg = self.sum / self.count 27 | self.sq_sum += val**2 * n 28 | self.var = self.sq_sum / self.count - self.avg ** 2 29 | 30 | 31 | class Timer(object): 32 | """A simple timer.""" 33 | 34 | def __init__(self): 35 | self.total_time = 0. 36 | self.calls = 0 37 | self.start_time = 0. 38 | self.diff = 0. 39 | self.avg = 0. 40 | 41 | def reset(self): 42 | self.total_time = 0 43 | self.calls = 0 44 | self.start_time = 0 45 | self.diff = 0 46 | self.avg = 0 47 | 48 | def tic(self): 49 | # using time.time instead of time.clock because time.clock 50 | # does not normalize for multithreading 51 | self.start_time = time.time() 52 | 53 | def toc(self, average=True): 54 | self.diff = time.time() - self.start_time 55 | self.total_time += self.diff 56 | self.calls += 1 57 | self.avg = self.total_time / self.calls 58 | if average: 59 | return self.avg 60 | else: 61 | return self.diff 62 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/lib/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | import numpy as np 8 | import random 9 | 10 | class Compose: 11 | 12 | def __init__(self, transforms): 13 | self.transforms = transforms 14 | 15 | def __call__(self, coords, feats): 16 | for transform in self.transforms: 17 | coords, feats = transform(coords, feats) 18 | return coords, feats 19 | 20 | 21 | class Jitter: 22 | 23 | def __init__(self, mu=0, sigma=0.01): 24 | self.mu = mu 25 | self.sigma = sigma 26 | 27 | def __call__(self, coords, feats): 28 | if random.random() < 0.95: 29 | feats += np.random.normal(self.mu, self.sigma, (feats.shape[0], feats.shape[1])) 30 | return coords, feats 31 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import model.res16unet as res16unet 7 | 8 | MODELS = [] 9 | 10 | 11 | def add_models(module): 12 | MODELS.extend([getattr(module, a) for a in dir(module) if 'Net' in a]) 13 | 14 | add_models(res16unet) 15 | 16 | def get_models(): 17 | '''Returns the list of available models.''' 18 | return MODELS 19 | 20 | def load_model(name): 21 | '''Returns the model class given its class name. 22 | ''' 23 | all_models = get_models() 24 | mdict = {model.__name__: model for model in all_models} 25 | if name not in mdict: 26 | print('Invalid model name. 
Options are:') 27 | for model in all_models: 28 | print('\t* {}'.format(model.__name__)) 29 | return None 30 | NetClass = mdict[name] 31 | return NetClass 32 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/model/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/model/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn as nn 7 | 8 | from model.modules.common import ConvType, NormType, get_norm, conv 9 | 10 | from MinkowskiEngine import MinkowskiReLU 11 | 12 | 13 | class BasicBlockBase(nn.Module): 14 | expansion = 1 15 | NORM_TYPE = NormType.BATCH_NORM 16 | 17 | def __init__(self, 18 | inplanes, 19 | planes, 20 | stride=1, 21 | dilation=1, 22 | downsample=None, 23 | conv_type=ConvType.HYPERCUBE, 24 | bn_momentum=0.1, 25 | D=3): 26 | super(BasicBlockBase, self).__init__() 27 | 28 | self.conv1 = conv( 29 | inplanes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 30 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 31 | self.conv2 = conv( 32 | planes, 33 | planes, 34 | kernel_size=3, 35 | stride=1, 36 | dilation=dilation, 37 | bias=False, 38 | conv_type=conv_type, 39 | D=D) 40 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 41 | self.relu = MinkowskiReLU(inplace=True) 42 | self.downsample = downsample 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.norm1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.norm2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | class BasicBlock(BasicBlockBase): 63 | NORM_TYPE = NormType.BATCH_NORM 64 | 65 | class BottleneckBase(nn.Module): 66 | expansion = 4 67 | NORM_TYPE = NormType.BATCH_NORM 68 | 69 | def __init__(self, 70 | inplanes, 71 | planes, 72 | stride=1, 73 | dilation=1, 74 | downsample=None, 75 | conv_type=ConvType.HYPERCUBE, 76 | bn_momentum=0.1, 77 | D=3): 78 | super(BottleneckBase, self).__init__() 79 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 80 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 81 | 82 | self.conv2 = conv( 83 | planes, planes, kernel_size=3, stride=stride, dilation=dilation, conv_type=conv_type, D=D) 84 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 85 | 86 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 87 | self.norm3 = get_norm(self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum) 88 | 89 | self.relu = MinkowskiReLU(inplace=True) 90 | self.downsample = downsample 91 | 92 | def forward(self, x): 93 | residual = x 94 | 95 | out = self.conv1(x) 96 | out = self.norm1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.norm2(out) 101 | out = 
self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.norm3(out) 105 | 106 | if self.downsample is not None: 107 | residual = self.downsample(x) 108 | 109 | out += residual 110 | out = self.relu(out) 111 | 112 | return out 113 | 114 | 115 | class Bottleneck(BottleneckBase): 116 | NORM_TYPE = NormType.BATCH_NORM 117 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/model/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch.nn as nn 7 | 8 | import MinkowskiEngine as ME 9 | from MinkowskiEngine import MinkowskiNetwork 10 | 11 | from model.modules.common import ConvType, NormType, get_norm, conv, sum_pool 12 | from model.modules.resnet_block import BasicBlock, Bottleneck 13 | 14 | 15 | class Model(MinkowskiNetwork): 16 | OUT_PIXEL_DIST = -1 17 | 18 | def __init__(self, in_channels, out_channels, config, D, **kwargs): 19 | super(Model, self).__init__(D) 20 | self.in_channels = in_channels 21 | self.out_channels = out_channels 22 | self.config = config 23 | 24 | 25 | class ResNetBase(Model): 26 | BLOCK = None 27 | LAYERS = () 28 | INIT_DIM = 64 29 | PLANES = (64, 128, 256, 512) 30 | OUT_PIXEL_DIST = 32 31 | HAS_LAST_BLOCK = False 32 | CONV_TYPE = ConvType.HYPERCUBE 33 | 34 | def __init__(self, in_channels, out_channels, config, D=3, **kwargs): 35 | assert self.BLOCK is not None 36 | assert self.OUT_PIXEL_DIST > 0 37 | 38 | super(ResNetBase, self).__init__(in_channels, out_channels, config, D, **kwargs) 39 | 40 | self.network_initialization(in_channels, out_channels, config, D) 41 | self.weight_initialization() 42 | 43 | def network_initialization(self, in_channels, out_channels, config, D): 44 | 45 | def space_n_time_m(n, m): 46 | return n if D == 3 else [n, n, n, m] 47 | 48 | if D == 4: 49 | self.OUT_PIXEL_DIST = space_n_time_m(self.OUT_PIXEL_DIST, 1) 50 | 51 | dilations = config.dilations 52 | bn_momentum = config.opt.bn_momentum 53 | self.inplanes = self.INIT_DIM 54 | self.conv1 = conv( 55 | in_channels, 56 | self.inplanes, 57 | kernel_size=space_n_time_m(config.conv1_kernel_size, 1), 58 | stride=1, 59 | D=D) 60 | 61 | self.bn1 = get_norm(NormType.BATCH_NORM, self.inplanes, D=self.D, bn_momentum=bn_momentum) 62 | self.relu = ME.MinkowskiReLU(inplace=True) 63 | self.pool = sum_pool(kernel_size=space_n_time_m(2, 1), stride=space_n_time_m(2, 1), D=D) 64 | 65 | self.layer1 = self._make_layer( 66 | self.BLOCK, 67 | self.PLANES[0], 68 | self.LAYERS[0], 69 | stride=space_n_time_m(2, 1), 70 | dilation=space_n_time_m(dilations[0], 1)) 71 | self.layer2 = self._make_layer( 72 | self.BLOCK, 73 | self.PLANES[1], 74 | self.LAYERS[1], 75 | stride=space_n_time_m(2, 1), 76 | dilation=space_n_time_m(dilations[1], 1)) 77 | self.layer3 = self._make_layer( 78 | self.BLOCK, 79 | self.PLANES[2], 80 | self.LAYERS[2], 81 | stride=space_n_time_m(2, 1), 82 | dilation=space_n_time_m(dilations[2], 1)) 83 | self.layer4 = self._make_layer( 84 | self.BLOCK, 85 | self.PLANES[3], 86 | self.LAYERS[3], 87 | stride=space_n_time_m(2, 1), 88 | dilation=space_n_time_m(dilations[3], 1)) 89 | 90 | self.final = conv( 91 | self.PLANES[3] * self.BLOCK.expansion, out_channels, kernel_size=1, bias=True, D=D) 92 | 93 | def weight_initialization(self): 94 | for m in self.modules(): 95 | if isinstance(m, 
ME.MinkowskiBatchNorm): 96 | nn.init.constant_(m.bn.weight, 1) 97 | nn.init.constant_(m.bn.bias, 0) 98 | 99 | def _make_layer(self, 100 | block, 101 | planes, 102 | blocks, 103 | stride=1, 104 | dilation=1, 105 | norm_type=NormType.BATCH_NORM, 106 | bn_momentum=0.1): 107 | downsample = None 108 | if stride != 1 or self.inplanes != planes * block.expansion: 109 | downsample = nn.Sequential( 110 | conv( 111 | self.inplanes, 112 | planes * block.expansion, 113 | kernel_size=1, 114 | stride=stride, 115 | bias=False, 116 | D=self.D), 117 | get_norm(norm_type, planes * block.expansion, D=self.D, bn_momentum=bn_momentum), 118 | ) 119 | layers = [] 120 | layers.append( 121 | block( 122 | self.inplanes, 123 | planes, 124 | stride=stride, 125 | dilation=dilation, 126 | downsample=downsample, 127 | conv_type=self.CONV_TYPE, 128 | D=self.D)) 129 | self.inplanes = planes * block.expansion 130 | for i in range(1, blocks): 131 | layers.append( 132 | block( 133 | self.inplanes, 134 | planes, 135 | stride=1, 136 | dilation=dilation, 137 | conv_type=self.CONV_TYPE, 138 | D=self.D)) 139 | 140 | return nn.Sequential(*layers) 141 | 142 | def forward(self, x): 143 | x = self.conv1(x) 144 | x = self.bn1(x) 145 | x = self.relu(x) 146 | x = self.pool(x) 147 | 148 | x = self.layer1(x) 149 | x = self.layer2(x) 150 | x = self.layer3(x) 151 | x = self.layer4(x) 152 | 153 | x = self.final(x) 154 | return x 155 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/scripts/ddp_launch.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | #!/bin/bash 7 | 8 | export OUT_DIR=./tmp_out_dir # Specify your output directory here 9 | 10 | python ddp_train.py -m \ 11 | net.model=Res16UNet34C \ 12 | net.conv1_kernel_size=3 \ 13 | opt.lr=0.05 \ 14 | opt.max_iter=60000 \ 15 | data.dataset=ScanNetMatchPairDataset \ 16 | data.voxel_size=0.025 \ 17 | trainer.trainer=PointNCELossTrainer \ 18 | trainer.batch_size=32 \ 19 | trainer.stat_freq=1 \ 20 | trainer.lr_update_freq=250 \ 21 | misc.num_gpus=8 \ 22 | misc.free_rot=True \ 23 | misc.npos=4096 \ 24 | misc.nceT=0.4 \ 25 | misc.use_color_feat=True \ 26 | misc.out_dir=${OUT_DIR} \ 27 | hydra.launcher.partition=dev \ 28 | hydra.launcher.timeout_min=3600 \ 29 | hydra.launcher.max_num_timeout=3 \ 30 | hydra.launcher.signal_delay_s=300 \ 31 | #trainer.trainer=HardestContrastiveLossTrainer \ 32 | -------------------------------------------------------------------------------- /pretrain/pointcontrast/scripts/ddp_local.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | #!/bin/bash 7 | 8 | export OUT_DIR=./tmp_out_dir 9 | 10 | python ddp_train.py \ 11 | net.model=Res16UNet34C \ 12 | net.conv1_kernel_size=3 \ 13 | opt.lr=0.1 \ 14 | opt.max_iter=60000 \ 15 | data.dataset=ScanNetMatchPairDataset \ 16 | data.voxel_size=0.025 \ 17 | trainer.batch_size=32 \ 18 | trainer.stat_freq=1 \ 19 | trainer.lr_update_freq=250 \ 20 | misc.num_gpus=8 \ 21 | misc.npos=4096 \ 22 | misc.nceT=0.4 \ 23 | misc.out_dir=${OUT_DIR} \ 24 | trainer.trainer=HardestContrastiveLossTrainer \ 25 | data.dataset_root_dir=~/pointcontrast/pretrain/pointcontrast/example_dataset \ 26 | data.scannet_match_dir=overlap-30-50p-subset.txt \ 27 | # trainer.trainer=PointNCELossTrainer \ 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | open3d 5 | tensorboardX 6 | future-fstrings 7 | easydict 8 | joblib 9 | scikit-learn 10 | tqdm 11 | --------------------------------------------------------------------------------