├── LICENSE ├── README.md ├── assets ├── scene_0019.gif ├── scene_0025.gif ├── scene_003.gif ├── scene_005.gif └── scene_0164.gif ├── docs ├── DATASET.md ├── INFER.md ├── INSTALL.md ├── MODEL.md ├── association_module.png └── framework.png ├── pcseg ├── __init__.py ├── config.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── dataset.cpython-38.pyc │ │ └── indoor_dataset.cpython-38.pyc │ ├── augmentor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── augmentor_utils.cpython-38.pyc │ │ │ └── data_augmentor.cpython-38.pyc │ │ ├── augmentor_utils.py │ │ └── data_augmentor.py │ ├── dataset.py │ ├── indoor_dataset.py │ ├── processor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── data_processor.cpython-38.pyc │ │ └── data_processor.py │ ├── s3dis │ │ ├── __pycache__ │ │ │ └── s3dis_dataset.cpython-38.pyc │ │ ├── meta │ │ │ ├── all_data_label.txt │ │ │ ├── anno_paths.txt │ │ │ ├── area6_data_label.txt │ │ │ └── class_names.txt │ │ ├── preprocess.py │ │ └── s3dis_dataset.py │ └── scannet │ │ ├── __pycache__ │ │ └── scannet_dataset.cpython-38.pyc │ │ └── scannet_dataset.py ├── external_libs │ └── softgroup_ops │ │ ├── ops │ │ ├── __init__.py │ │ ├── functions.py │ │ ├── setup.py │ │ └── src │ │ │ ├── bfs_cluster │ │ │ ├── bfs_cluster.cpp │ │ │ ├── bfs_cluster.cu │ │ │ └── bfs_cluster.h │ │ │ ├── cal_iou_and_masklabel │ │ │ ├── cal_iou_and_masklabel.cpp │ │ │ ├── cal_iou_and_masklabel.cu │ │ │ └── cal_iou_and_masklabel.h │ │ │ ├── cuda.cu │ │ │ ├── cuda_utils.h │ │ │ ├── datatype │ │ │ ├── datatype.cpp │ │ │ └── datatype.h │ │ │ ├── roipool │ │ │ ├── roipool.cpp │ │ │ ├── roipool.cu │ │ │ └── roipool.h │ │ │ ├── sec_mean │ │ │ ├── sec_mean.cpp │ │ │ ├── sec_mean.cu │ │ │ └── sec_mean.h │ │ │ ├── softgroup_api.cpp │ │ │ ├── softgroup_ops.cpp │ │ │ ├── softgroup_ops.h │ │ │ └── voxelize │ │ │ ├── voxelize.cpp │ │ │ ├── voxelize.cu │ │ │ └── voxelize.h │ │ ├── setup.cfg │ │ └── setup.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-38.pyc │ ├── adapter │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── vl_adapter.cpython-38.pyc │ │ └── vl_adapter.py │ ├── head │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── binary_head.cpython-38.pyc │ │ │ ├── caption_head.cpython-38.pyc │ │ │ ├── inst_head.cpython-38.pyc │ │ │ ├── linear_head.cpython-38.pyc │ │ │ └── text_seg_head.cpython-38.pyc │ │ ├── binary_head.py │ │ ├── caption_head.py │ │ ├── inst_head.py │ │ ├── linear_head.py │ │ └── text_seg_head.py │ ├── model_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── basic_block_1d.cpython-38.pyc │ │ │ ├── fp16.cpython-38.pyc │ │ │ ├── rle_utils.cpython-38.pyc │ │ │ └── unet_blocks.cpython-38.pyc │ │ ├── basic_block_1d.py │ │ ├── basic_block_2d.py │ │ ├── fp16.py │ │ ├── rle_utils.py │ │ └── unet_blocks.py │ ├── text_networks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── prompt_template.cpython-38.pyc │ │ │ └── text_models.cpython-38.pyc │ │ ├── prompt_template.py │ │ └── text_models.py │ ├── vision_backbones_3d │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── spconv_unet_indoor.cpython-38.pyc │ │ ├── spconv_unet_indoor.py │ │ └── vfe │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── indoor_vfe.cpython-38.pyc │ │ │ └── vfe_template.cpython-38.pyc │ │ │ ├── 
indoor_vfe.py │ │ │ └── vfe_template.py │ └── vision_networks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── network_template.cpython-38.pyc │ │ └── sparseunet_textseg.cpython-38.pyc │ │ ├── network_template.py │ │ ├── sparseunet.py │ │ └── sparseunet_textseg.py └── utils │ ├── __init__.py │ ├── arnold_utils.py │ ├── caption_utils.py │ ├── common_utils.py │ ├── commu_utils.py │ ├── loss_utils.py │ ├── metric_utils.py │ ├── spconv_utils.py │ └── voxelize_utils.py ├── requirements.txt ├── setup.py └── tools ├── _init_path.py ├── cfgs ├── dataset_configs │ ├── s3dis_dataset.yaml │ ├── s3dis_dataset_image.yaml │ ├── scannet_dataset.yaml │ └── scannet_dataset_image.yaml ├── s3dis_models │ ├── inst │ │ ├── softgroup_clip_adamw.yaml │ │ ├── softgroup_clip_base6_caption_adamw.yaml │ │ └── softgroup_clip_base8_caption_adamw.yaml │ ├── spconv_clip_adamw.yaml │ ├── spconv_clip_base6_caption_adamw.yaml │ └── spconv_clip_base8_caption_adamw.yaml └── scannet_models │ ├── inst │ ├── softgroup_clip_adamw.yaml │ ├── softgroup_clip_base10_caption_adamw.yaml │ ├── softgroup_clip_base13_caption_adamw.yaml │ ├── softgroup_clip_base8_caption_adamw.yaml │ └── softgroup_clip_openvocab_test.yaml │ ├── spconv_clip_adamw.yaml │ ├── spconv_clip_base10_caption_adamw.yaml │ ├── spconv_clip_base12_caption_adamw.yaml │ ├── spconv_clip_base15_caption_adamw.yaml │ └── spconv_clip_openvocab_test.yaml ├── eval_utils ├── eval_utils.py ├── inst_eval │ ├── __init__.py │ ├── eval_utils.py │ ├── instance_eval_utils.py │ └── pointwise_eval_utils.py └── save_utils.py ├── process_tools ├── __pycache__ │ ├── generate_caption.cpython-38.pyc │ ├── generate_caption_idx.cpython-310.pyc │ └── generate_caption_idx.cpython-38.pyc ├── combine_multiple_caption_files.py ├── filter_caption_without_points.py ├── generate_caption.py ├── generate_caption_idx.py └── generate_category_embedding.py ├── scripts ├── dist_test.sh └── dist_train.sh ├── test.py ├── train.py ├── train_utils ├── optimization │ ├── __init__.py │ ├── fastai_optim.py │ └── learning_schedules_fastai.py └── train_utils.py └── visual_utils ├── indoor_utils ├── color_utils.py └── ply_utils.py ├── open3d_vis_utils.py ├── visualize_indoor.py └── visualize_utils.py /README.md: -------------------------------------------------------------------------------- 1 |
2 | 
3 | PLA & RegionPLC
4 | This repo contains the official implementation of PLA (CVPR 2023) and RegionPLC (CVPR 2024)
5 | 
6 | 
7 | 
8 | PLA: Language-Driven Open-Vocabulary 3D Scene Understanding
9 | 
10 | 
11 | Runyu Ding*,
12 | Jihan Yang*,
13 | Chuhui Xue,
14 | Wenqing Zhang,
15 | Song Bai,
16 | Xiaojuan Qi
17 | 
18 | 
19 | CVPR 2023
20 | 
21 | [project page](https://dingry.github.io/projects/PLA) | [arXiv](https://arxiv.org/abs/2211.16312)
22 | 
23 | 
24 | 
25 | RegionPLC: Regional Point-Language Contrastive Learning for Open-World 3D Scene Understanding
26 | 
27 | 
28 | Jihan Yang*,
29 | Runyu Ding*,
30 | Weipeng Deng,
31 | Zhe Wang,
32 | Xiaojuan Qi
33 | 
34 | CVPR 2024
35 | 
36 | project page | arXiv
37 | 
38 | 
39 | 
40 | ##### Highlights:
41 | - The official PLA implementation is contained in the `main` branch
42 | - The official RegionPLC implementation is contained in the `regionplc` branch
43 | 
44 | ### Release
45 | - [2024-05-05] Released the **RegionPLC** implementation. Please check out the `regionplc` branch to try it!
46 | 
47 | ### Getting Started
48 | 
49 | #### Installation
50 | Please refer to [INSTALL.md](docs/INSTALL.md) for installation instructions.
51 | 
52 | #### Dataset Preparation
53 | Please refer to [DATASET.md](docs/DATASET.md) for dataset preparation.
54 | 
55 | #### Training & Inference
56 | 
57 | Please refer to [MODEL.md](docs/MODEL.md) for training and inference scripts and pretrained models. A minimal conceptual sketch of the open-vocabulary prediction step is included at the end of this README.
58 | 
59 | 
60 | ### Citation
61 | If you find this project useful in your research, please consider citing:
62 | ```bibtex
63 | @inproceedings{ding2022language,
64 |     title={PLA: Language-Driven Open-Vocabulary 3D Scene Understanding},
65 |     author={Ding, Runyu and Yang, Jihan and Xue, Chuhui and Zhang, Wenqing and Bai, Song and Qi, Xiaojuan},
66 |     booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
67 |     year={2023}
68 | }
69 | ```
70 | 
71 | ```bibtex
72 | @inproceedings{yang2024regionplc,
73 |     title={RegionPLC: Regional point-language contrastive learning for open-world 3d scene understanding},
74 |     author={Yang, Jihan and Ding, Runyu and Deng, Weipeng and Wang, Zhe and Qi, Xiaojuan},
75 |     booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
76 |     year={2024}
77 | }
78 | ```
79 | 
80 | ### Acknowledgement
81 | Code is partly borrowed from [OpenPCDet](https://github.com/open-mmlab/OpenPCDet), [PointGroup](https://github.com/dvlab-research/PointGroup) and [SoftGroup](https://github.com/thangvubk/SoftGroup).
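
### Open-vocabulary prediction at a glance

The snippet below is a minimal, self-contained sketch of the idea behind open-vocabulary 3D scene understanding as used in this project: per-point features are projected into the text-embedding space and matched against embeddings of the category names. It is illustrative only; the tensor sizes, the single `Linear` adapter, and the 0.07 temperature are placeholder assumptions rather than the actual modules in `pcseg` (the real components live under `pcseg/models`, e.g. `head/text_seg_head.py` and `text_networks/`).

```python
import torch
import torch.nn.functional as F

# Placeholder sizes: N points, C backbone channels, K category names, D text-embedding dim.
N, C, K, D = 4096, 32, 20, 512

point_feats = torch.randn(N, C)                      # per-point features from the 3D backbone
text_embed = F.normalize(torch.randn(K, D), dim=-1)  # pre-computed category embeddings (cf. data/*/text_embed)

adapter = torch.nn.Linear(C, D)                      # stand-in for the learned vision-language adapter
point_embed = F.normalize(adapter(point_feats), dim=-1)

logits = point_embed @ text_embed.t() / 0.07         # cosine similarity with a CLIP-style temperature
pred = logits.argmax(dim=-1)                         # per-point category index over the open vocabulary
print(pred.shape)                                    # torch.Size([4096])
```

Changing the vocabulary at test time amounts to re-computing `text_embed` for a new category list; in the configs this list is controlled by `TEXT_EMBED.CATEGORY_NAMES` (see `docs/INFER.md`).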
-------------------------------------------------------------------------------- /assets/scene_0019.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0019.gif -------------------------------------------------------------------------------- /assets/scene_0025.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0025.gif -------------------------------------------------------------------------------- /assets/scene_003.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_003.gif -------------------------------------------------------------------------------- /assets/scene_005.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_005.gif -------------------------------------------------------------------------------- /assets/scene_0164.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0164.gif -------------------------------------------------------------------------------- /docs/DATASET.md: -------------------------------------------------------------------------------- 1 | The dataset configs are located within [tools/cfgs/dataset_configs](../tools/cfgs/dataset_configs), and the model configs are located within [tools/cfgs](../tools/cfgs) for different settings. 2 | 3 | #### ScanNet Dataset 4 | - Please download the [ScanNet Dataset](http://www.scan-net.org/) and follow [PointGroup](https://github.com/dvlab-research/PointGroup/blob/master/dataset/scannetv2/prepare_data_inst.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD). 5 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD). If you want to generate captions on your own, please download image data ([scannet_frames_25k]((http://www.scan-net.org/))) from ScanNet and follow scripts [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py). 6 | 7 | - The directory organization should be as follows: 8 | 9 | ``` 10 | PLA 11 | ├── data 12 | │ ├── scannetv2 13 | │ │ │── train 14 | │ │ │ │── scene0000_00.pth 15 | │ │ │ │── ... 
16 | │ │ │── val 17 | │ │ │── text_embed 18 | │ │ │── caption_idx 19 | │ │ │── scannetv2_train.txt 20 | │ │ │── scannetv2_val.txt 21 | │ │ │—— scannet_frames_25k (optional, only for caption generation) 22 | ├── pcseg 23 | ├── tools 24 | ``` 25 | 26 | #### S3DIS Dataset 27 | - Please download the [S3DIS Dataset](http://buildingparser.stanford.edu/dataset.html#Download) and follow [pcseg/datasets/s3dis/preprocess.py](../pcseg/datasets/s3dis/preprocess.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7). 28 | ```bash 29 | python3 pcseg/datasets/s3dis/preprocess.py 30 | ``` 31 | 32 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7). If you want to generate captions on your own, please download the image data [here](https://github.com/alexsax/2D-3D-Semantics) and follow the scripts here: [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py). 33 | 34 | - The directory organization should be as follows: 35 | 36 | ``` 37 | PLA 38 | ├── data 39 | │ ├── s3dis 40 | │ │ │── stanford_indoor3d_inst 41 | │ │ │ │── Area_1_Conference_1.npy 42 | │ │ │ │── ... 43 | │ │ │── text_embed 44 | │ │ │── caption_idx 45 | │ │ │—— s3dis_2d (optional, only for caption generation) 46 | ├── pcseg 47 | ├── tools 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/INFER.md: -------------------------------------------------------------------------------- 1 | If you wish to test on custom 3D scenes or categories, you can use our example configs: 2 | `tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml` and `tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml`. 3 | 4 | The key parameters to consider are as follows: 5 | - `TEXT_EMBED.CATEGORY_NAMES` 6 | 7 | This parameter defines the category list for segmentation. 8 | 9 | - `TASK_HEAD.CORRECT_SEG_PRED_BINARY` and `INST_HEAD.CORRECT_SEG_PRED_BINARY` 10 | 11 | These parameters control whether the binary head is used to rectify the semantic scores. 12 | 13 | 14 | To save the results, you can use the flag `--save_results semantic,instance`. Afterward, you can use the visualization utilities in `tools/visual_utils/visualize_indoor.py` to visualize the predicted results. 15 | 16 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | #### Requirements 2 | All the code is tested in the following environment: 3 | - Python 3.7+ 4 | - PyTorch 1.8 5 | - CUDA 11.1 6 | - [spconv v2.x](https://github.com/traveller59/spconv) 7 | 8 | #### Install dependent libraries 9 | a. Clone this repository. 10 | ```bash 11 | git clone https://github.com/CVMI-Lab/PLA.git 12 | ``` 13 | 14 | b.
Install the dependent libraries as follows: 15 | 16 | * Install the dependent Python libraries (Please note that you need to install the correct version of `torch` and `spconv` according to your CUDA version): 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | * Install [SoftGroup](https://github.com/thangvubk/SoftGroup) following its [official guidance](https://github.com/thangvubk/SoftGroup/blob/main/docs/installation.md). 22 | ```bash 23 | cd pcseg/external_libs/softgroup_ops 24 | python3 setup.py build_ext develop 25 | cd ../../.. 26 | ``` 27 | 28 | * Install [pcseg](../pcseg) 29 | ```bash 30 | python3 setup.py develop 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/MODEL.md: -------------------------------------------------------------------------------- 1 | #### Training 2 | 3 | ```bash 4 | cd tools 5 | sh scripts/dist_train.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} ${PY_ARGS} 6 | ``` 7 | 8 | For instance, 9 | - train B15/N4 semantic segmentation on ScanNet: 10 | ```bash 11 | cd tools 12 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --extra_tag exp_tag 13 | ``` 14 | - train B13/N4 instance segmentation on ScanNet: 15 | ```bash 16 | cd tools 17 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml --extra_tag exp_tag 18 | ``` 19 | 20 | #### Inference 21 | 22 | ```bash 23 | cd tools 24 | sh scripts/dist_test.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} --ckpt ${CKPT_PATH} 25 | ``` 26 | 27 | For instance, 28 | - to test a B15/N4 model on ScanNet: 29 | ```bash 30 | cd tools 31 | sh scripts/dist_test.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --ckpt output/scannet_models/spconv_clip_base15_caption/exp_tag/ckpt/checkpoint_ep128.pth 32 | ``` 33 | 34 | ### Model Zoo 35 | - semantic segmentation 36 | 37 | | Dataset | Partition | hIoU / mIoU(B) / mIoU(N) | Path | 38 | |:---:|:---:|:---:|:---:| 39 | | ScanNet | B15/N4 | 64.9 / 67.8 / 62.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef8xk_X0ortMjC0F8PBQl2wBacVPgO72La8h_ZTDsKj__Q?e=Uq6W8I) | 40 | | ScanNet | B12/N7 | 55.9 / 70.4 / 46.4 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EVl7SdeUEPFAvrj2xnWSb-sBCOtWYyVOwBo6ggFb9x7dNA?e=feZaxH) | 41 | | ScanNet | B10/N9 | 52.8 / 76.6 / 40.3 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef0P_6XraDpCo0RRgOJ1wGQB-xOW7T6lecvVRi5P90Edbw?e=hqrP8X) | 42 | | S3DIS | B8/N4 | 35.6 / 58.3 / 25.6 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EYIW4SNX5B9Go_LKiim1KFEB_abYv0bDZMggE_6Ifjau0g?e=8BD0K3) | 43 | | S3DIS | B6/N6 | 38.4 / 53.9 / 29.8 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EeNYtkS3pmhAvc3Hxj7__SwB8SMzZdzmljRtCYuYG8NHcA?e=aC0aE2) | 44 | 45 | 46 | - instance segmentation 47 | 48 | | Dataset | Partition | hAP50 / mAP50(B) / mAP50(N) | Path | 49 | |:---:|:---:|:---:|:---:| 50 | | ScanNet | B13/N4 | 57.8 / 58.7 / 56.9| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Eb4N2hfCevlBlBxWlK9DtioBP6RX7gtXUmY0Huu4MknUHA?e=YDydlj) | 51 | | ScanNet | B10/N7 | 31.6 / 54.8 / 22.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETsHZCFElvdCmk8ulRzBk-EBxm8fHk8rLJnpUdk9_n3i1Q?e=4SGy1N) | 52 | | ScanNet | B8/N9 | 36.9 / 63.1 / 26.2 | 
[ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EXAaU8RDecJFn_1J2Q-IqdsBALbv-5d_L_RyIOrdIjB66g?e=c8dFD6) | 53 | | S3DIS | B8/N4 | 17.2 / 60.9 / 10.0| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETzzD-pEhvtMkJGnIxzgIP0Bk3f2He9_hkgfVtexEMFqpg?e=xJpaOV) | 54 | | S3DIS | B6/N6 |15.8 / 48.2 / 9.5| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EWoqIoBWfSRBqQwahLTKQGkB5Gwp8zs0EvT3MkGMDiBOrw?e=daBppj) | 55 | 56 | -------------------------------------------------------------------------------- /docs/association_module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/association_module.png -------------------------------------------------------------------------------- /docs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/framework.png -------------------------------------------------------------------------------- /pcseg/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from pathlib import Path 3 | 4 | from .version import __version__ 5 | 6 | __all__ = [ 7 | '__version__' 8 | ] 9 | 10 | 11 | def get_git_commit_number(): 12 | if not (Path(__file__).parent / '../.git').exists(): 13 | return '0000000' 14 | 15 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE) 16 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7] 17 | return git_commit_number 18 | 19 | 20 | script_version = get_git_commit_number() 21 | 22 | 23 | if script_version not in __version__: 24 | __version__ = __version__ + '+py%s' % script_version 25 | -------------------------------------------------------------------------------- /pcseg/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | from easydict import EasyDict 5 | 6 | 7 | def log_config_to_file(cfg, pre='cfg', logger=None): 8 | for key, val in cfg.items(): 9 | if isinstance(cfg[key], EasyDict): 10 | logger.info('\n%s.%s = edict()' % (pre, key)) 11 | log_config_to_file(cfg[key], pre=pre + '.' 
+ key, logger=logger) 12 | continue 13 | logger.info('%s.%s: %s' % (pre, key, val)) 14 | 15 | 16 | def cfg_from_list(cfg_list, config): 17 | """Set config keys via list (e.g., from command line).""" 18 | from ast import literal_eval 19 | assert len(cfg_list) % 2 == 0 20 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 21 | key_list = k.split('.') 22 | d = config 23 | for subkey in key_list[:-1]: 24 | assert subkey in d, 'NotFoundKey: %s' % subkey 25 | d = d[subkey] 26 | subkey = key_list[-1] 27 | assert subkey in d, 'NotFoundKey: %s' % subkey 28 | try: 29 | value = literal_eval(v) 30 | except: 31 | value = v 32 | 33 | if type(value) != type(d[subkey]) and isinstance(d[subkey], EasyDict): 34 | key_val_list = value.split(',') 35 | for src in key_val_list: 36 | cur_key, cur_val = src.split(':') 37 | val_type = type(d[subkey][cur_key]) 38 | cur_val = val_type(cur_val) 39 | d[subkey][cur_key] = cur_val 40 | elif type(value) != type(d[subkey]) and isinstance(d[subkey], list): 41 | val_list = value.split(',') 42 | for k, x in enumerate(val_list): 43 | val_list[k] = type(d[subkey][0])(x) 44 | d[subkey] = val_list 45 | else: 46 | assert type(value) == type(d[subkey]), \ 47 | 'type {} does not match original type {}'.format(type(value), type(d[subkey])) 48 | d[subkey] = value 49 | 50 | 51 | def merge_new_config(config, new_config): 52 | if '_BASE_CONFIG_' in new_config: 53 | with open(new_config['_BASE_CONFIG_'], 'r') as f: 54 | try: 55 | yaml_config = yaml.safe_load(f, Loader=yaml.FullLoader) 56 | except: 57 | yaml_config = yaml.safe_load(f) 58 | # config.update(EasyDict(yaml_config)) 59 | merge_new_config(config, yaml_config) 60 | 61 | for key, val in new_config.items(): 62 | if not isinstance(val, dict): 63 | config[key] = val 64 | continue 65 | if key not in config: 66 | config[key] = EasyDict() 67 | merge_new_config(config[key], val) 68 | 69 | return config 70 | 71 | 72 | def cfg_from_yaml_file(cfg_file, config): 73 | with open(cfg_file, 'r') as f: 74 | try: 75 | new_config = yaml.safe_load(f, Loader=yaml.FullLoader) 76 | except: 77 | new_config = yaml.safe_load(f) 78 | 79 | merge_new_config(config=config, new_config=new_config) 80 | 81 | return config 82 | 83 | 84 | cfg = EasyDict() 85 | cfg.ROOT_DIR = (Path(__file__).resolve().parent / '../').resolve() 86 | cfg.LOCAL_RANK = 0 87 | -------------------------------------------------------------------------------- /pcseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from functools import partial 3 | from torch.utils.data import DataLoader 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | from pcseg.utils import common_utils 7 | 8 | from .dataset import DatasetTemplate 9 | from .scannet.scannet_dataset import ScanNetDataset, ScanNetInstDataset 10 | from .s3dis.s3dis_dataset import S3DISDataset, S3DISInstDataset 11 | 12 | 13 | __all__ = { 14 | 'DatasetTemplate': DatasetTemplate, 15 | 'ScanNetDataset': ScanNetDataset, 16 | 'ScanNetInstDataset': ScanNetInstDataset, 17 | 'S3DISDataset': S3DISDataset, 18 | 'S3DISInstDataset': S3DISInstDataset 19 | } 20 | 21 | 22 | class DistributedSampler(_DistributedSampler): 23 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 24 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 25 | self.shuffle = shuffle 26 | 27 | def __iter__(self): 28 | if self.shuffle: 29 | g = torch.Generator() 30 | g.manual_seed(self.epoch) 31 | indices = torch.randperm(len(self.dataset), 
generator=g).tolist() 32 | else: 33 | indices = torch.arange(len(self.dataset)).tolist() 34 | 35 | indices += indices[:(self.total_size - len(indices))] 36 | assert len(indices) == self.total_size 37 | 38 | indices = indices[self.rank:self.total_size:self.num_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | 43 | 44 | def build_dataloader(dataset_cfg, class_names, batch_size, dist, root_path=None, workers=4, seed=None, 45 | logger=None, training=True, merge_all_iters_to_one_epoch=False, total_epochs=0, 46 | multi_epoch_loader=False): 47 | 48 | dataset = __all__[dataset_cfg.DATASET]( 49 | dataset_cfg=dataset_cfg, 50 | class_names=class_names, 51 | root_path=root_path, 52 | training=training, 53 | logger=logger, 54 | ) 55 | 56 | if merge_all_iters_to_one_epoch: 57 | assert hasattr(dataset, 'merge_all_iters_to_one_epoch') 58 | dataset.merge_all_iters_to_one_epoch(merge=True, epochs=total_epochs) 59 | 60 | if dist: 61 | if training: 62 | sampler = torch.utils.data.distributed.DistributedSampler(dataset) 63 | else: 64 | rank, world_size = common_utils.get_dist_info() 65 | sampler = DistributedSampler(dataset, world_size, rank, shuffle=False) 66 | else: 67 | sampler = None 68 | 69 | if multi_epoch_loader: 70 | loader = MultiEpochsDataLoader 71 | else: 72 | loader = DataLoader 73 | 74 | dataloader = loader( 75 | dataset, batch_size=batch_size, pin_memory=True, num_workers=workers, 76 | shuffle=(sampler is None) and training, drop_last=training, sampler=sampler, 77 | collate_fn=getattr(dataset, dataset_cfg.COLLATE_FN), 78 | timeout=0, worker_init_fn=partial(common_utils.worker_init_fn, seed=seed) 79 | ) 80 | 81 | return dataset, dataloader, sampler 82 | 83 | 84 | class _RepeatSampler(object): 85 | """ Sampler that repeats forever. 
86 | Args: 87 | sampler (Sampler) 88 | """ 89 | 90 | def __init__(self, sampler): 91 | self.sampler = sampler 92 | 93 | def __iter__(self): 94 | while True: 95 | yield from iter(self.sampler) 96 | -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__init__.py -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/augmentor_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import scipy.ndimage 4 | import scipy.interpolate 5 | import scipy.stats 6 | 7 | 8 | def check_key(key): 9 | exist = key is not None 10 | if not exist: 11 | return False 12 | if isinstance(key, bool): 13 | enabled = key 14 | elif isinstance(key, dict): 15 | enabled = key.get('enabled', True) 16 | else: 17 | enabled = True 18 | return enabled 19 | 20 | 21 | def check_p(key): 22 | return (not isinstance(key, dict)) or ('p' not in key) or (np.random.rand() < key['p']) 23 | 24 | 25 | def elastic(x, gran, mag): 26 | blur0 = np.ones((3, 1, 
1)).astype('float32') / 3 27 | blur1 = np.ones((1, 3, 1)).astype('float32') / 3 28 | blur2 = np.ones((1, 1, 3)).astype('float32') / 3 29 | 30 | bb = np.abs(x).max(0).astype(np.int32) // gran + 3 31 | noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)] 32 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 33 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 34 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 35 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 36 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 37 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 38 | ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb] 39 | interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise] 40 | 41 | def g(x_): 42 | return np.hstack([i(x_)[:, None] for i in interp]) 43 | 44 | return x + g(x) * mag 45 | 46 | 47 | def scene_aug(aug, xyz, rgb=None): 48 | assert xyz.ndim == 2 49 | m = np.eye(3) 50 | if check_key(aug.jitter): 51 | m += np.random.randn(3, 3) * 0.1 52 | if check_key(aug.flip) and check_p(aug.flip): 53 | m[0][0] *= -1 # np.random.randint(0, 2) * 2 - 1 # flip x randomly 54 | if check_key(aug.rotation) and check_p(aug.rotation): 55 | theta_x = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[0] 56 | theta_y = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[1] 57 | theta_z = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[2] 58 | Rx = np.array \ 59 | ([[1, 0, 0], [0, math.cos(theta_x), -math.sin(theta_x)], [0, math.sin(theta_x), math.cos(theta_x)]]) 60 | Ry = np.array \ 61 | ([[math.cos(theta_y), 0, math.sin(theta_y)], [0, 1, 0], [-math.sin(theta_y), 0, math.cos(theta_y)]]) 62 | Rz = np.array \ 63 | ([[math.cos(theta_z), math.sin(theta_z), 0], [-math.sin(theta_z), math.cos(theta_z), 0], [0, 0, 1]]) 64 | rot_mats = [Rx, Ry, Rz] 65 | if aug.rotation.get('shuffle', False): 66 | np.random.shuffle(rot_mats) 67 | m = np.matmul(m, rot_mats[0].dot(rot_mats[1]).dot(rot_mats[2])) 68 | xyz = np.matmul(xyz, m) 69 | if check_key(aug.random_jitter) and check_p(aug.random_jitter): 70 | if aug.random_jitter.accord_to_size: 71 | jitter_scale = (xyz.max(0) - xyz.min(0)).mean() * 0.1 72 | else: 73 | jitter_scale = aug.random_jitter.value 74 | random_noise = (np.random.rand(xyz.shape[0], xyz.shape[1]) - 0.5) * jitter_scale 75 | xyz += random_noise 76 | if check_key(aug.scaling_scene) and check_p(aug.scaling_scene): 77 | scaling_fac = np.random.rand() * (aug.scaling_scene.value[1] - aug.scaling_scene.value[0]) \ 78 | + aug.scaling_scene.value[0] 79 | xyz_center = (xyz.max(0) + xyz.min(0)) / 2.0 80 | xyz = (xyz - xyz_center) * scaling_fac + xyz_center 81 | 82 | if rgb is not None and check_key(aug.color_jitter): 83 | rgb += np.random.randn(3) * 0.1 84 | return xyz, rgb 85 | 86 | 87 | def crop(xyz, full_scale, max_npoint, step=32): 88 | xyz_offset = xyz.copy() 89 | valid_idxs = (xyz_offset.min(1) >= 0) 90 | assert valid_idxs.sum() == xyz.shape[0] 91 | full_scale = np.array([full_scale[1]] * 3) 92 | room_range = xyz.max(0) - xyz.min(0) 93 | 94 | while valid_idxs.sum() > max_npoint: 95 | step_temp = step 96 | if valid_idxs.sum() > 1e6: 97 | step_temp = step * 2 98 | offset = np.clip(full_scale - room_range + 0.001, None, 0) * 
np.random.rand(3) 99 | xyz_offset = xyz + offset 100 | valid_idxs = (xyz_offset.min(1) >= 0) * ((xyz_offset < full_scale).sum(1) == 3) 101 | full_scale[:2] -= step_temp 102 | 103 | return xyz_offset, valid_idxs 104 | 105 | -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/data_augmentor.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import numpy as np 4 | 5 | from . import augmentor_utils 6 | 7 | 8 | class DataAugmentor(object): 9 | def __init__(self, dataset_cfg, **kwargs): 10 | self.data_augmentor_queue = [] 11 | self.aug_cfg = dataset_cfg.DATA_AUG 12 | self.kwargs = kwargs 13 | aug_config_list = self.aug_cfg.AUG_LIST 14 | 15 | self.data_augmentor_queue = [] 16 | for aug in aug_config_list: 17 | if aug not in self.aug_cfg: 18 | continue 19 | cur_augmentor = partial(getattr(self, aug), config=self.aug_cfg[aug]) 20 | self.data_augmentor_queue.append(cur_augmentor) 21 | 22 | def __getstate__(self): 23 | d = dict(self.__dict__) 24 | del d['logger'] 25 | return d 26 | 27 | def __setstate__(self, d): 28 | self.__dict__.update(d) 29 | 30 | def shuffle(self, data_dict=None, config=None): 31 | shuffle_idx = np.random.permutation(data_dict['points_xyz'].shape[0]) 32 | data_dict = self.update_data_dict(data_dict, shuffle_idx) 33 | return data_dict 34 | 35 | def crop(self, data_dict=None, config=None): 36 | data_dict['points_xyz_voxel_scale'], valid_idxs = augmentor_utils.crop( 37 | data_dict['points_xyz_voxel_scale'], self.kwargs['full_scale'], self.kwargs['max_npoint'], config.step, 38 | ) 39 | data_dict = self.update_data_dict(data_dict, valid_idxs) 40 | if data_dict['points_xyz'].shape[0] == 0: 41 | data_dict['valid'] = False 42 | return data_dict 43 | 44 | def forward(self, data_dict): 45 | """ 46 | Args: 47 | data_dict: 48 | points: (N, 3 + C_in) 49 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] 50 | gt_names: optional, (N), string 51 | ... 
52 | 53 | Returns: 54 | """ 55 | data_dict['valid'] = True 56 | for cur_augmentor in self.data_augmentor_queue: 57 | data_dict = cur_augmentor(data_dict=data_dict) 58 | return data_dict 59 | 60 | def scene_aug(self, data_dict=None, config=None): 61 | if self.check_func(config) and self.check_data(data_dict): 62 | data_dict['points_xyz'], data_dict['rgb'] = augmentor_utils.scene_aug( 63 | config, data_dict['points_xyz'], data_dict['rgb'] 64 | ) 65 | if data_dict['points_xyz'].shape[0] == 0: 66 | data_dict['valid'] = False 67 | return data_dict 68 | 69 | @staticmethod 70 | def update_data_dict(data_dict, idx): 71 | for key in data_dict: 72 | if key in ['points_xyz', 'points', 'points_xyz_voxel_scale', 'rgb', 'labels', 73 | 'inst_label', 'binary_labels', 'origin_idx']: 74 | if data_dict[key] is not None: 75 | data_dict[key] = data_dict[key][idx] 76 | return data_dict 77 | 78 | @staticmethod 79 | def check_func(key): 80 | return augmentor_utils.check_key(key) and augmentor_utils.check_p(key) 81 | 82 | def elastic(self, data_dict=None, config=None): 83 | data_dict['points_xyz_voxel_scale'] = data_dict['points_xyz'] * self.kwargs['voxel_scale'] 84 | if self.check_func(config) and self.check_data(data_dict): 85 | for (gran_fac, mag_fac) in config.value: 86 | data_dict['points_xyz_voxel_scale'] = augmentor_utils.elastic( 87 | data_dict['points_xyz_voxel_scale'], gran_fac * self.kwargs['voxel_scale'] // 50, 88 | mag_fac * self.kwargs['voxel_scale'] / 50 89 | ) 90 | if config.apply_to_feat: 91 | data_dict['points_xyz'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale'] 92 | 93 | # offset 94 | data_dict['points'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale'] 95 | data_dict['points_xyz_voxel_scale'] -= data_dict['points_xyz_voxel_scale'].min(0) 96 | return data_dict 97 | 98 | @staticmethod 99 | def check_data(data_dict): 100 | return ('valid' not in data_dict) or data_dict['valid'] 101 | 102 | ################### 103 | # Used in outdoor # 104 | ################### 105 | @staticmethod 106 | def random_world_rotation(data_dict=None, config=None): 107 | points = data_dict['points'] 108 | rotate_rad = np.deg2rad(np.random.random() * 360) - np.pi 109 | c, s = np.cos(rotate_rad), np.sin(rotate_rad) 110 | j = np.matrix([[c, s], [-s, c]]) 111 | data_dict['points'][:, :2] = np.dot(points[:, :2], j) 112 | 113 | return data_dict 114 | 115 | @staticmethod 116 | def random_world_flip(data_dict=None, config=None): 117 | points = data_dict['points'] 118 | flip_type = np.random.choice(4, 1) 119 | 120 | if flip_type == 0: 121 | # flip x only 122 | points[:, 0] = -points[:, 0] 123 | elif flip_type == 1: 124 | # flip y only 125 | points[:, 1] = -points[:, 1] 126 | elif flip_type == 2: 127 | # flip x+y 128 | points[:, :2] = -points[:, :2] 129 | 130 | data_dict['points'] = points 131 | return data_dict 132 | 133 | @staticmethod 134 | def random_world_scaling(data_dict=None, config=None): 135 | points = data_dict['points'] 136 | noise_scale = np.random.uniform(config[0], config[1]) 137 | points[:, :2] = noise_scale * points[:, :2] 138 | 139 | data_dict['points'] = points 140 | return data_dict 141 | 142 | @staticmethod 143 | def random_world_translation(data_dict=None, config=None): 144 | points = data_dict['points'] 145 | noise_translate = np.array( 146 | [np.random.normal(0, config[0], 1), np.random.normal(0, config[1], 1), np.random.normal(0, config[2], 1)] 147 | ).T 148 | points[:, 0:3] += noise_translate 149 | 150 | data_dict['points'] = points 151 | return data_dict 152 | 
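# --------------------------------------------------------------------------------
# Illustrative usage sketch (an added example, not part of the original repository
# file). It exercises the augmentation utilities above on a random toy point cloud.
# The config keys mirror the ones read by augmentor_utils.scene_aug(); the numeric
# values (voxel scale 50, full_scale, max_npoint) are placeholders, not the values
# from the shipped configs. `easydict` is the same dependency used by pcseg/config.py.
# Run as a module so the relative import above resolves:
#   python -m pcseg.datasets.augmentor.data_augmentor
# --------------------------------------------------------------------------------
if __name__ == '__main__':
    from easydict import EasyDict

    toy_cfg = EasyDict({
        'jitter': True,                               # perturb the 3x3 transform with Gaussian noise
        'flip': {'p': 0.5},                           # flip the x axis with probability 0.5
        'rotation': {'p': 1.0, 'value': [0, 0, 1]},   # free rotation around the z axis only
        'random_jitter': None,                        # disabled: check_key(None) is False
        'scaling_scene': {'p': 0.5, 'value': [0.9, 1.1]},
        'color_jitter': True,
    })

    xyz = np.random.rand(1000, 3).astype(np.float32)  # toy scene inside a unit cube
    rgb = np.random.rand(1000, 3).astype(np.float32)
    xyz_aug, rgb_aug = augmentor_utils.scene_aug(toy_cfg, xyz, rgb)

    # crop() expects non-negative voxel-scale coordinates and returns shifted points
    # plus a boolean mask of the points that remain inside the spatial extent.
    xyz_voxel = (xyz_aug - xyz_aug.min(0)) * 50
    xyz_offset, valid = augmentor_utils.crop(xyz_voxel, full_scale=[128, 512], max_npoint=250000)
    print('augmented:', xyz_aug.shape, 'kept after crop:', int(valid.sum()))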
-------------------------------------------------------------------------------- /pcseg/datasets/processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__init__.py -------------------------------------------------------------------------------- /pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/all_data_label.txt: -------------------------------------------------------------------------------- 1 | Area_1_conferenceRoom_1.npy 2 | Area_1_conferenceRoom_2.npy 3 | Area_1_copyRoom_1.npy 4 | Area_1_hallway_1.npy 5 | Area_1_hallway_2.npy 6 | Area_1_hallway_3.npy 7 | Area_1_hallway_4.npy 8 | Area_1_hallway_5.npy 9 | Area_1_hallway_6.npy 10 | Area_1_hallway_7.npy 11 | Area_1_hallway_8.npy 12 | Area_1_office_10.npy 13 | Area_1_office_11.npy 14 | Area_1_office_12.npy 15 | Area_1_office_13.npy 16 | Area_1_office_14.npy 17 | Area_1_office_15.npy 18 | Area_1_office_16.npy 19 | Area_1_office_17.npy 20 | Area_1_office_18.npy 21 | Area_1_office_19.npy 22 | Area_1_office_1.npy 23 | Area_1_office_20.npy 24 | Area_1_office_21.npy 25 | Area_1_office_22.npy 26 | Area_1_office_23.npy 27 | Area_1_office_24.npy 28 | Area_1_office_25.npy 29 | Area_1_office_26.npy 30 | Area_1_office_27.npy 31 | Area_1_office_28.npy 32 | Area_1_office_29.npy 33 | Area_1_office_2.npy 34 | Area_1_office_30.npy 35 | Area_1_office_31.npy 36 | Area_1_office_3.npy 37 | Area_1_office_4.npy 38 | Area_1_office_5.npy 39 | Area_1_office_6.npy 40 | Area_1_office_7.npy 41 | Area_1_office_8.npy 42 | Area_1_office_9.npy 43 | Area_1_pantry_1.npy 44 | Area_1_WC_1.npy 45 | Area_2_auditorium_1.npy 46 | Area_2_auditorium_2.npy 47 | Area_2_conferenceRoom_1.npy 48 | Area_2_hallway_10.npy 49 | Area_2_hallway_11.npy 50 | Area_2_hallway_12.npy 51 | Area_2_hallway_1.npy 52 | Area_2_hallway_2.npy 53 | Area_2_hallway_3.npy 54 | Area_2_hallway_4.npy 55 | Area_2_hallway_5.npy 56 | Area_2_hallway_6.npy 57 | Area_2_hallway_7.npy 58 | Area_2_hallway_8.npy 59 | Area_2_hallway_9.npy 60 | Area_2_office_10.npy 61 | Area_2_office_11.npy 62 | Area_2_office_12.npy 63 | Area_2_office_13.npy 64 | Area_2_office_14.npy 65 | Area_2_office_1.npy 66 | Area_2_office_2.npy 67 | Area_2_office_3.npy 68 | Area_2_office_4.npy 69 | Area_2_office_5.npy 70 | Area_2_office_6.npy 71 | Area_2_office_7.npy 72 | Area_2_office_8.npy 73 | 
Area_2_office_9.npy 74 | Area_2_storage_1.npy 75 | Area_2_storage_2.npy 76 | Area_2_storage_3.npy 77 | Area_2_storage_4.npy 78 | Area_2_storage_5.npy 79 | Area_2_storage_6.npy 80 | Area_2_storage_7.npy 81 | Area_2_storage_8.npy 82 | Area_2_storage_9.npy 83 | Area_2_WC_1.npy 84 | Area_2_WC_2.npy 85 | Area_3_conferenceRoom_1.npy 86 | Area_3_hallway_1.npy 87 | Area_3_hallway_2.npy 88 | Area_3_hallway_3.npy 89 | Area_3_hallway_4.npy 90 | Area_3_hallway_5.npy 91 | Area_3_hallway_6.npy 92 | Area_3_lounge_1.npy 93 | Area_3_lounge_2.npy 94 | Area_3_office_10.npy 95 | Area_3_office_1.npy 96 | Area_3_office_2.npy 97 | Area_3_office_3.npy 98 | Area_3_office_4.npy 99 | Area_3_office_5.npy 100 | Area_3_office_6.npy 101 | Area_3_office_7.npy 102 | Area_3_office_8.npy 103 | Area_3_office_9.npy 104 | Area_3_storage_1.npy 105 | Area_3_storage_2.npy 106 | Area_3_WC_1.npy 107 | Area_3_WC_2.npy 108 | Area_4_conferenceRoom_1.npy 109 | Area_4_conferenceRoom_2.npy 110 | Area_4_conferenceRoom_3.npy 111 | Area_4_hallway_10.npy 112 | Area_4_hallway_11.npy 113 | Area_4_hallway_12.npy 114 | Area_4_hallway_13.npy 115 | Area_4_hallway_14.npy 116 | Area_4_hallway_1.npy 117 | Area_4_hallway_2.npy 118 | Area_4_hallway_3.npy 119 | Area_4_hallway_4.npy 120 | Area_4_hallway_5.npy 121 | Area_4_hallway_6.npy 122 | Area_4_hallway_7.npy 123 | Area_4_hallway_8.npy 124 | Area_4_hallway_9.npy 125 | Area_4_lobby_1.npy 126 | Area_4_lobby_2.npy 127 | Area_4_office_10.npy 128 | Area_4_office_11.npy 129 | Area_4_office_12.npy 130 | Area_4_office_13.npy 131 | Area_4_office_14.npy 132 | Area_4_office_15.npy 133 | Area_4_office_16.npy 134 | Area_4_office_17.npy 135 | Area_4_office_18.npy 136 | Area_4_office_19.npy 137 | Area_4_office_1.npy 138 | Area_4_office_20.npy 139 | Area_4_office_21.npy 140 | Area_4_office_22.npy 141 | Area_4_office_2.npy 142 | Area_4_office_3.npy 143 | Area_4_office_4.npy 144 | Area_4_office_5.npy 145 | Area_4_office_6.npy 146 | Area_4_office_7.npy 147 | Area_4_office_8.npy 148 | Area_4_office_9.npy 149 | Area_4_storage_1.npy 150 | Area_4_storage_2.npy 151 | Area_4_storage_3.npy 152 | Area_4_storage_4.npy 153 | Area_4_WC_1.npy 154 | Area_4_WC_2.npy 155 | Area_4_WC_3.npy 156 | Area_4_WC_4.npy 157 | Area_5_conferenceRoom_1.npy 158 | Area_5_conferenceRoom_2.npy 159 | Area_5_conferenceRoom_3.npy 160 | Area_5_hallway_10.npy 161 | Area_5_hallway_11.npy 162 | Area_5_hallway_12.npy 163 | Area_5_hallway_13.npy 164 | Area_5_hallway_14.npy 165 | Area_5_hallway_15.npy 166 | Area_5_hallway_1.npy 167 | Area_5_hallway_2.npy 168 | Area_5_hallway_3.npy 169 | Area_5_hallway_4.npy 170 | Area_5_hallway_5.npy 171 | Area_5_hallway_6.npy 172 | Area_5_hallway_7.npy 173 | Area_5_hallway_8.npy 174 | Area_5_hallway_9.npy 175 | Area_5_lobby_1.npy 176 | Area_5_office_10.npy 177 | Area_5_office_11.npy 178 | Area_5_office_12.npy 179 | Area_5_office_13.npy 180 | Area_5_office_14.npy 181 | Area_5_office_15.npy 182 | Area_5_office_16.npy 183 | Area_5_office_17.npy 184 | Area_5_office_18.npy 185 | Area_5_office_19.npy 186 | Area_5_office_1.npy 187 | Area_5_office_20.npy 188 | Area_5_office_21.npy 189 | Area_5_office_22.npy 190 | Area_5_office_23.npy 191 | Area_5_office_24.npy 192 | Area_5_office_25.npy 193 | Area_5_office_26.npy 194 | Area_5_office_27.npy 195 | Area_5_office_28.npy 196 | Area_5_office_29.npy 197 | Area_5_office_2.npy 198 | Area_5_office_30.npy 199 | Area_5_office_31.npy 200 | Area_5_office_32.npy 201 | Area_5_office_33.npy 202 | Area_5_office_34.npy 203 | Area_5_office_35.npy 204 | Area_5_office_36.npy 205 | Area_5_office_37.npy 206 
| Area_5_office_38.npy 207 | Area_5_office_39.npy 208 | Area_5_office_3.npy 209 | Area_5_office_40.npy 210 | Area_5_office_41.npy 211 | Area_5_office_42.npy 212 | Area_5_office_4.npy 213 | Area_5_office_5.npy 214 | Area_5_office_6.npy 215 | Area_5_office_7.npy 216 | Area_5_office_8.npy 217 | Area_5_office_9.npy 218 | Area_5_pantry_1.npy 219 | Area_5_storage_1.npy 220 | Area_5_storage_2.npy 221 | Area_5_storage_3.npy 222 | Area_5_storage_4.npy 223 | Area_5_WC_1.npy 224 | Area_5_WC_2.npy 225 | Area_6_conferenceRoom_1.npy 226 | Area_6_copyRoom_1.npy 227 | Area_6_hallway_1.npy 228 | Area_6_hallway_2.npy 229 | Area_6_hallway_3.npy 230 | Area_6_hallway_4.npy 231 | Area_6_hallway_5.npy 232 | Area_6_hallway_6.npy 233 | Area_6_lounge_1.npy 234 | Area_6_office_10.npy 235 | Area_6_office_11.npy 236 | Area_6_office_12.npy 237 | Area_6_office_13.npy 238 | Area_6_office_14.npy 239 | Area_6_office_15.npy 240 | Area_6_office_16.npy 241 | Area_6_office_17.npy 242 | Area_6_office_18.npy 243 | Area_6_office_19.npy 244 | Area_6_office_1.npy 245 | Area_6_office_20.npy 246 | Area_6_office_21.npy 247 | Area_6_office_22.npy 248 | Area_6_office_23.npy 249 | Area_6_office_24.npy 250 | Area_6_office_25.npy 251 | Area_6_office_26.npy 252 | Area_6_office_27.npy 253 | Area_6_office_28.npy 254 | Area_6_office_29.npy 255 | Area_6_office_2.npy 256 | Area_6_office_30.npy 257 | Area_6_office_31.npy 258 | Area_6_office_32.npy 259 | Area_6_office_33.npy 260 | Area_6_office_34.npy 261 | Area_6_office_35.npy 262 | Area_6_office_36.npy 263 | Area_6_office_37.npy 264 | Area_6_office_3.npy 265 | Area_6_office_4.npy 266 | Area_6_office_5.npy 267 | Area_6_office_6.npy 268 | Area_6_office_7.npy 269 | Area_6_office_8.npy 270 | Area_6_office_9.npy 271 | Area_6_openspace_1.npy 272 | Area_6_pantry_1.npy 273 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/area6_data_label.txt: -------------------------------------------------------------------------------- 1 | data/stanford_indoor3d/Area_6_conferenceRoom_1.npy 2 | data/stanford_indoor3d/Area_6_copyRoom_1.npy 3 | data/stanford_indoor3d/Area_6_hallway_1.npy 4 | data/stanford_indoor3d/Area_6_hallway_2.npy 5 | data/stanford_indoor3d/Area_6_hallway_3.npy 6 | data/stanford_indoor3d/Area_6_hallway_4.npy 7 | data/stanford_indoor3d/Area_6_hallway_5.npy 8 | data/stanford_indoor3d/Area_6_hallway_6.npy 9 | data/stanford_indoor3d/Area_6_lounge_1.npy 10 | data/stanford_indoor3d/Area_6_office_10.npy 11 | data/stanford_indoor3d/Area_6_office_11.npy 12 | data/stanford_indoor3d/Area_6_office_12.npy 13 | data/stanford_indoor3d/Area_6_office_13.npy 14 | data/stanford_indoor3d/Area_6_office_14.npy 15 | data/stanford_indoor3d/Area_6_office_15.npy 16 | data/stanford_indoor3d/Area_6_office_16.npy 17 | data/stanford_indoor3d/Area_6_office_17.npy 18 | data/stanford_indoor3d/Area_6_office_18.npy 19 | data/stanford_indoor3d/Area_6_office_19.npy 20 | data/stanford_indoor3d/Area_6_office_1.npy 21 | data/stanford_indoor3d/Area_6_office_20.npy 22 | data/stanford_indoor3d/Area_6_office_21.npy 23 | data/stanford_indoor3d/Area_6_office_22.npy 24 | data/stanford_indoor3d/Area_6_office_23.npy 25 | data/stanford_indoor3d/Area_6_office_24.npy 26 | data/stanford_indoor3d/Area_6_office_25.npy 27 | data/stanford_indoor3d/Area_6_office_26.npy 28 | data/stanford_indoor3d/Area_6_office_27.npy 29 | data/stanford_indoor3d/Area_6_office_28.npy 30 | data/stanford_indoor3d/Area_6_office_29.npy 31 | data/stanford_indoor3d/Area_6_office_2.npy 32 | 
data/stanford_indoor3d/Area_6_office_30.npy 33 | data/stanford_indoor3d/Area_6_office_31.npy 34 | data/stanford_indoor3d/Area_6_office_32.npy 35 | data/stanford_indoor3d/Area_6_office_33.npy 36 | data/stanford_indoor3d/Area_6_office_34.npy 37 | data/stanford_indoor3d/Area_6_office_35.npy 38 | data/stanford_indoor3d/Area_6_office_36.npy 39 | data/stanford_indoor3d/Area_6_office_37.npy 40 | data/stanford_indoor3d/Area_6_office_3.npy 41 | data/stanford_indoor3d/Area_6_office_4.npy 42 | data/stanford_indoor3d/Area_6_office_5.npy 43 | data/stanford_indoor3d/Area_6_office_6.npy 44 | data/stanford_indoor3d/Area_6_office_7.npy 45 | data/stanford_indoor3d/Area_6_office_8.npy 46 | data/stanford_indoor3d/Area_6_office_9.npy 47 | data/stanford_indoor3d/Area_6_openspace_1.npy 48 | data/stanford_indoor3d/Area_6_pantry_1.npy 49 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/class_names.txt: -------------------------------------------------------------------------------- 1 | ceiling 2 | floor 3 | wall 4 | beam 5 | column 6 | window 7 | door 8 | table 9 | chair 10 | sofa 11 | bookcase 12 | board 13 | clutter 14 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/preprocess.py: -------------------------------------------------------------------------------- 1 | # https://github.com/charlesq34/pointnet/blob/master/sem_seg/ 2 | 3 | import os 4 | import sys 5 | import glob 6 | import numpy as np 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | ROOT_DIR = os.path.dirname(BASE_DIR) 10 | sys.path.append(BASE_DIR) 11 | 12 | DATA_PATH = os.path.join('data', 's3dis/Stanford3dDataset_v1.2_Aligned_Version') 13 | g_classes = [x.rstrip() for x in open(os.path.join(BASE_DIR, 'meta/class_names.txt'))] 14 | g_class2label = {cls: i for i, cls in enumerate(g_classes)} 15 | g_class2color = {'ceiling': [0,255,0], 16 | 'floor': [0,0,255], 17 | 'wall': [0,255,255], 18 | 'beam': [255,255,0], 19 | 'column': [255,0,255], 20 | 'window': [100,100,255], 21 | 'door': [200,200,100], 22 | 'table': [170,120,200], 23 | 'chair': [255,0,0], 24 | 'sofa': [200,100,100], 25 | 'bookcase': [10,200,100], 26 | 'board': [200,200,200], 27 | 'clutter': [50,50,50]} 28 | g_easy_view_labels = [7,8,9,10,11,1] 29 | g_label2color = {g_classes.index(cls): g_class2color[cls] for cls in g_classes} 30 | 31 | 32 | def collect_point_label(anno_path, out_filename, file_format='txt'): 33 | """ Convert original dataset files to data_label file (each line is XYZRGBL). 34 | We aggregated all the points from each instance in the room. 35 | Args: 36 | anno_path: path to annotations. e.g. Area_1/office_2/Annotations/ 37 | out_filename: path to save collected points and labels (each line is XYZRGBL) 38 | file_format: txt or numpy, determines what file format to save. 39 | Returns: 40 | None 41 | Note: 42 | the points are shifted before save, the most negative point is now at origin. 43 | """ 44 | points_list = [] 45 | 46 | num_inst = 0 47 | for f in sorted(glob.glob(os.path.join(anno_path, '*.txt'))): 48 | cls = os.path.basename(f).split('_')[0] 49 | num_inst += 1 50 | if cls not in g_classes: # note: in some room there is 'staris' class.. 
51 | cls = 'clutter' 52 | points = np.loadtxt(f) 53 | labels = np.ones((points.shape[0], 1)) * g_class2label[cls] 54 | inst_labels = np.ones((points.shape[0], 1)) * num_inst 55 | points_list.append(np.concatenate([points, labels, inst_labels], 1)) # Nx8 56 | 57 | data_label = np.concatenate(points_list, 0) 58 | xyz_min = np.amin(data_label, axis=0)[0:3] 59 | data_label[:, 0:3] -= xyz_min 60 | 61 | if file_format == 'txt': 62 | fout = open(out_filename, 'w') 63 | for i in range(data_label.shape[0]): 64 | fout.write('%f %f %f %d %d %d %d %d\n' % \ 65 | (data_label[i, 0], data_label[i, 1], data_label[i, 2], 66 | data_label[i, 3], data_label[i, 4], data_label[i, 5], 67 | data_label[i, 6], data_label[i, 7])) 68 | fout.close() 69 | elif file_format == 'numpy': 70 | np.save(out_filename, data_label) 71 | else: 72 | print('ERROR!! Unknown file format: %s, please use txt or numpy.' % \ 73 | (file_format)) 74 | exit() 75 | 76 | 77 | def main(): 78 | anno_paths = [line.rstrip() for line in open(os.path.join(BASE_DIR, 'meta/anno_paths.txt'))] 79 | anno_paths = [os.path.join(DATA_PATH, p) for p in anno_paths] 80 | 81 | output_folder = './data/s3dis/stanford_indoor3d_inst' 82 | if not os.path.exists(output_folder): 83 | os.mkdir(output_folder) 84 | 85 | # Note: there is an extra character in the v1.2 data in Area_5/hallway_6. It's fixed manually. 86 | for anno_path in anno_paths: 87 | print(anno_path) 88 | # try: 89 | elements = anno_path.split('/') 90 | out_filename = elements[-3]+'_'+elements[-2]+'.npy' # Area_1_hallway_1.npy 91 | collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy') 92 | # except: 93 | # print(anno_path, 'ERROR!!') 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | setup( 6 | name='SOFTGROUP_OP', 7 | ext_modules=[ 8 | CUDAExtension( 9 | 'SOFTGROUP_OP', ['src/softgroup_api.cpp', 'src/softgroup_ops.cpp', 'src/cuda.cu'], 10 | extra_compile_args={ 11 | 'cxx': ['-g'], 12 | 'nvcc': ['-O2'] 13 | }) 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | 6 | Modified by Thang Vu - Remove semantic label in clustering 7 | */ 8 | 9 | #include "bfs_cluster.h" 10 | 11 | /* =================== ballquery_batch_p================================= */ 12 | // input xyz: (n, 3) float 13 | // input batch_idxs: (n) int 14 | // input batch_offsets: (B+1) int, batch_offsets[-1] 15 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 16 | // output start_len: (n, 2), int 17 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, 18 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, 19 | at::Tensor start_len_tensor, int n, int meanActive, 20 | float radius) { 21 | const float *xyz = xyz_tensor.data_ptr(); 22 | const int *batch_idxs = batch_idxs_tensor.data_ptr(); 23 | const int *batch_offsets = batch_offsets_tensor.data_ptr(); 24 | int *idx = idx_tensor.data_ptr(); 25 | int *start_len = start_len_tensor.data_ptr(); 26 | 27 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 28 | int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs, 29 | batch_offsets, idx, start_len, stream); 30 | return cumsum; 31 | } 32 | 33 | ConnectedComponent find_cc(Int idx, Int *ball_query_idxs, int *start_len, 34 | int *visited) { 35 | ConnectedComponent cc; 36 | cc.addPoint(idx); 37 | visited[idx] = 1; 38 | 39 | std::queue Q; 40 | assert(Q.empty()); 41 | Q.push(idx); 42 | 43 | while (!Q.empty()) { 44 | Int cur = Q.front(); 45 | Q.pop(); 46 | int start = start_len[cur * 2]; 47 | int len = start_len[cur * 2 + 1]; 48 | for (Int i = start; i < start + len; i++) { 49 | Int idx_i = ball_query_idxs[i]; 50 | if (visited[idx_i] == 1) 51 | continue; 52 | cc.addPoint(idx_i); 53 | visited[idx_i] = 1; 54 | Q.push(idx_i); 55 | } 56 | } 57 | return cc; 58 | } 59 | 60 | int get_clusters(float *class_numpoint_mean, int *ball_query_idxs, 61 | int *start_len, const int nPoint, float threshold, 62 | ConnectedComponents &clusters, const int class_id) { 63 | int *visited = new int[nPoint]{0}; 64 | float _class_numpoint_mean, thr; 65 | int sumNPoint = 0; 66 | 67 | for (int i = 0; i < nPoint; i++) { 68 | if (visited[i] == 0) { 69 | ConnectedComponent CC = find_cc(i, ball_query_idxs, start_len, visited); 70 | _class_numpoint_mean = class_numpoint_mean[class_id]; 71 | 72 | // if _class_num_point_mean is not defined (-1) directly use threshold 73 | if (_class_numpoint_mean == -1) { 74 | thr = threshold; 75 | } else { 76 | thr = threshold * _class_numpoint_mean; 77 | } 78 | if ((int)CC.pt_idxs.size() >= thr) { 79 | clusters.push_back(CC); 80 | sumNPoint += (int)CC.pt_idxs.size(); 81 | } 82 | } 83 | } 84 | delete[] visited; 85 | return sumNPoint; 86 | } 87 | 88 | // convert from ConnectedComponents to (idxs, offsets) representation 89 | void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, 90 | int *cluster_offsets) { 91 | for (int i = 0; i < (int)CCs.size(); i++) { 92 | cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size(); 93 | for (int j = 0; j < (int)CCs[i].pt_idxs.size(); j++) { 94 | int idx = CCs[i].pt_idxs[j]; 95 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i; 96 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx; 97 | } 98 | } 99 | } 100 | 101 | // input: class_numpoint_mean_tensor 102 | // input: ball_query_idxs, int, (nActive) 103 | // input: start_len, int, (N, 2) 104 | // output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for 105 | // corresponding point idxs in N 106 | // output: cluster_offsets, int (nCluster + 1) 107 | void 
bfs_cluster(at::Tensor class_numpoint_mean_tensor, 108 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, 109 | at::Tensor cluster_idxs_tensor, 110 | at::Tensor cluster_offsets_tensor, const int N, 111 | float threshold, const int class_id) { 112 | float *class_numpoint_mean = class_numpoint_mean_tensor.data_ptr(); 113 | Int *ball_query_idxs = ball_query_idxs_tensor.data_ptr(); 114 | int *start_len = start_len_tensor.data_ptr(); 115 | ConnectedComponents CCs; 116 | int sumNPoint = get_clusters(class_numpoint_mean, ball_query_idxs, start_len, 117 | N, threshold, CCs, class_id); 118 | int nCluster = (int)CCs.size(); 119 | cluster_idxs_tensor.resize_({sumNPoint, 2}); 120 | cluster_offsets_tensor.resize_({nCluster + 1}); 121 | cluster_idxs_tensor.zero_(); 122 | cluster_offsets_tensor.zero_(); 123 | int *cluster_idxs = cluster_idxs_tensor.data_ptr(); 124 | int *cluster_offsets = cluster_offsets_tensor.data_ptr(); 125 | fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets); 126 | } 127 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | #include "../cuda_utils.h" 7 | #include "bfs_cluster.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /* ================================== ballquery_batch_p 14 | * ================================== */ 15 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, 16 | const float *xyz, const int *batch_idxs, 17 | const int *batch_offsets, int *idx, 18 | int *start_len, int *cumsum) { 19 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 20 | if (pt_idx >= n) 21 | return; 22 | 23 | start_len += (pt_idx * 2); 24 | int idx_temp[1000]; 25 | 26 | float radius2 = radius * radius; 27 | float o_x = xyz[pt_idx * 3 + 0]; 28 | float o_y = xyz[pt_idx * 3 + 1]; 29 | float o_z = xyz[pt_idx * 3 + 2]; 30 | 31 | int batch_idx = batch_idxs[pt_idx]; 32 | int start = batch_offsets[batch_idx]; 33 | int end = batch_offsets[batch_idx + 1]; 34 | 35 | int cnt = 0; 36 | for (int k = start; k < end; k++) { 37 | float x = xyz[k * 3 + 0]; 38 | float y = xyz[k * 3 + 1]; 39 | float z = xyz[k * 3 + 2]; 40 | float d2 = 41 | (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 42 | if (d2 < radius2) { 43 | if (cnt < 1000) { 44 | idx_temp[cnt] = k; 45 | } else { 46 | break; 47 | } 48 | ++cnt; 49 | } 50 | } 51 | 52 | start_len[0] = atomicAdd(cumsum, cnt); 53 | start_len[1] = cnt; 54 | 55 | int thre = n * meanActive; 56 | if (start_len[0] >= thre) 57 | return; 58 | 59 | idx += start_len[0]; 60 | if (start_len[0] + cnt >= thre) 61 | cnt = thre - start_len[0]; 62 | 63 | for (int k = 0; k < cnt; k++) { 64 | idx[k] = idx_temp[k]; 65 | } 66 | } 67 | 68 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, 69 | const float *xyz, const int *batch_idxs, 70 | const int *batch_offsets, int *idx, int *start_len, 71 | cudaStream_t stream) { 72 | // param xyz: (n, 3) 73 | // param batch_idxs: (n) 74 | // param batch_offsets: (B + 1) 75 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in 76 | // n 77 | // output start_len: (n, 2), int 78 | 79 | cudaError_t err; 80 | 81 | dim3 blocks(DIVUP(n, MAX_THREADS_PER_BLOCK)); 82 | dim3 threads(MAX_THREADS_PER_BLOCK); 83 | 84 | int cumsum = 0; 85 | int *p_cumsum; 86 | cudaMalloc((void 
**)&p_cumsum, sizeof(int)); 87 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 88 | 89 | ballquery_batch_p_cuda_<<>>( 90 | n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, 91 | p_cumsum); 92 | 93 | err = cudaGetLastError(); 94 | if (cudaSuccess != err) { 95 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 96 | exit(-1); 97 | } 98 | 99 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 100 | return cumsum; 101 | } 102 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.h: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #ifndef BFS_CLUSTER_H 8 | #define BFS_CLUSTER_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, 15 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, 16 | at::Tensor start_len_tensor, int n, int meanActive, 17 | float radius); 18 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, 19 | const float *xyz, const int *batch_idxs, 20 | const int *batch_offsets, int *idx, int *start_len, 21 | cudaStream_t stream); 22 | 23 | void bfs_cluster(at::Tensor class_numpoint_mean_tensor, 24 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, 25 | at::Tensor cluster_idxs_tensor, 26 | at::Tensor cluster_offsets_tensor, const int N, 27 | float threshold, const int class_id); 28 | 29 | #endif // BFS_CLUSTER_H 30 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | */ 4 | 5 | #include "cal_iou_and_masklabel.h" 6 | 7 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor, 8 | at::Tensor proposals_offset_tensor, 9 | at::Tensor instance_labels_tensor, 10 | at::Tensor instance_pointnum_tensor, 11 | at::Tensor proposals_iou_tensor, int nInstance, 12 | int nProposal) { 13 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 14 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 15 | long *instance_labels = instance_labels_tensor.data_ptr(); 16 | int *instance_pointnum = instance_pointnum_tensor.data_ptr(); 17 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 18 | 19 | // input: nInstance (1,), int 20 | // input: nProposal (1,), int 21 | // input: proposals_idx (sumNPoint), int 22 | // input: proposals_offset (nProposal + 1), int 23 | // input: instance_labels (N), long, 0~total_nInst-1, -100 24 | // input: instance_pointnum (total_nInst), int 25 | // input: mask_scores_sigmoid (sumNPoint, 1), float 26 | // output: proposals_iou (nProposal, total_nInst), float 27 | // output: mask_label (sumNPoint, 1), float 28 | get_mask_iou_on_cluster_cuda(nInstance, nProposal, proposals_idx, 29 | proposals_offset, instance_labels, 30 | instance_pointnum, proposals_iou); 31 | } 32 | 33 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor, 34 | at::Tensor proposals_offset_tensor, 35 | at::Tensor instance_labels_tensor, 36 | at::Tensor instance_pointnum_tensor, 37 | at::Tensor proposals_iou_tensor, int nInstance, 38 | int nProposal, 39 | 
at::Tensor mask_scores_sigmoid_tensor) { 40 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 41 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 42 | long *instance_labels = instance_labels_tensor.data_ptr(); 43 | int *instance_pointnum = instance_pointnum_tensor.data_ptr(); 44 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 45 | float *mask_scores_sigmoid = mask_scores_sigmoid_tensor.data_ptr(); 46 | 47 | // input: nInstance (1,), int 48 | // input: nProposal (1,), int 49 | // input: proposals_idx (sumNPoint), int 50 | // input: proposals_offset (nProposal + 1), int 51 | // input: instance_labels (N), long, 0~total_nInst-1, -100 52 | // input: instance_pointnum (total_nInst), int 53 | // input: mask_scores_sigmoid (sumNPoint, 1), float 54 | // output: proposals_iou (nProposal, total_nInst), float 55 | // output: mask_label (sumNPoint, 1), float 56 | get_mask_iou_on_pred_cuda( 57 | nInstance, nProposal, proposals_idx, proposals_offset, instance_labels, 58 | instance_pointnum, proposals_iou, mask_scores_sigmoid); 59 | } 60 | 61 | void get_mask_label(at::Tensor proposals_idx_tensor, 62 | at::Tensor proposals_offset_tensor, 63 | at::Tensor instance_labels_tensor, 64 | at::Tensor instance_cls_tensor, 65 | at::Tensor proposals_iou_tensor, int nInstance, 66 | int nProposal, float iou_thr, 67 | at::Tensor mask_labels_tensor) { 68 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 69 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 70 | long *instance_labels = instance_labels_tensor.data_ptr(); 71 | long *instance_cls = instance_cls_tensor.data_ptr(); 72 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 73 | float *mask_label = mask_labels_tensor.data_ptr(); 74 | 75 | // input: nInstance (1,), int 76 | // input: nProposal (1,), int 77 | // input: proposals_idx (sumNPoint), int 78 | // input: proposals_offset (nProposal + 1), int 79 | // input: instance_labels (N), long, 0~total_nInst-1, -100 80 | // input: instance_pointnum (total_nInst), int 81 | // input: mask_scores_sigmoid (sumNPoint, 1), float 82 | // output: proposals_iou (nProposal, total_nInst), float 83 | // output: mask_label (sumNPoint, 1), float 84 | get_mask_label_cuda(nInstance, nProposal, iou_thr, proposals_idx, 85 | proposals_offset, instance_labels, instance_cls, 86 | proposals_iou, mask_label); 87 | } 88 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.h: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | */ 4 | 5 | #ifndef CAL_IOU_AND_MASKLABEL_H 6 | #define CAL_IOU_AND_MASKLABEL_H 7 | #include 8 | #include 9 | 10 | #include "../datatype/datatype.h" 11 | 12 | void get_mask_iou_on_cluster_cuda(int nInstance, int nProposal, 13 | int *proposals_idx, int *proposals_offset, 14 | long *instance_labels, int *instance_pointnum, 15 | float *proposals_iou); 16 | 17 | void get_mask_iou_on_pred_cuda(int nInstance, int nProposal, int *proposals_idx, 18 | int *proposals_offset, long *instance_labels, 19 | int *instance_pointnum, float *proposals_iou, 20 | float *mask_scores_sigmoid); 21 | 22 | void get_mask_label_cuda(int nInstance, int nProposal, float iou_thr, 23 | int *proposals_idx, int *proposals_offset, 24 | long *instance_labels, long *instance_cls, 25 | float *proposals_iou, float *mask_label); 26 | 27 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor, 28 | 
at::Tensor proposals_offset_tensor, 29 | at::Tensor instance_labels_tensor, 30 | at::Tensor instance_pointnum_tensor, 31 | at::Tensor proposals_iou_tensor, int nInstance, 32 | int nProposal); 33 | 34 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor, 35 | at::Tensor proposals_offset_tensor, 36 | at::Tensor instance_labels_tensor, 37 | at::Tensor instance_pointnum_tensor, 38 | at::Tensor proposals_iou_tensor, int nInstance, 39 | int nProposal, at::Tensor mask_scores_sigmoid_tensor); 40 | 41 | void get_mask_label(at::Tensor proposals_idx_tensor, 42 | at::Tensor proposals_offset_tensor, 43 | at::Tensor instance_labels_tensor, 44 | at::Tensor instance_cls_tensor, 45 | at::Tensor proposals_iou_tensor, int nInstance, 46 | int nProposal, float iou_thr, 47 | at::Tensor mask_labels_tensor); 48 | 49 | #endif // CAL_IOU_AND_MASKLABEL_H 50 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cuda.cu: -------------------------------------------------------------------------------- 1 | #include "datatype/datatype.h" 2 | #include 3 | 4 | #include "bfs_cluster/bfs_cluster.cu" 5 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu" 6 | #include "roipool/roipool.cu" 7 | #include "sec_mean/sec_mean.cu" 8 | #include "voxelize/voxelize.cu" 9 | 10 | template void voxelize_fp_cuda(Int nOutputRows, Int maxActive, 11 | Int nPlanes, float *feats, 12 | float *output_feats, Int *rules, 13 | bool average); 14 | 15 | template void voxelize_bp_cuda(Int nOutputRows, Int maxActive, 16 | Int nPlanes, float *d_output_feats, 17 | float *d_feats, Int *rules, bool average); 18 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | 8 | #define MAX_THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = 19 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 20 | dim3 block_config(x_threads, y_threads, 1); 21 | return block_config; 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.cpp: -------------------------------------------------------------------------------- 1 | #include "datatype.h" 2 | 3 | template SparseGrid::SparseGrid() : ctr(0) { 4 | // Sparsehash needs a key to be set aside and never used 5 | Point empty_key; 6 | for (Int i = 0; i < dimension; i++) { 7 | empty_key[i] = std::numeric_limits::min(); 8 | } 9 | mp.set_empty_key(empty_key); 10 | } 11 | 12 | ConnectedComponent::ConnectedComponent() {} 13 | 14 | void ConnectedComponent::addPoint(Int pt_idx) { pt_idxs.push_back(pt_idx); } 15 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.h: -------------------------------------------------------------------------------- 1 | #ifndef DATATYPE_H 2 | #define DATATYPE_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
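// Shared data types for the CUDA ops: SparseGrid wraps a google::dense_hash_map
// from voxel coordinates to voxel indices, RuleBook stores gather/scatter rules
// as nested vectors of point indices, and ConnectedComponent accumulates the
// point indices of a single cluster grown by the BFS clustering code.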
#include 8 | 9 | using Int = int32_t; 10 | 11 | template using Point = std::array; 12 | 13 | template struct IntArrayHash { 14 | std::size_t operator()(Point const &p) const { 15 | Int hash = 16777619; 16 | for (auto x : p) { 17 | hash *= 2166136261; 18 | hash ^= x; 19 | } 20 | return hash; 21 | } 22 | }; 23 | 24 | template 25 | using SparseGridMap = google::dense_hash_map< 26 | Point, Int, IntArrayHash, 27 | std::equal_to>>; // 28 | 29 | template class SparseGrid { 30 | public: 31 | Int ctr; 32 | SparseGridMap mp; 33 | SparseGrid(); 34 | }; 35 | 36 | template using SparseGrids = std::vector>; 37 | 38 | using RuleBook = std::vector>; 39 | 40 | class ConnectedComponent { 41 | public: 42 | std::vector pt_idxs; 43 | float accum_x = 0.; 44 | float accum_y = 0.; 45 | float accum_z = 0.; 46 | int cls_label = -100; 47 | int batch_idx = -1; 48 | // int npoint = 0; 49 | 50 | ConnectedComponent(); 51 | void addPoint(Int pt_idx); 52 | }; 53 | 54 | using ConnectedComponents = std::vector; 55 | 56 | #endif // DATATYPE_H 57 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "roipool.h" 8 | 9 | void global_avg_pool_fp(at::Tensor feats_tensor, 10 | at::Tensor proposals_offset_tensor, 11 | at::Tensor output_feats_tensor, int nProposal, int C) { 12 | float *feats = feats_tensor.data_ptr(); 13 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 14 | float *output_feats = output_feats_tensor.data_ptr(); 15 | 16 | global_avg_pool_fp_cuda(nProposal, C, feats, proposals_offset, output_feats); 17 | } 18 | 19 | void global_avg_pool_bp(at::Tensor d_feats_tensor, 20 | at::Tensor proposals_offset_tensor, 21 | at::Tensor d_output_feats_tensor, int nProposal, 22 | int C) { 23 | float *d_feats = d_feats_tensor.data_ptr(); 24 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 25 | float *d_output_feats = d_output_feats_tensor.data_ptr(); 26 | 27 | global_avg_pool_bp_cuda(nProposal, C, d_feats, proposals_offset, 28 | d_output_feats); 29 | } 30 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cu: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
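Note: despite the "ROI Max Pool" title, the kernels below implement global
average pooling over proposals. The forward kernel averages the features of the
points in [proposals_offset[p], proposals_offset[p + 1]) for each proposal p,
with thread blocks striding over proposals and threads over feature channels;
the backward kernel scatters the averaged gradients back to the contributing
points via atomicAdd.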
5 | */ 6 | 7 | #include "roipool.h" 8 | #include 9 | #include 10 | 11 | // fp 12 | __global__ void global_avg_pool_fp_cuda_(int nProposal, int C, float *feats, 13 | int *proposals_offset, 14 | float *output_feats) { 15 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) { 16 | int start = proposals_offset[pp_id]; 17 | int end = proposals_offset[pp_id + 1]; 18 | int n_points = end - start; 19 | 20 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 21 | // int argmax_idx = -1; 22 | // float max_val = -1e50; 23 | float val = 0; 24 | 25 | for (int i = start; i < end; i++) { 26 | val += feats[i * C + plane]; 27 | } 28 | // output_maxidx[pp_id * C + plane] = argmax_idx; 29 | output_feats[pp_id * C + plane] = val / (float)n_points; 30 | } 31 | } 32 | } 33 | 34 | // input: feats (sumNPoint, C) float 35 | // input: proposals_offset (nProposal + 1) int 36 | // output: output_feats (nProposal, C) float 37 | // output: output_maxidx (nProposal, C) int 38 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats, 39 | int *proposals_offset, float *output_feats) { 40 | global_avg_pool_fp_cuda_<<>>( 42 | nProposal, C, feats, proposals_offset, output_feats); 43 | } 44 | 45 | // bp 46 | __global__ void global_avg_pool_bp_cuda_(int nProposal, int C, float *d_feats, 47 | int *proposals_offset, 48 | float *d_output_feats) { 49 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) { 50 | int start = proposals_offset[pp_id]; 51 | int end = proposals_offset[pp_id + 1]; 52 | int n_points = end - start; 53 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 54 | for (int i = start; i < end; i++) { 55 | atomicAdd(&d_feats[i * C + plane], 56 | d_output_feats[pp_id * C + plane] / (float)n_points); 57 | } 58 | } 59 | } 60 | } 61 | 62 | // input: d_output_feats (nProposal, C) float 63 | // input: output_maxidx (nProposal, C) int 64 | // input: proposals_offset (nProposal + 1) int 65 | // output: d_feats (sumNPoint, C) float 66 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats, 67 | int *proposals_offset, float *d_output_feats) { 68 | global_avg_pool_bp_cuda_<<>>( 70 | nProposal, C, d_feats, proposals_offset, d_output_feats); 71 | } 72 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.h: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #ifndef ROIPOOL_H 8 | #define ROIPOOL_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats, 15 | int *proposals_offset, float *output_feats); 16 | 17 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats, 18 | int *proposals_offset, float *d_output_feats); 19 | 20 | void global_avg_pool_fp(at::Tensor feats_tensor, 21 | at::Tensor proposals_offset_tensor, 22 | at::Tensor output_feats_tensor, int nProposal, int C); 23 | 24 | void global_avg_pool_bp(at::Tensor d_feats_tensor, 25 | at::Tensor proposals_offset_tensor, 26 | at::Tensor d_output_feats_tensor, int nProposal, int C); 27 | 28 | #endif // ROIPOOL_H 29 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "sec_mean.h" 8 | 9 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, 10 | at::Tensor out_tensor, int nProposal, int C) { 11 | int *offsets = offsets_tensor.data_ptr(); 12 | float *inp = inp_tensor.data_ptr(); 13 | float *out = out_tensor.data_ptr(); 14 | 15 | sec_mean_cuda(nProposal, C, inp, offsets, out); 16 | } 17 | 18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, 19 | at::Tensor out_tensor, int nProposal, int C) { 20 | int *offsets = offsets_tensor.data_ptr(); 21 | float *inp = inp_tensor.data_ptr(); 22 | float *out = out_tensor.data_ptr(); 23 | 24 | sec_min_cuda(nProposal, C, inp, offsets, out); 25 | } 26 | 27 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, 28 | at::Tensor out_tensor, int nProposal, int C) { 29 | int *offsets = offsets_tensor.data_ptr(); 30 | float *inp = inp_tensor.data_ptr(); 31 | float *out = out_tensor.data_ptr(); 32 | 33 | sec_max_cuda(nProposal, C, inp, offsets, out); 34 | } 35 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) (no bp) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
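Thread blocks stride over proposals and threads over feature channels; each
kernel reduces (mean, min or max) the input features over the point range
[offsets[p], offsets[p + 1]) of its proposal. No backward pass is implemented.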
5 | */ 6 | 7 | #include "sec_mean.h" 8 | #include 9 | #include 10 | 11 | /* ================================== sec_mean 12 | * ================================== */ 13 | __global__ void sec_mean_cuda_(int nProposal, int C, float *inp, int *offsets, 14 | float *out) { 15 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 16 | int start = offsets[p_id]; 17 | int end = offsets[p_id + 1]; 18 | 19 | float count = (float)(end - start); 20 | 21 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 22 | float mean = 0; 23 | for (int i = start; i < end; i++) { 24 | mean += (inp[i * C + plane] / count); 25 | } 26 | out[p_id * C + plane] = mean; 27 | } 28 | } 29 | } 30 | 31 | // input: inp (N, C) float 32 | // input: offsets (nProposal + 1) int 33 | // output: out (nProposal, C) float 34 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 35 | sec_mean_cuda_<<>>( 36 | nProposal, C, inp, offsets, out); 37 | } 38 | 39 | /* ================================== sec_min ================================== 40 | */ 41 | __global__ void sec_min_cuda_(int nProposal, int C, float *inp, int *offsets, 42 | float *out) { 43 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 44 | int start = offsets[p_id]; 45 | int end = offsets[p_id + 1]; 46 | 47 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 48 | float min_val = 1e50; 49 | for (int i = start; i < end; i++) { 50 | if (inp[i * C + plane] < min_val) { 51 | min_val = inp[i * C + plane]; 52 | } 53 | } 54 | out[p_id * C + plane] = min_val; 55 | } 56 | } 57 | } 58 | 59 | // input: inp (N, C) float 60 | // input: offsets (nProposal + 1) int 61 | // output: out (nProposal, C) float 62 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 63 | sec_min_cuda_<<>>( 64 | nProposal, C, inp, offsets, out); 65 | } 66 | 67 | /* ================================== sec_max ================================== 68 | */ 69 | __global__ void sec_max_cuda_(int nProposal, int C, float *inp, int *offsets, 70 | float *out) { 71 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 72 | int start = offsets[p_id]; 73 | int end = offsets[p_id + 1]; 74 | 75 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 76 | float max_val = -1e50; 77 | for (int i = start; i < end; i++) { 78 | if (inp[i * C + plane] > max_val) { 79 | max_val = inp[i * C + plane]; 80 | } 81 | } 82 | out[p_id * C + plane] = max_val; 83 | } 84 | } 85 | } 86 | 87 | // input: inp (N, C) float 88 | // input: offsets (nProposal + 1) int 89 | // output: out (nProposal, C) float 90 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 91 | sec_max_cuda_<<>>( 92 | nProposal, C, inp, offsets, out); 93 | } 94 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.h: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #ifndef SEC_MEAN_H 8 | #define SEC_MEAN_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, 15 | at::Tensor out_tensor, int nProposal, int C); 16 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 17 | 18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, 19 | at::Tensor out_tensor, int nProposal, int C); 20 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 21 | 22 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, 23 | at::Tensor out_tensor, int nProposal, int C); 24 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 25 | 26 | #endif // SEC_MEAN_H 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "softgroup_ops.h" 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | 8 | m.def("get_mask_iou_on_cluster", &get_mask_iou_on_cluster, 9 | "get_mask_iou_on_cluster"); 10 | m.def("get_mask_iou_on_pred", &get_mask_iou_on_pred, "get_mask_iou_on_pred"); 11 | m.def("get_mask_label", &get_mask_label, "get_mask_label"); 12 | 13 | m.def("voxelize_idx", &voxelize_idx_3d, "voxelize_idx"); 14 | m.def("voxelize_fp", &voxelize_fp_feat, "voxelize_fp"); 15 | m.def("voxelize_bp", &voxelize_bp_feat, "voxelize_bp"); 16 | 17 | m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p"); 18 | m.def("bfs_cluster", &bfs_cluster, "bfs_cluster"); 19 | 20 | m.def("global_avg_pool_fp", &global_avg_pool_fp, "global_avg_pool_fp"); 21 | m.def("global_avg_pool_bp", &global_avg_pool_bp, "global_avg_pool_bp"); 22 | 23 | m.def("sec_mean", &sec_mean, "sec_mean"); 24 | m.def("sec_min", &sec_min, "sec_min"); 25 | m.def("sec_max", &sec_max, "sec_max"); 26 | } 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "bfs_cluster/bfs_cluster.cpp" 6 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp" 7 | #include "datatype/datatype.cpp" 8 | #include "roipool/roipool.cpp" 9 | #include "sec_mean/sec_mean.cpp" 10 | #include "voxelize/voxelize.cpp" 11 | 12 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, 13 | /* long M*4 */ at::Tensor output_coords, 14 | /* Int N */ at::Tensor input_map, 15 | /* Int M*(maxActive+1) */ at::Tensor output_map, 16 | Int batchSize, Int mode) { 17 | voxelize_idx<3>(coords, output_coords, input_map, output_map, batchSize, 18 | mode); 19 | } 20 | 21 | void voxelize_fp_feat( 22 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 23 | /* cuda float M*C */ at::Tensor output_feats, 24 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 25 | Int maxActive, Int nPlane) { 26 | voxelize_fp(feats, output_feats, output_map, mode, nActive, maxActive, 27 | nPlane); 28 | } 29 | 30 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, 31 | /* cuda float N*C */ at::Tensor d_feats, 32 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 33 | Int mode, Int nActive, Int maxActive, Int nPlane) { 34 | voxelize_bp(d_output_feats, d_feats, output_map, mode, nActive, 35 | 
maxActive, nPlane); 36 | } 37 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef HAIS_H 2 | #define HAIS_H 3 | #include "bfs_cluster/bfs_cluster.h" 4 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.h" 5 | #include "datatype/datatype.h" 6 | #include "roipool/roipool.h" 7 | #include "sec_mean/sec_mean.h" 8 | 9 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, 10 | /* long M*4 */ at::Tensor output_coords, 11 | /* Int N */ at::Tensor input_map, 12 | /* Int M*(maxActive+1) */ at::Tensor output_map, 13 | Int batchSize, Int mode); 14 | 15 | void voxelize_fp_feat( 16 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 17 | /* cuda float M*C */ at::Tensor output_feats, 18 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 19 | Int maxActive, Int nPlane); 20 | 21 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, 22 | /* cuda float N*C */ at::Tensor d_feats, 23 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 24 | Int mode, Int nActive, Int maxActive, Int nPlane); 25 | 26 | #endif // HAIS_H 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "voxelize.h" 8 | 9 | /* ================================== voxelize_idx 10 | * ================================== */ 11 | template 12 | void voxelize_idx(/* long N*4 */ at::Tensor coords, 13 | /* long M*4 */ at::Tensor output_coords, 14 | /* Int N */ at::Tensor input_map, 15 | /* Int M*(maxActive+1) */ at::Tensor output_map, 16 | Int batchSize, Int mode) { 17 | assert(coords.ndimension() == 2); 18 | assert(coords.size(1) >= dimension and coords.size(1) <= dimension + 1); 19 | 20 | RuleBook voxelizeRuleBook; // rule[1]: M voxels -> N points output_map 21 | SparseGrids inputSGs; // voxel_coords -> voxel_idx in M voxels 22 | // input_map: N points -> M voxels 23 | Int nActive = 0; 24 | 25 | Int maxActive = voxelize_inputmap( 26 | inputSGs, input_map.data_ptr(), voxelizeRuleBook, nActive, 27 | coords.data_ptr(), coords.size(0), coords.size(1), batchSize, mode); 28 | 29 | output_map.resize_({nActive, maxActive + 1}); 30 | output_map.zero_(); 31 | 32 | output_coords.resize_({nActive, coords.size(1)}); 33 | output_coords.zero_(); 34 | 35 | Int *oM = output_map.data_ptr(); 36 | long *oC = output_coords.data_ptr(); 37 | voxelize_outputmap(coords.data_ptr(), oC, oM, 38 | &voxelizeRuleBook[1][0], nActive, maxActive); 39 | } 40 | 41 | template 42 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map, 43 | Int *rule, Int nOutputRows, Int maxActive) { 44 | for (Int i = 0; i < nOutputRows; i++) { 45 | for (Int j = 0; j <= maxActive; j++) 46 | output_map[j] = rule[j]; 47 | Int inputIdx = rule[1]; 48 | rule += (1 + maxActive); 49 | output_map += (1 + maxActive); 50 | 51 | long *coord = coords + inputIdx * (dimension + 1); 52 | long *output_coord = output_coords + i * (dimension + 1); 53 | for (Int j = 0; j <= dimension; j++) { 54 | output_coord[j] = coord[j]; 55 | } 56 | } 57 | } 58 | 59 | // mode 0=guaranteed unique 1=last item(overwrite) 2=first item(keep) 
3=sum, 60 | // 4=mean 61 | // input: coords 62 | // output: SGs: one map for each batch: map from voxel_coord to voxel_idx(in M 63 | // voxels) 64 | // output: input_map: N, N points -> M voxels 65 | // output: rules 66 | // output: nActive 67 | // output: maxActive 68 | template 69 | Int voxelize_inputmap(SparseGrids &SGs, Int *input_map, 70 | RuleBook &rules, Int &nActive, long *coords, 71 | Int nInputRows, Int nInputColumns, Int batchSize, 72 | Int mode) { 73 | assert(nActive == 0); 74 | assert(rules.size() == 0); 75 | assert(SGs.size() == 0); 76 | 77 | SGs.resize(batchSize); 78 | Point p; 79 | 80 | std::vector> outputRows; 81 | if (nInputColumns == dimension) { 82 | SGs.resize(1); 83 | auto &sg = SGs[0]; 84 | for (Int i = 0; i < nInputRows; i++) { 85 | for (Int j = 0; j < dimension; j++) 86 | p[j] = coords[j]; 87 | coords += dimension; 88 | auto iter = sg.mp.find(p); 89 | if (iter == sg.mp.end()) { 90 | sg.mp[p] = nActive++; 91 | outputRows.resize(nActive); 92 | } 93 | outputRows[sg.mp[p]].push_back(i); 94 | 95 | input_map[i] = sg.mp[p]; 96 | } 97 | } else { // nInputColumns == dimension + 1 (1 in index 0 for batchidx) 98 | Int batchIdx; 99 | for (Int i = 0; i < nInputRows; i++) { 100 | batchIdx = coords[0]; 101 | for (Int j = 0; j < dimension; j++) 102 | p[j] = coords[j + 1]; 103 | coords += (dimension + 1); 104 | if (batchIdx + 1 >= (Int)SGs.size()) { 105 | SGs.resize(batchIdx + 1); 106 | } 107 | auto &sg = SGs[batchIdx]; 108 | auto iter = sg.mp.find(p); 109 | if (iter == sg.mp.end()) { 110 | sg.mp[p] = nActive++; 111 | outputRows.resize(nActive); 112 | } 113 | outputRows[sg.mp[p]].push_back(i); 114 | 115 | input_map[i] = sg.mp[p]; 116 | } 117 | } 118 | 119 | // Rulebook Format 120 | // rules[0][0] == mode 121 | // rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2) 122 | // rules[0][2] == nInputRows 123 | // rules[0][3] == nOutputRows 124 | // rules[1] nOutputRows x (1+maxActive) 125 | rules.resize(2); 126 | rules[0].push_back(mode); 127 | rules[0].push_back(1); 128 | rules[0].push_back(nInputRows); 129 | rules[0].push_back(outputRows.size()); 130 | auto &rule = rules[1]; 131 | if (mode == 0) { 132 | assert(nInputRows == (Int)outputRows.size()); 133 | for (Int i = 0; i < nActive; i++) { 134 | rule.push_back(1); 135 | assert((Int)outputRows[i].size() == 1); 136 | rule.push_back(outputRows[i][0]); 137 | } 138 | } 139 | if (mode == 1) { 140 | for (Int i = 0; i < nActive; i++) { 141 | rule.push_back(1); 142 | rule.push_back(outputRows[i].front()); 143 | } 144 | } 145 | if (mode == 2) { 146 | for (Int i = 0; i < nActive; i++) { 147 | rule.push_back(1); 148 | rule.push_back(outputRows[i].back()); 149 | } 150 | } 151 | Int maxActive = 1; 152 | if (mode == 3 or mode == 4) { 153 | for (auto &row : outputRows) 154 | maxActive = std::max(maxActive, (Int)row.size()); 155 | rules[0][1] = maxActive; 156 | for (auto &row : outputRows) { 157 | rule.push_back(row.size()); 158 | for (auto &r : row) 159 | rule.push_back(r); 160 | rule.resize((rule.size() + maxActive) / (maxActive + 1) * 161 | (maxActive + 1)); 162 | } 163 | } 164 | return maxActive; 165 | } 166 | 167 | /* ================================== voxelize 168 | * ================================== */ 169 | template 170 | void voxelize_fp( 171 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 172 | /* cuda float M*C */ at::Tensor output_feats, 173 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 174 | Int maxActive, Int nPlane) { 175 | 176 | auto iF = feats.data_ptr(); 177 | auto 
oF = output_feats.data_ptr(); 178 | 179 | Int *rules = output_map.data_ptr(); 180 | 181 | voxelize_fp_cuda(nActive, maxActive, nPlane, iF, oF, rules, mode == 4); 182 | } 183 | 184 | template 185 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats, 186 | /* cuda float N*C */ at::Tensor d_feats, 187 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, 188 | Int nActive, Int maxActive, Int nPlane) { 189 | auto d_oF = d_output_feats.data_ptr(); 190 | auto d_iF = d_feats.data_ptr(); 191 | 192 | Int *rules = output_map.data_ptr(); 193 | 194 | voxelize_bp_cuda(nActive, maxActive, nPlane, d_oF, d_iF, rules, mode == 4); 195 | } 196 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "voxelize.h" 8 | 9 | template 10 | __global__ void voxelize_fp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, 11 | T *feats, T *output_feats, Int *rules, 12 | bool average) { 13 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) { 14 | T *out = output_feats + row * nPlanes; 15 | Int *r = rules + row * (maxActive + 1); 16 | Int nActive = r[0]; 17 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; 18 | for (int i = 1; i <= nActive; i++) { 19 | T *inp = feats + r[i] * nPlanes; 20 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) { 21 | atomicAdd(&out[plane], multiplier * inp[plane]); 22 | } 23 | } 24 | } 25 | } 26 | 27 | // input: feats N * C 28 | // input: rules M * (1 + maxActive) 29 | // output: output_feats M * C 30 | template 31 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, 32 | T *output_feats, Int *rules, bool average) { 33 | voxelize_fp_cuda_< 34 | T><<>>( 35 | nOutputRows, maxActive, nPlanes, feats, output_feats, rules, average); 36 | } 37 | 38 | template 39 | __global__ void voxelize_bp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, 40 | T *d_output_feats, T *d_feats, Int *rules, 41 | bool average) { 42 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) { 43 | T *out = d_output_feats + row * nPlanes; 44 | Int *r = rules + row * (maxActive + 1); 45 | Int nActive = r[0]; 46 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; 47 | for (int i = 1; i <= nActive; i++) { 48 | T *inp = d_feats + r[i] * nPlanes; 49 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) { 50 | atomicAdd(&inp[plane], multiplier * out[plane]); 51 | } 52 | } 53 | } 54 | } 55 | 56 | template 57 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, 58 | T *d_output_feats, T *d_feats, Int *rules, bool average) { 59 | voxelize_bp_cuda_< 60 | T><<>>( 61 | nOutputRows, maxActive, nPlanes, d_output_feats, d_feats, rules, average); 62 | } 63 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.h: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
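voxelize_idx builds the point-to-voxel mapping plus an output_map whose rows
have the form [count, point indices ...] with stride (maxActive + 1); e.g. with
maxActive = 3, a voxel covering points {4, 9} gets the row [2, 4, 9, 0]. The
mode selects how points falling in the same voxel are combined (keep one of
them, sum, or average). voxelize_fp gathers point features into voxel features
on the GPU (dividing by the count when mode == 4, i.e. mean), and voxelize_bp
scatters voxel-feature gradients back to the contributing points.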
5 | */ 6 | 7 | #ifndef VOXELIZE_H 8 | #define VOXELIZE_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | /* ================================== voxelize_idx 15 | * ================================== */ 16 | template 17 | void voxelize_idx(/* long N*4 */ at::Tensor coords, 18 | /* long M*4 */ at::Tensor output_coords, 19 | /* Int N */ at::Tensor input_map, 20 | /* Int M*(maxActive+1) */ at::Tensor output_map, 21 | Int batchSize, Int mode); 22 | 23 | template 24 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map, 25 | Int *rule, Int nOutputRows, Int maxActive); 26 | 27 | template 28 | Int voxelize_inputmap(SparseGrids &SGs, Int *input_map, 29 | RuleBook &rules, Int &nActive, long *coords, 30 | Int nInputRows, Int nInputColumns, Int batchSize, 31 | Int mode); 32 | 33 | /* ================================== voxelize 34 | * ================================== */ 35 | template 36 | void voxelize_fp( 37 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 38 | /* cuda float M*C */ at::Tensor output_feats, 39 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 40 | Int maxActive, Int nPlane); 41 | 42 | template 43 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, 44 | T *output_feats, Int *rules, bool average); 45 | 46 | // 47 | template 48 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats, 49 | /* cuda float N*C */ at::Tensor d_feats, 50 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, 51 | Int nActive, Int maxActive, Int nPlane); 52 | 53 | template 54 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, 55 | T *d_output_feats, T *d_feats, Int *rules, bool average); 56 | 57 | #endif // VOXELIZE_H 58 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_third_party = munch,numpy,pandas,plyfile,scannet_util,scipy,sklearn,spconv,tensorboardX,torch,tqdm,yaml 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY 8 | 9 | [yapf] 10 | BASED_ON_STYLE = pep8 11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 13 | COLUMN_LIMIT = 100 14 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | if __name__ == '__main__': 6 | setup( 7 | name='softgroup', 8 | version='1.0', 9 | description='SoftGroup: SoftGroup for 3D Instance Segmentation [CVPR 2022]', 10 | author='Thang Vu', 11 | author_email='thangvubk@kaist.ac.kr', 12 | # packages=['softgroup'], 13 | package_data={'ops': ['*/*.so']}, 14 | ext_modules=[ 15 | CUDAExtension( 16 | name='softgroup_ops', 17 | sources=[ 18 | 'ops/src/softgroup_api.cpp', 'ops/src/softgroup_ops.cpp', 19 | 'ops/src/cuda.cu' 20 | ], 21 | extra_compile_args={ 22 | 'cxx': ['-g'], 23 | 'nvcc': ['-O2'] 24 | }, 25 | include_dirs=['/data/anaconda3/envs/pt18/include/']) 26 | ], 27 | cmdclass={'build_ext': BuildExtension}) 28 | -------------------------------------------------------------------------------- 
/pcseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from .vision_networks import build_model 7 | from .text_networks import build_text_model 8 | 9 | try: 10 | import kornia 11 | except: 12 | pass 13 | # print('Warning: kornia is not installed. This package is only required by CaDDN') 14 | 15 | 16 | def build_vision_network(model_cfg, num_class, dataset): 17 | model = build_model( 18 | model_cfg=model_cfg, num_class=num_class, dataset=dataset 19 | ) 20 | return model 21 | 22 | 23 | def build_text_network(model_cfg): 24 | text_encoder = build_text_model(model_cfg=model_cfg) 25 | return text_encoder 26 | 27 | 28 | def load_data_to_gpu(batch_dict): 29 | for key, val in batch_dict.items(): 30 | if isinstance(val, torch.Tensor): 31 | batch_dict[key] = batch_dict[key].cuda() 32 | elif not isinstance(val, np.ndarray) or key in ['calib', 'point_img_idx', 'point_img']: 33 | continue 34 | elif key in ['ids', 'metadata', 'scene_name']: 35 | continue 36 | elif key in ['points_xyz_voxel_scale', 'labels', 'inst_label', 'origin_idx', 'offsets', 'inst_cls']: 37 | batch_dict[key] = torch.from_numpy(val).long().cuda() 38 | elif key in ['inst_pointnum', 'batch_idxs']: 39 | batch_dict[key] = torch.from_numpy(val).int().cuda() 40 | else: 41 | batch_dict[key] = torch.from_numpy(val).float().cuda() 42 | -------------------------------------------------------------------------------- /pcseg/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/__init__.py: -------------------------------------------------------------------------------- 1 | from .vl_adapter import VLAdapter 2 | 3 | __all__ = { 4 | 'VLAdapter': VLAdapter 5 | } -------------------------------------------------------------------------------- /pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/vl_adapter.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from ..model_utils import basic_block_1d 7 | 8 | 9 | class VLAdapter(nn.Module): 10 | def __init__(self, model_cfg, in_channel): 11 | super(VLAdapter, self).__init__() 12 | self.model_cfg = model_cfg 13 | self.in_feature_name = model_cfg.get('IN_FEAT_NAME', 'backbone_3d_feats') 14 | self.eval_only = model_cfg.get('EVAL_ONLY', None) 15 | self.text_channel = model_cfg.TEXT_DIM 16 | 17 | # vision adapter 18 | adapter_last_norm = 
self.model_cfg.get('LAST_NORM', True) 19 | self.adapter = self.build_vl_adapter(self.model_cfg.NUM_ADAPTER_LAYERS, in_channel, adapter_last_norm) 20 | 21 | def build_vl_adapter(self, num_adapter_layers, in_channel, last_norm): 22 | """build vision language adapter 23 | 24 | Args: 25 | num_adapter_layers (_type_): _description_ 26 | in_channel (_type_): _description_ 27 | 28 | Raises: 29 | NotImplementedError: _description_ 30 | 31 | Returns: 32 | _type_: _description_ 33 | """ 34 | if num_adapter_layers < 1 or self.eval_only: 35 | return None 36 | 37 | if num_adapter_layers == 1: 38 | mid_channel_list = [in_channel, self.text_channel] 39 | elif num_adapter_layers == 2: 40 | multiplier = int(np.log2(self.text_channel / in_channel)) 41 | mid_channel_list = [in_channel, in_channel * multiplier, self.text_channel] 42 | else: 43 | raise NotImplementedError 44 | 45 | adapter = basic_block_1d.MLP( 46 | mid_channel_list, 47 | norm_fn=functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1), 48 | num_layers=num_adapter_layers, last_norm_fn=last_norm 49 | ) 50 | return adapter 51 | 52 | def forward(self, batch_dict): 53 | if self.eval_only and self.training: 54 | return batch_dict 55 | 56 | backbone3d_feats = batch_dict[self.in_feature_name] 57 | 58 | # forward adapter 59 | if hasattr(self, 'adapter') and self.adapter is not None: 60 | adapter_feats = self.adapter(backbone3d_feats) 61 | else: 62 | adapter_feats = backbone3d_feats 63 | 64 | batch_dict['adapter_feats'] = adapter_feats 65 | return batch_dict 66 | -------------------------------------------------------------------------------- /pcseg/models/head/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_seg_head import TextSegHead 2 | from .binary_head import BinaryHead 3 | from .caption_head import CaptionHead 4 | from .linear_head import LinearHead 5 | from .inst_head import InstHead 6 | 7 | __all__ = { 8 | 'TextSegHead': TextSegHead, 9 | 'BinaryHead': BinaryHead, 10 | 'CaptionHead': CaptionHead, 11 | 'LinearHead': LinearHead, 12 | 'InstHead': InstHead 13 | } 14 | -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/binary_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/binary_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/caption_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/caption_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/inst_head.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/inst_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/linear_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/linear_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/binary_head.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import torch 3 | import torch.nn as nn 4 | 5 | from pcseg.utils.spconv_utils import spconv 6 | from pcseg.models.model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlockDecoder 7 | from pcseg.utils import common_utils 8 | 9 | 10 | class BinaryHead(nn.Module): 11 | def __init__(self, model_cfg, ignore_label, in_channel, block_reps, block_residual): 12 | super().__init__() 13 | self.model_cfg = model_cfg 14 | self.binary_feat_input = [] 15 | self.binary_thresh = model_cfg.THRESH 16 | self.in_channel = in_channel 17 | self.ignore_label = ignore_label 18 | self.num_filters = model_cfg.get('NUM_FILTERS', None) 19 | 20 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1) 21 | if block_residual: 22 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False)) 23 | else: 24 | block = VGGBlock 25 | 26 | if self.num_filters is not None: 27 | block_channels = self.num_filters 28 | else: 29 | # assert self.num_blocks is not None 30 | block_channels = [in_channel, 2 * in_channel, 3 * in_channel, 4 * in_channel, 5 * in_channel, 6 * in_channel, 7 * in_channel] 31 | 32 | self.binary_encoder = UBlockDecoder( 33 | block_channels, norm_fn, block_reps, block, indice_key_id=1, detach=model_cfg.get('DETACH', True) 34 | ) 35 | 36 | self.binary_classifier = spconv.SparseSequential( 37 | norm_fn(in_channel), 38 | nn.ReLU(), 39 | nn.Linear(in_channel, 1) 40 | ) 41 | self.forward_ret_dict = {} 42 | self.binary_loss_func = nn.BCEWithLogitsLoss() 43 | 44 | self.apply(self.set_bn_init) 45 | 46 | @staticmethod 47 | def set_bn_init(m): 48 | classname = m.__class__.__name__ 49 | if classname.find('BatchNorm') != -1: 50 | m.weight.data.fill_(1.0) 51 | m.bias.data.fill_(0.0) 52 | 53 | def forward(self, batch_dict): 54 | self.forward_ret_dict = {} 55 | binary_scores = self.binary_encoder(self.binary_feat_input) 56 | binary_scores = self.binary_classifier(binary_scores).features 57 | 58 | if self.training and self.model_cfg.get('VOXEL_LOSS', None): 59 | pass 60 | else: 61 | binary_scores = binary_scores[batch_dict['v2p_map'].long()] 62 | 63 | if not self.training and batch_dict['test_x4_split']: 64 | binary_scores = common_utils.merge_4_parts(binary_scores) 65 | 66 | binary_preds = (torch.sigmoid(binary_scores) > self.binary_thresh).long() 67 | 68 | self.binary_feat_input = [] 69 | self.forward_ret_dict['binary_scores'] = binary_scores 70 | self.forward_ret_dict['binary_preds'] = 
binary_preds 71 | if self.training: 72 | self.forward_ret_dict['binary_labels'] = batch_dict['binary_labels'] 73 | 74 | batch_dict['binary_ret_dict'] = self.forward_ret_dict 75 | return batch_dict 76 | 77 | def register_hook_for_binary_head(self, backbone): 78 | def get_features(): 79 | def hook(model, input, output): 80 | self.binary_feat_input.append(output) 81 | return hook 82 | 83 | for module_name in self.model_cfg.HOOK_FEATURE_LIST: 84 | eval('backbone.' + module_name).register_forward_hook(get_features()) 85 | 86 | def get_loss(self): 87 | binary_scores = self.forward_ret_dict['binary_scores'] 88 | binary_labels = self.forward_ret_dict['binary_labels'] 89 | 90 | # filter unannotated categories 91 | mask = binary_labels != self.ignore_label 92 | binary_scores = binary_scores[mask] 93 | binary_labels = binary_labels[mask] 94 | 95 | binary_loss = self.binary_loss_func(binary_scores, binary_labels.reshape(-1, 1)) 96 | binary_loss = binary_loss * self.model_cfg.get('LOSS_WEIGHT', 1.0) 97 | 98 | tb_dict = {'binary_loss': binary_loss.item()} 99 | return binary_loss, tb_dict 100 | -------------------------------------------------------------------------------- /pcseg/models/head/linear_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from pcseg.config import cfg 5 | 6 | 7 | class LinearHead(nn.Module): 8 | def __init__(self, model_cfg, in_channel, ignore_label, num_class): 9 | super(LinearHead, self).__init__() 10 | self.model_cfg = model_cfg 11 | self.in_channel = in_channel 12 | self.ignore_label = ignore_label 13 | self.num_class = num_class 14 | 15 | self.cls_head = nn.Linear(self.in_channel, self.num_class) 16 | 17 | self.valid_class_idx = [i for i in range(self.num_class)] 18 | if hasattr(cfg.DATA_CONFIG, 'ignore_class_idx'): 19 | self.ignore_class_idx = cfg.DATA_CONFIG.ignore_class_idx 20 | for i in self.ignore_class_idx: 21 | self.valid_class_idx.remove(i) 22 | 23 | self.seg_loss_func = nn.CrossEntropyLoss(ignore_index=self.ignore_label).cuda() 24 | self.forward_ret_dict = {} 25 | 26 | def forward(self, batch_dict): 27 | self.forward_ret_dict = {} 28 | backbone3d_feats = batch_dict['backbone_3d_feats'] 29 | 30 | semantic_scores = self.cls_head(backbone3d_feats) 31 | if self.training and self.model_cfg.get('VOXEL_LOSS', None): 32 | pass 33 | else: 34 | semantic_scores = semantic_scores[batch_dict['v2p_map']] 35 | 36 | semantic_scores = semantic_scores[..., self.valid_class_idx] 37 | semantic_preds = semantic_scores.max(1)[1] 38 | 39 | self.forward_ret_dict['seg_scores'] = semantic_scores 40 | self.forward_ret_dict['seg_preds'] = semantic_preds 41 | 42 | # save gt label to forward_ret_dict 43 | self.forward_ret_dict['seg_labels'] = batch_dict['labels'] 44 | 45 | def get_loss(self): 46 | semantic_scores = self.forward_ret_dict['seg_scores'] 47 | semantic_labels = self.forward_ret_dict['seg_labels'] 48 | 49 | seg_loss = self.seg_loss_func(semantic_scores, semantic_labels) 50 | 51 | tb_dict = {'loss_seg': seg_loss.item()} 52 | return seg_loss, tb_dict 53 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | 5 | def load_best_metric(ckpt_save_dir): 6 | best_metric, best_epoch = 0.0, -1 7 | best_metric_record_list = glob.glob(str(ckpt_save_dir / '*.txt')) 8 | if len(best_metric_record_list) > 0: 9 | 
best_metric_record_name = os.path.basename(best_metric_record_list[0]) 10 | best_split_list = os.path.splitext(best_metric_record_name)[0].split('_') 11 | best_metric = float(best_split_list[2]) 12 | best_epoch = int(best_split_list[-1]) 13 | return best_metric, best_epoch 14 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/basic_block_1d.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class MLP(nn.Sequential): 5 | def __init__(self, channels, norm_fn=None, num_layers=2, last_norm_fn=False, last_bias=True): 6 | assert len(channels) >= 2 7 | modules = [] 8 | for i in range(num_layers - 1): 9 | modules.append(nn.Linear(channels[i], channels[i + 1])) 10 | if norm_fn: 11 | modules.append(norm_fn(channels[i + 1])) 12 | modules.append(nn.ReLU()) 13 | modules.append(nn.Linear(channels[-2], channels[-1], bias=last_bias)) 14 | if last_norm_fn: 15 | modules.append(norm_fn(channels[-1])) 16 | modules.append(nn.ReLU()) 17 | return super().__init__(*modules) 18 | 19 | def init_weights(self): 20 | for m in self.modules(): 21 | if isinstance(m, nn.Linear): 22 | nn.init.xavier_uniform_(m.weight) 23 | nn.init.constant_(m.bias, 0) 24 | if isinstance(self[-1], nn.Linear): 25 | nn.init.normal_(self[-1].weight, 0, 0.01) 26 | nn.init.constant_(self[-1].bias, 0) 27 | 28 | 29 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm1d, **kwargs): 30 | if name == 'BasicBlock1D': 31 | block = [ 32 | nn.Linear(in_channels, out_channels), 33 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 34 | act_fn() 
35 | ] 36 | elif name == 'DeConv1dBlock': 37 | block = [ 38 | nn.ConvTranspose1d(in_channels, out_channels, **kwargs), 39 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 40 | act_fn() 41 | ] 42 | else: 43 | raise NotImplementedError 44 | 45 | return block 46 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/basic_block_2d.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class BasicBlock2D(nn.Module): 5 | def __init__(self, in_channels, out_channels, **kwargs): 6 | """ 7 | Initializes convolutional block 8 | Args: 9 | in_channels: int, Number of input channels 10 | out_channels: int, Number of output channels 11 | **kwargs: Dict, Extra arguments for nn.Conv2d 12 | """ 13 | super().__init__() 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | self.conv = nn.Conv2d(in_channels=in_channels, 17 | out_channels=out_channels, 18 | **kwargs) 19 | self.bn = nn.BatchNorm2d(out_channels) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, features): 23 | """ 24 | Applies convolutional block 25 | Args: 26 | features: (B, C_in, H, W), Input features 27 | Returns: 28 | x: (B, C_out, H, W), Output features 29 | """ 30 | x = self.conv(features) 31 | x = self.bn(x) 32 | x = self.relu(x) 33 | return x 34 | 35 | 36 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm2d, **kwargs): 37 | if name == 'BasicBlock2D': 38 | block = [ 39 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs), 40 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 41 | act_fn() 42 | ] 43 | elif name == 'DeConv2dBlock': 44 | block = [ 45 | nn.ConvTranspose2d(in_channels, out_channels, **kwargs), 46 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 47 | act_fn() 48 | ] 49 | else: 50 | raise NotImplementedError 51 | 52 | return block 53 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/fp16.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/thangvubk/SoftGroup/blob/11dcbfd74b7660a2b82ac6473af107849c7d545f/softgroup/util/fp16.py 2 | import functools 3 | from collections import abc 4 | from inspect import getfullargspec 5 | 6 | import spconv.pytorch as spconv 7 | import torch 8 | 9 | 10 | def cast_tensor_type(inputs, src_type, dst_type): 11 | if isinstance(inputs, torch.Tensor): 12 | return inputs.to(dst_type) if inputs.dtype == src_type else inputs 13 | elif isinstance(inputs, spconv.SparseConvTensor): 14 | if inputs.features.dtype == src_type: 15 | features = inputs.features.to(dst_type) 16 | inputs = inputs.replace_feature(features) 17 | return inputs 18 | elif isinstance(inputs, abc.Mapping): 19 | return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()}) 20 | elif isinstance(inputs, abc.Iterable): 21 | return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | 25 | 26 | def force_fp32(apply_to=None, out_fp16=False): 27 | 28 | def force_fp32_wrapper(old_func): 29 | 30 | @functools.wraps(old_func) 31 | def new_func(*args, **kwargs): 32 | if not isinstance(args[0], torch.nn.Module): 33 | raise TypeError('@force_fp32 can only be used to decorate the ' 34 | 'method of nn.Module') 35 | # get the arg spec of the decorated method 36 | args_info = getfullargspec(old_func) 37 | # get the 
argument names to be casted 38 | args_to_cast = args_info.args if apply_to is None else apply_to 39 | # convert the args that need to be processed 40 | new_args = [] 41 | if args: 42 | arg_names = args_info.args[:len(args)] 43 | for i, arg_name in enumerate(arg_names): 44 | if arg_name in args_to_cast: 45 | new_args.append(cast_tensor_type(args[i], torch.half, torch.float)) 46 | else: 47 | new_args.append(args[i]) 48 | # convert the kwargs that need to be processed 49 | new_kwargs = dict() 50 | if kwargs: 51 | for arg_name, arg_value in kwargs.items(): 52 | if arg_name in args_to_cast: 53 | new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float) 54 | else: 55 | new_kwargs[arg_name] = arg_value 56 | with torch.cuda.amp.autocast(enabled=False): 57 | output = old_func(*new_args, **new_kwargs) 58 | # cast the results back to fp32 if necessary 59 | if out_fp16: 60 | output = cast_tensor_type(output, torch.float, torch.half) 61 | return output 62 | 63 | return new_func 64 | 65 | return force_fp32_wrapper 66 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/rle_utils.py: -------------------------------------------------------------------------------- 1 | # Modify from https://www.kaggle.com/paulorzp/run-length-encode-and-decode 2 | import numpy as np 3 | 4 | 5 | def rle_encode(mask): 6 | """Encode RLE (Run-length-encode) from 1D binary mask. 7 | 8 | Args: 9 | mask (np.ndarray): 1D binary mask 10 | Returns: 11 | rle (dict): encoded RLE 12 | """ 13 | length = mask.shape[0] 14 | mask = np.concatenate([[0], mask, [0]]) 15 | runs = np.where(mask[1:] != mask[:-1])[0] + 1 16 | runs[1::2] -= runs[::2] 17 | counts = ' '.join(str(x) for x in runs) 18 | rle = dict(length=length, counts=counts) 19 | return rle 20 | 21 | 22 | def rle_decode(rle): 23 | """Decode rle to get binary mask. 24 | 25 | Args: 26 | rle (dict): rle of encoded mask 27 | Returns: 28 | mask (np.ndarray): decoded mask 29 | """ 30 | length = rle['length'] 31 | counts = rle['counts'] 32 | s = counts.split() 33 | starts, nums = [np.asarray(x, dtype=np.int32) for x in (s[0:][::2], s[1:][::2])] 34 | starts -= 1 35 | ends = starts + nums 36 | mask = np.zeros(length, dtype=np.uint8) 37 | for lo, hi in zip(starts, ends): 38 | mask[lo:hi] = 1 39 | return mask 40 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import torch 4 | 5 | from . 
import text_models 6 | from .prompt_template import template_meta 7 | from ...config import cfg 8 | 9 | 10 | def build_text_model(model_cfg): 11 | tokenizer, text_encoder = getattr( 12 | text_models, f'get_{model_cfg.NAME.lower()}_model' 13 | )(model_cfg.BACKBONE) 14 | 15 | text_encoder.tokenizer = tokenizer 16 | return text_encoder 17 | 18 | 19 | def load_text_embedding_from_path(text_emb_cfg): 20 | text_emb_path = os.path.join(cfg.DATA_CONFIG.DATA_PATH, text_emb_cfg.PATH) 21 | text_embedding = torch.load(text_emb_path, map_location=torch.device('cpu')).detach() 22 | if text_emb_cfg.get('NORM', True): 23 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True) 24 | print("=> loaded text embedding from path '{}'".format(text_emb_path)) 25 | return text_embedding 26 | 27 | 28 | def is_bg_class(c): 29 | return (c.lower() == 'wall') or (c.lower() == 'floor') or (c.lower() == 'ceiling') or (c.lower() =='otherfurniture') 30 | 31 | 32 | def build_text_token_from_class_names(model_cfg, class_names): 33 | if model_cfg.TEMPLATE == 'lseg': # only instance classes are encoded with prompt 34 | return [template_meta[model_cfg.TEMPLATE][0].format(c) if not is_bg_class(c) else c for c in class_names] 35 | else: 36 | return [template_meta[model_cfg.TEMPLATE][0].format(c) for c in class_names] 37 | 38 | 39 | def load_text_embedding_from_encoder(model_cfg, text_encoder, logger=logging.getLogger()): 40 | text_encoder.cuda() 41 | class_names = cfg.TEXT_ENCODER.CATEGORY_NAMES 42 | text = build_text_token_from_class_names(model_cfg, class_names) 43 | 44 | if model_cfg.NAME == 'CLIP': 45 | text_tokens = text_encoder.tokenizer(text).cuda() 46 | text_embedding = text_encoder.encode_text(text_tokens) 47 | elif model_cfg.NAME == 'BERT': 48 | text_tokens = text_encoder.tokenizer(text, return_tensors="pt", padding=True).to('cuda') 49 | text_embedding = text_encoder(**text_tokens).pooler_output 50 | else: 51 | raise NotImplementedError 52 | 53 | if model_cfg.get('NORM', True): 54 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True) 55 | logger.info("=> loaded text embedding from '{}'".format(model_cfg.NAME)) 56 | return text_embedding.detach().cpu() 57 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/prompt_template.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------- 2 | # MIT License 3 | # 4 | # Copyright (c) 2021 OpenAI 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 23 | # 24 | # Modified by Jiarui Xu 25 | # ------------------------------------------------------------------------- 26 | 27 | full_imagenet_templates = [ 28 | 'a bad photo of a {}.', 29 | 'a photo of many {}.', 30 | 'a sculpture of a {}.', 31 | 'a photo of the hard to see {}.', 32 | 'a low resolution photo of the {}.', 33 | 'a rendering of a {}.', 34 | 'graffiti of a {}.', 35 | 'a bad photo of the {}.', 36 | 'a cropped photo of the {}.', 37 | 'a tattoo of a {}.', 38 | 'the embroidered {}.', 39 | 'a photo of a hard to see {}.', 40 | 'a bright photo of a {}.', 41 | 'a photo of a clean {}.', 42 | 'a photo of a dirty {}.', 43 | 'a dark photo of the {}.', 44 | 'a drawing of a {}.', 45 | 'a photo of my {}.', 46 | 'the plastic {}.', 47 | 'a photo of the cool {}.', 48 | 'a close-up photo of a {}.', 49 | 'a black and white photo of the {}.', 50 | 'a painting of the {}.', 51 | 'a painting of a {}.', 52 | 'a pixelated photo of the {}.', 53 | 'a sculpture of the {}.', 54 | 'a bright photo of the {}.', 55 | 'a cropped photo of a {}.', 56 | 'a plastic {}.', 57 | 'a photo of the dirty {}.', 58 | 'a jpeg corrupted photo of a {}.', 59 | 'a blurry photo of the {}.', 60 | 'a photo of the {}.', 61 | 'a good photo of the {}.', 62 | 'a rendering of the {}.', 63 | 'a {} in a video game.', 64 | 'a photo of one {}.', 65 | 'a doodle of a {}.', 66 | 'a close-up photo of the {}.', 67 | 'a photo of a {}.', 68 | 'the origami {}.', 69 | 'the {} in a video game.', 70 | 'a sketch of a {}.', 71 | 'a doodle of the {}.', 72 | 'a origami {}.', 73 | 'a low resolution photo of a {}.', 74 | 'the toy {}.', 75 | 'a rendition of the {}.', 76 | 'a photo of the clean {}.', 77 | 'a photo of a large {}.', 78 | 'a rendition of a {}.', 79 | 'a photo of a nice {}.', 80 | 'a photo of a weird {}.', 81 | 'a blurry photo of a {}.', 82 | 'a cartoon {}.', 83 | 'art of a {}.', 84 | 'a sketch of the {}.', 85 | 'a embroidered {}.', 86 | 'a pixelated photo of a {}.', 87 | 'itap of the {}.', 88 | 'a jpeg corrupted photo of the {}.', 89 | 'a good photo of a {}.', 90 | 'a plushie {}.', 91 | 'a photo of the nice {}.', 92 | 'a photo of the small {}.', 93 | 'a photo of the weird {}.', 94 | 'the cartoon {}.', 95 | 'art of the {}.', 96 | 'a drawing of the {}.', 97 | 'a photo of the large {}.', 98 | 'a black 
and white photo of a {}.', 99 | 'the plushie {}.', 100 | 'a dark photo of a {}.', 101 | 'itap of a {}.', 102 | 'graffiti of the {}.', 103 | 'a toy {}.', 104 | 'itap of my {}.', 105 | 'a photo of a cool {}.', 106 | 'a photo of a small {}.', 107 | 'a tattoo of the {}.', 108 | ] 109 | 110 | sub_imagenet_template = [ 111 | 'itap of a {}.', 'a bad photo of a {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 112 | 'art of the {}.', 'a photo of the small {}.' 113 | ] 114 | 115 | simple_imagenet_template = [ 116 | 'a photo of a {}.', 117 | ] 118 | 119 | identity_template = [ 120 | '{}', 121 | ] 122 | 123 | lseg_template = [ 124 | 'a {} in a scene', 125 | ] 126 | 127 | template_meta = { 128 | 'full': full_imagenet_templates, 129 | 'subset': sub_imagenet_template, 130 | 'simple': simple_imagenet_template, 131 | 'identity': identity_template, 132 | 'lseg': lseg_template, 133 | } 134 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/text_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from clip import clip 5 | 6 | from ...config import cfg 7 | from ...utils import commu_utils 8 | 9 | 10 | def get_clip_model(backbone_name): 11 | url = clip._MODELS[backbone_name] 12 | if cfg.LOCAL_RANK == 0: # only download once at master node 13 | model_path = clip._download(url, os.path.expanduser("~/.cache/clip")) 14 | else: 15 | model_path = _return_clip_path(url, os.path.expanduser("~/.cache/clip")) 16 | commu_utils.synchronize() 17 | 18 | try: 19 | # loading JIT archive 20 | model = torch.jit.load(model_path, map_location="cpu").eval() 21 | state_dict = model.state_dict() 22 | except RuntimeError: 23 | state_dict = torch.load(model_path, map_location="cpu") 24 | 25 | model = clip.build_model(state_dict) 26 | return clip.tokenize, model 27 | 28 | 29 | def get_bert_model(name): 30 | from transformers import AutoTokenizer, AutoModel 31 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 32 | tokenizer = AutoTokenizer.from_pretrained(name, local_files_only=True) 33 | model = AutoModel.from_pretrained(name, local_files_only=True) 34 | return tokenizer, model 35 | 36 | 37 | def _return_clip_path(url: str, root: str): 38 | filename = os.path.basename(url) 39 | download_target = os.path.join(root, filename) 40 | return download_target 41 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .spconv_unet_indoor import SparseUNetIndoor 2 | 3 | __all__ = { 4 | 'SparseUNetIndoor': SparseUNetIndoor 5 | } 6 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc 
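A minimal sketch of how a frozen CLIP category-embedding file, such as the ones referenced by TASK_HEAD.TEXT_EMBED.PATH, could be produced offline with the standard CLIP API. The class list, prompt template, and output filename below are illustrative only; tools/process_tools/generate_category_embedding.py is the repository's own script for this step.

import torch
import clip  # standard OpenAI CLIP package, installed via requirements.txt

class_names = ['wall', 'floor', 'chair', 'table']            # illustrative class list
model, _ = clip.load('ViT-B/16', device='cuda')              # frozen text encoder
tokens = clip.tokenize([f'a {c} in a scene' for c in class_names]).cuda()
with torch.no_grad():
    text_embed = model.encode_text(tokens).float()
text_embed = text_embed / text_embed.norm(dim=-1, keepdim=True)  # matches NORM: True in the configs
torch.save(text_embed.cpu(), 'example_clip-ViT-B16_embed.pth')   # hypothetical output name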
-------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/spconv_unet_indoor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import functools 3 | import torch.nn as nn 4 | 5 | from ...utils.spconv_utils import spconv 6 | from ..model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlock 7 | from ...utils import common_utils 8 | 9 | 10 | class SparseUNetIndoor(nn.Module): 11 | def __init__(self, model_cfg): 12 | super(SparseUNetIndoor, self).__init__() 13 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1) 14 | 15 | self.model_cfg = model_cfg 16 | self.in_channel = model_cfg.IN_CHANNEL 17 | self.mid_channel = model_cfg.MID_CHANNEL 18 | self.block_reps = model_cfg.BLOCK_REPS 19 | self.block_residual = model_cfg.BLOCK_RESIDUAL 20 | self.num_blocks = model_cfg.get('NUM_BLOCKS', None) 21 | self.num_filters = model_cfg.get('NUM_FILTERS', None) 22 | 23 | if self.block_residual: 24 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False)) 25 | else: 26 | block = VGGBlock 27 | 28 | self.input_conv = spconv.SparseSequential( 29 | spconv.SubMConv3d( 30 | self.in_channel, self.mid_channel, kernel_size=3, padding=1, bias=False, indice_key='subm1' 31 | ) 32 | ) 33 | 34 | if self.num_filters is not None: 35 | block_channels = self.num_filters 36 | else: 37 | assert self.num_blocks is not None 38 | block_channels = [self.mid_channel * (i + 1) for i in range(self.num_blocks)] 39 | 40 | self.unet = UBlock(block_channels, norm_fn, self.block_reps, block, indice_key_id=1) 41 | self.output_layer = spconv.SparseSequential( 42 | norm_fn(self.mid_channel), nn.ReLU() 43 | ) 44 | 45 | # init parameters 46 | self.apply(self.set_bn_init) 47 | 48 | @staticmethod 49 | def set_bn_init(m): 50 | classname = m.__class__.__name__ 51 | if classname.find('BatchNorm') != -1: 52 | m.weight.data.fill_(1.0) 53 | m.bias.data.fill_(0.0) 54 | 55 | def forward(self, batch_dict): 56 | input_sp_tensor = spconv.SparseConvTensor( 57 | batch_dict['voxel_features'], batch_dict['voxel_coords'].int(), 58 | batch_dict['spatial_shape'], batch_dict['batch_size'] 59 | ) 60 | output = self.input_conv(input_sp_tensor) 61 | output = self.unet(output) 62 | output = self.output_layer(output) 63 | output_feats = output.features 64 | # if not self.training and batch_dict['test_x4_split']: 65 | # output_feats = common_utils.merge_4_parts(output_feats) 66 | 67 | batch_dict['backbone_3d_feats'] = output_feats 68 | return batch_dict 69 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__init__.py: -------------------------------------------------------------------------------- 1 | from .vfe_template import VFETemplate 2 | from .indoor_vfe import IndoorVFE 3 | 4 | 5 | __all__ = { 6 | 'VFETemplate': VFETemplate, 7 | 'IndoorVFE': IndoorVFE 8 | } 9 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/indoor_vfe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .vfe_template import VFETemplate 4 | from ....external_libs.softgroup_ops.ops import functions as sg_ops 5 | 6 | 7 | class IndoorVFE(VFETemplate): 8 | def __init__(self, model_cfg, voxel_mode, **kwargs): 9 | super(IndoorVFE, self).__init__(model_cfg) 10 | self.use_xyz = model_cfg.get('USE_XYZ', False) 11 | self.voxel_mode = voxel_mode 12 | 13 | def forward(self, batch): 14 | batch_size = batch['batch_size'] 15 | # voxelization 16 | # current implementation cannot support cuda 17 | # TODO: modify the voxelization part 18 | voxel_coords, v2p_map, p2v_map = sg_ops.voxelization_idx( 19 | batch['points_xyz_voxel_scale'].cpu(), batch_size, self.voxel_mode 20 | ) 21 | voxel_coords, v2p_map, p2v_map = voxel_coords.cuda(), v2p_map.cuda(), p2v_map.cuda() 22 | 23 | feats = batch['feats'] # (N, C), float32, cuda 24 | 25 | voxel_feats = sg_ops.voxelization(feats, p2v_map, self.voxel_mode) 26 | 27 | batch.update({ 28 | 'voxel_features': voxel_feats, 29 | 'v2p_map': v2p_map.long(), 30 | 'voxel_coords': voxel_coords 31 | }) 32 | 33 | return batch 34 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/vfe_template.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class VFETemplate(nn.Module): 5 | def __init__(self, model_cfg, **kwargs): 6 | super().__init__() 7 | self.model_cfg = model_cfg 8 | 9 | def get_output_feature_dim(self): 10 | raise NotImplementedError 11 | 12 | def forward(self, **kwargs): 13 | """ 14 | Args: 15 | **kwargs: 16 | 17 | Returns: 18 | batch_dict: 19 | ... 
20 | vfe_features: (num_voxels, C) 21 | """ 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .network_template import ModelTemplate 2 | from .sparseunet_textseg import SparseUNetTextSeg 3 | 4 | __all__ = { 5 | 'ModelTemplate': ModelTemplate, 6 | 'SparseUNetTextSeg': SparseUNetTextSeg 7 | } 8 | 9 | 10 | def build_model(model_cfg, num_class, dataset): 11 | model = __all__[model_cfg.NAME]( 12 | model_cfg=model_cfg, num_class=num_class, dataset=dataset 13 | ) 14 | 15 | return model 16 | -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/sparseunet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/sparseunet.py -------------------------------------------------------------------------------- /pcseg/models/vision_networks/sparseunet_textseg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .network_template import ModelTemplate 3 | 4 | 5 | class SparseUNetTextSeg(ModelTemplate): 6 | def __init__(self, model_cfg, num_class, dataset): 7 | super().__init__(model_cfg, num_class, dataset) 8 | if model_cfg.get('BINARY_HEAD', None): 9 | self.binary_head.register_hook_for_binary_head(self.backbone_3d) 10 | 11 | def forward(self, batch_dict): 12 | batch_dict['test_x4_split'] = self.test_x4_split 13 | # Order: vfe, backbone_3d, binary_head, seg_head, caption_head 14 | for cur_module in self.module_list: 15 | batch_dict = cur_module(batch_dict) 16 | 17 | ret_dict = self.task_head.forward_ret_dict 18 | if self.training: 19 | loss, tb_dict, disp_dict = self.get_training_loss() 20 | 21 | ret_dict['loss'] = loss 22 | return ret_dict, tb_dict, disp_dict 23 | else: 24 | if hasattr(self, 'inst_head') and self.inst_head is not None: 25 | ret_dict.update(self.inst_head.forward_ret_dict) 26 | return ret_dict 27 | 28 | def get_training_loss(self): 29 | disp_dict = {} 30 | tb_dict = {} 31 | 32 | # for segmentation loss 33 | if not self.task_head.eval_only: 34 | seg_loss, tb_dict_seg = 
self.task_head.get_loss() 35 | tb_dict.update(tb_dict_seg) 36 | else: 37 | seg_loss = 0 38 | 39 | # for binary loss 40 | if self.binary_head is not None: 41 | binary_loss, tb_dict_binary = self.binary_head.get_loss() 42 | tb_dict.update(tb_dict_binary) 43 | else: 44 | binary_loss = 0 45 | 46 | # for caption loss 47 | if self.caption_head is not None: 48 | caption_loss, tb_dict_caption = self.caption_head.get_loss() 49 | tb_dict.update(tb_dict_caption) 50 | else: 51 | caption_loss = 0 52 | 53 | # for inst loss 54 | if self.inst_head is not None: 55 | inst_loss, tb_dict_inst = self.inst_head.get_loss() 56 | tb_dict.update(tb_dict_inst) 57 | else: 58 | inst_loss = 0 59 | 60 | loss = seg_loss + binary_loss + caption_loss + inst_loss 61 | tb_dict['loss'] = loss.item() 62 | disp_dict.update(tb_dict) 63 | 64 | return loss, tb_dict, disp_dict 65 | -------------------------------------------------------------------------------- /pcseg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/utils/__init__.py -------------------------------------------------------------------------------- /pcseg/utils/arnold_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class ArnoldUtils(): 5 | def __init__(self, enabled, arnold_dir, logger) -> None: 6 | self.enabled = enabled 7 | self.logger = logger 8 | self.dir = arnold_dir 9 | 10 | def save_ckpt(self, ckpt_path, last_epoch=False): 11 | if self.enabled: 12 | ckpt_dir, file_name = os.path.split(ckpt_path) 13 | # import ipdb; ipdb.set_trace(context=10) 14 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:] 15 | os.system('hdfs dfs -mkdir -p hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir, _ckpt_dir)) 16 | if last_epoch: 17 | tgt_path = os.path.join(self.dir, _ckpt_dir, 'last_train.pth') 18 | else: 19 | tgt_path = os.path.join(self.dir, _ckpt_dir, file_name) 20 | os.system('hdfs dfs -put -f {} hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(ckpt_path, tgt_path)) 21 | self.logger.info('Put model to hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(tgt_path)) 22 | 23 | def load_ckpt(self, ckpt_dir): 24 | if self.enabled: 25 | try: 26 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:] 27 | os.system('hdfs dfs -get hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}/* {}'.format(self.dir, _ckpt_dir, ckpt_dir)) 28 | self.logger.info('Get model from hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir,_ckpt_dir)) 29 | except: 30 | pass 31 | -------------------------------------------------------------------------------- /pcseg/utils/caption_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | 6 | from . 
import commu_utils 7 | from ..config import cfg 8 | 9 | 10 | def get_caption_batch(caption_cfg, text_cfg, batch_dict, text_encoder): 11 | caption_infos = {} 12 | caption_data = batch_dict['caption_data'] 13 | 14 | num_captions = 0 15 | for key in caption_cfg: 16 | if key in caption_cfg['KEY'] and caption_cfg[key].ENABLED: 17 | caption, idx = caption_data[key.lower()]['caption'], caption_data[key.lower()]['idx'] 18 | num_captions += len(caption) 19 | 20 | # caption_embed: (K, 512), caption_idx: (N), (N > K) 21 | caption_embed, caption_idx = extract_caption_embed(caption, caption_cfg[key], text_cfg, text_encoder, cfg.LOCAL_RANK) 22 | normed_caption_embed = torch.nn.functional.normalize(caption_embed, dim=-1) 23 | 24 | caption_infos['caption_{}'.format(key.lower())] = { 25 | 'caption_embed': normed_caption_embed, 'caption_idx': caption_idx, 'select_image_corr': idx 26 | } 27 | 28 | batch_dict['caption_infos'] = caption_infos 29 | batch_dict['num_caption'] = num_captions / batch_dict['batch_size'] 30 | return batch_dict 31 | 32 | 33 | def extract_caption_embed(image_captions, caption_cfg, text_cfg, text_encoder, rank): 34 | # (B*K, 512) 35 | 36 | if caption_cfg.get('GATHER_CAPTION', True): 37 | image_captions_list = commu_utils.all_gather(image_captions) 38 | image_captions_all = [jj for ii in image_captions_list for jj in ii] 39 | num_caption_list = [len(ii) for ii in image_captions_list] 40 | else: 41 | image_captions_all = image_captions 42 | num_caption_list = [0] * 100 43 | num_caption_list[rank] = len(image_captions_all) 44 | caption_embed_all = forward_text_encoder(image_captions_all, text_encoder) 45 | 46 | # remove duplicate captions and re-index them 47 | if text_cfg.get('REMOVE_DUPLICATE_CAPTIONS', True): 48 | num_caption_list = torch.LongTensor([0] + num_caption_list).cuda() 49 | idx = torch.arange(num_caption_list[rank + 1]).long().cuda() + torch.cumsum(num_caption_list, 0)[rank] 50 | caption_embeds, unique_indices = torch.unique(caption_embed_all, dim=0, return_inverse=True) 51 | caption_idx = unique_indices[idx] 52 | else: 53 | caption_embeds = caption_embed_all 54 | caption_idx = torch.arange(caption_embed_all.shape[0]).long().cuda() 55 | 56 | return caption_embeds, caption_idx 57 | 58 | 59 | def forward_text_encoder(image_captions, text_encoder): 60 | with torch.no_grad(): 61 | if len(image_captions) > 0: 62 | if cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'CLIP': 63 | text_tokens = text_encoder.tokenizer(image_captions, truncate=True).cuda() 64 | text_embed = text_encoder.encode_text(text_tokens).float() 65 | elif cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'Bert': 66 | text_tokens = text_encoder.tokenizer(image_captions, return_tensors="pt", padding=True).to('cuda') 67 | text_embed = text_encoder(**text_tokens).pooler_output 68 | else: 69 | raise NotImplementedError 70 | else: 71 | text_embed = torch.zeros((0, cfg.MODEL.TASK_HEAD.TEXT_EMBED.CHANNEL), dtype=torch.float32).cuda() 72 | return text_embed 73 | 74 | 75 | def select_images(caption_cfg, image_name, image_corr): 76 | """ 77 | TODO: put this part into dataset 78 | Select part of images for training 79 | """ 80 | batch_size = len(image_name) 81 | if caption_cfg.get('SAMPLE', 1) > 1: 82 | random_start = np.random.randint(caption_cfg.SAMPLE) 83 | image_name = [(np.array(image_name[i])[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)] 84 | image_corr = [(np.array(image_corr[i], dtype=object)[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)] 85 | if caption_cfg.SELECT == 'ratio' and 
caption_cfg.RATIO == 1.0: 86 | return image_name, image_corr 87 | 88 | selected_image_name = [] 89 | selected_image_corr = [] 90 | 91 | for i in range(batch_size): 92 | if image_name[i] is None or len(image_name[i]) == 0: # lack 2d data 93 | selected_image_name.append([]) 94 | selected_image_corr.append([]) 95 | selected_idx = None 96 | elif caption_cfg.SELECT == 'fixed': 97 | # view-level caotion: random select fixed number 98 | num = int(caption_cfg.NUM) 99 | selected_idx = np.random.choice(len(image_name[i]), min(num, len(image_name[i])), replace=False) 100 | elif caption_cfg.SELECT == 'ratio': 101 | # sequence slicing 102 | ratio = caption_cfg.RATIO 103 | selected_idx = np.random.choice(len(image_name[i]), max(1, int(len(image_name[i]) * ratio)), replace=False) 104 | elif caption_cfg.SELECT == 'hybrid': 105 | num = max(int(caption_cfg.NUM), int(len(image_name[i]) * caption_cfg.RATIO)) 106 | selected_idx = np.random.choice(len(image_name[i]), min(max(1, num), len(image_name[i])), replace=False) 107 | else: 108 | raise NotImplementedError 109 | 110 | if selected_idx is not None: 111 | selected_image_name.append(np.array(image_name[i])[selected_idx].tolist()) 112 | selected_image_corr.append( 113 | np.array(image_corr[i], dtype=object)[selected_idx].tolist() 114 | ) 115 | 116 | return selected_image_name, selected_image_corr 117 | 118 | -------------------------------------------------------------------------------- /pcseg/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class CosineSimilarityLoss(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, input, target, mask): 11 | selected_input = input[mask] 12 | cos_similarity = nn.functional.cosine_similarity(selected_input, target).mean() 13 | return 1 - cos_similarity 14 | 15 | 16 | class BYOLLoss(nn.Module): 17 | def __init__(self) -> None: 18 | super().__init__() 19 | 20 | def forward(self, input, target): 21 | loss = 2 - 2 * (input * target).sum(dim=-1) 22 | return loss.mean() 23 | -------------------------------------------------------------------------------- /pcseg/utils/metric_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_open_vocab_metric(metric_class, base_class_idx, novel_class_idx): 5 | if isinstance(metric_class, list): 6 | metric_class = np.array(metric_class) 7 | metric_base = np.mean(metric_class[base_class_idx]) 8 | metric_novel = np.mean(metric_class[novel_class_idx]) 9 | h_metric = 2 * metric_base * metric_novel / (metric_base + metric_novel + 10e-10) 10 | m_metric = (metric_base * len(base_class_idx) + metric_novel * len(novel_class_idx)) / (len(base_class_idx) + len(novel_class_idx)) 11 | return h_metric, m_metric, metric_base, metric_novel 12 | 13 | 14 | def cal_ov_metrics(cfg, logger, class_names, iou_class, acc_class, binary_acc_class): 15 | base_class_idx = cfg.DATA_CONFIG.base_class_idx 16 | novel_class_idx = cfg.DATA_CONFIG.novel_class_idx 17 | if cfg.DATA_CONFIG.get('trainonly_class_idx', None): 18 | trainonly_class_idx = cfg.DATA_CONFIG.trainonly_class_idx 19 | base_class_idx = [idx for idx in base_class_idx if idx not in trainonly_class_idx] 20 | novel_class_idx = [idx for idx in novel_class_idx if idx not in trainonly_class_idx] 21 | 22 | logger.info('----------- base class -----------') 23 | for i in base_class_idx: 24 | logger.info('Class 
{} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format( 25 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i]) 26 | ) 27 | logger.info('----------- novel class -----------') 28 | for i in novel_class_idx: 29 | logger.info('Class {} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format( 30 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i]) 31 | ) 32 | hiou, miou, iou_base, iou_novel = get_open_vocab_metric( 33 | iou_class, base_class_idx, novel_class_idx 34 | ) 35 | hacc, macc, acc_base, acc_novel = get_open_vocab_metric( 36 | acc_class, base_class_idx, novel_class_idx 37 | ) 38 | return hiou, miou, iou_base, iou_novel, hacc, macc, acc_base, acc_novel 39 | -------------------------------------------------------------------------------- /pcseg/utils/spconv_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Set 2 | 3 | try: 4 | import spconv.pytorch as spconv 5 | except: 6 | import spconv as spconv 7 | 8 | import torch.nn as nn 9 | 10 | 11 | def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]: 12 | """ 13 | Finds all spconv keys that need to have weight's transposed 14 | """ 15 | found_keys: Set[str] = set() 16 | for name, child in model.named_children(): 17 | new_prefix = f"{prefix}.{name}" if prefix != "" else name 18 | 19 | if isinstance(child, spconv.conv.SparseConvolution): 20 | new_prefix = f"{new_prefix}.weight" 21 | found_keys.add(new_prefix) 22 | 23 | found_keys.update(find_all_spconv_keys(child, prefix=new_prefix)) 24 | 25 | return found_keys 26 | 27 | 28 | def replace_feature(out, new_features): 29 | if "replace_feature" in out.__dir__(): 30 | # spconv 2.x behaviour 31 | return out.replace_feature(new_features) 32 | else: 33 | out.features = new_features 34 | return out 35 | -------------------------------------------------------------------------------- /pcseg/utils/voxelize_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxelization manner from Xin Lai 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def ravel_hash_vec(arr): 9 | """ 10 | Ravel the coordinates after subtracting the min coordinates. 
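Each shifted integer coordinate row is folded into a single scalar key (in the spirit of np.ravel_multi_index), so duplicate voxels can later be grouped with np.unique on the keys.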
11 | """ 12 | assert arr.ndim == 2 13 | arr = arr.copy() 14 | arr -= arr.min(0) 15 | arr = arr.astype(np.uint64, copy=False) 16 | arr_max = arr.max(0).astype(np.uint64) + 1 17 | 18 | keys = np.zeros(arr.shape[0], dtype=np.uint64) 19 | # Fortran style indexing 20 | for j in range(arr.shape[1] - 1): 21 | keys += arr[:, j] 22 | keys *= arr_max[j + 1] 23 | keys += arr[:, -1] 24 | return keys 25 | 26 | 27 | def fnv_hash_vec(arr): 28 | """ 29 | FNV64-1A 30 | """ 31 | assert arr.ndim == 2 32 | # Floor first for negative coordinates 33 | arr = arr.copy() 34 | arr = arr.astype(np.uint64, copy=False) 35 | hashed_arr = np.uint64(14695981039346656037) * np.ones(arr.shape[0], dtype=np.uint64) 36 | for j in range(arr.shape[1]): 37 | hashed_arr *= np.uint64(1099511628211) 38 | hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) 39 | return hashed_arr 40 | 41 | 42 | def voxelize_with_rec_idx(coord, voxel_size=0.05, hash_type='fnv', training=True): 43 | discrete_coord = np.floor(coord / np.array(voxel_size)) 44 | if hash_type == 'ravel': 45 | key = ravel_hash_vec(discrete_coord) 46 | else: 47 | key = fnv_hash_vec(discrete_coord) 48 | 49 | idx_sort = np.argsort(key) 50 | key_sort = key[idx_sort] 51 | _, count = np.unique(key_sort, return_counts=True) 52 | if training: 53 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + np.random.randint(0, count.max(), count.size) % count 54 | else: 55 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) 56 | 57 | idx_unique = idx_sort[idx_select] 58 | sorted_idx = np.zeros(key.shape[0]).astype(np.int) 59 | sorted_idx[idx_select] = 1 60 | sorted_idx = np.cumsum(sorted_idx) - 1 61 | idx_recon = np.zeros(key.shape[0]).astype(np.int) 62 | idx_recon[idx_sort] = sorted_idx 63 | return idx_unique, idx_recon 64 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | numpy 3 | torch==1.8.1+cu111 4 | torchvision==0.9.1+cu111 5 | tensorboardX 6 | easydict 7 | pyyaml 8 | tqdm 9 | SharedArray 10 | scipy 11 | opencv-python 12 | plyfile 13 | matplotlib 14 | scikit-learn 15 | scikit-image 16 | pandas 17 | transformers 18 | clip @ git+https://github.com/openai/CLIP.git 19 | spconv-cu111 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from setuptools import find_packages, setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | 8 | def get_git_commit_number(): 9 | if not os.path.exists('.git'): 10 | return '0000000' 11 | 12 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE) 13 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7] 14 | return git_commit_number 15 | 16 | 17 | def make_cuda_ext(name, module, sources): 18 | cuda_ext = CUDAExtension( 19 | name='%s.%s' % (module, name), 20 | sources=[os.path.join(*module.split('.'), src) for src in sources] 21 | ) 22 | return cuda_ext 23 | 24 | 25 | def write_version_to_file(version, target_file): 26 | with open(target_file, 'w') as f: 27 | print('__version__ = "%s"' % version, file=f) 28 | 29 | 30 | if __name__ == '__main__': 31 | version = '0.1.0+%s' % get_git_commit_number() 32 | write_version_to_file(version, 'pcseg/version.py') 33 | 34 | setup( 35 | name='pcseg', 36 | version=version, 37 | description='PCSeg', 38 | install_requires=[ 39 | 'numpy', 40 | 
'tensorboardX', 41 | 'easydict', 42 | 'pyyaml', 43 | 'tqdm', 44 | 'SharedArray', 45 | # 'spconv', # spconv has different names depending on the cuda version 46 | ], 47 | 48 | author='Jihan Yang', 49 | author_email='jihanyang13@gmail.com', 50 | license='Apache License 2.0', 51 | packages=find_packages(exclude=['tools', 'data', 'output']), 52 | cmdclass={ 53 | 'build_ext': BuildExtension, 54 | }, 55 | ext_modules=[], 56 | ) 57 | -------------------------------------------------------------------------------- /tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../') -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/s3dis_dataset.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/s3dis 2 | DATASET: S3DISDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .npy 11 | test_area: 5 12 | 13 | IGNORE_LABEL: -100 14 | 15 | DATA_AUG: 16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 17 | scene_aug: 18 | scaling_scene: 19 | enabled: False 20 | p: 1.0 21 | value: [0.9, 1.1] 22 | 23 | rotation: 24 | p: 1.0 25 | value: [0.0, 0.0, 1.0] 26 | 27 | jitter: True 28 | color_jitter: True 29 | 30 | flip: 31 | p: 0.5 32 | 33 | random_jitter: 34 | enabled: False 35 | value: 0.01 36 | accord_to_size: False 37 | p: 1.0 38 | 39 | elastic: 40 | enabled: True 41 | value: [[6, 40], [20, 160]] 42 | apply_to_feat: False 43 | p: 1.0 44 | 45 | crop: 46 | step: 64 47 | 48 | shuffle: True 49 | 50 | DATA_PROCESSOR: 51 | repeat: 20 52 | rgb_norm: True 53 | point_range: 200000000 54 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 55 | cache: True 56 | max_npoint: 250000 57 | full_scale: [128, 512] 58 | voxel_mode: 4 59 | xyz_norm: False 60 | x4_split: True 61 | downsampling_scale: 4 62 | xyz_as_feat: True 63 | rgb_as_feat: True 64 | 65 | PROCESS_LIST: [] 66 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/s3dis_dataset_image.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/s3dis 2 | DATASET: S3DISDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .npy 11 | test_area: 5 12 | 13 | IGNORE_LABEL: -100 14 | 15 | DATA_AUG: 16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 17 | scene_aug: 18 | scaling_scene: 19 | enabled: False 20 | p: 1.0 21 | value: [0.9, 1.1] 22 | 23 | rotation: 24 | p: 1.0 25 | value: [0.0, 0.0, 1.0] 26 | 27 | jitter: True 28 | color_jitter: True 29 | 30 | flip: 31 | p: 0.5 32 | 33 | random_jitter: 34 | enabled: False 35 | value: 0.01 36 | accord_to_size: False 37 | p: 1.0 38 | 39 | elastic: 40 | enabled: True 41 | value: [[6, 40], [20, 160]] 42 | apply_to_feat: False 43 | p: 1.0 44 | 45 | crop: 46 | step: 64 47 | 48 | shuffle: True 49 | 50 | DATA_PROCESSOR: 51 | repeat: 20 52 | rgb_norm: True 53 | point_range: 200000000 54 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 55 | cache: False 56 | max_npoint: 250000 57 | full_scale: [128, 512] 58 | voxel_mode: 4 59 | xyz_norm: False 60 | x4_split: True 61 | downsampling_scale: 4 62 | xyz_as_feat: True 63 | rgb_as_feat: True 64 | 65 | PROCESS_LIST: [] 66 | 67 | 68 | IMAGE_PATH: s3dis_2d 69 | DEPTH_IMAGE_SCALE: [1080, 1080] 70 | 
LOAD_IMAGE: True 71 | MERGE_IDX: True 72 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/scannet_dataset.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/scannetv2 2 | DATASET: ScanNetDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .pth 11 | 12 | IGNORE_LABEL: -100 13 | 14 | DATA_AUG: 15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 16 | scene_aug: 17 | scaling_scene: 18 | enabled: False 19 | p: 1.0 20 | value: [0.9, 1.1] 21 | 22 | rotation: 23 | p: 1.0 24 | value: [0.0, 0.0, 1.0] 25 | 26 | jitter: True 27 | color_jitter: True 28 | 29 | flip: 30 | p: 0.5 31 | 32 | random_jitter: 33 | enabled: False 34 | value: 0.01 35 | accord_to_size: False 36 | p: 1.0 37 | 38 | elastic: 39 | enabled: True 40 | value: [[6, 40], [20, 160]] 41 | apply_to_feat: False 42 | p: 1.0 43 | 44 | crop: 45 | step: 32 46 | 47 | shuffle: True 48 | 49 | DATA_PROCESSOR: 50 | repeat: 4 51 | rgb_norm: True 52 | point_range: 200000000 53 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 54 | cache: True 55 | max_npoint: 250000 56 | full_scale: [128, 512] 57 | voxel_mode: 4 58 | xyz_norm: False 59 | xyz_as_feat: True 60 | rgb_as_feat: True 61 | 62 | PROCESS_LIST: [] 63 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/scannet_dataset_image.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/scannetv2 2 | DATASET: ScanNetDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .pth 11 | 12 | IGNORE_LABEL: -100 13 | 14 | DATA_AUG: 15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 16 | scene_aug: 17 | scaling_scene: 18 | enabled: False 19 | p: 1.0 20 | value: [0.9, 1.1] 21 | 22 | rotation: 23 | p: 1.0 24 | value: [0.0, 0.0, 1.0] 25 | 26 | jitter: True 27 | color_jitter: True 28 | 29 | flip: 30 | p: 0.5 31 | 32 | random_jitter: 33 | enabled: False 34 | value: 0.01 35 | accord_to_size: False 36 | p: 1.0 37 | 38 | elastic: 39 | enabled: True 40 | value: [[6, 40], [20, 160]] 41 | apply_to_feat: False 42 | p: 1.0 43 | 44 | crop: 45 | step: 32 46 | 47 | shuffle: True 48 | 49 | DATA_PROCESSOR: 50 | repeat: 4 51 | rgb_norm: True 52 | point_range: 200000000 53 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 54 | cache: True 55 | max_npoint: 250000 56 | full_scale: [128, 512] 57 | voxel_mode: 4 58 | xyz_norm: False 59 | xyz_as_feat: True 60 | rgb_as_feat: True 61 | 62 | PROCESS_LIST: [] 63 | 64 | 65 | IMAGE_PATH: scannet_frames_25k 66 | DEPTH_IMAGE_SCALE: [480, 640] 67 | LOAD_IMAGE: True 68 | MERGE_IDX: True -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter] 2 | 3 | DATA_CONFIG: 4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml 5 | DATASET: S3DISInstDataset 6 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] 7 | sem2ins_classes: [ 0, 1 ] 8 | inst_label_shift: 0 9 | 10 | ignore_class_idx: [12] 11 | 12 | MODEL: 13 | NAME: SparseUNetTextSeg 14 | REMAP_FROM_3DLANG: False 15 | 16 | VFE: 17 | 
NAME: IndoorVFE 18 | USE_XYZ: True 19 | 20 | BACKBONE_3D: 21 | NAME: SparseUNetIndoor 22 | IN_CHANNEL: 6 23 | MID_CHANNEL: 16 24 | BLOCK_RESIDUAL: True 25 | BLOCK_REPS: 2 26 | NUM_BLOCKS: 7 27 | CUSTOM_SP1X1: True 28 | 29 | ADAPTER: 30 | NAME: VLAdapter 31 | EVAL_ONLY: False 32 | NUM_ADAPTER_LAYERS: 2 33 | TEXT_DIM: -1 34 | LAST_NORM: True 35 | 36 | TASK_HEAD: 37 | NAME: TextSegHead 38 | FEAT_NORM: False 39 | 40 | LAST_NORM: True 41 | TEXT_EMBED: 42 | NAME: CLIP 43 | NORM: True 44 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth 45 | FEAT_NORM: False 46 | LOGIT_SCALE: 47 | value: 1.0 48 | learnable: False 49 | 50 | INST_HEAD: 51 | NAME: InstHead 52 | 53 | BLOCK_RESIDUAL: True 54 | CUSTOM_SP1X1: True 55 | 56 | CLUSTERING: 57 | PREPARE_EPOCH: 20 58 | GROUPING_CFG: 59 | SCORE_THR: 0.2 60 | RADIUS: 0.04 61 | MEAN_ACTIVE: 300 62 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 63 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.] 64 | NPOINT_THR: 500 # absolute if class_numpoint == -1, relative if class_numpoint != -1 65 | IGNORE_CLASSES: [0, 1] 66 | INST_VOXEL_CFG: 67 | SCALE: 50 68 | SPATIAL_SHAPE: 20 69 | LOSS_CFG: 70 | MAX_PROPOSAL_NUM: 200 71 | POS_IOU_THR: 0.5 72 | TEST_CFG: 73 | # x4_split: False 74 | CLS_SCORE_THR: 0.001 75 | MASK_SCORE_THR: -0.5 76 | MIN_NPOINT: 100 77 | 78 | FIXED_MODULES: [] 79 | SEMANTIC_ONLY: False 80 | 81 | 82 | TEXT_ENCODER: 83 | NAME: CLIP 84 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 85 | TEMPLATE: identity 86 | EXTRACT_EMBED: False # Online extract text embeding from class or not 87 | # BERT: 88 | # BACKBONE: bert-base-uncased 89 | 90 | OPTIMIZATION: 91 | TEST_BATCH_SIZE_PER_GPU: 1 92 | BATCH_SIZE_PER_GPU: 4 93 | NUM_EPOCHS: 64 94 | LR: 0.004 # 4e-3 95 | SCHEDULER: cos_after_step 96 | OPTIMIZER: adamw 97 | WEIGHT_DECAY: 0.0001 98 | MOMENTUM: 0.9 99 | STEP_EPOCH: 40 100 | MULTIPLIER: 0.1 101 | CLIP_GRAD: False 102 | PCT_START: 0.52 103 | DIV_FACTOR: 2 104 | MOMS: [0.95, 0.85] 105 | LR_CLIP: 0.000001 106 | 107 | OTHERS: 108 | PRINT_FREQ: 20 109 | SYNC_BN: False 110 | USE_AMP: True 111 | EVAL_FREQ: 5 112 | FIND_UNUSED_PARAMETERS: True 113 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_base6_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [0, 2, 3, 4, 8, 9] 5 | novel_class_idx: [1, 5, 6, 7, 10, 11] 6 | ignore_class_idx: [12] 7 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ] 6 | novel_class_idx: [ 5, 7, 9, 10 ] 7 | ignore_class_idx: [ 12 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | SCENE: 13 | ENABLED: False 14 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json 15 | GATHER_CAPTION: True 16 | 17 | VIEW: 18 | ENABLED: True 19 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json 20 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx 21 | SELECT: ratio 22 | 
NUM: 1 23 | RATIO: 0.2 24 | GATHER_CAPTION: True 25 | 26 | ENTITY: 27 | ENABLED: True 28 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json 29 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx 30 | SELECT: ratio 31 | NUM: 1 32 | RATIO: 1.0 33 | GATHER_CAPTION: True 34 | 35 | CAPTION_CORR_PATH_IN_ONE_FILE: False 36 | 37 | 38 | MODEL: 39 | 40 | BINARY_HEAD: 41 | NAME: BinaryHead 42 | DETACH: True 43 | THRESH: 0.5 44 | CUSTOM_SP1X1: True 45 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 46 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 47 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.u.u.blocks.block1' ] 49 | 50 | TASK_HEAD: 51 | NAME: TextSegHead 52 | CORRECT_SEG_PRED_BINARY: True 53 | 54 | CAPTION_HEAD: 55 | NAME: CaptionHead 56 | FEAT_NORM: True 57 | LOGIT_SCALE: 58 | value: 100.0 59 | learnable: True 60 | LOSS_WEIGHT: 61 | SCENE: 0.0 62 | VIEW: 0.08 63 | ENTITY: 0.02 64 | 65 | INST_HEAD: 66 | CORRECT_SEG_PRED_BINARY: Tru -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter] 2 | 3 | DATA_CONFIG: 4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml 5 | ignore_class_idx: [12] 6 | 7 | MODEL: 8 | NAME: SparseUNetTextSeg 9 | REMAP_FROM_3DLANG: False 10 | REMAP_FROM_NOADAPTER: False 11 | 12 | VFE: 13 | NAME: IndoorVFE 14 | USE_XYZ: True 15 | 16 | BACKBONE_3D: 17 | NAME: SparseUNetIndoor 18 | IN_CHANNEL: 6 19 | MID_CHANNEL: 16 20 | BLOCK_RESIDUAL: True 21 | BLOCK_REPS: 2 22 | NUM_BLOCKS: 7 23 | CUSTOM_SP1X1: True 24 | 25 | ADAPTER: 26 | NAME: VLAdapter 27 | EVAL_ONLY: False 28 | NUM_ADAPTER_LAYERS: 2 29 | TEXT_DIM: -1 30 | LAST_NORM: True 31 | FEAT_NORM: False 32 | 33 | TASK_HEAD: 34 | NAME: TextSegHead 35 | 36 | TEXT_EMBED: 37 | NAME: CLIP 38 | NORM: True 39 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth 40 | 41 | LOGIT_SCALE: 42 | value: 1.0 43 | learnable: False 44 | 45 | TEXT_ENCODER: 46 | NAME: CLIP 47 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 48 | TEMPLATE: identity 49 | EXTRACT_EMBED: False # Online extract text embeding from class or not 50 | # BERT: 51 | # BACKBONE: bert-base-uncased 52 | 53 | OPTIMIZATION: 54 | TEST_BATCH_SIZE_PER_GPU: 1 55 | BATCH_SIZE_PER_GPU: 4 56 | NUM_EPOCHS: 32 57 | LR: 0.004 # 4e-3 58 | SCHEDULER: cos_after_step 59 | OPTIMIZER: adamw 60 | WEIGHT_DECAY: 0.0001 61 | MOMENTUM: 0.9 62 | STEP_EPOCH: 20 63 | MULTIPLIER: 0.1 64 | CLIP_GRAD: False 65 | PCT_START: 0.39 66 | DIV_FACTOR: 1 67 | MOMS: [0.95, 0.85] 68 | LR_CLIP: 0.000001 69 | 70 | OTHERS: 71 | PRINT_FREQ: 20 72 | EVAL_FREQ: 5 73 | SYNC_BN: False 74 | USE_AMP: True -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_base6_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [ 0, 2, 3, 4, 8, 9 ] 5 | novel_class_idx: [ 1, 5, 6, 7, 10, 11 ] 6 | ignore_class_idx: [ 12 ] 7 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ] 5 | novel_class_idx: [ 5, 7, 9, 10 ] 6 | ignore_class_idx: [ 12 ] 7 | 8 | CAPTION_INFO: 9 | 10 | KEY: [SCENE, VIEW, ENTITY] 11 | SCENE: 12 | ENABLED: False 13 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json 14 | GATHER_CAPTION: True 15 | 16 | VIEW: 17 | ENABLED: True 18 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json 19 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx 20 | SELECT: ratio 21 | NUM: 1 22 | RATIO: 0.2 23 | GATHER_CAPTION: True 24 | 25 | ENTITY: 26 | ENABLED: True 27 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json 28 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx 29 | SELECT: ratio 30 | NUM: 1 31 | RATIO: 1.0 32 | GATHER_CAPTION: True 33 | 34 | CAPTION_CORR_PATH_IN_ONE_FILE: False 35 | 36 | 37 | MODEL: 38 | 39 | BINARY_HEAD: 40 | NAME: BinaryHead 41 | DETACH: True 42 | THRESH: 0.5 43 | CUSTOM_SP1X1: True 44 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 45 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 46 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 47 | 'unet.u.u.u.u.u.u.blocks.block1' ] 48 | 49 | TASK_HEAD: 50 | NAME: TextSegHead 51 | CORRECT_SEG_PRED_BINARY: True 52 | 53 | 54 | CAPTION_HEAD: 55 | NAME: CaptionHead 56 | FEAT_NORM: True 57 | LOGIT_SCALE: 58 | value: 100.0 59 | learnable: True 60 | LOSS_WEIGHT: 61 | SCENE: 0.0 62 | VIEW: 0.08 63 | ENTITY: 0.02 64 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, desk, 2 | curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture] 3 | 4 | DATA_CONFIG: 5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml 6 | DATASET: ScanNetInstDataset 7 | inst_class_idx: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 8 | sem2ins_classes: [] 9 | inst_label_shift: 2 10 | 11 | ignore_class_idx: [19] 12 | 13 | MODEL: 14 | NAME: SparseUNetTextSeg 15 | REMAP_FROM_3DLANG: False 16 | 17 | VFE: 18 | NAME: IndoorVFE 19 | USE_XYZ: True 20 | 21 | BACKBONE_3D: 22 | NAME: SparseUNetIndoor 23 | IN_CHANNEL: 6 24 | MID_CHANNEL: 16 25 | BLOCK_RESIDUAL: True 26 | BLOCK_REPS: 2 27 | NUM_BLOCKS: 7 28 | CUSTOM_SP1X1: True 29 | 30 | ADAPTER: 31 | NAME: VLAdapter 32 | EVAL_ONLY: False 33 | NUM_ADAPTER_LAYERS: 2 34 | TEXT_DIM: -1 35 | LAST_NORM: True 36 | 37 | TASK_HEAD: 38 | NAME: TextSegHead 39 | FEAT_NORM: False 40 | 41 | LAST_NORM: True 42 | TEXT_EMBED: 43 | NAME: CLIP 44 | NORM: True 45 | PATH: text_embed/scannet_clip-ViT-B16_id.pth 46 | FEAT_NORM: False 47 | LOGIT_SCALE: 48 | value: 1.0 49 | learnable: False 50 | 51 | INST_HEAD: 52 | NAME: InstHead 53 | 54 | BLOCK_RESIDUAL: True 55 | CUSTOM_SP1X1: True 56 | 57 | CLUSTERING: 58 | PREPARE_EPOCH: 32 59 | GROUPING_CFG: 60 | SCORE_THR: 0.2 61 | RADIUS: 0.04 62 | MEAN_ACTIVE: 300 63 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 64 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.] 
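# Note: with every CLASS_NUMPOINT_MEAN entry set to -1, all classes fall back to the absolute
# NPOINT_THR below; IGNORE_CLASSES [0, 1] below correspond to wall/floor in CLASS_NAMES above,
# which are excluded from instance grouping.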
65 | NPOINT_THR: 50 # absolute if class_numpoint == -1, relative if class_numpoint != -1 66 | IGNORE_CLASSES: [0, 1] 67 | INST_VOXEL_CFG: 68 | SCALE: 50 69 | SPATIAL_SHAPE: 20 70 | LOSS_CFG: 71 | MAX_PROPOSAL_NUM: 200 72 | POS_IOU_THR: 0.5 73 | TEST_CFG: 74 | # x4_split: False 75 | CLS_SCORE_THR: 0.001 76 | MASK_SCORE_THR: -0.5 77 | MIN_NPOINT: 100 78 | 79 | FIXED_MODULES: [] 80 | SEMANTIC_ONLY: False 81 | 82 | 83 | TEXT_ENCODER: 84 | NAME: CLIP 85 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 86 | TEMPLATE: identity 87 | EXTRACT_EMBED: False # Whether to extract text embeddings online from the class names 88 | # BERT: 89 | # BACKBONE: bert-base-uncased 90 | 91 | OPTIMIZATION: 92 | TEST_BATCH_SIZE_PER_GPU: 1 93 | BATCH_SIZE_PER_GPU: 4 94 | NUM_EPOCHS: 150 95 | LR: 0.004 # 4e-3 96 | SCHEDULER: cos_after_step 97 | OPTIMIZER: adamw 98 | WEIGHT_DECAY: 0.0001 99 | MOMENTUM: 0.9 100 | STEP_EPOCH: 82 101 | MULTIPLIER: 0.1 102 | CLIP_GRAD: False 103 | PCT_START: 0.52 104 | DIV_FACTOR: 2 105 | MOMS: [0.95, 0.85] 106 | LR_CLIP: 0.000001 107 | 108 | OTHERS: 109 | PRINT_FREQ: 20 110 | SYNC_BN: False 111 | USE_AMP: True 112 | EVAL_FREQ: 10 113 | FIND_UNUSED_PARAMETERS: True 114 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base10_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all. 5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 6 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ] 7 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ] 6 | novel_class_idx: [ 5, 9, 12, 16 ] 7 | ignore_class_idx: [ 19 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | 13 | SCENE: 14 | ENABLED: False 15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json 16 | GATHER_CAPTION: True 17 | 18 | VIEW: 19 | ENABLED: True 20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json 21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle 22 | SELECT: ratio 23 | NUM: 1 24 | RATIO: 0.5 25 | GATHER_CAPTION: True 26 | 27 | ENTITY: 28 | ENABLED: True 29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json 30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle 31 | SELECT: ratio 32 | NUM: 1 33 | RATIO: 1.0 34 | GATHER_CAPTION: True 35 | 36 | CAPTION_CORR_PATH_IN_ONE_FILE: True 37 | 38 | 39 | MODEL: 40 | 41 | BINARY_HEAD: 42 | NAME: BinaryHead 43 | DETACH: True 44 | THRESH: 0.5 45 | CUSTOM_SP1X1: True 46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 49 |
'unet.u.u.u.u.u.u.blocks.block1' ] 50 | 51 | TASK_HEAD: 52 | NAME: TextSegHead 53 | CORRECT_SEG_PRED_BINARY: True 54 | 55 | CAPTION_HEAD: 56 | NAME: CaptionHead 57 | FEAT_NORM: True 58 | LOGIT_SCALE: 59 | value: 100.0 60 | learnable: True 61 | LOSS_WEIGHT: 62 | SCENE: 0.0 63 | VIEW: 0.05 64 | ENTITY: 0.05 65 | 66 | INST_HEAD: 67 | CORRECT_SEG_PRED_BINARY: True 68 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base8_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all. 5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ] 7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: split the input categories into base/novel/ignore. 5 | # Note that if you have ground-truth annotations for the test samples, 6 | # you need to carefully set those parameters to evaluate the performance quantitatively. 7 | # If you just want to evaluate it qualitatively, you can just put all the categories into base_class_idx. 8 | base_class_idx: [ 0, 1, 2, 3, 4] 9 | novel_class_idx: [] 10 | ignore_class_idx: [ ] 11 | 12 | # TODO: split the categories into inst_base/inst_novel 13 | inst_class_idx: [2, 3] 14 | base_inst_class_idx: [0, 1] # the base category indices for instance categories.
The length of this list should be the same as or smaller than the length of inst_class_idx 15 | novel_inst_class_idx: [] 16 | 17 | MODEL: 18 | TASK_HEAD: 19 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 20 | 21 | INST_HEAD: 22 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 23 | CLUSTERING: 24 | PREPARE_EPOCH: -1 25 | 26 | TEXT_ENCODER: 27 | EXTRACT_EMBED: True 28 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, 2 | desk, curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture] 3 | 4 | DATA_CONFIG: 5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml 6 | ignore_class_idx: [19] 7 | 8 | MODEL: 9 | NAME: SparseUNetTextSeg 10 | REMAP_FROM_3DLANG: False 11 | REMAP_FROM_NOADAPTER: False 12 | 13 | VFE: 14 | NAME: IndoorVFE 15 | USE_XYZ: True 16 | 17 | BACKBONE_3D: 18 | NAME: SparseUNetIndoor 19 | IN_CHANNEL: 6 20 | MID_CHANNEL: 16 21 | BLOCK_RESIDUAL: True 22 | BLOCK_REPS: 2 23 | NUM_BLOCKS: 7 24 | CUSTOM_SP1X1: True 25 | 26 | ADAPTER: 27 | NAME: VLAdapter 28 | EVAL_ONLY: False 29 | NUM_ADAPTER_LAYERS: 2 30 | TEXT_DIM: -1 31 | LAST_NORM: True 32 | 33 | TASK_HEAD: 34 | NAME: TextSegHead 35 | FEAT_NORM: False 36 | 37 | TEXT_EMBED: 38 | NAME: CLIP 39 | NORM: True 40 | PATH: text_embed/scannet_clip-ViT-B16_id.pth 41 | 42 | LOGIT_SCALE: 43 | value: 1.0 44 | learnable: False 45 | 46 | TEXT_ENCODER: 47 | NAME: CLIP 48 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 49 | TEMPLATE: identity 50 | EXTRACT_EMBED: False # Whether to extract text embeddings online from the class names 51 | # BERT: 52 | # BACKBONE: bert-base-uncased 53 | 54 | 55 | OPTIMIZATION: 56 | BATCH_SIZE_PER_GPU: 4 57 | NUM_EPOCHS: 128 58 | LR: 0.004 # 4e-3 59 | SCHEDULER: cos_after_step 60 | OPTIMIZER: adamw 61 | WEIGHT_DECAY: 0.0001 62 | MOMENTUM: 0.9 63 | STEP_EPOCH: 50 64 | MULTIPLIER: 0.1 65 | CLIP_GRAD: False 66 | PCT_START: 0.39 67 | DIV_FACTOR: 1 68 | MOMS: [0.95, 0.85] 69 | LR_CLIP: 0.000001 70 | 71 | OTHERS: 72 | PRINT_FREQ: 20 73 | SYNC_BN: False 74 | USE_AMP: True -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base10_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | 4 | DATA_CONFIG: 5 | # TODO: make base + novel = all. 6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ] 7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base12_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all.
5 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ] 6 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ] 7 | ignore_class_idx: [ 19 ] 8 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_adamw.yaml 2 | 3 | 4 | DATA_CONFIG: 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ] 6 | novel_class_idx: [ 5, 9, 12, 16 ] 7 | ignore_class_idx: [ 19 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | 13 | SCENE: 14 | ENABLED: False 15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json 16 | GATHER_CAPTION: True 17 | 18 | VIEW: 19 | ENABLED: True 20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json 21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle 22 | SELECT: ratio 23 | NUM: 1 24 | RATIO: 0.5 25 | GATHER_CAPTION: True 26 | 27 | ENTITY: 28 | ENABLED: True 29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json 30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle 31 | SELECT: ratio 32 | NUM: 1 33 | RATIO: 1.0 34 | GATHER_CAPTION: True 35 | 36 | CAPTION_CORR_PATH_IN_ONE_FILE: True 37 | 38 | 39 | MODEL: 40 | 41 | BINARY_HEAD: 42 | NAME: BinaryHead 43 | DETACH: True 44 | THRESH: 0.5 45 | CUSTOM_SP1X1: True 46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 49 | 'unet.u.u.u.u.u.u.blocks.block1' ] 50 | 51 | TASK_HEAD: 52 | NAME: TextSegHead 53 | CORRECT_SEG_PRED_BINARY: True 54 | 55 | CAPTION_HEAD: 56 | NAME: CaptionHead 57 | FEAT_NORM: True 58 | LOGIT_SCALE: 59 | value: 100.0 60 | learnable: True 61 | LOSS_FUNC: CrossEntropy 62 | LOSS_WEIGHT: 63 | SCENE: 0.0 64 | VIEW: 0.05 65 | ENTITY: 0.05 66 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: split the input categories into base/novel/ignore. 5 | # Note that if you have ground-truth annotations for the test samples, 6 | # you need to carefully set those parameters to evaluate the performance quantitatively. 7 | # If you just want to evaluate it qualitatively, you can just put all the categories into base_class_idx.
8 | base_class_idx: [ 0, 1, 2, 3, 4] 9 | novel_class_idx: [] 10 | ignore_class_idx: [ ] 11 | 12 | MODEL: 13 | TASK_HEAD: 14 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 15 | 16 | TEXT_ENCODER: 17 | EXTRACT_EMBED: True 18 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/eval_utils/inst_eval/__init__.py -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/instance_eval_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import numpy as np 5 | from plyfile import PlyData 6 | 7 | 8 | # matrix: 4x4 np array 9 | # points Nx3 np array 10 | def transform_points(matrix, points): 11 | assert len(points.shape) == 2 and points.shape[1] == 3 12 | num_points = points.shape[0] 13 | p = np.concatenate([points, np.ones((num_points, 1))], axis=1) 14 | p = np.matmul(matrix, np.transpose(p)) 15 | p = np.transpose(p) 16 | p[:, :3] /= p[:, 3, None] 17 | return p[:, :3] 18 | 19 | 20 | def export_ids(filename, ids): 21 | with open(filename, 'w') as f: 22 | for id in ids: 23 | f.write('%d\n' % id) 24 | 25 | 26 | def load_ids(filename): 27 | ids = open(filename).read().splitlines() 28 | ids = np.array(ids, dtype=np.int64) 29 | return ids 30 | 31 | 32 | def read_mesh_vertices(filename): 33 | assert os.path.isfile(filename) 34 | with open(filename, 'rb') as f: 35 | plydata = PlyData.read(f) 36 | num_verts = plydata['vertex'].count 37 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 38 | vertices[:, 0] = plydata['vertex'].data['x'] 39 | vertices[:, 1] = plydata['vertex'].data['y'] 40 | vertices[:, 2] = plydata['vertex'].data['z'] 41 | return vertices 42 | 43 | 44 | # export 3d instance labels for instance evaluation 45 | def export_instance_ids_for_eval(filename, label_ids, instance_ids): 46 | assert label_ids.shape[0] == instance_ids.shape[0] 47 | output_mask_path_relative = 'pred_mask' 48 | name = os.path.splitext(os.path.basename(filename))[0] 49 | output_mask_path = os.path.join(os.path.dirname(filename), output_mask_path_relative) 50 | if not os.path.isdir(output_mask_path): 51 | os.mkdir(output_mask_path) 52 | insts = np.unique(instance_ids) 53 | zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32) 54 | with open(filename, 'w') as f: 55 | for idx, inst_id in enumerate(insts): 56 | if inst_id == 0: # 0 -> no instance for this vertex 57 | continue 58 | output_mask_file = os.path.join(output_mask_path_relative, 59 | name + '_' + str(idx) + '.txt') 60 | loc = np.where(instance_ids == inst_id) 61 | label_id = label_ids[loc[0][0]] 62 | f.write('%s %d %f\n' % (output_mask_file, label_id, 1.0)) 63 | # write mask 64 | mask = np.copy(zero_mask) 65 | mask[loc[0]] = 1 66 | export_ids(output_mask_file, mask) 67 | 68 | 69 | # ------------ Instance Utils ------------ # 70 | 71 | 72 | class Instance(object): 73 | instance_id = 0 74 | label_id = 0 75 | vert_count = 0 76 | med_dist = -1 77 | dist_conf = 0.0 78 | 79 | def __init__(self, mesh_vert_instances, instance_id): 80 | if (instance_id == -1): 81 | return 82 | self.instance_id = int(instance_id) 83 |
self.label_id = int(self.get_label_id(instance_id)) 84 | self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id)) 85 | 86 | def get_label_id(self, instance_id): 87 | return int(instance_id // 1000) 88 | 89 | def get_instance_verts(self, mesh_vert_instances, instance_id): 90 | return (mesh_vert_instances == instance_id).sum() 91 | 92 | def to_json(self): 93 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 94 | 95 | def to_dict(self): 96 | dict = {} 97 | dict['instance_id'] = self.instance_id 98 | dict['label_id'] = self.label_id 99 | dict['vert_count'] = self.vert_count 100 | dict['med_dist'] = self.med_dist 101 | dict['dist_conf'] = self.dist_conf 102 | return dict 103 | 104 | def from_json(self, data): 105 | self.instance_id = int(data['instance_id']) 106 | self.label_id = int(data['label_id']) 107 | self.vert_count = int(data['vert_count']) 108 | if ('med_dist' in data): 109 | self.med_dist = float(data['med_dist']) 110 | self.dist_conf = float(data['dist_conf']) 111 | 112 | def __str__(self): 113 | return '(' + str(self.instance_id) + ')' 114 | 115 | 116 | def read_instance_prediction_file(filename, pred_path): 117 | lines = open(filename).read().splitlines() 118 | instance_info = {} 119 | abs_pred_path = os.path.abspath(pred_path) 120 | for line in lines: 121 | parts = line.split(' ') 122 | if len(parts) != 3: 123 | print('invalid instance prediction file. Expected (per line): \ 124 | [rel path prediction] [label id prediction] \ 125 | [confidence prediction]') 126 | if os.path.isabs(parts[0]): 127 | print('invalid instance prediction file. \ 128 | First entry in line must be a relative path') 129 | mask_file = os.path.join(os.path.dirname(filename), parts[0]) 130 | mask_file = os.path.abspath(mask_file) 131 | # check that mask_file lives inside prediction path 132 | if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path: 133 | print(('predicted mask {} in prediction text file {}' + 134 | 'points outside of prediction path.').format(mask_file, filename)) 135 | 136 | info = {} 137 | info['label_id'] = int(float(parts[1])) 138 | info['conf'] = float(parts[2]) 139 | instance_info[mask_file] = info 140 | return instance_info 141 | 142 | 143 | def get_instances(ids, class_ids, class_labels, id2label): 144 | instances = {} 145 | for label in class_labels: 146 | instances[label] = [] 147 | instance_ids = np.unique(ids) 148 | for id in instance_ids: 149 | if id == 0: 150 | continue 151 | inst = Instance(ids, id) 152 | if inst.label_id in class_ids: 153 | instances[id2label[inst.label_id]].append(inst.to_dict()) 154 | return instances 155 | -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/pointwise_eval_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=None): 5 | gt = np.concatenate(gt_list, axis=0) 6 | pred = np.concatenate(pred_list, axis=0) 7 | assert gt.shape == pred.shape 8 | correct = (gt[gt != ignore_label] == pred[gt != ignore_label]).sum() 9 | whole = (gt != ignore_label).sum() 10 | acc = correct.astype(float) / whole * 100 11 | logger.info(f'Acc: {acc:.1f}') 12 | return acc 13 | 14 | 15 | def evaluate_semantic_miou(n_classes, pred_list, gt_list, ignore_label=-100, logger=None): 16 | gt = np.concatenate(gt_list, axis=0) 17 | pred = np.concatenate(pred_list, axis=0) 18 | pos_inds = gt != ignore_label 19 | gt = 
gt[pos_inds] 20 | pred = pred[pos_inds] 21 | assert gt.shape == pred.shape 22 | iou_list = [] 23 | for _index in range(n_classes): 24 | if _index != ignore_label: 25 | intersection = ((gt == _index) & (pred == _index)).sum() 26 | union = ((gt == _index) | (pred == _index)).sum() 27 | iou = intersection.astype(float) / (union + 1e-10) * 100 28 | iou_list.append(iou) 29 | miou = np.nanmean(iou_list) 30 | logger.info('Class-wise mIoU: ' + ' '.join(f'{x:.1f}' for x in iou_list)) 31 | logger.info(f'mIoU: {miou:.1f}') 32 | return miou, iou_list 33 | 34 | 35 | def evaluate_offset_mae(pred_list, gt_list, gt_instance_list, ignore_label=-100, logger=None): 36 | gt = np.concatenate(gt_list, axis=0) 37 | pred = np.concatenate(pred_list, axis=0) 38 | gt_instance = np.concatenate(gt_instance_list, axis=0) 39 | pos_inds = gt_instance != ignore_label 40 | gt = gt[pos_inds] 41 | pred = pred[pos_inds] 42 | mae = np.abs(gt - pred).sum() / pos_inds.sum() 43 | logger.info(f'Offset MAE: {mae:.3f}') 44 | return mae -------------------------------------------------------------------------------- /tools/eval_utils/save_utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | from pcseg.models.model_utils.rle_utils import rle_decode 6 | 7 | 8 | def save_npy(root, name, scan_ids, arrs): 9 | root = osp.join(root, name) 10 | os.makedirs(root, exist_ok=True) 11 | paths = [osp.join(root, f'{i}.npy') for i in scan_ids] 12 | pool = mp.Pool() 13 | pool.starmap(np.save, zip(paths, arrs)) 14 | pool.close() 15 | pool.join() 16 | 17 | 18 | def save_single_instance(root, scan_id, insts, nyu_id=None): 19 | f = open(osp.join(root, f'{scan_id}.txt'), 'w') 20 | os.makedirs(osp.join(root, 'predicted_masks'), exist_ok=True) 21 | for i, inst in enumerate(insts): 22 | # assert scan_id == inst['scan_id'] 23 | label_id = inst['label_id'] 24 | # scannet dataset use nyu_id for evaluation 25 | if nyu_id is not None: 26 | label_id = nyu_id[label_id - 1] 27 | conf = inst['conf'] 28 | f.write(f'predicted_masks/{scan_id}_{i:03d}.txt {label_id} {conf:.4f}\n') 29 | mask_path = osp.join(root, 'predicted_masks', f'{scan_id}_{i:03d}.txt') 30 | mask = rle_decode(inst['pred_mask']) 31 | np.savetxt(mask_path, mask, fmt='%d') 32 | f.close() 33 | 34 | 35 | def save_pred_instances(root, name, scan_ids, pred_insts, nyu_id=None): 36 | root = osp.join(root, name) 37 | os.makedirs(root, exist_ok=True) 38 | roots = [root] * len(scan_ids) 39 | nyu_ids = [nyu_id] * len(scan_ids) 40 | pool = mp.Pool() 41 | pool.starmap(save_single_instance, zip(roots, scan_ids, pred_insts, nyu_ids)) 42 | pool.close() 43 | pool.join() 44 | 45 | 46 | def save_gt_instance(path, gt_inst, nyu_id=None): 47 | if nyu_id is not None: 48 | sem = gt_inst // 1000 49 | ignore = sem == 0 50 | ins = gt_inst % 1000 51 | nyu_id = np.array(nyu_id) 52 | sem = nyu_id[sem - 1] 53 | sem[ignore] = 0 54 | gt_inst = sem * 1000 + ins 55 | np.savetxt(path, gt_inst, fmt='%d') 56 | 57 | 58 | def save_gt_instances(root, name, scan_ids, gt_insts, nyu_id=None): 59 | root = osp.join(root, name) 60 | os.makedirs(root, exist_ok=True) 61 | paths = [osp.join(root, f'{i}.txt') for i in scan_ids] 62 | pool = mp.Pool() 63 | nyu_ids = [nyu_id] * len(scan_ids) 64 | pool.starmap(save_gt_instance, zip(paths, gt_insts, nyu_ids)) 65 | pool.close() 66 | pool.join() 67 | -------------------------------------------------------------------------------- 
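A minimal, hypothetical usage sketch for the point-wise evaluation helpers above (`evaluate_semantic_acc` / `evaluate_semantic_miou` from `pointwise_eval_utils.py`); the import path, the logger setup, and the 3-class toy arrays are assumptions for illustration and not part of the repository:

```python
import logging

import numpy as np

# Assumed import path; adjust to how the tools/ directory is on your PYTHONPATH.
from eval_utils.inst_eval.pointwise_eval_utils import evaluate_semantic_acc, evaluate_semantic_miou

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('toy_eval')

# Two "scans" worth of per-point labels; -100 marks ignored points.
gt_list = [np.array([0, 1, 2, -100]), np.array([2, 2, 1, 0])]
pred_list = [np.array([0, 1, 1, 0]), np.array([2, 0, 1, 0])]

acc = evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=logger)
miou, iou_list = evaluate_semantic_miou(3, pred_list, gt_list, ignore_label=-100, logger=logger)
```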
/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc -------------------------------------------------------------------------------- /tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc -------------------------------------------------------------------------------- /tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc -------------------------------------------------------------------------------- /tools/process_tools/combine_multiple_caption_files.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import tqdm 4 | import pickle 5 | 6 | 7 | def write_caption_to_file(data, path): 8 | with open(path, 'w') as f: 9 | json.dump(data, f) 10 | 11 | print(f'The caption is dump to {path}') 12 | 13 | 14 | def replace_dict_keys_with_new_keys(origin_dict, new_key_list): 15 | curr_key_list = list(origin_dict.keys()) 16 | new_dict = {} 17 | for i, key in enumerate(curr_key_list): 18 | new_dict[new_key_list[i]] = origin_dict[key] 19 | 20 | return new_dict 21 | 22 | 23 | def merge_captions_with_path_list(caption_path_list, caption_save_path): 24 | new_caption = {} 25 | scene_caption_num = {} 26 | 27 | for caption_path in caption_path_list: 28 | current_caption = json.load(open(caption_path, 'r')) 29 | for scene_name, curr_scene_caption in tqdm.tqdm(current_caption.items(), total=len(current_caption)): 30 | counter = scene_caption_num[scene_name] if scene_name in scene_caption_num else 0 31 | 32 | image_name_list = [f'{counter + i}' for i in range(len(curr_scene_caption))] 33 | new_scene_caption = replace_dict_keys_with_new_keys(curr_scene_caption, image_name_list) 34 | if scene_name in new_caption: 35 | new_caption[scene_name].update(new_scene_caption) 36 | else: 37 | new_caption[scene_name] = new_scene_caption 38 | 39 | counter += len(curr_scene_caption) 40 | scene_caption_num[scene_name] = counter 41 | 42 | write_caption_to_file(new_caption, caption_save_path) 43 | 44 | 45 | def merge_caption_idx_with_path_list(caption_idx_path_list, caption_idx_save_path): 46 | new_caption_idx = [] 47 | caption_idx_list = [] 48 | for caption_idx_path in caption_idx_path_list: 49 | caption_idx = pickle.load(open(caption_idx_path, 'rb')) 50 | caption_idx_list.append(caption_idx) 51 | 52 | for i in tqdm.tqdm(range(len(caption_idx_list[0]))): 53 | scene_caption = {} 54 | scene_caption_infos = {} 55 | counter = 0 56 | for _, caption_idx in enumerate(caption_idx_list): 57 | if 'scene_name' not in scene_caption: 58 | scene_caption['scene_name'] = caption_idx[i]['scene_name'] 59 | 60 | new_image_name_list = [f'{counter + i}' for i in range(len(caption_idx[i]['infos']))] 61 | new_scene_caption_idx = replace_dict_keys_with_new_keys(caption_idx[i]['infos'], new_image_name_list) 62 | 63 | 
scene_caption_infos.update(new_scene_caption_idx) 64 | counter += len(caption_idx[i]['infos']) 65 | 66 | scene_caption['infos'] = scene_caption_infos 67 | new_caption_idx.append(scene_caption) 68 | 69 | with open(caption_idx_save_path, 'wb') as f: 70 | pickle.dump(new_caption_idx, f) 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser('') 75 | parser.add_argument('--caption_path_list', 76 | default=['data/nuscenes/text_embed/caption_basic_crop_nuscenes_v1.0-mini_vit-gpt2-image' 77 | '-captioning_w400-500_overlap0.3.json', 78 | 'data/nuscenes/text_embed/caption_detic_crop_cap_nuscenes_v1.0-mini_vit-gpt2-image' 79 | '-captioning_.json'], 80 | type=list, help='') 81 | parser.add_argument('--caption_idx_path_list', 82 | default=['data/nuscenes/v1.0-mini/nuscenes_caption_idx_basic_crop.pkl', 83 | 'data/nuscenes/v1.0-mini/nuscenes_caption_idx_detic_crop_cap.pkl'], 84 | type=list, help='') 85 | parser.add_argument('--caption_save_path', required=True, type=str, help='') 86 | parser.add_argument('--caption_idx_save_path', required=True, type=str, help='') 87 | 88 | args = parser.parse_args() 89 | 90 | print('Start to merge captions ........') 91 | merge_captions_with_path_list(args.caption_path_list, args.caption_save_path) 92 | print('Finish merging captions ........') 93 | 94 | print('Start to merge captions idx file ........') 95 | merge_caption_idx_with_path_list(args.caption_idx_path_list, args.caption_idx_save_path) 96 | print('Finish merging captions idx file ........') 97 | 98 | -------------------------------------------------------------------------------- /tools/process_tools/filter_caption_without_points.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | import json 4 | import tqdm 5 | 6 | 7 | def write_caption_to_file(data, path): 8 | with open(path, 'w') as f: 9 | json.dump(data, f) 10 | 11 | print(f'The caption is dump to {path}') 12 | 13 | 14 | def filter_captions_without_points(caption_info, caption_idx_info): 15 | for idx, scene_caption_idx_info in tqdm.tqdm(enumerate(caption_idx_info), total=len(caption_idx_info)): 16 | scene_name = scene_caption_idx_info['scene_name'] 17 | scene_caption_idx = scene_caption_idx_info['infos'] 18 | scene_captions = caption_info[scene_name] 19 | 20 | image_name_list = list(scene_caption_idx.keys()) 21 | for image_name in image_name_list: 22 | image_caption_idx = scene_caption_idx[image_name] 23 | if image_caption_idx.shape[0] == 0: 24 | scene_caption_idx.pop(image_name) 25 | scene_captions.pop(image_name) 26 | 27 | write_caption_to_file(caption_info, args.save_caption_info_path) 28 | with open(args.save_caption_idx_info_path, 'wb') as f: 29 | pickle.dump(caption_idx_info, f) 30 | 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser('') 34 | parser.add_argument('--caption_info_path', type=str, help='') 35 | parser.add_argument('--caption_idx_info_path', type=str, help='') 36 | 37 | parser.add_argument('--save_caption_info_path', type=str, help='') 38 | parser.add_argument('--save_caption_idx_info_path', type=str, help='') 39 | 40 | global args 41 | args = parser.parse_args() 42 | 43 | caption_info = json.load(open(args.caption_info_path, 'r')) 44 | caption_idx_info = pickle.load(open(args.caption_idx_info_path, 'rb')) 45 | 46 | filter_captions_without_points(caption_info, caption_idx_info) 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- 
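For reference, a hypothetical illustration (not in the repo) of the data layout the caption tools above operate on, inferred from how `combine_multiple_caption_files.py` and `filter_caption_without_points.py` index their inputs; the scene name, image keys, and point indices below are made up:

```python
import numpy as np

# Caption JSON: {scene_name: {image_name: caption string}}.
caption_info = {
    'scene0000_00': {
        '0': 'a room with a table and chairs',
        '1': 'a bed next to a window',
    }
}

# Caption index pickle: one entry per scene, where 'infos' maps each image name
# to the indices of the points associated with that image's caption.
caption_idx_info = [
    {
        'scene_name': 'scene0000_00',
        'infos': {
            '0': np.array([12, 57, 301]),
            '1': np.array([], dtype=np.int64),  # no matched points -> removed by the filter script
        },
    }
]
```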
/tools/process_tools/generate_category_embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | import clip 3 | import torch 4 | from transformers import AutoTokenizer, AutoModel 5 | 6 | from pcseg.models.text_networks.text_models import get_clip_model 7 | 8 | 9 | class_names = { 10 | 'scannet': ['wall', 'floor', 'cabinet', 'bed', 'chair', 11 | 'sofa', 'table', 'door', 'window', 'bookshelf', 12 | 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 13 | 'showercurtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'], 14 | 's3dis': ['ceiling', 'floor', 'wall', 'beam', 'column', 15 | 'window', 'door', 'table', 'chair', 'sofa', 16 | 'bookcase', 'board', 'clutter'] 17 | } 18 | 19 | 20 | def construct_input_from_class_name(input, tokenizer): 21 | inputs = tokenizer(input, return_tensors="pt", padding=True) 22 | return inputs 23 | 24 | 25 | def get_embedding(args): 26 | if args.model.startswith('clip'): 27 | backbone_name = args.model[5:] 28 | input = class_names[args.dataset] 29 | _, model = get_clip_model(backbone_name) 30 | model = model.cuda() 31 | text = clip.tokenize(input).cuda() 32 | output = model.encode_text(text) 33 | print(output.shape) 34 | else: 35 | tokenizer = AutoTokenizer.from_pretrained(args.model) 36 | model = AutoModel.from_pretrained(args.model) 37 | 38 | inputs = construct_input_from_class_name(class_names[args.dataset], tokenizer) 39 | outputs = model(**inputs) 40 | output = outputs.pooler_output 41 | print(outputs.pooler_output.shape) 42 | 43 | return output 44 | 45 | 46 | if __name__ == '__main__': 47 | import argparse 48 | 49 | parser = argparse.ArgumentParser('language model') 50 | parser.add_argument('--model', default='clip-ViT-B/16', type=str, help='language model name') 51 | parser.add_argument('--dataset_path', default='../data/scannetv2', type=str, help='language model name') 52 | parser.add_argument('--dataset', default='scannet', type=str, help='dataset name') 53 | args = parser.parse_args() 54 | 55 | category_embedding = get_embedding(args) 56 | 57 | file_name = '{}_{}_{}_text_embed.pth'.format( 58 | args.dataset, len(class_names[args.dataset]), args.model.replace('/', '') 59 | ) 60 | save_dir = os.path.join(args.dataset_path, 'text_embed') 61 | os.makedirs(save_dir, exist_ok=True) 62 | save_path = os.path.join(save_dir, file_name) 63 | 64 | torch.save(category_embedding, save_path) 65 | print("Saving category embedding into: ", save_path) 66 | -------------------------------------------------------------------------------- /tools/scripts/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | NGPUS=$1 5 | PY_ARGS=${@:2} 6 | 7 | while true 8 | do 9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 11 | if [ "${status}" != "0" ]; then 12 | break; 13 | fi 14 | done 15 | echo $PORT 16 | 17 | ulimit -n 64000 18 | 19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} test.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS} 20 | 21 | -------------------------------------------------------------------------------- /tools/scripts/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | NGPUS=$1 5 | PY_ARGS=${@:2} 6 | 7 | while true 8 | do 9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 
11 | if [ "${status}" != "0" ]; then 12 | break; 13 | fi 14 | done 15 | echo $PORT 16 | 17 | ulimit -n 64000 18 | 19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} train.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS} 20 | 21 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_sched 6 | import numpy as np 7 | 8 | from .fastai_optim import OptimWrapper 9 | from .learning_schedules_fastai import CosineWarmupLR, OneCycle 10 | 11 | 12 | def build_optimizer(model, optim_cfg): 13 | if optim_cfg.OPTIMIZER == 'adam': 14 | optimizer = optim.Adam(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY) 15 | elif optim_cfg.OPTIMIZER == 'sgd': 16 | optimizer = optim.SGD( 17 | model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY, 18 | momentum=optim_cfg.MOMENTUM 19 | ) 20 | elif optim_cfg.OPTIMIZER == 'adamw': 21 | optimizer = optim.AdamW(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY) 22 | elif optim_cfg.OPTIMIZER == 'adam_onecycle': 23 | def children(m: nn.Module): 24 | return list(m.children()) 25 | 26 | def num_children(m: nn.Module) -> int: 27 | return len(children(m)) 28 | 29 | flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m] 30 | get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))] 31 | 32 | optimizer_func = partial(optim.Adam, betas=(0.9, 0.99)) 33 | optimizer = OptimWrapper.create( 34 | optimizer_func, 3e-3, get_layer_groups(model), wd=optim_cfg.WEIGHT_DECAY, true_wd=True, bn_wd=True 35 | ) 36 | else: 37 | raise NotImplementedError 38 | 39 | return optimizer 40 | 41 | 42 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg): 43 | total_steps = total_iters_each_epoch * total_epochs 44 | 45 | if optim_cfg.SCHEDULER == 'poly': 46 | lr_scheduler = PolyLR(optimizer, max_iter=total_steps, power=optim_cfg.POWER) 47 | elif optim_cfg.OPTIMIZER == 'adam_onecycle': 48 | lr_scheduler = OneCycle( 49 | optimizer, total_steps, optim_cfg.LR, list(optim_cfg.MOMS), optim_cfg.DIV_FACTOR, optim_cfg.PCT_START 50 | ) 51 | else: 52 | lr_scheduler = None 53 | 54 | return lr_scheduler 55 | 56 | 57 | class LambdaStepLR(lr_sched.LambdaLR): 58 | def __init__(self, optimizer, lr_lambda, last_step=-1): 59 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step) 60 | 61 | @property 62 | def last_step(self): 63 | """Use last_epoch for the step counter""" 64 | return self.last_epoch 65 | 66 | @last_step.setter 67 | def last_step(self, v): 68 | self.last_epoch = v 69 | 70 | 71 | class PolyLR(LambdaStepLR): 72 | """DeepLab learning rate policy""" 73 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1): 74 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step) 75 | 76 | 77 | class CosLR(LambdaStepLR): 78 | """Runyu's LR policy""" 79 | def __init__(self, optimizer, cos_lambda_func, last_step=-1): 80 | super(CosLR, self).__init__(optimizer, cos_lambda_func, last_step) 81 | 82 | 83 | def cosine_lr_after_step(optimizer, base_lr, epoch, step_epoch, total_epochs, clip=1e-6): 84 | if epoch < step_epoch: 85 | lr = base_lr 86 | else: 87 | lr = clip + 0.5 * (base_lr - clip) * \ 88 | (1 + np.cos(np.pi * 
((epoch - step_epoch) / (total_epochs - step_epoch)))) 89 | 90 | for param_group in optimizer.param_groups: 91 | param_group['lr'] = lr 92 | 93 | 94 | def adjust_lr(optim_cfg, optimizer, scheduler, total_epochs, total_iters_per_epoch, epoch, iter, accumulated_iter, no_step=False): 95 | # adjust learning rate 96 | if optim_cfg.SCHEDULER == 'cos': 97 | max_iter = total_iters_per_epoch * total_epochs 98 | cos_learning_rate( 99 | optimizer, optim_cfg.LR, epoch * total_iters_per_epoch + iter + 1, max_iter, 0, 0) 100 | elif optim_cfg.SCHEDULER == 'cos_after_step': 101 | cosine_lr_after_step(optimizer, optim_cfg.LR, epoch, optim_cfg.STEP_EPOCH, total_epochs) 102 | elif optim_cfg.SCHEDULER in ['adam_onecycle', 'poly']: 103 | assert scheduler is not None 104 | if not no_step: 105 | scheduler.step(accumulated_iter) 106 | elif optim_cfg.SCHEDULER in ['multistep']: 107 | pass 108 | else: 109 | raise NotImplementedError 110 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/learning_schedules_fastai.py: -------------------------------------------------------------------------------- 1 | # This file is modified from https://github.com/traveller59/second.pytorch 2 | 3 | import math 4 | from functools import partial 5 | 6 | import numpy as np 7 | import torch.optim.lr_scheduler as lr_sched 8 | 9 | from .fastai_optim import OptimWrapper 10 | 11 | 12 | class LRSchedulerStep(object): 13 | def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases, 14 | mom_phases): 15 | # if not isinstance(fai_optimizer, OptimWrapper): 16 | # raise TypeError('{} is not a fastai OptimWrapper'.format( 17 | # type(fai_optimizer).__name__)) 18 | self.optimizer = fai_optimizer 19 | self.total_step = total_step 20 | self.lr_phases = [] 21 | 22 | for i, (start, lambda_func) in enumerate(lr_phases): 23 | if len(self.lr_phases) != 0: 24 | assert self.lr_phases[-1][0] < start 25 | if isinstance(lambda_func, str): 26 | lambda_func = eval(lambda_func) 27 | if i < len(lr_phases) - 1: 28 | self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func)) 29 | else: 30 | self.lr_phases.append((int(start * total_step), total_step, lambda_func)) 31 | assert self.lr_phases[0][0] == 0 32 | self.mom_phases = [] 33 | for i, (start, lambda_func) in enumerate(mom_phases): 34 | if len(self.mom_phases) != 0: 35 | assert self.mom_phases[-1][0] < start 36 | if isinstance(lambda_func, str): 37 | lambda_func = eval(lambda_func) 38 | if i < len(mom_phases) - 1: 39 | self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func)) 40 | else: 41 | self.mom_phases.append((int(start * total_step), total_step, lambda_func)) 42 | assert self.mom_phases[0][0] == 0 43 | 44 | def step(self, step): 45 | for start, end, func in self.lr_phases: 46 | if step >= start: 47 | self.optimizer.lr = func((step - start) / (end - start)) 48 | for start, end, func in self.mom_phases: 49 | if step >= start: 50 | self.optimizer.mom = func((step - start) / (end - start)) 51 | 52 | 53 | def annealing_cos(start, end, pct): 54 | # print(pct, start, end) 55 | "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." 
56 | cos_out = np.cos(np.pi * pct) + 1 57 | return end + (start - end) / 2 * cos_out 58 | 59 | 60 | class OneCycle(LRSchedulerStep): 61 | def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor, 62 | pct_start): 63 | self.lr_max = lr_max 64 | self.moms = moms 65 | self.div_factor = div_factor 66 | self.pct_start = pct_start 67 | a1 = int(total_step * self.pct_start) 68 | a2 = total_step - a1 69 | low_lr = self.lr_max / self.div_factor 70 | lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)), 71 | (self.pct_start, 72 | partial(annealing_cos, self.lr_max, low_lr / 1e4))) 73 | mom_phases = ((0, partial(annealing_cos, *self.moms)), 74 | (self.pct_start, partial(annealing_cos, 75 | *self.moms[::-1]))) 76 | fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0] 77 | super().__init__(fai_optimizer, total_step, lr_phases, mom_phases) 78 | 79 | 80 | class CosineWarmupLR(lr_sched._LRScheduler): 81 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 82 | self.T_max = T_max 83 | self.eta_min = eta_min 84 | super(CosineWarmupLR, self).__init__(optimizer, last_epoch) 85 | 86 | def get_lr(self): 87 | return [self.eta_min + (base_lr - self.eta_min) * 88 | (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2 89 | for base_lr in self.base_lrs] 90 | 91 | 92 | class FakeOptim: 93 | def __init__(self): 94 | self.lr = 0 95 | self.mom = 0 96 | 97 | 98 | if __name__ == "__main__": 99 | import matplotlib.pyplot as plt 100 | 101 | opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10 102 | schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1) 103 | 104 | lrs = [] 105 | moms = [] 106 | for i in range(100): 107 | schd.step(i) 108 | lrs.append(opt.lr) 109 | moms.append(opt.mom) 110 | plt.plot(lrs) 111 | # plt.plot(moms) 112 | plt.show() 113 | plt.plot(moms) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /tools/visual_utils/indoor_utils/ply_utils.py: -------------------------------------------------------------------------------- 1 | from plyfile import PlyData 2 | import numpy as np 3 | 4 | 5 | def read_ply(path): 6 | plydata = PlyData.read(path) 7 | num_verts = plydata['vertex'].count 8 | 9 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 10 | vertices[:, 0] = plydata['vertex']['x'] 11 | vertices[:, 1] = plydata['vertex']['y'] 12 | vertices[:, 2] = plydata['vertex']['z'] 13 | 14 | rgb = np.zeros(shape=[num_verts, 3], dtype=np.float32) 15 | rgb[:, 0] = plydata['vertex']['red'] 16 | rgb[:, 1] = plydata['vertex']['green'] 17 | rgb[:, 2] = plydata['vertex']['blue'] 18 | alpha = np.array(plydata['vertex']['alpha']) 19 | 20 | face_indices = plydata['face']['vertex_indices'] 21 | 22 | return vertices, rgb, alpha, face_indices 23 | 24 | 25 | def write_ply(output_file, data_dict): 26 | verts, colors = data_dict['xyz'], data_dict['rgb'] 27 | if 'indices' not in data_dict: 28 | data_dict['indices'] = [] 29 | 30 | file = open(output_file, 'w') 31 | file.write('ply \n') 32 | file.write('format ascii 1.0\n') 33 | file.write('element vertex {:d}\n'.format(len(verts))) 34 | file.write('property float x\n') 35 | file.write('property float y\n') 36 | file.write('property float z\n') 37 | file.write('property uchar red\n') 38 | file.write('property uchar green\n') 39 | file.write('property uchar blue\n') 40 | if 'alpha' in data_dict: 41 | file.write('property uchar alpha\n') 42 | file.write('element face {:d}\n'.format(len(data_dict['indices']))) 43 | file.write('property list uchar uint vertex_indices\n') 44 | 
file.write('end_header\n') 45 | 46 | if 'alpha' in data_dict: 47 | for vert, color, a in zip(verts, colors, data_dict['alpha']): 48 | file.write('{:f} {:f} {:f} {:d} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2], 49 | int(color[0]), 50 | int(color[1]), 51 | int(color[2]), 52 | int(a))) 53 | else: 54 | for vert, color in zip(verts, colors): 55 | file.write('{:f} {:f} {:f} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2], 56 | int(color[0]), 57 | int(color[1]), 58 | int(color[2]))) 59 | for ind in data_dict['indices']: 60 | file.write('3 {:d} {:d} {:d}\n'.format(ind[0], ind[1], ind[2])) 61 | file.close() 62 | --------------------------------------------------------------------------------
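Finally, a small hypothetical usage sketch for `write_ply` above (not part of the repo); the import path, output filename, toy vertices, colors, and the single triangle face are assumptions for illustration:

```python
import numpy as np

# Assumed import path; adjust to how the tools/ directory is on your PYTHONPATH.
from visual_utils.indoor_utils.ply_utils import write_ply

# Three vertices with per-vertex RGB colors and one triangle face.
points = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
colors = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])

write_ply('toy_scene.ply', {'xyz': points, 'rgb': colors, 'indices': [[0, 1, 2]]})

# Note: read_ply in the same module expects an 'alpha' vertex property,
# which this file does not include, so it would not round-trip this output.
```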