├── LICENSE
├── README.md
├── assets
├── scene_0019.gif
├── scene_0025.gif
├── scene_003.gif
├── scene_005.gif
└── scene_0164.gif
├── docs
├── DATASET.md
├── INFER.md
├── INSTALL.md
├── MODEL.md
├── association_module.png
└── framework.png
├── pcseg
├── __init__.py
├── config.py
├── datasets
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-38.pyc
│ │ ├── dataset.cpython-38.pyc
│ │ └── indoor_dataset.cpython-38.pyc
│ ├── augmentor
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── augmentor_utils.cpython-38.pyc
│ │ │ └── data_augmentor.cpython-38.pyc
│ │ ├── augmentor_utils.py
│ │ └── data_augmentor.py
│ ├── dataset.py
│ ├── indoor_dataset.py
│ ├── processor
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ └── data_processor.cpython-38.pyc
│ │ └── data_processor.py
│ ├── s3dis
│ │ ├── __pycache__
│ │ │ └── s3dis_dataset.cpython-38.pyc
│ │ ├── meta
│ │ │ ├── all_data_label.txt
│ │ │ ├── anno_paths.txt
│ │ │ ├── area6_data_label.txt
│ │ │ └── class_names.txt
│ │ ├── preprocess.py
│ │ └── s3dis_dataset.py
│ └── scannet
│ │ ├── __pycache__
│ │ └── scannet_dataset.cpython-38.pyc
│ │ └── scannet_dataset.py
├── external_libs
│ └── softgroup_ops
│ │ ├── ops
│ │ ├── __init__.py
│ │ ├── functions.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── bfs_cluster
│ │ │ ├── bfs_cluster.cpp
│ │ │ ├── bfs_cluster.cu
│ │ │ └── bfs_cluster.h
│ │ │ ├── cal_iou_and_masklabel
│ │ │ ├── cal_iou_and_masklabel.cpp
│ │ │ ├── cal_iou_and_masklabel.cu
│ │ │ └── cal_iou_and_masklabel.h
│ │ │ ├── cuda.cu
│ │ │ ├── cuda_utils.h
│ │ │ ├── datatype
│ │ │ ├── datatype.cpp
│ │ │ └── datatype.h
│ │ │ ├── roipool
│ │ │ ├── roipool.cpp
│ │ │ ├── roipool.cu
│ │ │ └── roipool.h
│ │ │ ├── sec_mean
│ │ │ ├── sec_mean.cpp
│ │ │ ├── sec_mean.cu
│ │ │ └── sec_mean.h
│ │ │ ├── softgroup_api.cpp
│ │ │ ├── softgroup_ops.cpp
│ │ │ ├── softgroup_ops.h
│ │ │ └── voxelize
│ │ │ ├── voxelize.cpp
│ │ │ ├── voxelize.cu
│ │ │ └── voxelize.h
│ │ ├── setup.cfg
│ │ └── setup.py
├── models
│ ├── __init__.py
│ ├── __pycache__
│ │ └── __init__.cpython-38.pyc
│ ├── adapter
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ └── vl_adapter.cpython-38.pyc
│ │ └── vl_adapter.py
│ ├── head
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── binary_head.cpython-38.pyc
│ │ │ ├── caption_head.cpython-38.pyc
│ │ │ ├── inst_head.cpython-38.pyc
│ │ │ ├── linear_head.cpython-38.pyc
│ │ │ └── text_seg_head.cpython-38.pyc
│ │ ├── binary_head.py
│ │ ├── caption_head.py
│ │ ├── inst_head.py
│ │ ├── linear_head.py
│ │ └── text_seg_head.py
│ ├── model_utils
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── basic_block_1d.cpython-38.pyc
│ │ │ ├── fp16.cpython-38.pyc
│ │ │ ├── rle_utils.cpython-38.pyc
│ │ │ └── unet_blocks.cpython-38.pyc
│ │ ├── basic_block_1d.py
│ │ ├── basic_block_2d.py
│ │ ├── fp16.py
│ │ ├── rle_utils.py
│ │ └── unet_blocks.py
│ ├── text_networks
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── prompt_template.cpython-38.pyc
│ │ │ └── text_models.cpython-38.pyc
│ │ ├── prompt_template.py
│ │ └── text_models.py
│ ├── vision_backbones_3d
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ └── spconv_unet_indoor.cpython-38.pyc
│ │ ├── spconv_unet_indoor.py
│ │ └── vfe
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ ├── __init__.cpython-38.pyc
│ │ │ ├── indoor_vfe.cpython-38.pyc
│ │ │ └── vfe_template.cpython-38.pyc
│ │ │ ├── indoor_vfe.py
│ │ │ └── vfe_template.py
│ └── vision_networks
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ ├── __init__.cpython-38.pyc
│ │ ├── network_template.cpython-38.pyc
│ │ └── sparseunet_textseg.cpython-38.pyc
│ │ ├── network_template.py
│ │ ├── sparseunet.py
│ │ └── sparseunet_textseg.py
└── utils
│ ├── __init__.py
│ ├── arnold_utils.py
│ ├── caption_utils.py
│ ├── common_utils.py
│ ├── commu_utils.py
│ ├── loss_utils.py
│ ├── metric_utils.py
│ ├── spconv_utils.py
│ └── voxelize_utils.py
├── requirements.txt
├── setup.py
└── tools
├── _init_path.py
├── cfgs
├── dataset_configs
│ ├── s3dis_dataset.yaml
│ ├── s3dis_dataset_image.yaml
│ ├── scannet_dataset.yaml
│ └── scannet_dataset_image.yaml
├── s3dis_models
│ ├── inst
│ │ ├── softgroup_clip_adamw.yaml
│ │ ├── softgroup_clip_base6_caption_adamw.yaml
│ │ └── softgroup_clip_base8_caption_adamw.yaml
│ ├── spconv_clip_adamw.yaml
│ ├── spconv_clip_base6_caption_adamw.yaml
│ └── spconv_clip_base8_caption_adamw.yaml
└── scannet_models
│ ├── inst
│ ├── softgroup_clip_adamw.yaml
│ ├── softgroup_clip_base10_caption_adamw.yaml
│ ├── softgroup_clip_base13_caption_adamw.yaml
│ ├── softgroup_clip_base8_caption_adamw.yaml
│ └── softgroup_clip_openvocab_test.yaml
│ ├── spconv_clip_adamw.yaml
│ ├── spconv_clip_base10_caption_adamw.yaml
│ ├── spconv_clip_base12_caption_adamw.yaml
│ ├── spconv_clip_base15_caption_adamw.yaml
│ └── spconv_clip_openvocab_test.yaml
├── eval_utils
├── eval_utils.py
├── inst_eval
│ ├── __init__.py
│ ├── eval_utils.py
│ ├── instance_eval_utils.py
│ └── pointwise_eval_utils.py
└── save_utils.py
├── process_tools
├── __pycache__
│ ├── generate_caption.cpython-38.pyc
│ ├── generate_caption_idx.cpython-310.pyc
│ └── generate_caption_idx.cpython-38.pyc
├── combine_multiple_caption_files.py
├── filter_caption_without_points.py
├── generate_caption.py
├── generate_caption_idx.py
└── generate_category_embedding.py
├── scripts
├── dist_test.sh
└── dist_train.sh
├── test.py
├── train.py
├── train_utils
├── optimization
│ ├── __init__.py
│ ├── fastai_optim.py
│ └── learning_schedules_fastai.py
└── train_utils.py
└── visual_utils
├── indoor_utils
├── color_utils.py
└── ply_utils.py
├── open3d_vis_utils.py
├── visualize_indoor.py
└── visualize_utils.py
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | PLA & RegionPLC
4 | This repo contains the official implementation of PLA (CVPR 2023) and RegionPLC (CVPR 2024).
5 |
6 |
7 |
8 | PLA: Language-Driven Open-Vocabulary 3D Scene Understanding
9 |
10 |
18 |
19 | CVPR 2023
20 |
21 | [project page](https://dingry.github.io/projects/PLA) | [arXiv](https://arxiv.org/abs/2211.16312)
22 |
23 |
24 |
25 | RegionPLC: Regional Point-Language Contrastive Learning for Open-World 3D Scene Understanding
26 |
27 |
34 | CVPR 2024
35 |
36 | project page | arXiv
37 |
38 |
39 |
40 | ##### Highlights:
41 | - Official PLA implementation is contained in the `main` branch
42 | - Official RegionPLC implementation is contained in the `regionplc` branch
43 |
44 | ### Release
45 | - [2024-05-05] Releasing the **RegionPLC** implementation. Please check out the `regionplc` branch to try it!
46 |
47 | ### Getting Started
48 |
49 | #### Installation
50 | Please refer to [INSTALL.md](docs/INSTALL.md) for the installation.
51 |
52 | #### Dataset Preparation
53 | Please refer to [DATASET.md](docs/DATASET.md) for dataset preparation.
54 |
55 | #### Training & Inference
56 |
57 | Please refer to [MODEL.md](docs/MODEL.md) for training and inference scripts and pretrained models.
58 |
59 |
60 | ### Citation
61 | If you find this project useful in your research, please consider citing:
62 | ```bibtex
63 | @inproceedings{ding2022language,
64 | title={PLA: Language-Driven Open-Vocabulary 3D Scene Understanding},
65 | author={Ding, Runyu and Yang, Jihan and Xue, Chuhui and Zhang, Wenqing and Bai, Song and Qi, Xiaojuan},
66 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
67 | year={2023}
68 | }
69 | ```
70 |
71 | ```bibtex
72 | @inproceedings{yang2024regionplc,
73 | title={RegionPLC: Regional point-language contrastive learning for open-world 3d scene understanding},
74 | author={Yang, Jihan and Ding, Runyu and Deng, Weipeng and Wang, Zhe and Qi, Xiaojuan},
75 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
76 | year={2024}
77 | }
78 | ```
79 |
80 | ### Acknowledgement
81 | Code is partly borrowed from [OpenPCDet](https://github.com/open-mmlab/OpenPCDet), [PointGroup](https://github.com/dvlab-research/PointGroup) and [SoftGroup](https://github.com/thangvubk/SoftGroup).
--------------------------------------------------------------------------------
/assets/scene_0019.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0019.gif
--------------------------------------------------------------------------------
/assets/scene_0025.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0025.gif
--------------------------------------------------------------------------------
/assets/scene_003.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_003.gif
--------------------------------------------------------------------------------
/assets/scene_005.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_005.gif
--------------------------------------------------------------------------------
/assets/scene_0164.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0164.gif
--------------------------------------------------------------------------------
/docs/DATASET.md:
--------------------------------------------------------------------------------
1 | The dataset configs are located within [tools/cfgs/dataset_configs](../tools/cfgs/dataset_configs), and the model configs are located within [tools/cfgs](../tools/cfgs) for different settings.
2 |
3 | #### ScanNet Dataset
4 | - Please download the [ScanNet Dataset](http://www.scan-net.org/) and follow [PointGroup](https://github.com/dvlab-research/PointGroup/blob/master/dataset/scannetv2/prepare_data_inst.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD).
5 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD). If you want to generate captions on your own, please download the image data ([scannet_frames_25k](http://www.scan-net.org/)) from ScanNet and follow the scripts [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py).
6 |
7 | - The directory organization should be as follows:
8 |
9 | ```
10 | PLA
11 | ├── data
12 | │ ├── scannetv2
13 | │ │ │── train
14 | │ │ │ │── scene0000_00.pth
15 | │ │ │ │── ...
16 | │ │ │── val
17 | │ │ │── text_embed
18 | │ │ │── caption_idx
19 | │ │ │── scannetv2_train.txt
20 | │ │ │── scannetv2_val.txt
21 | │ │ │── scannet_frames_25k (optional, only for caption generation)
22 | ├── pcseg
23 | ├── tools
24 | ```
25 |
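
If you want to sanity-check the pre-processed scenes, the short sketch below (not part of the repository) loads a single `.pth` file. It assumes the PointGroup-style layout referenced above, where each training scene is saved as a tuple of coordinates, colors, semantic labels and instance labels; adjust the unpacking if your copy of the data differs.

```python
# Minimal sanity check for one pre-processed ScanNet scene
# (assumed PointGroup-style tuple: coords, colors, semantic labels, instance labels).
import torch

coords, colors, sem_labels, inst_labels = torch.load('data/scannetv2/train/scene0000_00.pth')
print(coords.shape, colors.shape)            # (N, 3) xyz and (N, 3) rgb
print(sem_labels.shape, inst_labels.shape)   # per-point semantic / instance ids
```
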
26 | #### S3DIS Dataset
27 | - Please download the [S3DIS Dataset](http://buildingparser.stanford.edu/dataset.html#Download) and follow [pcseg/datasets/s3dis/preprocess.py](../pcseg/datasets/s3dis/preprocess.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7).
28 | ```bash
29 | python3 pcseg/datasets/s3dis/preprocess.py
30 | ```
31 |
32 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7). If you want to generate captions on your own, please download the image data [here](https://github.com/alexsax/2D-3D-Semantics) and follow the scripts here: [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py).
33 |
34 | - The directory organization should be as follows:
35 |
36 | ```
37 | PLA
38 | ├── data
39 | │ ├── s3dis
40 | │ │ │── stanford_indoor3d_inst
41 | │ │ │ │── Area_1_Conference_1.npy
42 | │ │ │ │── ...
43 | │ │ │── text_embed
44 | │ │ │── caption_idx
45 | │ │ │── s3dis_2d (optional, only for caption generation)
46 | ├── pcseg
47 | ├── tools
48 | ```
49 |
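
As a quick check of the pre-processed S3DIS rooms, the sketch below (not part of the repository) loads one room file. According to `pcseg/datasets/s3dis/preprocess.py`, each room is stored as an N x 8 array with columns x, y, z, r, g, b, semantic label, instance id.

```python
# Minimal sanity check for one pre-processed S3DIS room (N x 8: XYZ, RGB, semantic label, instance id).
import numpy as np

room = np.load('data/s3dis/stanford_indoor3d_inst/Area_1_office_1.npy')
xyz, rgb = room[:, 0:3], room[:, 3:6]
sem_label, inst_label = room[:, 6], room[:, 7]
print(room.shape, np.unique(sem_label))
```
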
--------------------------------------------------------------------------------
/docs/INFER.md:
--------------------------------------------------------------------------------
1 | If you wish to test on custom 3D scenes or categories, you can use our example configs:
2 | `tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml` and `tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml`
3 |
4 | The key parameters to consider are as follows:
5 | - `TEXT_EMBED.CATEGORY_NAMES`
6 |
7 | This parameter allows you to define the category list for segmentation.
8 |
9 | - `TASK_HEAD.CORRECT_SEG_PRED_BINARY` and `INST_HEAD.CORRECT_SEG_PRED_BINARY`
10 |
11 | These parameters control whether the binary head is used to rectify the semantic scores.
12 |
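
As an illustration only, the two settings above can also be overridden in Python before building the model, using `pcseg.config`; the category list below is a placeholder, and the exact key nesting should follow the example configs.

```python
# Illustrative sketch (run from the tools/ directory); adjust key nesting to match the yaml.
from pcseg.config import cfg, cfg_from_yaml_file

cfg_from_yaml_file('cfgs/scannet_models/spconv_clip_openvocab_test.yaml', cfg)
cfg.TEXT_EMBED.CATEGORY_NAMES = ['chair', 'table', 'monitor', 'trash can']  # placeholder category list
cfg.TASK_HEAD.CORRECT_SEG_PRED_BINARY = True
```
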
13 |
14 | To save the results, add the flag `--save_results semantic,instance` to the test command. Afterward, you can use the visualization utilities in `tools/visual_utils/visualize_indoor.py` to visualize the predictions.
15 |
16 |
--------------------------------------------------------------------------------
/docs/INSTALL.md:
--------------------------------------------------------------------------------
1 | #### Requirements
2 | All the code is tested in the following environment:
3 | - Python 3.7+
4 | - PyTorch 1.8
5 | - CUDA 11.1
6 | - [spconv v2.x](https://github.com/traveller59/spconv)
7 |
8 | #### Install dependent libraries
9 | a. Clone this repository.
10 | ```bash
11 | git clone https://github.com/CVMI-Lab/PLA.git
12 | ```
13 |
14 | b. Install the dependent libraries as follows:
15 |
16 | * Install the dependent Python libraries (Please note that you need to install the correct version of `torch` and `spconv` according to your CUDA version):
17 | ```bash
18 | pip install -r requirements.txt
19 | ```
20 |
21 | * Install [SoftGroup](https://github.com/thangvubk/SoftGroup) following its [official guidance](https://github.com/thangvubk/SoftGroup/blob/main/docs/installation.md).
22 | ```bash
23 | cd pcseg/external_libs/softgroup_ops
24 | python3 setup.py build_ext develop
25 | cd ../../..
26 | ```
27 |
28 | * Install [pcseg](../pcseg)
29 | ```bash
30 | python3 setup.py develop
31 | ```
32 |
--------------------------------------------------------------------------------
/docs/MODEL.md:
--------------------------------------------------------------------------------
1 | #### Training
2 |
3 | ```bash
4 | cd tools
5 | sh scripts/dist_train.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} ${PY_ARGS}
6 | ```
7 |
8 | For instance,
9 | - train B15/N4 semantic segmentation on ScanNet:
10 | ```bash
11 | cd tools
12 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --extra_tag exp_tag
13 | ```
14 | - train B13/N4 instance segmentation on ScanNet:
15 | ```bash
16 | cd tools
17 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml --extra_tag exp_tag
18 | ```
19 |
20 | #### Inference
21 |
22 | ```bash
23 | cd tools
24 | sh scripts/dist_test.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} --ckpt ${CKPT_PATH}
25 | ```
26 |
27 | For instance,
28 | - to test a B15/N4 model on ScanNet:
29 | ```bash
30 | cd tools
31 | sh scripts/dist_test.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --ckpt output/scannet_models/spconv_clip_base15_caption/exp_tag/ckpt/checkpoint_ep128.pth
32 | ```
33 |
34 | ### Model Zoo
35 | - semantic segmentation
36 |
37 | | Dataset | Partition | hIoU / mIoU(B) / mIoU(N) | Path |
38 | |:---:|:---:|:---:|:---:|
39 | | ScanNet | B15/N4 | 64.9 / 67.8 / 62.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef8xk_X0ortMjC0F8PBQl2wBacVPgO72La8h_ZTDsKj__Q?e=Uq6W8I) |
40 | | ScanNet | B12/N7 | 55.9 / 70.4 / 46.4 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EVl7SdeUEPFAvrj2xnWSb-sBCOtWYyVOwBo6ggFb9x7dNA?e=feZaxH) |
41 | | ScanNet | B10/N9 | 52.8 / 76.6 / 40.3 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef0P_6XraDpCo0RRgOJ1wGQB-xOW7T6lecvVRi5P90Edbw?e=hqrP8X) |
42 | | S3DIS | B8/N4 | 35.6 / 58.3 / 25.6 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EYIW4SNX5B9Go_LKiim1KFEB_abYv0bDZMggE_6Ifjau0g?e=8BD0K3) |
43 | | S3DIS | B6/N6 | 38.4 / 53.9 / 29.8 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EeNYtkS3pmhAvc3Hxj7__SwB8SMzZdzmljRtCYuYG8NHcA?e=aC0aE2) |
44 |
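Here hIoU denotes the harmonic mean of the base- and novel-category scores, i.e. hIoU = 2 × mIoU(B) × mIoU(N) / (mIoU(B) + mIoU(N)), consistent with the numbers above; hAP50 in the instance segmentation table below is defined analogously over mAP50(B) and mAP50(N).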
45 |
46 | - instance segmentation
47 |
48 | | Dataset | Partition | hAP50 / mAP50(B) / mAP50(N) | Path |
49 | |:---:|:---:|:---:|:---:|
50 | | ScanNet | B13/N4 | 57.8 / 58.7 / 56.9| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Eb4N2hfCevlBlBxWlK9DtioBP6RX7gtXUmY0Huu4MknUHA?e=YDydlj) |
51 | | ScanNet | B10/N7 | 31.6 / 54.8 / 22.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETsHZCFElvdCmk8ulRzBk-EBxm8fHk8rLJnpUdk9_n3i1Q?e=4SGy1N) |
52 | | ScanNet | B8/N9 | 36.9 / 63.1 / 26.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EXAaU8RDecJFn_1J2Q-IqdsBALbv-5d_L_RyIOrdIjB66g?e=c8dFD6) |
53 | | S3DIS | B8/N4 | 17.2 / 60.9 / 10.0| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETzzD-pEhvtMkJGnIxzgIP0Bk3f2He9_hkgfVtexEMFqpg?e=xJpaOV) |
54 | | S3DIS | B6/N6 |15.8 / 48.2 / 9.5| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EWoqIoBWfSRBqQwahLTKQGkB5Gwp8zs0EvT3MkGMDiBOrw?e=daBppj) |
55 |
56 |
--------------------------------------------------------------------------------
/docs/association_module.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/association_module.png
--------------------------------------------------------------------------------
/docs/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/framework.png
--------------------------------------------------------------------------------
/pcseg/__init__.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | from pathlib import Path
3 |
4 | from .version import __version__
5 |
6 | __all__ = [
7 | '__version__'
8 | ]
9 |
10 |
11 | def get_git_commit_number():
12 | if not (Path(__file__).parent / '../.git').exists():
13 | return '0000000'
14 |
15 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
16 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7]
17 | return git_commit_number
18 |
19 |
20 | script_version = get_git_commit_number()
21 |
22 |
23 | if script_version not in __version__:
24 | __version__ = __version__ + '+py%s' % script_version
25 |
--------------------------------------------------------------------------------
/pcseg/config.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import yaml
4 | from easydict import EasyDict
5 |
6 |
7 | def log_config_to_file(cfg, pre='cfg', logger=None):
8 | for key, val in cfg.items():
9 | if isinstance(cfg[key], EasyDict):
10 | logger.info('\n%s.%s = edict()' % (pre, key))
11 | log_config_to_file(cfg[key], pre=pre + '.' + key, logger=logger)
12 | continue
13 | logger.info('%s.%s: %s' % (pre, key, val))
14 |
15 |
16 | def cfg_from_list(cfg_list, config):
17 | """Set config keys via list (e.g., from command line)."""
18 | from ast import literal_eval
19 | assert len(cfg_list) % 2 == 0
20 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
21 | key_list = k.split('.')
22 | d = config
23 | for subkey in key_list[:-1]:
24 | assert subkey in d, 'NotFoundKey: %s' % subkey
25 | d = d[subkey]
26 | subkey = key_list[-1]
27 | assert subkey in d, 'NotFoundKey: %s' % subkey
28 | try:
29 | value = literal_eval(v)
30 | except:
31 | value = v
32 |
33 | if type(value) != type(d[subkey]) and isinstance(d[subkey], EasyDict):
34 | key_val_list = value.split(',')
35 | for src in key_val_list:
36 | cur_key, cur_val = src.split(':')
37 | val_type = type(d[subkey][cur_key])
38 | cur_val = val_type(cur_val)
39 | d[subkey][cur_key] = cur_val
40 | elif type(value) != type(d[subkey]) and isinstance(d[subkey], list):
41 | val_list = value.split(',')
42 | for k, x in enumerate(val_list):
43 | val_list[k] = type(d[subkey][0])(x)
44 | d[subkey] = val_list
45 | else:
46 | assert type(value) == type(d[subkey]), \
47 | 'type {} does not match original type {}'.format(type(value), type(d[subkey]))
48 | d[subkey] = value
49 |
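# Usage sketch for cfg_from_list (illustrative; the dotted keys below are placeholders and are
# not guaranteed to exist in a given config):
#   cfg_from_list(['OPTIMIZATION.LR', '0.001', 'MODEL.NAME', 'MyNet'], cfg)
# Keys and values alternate in the list; each dotted key must already exist in the loaded
# config, and values are parsed with ast.literal_eval (falling back to raw strings).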
50 |
51 | def merge_new_config(config, new_config):
52 | if '_BASE_CONFIG_' in new_config:
53 | with open(new_config['_BASE_CONFIG_'], 'r') as f:
54 | try:
55 | yaml_config = yaml.load(f, Loader=yaml.FullLoader)
56 | except:
57 | yaml_config = yaml.safe_load(f)
58 | # config.update(EasyDict(yaml_config))
59 | merge_new_config(config, yaml_config)
60 |
61 | for key, val in new_config.items():
62 | if not isinstance(val, dict):
63 | config[key] = val
64 | continue
65 | if key not in config:
66 | config[key] = EasyDict()
67 | merge_new_config(config[key], val)
68 |
69 | return config
70 |
71 |
72 | def cfg_from_yaml_file(cfg_file, config):
73 | with open(cfg_file, 'r') as f:
74 | try:
75 | new_config = yaml.load(f, Loader=yaml.FullLoader)
76 | except:
77 | new_config = yaml.safe_load(f)
78 |
79 | merge_new_config(config=config, new_config=new_config)
80 |
81 | return config
82 |
83 |
84 | cfg = EasyDict()
85 | cfg.ROOT_DIR = (Path(__file__).resolve().parent / '../').resolve()
86 | cfg.LOCAL_RANK = 0
87 |
--------------------------------------------------------------------------------
/pcseg/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from functools import partial
3 | from torch.utils.data import DataLoader
4 | from torch.utils.data import DistributedSampler as _DistributedSampler
5 |
6 | from pcseg.utils import common_utils
7 |
8 | from .dataset import DatasetTemplate
9 | from .scannet.scannet_dataset import ScanNetDataset, ScanNetInstDataset
10 | from .s3dis.s3dis_dataset import S3DISDataset, S3DISInstDataset
11 |
12 |
13 | __all__ = {
14 | 'DatasetTemplate': DatasetTemplate,
15 | 'ScanNetDataset': ScanNetDataset,
16 | 'ScanNetInstDataset': ScanNetInstDataset,
17 | 'S3DISDataset': S3DISDataset,
18 | 'S3DISInstDataset': S3DISInstDataset
19 | }
20 |
21 |
22 | class DistributedSampler(_DistributedSampler):
23 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
24 | super().__init__(dataset, num_replicas=num_replicas, rank=rank)
25 | self.shuffle = shuffle
26 |
27 | def __iter__(self):
28 | if self.shuffle:
29 | g = torch.Generator()
30 | g.manual_seed(self.epoch)
31 | indices = torch.randperm(len(self.dataset), generator=g).tolist()
32 | else:
33 | indices = torch.arange(len(self.dataset)).tolist()
34 |
35 | indices += indices[:(self.total_size - len(indices))]
36 | assert len(indices) == self.total_size
37 |
38 | indices = indices[self.rank:self.total_size:self.num_replicas]
39 | assert len(indices) == self.num_samples
40 |
41 | return iter(indices)
42 |
43 |
44 | def build_dataloader(dataset_cfg, class_names, batch_size, dist, root_path=None, workers=4, seed=None,
45 | logger=None, training=True, merge_all_iters_to_one_epoch=False, total_epochs=0,
46 | multi_epoch_loader=False):
47 |
48 | dataset = __all__[dataset_cfg.DATASET](
49 | dataset_cfg=dataset_cfg,
50 | class_names=class_names,
51 | root_path=root_path,
52 | training=training,
53 | logger=logger,
54 | )
55 |
56 | if merge_all_iters_to_one_epoch:
57 | assert hasattr(dataset, 'merge_all_iters_to_one_epoch')
58 | dataset.merge_all_iters_to_one_epoch(merge=True, epochs=total_epochs)
59 |
60 | if dist:
61 | if training:
62 | sampler = torch.utils.data.distributed.DistributedSampler(dataset)
63 | else:
64 | rank, world_size = common_utils.get_dist_info()
65 | sampler = DistributedSampler(dataset, world_size, rank, shuffle=False)
66 | else:
67 | sampler = None
68 |
69 | if multi_epoch_loader:
70 | loader = MultiEpochsDataLoader
71 | else:
72 | loader = DataLoader
73 |
74 | dataloader = loader(
75 | dataset, batch_size=batch_size, pin_memory=True, num_workers=workers,
76 | shuffle=(sampler is None) and training, drop_last=training, sampler=sampler,
77 | collate_fn=getattr(dataset, dataset_cfg.COLLATE_FN),
78 | timeout=0, worker_init_fn=partial(common_utils.worker_init_fn, seed=seed)
79 | )
80 |
81 | return dataset, dataloader, sampler
82 |
83 |
84 | class _RepeatSampler(object):
85 | """ Sampler that repeats forever.
86 | Args:
87 | sampler (Sampler)
88 | """
89 |
90 | def __init__(self, sampler):
91 | self.sampler = sampler
92 |
93 | def __iter__(self):
94 | while True:
95 | yield from iter(self.sampler)
96 |
--------------------------------------------------------------------------------
/pcseg/datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/__pycache__/dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__init__.py
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/augmentor_utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import scipy.ndimage
4 | import scipy.interpolate
5 | import scipy.stats
6 |
7 |
8 | def check_key(key):
9 | exist = key is not None
10 | if not exist:
11 | return False
12 | if isinstance(key, bool):
13 | enabled = key
14 | elif isinstance(key, dict):
15 | enabled = key.get('enabled', True)
16 | else:
17 | enabled = True
18 | return enabled
19 |
20 |
21 | def check_p(key):
22 | return (not isinstance(key, dict)) or ('p' not in key) or (np.random.rand() < key['p'])
23 |
24 |
25 | def elastic(x, gran, mag):
26 | blur0 = np.ones((3, 1, 1)).astype('float32') / 3
27 | blur1 = np.ones((1, 3, 1)).astype('float32') / 3
28 | blur2 = np.ones((1, 1, 3)).astype('float32') / 3
29 |
30 | bb = np.abs(x).max(0).astype(np.int32) // gran + 3
31 | noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)]
32 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
33 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
34 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise]
35 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise]
36 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise]
37 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise]
38 | ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb]
39 | interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise]
40 |
41 | def g(x_):
42 | return np.hstack([i(x_)[:, None] for i in interp])
43 |
44 | return x + g(x) * mag
45 |
46 |
47 | def scene_aug(aug, xyz, rgb=None):
48 | assert xyz.ndim == 2
49 | m = np.eye(3)
50 | if check_key(aug.jitter):
51 | m += np.random.randn(3, 3) * 0.1
52 | if check_key(aug.flip) and check_p(aug.flip):
53 | m[0][0] *= -1 # np.random.randint(0, 2) * 2 - 1 # flip x randomly
54 | if check_key(aug.rotation) and check_p(aug.rotation):
55 | theta_x = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[0]
56 | theta_y = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[1]
57 | theta_z = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[2]
58 | Rx = np.array \
59 | ([[1, 0, 0], [0, math.cos(theta_x), -math.sin(theta_x)], [0, math.sin(theta_x), math.cos(theta_x)]])
60 | Ry = np.array \
61 | ([[math.cos(theta_y), 0, math.sin(theta_y)], [0, 1, 0], [-math.sin(theta_y), 0, math.cos(theta_y)]])
62 | Rz = np.array \
63 | ([[math.cos(theta_z), math.sin(theta_z), 0], [-math.sin(theta_z), math.cos(theta_z), 0], [0, 0, 1]])
64 | rot_mats = [Rx, Ry, Rz]
65 | if aug.rotation.get('shuffle', False):
66 | np.random.shuffle(rot_mats)
67 | m = np.matmul(m, rot_mats[0].dot(rot_mats[1]).dot(rot_mats[2]))
68 | xyz = np.matmul(xyz, m)
69 | if check_key(aug.random_jitter) and check_p(aug.random_jitter):
70 | if aug.random_jitter.accord_to_size:
71 | jitter_scale = (xyz.max(0) - xyz.min(0)).mean() * 0.1
72 | else:
73 | jitter_scale = aug.random_jitter.value
74 | random_noise = (np.random.rand(xyz.shape[0], xyz.shape[1]) - 0.5) * jitter_scale
75 | xyz += random_noise
76 | if check_key(aug.scaling_scene) and check_p(aug.scaling_scene):
77 | scaling_fac = np.random.rand() * (aug.scaling_scene.value[1] - aug.scaling_scene.value[0]) \
78 | + aug.scaling_scene.value[0]
79 | xyz_center = (xyz.max(0) + xyz.min(0)) / 2.0
80 | xyz = (xyz - xyz_center) * scaling_fac + xyz_center
81 |
82 | if rgb is not None and check_key(aug.color_jitter):
83 | rgb += np.random.randn(3) * 0.1
84 | return xyz, rgb
85 |
86 |
87 | def crop(xyz, full_scale, max_npoint, step=32):
88 | xyz_offset = xyz.copy()
89 | valid_idxs = (xyz_offset.min(1) >= 0)
90 | assert valid_idxs.sum() == xyz.shape[0]
91 | full_scale = np.array([full_scale[1]] * 3)
92 | room_range = xyz.max(0) - xyz.min(0)
93 |
94 | while valid_idxs.sum() > max_npoint:
95 | step_temp = step
96 | if valid_idxs.sum() > 1e6:
97 | step_temp = step * 2
98 | offset = np.clip(full_scale - room_range + 0.001, None, 0) * np.random.rand(3)
99 | xyz_offset = xyz + offset
100 | valid_idxs = (xyz_offset.min(1) >= 0) * ((xyz_offset < full_scale).sum(1) == 3)
101 | full_scale[:2] -= step_temp
102 |
103 | return xyz_offset, valid_idxs
104 |
105 |
--------------------------------------------------------------------------------
/pcseg/datasets/augmentor/data_augmentor.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 |
3 | import numpy as np
4 |
5 | from . import augmentor_utils
6 |
7 |
8 | class DataAugmentor(object):
9 | def __init__(self, dataset_cfg, **kwargs):
10 | self.data_augmentor_queue = []
11 | self.aug_cfg = dataset_cfg.DATA_AUG
12 | self.kwargs = kwargs
13 | aug_config_list = self.aug_cfg.AUG_LIST
14 |
15 | self.data_augmentor_queue = []
16 | for aug in aug_config_list:
17 | if aug not in self.aug_cfg:
18 | continue
19 | cur_augmentor = partial(getattr(self, aug), config=self.aug_cfg[aug])
20 | self.data_augmentor_queue.append(cur_augmentor)
21 |
22 | def __getstate__(self):
23 | d = dict(self.__dict__)
24 | d.pop('logger', None)
25 | return d
26 |
27 | def __setstate__(self, d):
28 | self.__dict__.update(d)
29 |
30 | def shuffle(self, data_dict=None, config=None):
31 | shuffle_idx = np.random.permutation(data_dict['points_xyz'].shape[0])
32 | data_dict = self.update_data_dict(data_dict, shuffle_idx)
33 | return data_dict
34 |
35 | def crop(self, data_dict=None, config=None):
36 | data_dict['points_xyz_voxel_scale'], valid_idxs = augmentor_utils.crop(
37 | data_dict['points_xyz_voxel_scale'], self.kwargs['full_scale'], self.kwargs['max_npoint'], config.step,
38 | )
39 | data_dict = self.update_data_dict(data_dict, valid_idxs)
40 | if data_dict['points_xyz'].shape[0] == 0:
41 | data_dict['valid'] = False
42 | return data_dict
43 |
44 | def forward(self, data_dict):
45 | """
46 | Args:
47 | data_dict:
48 | points: (N, 3 + C_in)
49 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading]
50 | gt_names: optional, (N), string
51 | ...
52 |
53 | Returns:
54 | """
55 | data_dict['valid'] = True
56 | for cur_augmentor in self.data_augmentor_queue:
57 | data_dict = cur_augmentor(data_dict=data_dict)
58 | return data_dict
59 |
60 | def scene_aug(self, data_dict=None, config=None):
61 | if self.check_func(config) and self.check_data(data_dict):
62 | data_dict['points_xyz'], data_dict['rgb'] = augmentor_utils.scene_aug(
63 | config, data_dict['points_xyz'], data_dict['rgb']
64 | )
65 | if data_dict['points_xyz'].shape[0] == 0:
66 | data_dict['valid'] = False
67 | return data_dict
68 |
69 | @staticmethod
70 | def update_data_dict(data_dict, idx):
71 | for key in data_dict:
72 | if key in ['points_xyz', 'points', 'points_xyz_voxel_scale', 'rgb', 'labels',
73 | 'inst_label', 'binary_labels', 'origin_idx']:
74 | if data_dict[key] is not None:
75 | data_dict[key] = data_dict[key][idx]
76 | return data_dict
77 |
78 | @staticmethod
79 | def check_func(key):
80 | return augmentor_utils.check_key(key) and augmentor_utils.check_p(key)
81 |
82 | def elastic(self, data_dict=None, config=None):
83 | data_dict['points_xyz_voxel_scale'] = data_dict['points_xyz'] * self.kwargs['voxel_scale']
84 | if self.check_func(config) and self.check_data(data_dict):
85 | for (gran_fac, mag_fac) in config.value:
86 | data_dict['points_xyz_voxel_scale'] = augmentor_utils.elastic(
87 | data_dict['points_xyz_voxel_scale'], gran_fac * self.kwargs['voxel_scale'] // 50,
88 | mag_fac * self.kwargs['voxel_scale'] / 50
89 | )
90 | if config.apply_to_feat:
91 | data_dict['points_xyz'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale']
92 |
93 | # offset
94 | data_dict['points'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale']
95 | data_dict['points_xyz_voxel_scale'] -= data_dict['points_xyz_voxel_scale'].min(0)
96 | return data_dict
97 |
98 | @staticmethod
99 | def check_data(data_dict):
100 | return ('valid' not in data_dict) or data_dict['valid']
101 |
102 | ###################
103 | # Used in outdoor #
104 | ###################
105 | @staticmethod
106 | def random_world_rotation(data_dict=None, config=None):
107 | points = data_dict['points']
108 | rotate_rad = np.deg2rad(np.random.random() * 360) - np.pi
109 | c, s = np.cos(rotate_rad), np.sin(rotate_rad)
110 | j = np.matrix([[c, s], [-s, c]])
111 | data_dict['points'][:, :2] = np.dot(points[:, :2], j)
112 |
113 | return data_dict
114 |
115 | @staticmethod
116 | def random_world_flip(data_dict=None, config=None):
117 | points = data_dict['points']
118 | flip_type = np.random.choice(4, 1)
119 |
120 | if flip_type == 0:
121 | # flip x only
122 | points[:, 0] = -points[:, 0]
123 | elif flip_type == 1:
124 | # flip y only
125 | points[:, 1] = -points[:, 1]
126 | elif flip_type == 2:
127 | # flip x+y
128 | points[:, :2] = -points[:, :2]
129 |
130 | data_dict['points'] = points
131 | return data_dict
132 |
133 | @staticmethod
134 | def random_world_scaling(data_dict=None, config=None):
135 | points = data_dict['points']
136 | noise_scale = np.random.uniform(config[0], config[1])
137 | points[:, :2] = noise_scale * points[:, :2]
138 |
139 | data_dict['points'] = points
140 | return data_dict
141 |
142 | @staticmethod
143 | def random_world_translation(data_dict=None, config=None):
144 | points = data_dict['points']
145 | noise_translate = np.array(
146 | [np.random.normal(0, config[0], 1), np.random.normal(0, config[1], 1), np.random.normal(0, config[2], 1)]
147 | ).T
148 | points[:, 0:3] += noise_translate
149 |
150 | data_dict['points'] = points
151 | return data_dict
152 |
--------------------------------------------------------------------------------
/pcseg/datasets/processor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__init__.py
--------------------------------------------------------------------------------
/pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/datasets/s3dis/meta/all_data_label.txt:
--------------------------------------------------------------------------------
1 | Area_1_conferenceRoom_1.npy
2 | Area_1_conferenceRoom_2.npy
3 | Area_1_copyRoom_1.npy
4 | Area_1_hallway_1.npy
5 | Area_1_hallway_2.npy
6 | Area_1_hallway_3.npy
7 | Area_1_hallway_4.npy
8 | Area_1_hallway_5.npy
9 | Area_1_hallway_6.npy
10 | Area_1_hallway_7.npy
11 | Area_1_hallway_8.npy
12 | Area_1_office_10.npy
13 | Area_1_office_11.npy
14 | Area_1_office_12.npy
15 | Area_1_office_13.npy
16 | Area_1_office_14.npy
17 | Area_1_office_15.npy
18 | Area_1_office_16.npy
19 | Area_1_office_17.npy
20 | Area_1_office_18.npy
21 | Area_1_office_19.npy
22 | Area_1_office_1.npy
23 | Area_1_office_20.npy
24 | Area_1_office_21.npy
25 | Area_1_office_22.npy
26 | Area_1_office_23.npy
27 | Area_1_office_24.npy
28 | Area_1_office_25.npy
29 | Area_1_office_26.npy
30 | Area_1_office_27.npy
31 | Area_1_office_28.npy
32 | Area_1_office_29.npy
33 | Area_1_office_2.npy
34 | Area_1_office_30.npy
35 | Area_1_office_31.npy
36 | Area_1_office_3.npy
37 | Area_1_office_4.npy
38 | Area_1_office_5.npy
39 | Area_1_office_6.npy
40 | Area_1_office_7.npy
41 | Area_1_office_8.npy
42 | Area_1_office_9.npy
43 | Area_1_pantry_1.npy
44 | Area_1_WC_1.npy
45 | Area_2_auditorium_1.npy
46 | Area_2_auditorium_2.npy
47 | Area_2_conferenceRoom_1.npy
48 | Area_2_hallway_10.npy
49 | Area_2_hallway_11.npy
50 | Area_2_hallway_12.npy
51 | Area_2_hallway_1.npy
52 | Area_2_hallway_2.npy
53 | Area_2_hallway_3.npy
54 | Area_2_hallway_4.npy
55 | Area_2_hallway_5.npy
56 | Area_2_hallway_6.npy
57 | Area_2_hallway_7.npy
58 | Area_2_hallway_8.npy
59 | Area_2_hallway_9.npy
60 | Area_2_office_10.npy
61 | Area_2_office_11.npy
62 | Area_2_office_12.npy
63 | Area_2_office_13.npy
64 | Area_2_office_14.npy
65 | Area_2_office_1.npy
66 | Area_2_office_2.npy
67 | Area_2_office_3.npy
68 | Area_2_office_4.npy
69 | Area_2_office_5.npy
70 | Area_2_office_6.npy
71 | Area_2_office_7.npy
72 | Area_2_office_8.npy
73 | Area_2_office_9.npy
74 | Area_2_storage_1.npy
75 | Area_2_storage_2.npy
76 | Area_2_storage_3.npy
77 | Area_2_storage_4.npy
78 | Area_2_storage_5.npy
79 | Area_2_storage_6.npy
80 | Area_2_storage_7.npy
81 | Area_2_storage_8.npy
82 | Area_2_storage_9.npy
83 | Area_2_WC_1.npy
84 | Area_2_WC_2.npy
85 | Area_3_conferenceRoom_1.npy
86 | Area_3_hallway_1.npy
87 | Area_3_hallway_2.npy
88 | Area_3_hallway_3.npy
89 | Area_3_hallway_4.npy
90 | Area_3_hallway_5.npy
91 | Area_3_hallway_6.npy
92 | Area_3_lounge_1.npy
93 | Area_3_lounge_2.npy
94 | Area_3_office_10.npy
95 | Area_3_office_1.npy
96 | Area_3_office_2.npy
97 | Area_3_office_3.npy
98 | Area_3_office_4.npy
99 | Area_3_office_5.npy
100 | Area_3_office_6.npy
101 | Area_3_office_7.npy
102 | Area_3_office_8.npy
103 | Area_3_office_9.npy
104 | Area_3_storage_1.npy
105 | Area_3_storage_2.npy
106 | Area_3_WC_1.npy
107 | Area_3_WC_2.npy
108 | Area_4_conferenceRoom_1.npy
109 | Area_4_conferenceRoom_2.npy
110 | Area_4_conferenceRoom_3.npy
111 | Area_4_hallway_10.npy
112 | Area_4_hallway_11.npy
113 | Area_4_hallway_12.npy
114 | Area_4_hallway_13.npy
115 | Area_4_hallway_14.npy
116 | Area_4_hallway_1.npy
117 | Area_4_hallway_2.npy
118 | Area_4_hallway_3.npy
119 | Area_4_hallway_4.npy
120 | Area_4_hallway_5.npy
121 | Area_4_hallway_6.npy
122 | Area_4_hallway_7.npy
123 | Area_4_hallway_8.npy
124 | Area_4_hallway_9.npy
125 | Area_4_lobby_1.npy
126 | Area_4_lobby_2.npy
127 | Area_4_office_10.npy
128 | Area_4_office_11.npy
129 | Area_4_office_12.npy
130 | Area_4_office_13.npy
131 | Area_4_office_14.npy
132 | Area_4_office_15.npy
133 | Area_4_office_16.npy
134 | Area_4_office_17.npy
135 | Area_4_office_18.npy
136 | Area_4_office_19.npy
137 | Area_4_office_1.npy
138 | Area_4_office_20.npy
139 | Area_4_office_21.npy
140 | Area_4_office_22.npy
141 | Area_4_office_2.npy
142 | Area_4_office_3.npy
143 | Area_4_office_4.npy
144 | Area_4_office_5.npy
145 | Area_4_office_6.npy
146 | Area_4_office_7.npy
147 | Area_4_office_8.npy
148 | Area_4_office_9.npy
149 | Area_4_storage_1.npy
150 | Area_4_storage_2.npy
151 | Area_4_storage_3.npy
152 | Area_4_storage_4.npy
153 | Area_4_WC_1.npy
154 | Area_4_WC_2.npy
155 | Area_4_WC_3.npy
156 | Area_4_WC_4.npy
157 | Area_5_conferenceRoom_1.npy
158 | Area_5_conferenceRoom_2.npy
159 | Area_5_conferenceRoom_3.npy
160 | Area_5_hallway_10.npy
161 | Area_5_hallway_11.npy
162 | Area_5_hallway_12.npy
163 | Area_5_hallway_13.npy
164 | Area_5_hallway_14.npy
165 | Area_5_hallway_15.npy
166 | Area_5_hallway_1.npy
167 | Area_5_hallway_2.npy
168 | Area_5_hallway_3.npy
169 | Area_5_hallway_4.npy
170 | Area_5_hallway_5.npy
171 | Area_5_hallway_6.npy
172 | Area_5_hallway_7.npy
173 | Area_5_hallway_8.npy
174 | Area_5_hallway_9.npy
175 | Area_5_lobby_1.npy
176 | Area_5_office_10.npy
177 | Area_5_office_11.npy
178 | Area_5_office_12.npy
179 | Area_5_office_13.npy
180 | Area_5_office_14.npy
181 | Area_5_office_15.npy
182 | Area_5_office_16.npy
183 | Area_5_office_17.npy
184 | Area_5_office_18.npy
185 | Area_5_office_19.npy
186 | Area_5_office_1.npy
187 | Area_5_office_20.npy
188 | Area_5_office_21.npy
189 | Area_5_office_22.npy
190 | Area_5_office_23.npy
191 | Area_5_office_24.npy
192 | Area_5_office_25.npy
193 | Area_5_office_26.npy
194 | Area_5_office_27.npy
195 | Area_5_office_28.npy
196 | Area_5_office_29.npy
197 | Area_5_office_2.npy
198 | Area_5_office_30.npy
199 | Area_5_office_31.npy
200 | Area_5_office_32.npy
201 | Area_5_office_33.npy
202 | Area_5_office_34.npy
203 | Area_5_office_35.npy
204 | Area_5_office_36.npy
205 | Area_5_office_37.npy
206 | Area_5_office_38.npy
207 | Area_5_office_39.npy
208 | Area_5_office_3.npy
209 | Area_5_office_40.npy
210 | Area_5_office_41.npy
211 | Area_5_office_42.npy
212 | Area_5_office_4.npy
213 | Area_5_office_5.npy
214 | Area_5_office_6.npy
215 | Area_5_office_7.npy
216 | Area_5_office_8.npy
217 | Area_5_office_9.npy
218 | Area_5_pantry_1.npy
219 | Area_5_storage_1.npy
220 | Area_5_storage_2.npy
221 | Area_5_storage_3.npy
222 | Area_5_storage_4.npy
223 | Area_5_WC_1.npy
224 | Area_5_WC_2.npy
225 | Area_6_conferenceRoom_1.npy
226 | Area_6_copyRoom_1.npy
227 | Area_6_hallway_1.npy
228 | Area_6_hallway_2.npy
229 | Area_6_hallway_3.npy
230 | Area_6_hallway_4.npy
231 | Area_6_hallway_5.npy
232 | Area_6_hallway_6.npy
233 | Area_6_lounge_1.npy
234 | Area_6_office_10.npy
235 | Area_6_office_11.npy
236 | Area_6_office_12.npy
237 | Area_6_office_13.npy
238 | Area_6_office_14.npy
239 | Area_6_office_15.npy
240 | Area_6_office_16.npy
241 | Area_6_office_17.npy
242 | Area_6_office_18.npy
243 | Area_6_office_19.npy
244 | Area_6_office_1.npy
245 | Area_6_office_20.npy
246 | Area_6_office_21.npy
247 | Area_6_office_22.npy
248 | Area_6_office_23.npy
249 | Area_6_office_24.npy
250 | Area_6_office_25.npy
251 | Area_6_office_26.npy
252 | Area_6_office_27.npy
253 | Area_6_office_28.npy
254 | Area_6_office_29.npy
255 | Area_6_office_2.npy
256 | Area_6_office_30.npy
257 | Area_6_office_31.npy
258 | Area_6_office_32.npy
259 | Area_6_office_33.npy
260 | Area_6_office_34.npy
261 | Area_6_office_35.npy
262 | Area_6_office_36.npy
263 | Area_6_office_37.npy
264 | Area_6_office_3.npy
265 | Area_6_office_4.npy
266 | Area_6_office_5.npy
267 | Area_6_office_6.npy
268 | Area_6_office_7.npy
269 | Area_6_office_8.npy
270 | Area_6_office_9.npy
271 | Area_6_openspace_1.npy
272 | Area_6_pantry_1.npy
273 |
--------------------------------------------------------------------------------
/pcseg/datasets/s3dis/meta/area6_data_label.txt:
--------------------------------------------------------------------------------
1 | data/stanford_indoor3d/Area_6_conferenceRoom_1.npy
2 | data/stanford_indoor3d/Area_6_copyRoom_1.npy
3 | data/stanford_indoor3d/Area_6_hallway_1.npy
4 | data/stanford_indoor3d/Area_6_hallway_2.npy
5 | data/stanford_indoor3d/Area_6_hallway_3.npy
6 | data/stanford_indoor3d/Area_6_hallway_4.npy
7 | data/stanford_indoor3d/Area_6_hallway_5.npy
8 | data/stanford_indoor3d/Area_6_hallway_6.npy
9 | data/stanford_indoor3d/Area_6_lounge_1.npy
10 | data/stanford_indoor3d/Area_6_office_10.npy
11 | data/stanford_indoor3d/Area_6_office_11.npy
12 | data/stanford_indoor3d/Area_6_office_12.npy
13 | data/stanford_indoor3d/Area_6_office_13.npy
14 | data/stanford_indoor3d/Area_6_office_14.npy
15 | data/stanford_indoor3d/Area_6_office_15.npy
16 | data/stanford_indoor3d/Area_6_office_16.npy
17 | data/stanford_indoor3d/Area_6_office_17.npy
18 | data/stanford_indoor3d/Area_6_office_18.npy
19 | data/stanford_indoor3d/Area_6_office_19.npy
20 | data/stanford_indoor3d/Area_6_office_1.npy
21 | data/stanford_indoor3d/Area_6_office_20.npy
22 | data/stanford_indoor3d/Area_6_office_21.npy
23 | data/stanford_indoor3d/Area_6_office_22.npy
24 | data/stanford_indoor3d/Area_6_office_23.npy
25 | data/stanford_indoor3d/Area_6_office_24.npy
26 | data/stanford_indoor3d/Area_6_office_25.npy
27 | data/stanford_indoor3d/Area_6_office_26.npy
28 | data/stanford_indoor3d/Area_6_office_27.npy
29 | data/stanford_indoor3d/Area_6_office_28.npy
30 | data/stanford_indoor3d/Area_6_office_29.npy
31 | data/stanford_indoor3d/Area_6_office_2.npy
32 | data/stanford_indoor3d/Area_6_office_30.npy
33 | data/stanford_indoor3d/Area_6_office_31.npy
34 | data/stanford_indoor3d/Area_6_office_32.npy
35 | data/stanford_indoor3d/Area_6_office_33.npy
36 | data/stanford_indoor3d/Area_6_office_34.npy
37 | data/stanford_indoor3d/Area_6_office_35.npy
38 | data/stanford_indoor3d/Area_6_office_36.npy
39 | data/stanford_indoor3d/Area_6_office_37.npy
40 | data/stanford_indoor3d/Area_6_office_3.npy
41 | data/stanford_indoor3d/Area_6_office_4.npy
42 | data/stanford_indoor3d/Area_6_office_5.npy
43 | data/stanford_indoor3d/Area_6_office_6.npy
44 | data/stanford_indoor3d/Area_6_office_7.npy
45 | data/stanford_indoor3d/Area_6_office_8.npy
46 | data/stanford_indoor3d/Area_6_office_9.npy
47 | data/stanford_indoor3d/Area_6_openspace_1.npy
48 | data/stanford_indoor3d/Area_6_pantry_1.npy
49 |
--------------------------------------------------------------------------------
/pcseg/datasets/s3dis/meta/class_names.txt:
--------------------------------------------------------------------------------
1 | ceiling
2 | floor
3 | wall
4 | beam
5 | column
6 | window
7 | door
8 | table
9 | chair
10 | sofa
11 | bookcase
12 | board
13 | clutter
14 |
--------------------------------------------------------------------------------
/pcseg/datasets/s3dis/preprocess.py:
--------------------------------------------------------------------------------
1 | # https://github.com/charlesq34/pointnet/blob/master/sem_seg/
2 |
3 | import os
4 | import sys
5 | import glob
6 | import numpy as np
7 |
8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9 | ROOT_DIR = os.path.dirname(BASE_DIR)
10 | sys.path.append(BASE_DIR)
11 |
12 | DATA_PATH = os.path.join('data', 's3dis/Stanford3dDataset_v1.2_Aligned_Version')
13 | g_classes = [x.rstrip() for x in open(os.path.join(BASE_DIR, 'meta/class_names.txt'))]
14 | g_class2label = {cls: i for i, cls in enumerate(g_classes)}
15 | g_class2color = {'ceiling': [0,255,0],
16 | 'floor': [0,0,255],
17 | 'wall': [0,255,255],
18 | 'beam': [255,255,0],
19 | 'column': [255,0,255],
20 | 'window': [100,100,255],
21 | 'door': [200,200,100],
22 | 'table': [170,120,200],
23 | 'chair': [255,0,0],
24 | 'sofa': [200,100,100],
25 | 'bookcase': [10,200,100],
26 | 'board': [200,200,200],
27 | 'clutter': [50,50,50]}
28 | g_easy_view_labels = [7,8,9,10,11,1]
29 | g_label2color = {g_classes.index(cls): g_class2color[cls] for cls in g_classes}
30 |
31 |
32 | def collect_point_label(anno_path, out_filename, file_format='txt'):
33 | """ Convert original dataset files to data_label file (each line is XYZRGBL).
34 | We aggregated all the points from each instance in the room.
35 | Args:
36 | anno_path: path to annotations. e.g. Area_1/office_2/Annotations/
37 | out_filename: path to save collected points and labels (each line is XYZRGBL)
38 | file_format: txt or numpy, determines what file format to save.
39 | Returns:
40 | None
41 | Note:
42 | the points are shifted before save, the most negative point is now at origin.
43 | """
44 | points_list = []
45 |
46 | num_inst = 0
47 | for f in sorted(glob.glob(os.path.join(anno_path, '*.txt'))):
48 | cls = os.path.basename(f).split('_')[0]
49 | num_inst += 1
50 | if cls not in g_classes: # note: in some room there is 'staris' class..
51 | cls = 'clutter'
52 | points = np.loadtxt(f)
53 | labels = np.ones((points.shape[0], 1)) * g_class2label[cls]
54 | inst_labels = np.ones((points.shape[0], 1)) * num_inst
55 | points_list.append(np.concatenate([points, labels, inst_labels], 1)) # Nx8
56 |
57 | data_label = np.concatenate(points_list, 0)
58 | xyz_min = np.amin(data_label, axis=0)[0:3]
59 | data_label[:, 0:3] -= xyz_min
60 |
61 | if file_format == 'txt':
62 | fout = open(out_filename, 'w')
63 | for i in range(data_label.shape[0]):
64 | fout.write('%f %f %f %d %d %d %d %d\n' % \
65 | (data_label[i, 0], data_label[i, 1], data_label[i, 2],
66 | data_label[i, 3], data_label[i, 4], data_label[i, 5],
67 | data_label[i, 6], data_label[i, 7]))
68 | fout.close()
69 | elif file_format == 'numpy':
70 | np.save(out_filename, data_label)
71 | else:
72 | print('ERROR!! Unknown file format: %s, please use txt or numpy.' % \
73 | (file_format))
74 | exit()
75 |
76 |
77 | def main():
78 | anno_paths = [line.rstrip() for line in open(os.path.join(BASE_DIR, 'meta/anno_paths.txt'))]
79 | anno_paths = [os.path.join(DATA_PATH, p) for p in anno_paths]
80 |
81 | output_folder = './data/s3dis/stanford_indoor3d_inst'
82 | if not os.path.exists(output_folder):
83 | os.mkdir(output_folder)
84 |
85 | # Note: there is an extra character in the v1.2 data in Area_5/hallway_6. It's fixed manually.
86 | for anno_path in anno_paths:
87 | print(anno_path)
88 | # try:
89 | elements = anno_path.split('/')
90 | out_filename = elements[-3]+'_'+elements[-2]+'.npy' # Area_1_hallway_1.npy
91 | collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy')
92 | # except:
93 | # print(anno_path, 'ERROR!!')
94 |
95 |
96 | if __name__ == '__main__':
97 | main()
98 |
--------------------------------------------------------------------------------
/pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
4 |
5 | setup(
6 | name='SOFTGROUP_OP',
7 | ext_modules=[
8 | CUDAExtension(
9 | 'SOFTGROUP_OP', ['src/softgroup_api.cpp', 'src/softgroup_ops.cpp', 'src/cuda.cu'],
10 | extra_compile_args={
11 | 'cxx': ['-g'],
12 | 'nvcc': ['-O2']
13 | })
14 | ],
15 | cmdclass={'build_ext': BuildExtension})
16 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Ball Query with BatchIdx & Clustering Algorithm
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 |
6 | Modified by Thang Vu - Remove semantic label in clustering
7 | */
8 |
9 | #include "bfs_cluster.h"
10 |
11 | /* =================== ballquery_batch_p================================= */
12 | // input xyz: (n, 3) float
13 | // input batch_idxs: (n) int
14 | // input batch_offsets: (B+1) int, batch_offsets[-1]
15 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
16 | // output start_len: (n, 2), int
17 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor,
18 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor,
19 | at::Tensor start_len_tensor, int n, int meanActive,
20 | float radius) {
21 | const float *xyz = xyz_tensor.data_ptr<float>();
22 | const int *batch_idxs = batch_idxs_tensor.data_ptr<int>();
23 | const int *batch_offsets = batch_offsets_tensor.data_ptr<int>();
24 | int *idx = idx_tensor.data_ptr<int>();
25 | int *start_len = start_len_tensor.data_ptr<int>();
26 |
27 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
28 | int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs,
29 | batch_offsets, idx, start_len, stream);
30 | return cumsum;
31 | }
32 |
33 | ConnectedComponent find_cc(Int idx, Int *ball_query_idxs, int *start_len,
34 | int *visited) {
35 | ConnectedComponent cc;
36 | cc.addPoint(idx);
37 | visited[idx] = 1;
38 |
39 | std::queue<Int> Q;
40 | assert(Q.empty());
41 | Q.push(idx);
42 |
43 | while (!Q.empty()) {
44 | Int cur = Q.front();
45 | Q.pop();
46 | int start = start_len[cur * 2];
47 | int len = start_len[cur * 2 + 1];
48 | for (Int i = start; i < start + len; i++) {
49 | Int idx_i = ball_query_idxs[i];
50 | if (visited[idx_i] == 1)
51 | continue;
52 | cc.addPoint(idx_i);
53 | visited[idx_i] = 1;
54 | Q.push(idx_i);
55 | }
56 | }
57 | return cc;
58 | }
59 |
60 | int get_clusters(float *class_numpoint_mean, int *ball_query_idxs,
61 | int *start_len, const int nPoint, float threshold,
62 | ConnectedComponents &clusters, const int class_id) {
63 | int *visited = new int[nPoint]{0};
64 | float _class_numpoint_mean, thr;
65 | int sumNPoint = 0;
66 |
67 | for (int i = 0; i < nPoint; i++) {
68 | if (visited[i] == 0) {
69 | ConnectedComponent CC = find_cc(i, ball_query_idxs, start_len, visited);
70 | _class_numpoint_mean = class_numpoint_mean[class_id];
71 |
72 | // if _class_num_point_mean is not defined (-1) directly use threshold
73 | if (_class_numpoint_mean == -1) {
74 | thr = threshold;
75 | } else {
76 | thr = threshold * _class_numpoint_mean;
77 | }
78 | if ((int)CC.pt_idxs.size() >= thr) {
79 | clusters.push_back(CC);
80 | sumNPoint += (int)CC.pt_idxs.size();
81 | }
82 | }
83 | }
84 | delete[] visited;
85 | return sumNPoint;
86 | }
87 |
88 | // convert from ConnectedComponents to (idxs, offsets) representation
89 | void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs,
90 | int *cluster_offsets) {
91 | for (int i = 0; i < (int)CCs.size(); i++) {
92 | cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size();
93 | for (int j = 0; j < (int)CCs[i].pt_idxs.size(); j++) {
94 | int idx = CCs[i].pt_idxs[j];
95 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i;
96 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx;
97 | }
98 | }
99 | }
100 |
101 | // input: class_numpoint_mean_tensor
102 | // input: ball_query_idxs, int, (nActive)
103 | // input: start_len, int, (N, 2)
104 | // output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for
105 | // corresponding point idxs in N
106 | // output: cluster_offsets, int (nCluster + 1)
107 | void bfs_cluster(at::Tensor class_numpoint_mean_tensor,
108 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
109 | at::Tensor cluster_idxs_tensor,
110 | at::Tensor cluster_offsets_tensor, const int N,
111 | float threshold, const int class_id) {
112 | float *class_numpoint_mean = class_numpoint_mean_tensor.data_ptr<float>();
113 | Int *ball_query_idxs = ball_query_idxs_tensor.data_ptr<Int>();
114 | int *start_len = start_len_tensor.data_ptr<int>();
115 | ConnectedComponents CCs;
116 | int sumNPoint = get_clusters(class_numpoint_mean, ball_query_idxs, start_len,
117 | N, threshold, CCs, class_id);
118 | int nCluster = (int)CCs.size();
119 | cluster_idxs_tensor.resize_({sumNPoint, 2});
120 | cluster_offsets_tensor.resize_({nCluster + 1});
121 | cluster_idxs_tensor.zero_();
122 | cluster_offsets_tensor.zero_();
123 | int *cluster_idxs = cluster_idxs_tensor.data_ptr<int>();
124 | int *cluster_offsets = cluster_offsets_tensor.data_ptr<int>();
125 | fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets);
126 | }
127 |
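In Python, the two bindings above are typically driven back to back: ballquery_batch_p fills idx and start_len on the GPU and returns the total number of neighbour entries, and bfs_cluster then grows connected components on the CPU, resizing cluster_idxs and cluster_offsets in place. A rough sketch, assuming the compiled extension is importable as softgroup_ops (the real wrappers presumably live in ops/functions.py) and using made-up values for mean_active, radius and threshold:

    import torch
    import softgroup_ops as ops   # assumed import name for the compiled extension

    n = 1000
    xyz = torch.rand(n, 3, device='cuda')                          # (n, 3) float coordinates
    batch_idxs = torch.zeros(n, dtype=torch.int32, device='cuda')  # one scene -> batch 0
    batch_offsets = torch.tensor([0, n], dtype=torch.int32, device='cuda')

    mean_active, radius, threshold = 300, 0.03, 50
    idx = torch.zeros(n * mean_active, dtype=torch.int32, device='cuda')
    start_len = torch.zeros(n, 2, dtype=torch.int32, device='cuda')
    n_active = ops.ballquery_batch_p(xyz, batch_idxs, batch_offsets, idx,
                                     start_len, n, mean_active, radius)

    cluster_idxs = torch.zeros(0, dtype=torch.int32)     # resized to (sumNPoint, 2) inside
    cluster_offsets = torch.zeros(0, dtype=torch.int32)  # resized to (nCluster + 1) inside
    class_numpoint_mean = torch.full((20,), -1.0)        # -1 -> fall back to `threshold`
    ops.bfs_cluster(class_numpoint_mean, idx[:n_active].cpu(), start_len.cpu(),
                    cluster_idxs, cluster_offsets, n, threshold, 0)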
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Ball Query with BatchIdx
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 | #include "../cuda_utils.h"
7 | #include "bfs_cluster.h"
8 |
9 | #include <assert.h>
10 | #include <stdio.h>
11 | #include <stdlib.h>
12 |
13 | /* ================================== ballquery_batch_p
14 | * ================================== */
15 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius,
16 | const float *xyz, const int *batch_idxs,
17 | const int *batch_offsets, int *idx,
18 | int *start_len, int *cumsum) {
19 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
20 | if (pt_idx >= n)
21 | return;
22 |
23 | start_len += (pt_idx * 2);
24 | int idx_temp[1000];
25 |
26 | float radius2 = radius * radius;
27 | float o_x = xyz[pt_idx * 3 + 0];
28 | float o_y = xyz[pt_idx * 3 + 1];
29 | float o_z = xyz[pt_idx * 3 + 2];
30 |
31 | int batch_idx = batch_idxs[pt_idx];
32 | int start = batch_offsets[batch_idx];
33 | int end = batch_offsets[batch_idx + 1];
34 |
35 | int cnt = 0;
36 | for (int k = start; k < end; k++) {
37 | float x = xyz[k * 3 + 0];
38 | float y = xyz[k * 3 + 1];
39 | float z = xyz[k * 3 + 2];
40 | float d2 =
41 | (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z);
42 | if (d2 < radius2) {
43 | if (cnt < 1000) {
44 | idx_temp[cnt] = k;
45 | } else {
46 | break;
47 | }
48 | ++cnt;
49 | }
50 | }
51 |
52 | start_len[0] = atomicAdd(cumsum, cnt);
53 | start_len[1] = cnt;
54 |
55 | int thre = n * meanActive;
56 | if (start_len[0] >= thre)
57 | return;
58 |
59 | idx += start_len[0];
60 | if (start_len[0] + cnt >= thre)
61 | cnt = thre - start_len[0];
62 |
63 | for (int k = 0; k < cnt; k++) {
64 | idx[k] = idx_temp[k];
65 | }
66 | }
67 |
68 | int ballquery_batch_p_cuda(int n, int meanActive, float radius,
69 | const float *xyz, const int *batch_idxs,
70 | const int *batch_offsets, int *idx, int *start_len,
71 | cudaStream_t stream) {
72 | // param xyz: (n, 3)
73 | // param batch_idxs: (n)
74 | // param batch_offsets: (B + 1)
75 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in
76 | // n
77 | // output start_len: (n, 2), int
78 |
79 | cudaError_t err;
80 |
81 | dim3 blocks(DIVUP(n, MAX_THREADS_PER_BLOCK));
82 | dim3 threads(MAX_THREADS_PER_BLOCK);
83 |
84 | int cumsum = 0;
85 | int *p_cumsum;
86 | cudaMalloc((void **)&p_cumsum, sizeof(int));
87 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice);
88 |
89 | ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(
90 | n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len,
91 | p_cumsum);
92 |
93 | err = cudaGetLastError();
94 | if (cudaSuccess != err) {
95 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
96 | exit(-1);
97 | }
98 |
99 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost);
100 | return cumsum;
101 | }
102 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.h:
--------------------------------------------------------------------------------
1 | /*
2 | Ball Query with BatchIdx & Clustering Algorithm
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #ifndef BFS_CLUSTER_H
8 | #define BFS_CLUSTER_H
9 | #include <ATen/cuda/CUDAContext.h>
10 | #include <torch/serialize/tensor.h>
11 |
12 | #include "../datatype/datatype.h"
13 |
14 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor,
15 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor,
16 | at::Tensor start_len_tensor, int n, int meanActive,
17 | float radius);
18 | int ballquery_batch_p_cuda(int n, int meanActive, float radius,
19 | const float *xyz, const int *batch_idxs,
20 | const int *batch_offsets, int *idx, int *start_len,
21 | cudaStream_t stream);
22 |
23 | void bfs_cluster(at::Tensor class_numpoint_mean_tensor,
24 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
25 | at::Tensor cluster_idxs_tensor,
26 | at::Tensor cluster_offsets_tensor, const int N,
27 | float threshold, const int class_id);
28 |
29 | #endif // BFS_CLUSTER_H
30 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Get the IoU between predictions and gt masks
3 | */
4 |
5 | #include "cal_iou_and_masklabel.h"
6 |
7 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor,
8 | at::Tensor proposals_offset_tensor,
9 | at::Tensor instance_labels_tensor,
10 | at::Tensor instance_pointnum_tensor,
11 | at::Tensor proposals_iou_tensor, int nInstance,
12 | int nProposal) {
13 | int *proposals_idx = proposals_idx_tensor.data_ptr<int>();
14 | int *proposals_offset = proposals_offset_tensor.data_ptr<int>();
15 | long *instance_labels = instance_labels_tensor.data_ptr<long>();
16 | int *instance_pointnum = instance_pointnum_tensor.data_ptr<int>();
17 | float *proposals_iou = proposals_iou_tensor.data_ptr<float>();
18 |
19 | // input: nInstance (1,), int
20 | // input: nProposal (1,), int
21 | // input: proposals_idx (sumNPoint), int
22 | // input: proposals_offset (nProposal + 1), int
23 | // input: instance_labels (N), long, 0~total_nInst-1, -100
24 | // input: instance_pointnum (total_nInst), int
25 | // input: mask_scores_sigmoid (sumNPoint, 1), float
26 | // output: proposals_iou (nProposal, total_nInst), float
27 | // output: mask_label (sumNPoint, 1), float
28 | get_mask_iou_on_cluster_cuda(nInstance, nProposal, proposals_idx,
29 | proposals_offset, instance_labels,
30 | instance_pointnum, proposals_iou);
31 | }
32 |
33 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor,
34 | at::Tensor proposals_offset_tensor,
35 | at::Tensor instance_labels_tensor,
36 | at::Tensor instance_pointnum_tensor,
37 | at::Tensor proposals_iou_tensor, int nInstance,
38 | int nProposal,
39 | at::Tensor mask_scores_sigmoid_tensor) {
40 | int *proposals_idx = proposals_idx_tensor.data_ptr<int>();
41 | int *proposals_offset = proposals_offset_tensor.data_ptr<int>();
42 | long *instance_labels = instance_labels_tensor.data_ptr<long>();
43 | int *instance_pointnum = instance_pointnum_tensor.data_ptr<int>();
44 | float *proposals_iou = proposals_iou_tensor.data_ptr<float>();
45 | float *mask_scores_sigmoid = mask_scores_sigmoid_tensor.data_ptr<float>();
46 |
47 | // input: nInstance (1,), int
48 | // input: nProposal (1,), int
49 | // input: proposals_idx (sumNPoint), int
50 | // input: proposals_offset (nProposal + 1), int
51 | // input: instance_labels (N), long, 0~total_nInst-1, -100
52 | // input: instance_pointnum (total_nInst), int
53 | // input: mask_scores_sigmoid (sumNPoint, 1), float
54 | // output: proposals_iou (nProposal, total_nInst), float
55 | // output: mask_label (sumNPoint, 1), float
56 | get_mask_iou_on_pred_cuda(
57 | nInstance, nProposal, proposals_idx, proposals_offset, instance_labels,
58 | instance_pointnum, proposals_iou, mask_scores_sigmoid);
59 | }
60 |
61 | void get_mask_label(at::Tensor proposals_idx_tensor,
62 | at::Tensor proposals_offset_tensor,
63 | at::Tensor instance_labels_tensor,
64 | at::Tensor instance_cls_tensor,
65 | at::Tensor proposals_iou_tensor, int nInstance,
66 | int nProposal, float iou_thr,
67 | at::Tensor mask_labels_tensor) {
68 | int *proposals_idx = proposals_idx_tensor.data_ptr<int>();
69 | int *proposals_offset = proposals_offset_tensor.data_ptr<int>();
70 | long *instance_labels = instance_labels_tensor.data_ptr<long>();
71 | long *instance_cls = instance_cls_tensor.data_ptr<long>();
72 | float *proposals_iou = proposals_iou_tensor.data_ptr<float>();
73 | float *mask_label = mask_labels_tensor.data_ptr<float>();
74 |
75 | // input: nInstance (1,), int
76 | // input: nProposal (1,), int
77 | // input: proposals_idx (sumNPoint), int
78 | // input: proposals_offset (nProposal + 1), int
79 | // input: instance_labels (N), long, 0~total_nInst-1, -100
80 | // input: instance_pointnum (total_nInst), int
81 | // input: mask_scores_sigmoid (sumNPoint, 1), float
82 | // output: proposals_iou (nProposal, total_nInst), float
83 | // output: mask_label (sumNPoint, 1), float
84 | get_mask_label_cuda(nInstance, nProposal, iou_thr, proposals_idx,
85 | proposals_offset, instance_labels, instance_cls,
86 | proposals_iou, mask_label);
87 | }
88 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.h:
--------------------------------------------------------------------------------
1 | /*
2 | Get the IoU between predictions and gt masks
3 | */
4 |
5 | #ifndef CAL_IOU_AND_MASKLABEL_H
6 | #define CAL_IOU_AND_MASKLABEL_H
7 | #include <ATen/cuda/CUDAContext.h>
8 | #include <torch/serialize/tensor.h>
9 |
10 | #include "../datatype/datatype.h"
11 |
12 | void get_mask_iou_on_cluster_cuda(int nInstance, int nProposal,
13 | int *proposals_idx, int *proposals_offset,
14 | long *instance_labels, int *instance_pointnum,
15 | float *proposals_iou);
16 |
17 | void get_mask_iou_on_pred_cuda(int nInstance, int nProposal, int *proposals_idx,
18 | int *proposals_offset, long *instance_labels,
19 | int *instance_pointnum, float *proposals_iou,
20 | float *mask_scores_sigmoid);
21 |
22 | void get_mask_label_cuda(int nInstance, int nProposal, float iou_thr,
23 | int *proposals_idx, int *proposals_offset,
24 | long *instance_labels, long *instance_cls,
25 | float *proposals_iou, float *mask_label);
26 |
27 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor,
28 | at::Tensor proposals_offset_tensor,
29 | at::Tensor instance_labels_tensor,
30 | at::Tensor instance_pointnum_tensor,
31 | at::Tensor proposals_iou_tensor, int nInstance,
32 | int nProposal);
33 |
34 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor,
35 | at::Tensor proposals_offset_tensor,
36 | at::Tensor instance_labels_tensor,
37 | at::Tensor instance_pointnum_tensor,
38 | at::Tensor proposals_iou_tensor, int nInstance,
39 | int nProposal, at::Tensor mask_scores_sigmoid_tensor);
40 |
41 | void get_mask_label(at::Tensor proposals_idx_tensor,
42 | at::Tensor proposals_offset_tensor,
43 | at::Tensor instance_labels_tensor,
44 | at::Tensor instance_cls_tensor,
45 | at::Tensor proposals_iou_tensor, int nInstance,
46 | int nProposal, float iou_thr,
47 | at::Tensor mask_labels_tensor);
48 |
49 | #endif // CAL_IOU_AND_MASKLABEL_H
50 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/cuda.cu:
--------------------------------------------------------------------------------
1 | #include "datatype/datatype.h"
2 | #include
3 |
4 | #include "bfs_cluster/bfs_cluster.cu"
5 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu"
6 | #include "roipool/roipool.cu"
7 | #include "sec_mean/sec_mean.cu"
8 | #include "voxelize/voxelize.cu"
9 |
10 | template void voxelize_fp_cuda<float>(Int nOutputRows, Int maxActive,
11 | Int nPlanes, float *feats,
12 | float *output_feats, Int *rules,
13 | bool average);
14 |
15 | template void voxelize_bp_cuda<float>(Int nOutputRows, Int maxActive,
16 | Int nPlanes, float *d_output_feats,
17 | float *d_feats, Int *rules, bool average);
18 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include <cmath>
5 |
6 | #define TOTAL_THREADS 1024
7 |
8 | #define MAX_THREADS_PER_BLOCK 512
9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
10 |
11 | inline int opt_n_threads(int work_size) {
12 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
13 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 |
16 | inline dim3 opt_block_config(int x, int y) {
17 | const int x_threads = opt_n_threads(x);
18 | const int y_threads =
19 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
20 | dim3 block_config(x_threads, y_threads, 1);
21 | return block_config;
22 | }
23 |
24 | #endif
25 |
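For reference, the two helpers above just compute the launch geometry: DIVUP(m, n) is a ceiling division for the block count, and opt_n_threads rounds the work size down to a power of two capped at TOTAL_THREADS. A Python restatement, for illustration only:

    import math

    def divup(m, n):
        # ceil(m / n): number of blocks needed to cover m items with n threads each
        return m // n + (m % n > 0)

    def opt_n_threads(work_size, total_threads=1024):
        # largest power of two <= work_size, clamped to [1, total_threads]
        pow_2 = int(math.log(work_size) / math.log(2.0))
        return max(min(1 << pow_2, total_threads), 1)

    print(divup(1000, 512), opt_n_threads(300))   # -> 2 blocks, 256 threads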
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.cpp:
--------------------------------------------------------------------------------
1 | #include "datatype.h"
2 |
3 | template <Int dimension> SparseGrid<dimension>::SparseGrid() : ctr(0) {
4 | // Sparsehash needs a key to be set aside and never used
5 | Point empty_key;
6 | for (Int i = 0; i < dimension; i++) {
7 | empty_key[i] = std::numeric_limits<Int>::min();
8 | }
9 | mp.set_empty_key(empty_key);
10 | }
11 |
12 | ConnectedComponent::ConnectedComponent() {}
13 |
14 | void ConnectedComponent::addPoint(Int pt_idx) { pt_idxs.push_back(pt_idx); }
15 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.h:
--------------------------------------------------------------------------------
1 | #ifndef DATATYPE_H
2 | #define DATATYPE_H
3 | #include <array>
4 | #include <cstdint>
5 | #include <google/dense_hash_map>
6 | #include <queue>
7 | #include <vector>
8 |
9 | using Int = int32_t;
10 |
11 | template <Int dimension> using Point = std::array<Int, dimension>;
12 |
13 | template <Int dimension> struct IntArrayHash {
14 | std::size_t operator()(Point<dimension> const &p) const {
15 | Int hash = 16777619;
16 | for (auto x : p) {
17 | hash *= 2166136261;
18 | hash ^= x;
19 | }
20 | return hash;
21 | }
22 | };
23 |
24 | template <Int dimension>
25 | using SparseGridMap = google::dense_hash_map<
26 | Point<dimension>, Int, IntArrayHash<dimension>,
27 | std::equal_to<Point<dimension>>>; //
28 |
29 | template <Int dimension> class SparseGrid {
30 | public:
31 | Int ctr;
32 | SparseGridMap<dimension> mp;
33 | SparseGrid();
34 | };
35 |
36 | template <Int dimension> using SparseGrids = std::vector<SparseGrid<dimension>>;
37 |
38 | using RuleBook = std::vector<std::vector<Int>>;
39 |
40 | class ConnectedComponent {
41 | public:
42 | std::vector<Int> pt_idxs;
43 | float accum_x = 0.;
44 | float accum_y = 0.;
45 | float accum_z = 0.;
46 | int cls_label = -100;
47 | int batch_idx = -1;
48 | // int npoint = 0;
49 |
50 | ConnectedComponent();
51 | void addPoint(Int pt_idx);
52 | };
53 |
54 | using ConnectedComponents = std::vector;
55 |
56 | #endif // DATATYPE_H
57 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | ROI Max Pool
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "roipool.h"
8 |
9 | void global_avg_pool_fp(at::Tensor feats_tensor,
10 | at::Tensor proposals_offset_tensor,
11 | at::Tensor output_feats_tensor, int nProposal, int C) {
12 | float *feats = feats_tensor.data_ptr<float>();
13 | int *proposals_offset = proposals_offset_tensor.data_ptr<int>();
14 | float *output_feats = output_feats_tensor.data_ptr<float>();
15 |
16 | global_avg_pool_fp_cuda(nProposal, C, feats, proposals_offset, output_feats);
17 | }
18 |
19 | void global_avg_pool_bp(at::Tensor d_feats_tensor,
20 | at::Tensor proposals_offset_tensor,
21 | at::Tensor d_output_feats_tensor, int nProposal,
22 | int C) {
23 | float *d_feats = d_feats_tensor.data_ptr<float>();
24 | int *proposals_offset = proposals_offset_tensor.data_ptr<int>();
25 | float *d_output_feats = d_output_feats_tensor.data_ptr<float>();
26 |
27 | global_avg_pool_bp_cuda(nProposal, C, d_feats, proposals_offset,
28 | d_output_feats);
29 | }
30 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cu:
--------------------------------------------------------------------------------
1 | /*
2 | ROI Max Pool
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "roipool.h"
8 | #include
9 | #include
10 |
11 | // fp
12 | __global__ void global_avg_pool_fp_cuda_(int nProposal, int C, float *feats,
13 | int *proposals_offset,
14 | float *output_feats) {
15 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) {
16 | int start = proposals_offset[pp_id];
17 | int end = proposals_offset[pp_id + 1];
18 | int n_points = end - start;
19 |
20 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) {
21 | // int argmax_idx = -1;
22 | // float max_val = -1e50;
23 | float val = 0;
24 |
25 | for (int i = start; i < end; i++) {
26 | val += feats[i * C + plane];
27 | }
28 | // output_maxidx[pp_id * C + plane] = argmax_idx;
29 | output_feats[pp_id * C + plane] = val / (float)n_points;
30 | }
31 | }
32 | }
33 |
34 | // input: feats (sumNPoint, C) float
35 | // input: proposals_offset (nProposal + 1) int
36 | // output: output_feats (nProposal, C) float
37 | // output: output_maxidx (nProposal, C) int
38 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats,
39 | int *proposals_offset, float *output_feats) {
40 | global_avg_pool_fp_cuda_<<<std::min(nProposal, (int)32768),
41 | std::min(C, (int)32)>>>(
42 | nProposal, C, feats, proposals_offset, output_feats);
43 | }
44 |
45 | // bp
46 | __global__ void global_avg_pool_bp_cuda_(int nProposal, int C, float *d_feats,
47 | int *proposals_offset,
48 | float *d_output_feats) {
49 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) {
50 | int start = proposals_offset[pp_id];
51 | int end = proposals_offset[pp_id + 1];
52 | int n_points = end - start;
53 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) {
54 | for (int i = start; i < end; i++) {
55 | atomicAdd(&d_feats[i * C + plane],
56 | d_output_feats[pp_id * C + plane] / (float)n_points);
57 | }
58 | }
59 | }
60 | }
61 |
62 | // input: d_output_feats (nProposal, C) float
63 | // input: output_maxidx (nProposal, C) int
64 | // input: proposals_offset (nProposal + 1) int
65 | // output: d_feats (sumNPoint, C) float
66 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats,
67 | int *proposals_offset, float *d_output_feats) {
68 | global_avg_pool_bp_cuda_<<<std::min(nProposal, (int)32768),
69 | std::min(C, (int)32)>>>(
70 | nProposal, C, d_feats, proposals_offset, d_output_feats);
71 | }
72 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.h:
--------------------------------------------------------------------------------
1 | /*
2 | ROI Max Pool
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #ifndef ROIPOOL_H
8 | #define ROIPOOL_H
9 | #include <ATen/cuda/CUDAContext.h>
10 | #include <torch/serialize/tensor.h>
11 |
12 | #include "../datatype/datatype.h"
13 |
14 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats,
15 | int *proposals_offset, float *output_feats);
16 |
17 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats,
18 | int *proposals_offset, float *d_output_feats);
19 |
20 | void global_avg_pool_fp(at::Tensor feats_tensor,
21 | at::Tensor proposals_offset_tensor,
22 | at::Tensor output_feats_tensor, int nProposal, int C);
23 |
24 | void global_avg_pool_bp(at::Tensor d_feats_tensor,
25 | at::Tensor proposals_offset_tensor,
26 | at::Tensor d_output_feats_tensor, int nProposal, int C);
27 |
28 | #endif // ROIPOOL_H
29 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Segment Operations (mean, max, min)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "sec_mean.h"
8 |
9 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor,
10 | at::Tensor out_tensor, int nProposal, int C) {
11 | int *offsets = offsets_tensor.data_ptr<int>();
12 | float *inp = inp_tensor.data_ptr<float>();
13 | float *out = out_tensor.data_ptr<float>();
14 |
15 | sec_mean_cuda(nProposal, C, inp, offsets, out);
16 | }
17 |
18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor,
19 | at::Tensor out_tensor, int nProposal, int C) {
20 | int *offsets = offsets_tensor.data_ptr<int>();
21 | float *inp = inp_tensor.data_ptr<float>();
22 | float *out = out_tensor.data_ptr<float>();
23 |
24 | sec_min_cuda(nProposal, C, inp, offsets, out);
25 | }
26 |
27 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor,
28 | at::Tensor out_tensor, int nProposal, int C) {
29 | int *offsets = offsets_tensor.data_ptr<int>();
30 | float *inp = inp_tensor.data_ptr<float>();
31 | float *out = out_tensor.data_ptr<float>();
32 |
33 | sec_max_cuda(nProposal, C, inp, offsets, out);
34 | }
35 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Segment Operations (mean, max, min) (no bp)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "sec_mean.h"
8 | #include
9 | #include
10 |
11 | /* ================================== sec_mean
12 | * ================================== */
13 | __global__ void sec_mean_cuda_(int nProposal, int C, float *inp, int *offsets,
14 | float *out) {
15 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) {
16 | int start = offsets[p_id];
17 | int end = offsets[p_id + 1];
18 |
19 | float count = (float)(end - start);
20 |
21 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) {
22 | float mean = 0;
23 | for (int i = start; i < end; i++) {
24 | mean += (inp[i * C + plane] / count);
25 | }
26 | out[p_id * C + plane] = mean;
27 | }
28 | }
29 | }
30 |
31 | // input: inp (N, C) float
32 | // input: offsets (nProposal + 1) int
33 | // output: out (nProposal, C) float
34 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out) {
35 | sec_mean_cuda_<<<std::min(nProposal, (int)32768), std::min(C, (int)32)>>>(
36 | nProposal, C, inp, offsets, out);
37 | }
38 |
39 | /* ================================== sec_min ==================================
40 | */
41 | __global__ void sec_min_cuda_(int nProposal, int C, float *inp, int *offsets,
42 | float *out) {
43 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) {
44 | int start = offsets[p_id];
45 | int end = offsets[p_id + 1];
46 |
47 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) {
48 | float min_val = 1e50;
49 | for (int i = start; i < end; i++) {
50 | if (inp[i * C + plane] < min_val) {
51 | min_val = inp[i * C + plane];
52 | }
53 | }
54 | out[p_id * C + plane] = min_val;
55 | }
56 | }
57 | }
58 |
59 | // input: inp (N, C) float
60 | // input: offsets (nProposal + 1) int
61 | // output: out (nProposal, C) float
62 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out) {
63 | sec_min_cuda_<<<std::min(nProposal, (int)32768), std::min(C, (int)32)>>>(
64 | nProposal, C, inp, offsets, out);
65 | }
66 |
67 | /* ================================== sec_max ==================================
68 | */
69 | __global__ void sec_max_cuda_(int nProposal, int C, float *inp, int *offsets,
70 | float *out) {
71 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) {
72 | int start = offsets[p_id];
73 | int end = offsets[p_id + 1];
74 |
75 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) {
76 | float max_val = -1e50;
77 | for (int i = start; i < end; i++) {
78 | if (inp[i * C + plane] > max_val) {
79 | max_val = inp[i * C + plane];
80 | }
81 | }
82 | out[p_id * C + plane] = max_val;
83 | }
84 | }
85 | }
86 |
87 | // input: inp (N, C) float
88 | // input: offsets (nProposal + 1) int
89 | // output: out (nProposal, C) float
90 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out) {
91 | sec_max_cuda_<<<std::min(nProposal, (int)32768), std::min(C, (int)32)>>>(
92 | nProposal, C, inp, offsets, out);
93 | }
94 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.h:
--------------------------------------------------------------------------------
1 | /*
2 | Segment Operations (mean, max, min)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #ifndef SEC_MEAN_H
8 | #define SEC_MEAN_H
9 | #include <ATen/cuda/CUDAContext.h>
10 | #include <torch/serialize/tensor.h>
11 |
12 | #include "../datatype/datatype.h"
13 |
14 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor,
15 | at::Tensor out_tensor, int nProposal, int C);
16 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out);
17 |
18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor,
19 | at::Tensor out_tensor, int nProposal, int C);
20 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out);
21 |
22 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor,
23 | at::Tensor out_tensor, int nProposal, int C);
24 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out);
25 |
26 | #endif // SEC_MEAN_H
27 |
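The three segment reductions share one shape contract: inp is a flat (N, C) feature tensor, offsets is an (nProposal + 1) int tensor of segment boundaries, and out is a pre-allocated (nProposal, C) tensor filled in place. A minimal sketch, assuming the compiled extension is importable as softgroup_ops and all tensors live on the GPU:

    import torch
    import softgroup_ops as ops

    inp = torch.rand(7, 4, device='cuda')                                # (N, C) point features
    offsets = torch.tensor([0, 3, 7], dtype=torch.int32, device='cuda')  # 2 proposals: points 0-2 and 3-6
    out = torch.zeros(2, 4, device='cuda')                               # (nProposal, C), filled in place

    ops.sec_mean(inp, offsets, out, 2, 4)   # sec_min / sec_max follow the same calling convention
    print(out)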
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/softgroup_api.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <torch/serialize/tensor.h>
3 |
4 | #include "softgroup_ops.h"
5 |
6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
7 |
8 | m.def("get_mask_iou_on_cluster", &get_mask_iou_on_cluster,
9 | "get_mask_iou_on_cluster");
10 | m.def("get_mask_iou_on_pred", &get_mask_iou_on_pred, "get_mask_iou_on_pred");
11 | m.def("get_mask_label", &get_mask_label, "get_mask_label");
12 |
13 | m.def("voxelize_idx", &voxelize_idx_3d, "voxelize_idx");
14 | m.def("voxelize_fp", &voxelize_fp_feat, "voxelize_fp");
15 | m.def("voxelize_bp", &voxelize_bp_feat, "voxelize_bp");
16 |
17 | m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p");
18 | m.def("bfs_cluster", &bfs_cluster, "bfs_cluster");
19 |
20 | m.def("global_avg_pool_fp", &global_avg_pool_fp, "global_avg_pool_fp");
21 | m.def("global_avg_pool_bp", &global_avg_pool_bp, "global_avg_pool_bp");
22 |
23 | m.def("sec_mean", &sec_mean, "sec_mean");
24 | m.def("sec_min", &sec_min, "sec_min");
25 | m.def("sec_max", &sec_max, "sec_max");
26 | }
27 |
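The bindings above are the whole Python-visible surface of the extension. The most central pair is voxelize_idx followed by voxelize_fp: the first builds the point-to-voxel maps from CPU long coordinates, the second averages (mode 4) or copies point features into voxel features on the GPU. A hedged sketch, assuming the module is importable as softgroup_ops:

    import torch
    import softgroup_ops as ops

    coords = torch.tensor([[0, 1, 2, 3],
                           [0, 1, 2, 3],
                           [0, 5, 5, 5]], dtype=torch.long)   # (N, 1 + 3): batch_idx, x, y, z
    feats = torch.rand(3, 16, device='cuda')                  # (N, C) point features

    input_map = torch.zeros(coords.shape[0], dtype=torch.int32)  # point -> voxel, filled in place
    output_coords = torch.zeros(0, dtype=torch.long)             # resized to (M, 4) inside
    output_map = torch.zeros(0, dtype=torch.int32)               # resized to (M, maxActive + 1) inside
    mode = 4                                                     # 4 = mean over the points in a voxel
    ops.voxelize_idx(coords, output_coords, input_map, output_map, 1, mode)

    M, max_active = output_map.shape[0], output_map.shape[1] - 1
    voxel_feats = torch.zeros(M, 16, device='cuda')
    ops.voxelize_fp(feats, voxel_feats, output_map.cuda(), mode, M, max_active, 16)
    print(voxel_feats.shape)   # (M, 16), here M == 2 since two points share a voxel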
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.cpp:
--------------------------------------------------------------------------------
1 | #include <ATen/cuda/CUDAContext.h>
2 | #include <torch/extension.h>
3 | #include <torch/serialize/tensor.h>
4 |
5 | #include "bfs_cluster/bfs_cluster.cpp"
6 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp"
7 | #include "datatype/datatype.cpp"
8 | #include "roipool/roipool.cpp"
9 | #include "sec_mean/sec_mean.cpp"
10 | #include "voxelize/voxelize.cpp"
11 |
12 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords,
13 | /* long M*4 */ at::Tensor output_coords,
14 | /* Int N */ at::Tensor input_map,
15 | /* Int M*(maxActive+1) */ at::Tensor output_map,
16 | Int batchSize, Int mode) {
17 | voxelize_idx<3>(coords, output_coords, input_map, output_map, batchSize,
18 | mode);
19 | }
20 |
21 | void voxelize_fp_feat(
22 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M)
23 | /* cuda float M*C */ at::Tensor output_feats,
24 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive,
25 | Int maxActive, Int nPlane) {
26 | voxelize_fp<float>(feats, output_feats, output_map, mode, nActive, maxActive,
27 | nPlane);
28 | }
29 |
30 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats,
31 | /* cuda float N*C */ at::Tensor d_feats,
32 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map,
33 | Int mode, Int nActive, Int maxActive, Int nPlane) {
34 | voxelize_bp<float>(d_output_feats, d_feats, output_map, mode, nActive,
35 | maxActive, nPlane);
36 | }
37 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.h:
--------------------------------------------------------------------------------
1 | #ifndef HAIS_H
2 | #define HAIS_H
3 | #include "bfs_cluster/bfs_cluster.h"
4 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.h"
5 | #include "datatype/datatype.h"
6 | #include "roipool/roipool.h"
7 | #include "sec_mean/sec_mean.h"
8 |
9 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords,
10 | /* long M*4 */ at::Tensor output_coords,
11 | /* Int N */ at::Tensor input_map,
12 | /* Int M*(maxActive+1) */ at::Tensor output_map,
13 | Int batchSize, Int mode);
14 |
15 | void voxelize_fp_feat(
16 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M)
17 | /* cuda float M*C */ at::Tensor output_feats,
18 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive,
19 | Int maxActive, Int nPlane);
20 |
21 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats,
22 | /* cuda float N*C */ at::Tensor d_feats,
23 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map,
24 | Int mode, Int nActive, Int maxActive, Int nPlane);
25 |
26 | #endif // HAIS_H
27 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | Points to Voxels & Voxels to Points (Modified from SparseConv)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "voxelize.h"
8 |
9 | /* ================================== voxelize_idx
10 | * ================================== */
11 | template <Int dimension>
12 | void voxelize_idx(/* long N*4 */ at::Tensor coords,
13 | /* long M*4 */ at::Tensor output_coords,
14 | /* Int N */ at::Tensor input_map,
15 | /* Int M*(maxActive+1) */ at::Tensor output_map,
16 | Int batchSize, Int mode) {
17 | assert(coords.ndimension() == 2);
18 | assert(coords.size(1) >= dimension and coords.size(1) <= dimension + 1);
19 |
20 | RuleBook voxelizeRuleBook; // rule[1]: M voxels -> N points output_map
21 | SparseGrids<dimension> inputSGs; // voxel_coords -> voxel_idx in M voxels
22 | // input_map: N points -> M voxels
23 | Int nActive = 0;
24 |
25 | Int maxActive = voxelize_inputmap(
26 | inputSGs, input_map.data_ptr<Int>(), voxelizeRuleBook, nActive,
27 | coords.data_ptr<long>(), coords.size(0), coords.size(1), batchSize, mode);
28 |
29 | output_map.resize_({nActive, maxActive + 1});
30 | output_map.zero_();
31 |
32 | output_coords.resize_({nActive, coords.size(1)});
33 | output_coords.zero_();
34 |
35 | Int *oM = output_map.data_ptr<Int>();
36 | long *oC = output_coords.data_ptr<long>();
37 | voxelize_outputmap(coords.data_ptr(), oC, oM,
38 | &voxelizeRuleBook[1][0], nActive, maxActive);
39 | }
40 |
41 | template <Int dimension>
42 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map,
43 | Int *rule, Int nOutputRows, Int maxActive) {
44 | for (Int i = 0; i < nOutputRows; i++) {
45 | for (Int j = 0; j <= maxActive; j++)
46 | output_map[j] = rule[j];
47 | Int inputIdx = rule[1];
48 | rule += (1 + maxActive);
49 | output_map += (1 + maxActive);
50 |
51 | long *coord = coords + inputIdx * (dimension + 1);
52 | long *output_coord = output_coords + i * (dimension + 1);
53 | for (Int j = 0; j <= dimension; j++) {
54 | output_coord[j] = coord[j];
55 | }
56 | }
57 | }
58 |
59 | // mode 0=guaranteed unique 1=last item(overwrite) 2=first item(keep) 3=sum,
60 | // 4=mean
61 | // input: coords
62 | // output: SGs: one map for each batch: map from voxel_coord to voxel_idx(in M
63 | // voxels)
64 | // output: input_map: N, N points -> M voxels
65 | // output: rules
66 | // output: nActive
67 | // output: maxActive
68 | template <Int dimension>
69 | Int voxelize_inputmap(SparseGrids<dimension> &SGs, Int *input_map,
70 | RuleBook &rules, Int &nActive, long *coords,
71 | Int nInputRows, Int nInputColumns, Int batchSize,
72 | Int mode) {
73 | assert(nActive == 0);
74 | assert(rules.size() == 0);
75 | assert(SGs.size() == 0);
76 |
77 | SGs.resize(batchSize);
78 | Point<dimension> p;
79 |
80 | std::vector<std::vector<Int>> outputRows;
81 | if (nInputColumns == dimension) {
82 | SGs.resize(1);
83 | auto &sg = SGs[0];
84 | for (Int i = 0; i < nInputRows; i++) {
85 | for (Int j = 0; j < dimension; j++)
86 | p[j] = coords[j];
87 | coords += dimension;
88 | auto iter = sg.mp.find(p);
89 | if (iter == sg.mp.end()) {
90 | sg.mp[p] = nActive++;
91 | outputRows.resize(nActive);
92 | }
93 | outputRows[sg.mp[p]].push_back(i);
94 |
95 | input_map[i] = sg.mp[p];
96 | }
97 | } else { // nInputColumns == dimension + 1 (1 in index 0 for batchidx)
98 | Int batchIdx;
99 | for (Int i = 0; i < nInputRows; i++) {
100 | batchIdx = coords[0];
101 | for (Int j = 0; j < dimension; j++)
102 | p[j] = coords[j + 1];
103 | coords += (dimension + 1);
104 | if (batchIdx + 1 >= (Int)SGs.size()) {
105 | SGs.resize(batchIdx + 1);
106 | }
107 | auto &sg = SGs[batchIdx];
108 | auto iter = sg.mp.find(p);
109 | if (iter == sg.mp.end()) {
110 | sg.mp[p] = nActive++;
111 | outputRows.resize(nActive);
112 | }
113 | outputRows[sg.mp[p]].push_back(i);
114 |
115 | input_map[i] = sg.mp[p];
116 | }
117 | }
118 |
119 | // Rulebook Format
120 | // rules[0][0] == mode
121 | // rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2)
122 | // rules[0][2] == nInputRows
123 | // rules[0][3] == nOutputRows
124 | // rules[1] nOutputRows x (1+maxActive)
125 | rules.resize(2);
126 | rules[0].push_back(mode);
127 | rules[0].push_back(1);
128 | rules[0].push_back(nInputRows);
129 | rules[0].push_back(outputRows.size());
130 | auto &rule = rules[1];
131 | if (mode == 0) {
132 | assert(nInputRows == (Int)outputRows.size());
133 | for (Int i = 0; i < nActive; i++) {
134 | rule.push_back(1);
135 | assert((Int)outputRows[i].size() == 1);
136 | rule.push_back(outputRows[i][0]);
137 | }
138 | }
139 | if (mode == 1) {
140 | for (Int i = 0; i < nActive; i++) {
141 | rule.push_back(1);
142 | rule.push_back(outputRows[i].front());
143 | }
144 | }
145 | if (mode == 2) {
146 | for (Int i = 0; i < nActive; i++) {
147 | rule.push_back(1);
148 | rule.push_back(outputRows[i].back());
149 | }
150 | }
151 | Int maxActive = 1;
152 | if (mode == 3 or mode == 4) {
153 | for (auto &row : outputRows)
154 | maxActive = std::max(maxActive, (Int)row.size());
155 | rules[0][1] = maxActive;
156 | for (auto &row : outputRows) {
157 | rule.push_back(row.size());
158 | for (auto &r : row)
159 | rule.push_back(r);
160 | rule.resize((rule.size() + maxActive) / (maxActive + 1) *
161 | (maxActive + 1));
162 | }
163 | }
164 | return maxActive;
165 | }
166 |
167 | /* ================================== voxelize
168 | * ================================== */
169 | template <class T>
170 | void voxelize_fp(
171 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M)
172 | /* cuda float M*C */ at::Tensor output_feats,
173 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive,
174 | Int maxActive, Int nPlane) {
175 |
176 | auto iF = feats.data_ptr<T>();
177 | auto oF = output_feats.data_ptr<T>();
178 |
179 | Int *rules = output_map.data_ptr<Int>();
180 |
181 | voxelize_fp_cuda(nActive, maxActive, nPlane, iF, oF, rules, mode == 4);
182 | }
183 |
184 | template <class T>
185 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats,
186 | /* cuda float N*C */ at::Tensor d_feats,
187 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode,
188 | Int nActive, Int maxActive, Int nPlane) {
189 | auto d_oF = d_output_feats.data_ptr<T>();
190 | auto d_iF = d_feats.data_ptr<T>();
191 |
192 | Int *rules = output_map.data_ptr<Int>();
193 |
194 | voxelize_bp_cuda(nActive, maxActive, nPlane, d_oF, d_iF, rules, mode == 4);
195 | }
196 |
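To make the rulebook comment above concrete, here is what voxelize_inputmap produces for four points falling into two voxels in mode 4 (points 0, 1 and 3 share a voxel, point 2 is alone); values shown as plain Python lists for illustration:

    rules = [
        [4, 3, 4, 2],        # rules[0]: mode=4 (mean), maxActive=3, nInputRows=4, nOutputRows=2
        [3, 0, 1, 3,         # voxel 0: 3 points, input rows 0, 1, 3
         1, 2, 0, 0],        # voxel 1: 1 point, input row 2 (padded to 1 + maxActive entries)
    ]
    input_map = [0, 0, 1, 0]  # point index -> voxel index, as written through input_map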
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cu:
--------------------------------------------------------------------------------
1 | /*
2 | Points to Voxels & Voxels to Points (Modified from SparseConv)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #include "voxelize.h"
8 |
9 | template <class T>
10 | __global__ void voxelize_fp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes,
11 | T *feats, T *output_feats, Int *rules,
12 | bool average) {
13 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) {
14 | T *out = output_feats + row * nPlanes;
15 | Int *r = rules + row * (maxActive + 1);
16 | Int nActive = r[0];
17 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
18 | for (int i = 1; i <= nActive; i++) {
19 | T *inp = feats + r[i] * nPlanes;
20 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) {
21 | atomicAdd(&out[plane], multiplier * inp[plane]);
22 | }
23 | }
24 | }
25 | }
26 |
27 | // input: feats N * C
28 | // input: rules M * (1 + maxActive)
29 | // output: output_feats M * C
30 | template <class T>
31 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats,
32 | T *output_feats, Int *rules, bool average) {
33 | voxelize_fp_cuda_<
34 | T><<<std::min(nOutputRows, (Int)32768), std::min(nPlanes, (Int)32)>>>(
35 | nOutputRows, maxActive, nPlanes, feats, output_feats, rules, average);
36 | }
37 |
38 | template <class T>
39 | __global__ void voxelize_bp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes,
40 | T *d_output_feats, T *d_feats, Int *rules,
41 | bool average) {
42 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) {
43 | T *out = d_output_feats + row * nPlanes;
44 | Int *r = rules + row * (maxActive + 1);
45 | Int nActive = r[0];
46 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1;
47 | for (int i = 1; i <= nActive; i++) {
48 | T *inp = d_feats + r[i] * nPlanes;
49 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) {
50 | atomicAdd(&inp[plane], multiplier * out[plane]);
51 | }
52 | }
53 | }
54 | }
55 |
56 | template <class T>
57 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes,
58 | T *d_output_feats, T *d_feats, Int *rules, bool average) {
59 | voxelize_bp_cuda_<
60 | T><<<std::min(nOutputRows, (Int)32768), std::min(nPlanes, (Int)32)>>>(
61 | nOutputRows, maxActive, nPlanes, d_output_feats, d_feats, rules, average);
62 | }
63 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.h:
--------------------------------------------------------------------------------
1 | /*
2 | Points to Voxels & Voxels to Points (Modified from SparseConv)
3 | Written by Li Jiang
4 | All Rights Reserved 2020.
5 | */
6 |
7 | #ifndef VOXELIZE_H
8 | #define VOXELIZE_H
9 | #include <ATen/cuda/CUDAContext.h>
10 | #include <torch/serialize/tensor.h>
11 |
12 | #include "../datatype/datatype.h"
13 |
14 | /* ================================== voxelize_idx
15 | * ================================== */
16 | template <Int dimension>
17 | void voxelize_idx(/* long N*4 */ at::Tensor coords,
18 | /* long M*4 */ at::Tensor output_coords,
19 | /* Int N */ at::Tensor input_map,
20 | /* Int M*(maxActive+1) */ at::Tensor output_map,
21 | Int batchSize, Int mode);
22 |
23 | template <Int dimension>
24 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map,
25 | Int *rule, Int nOutputRows, Int maxActive);
26 |
27 | template <Int dimension>
28 | Int voxelize_inputmap(SparseGrids<dimension> &SGs, Int *input_map,
29 | RuleBook &rules, Int &nActive, long *coords,
30 | Int nInputRows, Int nInputColumns, Int batchSize,
31 | Int mode);
32 |
33 | /* ================================== voxelize
34 | * ================================== */
35 | template <class T>
36 | void voxelize_fp(
37 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M)
38 | /* cuda float M*C */ at::Tensor output_feats,
39 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive,
40 | Int maxActive, Int nPlane);
41 |
42 | template <class T>
43 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats,
44 | T *output_feats, Int *rules, bool average);
45 |
46 | //
47 | template <class T>
48 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats,
49 | /* cuda float N*C */ at::Tensor d_feats,
50 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode,
51 | Int nActive, Int maxActive, Int nPlane);
52 |
53 | template <class T>
54 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes,
55 | T *d_output_feats, T *d_feats, Int *rules, bool average);
56 |
57 | #endif // VOXELIZE_H
58 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 100
3 | multi_line_output = 0
4 | known_standard_library = setuptools
5 | known_third_party = munch,numpy,pandas,plyfile,scannet_util,scipy,sklearn,spconv,tensorboardX,torch,tqdm,yaml
6 | no_lines_before = STDLIB,LOCALFOLDER
7 | default_section = THIRDPARTY
8 |
9 | [yapf]
10 | BASED_ON_STYLE = pep8
11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
13 | COLUMN_LIMIT = 100
14 |
--------------------------------------------------------------------------------
/pcseg/external_libs/softgroup_ops/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
4 |
5 | if __name__ == '__main__':
6 | setup(
7 | name='softgroup',
8 | version='1.0',
9 | description='SoftGroup: SoftGroup for 3D Instance Segmentation [CVPR 2022]',
10 | author='Thang Vu',
11 | author_email='thangvubk@kaist.ac.kr',
12 | # packages=['softgroup'],
13 | package_data={'ops': ['*/*.so']},
14 | ext_modules=[
15 | CUDAExtension(
16 | name='softgroup_ops',
17 | sources=[
18 | 'ops/src/softgroup_api.cpp', 'ops/src/softgroup_ops.cpp',
19 | 'ops/src/cuda.cu'
20 | ],
21 | extra_compile_args={
22 | 'cxx': ['-g'],
23 | 'nvcc': ['-O2']
24 | },
25 | include_dirs=['/data/anaconda3/envs/pt18/include/'])
26 | ],
27 | cmdclass={'build_ext': BuildExtension})
28 |
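After building the extension in place (the build command is presumably documented in docs/INSTALL.md; note that the hard-coded include_dirs path above is machine-specific and may need to be adapted to the local environment), a quick smoke test is to import the module and list the bound ops. This assumes the built module ends up importable as softgroup_ops:

    import torch           # load torch's shared libraries before the extension
    import softgroup_ops

    print([name for name in dir(softgroup_ops) if not name.startswith('_')])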
--------------------------------------------------------------------------------
/pcseg/models/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | import numpy as np
4 | import torch
5 |
6 | from .vision_networks import build_model
7 | from .text_networks import build_text_model
8 |
9 | try:
10 | import kornia
11 | except:
12 | pass
13 | # print('Warning: kornia is not installed. This package is only required by CaDDN')
14 |
15 |
16 | def build_vision_network(model_cfg, num_class, dataset):
17 | model = build_model(
18 | model_cfg=model_cfg, num_class=num_class, dataset=dataset
19 | )
20 | return model
21 |
22 |
23 | def build_text_network(model_cfg):
24 | text_encoder = build_text_model(model_cfg=model_cfg)
25 | return text_encoder
26 |
27 |
28 | def load_data_to_gpu(batch_dict):
29 | for key, val in batch_dict.items():
30 | if isinstance(val, torch.Tensor):
31 | batch_dict[key] = batch_dict[key].cuda()
32 | elif not isinstance(val, np.ndarray) or key in ['calib', 'point_img_idx', 'point_img']:
33 | continue
34 | elif key in ['ids', 'metadata', 'scene_name']:
35 | continue
36 | elif key in ['points_xyz_voxel_scale', 'labels', 'inst_label', 'origin_idx', 'offsets', 'inst_cls']:
37 | batch_dict[key] = torch.from_numpy(val).long().cuda()
38 | elif key in ['inst_pointnum', 'batch_idxs']:
39 | batch_dict[key] = torch.from_numpy(val).int().cuda()
40 | else:
41 | batch_dict[key] = torch.from_numpy(val).float().cuda()
42 |
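load_data_to_gpu dispatches on the key name to pick a dtype: long for labels and index-like arrays, int for counts and batch indices, float for every other numpy array, while strings and metadata are left untouched. A toy illustration (requires a CUDA device; keys are taken from the lists above, values are made up):

    import numpy as np
    from pcseg.models import load_data_to_gpu

    batch_dict = {
        'scene_name': ['scene0000_00'],                       # not an array -> left as-is
        'labels': np.array([0, 2, 2]),                        # -> torch.long on CUDA
        'batch_idxs': np.array([0, 0, 0]),                    # -> torch.int on CUDA
        'feats': np.random.rand(3, 6).astype(np.float32),     # -> torch.float on CUDA (default branch)
    }
    load_data_to_gpu(batch_dict)
    print({k: (v.dtype if hasattr(v, 'dtype') else type(v)) for k, v in batch_dict.items()})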
--------------------------------------------------------------------------------
/pcseg/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/adapter/__init__.py:
--------------------------------------------------------------------------------
1 | from .vl_adapter import VLAdapter
2 |
3 | __all__ = {
4 | 'VLAdapter': VLAdapter
5 | }
--------------------------------------------------------------------------------
/pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/adapter/vl_adapter.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import torch.nn as nn
4 | import numpy as np
5 |
6 | from ..model_utils import basic_block_1d
7 |
8 |
9 | class VLAdapter(nn.Module):
10 | def __init__(self, model_cfg, in_channel):
11 | super(VLAdapter, self).__init__()
12 | self.model_cfg = model_cfg
13 | self.in_feature_name = model_cfg.get('IN_FEAT_NAME', 'backbone_3d_feats')
14 | self.eval_only = model_cfg.get('EVAL_ONLY', None)
15 | self.text_channel = model_cfg.TEXT_DIM
16 |
17 | # vision adapter
18 | adapter_last_norm = self.model_cfg.get('LAST_NORM', True)
19 | self.adapter = self.build_vl_adapter(self.model_cfg.NUM_ADAPTER_LAYERS, in_channel, adapter_last_norm)
20 |
21 | def build_vl_adapter(self, num_adapter_layers, in_channel, last_norm):
22 | """build vision language adapter
23 |
24 | Args:
25 | num_adapter_layers (int): number of MLP layers in the adapter (1 or 2 are supported)
26 | in_channel (int): dimension of the incoming 3D backbone features
27 | last_norm (bool): whether to append a normalization layer after the last MLP layer
28 | Raises:
29 | NotImplementedError: if num_adapter_layers is larger than 2
30 |
31 | Returns:
32 | nn.Module or None: the adapter MLP, or None when no adapter is built
33 | """
34 | if num_adapter_layers < 1 or self.eval_only:
35 | return None
36 |
37 | if num_adapter_layers == 1:
38 | mid_channel_list = [in_channel, self.text_channel]
39 | elif num_adapter_layers == 2:
40 | multiplier = int(np.log2(self.text_channel / in_channel))
41 | mid_channel_list = [in_channel, in_channel * multiplier, self.text_channel]
42 | else:
43 | raise NotImplementedError
44 |
45 | adapter = basic_block_1d.MLP(
46 | mid_channel_list,
47 | norm_fn=functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1),
48 | num_layers=num_adapter_layers, last_norm_fn=last_norm
49 | )
50 | return adapter
51 |
52 | def forward(self, batch_dict):
53 | if self.eval_only and self.training:
54 | return batch_dict
55 |
56 | backbone3d_feats = batch_dict[self.in_feature_name]
57 |
58 | # forward adapter
59 | if hasattr(self, 'adapter') and self.adapter is not None:
60 | adapter_feats = self.adapter(backbone3d_feats)
61 | else:
62 | adapter_feats = backbone3d_feats
63 |
64 | batch_dict['adapter_feats'] = adapter_feats
65 | return batch_dict
66 |
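A minimal sketch of running the adapter on dummy backbone features. EasyDict is used here only so the config supports both attribute access and .get() as the code above expects; the values for TEXT_DIM and NUM_ADAPTER_LAYERS are made up:

    import torch
    from easydict import EasyDict
    from pcseg.models.adapter.vl_adapter import VLAdapter

    cfg = EasyDict(TEXT_DIM=512, NUM_ADAPTER_LAYERS=2, LAST_NORM=True)
    adapter = VLAdapter(cfg, in_channel=32)

    batch_dict = {'backbone_3d_feats': torch.rand(100, 32)}   # default IN_FEAT_NAME
    batch_dict = adapter(batch_dict)
    print(batch_dict['adapter_feats'].shape)                  # expected (100, 512)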
--------------------------------------------------------------------------------
/pcseg/models/head/__init__.py:
--------------------------------------------------------------------------------
1 | from .text_seg_head import TextSegHead
2 | from .binary_head import BinaryHead
3 | from .caption_head import CaptionHead
4 | from .linear_head import LinearHead
5 | from .inst_head import InstHead
6 |
7 | __all__ = {
8 | 'TextSegHead': TextSegHead,
9 | 'BinaryHead': BinaryHead,
10 | 'CaptionHead': CaptionHead,
11 | 'LinearHead': LinearHead,
12 | 'InstHead': InstHead
13 | }
14 |
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/binary_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/binary_head.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/caption_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/caption_head.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/inst_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/inst_head.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/linear_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/linear_head.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/head/binary_head.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import torch
3 | import torch.nn as nn
4 |
5 | from pcseg.utils.spconv_utils import spconv
6 | from pcseg.models.model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlockDecoder
7 | from pcseg.utils import common_utils
8 |
9 |
10 | class BinaryHead(nn.Module):
11 | def __init__(self, model_cfg, ignore_label, in_channel, block_reps, block_residual):
12 | super().__init__()
13 | self.model_cfg = model_cfg
14 | self.binary_feat_input = []
15 | self.binary_thresh = model_cfg.THRESH
16 | self.in_channel = in_channel
17 | self.ignore_label = ignore_label
18 | self.num_filters = model_cfg.get('NUM_FILTERS', None)
19 |
20 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1)
21 | if block_residual:
22 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False))
23 | else:
24 | block = VGGBlock
25 |
26 | if self.num_filters is not None:
27 | block_channels = self.num_filters
28 | else:
29 | # assert self.num_blocks is not None
30 | block_channels = [in_channel, 2 * in_channel, 3 * in_channel, 4 * in_channel, 5 * in_channel, 6 * in_channel, 7 * in_channel]
31 |
32 | self.binary_encoder = UBlockDecoder(
33 | block_channels, norm_fn, block_reps, block, indice_key_id=1, detach=model_cfg.get('DETACH', True)
34 | )
35 |
36 | self.binary_classifier = spconv.SparseSequential(
37 | norm_fn(in_channel),
38 | nn.ReLU(),
39 | nn.Linear(in_channel, 1)
40 | )
41 | self.forward_ret_dict = {}
42 | self.binary_loss_func = nn.BCEWithLogitsLoss()
43 |
44 | self.apply(self.set_bn_init)
45 |
46 | @staticmethod
47 | def set_bn_init(m):
48 | classname = m.__class__.__name__
49 | if classname.find('BatchNorm') != -1:
50 | m.weight.data.fill_(1.0)
51 | m.bias.data.fill_(0.0)
52 |
53 | def forward(self, batch_dict):
54 | self.forward_ret_dict = {}
55 | binary_scores = self.binary_encoder(self.binary_feat_input)
56 | binary_scores = self.binary_classifier(binary_scores).features
57 |
58 | if self.training and self.model_cfg.get('VOXEL_LOSS', None):
59 | pass
60 | else:
61 | binary_scores = binary_scores[batch_dict['v2p_map'].long()]
62 |
63 | if not self.training and batch_dict['test_x4_split']:
64 | binary_scores = common_utils.merge_4_parts(binary_scores)
65 |
66 | binary_preds = (torch.sigmoid(binary_scores) > self.binary_thresh).long()
67 |
68 | self.binary_feat_input = []
69 | self.forward_ret_dict['binary_scores'] = binary_scores
70 | self.forward_ret_dict['binary_preds'] = binary_preds
71 | if self.training:
72 | self.forward_ret_dict['binary_labels'] = batch_dict['binary_labels']
73 |
74 | batch_dict['binary_ret_dict'] = self.forward_ret_dict
75 | return batch_dict
76 |
77 | def register_hook_for_binary_head(self, backbone):
78 | def get_features():
79 | def hook(model, input, output):
80 | self.binary_feat_input.append(output)
81 | return hook
82 |
83 | for module_name in self.model_cfg.HOOK_FEATURE_LIST:
84 | eval('backbone.' + module_name).register_forward_hook(get_features())
85 |
86 | def get_loss(self):
87 | binary_scores = self.forward_ret_dict['binary_scores']
88 | binary_labels = self.forward_ret_dict['binary_labels']
89 |
90 | # filter unannotated categories
91 | mask = binary_labels != self.ignore_label
92 | binary_scores = binary_scores[mask]
93 | binary_labels = binary_labels[mask]
94 |
95 | binary_loss = self.binary_loss_func(binary_scores, binary_labels.reshape(-1, 1))
96 | binary_loss = binary_loss * self.model_cfg.get('LOSS_WEIGHT', 1.0)
97 |
98 | tb_dict = {'binary_loss': binary_loss.item()}
99 | return binary_loss, tb_dict
100 |
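register_hook_for_binary_head relies on PyTorch forward hooks to capture intermediate backbone features for the binary branch. A standalone illustration of that pattern on a plain model (not this repo's sparse UNet backbone):

    import torch
    import torch.nn as nn

    backbone = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
    captured = []

    def get_features():
        def hook(module, inputs, output):
            captured.append(output)          # stash the intermediate activation
        return hook

    backbone[0].register_forward_hook(get_features())   # hook the layer of interest
    backbone(torch.rand(5, 8))
    print(captured[0].shape)                             # torch.Size([5, 16]) -> features for a side head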
--------------------------------------------------------------------------------
/pcseg/models/head/linear_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from pcseg.config import cfg
5 |
6 |
7 | class LinearHead(nn.Module):
8 | def __init__(self, model_cfg, in_channel, ignore_label, num_class):
9 | super(LinearHead, self).__init__()
10 | self.model_cfg = model_cfg
11 | self.in_channel = in_channel
12 | self.ignore_label = ignore_label
13 | self.num_class = num_class
14 |
15 | self.cls_head = nn.Linear(self.in_channel, self.num_class)
16 |
17 | self.valid_class_idx = [i for i in range(self.num_class)]
18 | if hasattr(cfg.DATA_CONFIG, 'ignore_class_idx'):
19 | self.ignore_class_idx = cfg.DATA_CONFIG.ignore_class_idx
20 | for i in self.ignore_class_idx:
21 | self.valid_class_idx.remove(i)
22 |
23 | self.seg_loss_func = nn.CrossEntropyLoss(ignore_index=self.ignore_label).cuda()
24 | self.forward_ret_dict = {}
25 |
26 | def forward(self, batch_dict):
27 | self.forward_ret_dict = {}
28 | backbone3d_feats = batch_dict['backbone_3d_feats']
29 |
30 | semantic_scores = self.cls_head(backbone3d_feats)
31 | if self.training and self.model_cfg.get('VOXEL_LOSS', None):
32 | pass
33 | else:
34 | semantic_scores = semantic_scores[batch_dict['v2p_map']]
35 |
36 | semantic_scores = semantic_scores[..., self.valid_class_idx]
37 | semantic_preds = semantic_scores.max(1)[1]
38 |
39 | self.forward_ret_dict['seg_scores'] = semantic_scores
40 | self.forward_ret_dict['seg_preds'] = semantic_preds
41 |
42 | # save gt label to forward_ret_dict
43 | self.forward_ret_dict['seg_labels'] = batch_dict['labels']
44 | 
45 | return batch_dict
46 | 
47 | def get_loss(self):
48 | semantic_scores = self.forward_ret_dict['seg_scores']
49 | semantic_labels = self.forward_ret_dict['seg_labels']
50 | 
51 | seg_loss = self.seg_loss_func(semantic_scores, semantic_labels)
52 | 
53 | tb_dict = {'loss_seg': seg_loss.item()}
54 | return seg_loss, tb_dict
55 | 
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 |
4 |
5 | def load_best_metric(ckpt_save_dir):
6 | best_metric, best_epoch = 0.0, -1
7 | best_metric_record_list = glob.glob(str(ckpt_save_dir / '*.txt'))
8 | if len(best_metric_record_list) > 0:
9 | best_metric_record_name = os.path.basename(best_metric_record_list[0])
10 | best_split_list = os.path.splitext(best_metric_record_name)[0].split('_')
11 | best_metric = float(best_split_list[2])
12 | best_epoch = int(best_split_list[-1])
13 | return best_metric, best_epoch
14 |
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/model_utils/basic_block_1d.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class MLP(nn.Sequential):
5 | def __init__(self, channels, norm_fn=None, num_layers=2, last_norm_fn=False, last_bias=True):
6 | assert len(channels) >= 2
7 | modules = []
8 | for i in range(num_layers - 1):
9 | modules.append(nn.Linear(channels[i], channels[i + 1]))
10 | if norm_fn:
11 | modules.append(norm_fn(channels[i + 1]))
12 | modules.append(nn.ReLU())
13 | modules.append(nn.Linear(channels[-2], channels[-1], bias=last_bias))
14 | if last_norm_fn:
15 | modules.append(norm_fn(channels[-1]))
16 | modules.append(nn.ReLU())
17 | super().__init__(*modules)
18 |
19 | def init_weights(self):
20 | for m in self.modules():
21 | if isinstance(m, nn.Linear):
22 | nn.init.xavier_uniform_(m.weight)
23 | nn.init.constant_(m.bias, 0)
24 | if isinstance(self[-1], nn.Linear):
25 | nn.init.normal_(self[-1].weight, 0, 0.01)
26 | nn.init.constant_(self[-1].bias, 0)
27 |
28 |
29 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm1d, **kwargs):
30 | if name == 'BasicBlock1D':
31 | block = [
32 | nn.Linear(in_channels, out_channels),
33 | norm_layer(out_channels, eps=1e-3, momentum=0.01),
34 | act_fn()
35 | ]
36 | elif name == 'DeConv1dBlock':
37 | block = [
38 | nn.ConvTranspose1d(in_channels, out_channels, **kwargs),
39 | norm_layer(out_channels, eps=1e-3, momentum=0.01),
40 | act_fn()
41 | ]
42 | else:
43 | raise NotImplementedError
44 |
45 | return block
46 |
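Note: the MLP class above stacks num_layers Linear layers over the channels list, inserting the optional norm and a ReLU between them and ending with a plain Linear. A minimal sketch with hypothetical sizes, assuming MLP is imported from this module:

    import functools
    import torch
    import torch.nn as nn

    # hypothetical sizes: Linear(32->64) + BatchNorm1d + ReLU + Linear(64->16)
    norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1)
    mlp = MLP([32, 64, 16], norm_fn=norm_fn, num_layers=2)

    x = torch.randn(8, 32)
    out = mlp(x)  # shape (8, 16)
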
--------------------------------------------------------------------------------
/pcseg/models/model_utils/basic_block_2d.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class BasicBlock2D(nn.Module):
5 | def __init__(self, in_channels, out_channels, **kwargs):
6 | """
7 | Initializes convolutional block
8 | Args:
9 | in_channels: int, Number of input channels
10 | out_channels: int, Number of output channels
11 | **kwargs: Dict, Extra arguments for nn.Conv2d
12 | """
13 | super().__init__()
14 | self.in_channels = in_channels
15 | self.out_channels = out_channels
16 | self.conv = nn.Conv2d(in_channels=in_channels,
17 | out_channels=out_channels,
18 | **kwargs)
19 | self.bn = nn.BatchNorm2d(out_channels)
20 | self.relu = nn.ReLU(inplace=True)
21 |
22 | def forward(self, features):
23 | """
24 | Applies convolutional block
25 | Args:
26 | features: (B, C_in, H, W), Input features
27 | Returns:
28 | x: (B, C_out, H, W), Output features
29 | """
30 | x = self.conv(features)
31 | x = self.bn(x)
32 | x = self.relu(x)
33 | return x
34 |
35 |
36 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm2d, **kwargs):
37 | if name == 'BasicBlock2D':
38 | block = [
39 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs),
40 | norm_layer(out_channels, eps=1e-3, momentum=0.01),
41 | act_fn()
42 | ]
43 | elif name == 'DeConv2dBlock':
44 | block = [
45 | nn.ConvTranspose2d(in_channels, out_channels, **kwargs),
46 | norm_layer(out_channels, eps=1e-3, momentum=0.01),
47 | act_fn()
48 | ]
49 | else:
50 | raise NotImplementedError
51 |
52 | return block
53 |
--------------------------------------------------------------------------------
/pcseg/models/model_utils/fp16.py:
--------------------------------------------------------------------------------
1 | # From https://github.com/thangvubk/SoftGroup/blob/11dcbfd74b7660a2b82ac6473af107849c7d545f/softgroup/util/fp16.py
2 | import functools
3 | from collections import abc
4 | from inspect import getfullargspec
5 |
6 | import spconv.pytorch as spconv
7 | import torch
8 |
9 |
10 | def cast_tensor_type(inputs, src_type, dst_type):
11 | if isinstance(inputs, torch.Tensor):
12 | return inputs.to(dst_type) if inputs.dtype == src_type else inputs
13 | elif isinstance(inputs, spconv.SparseConvTensor):
14 | if inputs.features.dtype == src_type:
15 | features = inputs.features.to(dst_type)
16 | inputs = inputs.replace_feature(features)
17 | return inputs
18 | elif isinstance(inputs, abc.Mapping):
19 | return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()})
20 | elif isinstance(inputs, abc.Iterable):
21 | return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs)
22 | else:
23 | return inputs
24 |
25 |
26 | def force_fp32(apply_to=None, out_fp16=False):
27 |
28 | def force_fp32_wrapper(old_func):
29 |
30 | @functools.wraps(old_func)
31 | def new_func(*args, **kwargs):
32 | if not isinstance(args[0], torch.nn.Module):
33 | raise TypeError('@force_fp32 can only be used to decorate the '
34 | 'method of nn.Module')
35 | # get the arg spec of the decorated method
36 | args_info = getfullargspec(old_func)
37 | # get the argument names to be casted
38 | args_to_cast = args_info.args if apply_to is None else apply_to
39 | # convert the args that need to be processed
40 | new_args = []
41 | if args:
42 | arg_names = args_info.args[:len(args)]
43 | for i, arg_name in enumerate(arg_names):
44 | if arg_name in args_to_cast:
45 | new_args.append(cast_tensor_type(args[i], torch.half, torch.float))
46 | else:
47 | new_args.append(args[i])
48 | # convert the kwargs that need to be processed
49 | new_kwargs = dict()
50 | if kwargs:
51 | for arg_name, arg_value in kwargs.items():
52 | if arg_name in args_to_cast:
53 | new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float)
54 | else:
55 | new_kwargs[arg_name] = arg_value
56 | with torch.cuda.amp.autocast(enabled=False):
57 | output = old_func(*new_args, **new_kwargs)
58 | # cast the results back to fp32 if necessary
59 | if out_fp16:
60 | output = cast_tensor_type(output, torch.float, torch.half)
61 | return output
62 |
63 | return new_func
64 |
65 | return force_fp32_wrapper
66 |
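Note: force_fp32 wraps methods of an nn.Module so that the listed tensor (or sparse-tensor) arguments are cast back to float32 and the body runs with autocast disabled, which keeps numerically sensitive ops such as losses in full precision during mixed-precision training. A minimal sketch of how it is typically applied (hypothetical head, assuming force_fp32 is imported from this module):

    import torch
    import torch.nn as nn

    # hypothetical head, for illustration only
    class DummyHead(nn.Module):
        def __init__(self):
            super().__init__()
            self.loss_fn = nn.BCEWithLogitsLoss()

        @force_fp32(apply_to=('scores',))
        def get_loss(self, scores, labels):
            # 'scores' arrives as float32 here even if the surrounding forward ran under autocast
            return self.loss_fn(scores, labels.float())

    head = DummyHead()
    loss = head.get_loss(torch.randn(4, 1), torch.randint(0, 2, (4, 1)))
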
--------------------------------------------------------------------------------
/pcseg/models/model_utils/rle_utils.py:
--------------------------------------------------------------------------------
1 | # Modified from https://www.kaggle.com/paulorzp/run-length-encode-and-decode
2 | import numpy as np
3 |
4 |
5 | def rle_encode(mask):
6 | """Encode RLE (Run-length-encode) from 1D binary mask.
7 |
8 | Args:
9 | mask (np.ndarray): 1D binary mask
10 | Returns:
11 | rle (dict): encoded RLE
12 | """
13 | length = mask.shape[0]
14 | mask = np.concatenate([[0], mask, [0]])
15 | runs = np.where(mask[1:] != mask[:-1])[0] + 1
16 | runs[1::2] -= runs[::2]
17 | counts = ' '.join(str(x) for x in runs)
18 | rle = dict(length=length, counts=counts)
19 | return rle
20 |
21 |
22 | def rle_decode(rle):
23 | """Decode rle to get binary mask.
24 |
25 | Args:
26 | rle (dict): rle of encoded mask
27 | Returns:
28 | mask (np.ndarray): decoded mask
29 | """
30 | length = rle['length']
31 | counts = rle['counts']
32 | s = counts.split()
33 | starts, nums = [np.asarray(x, dtype=np.int32) for x in (s[0:][::2], s[1:][::2])]
34 | starts -= 1
35 | ends = starts + nums
36 | mask = np.zeros(length, dtype=np.uint8)
37 | for lo, hi in zip(starts, ends):
38 | mask[lo:hi] = 1
39 | return mask
40 |
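Note: a quick round-trip sketch of the two helpers above on a hypothetical mask, assuming rle_encode and rle_decode are imported from this module; the counts string alternates 1-based run starts and run lengths:

    import numpy as np

    # hypothetical 1D binary mask with three runs of ones
    mask = np.array([0, 1, 1, 0, 0, 1, 0, 1, 1, 1], dtype=np.uint8)

    rle = rle_encode(mask)        # {'length': 10, 'counts': '2 2 6 1 8 3'}
    recovered = rle_decode(rle)

    assert np.array_equal(mask, recovered)
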
--------------------------------------------------------------------------------
/pcseg/models/text_networks/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import torch
4 |
5 | from . import text_models
6 | from .prompt_template import template_meta
7 | from ...config import cfg
8 |
9 |
10 | def build_text_model(model_cfg):
11 | tokenizer, text_encoder = getattr(
12 | text_models, f'get_{model_cfg.NAME.lower()}_model'
13 | )(model_cfg.BACKBONE)
14 |
15 | text_encoder.tokenizer = tokenizer
16 | return text_encoder
17 |
18 |
19 | def load_text_embedding_from_path(text_emb_cfg):
20 | text_emb_path = os.path.join(cfg.DATA_CONFIG.DATA_PATH, text_emb_cfg.PATH)
21 | text_embedding = torch.load(text_emb_path, map_location=torch.device('cpu')).detach()
22 | if text_emb_cfg.get('NORM', True):
23 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True)
24 | print("=> loaded text embedding from path '{}'".format(text_emb_path))
25 | return text_embedding
26 |
27 |
28 | def is_bg_class(c):
29 | return c.lower() in ('wall', 'floor', 'ceiling', 'otherfurniture')
30 |
31 |
32 | def build_text_token_from_class_names(model_cfg, class_names):
33 | if model_cfg.TEMPLATE == 'lseg': # only instance classes are encoded with prompt
34 | return [template_meta[model_cfg.TEMPLATE][0].format(c) if not is_bg_class(c) else c for c in class_names]
35 | else:
36 | return [template_meta[model_cfg.TEMPLATE][0].format(c) for c in class_names]
37 |
38 |
39 | def load_text_embedding_from_encoder(model_cfg, text_encoder, logger=logging.getLogger()):
40 | text_encoder.cuda()
41 | class_names = cfg.TEXT_ENCODER.CATEGORY_NAMES
42 | text = build_text_token_from_class_names(model_cfg, class_names)
43 |
44 | if model_cfg.NAME == 'CLIP':
45 | text_tokens = text_encoder.tokenizer(text).cuda()
46 | text_embedding = text_encoder.encode_text(text_tokens)
47 | elif model_cfg.NAME == 'BERT':
48 | text_tokens = text_encoder.tokenizer(text, return_tensors="pt", padding=True).to('cuda')
49 | text_embedding = text_encoder(**text_tokens).pooler_output
50 | else:
51 | raise NotImplementedError
52 |
53 | if model_cfg.get('NORM', True):
54 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True)
55 | logger.info("=> loaded text embedding from '{}'".format(model_cfg.NAME))
56 | return text_embedding.detach().cpu()
57 |
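Note: a small sketch of how build_text_token_from_class_names turns class names into prompts, assuming it is imported from this module. The class list is hypothetical; with the lseg template, background classes such as wall stay as bare names while instance classes are wrapped in the template:

    from easydict import EasyDict

    # hypothetical config and class list
    model_cfg = EasyDict(TEMPLATE='lseg')
    class_names = ['chair', 'table', 'wall']

    print(build_text_token_from_class_names(model_cfg, class_names))
    # ['a chair in a scene', 'a table in a scene', 'wall']
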
--------------------------------------------------------------------------------
/pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/text_networks/prompt_template.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # MIT License
3 | #
4 | # Copyright (c) 2021 OpenAI
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | # SOFTWARE.
23 | #
24 | # Modified by Jiarui Xu
25 | # -------------------------------------------------------------------------
26 |
27 | full_imagenet_templates = [
28 | 'a bad photo of a {}.',
29 | 'a photo of many {}.',
30 | 'a sculpture of a {}.',
31 | 'a photo of the hard to see {}.',
32 | 'a low resolution photo of the {}.',
33 | 'a rendering of a {}.',
34 | 'graffiti of a {}.',
35 | 'a bad photo of the {}.',
36 | 'a cropped photo of the {}.',
37 | 'a tattoo of a {}.',
38 | 'the embroidered {}.',
39 | 'a photo of a hard to see {}.',
40 | 'a bright photo of a {}.',
41 | 'a photo of a clean {}.',
42 | 'a photo of a dirty {}.',
43 | 'a dark photo of the {}.',
44 | 'a drawing of a {}.',
45 | 'a photo of my {}.',
46 | 'the plastic {}.',
47 | 'a photo of the cool {}.',
48 | 'a close-up photo of a {}.',
49 | 'a black and white photo of the {}.',
50 | 'a painting of the {}.',
51 | 'a painting of a {}.',
52 | 'a pixelated photo of the {}.',
53 | 'a sculpture of the {}.',
54 | 'a bright photo of the {}.',
55 | 'a cropped photo of a {}.',
56 | 'a plastic {}.',
57 | 'a photo of the dirty {}.',
58 | 'a jpeg corrupted photo of a {}.',
59 | 'a blurry photo of the {}.',
60 | 'a photo of the {}.',
61 | 'a good photo of the {}.',
62 | 'a rendering of the {}.',
63 | 'a {} in a video game.',
64 | 'a photo of one {}.',
65 | 'a doodle of a {}.',
66 | 'a close-up photo of the {}.',
67 | 'a photo of a {}.',
68 | 'the origami {}.',
69 | 'the {} in a video game.',
70 | 'a sketch of a {}.',
71 | 'a doodle of the {}.',
72 | 'a origami {}.',
73 | 'a low resolution photo of a {}.',
74 | 'the toy {}.',
75 | 'a rendition of the {}.',
76 | 'a photo of the clean {}.',
77 | 'a photo of a large {}.',
78 | 'a rendition of a {}.',
79 | 'a photo of a nice {}.',
80 | 'a photo of a weird {}.',
81 | 'a blurry photo of a {}.',
82 | 'a cartoon {}.',
83 | 'art of a {}.',
84 | 'a sketch of the {}.',
85 | 'a embroidered {}.',
86 | 'a pixelated photo of a {}.',
87 | 'itap of the {}.',
88 | 'a jpeg corrupted photo of the {}.',
89 | 'a good photo of a {}.',
90 | 'a plushie {}.',
91 | 'a photo of the nice {}.',
92 | 'a photo of the small {}.',
93 | 'a photo of the weird {}.',
94 | 'the cartoon {}.',
95 | 'art of the {}.',
96 | 'a drawing of the {}.',
97 | 'a photo of the large {}.',
98 | 'a black and white photo of a {}.',
99 | 'the plushie {}.',
100 | 'a dark photo of a {}.',
101 | 'itap of a {}.',
102 | 'graffiti of the {}.',
103 | 'a toy {}.',
104 | 'itap of my {}.',
105 | 'a photo of a cool {}.',
106 | 'a photo of a small {}.',
107 | 'a tattoo of the {}.',
108 | ]
109 |
110 | sub_imagenet_template = [
111 | 'itap of a {}.', 'a bad photo of a {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.',
112 | 'art of the {}.', 'a photo of the small {}.'
113 | ]
114 |
115 | simple_imagenet_template = [
116 | 'a photo of a {}.',
117 | ]
118 |
119 | identity_template = [
120 | '{}',
121 | ]
122 |
123 | lseg_template = [
124 | 'a {} in a scene',
125 | ]
126 |
127 | template_meta = {
128 | 'full': full_imagenet_templates,
129 | 'subset': sub_imagenet_template,
130 | 'simple': simple_imagenet_template,
131 | 'identity': identity_template,
132 | 'lseg': lseg_template,
133 | }
134 |
--------------------------------------------------------------------------------
/pcseg/models/text_networks/text_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 | from clip import clip
5 |
6 | from ...config import cfg
7 | from ...utils import commu_utils
8 |
9 |
10 | def get_clip_model(backbone_name):
11 | url = clip._MODELS[backbone_name]
12 | if cfg.LOCAL_RANK == 0: # only download once at master node
13 | model_path = clip._download(url, os.path.expanduser("~/.cache/clip"))
14 | else:
15 | model_path = _return_clip_path(url, os.path.expanduser("~/.cache/clip"))
16 | commu_utils.synchronize()
17 |
18 | try:
19 | # loading JIT archive
20 | model = torch.jit.load(model_path, map_location="cpu").eval()
21 | state_dict = model.state_dict()
22 | except RuntimeError:
23 | state_dict = torch.load(model_path, map_location="cpu")
24 |
25 | model = clip.build_model(state_dict)
26 | return clip.tokenize, model
27 |
28 |
29 | def get_bert_model(name):
30 | from transformers import AutoTokenizer, AutoModel
31 | os.environ["TOKENIZERS_PARALLELISM"] = "false"
32 | tokenizer = AutoTokenizer.from_pretrained(name, local_files_only=True)
33 | model = AutoModel.from_pretrained(name, local_files_only=True)
34 | return tokenizer, model
35 |
36 |
37 | def _return_clip_path(url: str, root: str):
38 | filename = os.path.basename(url)
39 | download_target = os.path.join(root, filename)
40 | return download_target
41 |
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/__init__.py:
--------------------------------------------------------------------------------
1 | from .spconv_unet_indoor import SparseUNetIndoor
2 |
3 | __all__ = {
4 | 'SparseUNetIndoor': SparseUNetIndoor
5 | }
6 |
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/spconv_unet_indoor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import functools
3 | import torch.nn as nn
4 |
5 | from ...utils.spconv_utils import spconv
6 | from ..model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlock
7 | from ...utils import common_utils
8 |
9 |
10 | class SparseUNetIndoor(nn.Module):
11 | def __init__(self, model_cfg):
12 | super(SparseUNetIndoor, self).__init__()
13 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1)
14 |
15 | self.model_cfg = model_cfg
16 | self.in_channel = model_cfg.IN_CHANNEL
17 | self.mid_channel = model_cfg.MID_CHANNEL
18 | self.block_reps = model_cfg.BLOCK_REPS
19 | self.block_residual = model_cfg.BLOCK_RESIDUAL
20 | self.num_blocks = model_cfg.get('NUM_BLOCKS', None)
21 | self.num_filters = model_cfg.get('NUM_FILTERS', None)
22 |
23 | if self.block_residual:
24 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False))
25 | else:
26 | block = VGGBlock
27 |
28 | self.input_conv = spconv.SparseSequential(
29 | spconv.SubMConv3d(
30 | self.in_channel, self.mid_channel, kernel_size=3, padding=1, bias=False, indice_key='subm1'
31 | )
32 | )
33 |
34 | if self.num_filters is not None:
35 | block_channels = self.num_filters
36 | else:
37 | assert self.num_blocks is not None
38 | block_channels = [self.mid_channel * (i + 1) for i in range(self.num_blocks)]
39 |
40 | self.unet = UBlock(block_channels, norm_fn, self.block_reps, block, indice_key_id=1)
41 | self.output_layer = spconv.SparseSequential(
42 | norm_fn(self.mid_channel), nn.ReLU()
43 | )
44 |
45 | # init parameters
46 | self.apply(self.set_bn_init)
47 |
48 | @staticmethod
49 | def set_bn_init(m):
50 | classname = m.__class__.__name__
51 | if classname.find('BatchNorm') != -1:
52 | m.weight.data.fill_(1.0)
53 | m.bias.data.fill_(0.0)
54 |
55 | def forward(self, batch_dict):
56 | input_sp_tensor = spconv.SparseConvTensor(
57 | batch_dict['voxel_features'], batch_dict['voxel_coords'].int(),
58 | batch_dict['spatial_shape'], batch_dict['batch_size']
59 | )
60 | output = self.input_conv(input_sp_tensor)
61 | output = self.unet(output)
62 | output = self.output_layer(output)
63 | output_feats = output.features
64 | # if not self.training and batch_dict['test_x4_split']:
65 | # output_feats = common_utils.merge_4_parts(output_feats)
66 |
67 | batch_dict['backbone_3d_feats'] = output_feats
68 | return batch_dict
69 |
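Note: when NUM_FILTERS is not given, the encoder widths are derived from MID_CHANNEL and NUM_BLOCKS; with the values used in the S3DIS/ScanNet configs later in this dump (MID_CHANNEL=16, NUM_BLOCKS=7) this yields:

    # values taken from the configs in this repo
    mid_channel, num_blocks = 16, 7
    block_channels = [mid_channel * (i + 1) for i in range(num_blocks)]
    # [16, 32, 48, 64, 80, 96, 112] -> UBlock widths from the top level down
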
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/__init__.py:
--------------------------------------------------------------------------------
1 | from .vfe_template import VFETemplate
2 | from .indoor_vfe import IndoorVFE
3 |
4 |
5 | __all__ = {
6 | 'VFETemplate': VFETemplate,
7 | 'IndoorVFE': IndoorVFE
8 | }
9 |
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/indoor_vfe.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .vfe_template import VFETemplate
4 | from ....external_libs.softgroup_ops.ops import functions as sg_ops
5 |
6 |
7 | class IndoorVFE(VFETemplate):
8 | def __init__(self, model_cfg, voxel_mode, **kwargs):
9 | super(IndoorVFE, self).__init__(model_cfg)
10 | self.use_xyz = model_cfg.get('USE_XYZ', False)
11 | self.voxel_mode = voxel_mode
12 |
13 | def forward(self, batch):
14 | batch_size = batch['batch_size']
15 | # voxelization
16 | # the current voxelization implementation does not support CUDA input
17 | # TODO: modify the voxelization part
18 | voxel_coords, v2p_map, p2v_map = sg_ops.voxelization_idx(
19 | batch['points_xyz_voxel_scale'].cpu(), batch_size, self.voxel_mode
20 | )
21 | voxel_coords, v2p_map, p2v_map = voxel_coords.cuda(), v2p_map.cuda(), p2v_map.cuda()
22 |
23 | feats = batch['feats'] # (N, C), float32, cuda
24 |
25 | voxel_feats = sg_ops.voxelization(feats, p2v_map, self.voxel_mode)
26 |
27 | batch.update({
28 | 'voxel_features': voxel_feats,
29 | 'v2p_map': v2p_map.long(),
30 | 'voxel_coords': voxel_coords
31 | })
32 |
33 | return batch
34 |
--------------------------------------------------------------------------------
/pcseg/models/vision_backbones_3d/vfe/vfe_template.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class VFETemplate(nn.Module):
5 | def __init__(self, model_cfg, **kwargs):
6 | super().__init__()
7 | self.model_cfg = model_cfg
8 |
9 | def get_output_feature_dim(self):
10 | raise NotImplementedError
11 |
12 | def forward(self, **kwargs):
13 | """
14 | Args:
15 | **kwargs:
16 |
17 | Returns:
18 | batch_dict:
19 | ...
20 | vfe_features: (num_voxels, C)
21 | """
22 | raise NotImplementedError
23 |
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/__init__.py:
--------------------------------------------------------------------------------
1 | from .network_template import ModelTemplate
2 | from .sparseunet_textseg import SparseUNetTextSeg
3 |
4 | __all__ = {
5 | 'ModelTemplate': ModelTemplate,
6 | 'SparseUNetTextSeg': SparseUNetTextSeg
7 | }
8 |
9 |
10 | def build_model(model_cfg, num_class, dataset):
11 | model = __all__[model_cfg.NAME](
12 | model_cfg=model_cfg, num_class=num_class, dataset=dataset
13 | )
14 |
15 | return model
16 |
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/sparseunet.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/sparseunet.py
--------------------------------------------------------------------------------
/pcseg/models/vision_networks/sparseunet_textseg.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from .network_template import ModelTemplate
3 |
4 |
5 | class SparseUNetTextSeg(ModelTemplate):
6 | def __init__(self, model_cfg, num_class, dataset):
7 | super().__init__(model_cfg, num_class, dataset)
8 | if model_cfg.get('BINARY_HEAD', None):
9 | self.binary_head.register_hook_for_binary_head(self.backbone_3d)
10 |
11 | def forward(self, batch_dict):
12 | batch_dict['test_x4_split'] = self.test_x4_split
13 | # Order: vfe, backbone_3d, binary_head, seg_head, caption_head
14 | for cur_module in self.module_list:
15 | batch_dict = cur_module(batch_dict)
16 |
17 | ret_dict = self.task_head.forward_ret_dict
18 | if self.training:
19 | loss, tb_dict, disp_dict = self.get_training_loss()
20 |
21 | ret_dict['loss'] = loss
22 | return ret_dict, tb_dict, disp_dict
23 | else:
24 | if hasattr(self, 'inst_head') and self.inst_head is not None:
25 | ret_dict.update(self.inst_head.forward_ret_dict)
26 | return ret_dict
27 |
28 | def get_training_loss(self):
29 | disp_dict = {}
30 | tb_dict = {}
31 |
32 | # for segmentation loss
33 | if not self.task_head.eval_only:
34 | seg_loss, tb_dict_seg = self.task_head.get_loss()
35 | tb_dict.update(tb_dict_seg)
36 | else:
37 | seg_loss = 0
38 |
39 | # for binary loss
40 | if self.binary_head is not None:
41 | binary_loss, tb_dict_binary = self.binary_head.get_loss()
42 | tb_dict.update(tb_dict_binary)
43 | else:
44 | binary_loss = 0
45 |
46 | # for caption loss
47 | if self.caption_head is not None:
48 | caption_loss, tb_dict_caption = self.caption_head.get_loss()
49 | tb_dict.update(tb_dict_caption)
50 | else:
51 | caption_loss = 0
52 |
53 | # for inst loss
54 | if self.inst_head is not None:
55 | inst_loss, tb_dict_inst = self.inst_head.get_loss()
56 | tb_dict.update(tb_dict_inst)
57 | else:
58 | inst_loss = 0
59 |
60 | loss = seg_loss + binary_loss + caption_loss + inst_loss
61 | tb_dict['loss'] = loss.item()
62 | disp_dict.update(tb_dict)
63 |
64 | return loss, tb_dict, disp_dict
65 |
--------------------------------------------------------------------------------
/pcseg/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/utils/__init__.py
--------------------------------------------------------------------------------
/pcseg/utils/arnold_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | class ArnoldUtils():
5 | def __init__(self, enabled, arnold_dir, logger) -> None:
6 | self.enabled = enabled
7 | self.logger = logger
8 | self.dir = arnold_dir
9 |
10 | def save_ckpt(self, ckpt_path, last_epoch=False):
11 | if self.enabled:
12 | ckpt_dir, file_name = os.path.split(ckpt_path)
13 | # import ipdb; ipdb.set_trace(context=10)
14 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:]
15 | os.system('hdfs dfs -mkdir -p hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir, _ckpt_dir))
16 | if last_epoch:
17 | tgt_path = os.path.join(self.dir, _ckpt_dir, 'last_train.pth')
18 | else:
19 | tgt_path = os.path.join(self.dir, _ckpt_dir, file_name)
20 | os.system('hdfs dfs -put -f {} hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(ckpt_path, tgt_path))
21 | self.logger.info('Put model to hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(tgt_path))
22 |
23 | def load_ckpt(self, ckpt_dir):
24 | if self.enabled:
25 | try:
26 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:]
27 | os.system('hdfs dfs -get hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}/* {}'.format(self.dir, _ckpt_dir, ckpt_dir))
28 | self.logger.info('Get model from hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir,_ckpt_dir))
29 | except:
30 | pass
31 |
--------------------------------------------------------------------------------
/pcseg/utils/caption_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 | import numpy as np
5 |
6 | from . import commu_utils
7 | from ..config import cfg
8 |
9 |
10 | def get_caption_batch(caption_cfg, text_cfg, batch_dict, text_encoder):
11 | caption_infos = {}
12 | caption_data = batch_dict['caption_data']
13 |
14 | num_captions = 0
15 | for key in caption_cfg:
16 | if key in caption_cfg['KEY'] and caption_cfg[key].ENABLED:
17 | caption, idx = caption_data[key.lower()]['caption'], caption_data[key.lower()]['idx']
18 | num_captions += len(caption)
19 |
20 | # caption_embed: (K, 512), caption_idx: (N), (N > K)
21 | caption_embed, caption_idx = extract_caption_embed(caption, caption_cfg[key], text_cfg, text_encoder, cfg.LOCAL_RANK)
22 | normed_caption_embed = torch.nn.functional.normalize(caption_embed, dim=-1)
23 |
24 | caption_infos['caption_{}'.format(key.lower())] = {
25 | 'caption_embed': normed_caption_embed, 'caption_idx': caption_idx, 'select_image_corr': idx
26 | }
27 |
28 | batch_dict['caption_infos'] = caption_infos
29 | batch_dict['num_caption'] = num_captions / batch_dict['batch_size']
30 | return batch_dict
31 |
32 |
33 | def extract_caption_embed(image_captions, caption_cfg, text_cfg, text_encoder, rank):
34 | # (B*K, 512)
35 |
36 | if caption_cfg.get('GATHER_CAPTION', True):
37 | image_captions_list = commu_utils.all_gather(image_captions)
38 | image_captions_all = [jj for ii in image_captions_list for jj in ii]
39 | num_caption_list = [len(ii) for ii in image_captions_list]
40 | else:
41 | image_captions_all = image_captions
42 | num_caption_list = [0] * 100
43 | num_caption_list[rank] = len(image_captions_all)
44 | caption_embed_all = forward_text_encoder(image_captions_all, text_encoder)
45 |
46 | # remove duplicate captions and re-index them
47 | if text_cfg.get('REMOVE_DUPLICATE_CAPTIONS', True):
48 | num_caption_list = torch.LongTensor([0] + num_caption_list).cuda()
49 | idx = torch.arange(num_caption_list[rank + 1]).long().cuda() + torch.cumsum(num_caption_list, 0)[rank]
50 | caption_embeds, unique_indices = torch.unique(caption_embed_all, dim=0, return_inverse=True)
51 | caption_idx = unique_indices[idx]
52 | else:
53 | caption_embeds = caption_embed_all
54 | caption_idx = torch.arange(caption_embed_all.shape[0]).long().cuda()
55 |
56 | return caption_embeds, caption_idx
57 |
58 |
59 | def forward_text_encoder(image_captions, text_encoder):
60 | with torch.no_grad():
61 | if len(image_captions) > 0:
62 | if cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'CLIP':
63 | text_tokens = text_encoder.tokenizer(image_captions, truncate=True).cuda()
64 | text_embed = text_encoder.encode_text(text_tokens).float()
65 | elif cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'Bert':
66 | text_tokens = text_encoder.tokenizer(image_captions, return_tensors="pt", padding=True).to('cuda')
67 | text_embed = text_encoder(**text_tokens).pooler_output
68 | else:
69 | raise NotImplementedError
70 | else:
71 | text_embed = torch.zeros((0, cfg.MODEL.TASK_HEAD.TEXT_EMBED.CHANNEL), dtype=torch.float32).cuda()
72 | return text_embed
73 |
74 |
75 | def select_images(caption_cfg, image_name, image_corr):
76 | """
77 | TODO: put this part into dataset
78 | Select a subset of images for training
79 | """
80 | batch_size = len(image_name)
81 | if caption_cfg.get('SAMPLE', 1) > 1:
82 | random_start = np.random.randint(caption_cfg.SAMPLE)
83 | image_name = [(np.array(image_name[i])[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)]
84 | image_corr = [(np.array(image_corr[i], dtype=object)[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)]
85 | if caption_cfg.SELECT == 'ratio' and caption_cfg.RATIO == 1.0:
86 | return image_name, image_corr
87 |
88 | selected_image_name = []
89 | selected_image_corr = []
90 |
91 | for i in range(batch_size):
92 | if image_name[i] is None or len(image_name[i]) == 0: # lack 2d data
93 | selected_image_name.append([])
94 | selected_image_corr.append([])
95 | selected_idx = None
96 | elif caption_cfg.SELECT == 'fixed':
97 | # view-level caption: randomly select a fixed number of views
98 | num = int(caption_cfg.NUM)
99 | selected_idx = np.random.choice(len(image_name[i]), min(num, len(image_name[i])), replace=False)
100 | elif caption_cfg.SELECT == 'ratio':
101 | # randomly select a ratio of the views
102 | ratio = caption_cfg.RATIO
103 | selected_idx = np.random.choice(len(image_name[i]), max(1, int(len(image_name[i]) * ratio)), replace=False)
104 | elif caption_cfg.SELECT == 'hybrid':
105 | num = max(int(caption_cfg.NUM), int(len(image_name[i]) * caption_cfg.RATIO))
106 | selected_idx = np.random.choice(len(image_name[i]), min(max(1, num), len(image_name[i])), replace=False)
107 | else:
108 | raise NotImplementedError
109 |
110 | if selected_idx is not None:
111 | selected_image_name.append(np.array(image_name[i])[selected_idx].tolist())
112 | selected_image_corr.append(
113 | np.array(image_corr[i], dtype=object)[selected_idx].tolist()
114 | )
115 |
116 | return selected_image_name, selected_image_corr
117 |
118 |
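Note: a minimal sketch of select_images with hypothetical inputs (one scene in the batch, three candidate views, a 'fixed' selection of two views), assuming the function is imported from this module; image_corr holds, per view, the indices of the points visible in that view:

    import numpy as np
    from easydict import EasyDict

    # hypothetical batch of one scene with three views
    caption_cfg = EasyDict(SELECT='fixed', NUM=2)
    image_name = [['frame_000', 'frame_020', 'frame_040']]
    image_corr = [[[0, 1, 2], [3, 4], [5, 6, 7]]]

    names, corrs = select_images(caption_cfg, image_name, image_corr)
    # names[0] holds 2 randomly chosen view names, corrs[0] the matching point-index lists
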
--------------------------------------------------------------------------------
/pcseg/utils/loss_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class CosineSimilarityLoss(nn.Module):
7 | def __init__(self) -> None:
8 | super().__init__()
9 |
10 | def forward(self, input, target, mask):
11 | selected_input = input[mask]
12 | cos_similarity = nn.functional.cosine_similarity(selected_input, target).mean()
13 | return 1 - cos_similarity
14 |
15 |
16 | class BYOLLoss(nn.Module):
17 | def __init__(self) -> None:
18 | super().__init__()
19 |
20 | def forward(self, input, target):
21 | loss = 2 - 2 * (input * target).sum(dim=-1)
22 | return loss.mean()
23 |
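Note: a short sketch of CosineSimilarityLoss with random tensors (hypothetical shapes), assuming the class is imported from this module; the mask selects which rows of the input are compared against the target, and the loss is one minus the mean cosine similarity:

    import torch

    loss_fn = CosineSimilarityLoss()

    # random (hypothetical) features and targets
    feats = torch.randn(6, 16)
    targets = torch.randn(4, 16)
    mask = torch.tensor([1, 0, 1, 0, 1, 1], dtype=torch.bool)  # keeps 4 of the 6 rows

    loss = loss_fn(feats, targets, mask)  # scalar in [0, 2]
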
--------------------------------------------------------------------------------
/pcseg/utils/metric_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def get_open_vocab_metric(metric_class, base_class_idx, novel_class_idx):
5 | if isinstance(metric_class, list):
6 | metric_class = np.array(metric_class)
7 | metric_base = np.mean(metric_class[base_class_idx])
8 | metric_novel = np.mean(metric_class[novel_class_idx])
9 | h_metric = 2 * metric_base * metric_novel / (metric_base + metric_novel + 10e-10)
10 | m_metric = (metric_base * len(base_class_idx) + metric_novel * len(novel_class_idx)) / (len(base_class_idx) + len(novel_class_idx))
11 | return h_metric, m_metric, metric_base, metric_novel
12 |
13 |
14 | def cal_ov_metrics(cfg, logger, class_names, iou_class, acc_class, binary_acc_class):
15 | base_class_idx = cfg.DATA_CONFIG.base_class_idx
16 | novel_class_idx = cfg.DATA_CONFIG.novel_class_idx
17 | if cfg.DATA_CONFIG.get('trainonly_class_idx', None):
18 | trainonly_class_idx = cfg.DATA_CONFIG.trainonly_class_idx
19 | base_class_idx = [idx for idx in base_class_idx if idx not in trainonly_class_idx]
20 | novel_class_idx = [idx for idx in novel_class_idx if idx not in trainonly_class_idx]
21 |
22 | logger.info('----------- base class -----------')
23 | for i in base_class_idx:
24 | logger.info('Class {} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format(
25 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i])
26 | )
27 | logger.info('----------- novel class -----------')
28 | for i in novel_class_idx:
29 | logger.info('Class {} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format(
30 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i])
31 | )
32 | hiou, miou, iou_base, iou_novel = get_open_vocab_metric(
33 | iou_class, base_class_idx, novel_class_idx
34 | )
35 | hacc, macc, acc_base, acc_novel = get_open_vocab_metric(
36 | acc_class, base_class_idx, novel_class_idx
37 | )
38 | return hiou, miou, iou_base, iou_novel, hacc, macc, acc_base, acc_novel
39 |
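Note: a small numeric sketch of get_open_vocab_metric, assuming it is imported from this module; hIoU is the harmonic mean of the base and novel means, mIoU their count-weighted average:

    # hypothetical per-class IoUs for a 5-class setting
    iou_class = [0.8, 0.7, 0.6, 0.3, 0.2]
    base_class_idx = [0, 1, 2]
    novel_class_idx = [3, 4]

    hiou, miou, iou_base, iou_novel = get_open_vocab_metric(iou_class, base_class_idx, novel_class_idx)
    # iou_base = 0.7, iou_novel = 0.25
    # hiou = 2 * 0.7 * 0.25 / (0.7 + 0.25) ~= 0.368, miou = (0.7 * 3 + 0.25 * 2) / 5 = 0.52
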
--------------------------------------------------------------------------------
/pcseg/utils/spconv_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Set
2 |
3 | try:
4 | import spconv.pytorch as spconv
5 | except:
6 | import spconv as spconv
7 |
8 | import torch.nn as nn
9 |
10 |
11 | def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]:
12 | """
13 | Finds all spconv keys that need to have weight's transposed
14 | """
15 | found_keys: Set[str] = set()
16 | for name, child in model.named_children():
17 | new_prefix = f"{prefix}.{name}" if prefix != "" else name
18 |
19 | if isinstance(child, spconv.conv.SparseConvolution):
20 | new_prefix = f"{new_prefix}.weight"
21 | found_keys.add(new_prefix)
22 |
23 | found_keys.update(find_all_spconv_keys(child, prefix=new_prefix))
24 |
25 | return found_keys
26 |
27 |
28 | def replace_feature(out, new_features):
29 | if "replace_feature" in out.__dir__():
30 | # spconv 2.x behaviour
31 | return out.replace_feature(new_features)
32 | else:
33 | out.features = new_features
34 | return out
35 |
--------------------------------------------------------------------------------
/pcseg/utils/voxelize_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Voxelization implementation from Xin Lai
3 | """
4 |
5 | import numpy as np
6 |
7 |
8 | def ravel_hash_vec(arr):
9 | """
10 | Ravel the coordinates after subtracting the min coordinates.
11 | """
12 | assert arr.ndim == 2
13 | arr = arr.copy()
14 | arr -= arr.min(0)
15 | arr = arr.astype(np.uint64, copy=False)
16 | arr_max = arr.max(0).astype(np.uint64) + 1
17 |
18 | keys = np.zeros(arr.shape[0], dtype=np.uint64)
19 | # Fortran style indexing
20 | for j in range(arr.shape[1] - 1):
21 | keys += arr[:, j]
22 | keys *= arr_max[j + 1]
23 | keys += arr[:, -1]
24 | return keys
25 |
26 |
27 | def fnv_hash_vec(arr):
28 | """
29 | FNV64-1A
30 | """
31 | assert arr.ndim == 2
32 | # Floor first for negative coordinates
33 | arr = arr.copy()
34 | arr = arr.astype(np.uint64, copy=False)
35 | hashed_arr = np.uint64(14695981039346656037) * np.ones(arr.shape[0], dtype=np.uint64)
36 | for j in range(arr.shape[1]):
37 | hashed_arr *= np.uint64(1099511628211)
38 | hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j])
39 | return hashed_arr
40 |
41 |
42 | def voxelize_with_rec_idx(coord, voxel_size=0.05, hash_type='fnv', training=True):
43 | discrete_coord = np.floor(coord / np.array(voxel_size))
44 | if hash_type == 'ravel':
45 | key = ravel_hash_vec(discrete_coord)
46 | else:
47 | key = fnv_hash_vec(discrete_coord)
48 |
49 | idx_sort = np.argsort(key)
50 | key_sort = key[idx_sort]
51 | _, count = np.unique(key_sort, return_counts=True)
52 | if training:
53 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + np.random.randint(0, count.max(), count.size) % count
54 | else:
55 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1])
56 |
57 | idx_unique = idx_sort[idx_select]
58 | sorted_idx = np.zeros(key.shape[0]).astype(np.int64)
59 | sorted_idx[idx_select] = 1
60 | sorted_idx = np.cumsum(sorted_idx) - 1
61 | idx_recon = np.zeros(key.shape[0]).astype(np.int64)
62 | idx_recon[idx_sort] = sorted_idx
63 | return idx_unique, idx_recon
64 |
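Note: a minimal sketch of how voxelize_with_rec_idx is typically used (hypothetical points, assuming the function is imported from this module): idx_unique picks one representative point per occupied voxel, and idx_recon maps every original point back to its voxel's representative:

    import numpy as np

    # hypothetical points; the first two fall into the same 0.05 m voxel
    coord = np.array([[0.01, 0.01, 0.01],
                      [0.02, 0.02, 0.02],
                      [0.30, 0.00, 0.00],
                      [0.00, 0.60, 0.00]])

    idx_unique, idx_recon = voxelize_with_rec_idx(coord, voxel_size=0.05, training=False)

    voxel_coord = coord[idx_unique]        # one point per voxel (3 rows here)
    recon_coord = voxel_coord[idx_recon]   # per-point view with 4 rows again
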
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | open3d
2 | numpy
3 | torch==1.8.1+cu111
4 | torchvision==0.9.1+cu111
5 | tensorboardX
6 | easydict
7 | pyyaml
8 | tqdm
9 | SharedArray
10 | scipy
11 | opencv-python
12 | plyfile
13 | matplotlib
14 | scikit-learn
15 | scikit-image
16 | pandas
17 | transformers
18 | clip @ git+https://github.com/openai/CLIP.git
19 | spconv-cu111
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | from setuptools import find_packages, setup
5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
6 |
7 |
8 | def get_git_commit_number():
9 | if not os.path.exists('.git'):
10 | return '0000000'
11 |
12 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
13 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7]
14 | return git_commit_number
15 |
16 |
17 | def make_cuda_ext(name, module, sources):
18 | cuda_ext = CUDAExtension(
19 | name='%s.%s' % (module, name),
20 | sources=[os.path.join(*module.split('.'), src) for src in sources]
21 | )
22 | return cuda_ext
23 |
24 |
25 | def write_version_to_file(version, target_file):
26 | with open(target_file, 'w') as f:
27 | print('__version__ = "%s"' % version, file=f)
28 |
29 |
30 | if __name__ == '__main__':
31 | version = '0.1.0+%s' % get_git_commit_number()
32 | write_version_to_file(version, 'pcseg/version.py')
33 |
34 | setup(
35 | name='pcseg',
36 | version=version,
37 | description='PCSeg',
38 | install_requires=[
39 | 'numpy',
40 | 'tensorboardX',
41 | 'easydict',
42 | 'pyyaml',
43 | 'tqdm',
44 | 'SharedArray',
45 | # 'spconv', # spconv has different names depending on the cuda version
46 | ],
47 |
48 | author='Jihan Yang',
49 | author_email='jihanyang13@gmail.com',
50 | license='Apache License 2.0',
51 | packages=find_packages(exclude=['tools', 'data', 'output']),
52 | cmdclass={
53 | 'build_ext': BuildExtension,
54 | },
55 | ext_modules=[],
56 | )
57 |
--------------------------------------------------------------------------------
/tools/_init_path.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, '../')
--------------------------------------------------------------------------------
/tools/cfgs/dataset_configs/s3dis_dataset.yaml:
--------------------------------------------------------------------------------
1 | DATA_PATH: ../data/s3dis
2 | DATASET: S3DISDataset
3 |
4 | COLLATE_FN: collate_batch_indoor
5 | MIN_SPATIAL_SCALE: 128
6 |
7 | DATA_SPLIT:
8 | train: train
9 | test: val
10 | data_suffix: .npy
11 | test_area: 5
12 |
13 | IGNORE_LABEL: -100
14 |
15 | DATA_AUG:
16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ]
17 | scene_aug:
18 | scaling_scene:
19 | enabled: False
20 | p: 1.0
21 | value: [0.9, 1.1]
22 |
23 | rotation:
24 | p: 1.0
25 | value: [0.0, 0.0, 1.0]
26 |
27 | jitter: True
28 | color_jitter: True
29 |
30 | flip:
31 | p: 0.5
32 |
33 | random_jitter:
34 | enabled: False
35 | value: 0.01
36 | accord_to_size: False
37 | p: 1.0
38 |
39 | elastic:
40 | enabled: True
41 | value: [[6, 40], [20, 160]]
42 | apply_to_feat: False
43 | p: 1.0
44 |
45 | crop:
46 | step: 64
47 |
48 | shuffle: True
49 |
50 | DATA_PROCESSOR:
51 | repeat: 20
52 | rgb_norm: True
53 | point_range: 200000000
54 | voxel_scale: 50 # voxel_size = 1 / voxel_scale, e.g. scale 50 -> 0.02m
55 | cache: True
56 | max_npoint: 250000
57 | full_scale: [128, 512]
58 | voxel_mode: 4
59 | xyz_norm: False
60 | x4_split: True
61 | downsampling_scale: 4
62 | xyz_as_feat: True
63 | rgb_as_feat: True
64 |
65 | PROCESS_LIST: []
66 |
--------------------------------------------------------------------------------
/tools/cfgs/dataset_configs/s3dis_dataset_image.yaml:
--------------------------------------------------------------------------------
1 | DATA_PATH: ../data/s3dis
2 | DATASET: S3DISDataset
3 |
4 | COLLATE_FN: collate_batch_indoor
5 | MIN_SPATIAL_SCALE: 128
6 |
7 | DATA_SPLIT:
8 | train: train
9 | test: val
10 | data_suffix: .npy
11 | test_area: 5
12 |
13 | IGNORE_LABEL: -100
14 |
15 | DATA_AUG:
16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ]
17 | scene_aug:
18 | scaling_scene:
19 | enabled: False
20 | p: 1.0
21 | value: [0.9, 1.1]
22 |
23 | rotation:
24 | p: 1.0
25 | value: [0.0, 0.0, 1.0]
26 |
27 | jitter: True
28 | color_jitter: True
29 |
30 | flip:
31 | p: 0.5
32 |
33 | random_jitter:
34 | enabled: False
35 | value: 0.01
36 | accord_to_size: False
37 | p: 1.0
38 |
39 | elastic:
40 | enabled: True
41 | value: [[6, 40], [20, 160]]
42 | apply_to_feat: False
43 | p: 1.0
44 |
45 | crop:
46 | step: 64
47 |
48 | shuffle: True
49 |
50 | DATA_PROCESSOR:
51 | repeat: 20
52 | rgb_norm: True
53 | point_range: 200000000
54 | voxel_scale: 50 # voxel_size = 1 / voxel_scale, e.g. scale 50 -> 0.02m
55 | cache: False
56 | max_npoint: 250000
57 | full_scale: [128, 512]
58 | voxel_mode: 4
59 | xyz_norm: False
60 | x4_split: True
61 | downsampling_scale: 4
62 | xyz_as_feat: True
63 | rgb_as_feat: True
64 |
65 | PROCESS_LIST: []
66 |
67 |
68 | IMAGE_PATH: s3dis_2d
69 | DEPTH_IMAGE_SCALE: [1080, 1080]
70 | LOAD_IMAGE: True
71 | MERGE_IDX: True
72 |
--------------------------------------------------------------------------------
/tools/cfgs/dataset_configs/scannet_dataset.yaml:
--------------------------------------------------------------------------------
1 | DATA_PATH: ../data/scannetv2
2 | DATASET: ScanNetDataset
3 |
4 | COLLATE_FN: collate_batch_indoor
5 | MIN_SPATIAL_SCALE: 128
6 |
7 | DATA_SPLIT:
8 | train: train
9 | test: val
10 | data_suffix: .pth
11 |
12 | IGNORE_LABEL: -100
13 |
14 | DATA_AUG:
15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ]
16 | scene_aug:
17 | scaling_scene:
18 | enabled: False
19 | p: 1.0
20 | value: [0.9, 1.1]
21 |
22 | rotation:
23 | p: 1.0
24 | value: [0.0, 0.0, 1.0]
25 |
26 | jitter: True
27 | color_jitter: True
28 |
29 | flip:
30 | p: 0.5
31 |
32 | random_jitter:
33 | enabled: False
34 | value: 0.01
35 | accord_to_size: False
36 | p: 1.0
37 |
38 | elastic:
39 | enabled: True
40 | value: [[6, 40], [20, 160]]
41 | apply_to_feat: False
42 | p: 1.0
43 |
44 | crop:
45 | step: 32
46 |
47 | shuffle: True
48 |
49 | DATA_PROCESSOR:
50 | repeat: 4
51 | rgb_norm: True
52 | point_range: 200000000
53 | voxel_scale: 50 # voxel_size = 1 / voxel_scale, e.g. scale 50 -> 0.02m
54 | cache: True
55 | max_npoint: 250000
56 | full_scale: [128, 512]
57 | voxel_mode: 4
58 | xyz_norm: False
59 | xyz_as_feat: True
60 | rgb_as_feat: True
61 |
62 | PROCESS_LIST: []
63 |
--------------------------------------------------------------------------------
/tools/cfgs/dataset_configs/scannet_dataset_image.yaml:
--------------------------------------------------------------------------------
1 | DATA_PATH: ../data/scannetv2
2 | DATASET: ScanNetDataset
3 |
4 | COLLATE_FN: collate_batch_indoor
5 | MIN_SPATIAL_SCALE: 128
6 |
7 | DATA_SPLIT:
8 | train: train
9 | test: val
10 | data_suffix: .pth
11 |
12 | IGNORE_LABEL: -100
13 |
14 | DATA_AUG:
15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ]
16 | scene_aug:
17 | scaling_scene:
18 | enabled: False
19 | p: 1.0
20 | value: [0.9, 1.1]
21 |
22 | rotation:
23 | p: 1.0
24 | value: [0.0, 0.0, 1.0]
25 |
26 | jitter: True
27 | color_jitter: True
28 |
29 | flip:
30 | p: 0.5
31 |
32 | random_jitter:
33 | enabled: False
34 | value: 0.01
35 | accord_to_size: False
36 | p: 1.0
37 |
38 | elastic:
39 | enabled: True
40 | value: [[6, 40], [20, 160]]
41 | apply_to_feat: False
42 | p: 1.0
43 |
44 | crop:
45 | step: 32
46 |
47 | shuffle: True
48 |
49 | DATA_PROCESSOR:
50 | repeat: 4
51 | rgb_norm: True
52 | point_range: 200000000
53 | voxel_scale: 50 # voxel_size = 1 / voxel_scale, e.g. scale 50 -> 0.02m
54 | cache: True
55 | max_npoint: 250000
56 | full_scale: [128, 512]
57 | voxel_mode: 4
58 | xyz_norm: False
59 | xyz_as_feat: True
60 | rgb_as_feat: True
61 |
62 | PROCESS_LIST: []
63 |
64 |
65 | IMAGE_PATH: scannet_frames_25k
66 | DEPTH_IMAGE_SCALE: [480, 640]
67 | LOAD_IMAGE: True
68 | MERGE_IDX: True
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml:
--------------------------------------------------------------------------------
1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter]
2 |
3 | DATA_CONFIG:
4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml
5 | DATASET: S3DISInstDataset
6 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ]
7 | sem2ins_classes: [ 0, 1 ]
8 | inst_label_shift: 0
9 |
10 | ignore_class_idx: [12]
11 |
12 | MODEL:
13 | NAME: SparseUNetTextSeg
14 | REMAP_FROM_3DLANG: False
15 |
16 | VFE:
17 | NAME: IndoorVFE
18 | USE_XYZ: True
19 |
20 | BACKBONE_3D:
21 | NAME: SparseUNetIndoor
22 | IN_CHANNEL: 6
23 | MID_CHANNEL: 16
24 | BLOCK_RESIDUAL: True
25 | BLOCK_REPS: 2
26 | NUM_BLOCKS: 7
27 | CUSTOM_SP1X1: True
28 |
29 | ADAPTER:
30 | NAME: VLAdapter
31 | EVAL_ONLY: False
32 | NUM_ADAPTER_LAYERS: 2
33 | TEXT_DIM: -1
34 | LAST_NORM: True
35 |
36 | TASK_HEAD:
37 | NAME: TextSegHead
38 | FEAT_NORM: False
39 |
40 | LAST_NORM: True
41 | TEXT_EMBED:
42 | NAME: CLIP
43 | NORM: True
44 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth
45 | FEAT_NORM: False
46 | LOGIT_SCALE:
47 | value: 1.0
48 | learnable: False
49 |
50 | INST_HEAD:
51 | NAME: InstHead
52 |
53 | BLOCK_RESIDUAL: True
54 | CUSTOM_SP1X1: True
55 |
56 | CLUSTERING:
57 | PREPARE_EPOCH: 20
58 | GROUPING_CFG:
59 | SCORE_THR: 0.2
60 | RADIUS: 0.04
61 | MEAN_ACTIVE: 300
62 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
63 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.]
64 | NPOINT_THR: 500 # absolute if class_numpoint == -1, relative if class_numpoint != -1
65 | IGNORE_CLASSES: [0, 1]
66 | INST_VOXEL_CFG:
67 | SCALE: 50
68 | SPATIAL_SHAPE: 20
69 | LOSS_CFG:
70 | MAX_PROPOSAL_NUM: 200
71 | POS_IOU_THR: 0.5
72 | TEST_CFG:
73 | # x4_split: False
74 | CLS_SCORE_THR: 0.001
75 | MASK_SCORE_THR: -0.5
76 | MIN_NPOINT: 100
77 |
78 | FIXED_MODULES: []
79 | SEMANTIC_ONLY: False
80 |
81 |
82 | TEXT_ENCODER:
83 | NAME: CLIP
84 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
85 | TEMPLATE: identity
86 | EXTRACT_EMBED: False # whether to extract text embeddings online from class names
87 | # BERT:
88 | # BACKBONE: bert-base-uncased
89 |
90 | OPTIMIZATION:
91 | TEST_BATCH_SIZE_PER_GPU: 1
92 | BATCH_SIZE_PER_GPU: 4
93 | NUM_EPOCHS: 64
94 | LR: 0.004 # 4e-3
95 | SCHEDULER: cos_after_step
96 | OPTIMIZER: adamw
97 | WEIGHT_DECAY: 0.0001
98 | MOMENTUM: 0.9
99 | STEP_EPOCH: 40
100 | MULTIPLIER: 0.1
101 | CLIP_GRAD: False
102 | PCT_START: 0.52
103 | DIV_FACTOR: 2
104 | MOMS: [0.95, 0.85]
105 | LR_CLIP: 0.000001
106 |
107 | OTHERS:
108 | PRINT_FREQ: 20
109 | SYNC_BN: False
110 | USE_AMP: True
111 | EVAL_FREQ: 5
112 | FIND_UNUSED_PARAMETERS: True
113 |
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/inst/softgroup_clip_base6_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | base_class_idx: [0, 2, 3, 4, 8, 9]
5 | novel_class_idx: [1, 5, 6, 7, 10, 11]
6 | ignore_class_idx: [12]
7 |
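
Override configs like the one above only change a handful of keys and delegate the rest to the file named in _BASE_CONFIG_ (which can also appear nested, e.g. under DATA_CONFIG). A minimal sketch of how such recursive inheritance could be resolved; the repository's own loader in pcseg/config.py may differ in details:

    import yaml

    def merge_dicts(base, override):
        """Override wins; nested dicts are merged key by key."""
        out = dict(base)
        for k, v in override.items():
            if isinstance(v, dict) and isinstance(out.get(k), dict):
                out[k] = merge_dicts(out[k], v)
            else:
                out[k] = v
        return out

    def load_cfg_with_base(path):
        """Load a YAML config, recursively merging the file referenced by _BASE_CONFIG_."""
        with open(path, 'r') as f:
            cfg = yaml.safe_load(f)
        base_path = cfg.pop('_BASE_CONFIG_', None)
        if base_path is None:
            return cfg
        return merge_dicts(load_cfg_with_base(base_path), cfg)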
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]
5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ]
6 | novel_class_idx: [ 5, 7, 9, 10 ]
7 | ignore_class_idx: [ 12 ]
8 |
9 | CAPTION_INFO:
10 |
11 | KEY: [SCENE, VIEW, ENTITY]
12 | SCENE:
13 | ENABLED: False
14 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json
15 | GATHER_CAPTION: True
16 |
17 | VIEW:
18 | ENABLED: True
19 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json
20 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx
21 | SELECT: ratio
22 | NUM: 1
23 | RATIO: 0.2
24 | GATHER_CAPTION: True
25 |
26 | ENTITY:
27 | ENABLED: True
28 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json
29 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx
30 | SELECT: ratio
31 | NUM: 1
32 | RATIO: 1.0
33 | GATHER_CAPTION: True
34 |
35 | CAPTION_CORR_PATH_IN_ONE_FILE: False
36 |
37 |
38 | MODEL:
39 |
40 | BINARY_HEAD:
41 | NAME: BinaryHead
42 | DETACH: True
43 | THRESH: 0.5
44 | CUSTOM_SP1X1: True
45 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1',
46 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1',
47 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1',
48 | 'unet.u.u.u.u.u.u.blocks.block1' ]
49 |
50 | TASK_HEAD:
51 | NAME: TextSegHead
52 | CORRECT_SEG_PRED_BINARY: True
53 |
54 | CAPTION_HEAD:
55 | NAME: CaptionHead
56 | FEAT_NORM: True
57 | LOGIT_SCALE:
58 | value: 100.0
59 | learnable: True
60 | LOSS_WEIGHT:
61 | SCENE: 0.0
62 | VIEW: 0.08
63 | ENTITY: 0.02
64 |
65 | INST_HEAD:
66 | CORRECT_SEG_PRED_BINARY: True
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/spconv_clip_adamw.yaml:
--------------------------------------------------------------------------------
1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter]
2 |
3 | DATA_CONFIG:
4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml
5 | ignore_class_idx: [12]
6 |
7 | MODEL:
8 | NAME: SparseUNetTextSeg
9 | REMAP_FROM_3DLANG: False
10 | REMAP_FROM_NOADAPTER: False
11 |
12 | VFE:
13 | NAME: IndoorVFE
14 | USE_XYZ: True
15 |
16 | BACKBONE_3D:
17 | NAME: SparseUNetIndoor
18 | IN_CHANNEL: 6
19 | MID_CHANNEL: 16
20 | BLOCK_RESIDUAL: True
21 | BLOCK_REPS: 2
22 | NUM_BLOCKS: 7
23 | CUSTOM_SP1X1: True
24 |
25 | ADAPTER:
26 | NAME: VLAdapter
27 | EVAL_ONLY: False
28 | NUM_ADAPTER_LAYERS: 2
29 | TEXT_DIM: -1
30 | LAST_NORM: True
31 | FEAT_NORM: False
32 |
33 | TASK_HEAD:
34 | NAME: TextSegHead
35 |
36 | TEXT_EMBED:
37 | NAME: CLIP
38 | NORM: True
39 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth
40 |
41 | LOGIT_SCALE:
42 | value: 1.0
43 | learnable: False
44 |
45 | TEXT_ENCODER:
46 | NAME: CLIP
47 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
48 | TEMPLATE: identity
49 | EXTRACT_EMBED: False # whether to extract text embeddings online from class names
50 | # BERT:
51 | # BACKBONE: bert-base-uncased
52 |
53 | OPTIMIZATION:
54 | TEST_BATCH_SIZE_PER_GPU: 1
55 | BATCH_SIZE_PER_GPU: 4
56 | NUM_EPOCHS: 32
57 | LR: 0.004 # 4e-3
58 | SCHEDULER: cos_after_step
59 | OPTIMIZER: adamw
60 | WEIGHT_DECAY: 0.0001
61 | MOMENTUM: 0.9
62 | STEP_EPOCH: 20
63 | MULTIPLIER: 0.1
64 | CLIP_GRAD: False
65 | PCT_START: 0.39
66 | DIV_FACTOR: 1
67 | MOMS: [0.95, 0.85]
68 | LR_CLIP: 0.000001
69 |
70 | OTHERS:
71 | PRINT_FREQ: 20
72 | EVAL_FREQ: 5
73 | SYNC_BN: False
74 | USE_AMP: True
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/spconv_clip_base6_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | base_class_idx: [ 0, 2, 3, 4, 8, 9 ]
5 | novel_class_idx: [ 1, 5, 6, 7, 10, 11 ]
6 | ignore_class_idx: [ 12 ]
7 |
--------------------------------------------------------------------------------
/tools/cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ]
5 | novel_class_idx: [ 5, 7, 9, 10 ]
6 | ignore_class_idx: [ 12 ]
7 |
8 | CAPTION_INFO:
9 |
10 | KEY: [SCENE, VIEW, ENTITY]
11 | SCENE:
12 | ENABLED: False
13 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json
14 | GATHER_CAPTION: True
15 |
16 | VIEW:
17 | ENABLED: True
18 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json
19 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx
20 | SELECT: ratio
21 | NUM: 1
22 | RATIO: 0.2
23 | GATHER_CAPTION: True
24 |
25 | ENTITY:
26 | ENABLED: True
27 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json
28 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx
29 | SELECT: ratio
30 | NUM: 1
31 | RATIO: 1.0
32 | GATHER_CAPTION: True
33 |
34 | CAPTION_CORR_PATH_IN_ONE_FILE: False
35 |
36 |
37 | MODEL:
38 |
39 | BINARY_HEAD:
40 | NAME: BinaryHead
41 | DETACH: True
42 | THRESH: 0.5
43 | CUSTOM_SP1X1: True
44 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1',
45 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1',
46 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1',
47 | 'unet.u.u.u.u.u.u.blocks.block1' ]
48 |
49 | TASK_HEAD:
50 | NAME: TextSegHead
51 | CORRECT_SEG_PRED_BINARY: True
52 |
53 |
54 | CAPTION_HEAD:
55 | NAME: CaptionHead
56 | FEAT_NORM: True
57 | LOGIT_SCALE:
58 | value: 100.0
59 | learnable: True
60 | LOSS_WEIGHT:
61 | SCENE: 0.0
62 | VIEW: 0.08
63 | ENTITY: 0.02
64 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/inst/softgroup_clip_adamw.yaml:
--------------------------------------------------------------------------------
1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, desk,
2 | curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture]
3 |
4 | DATA_CONFIG:
5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml
6 | DATASET: ScanNetInstDataset
7 | inst_class_idx: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
8 | sem2ins_classes: []
9 | inst_label_shift: 2
10 |
11 | ignore_class_idx: [19]
12 |
13 | MODEL:
14 | NAME: SparseUNetTextSeg
15 | REMAP_FROM_3DLANG: False
16 |
17 | VFE:
18 | NAME: IndoorVFE
19 | USE_XYZ: True
20 |
21 | BACKBONE_3D:
22 | NAME: SparseUNetIndoor
23 | IN_CHANNEL: 6
24 | MID_CHANNEL: 16
25 | BLOCK_RESIDUAL: True
26 | BLOCK_REPS: 2
27 | NUM_BLOCKS: 7
28 | CUSTOM_SP1X1: True
29 |
30 | ADAPTER:
31 | NAME: VLAdapter
32 | EVAL_ONLY: False
33 | NUM_ADAPTER_LAYERS: 2
34 | TEXT_DIM: -1
35 | LAST_NORM: True
36 |
37 | TASK_HEAD:
38 | NAME: TextSegHead
39 | FEAT_NORM: False
40 |
41 | LAST_NORM: True
42 | TEXT_EMBED:
43 | NAME: CLIP
44 | NORM: True
45 | PATH: text_embed/scannet_clip-ViT-B16_id.pth
46 | FEAT_NORM: False
47 | LOGIT_SCALE:
48 | value: 1.0
49 | learnable: False
50 |
51 | INST_HEAD:
52 | NAME: InstHead
53 |
54 | BLOCK_RESIDUAL: True
55 | CUSTOM_SP1X1: True
56 |
57 | CLUSTERING:
58 | PREPARE_EPOCH: 32
59 | GROUPING_CFG:
60 | SCORE_THR: 0.2
61 | RADIUS: 0.04
62 | MEAN_ACTIVE: 300
63 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
64 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.]
65 | NPOINT_THR: 50 # absolute if class_numpoint == -1, relative if class_numpoint != -1
66 | IGNORE_CLASSES: [0, 1]
67 | INST_VOXEL_CFG:
68 | SCALE: 50
69 | SPATIAL_SHAPE: 20
70 | LOSS_CFG:
71 | MAX_PROPOSAL_NUM: 200
72 | POS_IOU_THR: 0.5
73 | TEST_CFG:
74 | # x4_split: False
75 | CLS_SCORE_THR: 0.001
76 | MASK_SCORE_THR: -0.5
77 | MIN_NPOINT: 100
78 |
79 | FIXED_MODULES: []
80 | SEMANTIC_ONLY: False
81 |
82 |
83 | TEXT_ENCODER:
84 | NAME: CLIP
85 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
86 | TEMPLATE: identity
87 | EXTRACT_EMBED: False # whether to extract text embeddings online from class names
88 | # BERT:
89 | # BACKBONE: bert-base-uncased
90 |
91 | OPTIMIZATION:
92 | TEST_BATCH_SIZE_PER_GPU: 1
93 | BATCH_SIZE_PER_GPU: 4
94 | NUM_EPOCHS: 150
95 | LR: 0.004 # 4e-3
96 | SCHEDULER: cos_after_step
97 | OPTIMIZER: adamw
98 | WEIGHT_DECAY: 0.0001
99 | MOMENTUM: 0.9
100 | STEP_EPOCH: 82
101 | MULTIPLIER: 0.1
102 | CLIP_GRAD: False
103 | PCT_START: 0.52
104 | DIV_FACTOR: 2
105 | MOMS: [0.95, 0.85]
106 | LR_CLIP: 0.000001
107 |
108 | OTHERS:
109 | PRINT_FREQ: 20
110 | SYNC_BN: False
111 | USE_AMP: True
112 | EVAL_FREQ: 10
113 | FIND_UNUSED_PARAMETERS: True
114 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/inst/softgroup_clip_base10_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | # TODO: make base + novel = all.
5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ]
6 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ]
7 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ]
8 | ignore_class_idx: [ 19 ]
9 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ]
5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ]
6 | novel_class_idx: [ 5, 9, 12, 16 ]
7 | ignore_class_idx: [ 19 ]
8 |
9 | CAPTION_INFO:
10 |
11 | KEY: [SCENE, VIEW, ENTITY]
12 |
13 | SCENE:
14 | ENABLED: False
15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json
16 | GATHER_CAPTION: True
17 |
18 | VIEW:
19 | ENABLED: True
20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json
21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle
22 | SELECT: ratio
23 | NUM: 1
24 | RATIO: 0.5
25 | GATHER_CAPTION: True
26 |
27 | ENTITY:
28 | ENABLED: True
29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json
30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle
31 | SELECT: ratio
32 | NUM: 1
33 | RATIO: 1.0
34 | GATHER_CAPTION: True
35 |
36 | CAPTION_CORR_PATH_IN_ONE_FILE: True
37 |
38 |
39 | MODEL:
40 |
41 | BINARY_HEAD:
42 | NAME: BinaryHead
43 | DETACH: True
44 | THRESH: 0.5
45 | CUSTOM_SP1X1: True
46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1',
47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1',
48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1',
49 | 'unet.u.u.u.u.u.u.blocks.block1' ]
50 |
51 | TASK_HEAD:
52 | NAME: TextSegHead
53 | CORRECT_SEG_PRED_BINARY: True
54 |
55 | CAPTION_HEAD:
56 | NAME: CaptionHead
57 | FEAT_NORM: True
58 | LOGIT_SCALE:
59 | value: 100.0
60 | learnable: True
61 | LOSS_WEIGHT:
62 | SCENE: 0.0
63 | VIEW: 0.05
64 | ENTITY: 0.05
65 |
66 | INST_HEAD:
67 | CORRECT_SEG_PRED_BINARY: True
68 |
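
The base/novel split above is expressed as indices into CLASS_NAMES. A quick sanity check, using the ScanNet class list from the parent config, shows which categories are held out as novel and that the three lists partition all classes:

    CLASS_NAMES = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                   'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                   'refrigerator', 'showercurtain', 'toilet', 'sink', 'bathtub', 'otherfurniture']

    base_class_idx = [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18]
    novel_class_idx = [5, 9, 12, 16]
    ignore_class_idx = [19]

    print([CLASS_NAMES[i] for i in novel_class_idx])
    # ['sofa', 'bookshelf', 'desk', 'toilet']

    assert sorted(base_class_idx + novel_class_idx + ignore_class_idx) == list(range(len(CLASS_NAMES)))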
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/inst/softgroup_clip_base8_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | # TODO: make base + novel = all.
5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ]
6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ]
7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ]
8 | ignore_class_idx: [ 19 ]
9 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | # TODO: split the input categories into base/novel/ignore.
5 | # Note that if you have ground-truth annotations for the test samples,
6 | # you need to set these parameters carefully to evaluate the performance quantitatively.
7 | # If you only want to evaluate qualitatively, you can put all the categories into base_class_idx.
8 | base_class_idx: [ 0, 1, 2, 3, 4]
9 | novel_class_idx: []
10 | ignore_class_idx: [ ]
11 |
12 | # TODO: split the categories into inst_base/inst_novel
13 | inst_class_idx: [2, 3]
14 | base_inst_class_idx: [0, 1] # base category indices among the instance categories; this list must be no longer than inst_class_idx
15 | novel_inst_class_idx: []
16 |
17 | MODEL:
18 | TASK_HEAD:
19 | CORRECT_SEG_PRED_BINARY: True # TODO: for out-of-domain data, setting this to False probably leads to better performance
20 |
21 | INST_HEAD:
22 | CORRECT_SEG_PRED_BINARY: True # TODO: for out-of-domain data, setting this to False probably leads to better performance
23 | CLUSTERING:
24 | PREPARE_EPOCH: -1
25 |
26 | TEXT_ENCODER:
27 | EXTRACT_EMBED: True
28 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories
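
With EXTRACT_EMBED: True, the text embeddings for CATEGORY_NAMES are produced online by the CLIP text encoder rather than loaded from a precomputed .pth file. A minimal sketch of that step with the openai clip package, mirroring what tools/process_tools/generate_category_embedding.py does offline (the in-model code path may differ):

    import clip
    import torch

    category_names = ['door', 'window', 'desk', 'keyboard', 'others']

    model, _ = clip.load('ViT-B/16', device='cpu')   # matches TEXT_ENCODER.BACKBONE; cpu keeps the sketch portable
    tokens = clip.tokenize(category_names)           # (5, 77) token ids
    with torch.no_grad():
        text_embed = model.encode_text(tokens)       # (5, 512) for ViT-B/16
    text_embed = text_embed / text_embed.norm(dim=-1, keepdim=True)  # NORM: True
    print(text_embed.shape)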
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/spconv_clip_adamw.yaml:
--------------------------------------------------------------------------------
1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter,
2 | desk, curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture]
3 |
4 | DATA_CONFIG:
5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml
6 | ignore_class_idx: [19]
7 |
8 | MODEL:
9 | NAME: SparseUNetTextSeg
10 | REMAP_FROM_3DLANG: False
11 | REMAP_FROM_NOADAPTER: False
12 |
13 | VFE:
14 | NAME: IndoorVFE
15 | USE_XYZ: True
16 |
17 | BACKBONE_3D:
18 | NAME: SparseUNetIndoor
19 | IN_CHANNEL: 6
20 | MID_CHANNEL: 16
21 | BLOCK_RESIDUAL: True
22 | BLOCK_REPS: 2
23 | NUM_BLOCKS: 7
24 | CUSTOM_SP1X1: True
25 |
26 | ADAPTER:
27 | NAME: VLAdapter
28 | EVAL_ONLY: False
29 | NUM_ADAPTER_LAYERS: 2
30 | TEXT_DIM: -1
31 | LAST_NORM: True
32 |
33 | TASK_HEAD:
34 | NAME: TextSegHead
35 | FEAT_NORM: False
36 |
37 | TEXT_EMBED:
38 | NAME: CLIP
39 | NORM: True
40 | PATH: text_embed/scannet_clip-ViT-B16_id.pth
41 |
42 | LOGIT_SCALE:
43 | value: 1.0
44 | learnable: False
45 |
46 | TEXT_ENCODER:
47 | NAME: CLIP
48 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
49 | TEMPLATE: identity
50 | EXTRACT_EMBED: False # whether to extract text embeddings online from class names
51 | # BERT:
52 | # BACKBONE: bert-base-uncased
53 |
54 |
55 | OPTIMIZATION:
56 | BATCH_SIZE_PER_GPU: 4
57 | NUM_EPOCHS: 128
58 | LR: 0.004 # 4e-3
59 | SCHEDULER: cos_after_step
60 | OPTIMIZER: adamw
61 | WEIGHT_DECAY: 0.0001
62 | MOMENTUM: 0.9
63 | STEP_EPOCH: 50
64 | MULTIPLIER: 0.1
65 | CLIP_GRAD: False
66 | PCT_START: 0.39
67 | DIV_FACTOR: 1
68 | MOMS: [0.95, 0.85]
69 | LR_CLIP: 0.000001
70 |
71 | OTHERS:
72 | PRINT_FREQ: 20
73 | SYNC_BN: False
74 | USE_AMP: True
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/spconv_clip_base10_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml
2 |
3 |
4 | DATA_CONFIG:
5 | # TODO: make base + novel = all.
6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ]
7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ]
8 | ignore_class_idx: [ 19 ]
9 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/spconv_clip_base12_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | # TODO: make base + novel = all.
5 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ]
6 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ]
7 | ignore_class_idx: [ 19 ]
8 |
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_adamw.yaml
2 |
3 |
4 | DATA_CONFIG:
5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ]
6 | novel_class_idx: [ 5, 9, 12, 16 ]
7 | ignore_class_idx: [ 19 ]
8 |
9 | CAPTION_INFO:
10 |
11 | KEY: [SCENE, VIEW, ENTITY]
12 |
13 | SCENE:
14 | ENABLED: False
15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json
16 | GATHER_CAPTION: True
17 |
18 | VIEW:
19 | ENABLED: True
20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json
21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle
22 | SELECT: ratio
23 | NUM: 1
24 | RATIO: 0.5
25 | GATHER_CAPTION: True
26 |
27 | ENTITY:
28 | ENABLED: True
29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json
30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle
31 | SELECT: ratio
32 | NUM: 1
33 | RATIO: 1.0
34 | GATHER_CAPTION: True
35 |
36 | CAPTION_CORR_PATH_IN_ONE_FILE: True
37 |
38 |
39 | MODEL:
40 |
41 | BINARY_HEAD:
42 | NAME: BinaryHead
43 | DETACH: True
44 | THRESH: 0.5
45 | CUSTOM_SP1X1: True
46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1',
47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1',
48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1',
49 | 'unet.u.u.u.u.u.u.blocks.block1' ]
50 |
51 | TASK_HEAD:
52 | NAME: TextSegHead
53 | CORRECT_SEG_PRED_BINARY: True
54 |
55 | CAPTION_HEAD:
56 | NAME: CaptionHead
57 | FEAT_NORM: True
58 | LOGIT_SCALE:
59 | value: 100.0
60 | learnable: True
61 | LOSS_FUNC: CrossEntropy
62 | LOSS_WEIGHT:
63 | SCENE: 0.0
64 | VIEW: 0.05
65 | ENTITY: 0.05
66 |
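
The CAPTION_HEAD above aligns pooled point features with caption text embeddings via a scaled dot product and a cross-entropy objective (LOSS_FUNC: CrossEntropy), weighted per caption type by LOSS_WEIGHT. A heavily simplified sketch of what such a loss could look like; the pooling, the caption-to-point correspondences, and the exact formulation live in pcseg/models/head/caption_head.py and may differ:

    import torch
    import torch.nn.functional as F

    def caption_contrastive_loss(point_feats, caption_point_idx, caption_embeds, logit_scale=100.0):
        """point_feats: (N, C) adapted point features.
        caption_point_idx: list of LongTensors, points covered by each caption.
        caption_embeds: (M, C) normalized caption text embeddings."""
        pooled = torch.stack([point_feats[idx].mean(0) for idx in caption_point_idx])  # (M, C)
        pooled = F.normalize(pooled, dim=-1)                                           # FEAT_NORM: True
        logits = logit_scale * pooled @ caption_embeds.t()                             # (M, M)
        target = torch.arange(len(caption_point_idx))                                  # each caption matches its own point set
        return F.cross_entropy(logits, target)

    # the total caption loss would then be a weighted sum, e.g.
    # loss = 0.0 * scene_loss + 0.05 * view_loss + 0.05 * entity_loss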
--------------------------------------------------------------------------------
/tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml
2 |
3 | DATA_CONFIG:
4 | # TODO: split the input categories into base/novel/ignore.
5 | # Note that if you have ground-truth annotations for the test samples,
6 | # you need to set these parameters carefully to evaluate the performance quantitatively.
7 | # If you only want to evaluate qualitatively, you can put all the categories into base_class_idx.
8 | base_class_idx: [ 0, 1, 2, 3, 4]
9 | novel_class_idx: []
10 | ignore_class_idx: [ ]
11 |
12 | MODEL:
13 | TASK_HEAD:
14 | CORRECT_SEG_PRED_BINARY: True # TODO: for out-of-domain data, setting this to False probably leads to better performance
15 |
16 | TEXT_ENCODER:
17 | EXTRACT_EMBED: True
18 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories
--------------------------------------------------------------------------------
/tools/eval_utils/inst_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/eval_utils/inst_eval/__init__.py
--------------------------------------------------------------------------------
/tools/eval_utils/inst_eval/instance_eval_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import numpy as np
5 | from plyfile import PlyData
6 |
7 |
8 | # matrix: 4x4 np array
9 | # points Nx3 np array
10 | def transform_points(matrix, points):
11 | assert len(points.shape) == 2 and points.shape[1] == 3
12 | num_points = points.shape[0]
13 | p = np.concatenate([points, np.ones((num_points, 1))], axis=1)
14 | p = np.matmul(matrix, np.transpose(p))
15 | p = np.transpose(p)
16 | p[:, :3] /= p[:, 3, None]
17 | return p[:, :3]
18 |
19 |
20 | def export_ids(filename, ids):
21 | with open(filename, 'w') as f:
22 | for id in ids:
23 | f.write('%d\n' % id)
24 |
25 |
26 | def load_ids(filename):
27 | ids = open(filename).read().splitlines()
28 | ids = np.array(ids, dtype=np.int64)
29 | return ids
30 |
31 |
32 | def read_mesh_vertices(filename):
33 | assert os.path.isfile(filename)
34 | with open(filename, 'rb') as f:
35 | plydata = PlyData.read(f)
36 | num_verts = plydata['vertex'].count
37 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32)
38 | vertices[:, 0] = plydata['vertex'].data['x']
39 | vertices[:, 1] = plydata['vertex'].data['y']
40 | vertices[:, 2] = plydata['vertex'].data['z']
41 | return vertices
42 |
43 |
44 | # export 3d instance labels for instance evaluation
45 | def export_instance_ids_for_eval(filename, label_ids, instance_ids):
46 | assert label_ids.shape[0] == instance_ids.shape[0]
47 | output_mask_path_relative = 'pred_mask'
48 | name = os.path.splitext(os.path.basename(filename))[0]
49 | output_mask_path = os.path.join(os.path.dirname(filename), output_mask_path_relative)
50 | if not os.path.isdir(output_mask_path):
51 | os.mkdir(output_mask_path)
52 | insts = np.unique(instance_ids)
53 | zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32)
54 | with open(filename, 'w') as f:
55 | for idx, inst_id in enumerate(insts):
56 | if inst_id == 0: # 0 -> no instance for this vertex
57 | continue
58 | output_mask_file = os.path.join(output_mask_path_relative,
59 | name + '_' + str(idx) + '.txt')
60 | loc = np.where(instance_ids == inst_id)
61 | label_id = label_ids[loc[0][0]]
62 | f.write('%s %d %f\n' % (output_mask_file, label_id, 1.0))
63 | # write mask
64 | mask = np.copy(zero_mask)
65 | mask[loc[0]] = 1
66 | export_ids(output_mask_file, mask)
67 |
68 |
69 | # ------------ Instance Utils ------------ #
70 |
71 |
72 | class Instance(object):
73 | instance_id = 0
74 | label_id = 0
75 | vert_count = 0
76 | med_dist = -1
77 | dist_conf = 0.0
78 |
79 | def __init__(self, mesh_vert_instances, instance_id):
80 | if (instance_id == -1):
81 | return
82 | self.instance_id = int(instance_id)
83 | self.label_id = int(self.get_label_id(instance_id))
84 | self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id))
85 |
86 | def get_label_id(self, instance_id):
87 | return int(instance_id // 1000)
88 |
89 | def get_instance_verts(self, mesh_vert_instances, instance_id):
90 | return (mesh_vert_instances == instance_id).sum()
91 |
92 | def to_json(self):
93 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
94 |
95 | def to_dict(self):
96 | dict = {}
97 | dict['instance_id'] = self.instance_id
98 | dict['label_id'] = self.label_id
99 | dict['vert_count'] = self.vert_count
100 | dict['med_dist'] = self.med_dist
101 | dict['dist_conf'] = self.dist_conf
102 | return dict
103 |
104 | def from_json(self, data):
105 | self.instance_id = int(data['instance_id'])
106 | self.label_id = int(data['label_id'])
107 | self.vert_count = int(data['vert_count'])
108 | if ('med_dist' in data):
109 | self.med_dist = float(data['med_dist'])
110 | self.dist_conf = float(data['dist_conf'])
111 |
112 | def __str__(self):
113 | return '(' + str(self.instance_id) + ')'
114 |
115 |
116 | def read_instance_prediction_file(filename, pred_path):
117 | lines = open(filename).read().splitlines()
118 | instance_info = {}
119 | abs_pred_path = os.path.abspath(pred_path)
120 | for line in lines:
121 | parts = line.split(' ')
122 | if len(parts) != 3:
123 | print('invalid instance prediction file. Expected (per line): \
124 | [rel path prediction] [label id prediction] \
125 | [confidence prediction]')
126 | if os.path.isabs(parts[0]):
127 | print('invalid instance prediction file. \
128 | First entry in line must be a relative path')
129 | mask_file = os.path.join(os.path.dirname(filename), parts[0])
130 | mask_file = os.path.abspath(mask_file)
131 | # check that mask_file lives inside prediction path
132 | if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path:
133 | print(('predicted mask {} in prediction text file {}' +
134 | 'points outside of prediction path.').format(mask_file, filename))
135 |
136 | info = {}
137 | info['label_id'] = int(float(parts[1]))
138 | info['conf'] = float(parts[2])
139 | instance_info[mask_file] = info
140 | return instance_info
141 |
142 |
143 | def get_instances(ids, class_ids, class_labels, id2label):
144 | instances = {}
145 | for label in class_labels:
146 | instances[label] = []
147 | instance_ids = np.unique(ids)
148 | for id in instance_ids:
149 | if id == 0:
150 | continue
151 | inst = Instance(ids, id)
152 | if inst.label_id in class_ids:
153 | instances[id2label[inst.label_id]].append(inst.to_dict())
154 | return instances
155 |
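
These utilities rely on the ScanNet convention of packing the semantic label and the instance index into one integer, id = label_id * 1000 + instance_index, which is why Instance.get_label_id divides by 1000 and save_gt_instance (in save_utils.py below) uses // 1000 and % 1000. A tiny illustration:

    label_id, instance_index = 4, 7          # e.g. the 8th instance of class 4
    packed = label_id * 1000 + instance_index
    assert packed // 1000 == label_id        # what Instance.get_label_id recovers
    assert packed % 1000 == instance_index   # what save_gt_instance recovers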
--------------------------------------------------------------------------------
/tools/eval_utils/inst_eval/pointwise_eval_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=None):
5 | gt = np.concatenate(gt_list, axis=0)
6 | pred = np.concatenate(pred_list, axis=0)
7 | assert gt.shape == pred.shape
8 | correct = (gt[gt != ignore_label] == pred[gt != ignore_label]).sum()
9 | whole = (gt != ignore_label).sum()
10 | acc = correct.astype(float) / whole * 100
11 | logger.info(f'Acc: {acc:.1f}')
12 | return acc
13 |
14 |
15 | def evaluate_semantic_miou(n_classes, pred_list, gt_list, ignore_label=-100, logger=None):
16 | gt = np.concatenate(gt_list, axis=0)
17 | pred = np.concatenate(pred_list, axis=0)
18 | pos_inds = gt != ignore_label
19 | gt = gt[pos_inds]
20 | pred = pred[pos_inds]
21 | assert gt.shape == pred.shape
22 | iou_list = []
23 | for _index in range(n_classes):
24 | if _index != ignore_label:
25 | intersection = ((gt == _index) & (pred == _index)).sum()
26 | union = ((gt == _index) | (pred == _index)).sum()
27 | iou = intersection.astype(float) / (union + 1e-10) * 100
28 | iou_list.append(iou)
29 | miou = np.nanmean(iou_list)
30 | logger.info('Class-wise mIoU: ' + ' '.join(f'{x:.1f}' for x in iou_list))
31 | logger.info(f'mIoU: {miou:.1f}')
32 | return miou, iou_list
33 |
34 |
35 | def evaluate_offset_mae(pred_list, gt_list, gt_instance_list, ignore_label=-100, logger=None):
36 | gt = np.concatenate(gt_list, axis=0)
37 | pred = np.concatenate(pred_list, axis=0)
38 | gt_instance = np.concatenate(gt_instance_list, axis=0)
39 | pos_inds = gt_instance != ignore_label
40 | gt = gt[pos_inds]
41 | pred = pred[pos_inds]
42 | mae = np.abs(gt - pred).sum() / pos_inds.sum()
43 | logger.info(f'Offset MAE: {mae:.3f}')
44 | return mae
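
The evaluators above expect lists of per-scene arrays and a logger. A minimal usage sketch with synthetic predictions (the import path and label values are illustrative only):

    import logging
    import numpy as np
    # assuming the two evaluators above are importable, e.g.:
    from pointwise_eval_utils import evaluate_semantic_acc, evaluate_semantic_miou

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('eval')

    gt_list = [np.array([0, 0, 1, 2, -100])]   # -100 is the ignore label
    pred_list = [np.array([0, 1, 1, 2, 0])]

    evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=logger)      # Acc: 75.0
    evaluate_semantic_miou(3, pred_list, gt_list, ignore_label=-100, logger=logger)  # per-class IoU + mIoU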
--------------------------------------------------------------------------------
/tools/eval_utils/save_utils.py:
--------------------------------------------------------------------------------
1 | import multiprocessing as mp
2 | import os
3 | import os.path as osp
4 | import numpy as np
5 | from pcseg.models.model_utils.rle_utils import rle_decode
6 |
7 |
8 | def save_npy(root, name, scan_ids, arrs):
9 | root = osp.join(root, name)
10 | os.makedirs(root, exist_ok=True)
11 | paths = [osp.join(root, f'{i}.npy') for i in scan_ids]
12 | pool = mp.Pool()
13 | pool.starmap(np.save, zip(paths, arrs))
14 | pool.close()
15 | pool.join()
16 |
17 |
18 | def save_single_instance(root, scan_id, insts, nyu_id=None):
19 | f = open(osp.join(root, f'{scan_id}.txt'), 'w')
20 | os.makedirs(osp.join(root, 'predicted_masks'), exist_ok=True)
21 | for i, inst in enumerate(insts):
22 | # assert scan_id == inst['scan_id']
23 | label_id = inst['label_id']
24 | # scannet dataset use nyu_id for evaluation
25 | if nyu_id is not None:
26 | label_id = nyu_id[label_id - 1]
27 | conf = inst['conf']
28 | f.write(f'predicted_masks/{scan_id}_{i:03d}.txt {label_id} {conf:.4f}\n')
29 | mask_path = osp.join(root, 'predicted_masks', f'{scan_id}_{i:03d}.txt')
30 | mask = rle_decode(inst['pred_mask'])
31 | np.savetxt(mask_path, mask, fmt='%d')
32 | f.close()
33 |
34 |
35 | def save_pred_instances(root, name, scan_ids, pred_insts, nyu_id=None):
36 | root = osp.join(root, name)
37 | os.makedirs(root, exist_ok=True)
38 | roots = [root] * len(scan_ids)
39 | nyu_ids = [nyu_id] * len(scan_ids)
40 | pool = mp.Pool()
41 | pool.starmap(save_single_instance, zip(roots, scan_ids, pred_insts, nyu_ids))
42 | pool.close()
43 | pool.join()
44 |
45 |
46 | def save_gt_instance(path, gt_inst, nyu_id=None):
47 | if nyu_id is not None:
48 | sem = gt_inst // 1000
49 | ignore = sem == 0
50 | ins = gt_inst % 1000
51 | nyu_id = np.array(nyu_id)
52 | sem = nyu_id[sem - 1]
53 | sem[ignore] = 0
54 | gt_inst = sem * 1000 + ins
55 | np.savetxt(path, gt_inst, fmt='%d')
56 |
57 |
58 | def save_gt_instances(root, name, scan_ids, gt_insts, nyu_id=None):
59 | root = osp.join(root, name)
60 | os.makedirs(root, exist_ok=True)
61 | paths = [osp.join(root, f'{i}.txt') for i in scan_ids]
62 | pool = mp.Pool()
63 | nyu_ids = [nyu_id] * len(scan_ids)
64 | pool.starmap(save_gt_instance, zip(paths, gt_insts, nyu_ids))
65 | pool.close()
66 | pool.join()
67 |
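
save_npy and save_pred_instances fan the per-scan writes out over a multiprocessing pool. A small usage sketch for save_npy (the import path, output directory, and arrays are illustrative):

    import numpy as np
    from save_utils import save_npy  # assuming this module is importable

    scan_ids = ['scene0011_00', 'scene0015_00']
    sem_preds = [np.zeros(100, dtype=np.int64), np.ones(80, dtype=np.int64)]
    save_npy('output/results', 'semantic_pred', scan_ids, sem_preds)
    # writes output/results/semantic_pred/scene0011_00.npy and .../scene0015_00.npy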
--------------------------------------------------------------------------------
/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc
--------------------------------------------------------------------------------
/tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/process_tools/combine_multiple_caption_files.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 | import tqdm
4 | import pickle
5 |
6 |
7 | def write_caption_to_file(data, path):
8 | with open(path, 'w') as f:
9 | json.dump(data, f)
10 |
11 | print(f'The caption is dumped to {path}')
12 |
13 |
14 | def replace_dict_keys_with_new_keys(origin_dict, new_key_list):
15 | curr_key_list = list(origin_dict.keys())
16 | new_dict = {}
17 | for i, key in enumerate(curr_key_list):
18 | new_dict[new_key_list[i]] = origin_dict[key]
19 |
20 | return new_dict
21 |
22 |
23 | def merge_captions_with_path_list(caption_path_list, caption_save_path):
24 | new_caption = {}
25 | scene_caption_num = {}
26 |
27 | for caption_path in caption_path_list:
28 | current_caption = json.load(open(caption_path, 'r'))
29 | for scene_name, curr_scene_caption in tqdm.tqdm(current_caption.items(), total=len(current_caption)):
30 | counter = scene_caption_num[scene_name] if scene_name in scene_caption_num else 0
31 |
32 | image_name_list = [f'{counter + i}' for i in range(len(curr_scene_caption))]
33 | new_scene_caption = replace_dict_keys_with_new_keys(curr_scene_caption, image_name_list)
34 | if scene_name in new_caption:
35 | new_caption[scene_name].update(new_scene_caption)
36 | else:
37 | new_caption[scene_name] = new_scene_caption
38 |
39 | counter += len(curr_scene_caption)
40 | scene_caption_num[scene_name] = counter
41 |
42 | write_caption_to_file(new_caption, caption_save_path)
43 |
44 |
45 | def merge_caption_idx_with_path_list(caption_idx_path_list, caption_idx_save_path):
46 | new_caption_idx = []
47 | caption_idx_list = []
48 | for caption_idx_path in caption_idx_path_list:
49 | caption_idx = pickle.load(open(caption_idx_path, 'rb'))
50 | caption_idx_list.append(caption_idx)
51 |
52 | for i in tqdm.tqdm(range(len(caption_idx_list[0]))):
53 | scene_caption = {}
54 | scene_caption_infos = {}
55 | counter = 0
56 | for _, caption_idx in enumerate(caption_idx_list):
57 | if 'scene_name' not in scene_caption:
58 | scene_caption['scene_name'] = caption_idx[i]['scene_name']
59 |
60 | new_image_name_list = [f'{counter + i}' for i in range(len(caption_idx[i]['infos']))]
61 | new_scene_caption_idx = replace_dict_keys_with_new_keys(caption_idx[i]['infos'], new_image_name_list)
62 |
63 | scene_caption_infos.update(new_scene_caption_idx)
64 | counter += len(caption_idx[i]['infos'])
65 |
66 | scene_caption['infos'] = scene_caption_infos
67 | new_caption_idx.append(scene_caption)
68 |
69 | with open(caption_idx_save_path, 'wb') as f:
70 | pickle.dump(new_caption_idx, f)
71 |
72 |
73 | if __name__ == '__main__':
74 | parser = argparse.ArgumentParser('')
75 | parser.add_argument('--caption_path_list',
76 | default=['data/nuscenes/text_embed/caption_basic_crop_nuscenes_v1.0-mini_vit-gpt2-image'
77 | '-captioning_w400-500_overlap0.3.json',
78 | 'data/nuscenes/text_embed/caption_detic_crop_cap_nuscenes_v1.0-mini_vit-gpt2-image'
79 | '-captioning_.json'],
80 | type=list, help='')
81 | parser.add_argument('--caption_idx_path_list',
82 | default=['data/nuscenes/v1.0-mini/nuscenes_caption_idx_basic_crop.pkl',
83 | 'data/nuscenes/v1.0-mini/nuscenes_caption_idx_detic_crop_cap.pkl'],
84 | type=list, help='')
85 | parser.add_argument('--caption_save_path', required=True, type=str, help='')
86 | parser.add_argument('--caption_idx_save_path', required=True, type=str, help='')
87 |
88 | args = parser.parse_args()
89 |
90 | print('Start to merge captions ........')
91 | merge_captions_with_path_list(args.caption_path_list, args.caption_save_path)
92 | print('Finish merging captions ........')
93 |
94 | print('Start to merge caption idx files ........')
95 | merge_caption_idx_with_path_list(args.caption_idx_path_list, args.caption_idx_save_path)
96 | print('Finish merging caption idx files ........')
97 |
98 |
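
The merge helpers above assume each caption JSON is a nested dict of the form {scene_name: {image_name: caption}}, and each caption-idx pickle is a list of {'scene_name': ..., 'infos': {image_name: point_indices}} entries; merging renumbers the image keys so captions from several files do not collide. A toy illustration of the caption-side result (dicts built in memory, no files involved):

    cap_a = {'scene0000_00': {'0': 'a room with a table', '1': 'a chair near a window'}}
    cap_b = {'scene0000_00': {'0': 'a sofa in the corner'}}

    # after merging, keys are renumbered so scene0000_00 holds captions '0', '1', '2'
    merged = {'scene0000_00': {'0': 'a room with a table',
                               '1': 'a chair near a window',
                               '2': 'a sofa in the corner'}}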
--------------------------------------------------------------------------------
/tools/process_tools/filter_caption_without_points.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import pickle
3 | import json
4 | import tqdm
5 |
6 |
7 | def write_caption_to_file(data, path):
8 | with open(path, 'w') as f:
9 | json.dump(data, f)
10 |
11 | print(f'The caption is dumped to {path}')
12 |
13 |
14 | def filter_captions_without_points(caption_info, caption_idx_info):
15 | for idx, scene_caption_idx_info in tqdm.tqdm(enumerate(caption_idx_info), total=len(caption_idx_info)):
16 | scene_name = scene_caption_idx_info['scene_name']
17 | scene_caption_idx = scene_caption_idx_info['infos']
18 | scene_captions = caption_info[scene_name]
19 |
20 | image_name_list = list(scene_caption_idx.keys())
21 | for image_name in image_name_list:
22 | image_caption_idx = scene_caption_idx[image_name]
23 | if image_caption_idx.shape[0] == 0:
24 | scene_caption_idx.pop(image_name)
25 | scene_captions.pop(image_name)
26 |
27 | write_caption_to_file(caption_info, args.save_caption_info_path)
28 | with open(args.save_caption_idx_info_path, 'wb') as f:
29 | pickle.dump(caption_idx_info, f)
30 |
31 |
32 | if __name__ == '__main__':
33 | parser = argparse.ArgumentParser('')
34 | parser.add_argument('--caption_info_path', type=str, help='')
35 | parser.add_argument('--caption_idx_info_path', type=str, help='')
36 |
37 | parser.add_argument('--save_caption_info_path', type=str, help='')
38 | parser.add_argument('--save_caption_idx_info_path', type=str, help='')
39 |
40 | global args
41 | args = parser.parse_args()
42 |
43 | caption_info = json.load(open(args.caption_info_path, 'r'))
44 | caption_idx_info = pickle.load(open(args.caption_idx_info_path, 'rb'))
45 |
46 | filter_captions_without_points(caption_info, caption_idx_info)
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/tools/process_tools/generate_category_embedding.py:
--------------------------------------------------------------------------------
1 | import os
2 | import clip
3 | import torch
4 | from transformers import AutoTokenizer, AutoModel
5 |
6 | from pcseg.models.text_networks.text_models import get_clip_model
7 |
8 |
9 | class_names = {
10 | 'scannet': ['wall', 'floor', 'cabinet', 'bed', 'chair',
11 | 'sofa', 'table', 'door', 'window', 'bookshelf',
12 | 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
13 | 'showercurtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'],
14 | 's3dis': ['ceiling', 'floor', 'wall', 'beam', 'column',
15 | 'window', 'door', 'table', 'chair', 'sofa',
16 | 'bookcase', 'board', 'clutter']
17 | }
18 |
19 |
20 | def construct_input_from_class_name(input, tokenizer):
21 | inputs = tokenizer(input, return_tensors="pt", padding=True)
22 | return inputs
23 |
24 |
25 | def get_embedding(args):
26 | if args.model.startswith('clip'):
27 | backbone_name = args.model[5:]
28 | input = class_names[args.dataset]
29 | _, model = get_clip_model(backbone_name)
30 | model = model.cuda()
31 | text = clip.tokenize(input).cuda()
32 | output = model.encode_text(text)
33 | print(output.shape)
34 | else:
35 | tokenizer = AutoTokenizer.from_pretrained(args.model)
36 | model = AutoModel.from_pretrained(args.model)
37 |
38 | inputs = construct_input_from_class_name(class_names[args.dataset], tokenizer)
39 | outputs = model(**inputs)
40 | output = outputs.pooler_output
41 | print(outputs.pooler_output.shape)
42 |
43 | return output
44 |
45 |
46 | if __name__ == '__main__':
47 | import argparse
48 |
49 | parser = argparse.ArgumentParser('language model')
50 | parser.add_argument('--model', default='clip-ViT-B/16', type=str, help='language model name')
51 | parser.add_argument('--dataset_path', default='../data/scannetv2', type=str, help='dataset root path')
52 | parser.add_argument('--dataset', default='scannet', type=str, help='dataset name')
53 | args = parser.parse_args()
54 |
55 | category_embedding = get_embedding(args)
56 |
57 | file_name = '{}_{}_{}_text_embed.pth'.format(
58 | args.dataset, len(class_names[args.dataset]), args.model.replace('/', '')
59 | )
60 | save_dir = os.path.join(args.dataset_path, 'text_embed')
61 | os.makedirs(save_dir, exist_ok=True)
62 | save_path = os.path.join(save_dir, file_name)
63 |
64 | torch.save(category_embedding, save_path)
65 | print("Saving category embedding into: ", save_path)
66 |
--------------------------------------------------------------------------------
/tools/scripts/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 | NGPUS=$1
5 | PY_ARGS=${@:2}
6 |
7 | while true
8 | do
9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))
10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)"
11 | if [ "${status}" != "0" ]; then
12 | break;
13 | fi
14 | done
15 | echo $PORT
16 |
17 | ulimit -n 64000
18 |
19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} test.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS}
20 |
21 |
--------------------------------------------------------------------------------
/tools/scripts/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 | NGPUS=$1
5 | PY_ARGS=${@:2}
6 |
7 | while true
8 | do
9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 ))
10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)"
11 | if [ "${status}" != "0" ]; then
12 | break;
13 | fi
14 | done
15 | echo $PORT
16 |
17 | ulimit -n 64000
18 |
19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} train.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS}
20 |
21 |
--------------------------------------------------------------------------------
/tools/train_utils/optimization/__init__.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 |
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import torch.optim.lr_scheduler as lr_sched
6 | import numpy as np
7 |
8 | from .fastai_optim import OptimWrapper
9 | from .learning_schedules_fastai import CosineWarmupLR, OneCycle
10 |
11 |
12 | def build_optimizer(model, optim_cfg):
13 | if optim_cfg.OPTIMIZER == 'adam':
14 | optimizer = optim.Adam(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY)
15 | elif optim_cfg.OPTIMIZER == 'sgd':
16 | optimizer = optim.SGD(
17 | model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY,
18 | momentum=optim_cfg.MOMENTUM
19 | )
20 | elif optim_cfg.OPTIMIZER == 'adamw':
21 | optimizer = optim.AdamW(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY)
22 | elif optim_cfg.OPTIMIZER == 'adam_onecycle':
23 | def children(m: nn.Module):
24 | return list(m.children())
25 |
26 | def num_children(m: nn.Module) -> int:
27 | return len(children(m))
28 |
29 | flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m]
30 | get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))]
31 |
32 | optimizer_func = partial(optim.Adam, betas=(0.9, 0.99))
33 | optimizer = OptimWrapper.create(
34 | optimizer_func, 3e-3, get_layer_groups(model), wd=optim_cfg.WEIGHT_DECAY, true_wd=True, bn_wd=True
35 | )
36 | else:
37 | raise NotImplementedError
38 |
39 | return optimizer
40 |
41 |
42 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg):
43 | total_steps = total_iters_each_epoch * total_epochs
44 |
45 | if optim_cfg.SCHEDULER == 'poly':
46 | lr_scheduler = PolyLR(optimizer, max_iter=total_steps, power=optim_cfg.POWER)
47 | elif optim_cfg.OPTIMIZER == 'adam_onecycle':
48 | lr_scheduler = OneCycle(
49 | optimizer, total_steps, optim_cfg.LR, list(optim_cfg.MOMS), optim_cfg.DIV_FACTOR, optim_cfg.PCT_START
50 | )
51 | else:
52 | lr_scheduler = None
53 |
54 | return lr_scheduler
55 |
56 |
57 | class LambdaStepLR(lr_sched.LambdaLR):
58 | def __init__(self, optimizer, lr_lambda, last_step=-1):
59 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step)
60 |
61 | @property
62 | def last_step(self):
63 | """Use last_epoch for the step counter"""
64 | return self.last_epoch
65 |
66 | @last_step.setter
67 | def last_step(self, v):
68 | self.last_epoch = v
69 |
70 |
71 | class PolyLR(LambdaStepLR):
72 | """DeepLab learning rate policy"""
73 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1):
74 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step)
75 |
76 |
77 | class CosLR(LambdaStepLR):
78 | """Runyu's LR policy"""
79 | def __init__(self, optimizer, cos_lambda_func, last_step=-1):
80 | super(CosLR, self).__init__(optimizer, cos_lambda_func, last_step)
81 |
82 |
83 | def cosine_lr_after_step(optimizer, base_lr, epoch, step_epoch, total_epochs, clip=1e-6):
84 | if epoch < step_epoch:
85 | lr = base_lr
86 | else:
87 | lr = clip + 0.5 * (base_lr - clip) * \
88 | (1 + np.cos(np.pi * ((epoch - step_epoch) / (total_epochs - step_epoch))))
89 |
90 | for param_group in optimizer.param_groups:
91 | param_group['lr'] = lr
92 |
93 |
94 | def adjust_lr(optim_cfg, optimizer, scheduler, total_epochs, total_iters_per_epoch, epoch, iter, accumulated_iter, no_step=False):
95 | # adjust learning rate
96 | if optim_cfg.SCHEDULER == 'cos':
97 | max_iter = total_iters_per_epoch * total_epochs
98 | cos_learning_rate(
99 | optimizer, optim_cfg.LR, epoch * total_iters_per_epoch + iter + 1, max_iter, 0, 0)
100 | elif optim_cfg.SCHEDULER == 'cos_after_step':
101 | cosine_lr_after_step(optimizer, optim_cfg.LR, epoch, optim_cfg.STEP_EPOCH, total_epochs)
102 | elif optim_cfg.SCHEDULER in ['adam_onecycle', 'poly']:
103 | assert scheduler is not None
104 | if not no_step:
105 | scheduler.step(accumulated_iter)
106 | elif optim_cfg.SCHEDULER in ['multistep']:
107 | pass
108 | else:
109 | raise NotImplementedError
110 |
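
With SCHEDULER: cos_after_step, the learning rate is held at LR until STEP_EPOCH and then decays along a cosine toward the clip value. A small numeric check that reproduces the cosine_lr_after_step formula above with the S3DIS instance config values (LR=0.004, STEP_EPOCH=40, NUM_EPOCHS=64, clip=1e-6):

    import numpy as np

    def lr_at(epoch, base_lr=0.004, step_epoch=40, total_epochs=64, clip=1e-6):
        # same formula as cosine_lr_after_step, without touching an optimizer
        if epoch < step_epoch:
            return base_lr
        return clip + 0.5 * (base_lr - clip) * (
            1 + np.cos(np.pi * (epoch - step_epoch) / (total_epochs - step_epoch)))

    for e in [0, 39, 40, 52, 63]:
        print(e, round(lr_at(e), 6))
    # epochs 0-39 stay at 0.004; epoch 52 is roughly 0.002; by epoch 63 the lr has decayed to ~2e-5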
--------------------------------------------------------------------------------
/tools/train_utils/optimization/learning_schedules_fastai.py:
--------------------------------------------------------------------------------
1 | # This file is modified from https://github.com/traveller59/second.pytorch
2 |
3 | import math
4 | from functools import partial
5 |
6 | import numpy as np
7 | import torch.optim.lr_scheduler as lr_sched
8 |
9 | from .fastai_optim import OptimWrapper
10 |
11 |
12 | class LRSchedulerStep(object):
13 | def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases,
14 | mom_phases):
15 | # if not isinstance(fai_optimizer, OptimWrapper):
16 | # raise TypeError('{} is not a fastai OptimWrapper'.format(
17 | # type(fai_optimizer).__name__))
18 | self.optimizer = fai_optimizer
19 | self.total_step = total_step
20 | self.lr_phases = []
21 |
22 | for i, (start, lambda_func) in enumerate(lr_phases):
23 | if len(self.lr_phases) != 0:
24 | assert self.lr_phases[-1][0] < start
25 | if isinstance(lambda_func, str):
26 | lambda_func = eval(lambda_func)
27 | if i < len(lr_phases) - 1:
28 | self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func))
29 | else:
30 | self.lr_phases.append((int(start * total_step), total_step, lambda_func))
31 | assert self.lr_phases[0][0] == 0
32 | self.mom_phases = []
33 | for i, (start, lambda_func) in enumerate(mom_phases):
34 | if len(self.mom_phases) != 0:
35 | assert self.mom_phases[-1][0] < start
36 | if isinstance(lambda_func, str):
37 | lambda_func = eval(lambda_func)
38 | if i < len(mom_phases) - 1:
39 | self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func))
40 | else:
41 | self.mom_phases.append((int(start * total_step), total_step, lambda_func))
42 | assert self.mom_phases[0][0] == 0
43 |
44 | def step(self, step):
45 | for start, end, func in self.lr_phases:
46 | if step >= start:
47 | self.optimizer.lr = func((step - start) / (end - start))
48 | for start, end, func in self.mom_phases:
49 | if step >= start:
50 | self.optimizer.mom = func((step - start) / (end - start))
51 |
52 |
53 | def annealing_cos(start, end, pct):
54 | # print(pct, start, end)
55 | "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
56 | cos_out = np.cos(np.pi * pct) + 1
57 | return end + (start - end) / 2 * cos_out
58 |
59 |
60 | class OneCycle(LRSchedulerStep):
61 | def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor,
62 | pct_start):
63 | self.lr_max = lr_max
64 | self.moms = moms
65 | self.div_factor = div_factor
66 | self.pct_start = pct_start
67 | a1 = int(total_step * self.pct_start)
68 | a2 = total_step - a1
69 | low_lr = self.lr_max / self.div_factor
70 | lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)),
71 | (self.pct_start,
72 | partial(annealing_cos, self.lr_max, low_lr / 1e4)))
73 | mom_phases = ((0, partial(annealing_cos, *self.moms)),
74 | (self.pct_start, partial(annealing_cos,
75 | *self.moms[::-1])))
76 | fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0]
77 | super().__init__(fai_optimizer, total_step, lr_phases, mom_phases)
78 |
79 |
80 | class CosineWarmupLR(lr_sched._LRScheduler):
81 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
82 | self.T_max = T_max
83 | self.eta_min = eta_min
84 | super(CosineWarmupLR, self).__init__(optimizer, last_epoch)
85 |
86 | def get_lr(self):
87 | return [self.eta_min + (base_lr - self.eta_min) *
88 | (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2
89 | for base_lr in self.base_lrs]
90 |
91 |
92 | class FakeOptim:
93 | def __init__(self):
94 | self.lr = 0
95 | self.mom = 0
96 |
97 |
98 | if __name__ == "__main__":
99 | import matplotlib.pyplot as plt
100 |
101 | opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10
102 | schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1)
103 |
104 | lrs = []
105 | moms = []
106 | for i in range(100):
107 | schd.step(i)
108 | lrs.append(opt.lr)
109 | moms.append(opt.mom)
110 | plt.plot(lrs)
111 | # plt.plot(moms)
112 | plt.show()
113 | plt.plot(moms)
114 | plt.show()
115 |
--------------------------------------------------------------------------------
/tools/visual_utils/indoor_utils/ply_utils.py:
--------------------------------------------------------------------------------
1 | from plyfile import PlyData
2 | import numpy as np
3 |
4 |
5 | def read_ply(path):
6 | plydata = PlyData.read(path)
7 | num_verts = plydata['vertex'].count
8 |
9 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32)
10 | vertices[:, 0] = plydata['vertex']['x']
11 | vertices[:, 1] = plydata['vertex']['y']
12 | vertices[:, 2] = plydata['vertex']['z']
13 |
14 | rgb = np.zeros(shape=[num_verts, 3], dtype=np.float32)
15 | rgb[:, 0] = plydata['vertex']['red']
16 | rgb[:, 1] = plydata['vertex']['green']
17 | rgb[:, 2] = plydata['vertex']['blue']
18 | alpha = np.array(plydata['vertex']['alpha'])
19 |
20 | face_indices = plydata['face']['vertex_indices']
21 |
22 | return vertices, rgb, alpha, face_indices
23 |
24 |
25 | def write_ply(output_file, data_dict):
26 | verts, colors = data_dict['xyz'], data_dict['rgb']
27 | if 'indices' not in data_dict:
28 | data_dict['indices'] = []
29 |
30 | file = open(output_file, 'w')
31 | file.write('ply \n')
32 | file.write('format ascii 1.0\n')
33 | file.write('element vertex {:d}\n'.format(len(verts)))
34 | file.write('property float x\n')
35 | file.write('property float y\n')
36 | file.write('property float z\n')
37 | file.write('property uchar red\n')
38 | file.write('property uchar green\n')
39 | file.write('property uchar blue\n')
40 | if 'alpha' in data_dict:
41 | file.write('property uchar alpha\n')
42 | file.write('element face {:d}\n'.format(len(data_dict['indices'])))
43 | file.write('property list uchar uint vertex_indices\n')
44 | file.write('end_header\n')
45 |
46 | if 'alpha' in data_dict:
47 | for vert, color, a in zip(verts, colors, data_dict['alpha']):
48 | file.write('{:f} {:f} {:f} {:d} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2],
49 | int(color[0]),
50 | int(color[1]),
51 | int(color[2]),
52 | int(a)))
53 | else:
54 | for vert, color in zip(verts, colors):
55 | file.write('{:f} {:f} {:f} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2],
56 | int(color[0]),
57 | int(color[1]),
58 | int(color[2])))
59 | for ind in data_dict['indices']:
60 | file.write('3 {:d} {:d} {:d}\n'.format(ind[0], ind[1], ind[2]))
61 | file.close()
62 |
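
write_ply expects a data_dict with at least 'xyz' and 'rgb' arrays, plus optional 'alpha' and 'indices', and writes an ASCII PLY file. A tiny usage sketch with synthetic data (the import path is illustrative):

    import numpy as np
    from ply_utils import write_ply  # assuming this module is importable

    data_dict = {
        'xyz': np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
        'rgb': np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]]),
        'indices': [[0, 1, 2]],   # one triangle face
    }
    write_ply('demo.ply', data_dict)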
--------------------------------------------------------------------------------