├── LICENSE ├── README.md ├── assets ├── scene_0019.gif ├── scene_0025.gif ├── scene_003.gif ├── scene_005.gif └── scene_0164.gif ├── docs ├── DATASET.md ├── INFER.md ├── INSTALL.md ├── MODEL.md ├── association_module.png └── framework.png ├── pcseg ├── __init__.py ├── config.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── dataset.cpython-38.pyc │ │ └── indoor_dataset.cpython-38.pyc │ ├── augmentor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── augmentor_utils.cpython-38.pyc │ │ │ └── data_augmentor.cpython-38.pyc │ │ ├── augmentor_utils.py │ │ └── data_augmentor.py │ ├── dataset.py │ ├── indoor_dataset.py │ ├── processor │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── data_processor.cpython-38.pyc │ │ └── data_processor.py │ ├── s3dis │ │ ├── __pycache__ │ │ │ └── s3dis_dataset.cpython-38.pyc │ │ ├── meta │ │ │ ├── all_data_label.txt │ │ │ ├── anno_paths.txt │ │ │ ├── area6_data_label.txt │ │ │ └── class_names.txt │ │ ├── preprocess.py │ │ └── s3dis_dataset.py │ └── scannet │ │ ├── __pycache__ │ │ └── scannet_dataset.cpython-38.pyc │ │ └── scannet_dataset.py ├── external_libs │ └── softgroup_ops │ │ ├── ops │ │ ├── __init__.py │ │ ├── functions.py │ │ ├── setup.py │ │ └── src │ │ │ ├── bfs_cluster │ │ │ ├── bfs_cluster.cpp │ │ │ ├── bfs_cluster.cu │ │ │ └── bfs_cluster.h │ │ │ ├── cal_iou_and_masklabel │ │ │ ├── cal_iou_and_masklabel.cpp │ │ │ ├── cal_iou_and_masklabel.cu │ │ │ └── cal_iou_and_masklabel.h │ │ │ ├── cuda.cu │ │ │ ├── cuda_utils.h │ │ │ ├── datatype │ │ │ ├── datatype.cpp │ │ │ └── datatype.h │ │ │ ├── roipool │ │ │ ├── roipool.cpp │ │ │ ├── roipool.cu │ │ │ └── roipool.h │ │ │ ├── sec_mean │ │ │ ├── sec_mean.cpp │ │ │ ├── sec_mean.cu │ │ │ └── sec_mean.h │ │ │ ├── softgroup_api.cpp │ │ │ ├── softgroup_ops.cpp │ │ │ ├── softgroup_ops.h │ │ │ └── voxelize │ │ │ ├── voxelize.cpp │ │ │ ├── voxelize.cu │ │ │ └── voxelize.h │ │ ├── setup.cfg │ │ └── setup.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-38.pyc │ ├── adapter │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── vl_adapter.cpython-38.pyc │ │ └── vl_adapter.py │ ├── head │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── binary_head.cpython-38.pyc │ │ │ ├── caption_head.cpython-38.pyc │ │ │ ├── inst_head.cpython-38.pyc │ │ │ ├── linear_head.cpython-38.pyc │ │ │ └── text_seg_head.cpython-38.pyc │ │ ├── binary_head.py │ │ ├── caption_head.py │ │ ├── inst_head.py │ │ ├── linear_head.py │ │ └── text_seg_head.py │ ├── model_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── basic_block_1d.cpython-38.pyc │ │ │ ├── fp16.cpython-38.pyc │ │ │ ├── rle_utils.cpython-38.pyc │ │ │ └── unet_blocks.cpython-38.pyc │ │ ├── basic_block_1d.py │ │ ├── basic_block_2d.py │ │ ├── fp16.py │ │ ├── rle_utils.py │ │ └── unet_blocks.py │ ├── text_networks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── prompt_template.cpython-38.pyc │ │ │ └── text_models.cpython-38.pyc │ │ ├── prompt_template.py │ │ └── text_models.py │ ├── vision_backbones_3d │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── spconv_unet_indoor.cpython-38.pyc │ │ ├── spconv_unet_indoor.py │ │ └── vfe │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── indoor_vfe.cpython-38.pyc │ │ │ └── vfe_template.cpython-38.pyc │ │ │ ├── 
indoor_vfe.py │ │ │ └── vfe_template.py │ └── vision_networks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── network_template.cpython-38.pyc │ │ └── sparseunet_textseg.cpython-38.pyc │ │ ├── network_template.py │ │ ├── sparseunet.py │ │ └── sparseunet_textseg.py └── utils │ ├── __init__.py │ ├── arnold_utils.py │ ├── caption_utils.py │ ├── common_utils.py │ ├── commu_utils.py │ ├── loss_utils.py │ ├── metric_utils.py │ ├── spconv_utils.py │ └── voxelize_utils.py ├── requirements.txt ├── setup.py └── tools ├── _init_path.py ├── cfgs ├── dataset_configs │ ├── s3dis_dataset.yaml │ ├── s3dis_dataset_image.yaml │ ├── scannet_dataset.yaml │ └── scannet_dataset_image.yaml ├── s3dis_models │ ├── inst │ │ ├── softgroup_clip_adamw.yaml │ │ ├── softgroup_clip_base6_caption_adamw.yaml │ │ └── softgroup_clip_base8_caption_adamw.yaml │ ├── spconv_clip_adamw.yaml │ ├── spconv_clip_base6_caption_adamw.yaml │ └── spconv_clip_base8_caption_adamw.yaml └── scannet_models │ ├── inst │ ├── softgroup_clip_adamw.yaml │ ├── softgroup_clip_base10_caption_adamw.yaml │ ├── softgroup_clip_base13_caption_adamw.yaml │ ├── softgroup_clip_base8_caption_adamw.yaml │ └── softgroup_clip_openvocab_test.yaml │ ├── spconv_clip_adamw.yaml │ ├── spconv_clip_base10_caption_adamw.yaml │ ├── spconv_clip_base12_caption_adamw.yaml │ ├── spconv_clip_base15_caption_adamw.yaml │ └── spconv_clip_openvocab_test.yaml ├── eval_utils ├── eval_utils.py ├── inst_eval │ ├── __init__.py │ ├── eval_utils.py │ ├── instance_eval_utils.py │ └── pointwise_eval_utils.py └── save_utils.py ├── process_tools ├── __pycache__ │ ├── generate_caption.cpython-38.pyc │ ├── generate_caption_idx.cpython-310.pyc │ └── generate_caption_idx.cpython-38.pyc ├── combine_multiple_caption_files.py ├── filter_caption_without_points.py ├── generate_caption.py ├── generate_caption_idx.py └── generate_category_embedding.py ├── scripts ├── dist_test.sh └── dist_train.sh ├── test.py ├── train.py ├── train_utils ├── optimization │ ├── __init__.py │ ├── fastai_optim.py │ └── learning_schedules_fastai.py └── train_utils.py └── visual_utils ├── indoor_utils ├── color_utils.py └── ply_utils.py ├── open3d_vis_utils.py ├── visualize_indoor.py └── visualize_utils.py /README.md: -------------------------------------------------------------------------------- 1 |
2 | 
3 | PLA & RegionPLC
4 | This repo contains the official implementation of PLA (CVPR 2023) and RegionPLC (CVPR 2024)
5 | 
6 | 
7 | 
8 | PLA: Language-Driven Open-Vocabulary 3D Scene Understanding
9 | 
10 | 
11 | Runyu Ding*,
12 | Jihan Yang*,
13 | Chuhui Xue,
14 | Wenqing Zhang,
15 | Song Bai,
16 | Xiaojuan Qi
17 | 
18 | 
19 | CVPR 2023
20 | 
21 | [project page](https://dingry.github.io/projects/PLA) | [arXiv](https://arxiv.org/abs/2211.16312)
22 | 
23 | 
24 | 
25 | RegionPLC: Regional Point-Language Contrastive Learning for Open-World 3D Scene Understanding
26 | 
27 | 
28 | Jihan Yang*,
29 | Runyu Ding*,
30 | Weipeng Deng,
31 | Zhe Wang,
32 | Xiaojuan Qi
33 | 
34 | CVPR 2024
35 | 
36 | project page | arXiv
37 | 
38 | 
39 | 
40 | ##### Highlights:
41 | - The official PLA implementation is contained in the `main` branch
42 | - The official RegionPLC implementation is contained in the `regionplc` branch
43 | 
44 | ### Release
45 | - [2024-05-05] Released the **RegionPLC** implementation. Please check out the `regionplc` branch to try it!
46 | 
47 | ### Getting Started
48 | 
49 | #### Installation
50 | Please refer to [INSTALL.md](docs/INSTALL.md) for installation instructions.
51 | 
52 | #### Dataset Preparation
53 | Please refer to [DATASET.md](docs/DATASET.md) for dataset preparation.
54 | 
55 | #### Training & Inference
56 | 
57 | Please refer to [MODEL.md](docs/MODEL.md) for training and inference scripts and pretrained models. A minimal conceptual sketch of the open-vocabulary prediction step is included at the end of this README.
58 | 
59 | 
60 | ### Citation
61 | If you find this project useful in your research, please consider citing:
62 | ```bibtex
63 | @inproceedings{ding2022language,
64 |     title={PLA: Language-Driven Open-Vocabulary 3D Scene Understanding},
65 |     author={Ding, Runyu and Yang, Jihan and Xue, Chuhui and Zhang, Wenqing and Bai, Song and Qi, Xiaojuan},
66 |     booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
67 |     year={2023}
68 | }
69 | ```
70 | 
71 | ```bibtex
72 | @inproceedings{yang2024regionplc,
73 |     title={RegionPLC: Regional point-language contrastive learning for open-world 3d scene understanding},
74 |     author={Yang, Jihan and Ding, Runyu and Deng, Weipeng and Wang, Zhe and Qi, Xiaojuan},
75 |     booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
76 |     year={2024}
77 | }
78 | ```
79 | 
80 | ### Acknowledgement
81 | Code is partly borrowed from [OpenPCDet](https://github.com/open-mmlab/OpenPCDet), [PointGroup](https://github.com/dvlab-research/PointGroup) and [SoftGroup](https://github.com/thangvubk/SoftGroup).
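
### Open-vocabulary prediction at a glance

The snippet below is a minimal, self-contained sketch of the idea behind open-vocabulary 3D scene understanding as used in this project: per-point features are projected into the text-embedding space and matched against embeddings of the category names. It is illustrative only; the tensor sizes, the single `Linear` adapter, and the 0.07 temperature are placeholder assumptions rather than the actual modules in `pcseg` (the real components live under `pcseg/models`, e.g. `head/text_seg_head.py` and `text_networks/`).

```python
import torch
import torch.nn.functional as F

# Placeholder sizes: N points, C backbone channels, K category names, D text-embedding dim.
N, C, K, D = 4096, 32, 20, 512

point_feats = torch.randn(N, C)                      # per-point features from the 3D backbone
text_embed = F.normalize(torch.randn(K, D), dim=-1)  # pre-computed category embeddings (cf. data/*/text_embed)

adapter = torch.nn.Linear(C, D)                      # stand-in for the learned vision-language adapter
point_embed = F.normalize(adapter(point_feats), dim=-1)

logits = point_embed @ text_embed.t() / 0.07         # cosine similarity with a CLIP-style temperature
pred = logits.argmax(dim=-1)                         # per-point category index over the open vocabulary
print(pred.shape)                                    # torch.Size([4096])
```

Changing the vocabulary at test time amounts to re-computing `text_embed` for a new category list; in the configs this list is controlled by `TEXT_EMBED.CATEGORY_NAMES` (see `docs/INFER.md`).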
-------------------------------------------------------------------------------- /assets/scene_0019.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0019.gif -------------------------------------------------------------------------------- /assets/scene_0025.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0025.gif -------------------------------------------------------------------------------- /assets/scene_003.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_003.gif -------------------------------------------------------------------------------- /assets/scene_005.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_005.gif -------------------------------------------------------------------------------- /assets/scene_0164.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/assets/scene_0164.gif -------------------------------------------------------------------------------- /docs/DATASET.md: -------------------------------------------------------------------------------- 1 | The dataset configs are located within [tools/cfgs/dataset_configs](../tools/cfgs/dataset_configs), and the model configs are located within [tools/cfgs](../tools/cfgs) for different settings. 2 | 3 | #### ScanNet Dataset 4 | - Please download the [ScanNet Dataset](http://www.scan-net.org/) and follow [PointGroup](https://github.com/dvlab-research/PointGroup/blob/master/dataset/scannetv2/prepare_data_inst.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD). 5 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EpTBva1Ev0BLu7TYz_03UUQBpLnyFlijK9z645tavor68w?e=liM2HD). If you want to generate captions on your own, please download image data ([scannet_frames_25k]((http://www.scan-net.org/))) from ScanNet and follow scripts [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py). 6 | 7 | - The directory organization should be as follows: 8 | 9 | ``` 10 | PLA 11 | ├── data 12 | │ ├── scannetv2 13 | │ │ │── train 14 | │ │ │ │── scene0000_00.pth 15 | │ │ │ │── ... 
16 | │ │ │── val 17 | │ │ │── text_embed 18 | │ │ │── caption_idx 19 | │ │ │── scannetv2_train.txt 20 | │ │ │── scannetv2_val.txt 21 | │ │ │—— scannet_frames_25k (optional, only for caption generation) 22 | ├── pcseg 23 | ├── tools 24 | ``` 25 | 26 | #### S3DIS Dataset 27 | - Please download the [S3DIS Dataset](http://buildingparser.stanford.edu/dataset.html#Download) and follow [pcseg/datasets/s3dis/preprocess.py](../pcseg/datasets/s3dis/preprocess.py) to pre-process the dataset as follows or directly download the pre-processed data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7). 28 | ```bash 29 | python3 pcseg/datasets/s3dis/preprocess.py 30 | ``` 31 | 32 | - Additionally, please download the caption data [here](https://connecthkuhk-my.sharepoint.com/:f:/g/personal/u3007346_connect_hku_hk/EoNAsU5f8YRGtQYV8ewhwvQB7QPbxT-uwKqTk8FPiyUTtQ?e=wq58H7). If you want to generate captions on your own, please download the image data [here](https://github.com/alexsax/2D-3D-Semantics) and follow the scripts here: [generate_caption.py](../tools/process_tools/generate_caption.py) and [generate_caption_idx.py](../tools/process_tools/generate_caption_idx.py). 33 | 34 | - The directory organization should be as follows: 35 | 36 | ``` 37 | PLA 38 | ├── data 39 | │ ├── s3dis 40 | │ │ │── stanford_indoor3d_inst 41 | │ │ │ │── Area_1_Conference_1.npy 42 | │ │ │ │── ... 43 | │ │ │── text_embed 44 | │ │ │── caption_idx 45 | │ │ │—— s3dis_2d (optional, only for caption generation) 46 | ├── pcseg 47 | ├── tools 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/INFER.md: -------------------------------------------------------------------------------- 1 | If you wish to test on custom 3D scenes or categories, you can use our example configs: 2 | `tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml` and `tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml`. 3 | 4 | The key parameters to consider are as follows: 5 | - `TEXT_EMBED.CATEGORY_NAMES` 6 | 7 | This parameter defines the category list for segmentation. 8 | 9 | - `TASK_HEAD.CORRECT_SEG_PRED_BINARY` and `INST_HEAD.CORRECT_SEG_PRED_BINARY` 10 | 11 | These parameters control whether the binary head is used to rectify the semantic scores. 12 | 13 | 14 | To save the results, you can use the flag `--save_results semantic,instance`. Afterward, you can use the visualization utilities in `tools/visual_utils/visualize_indoor.py` to visualize the predicted results. 15 | 16 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | #### Requirements 2 | All the code is tested in the following environment: 3 | - Python 3.7+ 4 | - PyTorch 1.8 5 | - CUDA 11.1 6 | - [spconv v2.x](https://github.com/traveller59/spconv) 7 | 8 | #### Install dependent libraries 9 | a. Clone this repository. 10 | ```bash 11 | git clone https://github.com/CVMI-Lab/PLA.git 12 | ``` 13 | 14 | b.
Install the dependent libraries as follows: 15 | 16 | * Install the dependent Python libraries (Please note that you need to install the correct version of `torch` and `spconv` according to your CUDA version): 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | * Install [SoftGroup](https://github.com/thangvubk/SoftGroup) following its [official guidance](https://github.com/thangvubk/SoftGroup/blob/main/docs/installation.md). 22 | ```bash 23 | cd pcseg/external_libs/softgroup_ops 24 | python3 setup.py build_ext develop 25 | cd ../../.. 26 | ``` 27 | 28 | * Install [pcseg](../pcseg) 29 | ```bash 30 | python3 setup.py develop 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/MODEL.md: -------------------------------------------------------------------------------- 1 | #### Training 2 | 3 | ```bash 4 | cd tools 5 | sh scripts/dist_train.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} ${PY_ARGS} 6 | ``` 7 | 8 | For instance, 9 | - train B15/N4 semantic segmentation on ScanNet: 10 | ```bash 11 | cd tools 12 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --extra_tag exp_tag 13 | ``` 14 | - train B13/N4 instance segmentation on ScanNet: 15 | ```bash 16 | cd tools 17 | sh scripts/dist_train.sh 8 --cfg_file cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml --extra_tag exp_tag 18 | ``` 19 | 20 | #### Inference 21 | 22 | ```bash 23 | cd tools 24 | sh scripts/dist_test.sh ${NUM_GPUS} --cfg_file ${CONFIG_FILE} --ckpt ${CKPT_PATH} 25 | ``` 26 | 27 | For instance, 28 | - to test a B15/N4 model on ScanNet: 29 | ```bash 30 | cd tools 31 | sh scripts/dist_test.sh 8 --cfg_file cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml --ckpt output/scannet_models/spconv_clip_base15_caption/exp_tag/ckpt/checkpoint_ep128.pth 32 | ``` 33 | 34 | ### Model Zoo 35 | - semantic segmentation 36 | 37 | | Dataset | Partition | hIoU / mIoU(B) / mIoU(N) | Path | 38 | |:---:|:---:|:---:|:---:| 39 | | ScanNet | B15/N4 | 64.9 / 67.8 / 62.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef8xk_X0ortMjC0F8PBQl2wBacVPgO72La8h_ZTDsKj__Q?e=Uq6W8I) | 40 | | ScanNet | B12/N7 | 55.9 / 70.4 / 46.4 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EVl7SdeUEPFAvrj2xnWSb-sBCOtWYyVOwBo6ggFb9x7dNA?e=feZaxH) | 41 | | ScanNet | B10/N9 | 52.8 / 76.6 / 40.3 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Ef0P_6XraDpCo0RRgOJ1wGQB-xOW7T6lecvVRi5P90Edbw?e=hqrP8X) | 42 | | S3DIS | B8/N4 | 35.6 / 58.3 / 25.6 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EYIW4SNX5B9Go_LKiim1KFEB_abYv0bDZMggE_6Ifjau0g?e=8BD0K3) | 43 | | S3DIS | B6/N6 | 38.4 / 53.9 / 29.8 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EeNYtkS3pmhAvc3Hxj7__SwB8SMzZdzmljRtCYuYG8NHcA?e=aC0aE2) | 44 | 45 | 46 | - instance segmentation 47 | 48 | | Dataset | Partition | hAP50 / mAP50(B) / mAP50(N) | Path | 49 | |:---:|:---:|:---:|:---:| 50 | | ScanNet | B13/N4 | 57.8 / 58.7 / 56.9| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/Eb4N2hfCevlBlBxWlK9DtioBP6RX7gtXUmY0Huu4MknUHA?e=YDydlj) | 51 | | ScanNet | B10/N7 | 31.6 / 54.8 / 22.2 | [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETsHZCFElvdCmk8ulRzBk-EBxm8fHk8rLJnpUdk9_n3i1Q?e=4SGy1N) | 52 | | ScanNet | B8/N9 | 36.9 / 63.1 / 26.2 | 
[ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EXAaU8RDecJFn_1J2Q-IqdsBALbv-5d_L_RyIOrdIjB66g?e=c8dFD6) | 53 | | S3DIS | B8/N4 | 17.2 / 60.9 / 10.0| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/ETzzD-pEhvtMkJGnIxzgIP0Bk3f2He9_hkgfVtexEMFqpg?e=xJpaOV) | 54 | | S3DIS | B6/N6 |15.8 / 48.2 / 9.5| [ckpt](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/u3007346_connect_hku_hk/EWoqIoBWfSRBqQwahLTKQGkB5Gwp8zs0EvT3MkGMDiBOrw?e=daBppj) | 55 | 56 | -------------------------------------------------------------------------------- /docs/association_module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/association_module.png -------------------------------------------------------------------------------- /docs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/docs/framework.png -------------------------------------------------------------------------------- /pcseg/__init__.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from pathlib import Path 3 | 4 | from .version import __version__ 5 | 6 | __all__ = [ 7 | '__version__' 8 | ] 9 | 10 | 11 | def get_git_commit_number(): 12 | if not (Path(__file__).parent / '../.git').exists(): 13 | return '0000000' 14 | 15 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE) 16 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7] 17 | return git_commit_number 18 | 19 | 20 | script_version = get_git_commit_number() 21 | 22 | 23 | if script_version not in __version__: 24 | __version__ = __version__ + '+py%s' % script_version 25 | -------------------------------------------------------------------------------- /pcseg/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | from easydict import EasyDict 5 | 6 | 7 | def log_config_to_file(cfg, pre='cfg', logger=None): 8 | for key, val in cfg.items(): 9 | if isinstance(cfg[key], EasyDict): 10 | logger.info('\n%s.%s = edict()' % (pre, key)) 11 | log_config_to_file(cfg[key], pre=pre + '.' 
+ key, logger=logger) 12 | continue 13 | logger.info('%s.%s: %s' % (pre, key, val)) 14 | 15 | 16 | def cfg_from_list(cfg_list, config): 17 | """Set config keys via list (e.g., from command line).""" 18 | from ast import literal_eval 19 | assert len(cfg_list) % 2 == 0 20 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 21 | key_list = k.split('.') 22 | d = config 23 | for subkey in key_list[:-1]: 24 | assert subkey in d, 'NotFoundKey: %s' % subkey 25 | d = d[subkey] 26 | subkey = key_list[-1] 27 | assert subkey in d, 'NotFoundKey: %s' % subkey 28 | try: 29 | value = literal_eval(v) 30 | except: 31 | value = v 32 | 33 | if type(value) != type(d[subkey]) and isinstance(d[subkey], EasyDict): 34 | key_val_list = value.split(',') 35 | for src in key_val_list: 36 | cur_key, cur_val = src.split(':') 37 | val_type = type(d[subkey][cur_key]) 38 | cur_val = val_type(cur_val) 39 | d[subkey][cur_key] = cur_val 40 | elif type(value) != type(d[subkey]) and isinstance(d[subkey], list): 41 | val_list = value.split(',') 42 | for k, x in enumerate(val_list): 43 | val_list[k] = type(d[subkey][0])(x) 44 | d[subkey] = val_list 45 | else: 46 | assert type(value) == type(d[subkey]), \ 47 | 'type {} does not match original type {}'.format(type(value), type(d[subkey])) 48 | d[subkey] = value 49 | 50 | 51 | def merge_new_config(config, new_config): 52 | if '_BASE_CONFIG_' in new_config: 53 | with open(new_config['_BASE_CONFIG_'], 'r') as f: 54 | try: 55 | yaml_config = yaml.safe_load(f, Loader=yaml.FullLoader) 56 | except: 57 | yaml_config = yaml.safe_load(f) 58 | # config.update(EasyDict(yaml_config)) 59 | merge_new_config(config, yaml_config) 60 | 61 | for key, val in new_config.items(): 62 | if not isinstance(val, dict): 63 | config[key] = val 64 | continue 65 | if key not in config: 66 | config[key] = EasyDict() 67 | merge_new_config(config[key], val) 68 | 69 | return config 70 | 71 | 72 | def cfg_from_yaml_file(cfg_file, config): 73 | with open(cfg_file, 'r') as f: 74 | try: 75 | new_config = yaml.safe_load(f, Loader=yaml.FullLoader) 76 | except: 77 | new_config = yaml.safe_load(f) 78 | 79 | merge_new_config(config=config, new_config=new_config) 80 | 81 | return config 82 | 83 | 84 | cfg = EasyDict() 85 | cfg.ROOT_DIR = (Path(__file__).resolve().parent / '../').resolve() 86 | cfg.LOCAL_RANK = 0 87 | -------------------------------------------------------------------------------- /pcseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from functools import partial 3 | from torch.utils.data import DataLoader 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | from pcseg.utils import common_utils 7 | 8 | from .dataset import DatasetTemplate 9 | from .scannet.scannet_dataset import ScanNetDataset, ScanNetInstDataset 10 | from .s3dis.s3dis_dataset import S3DISDataset, S3DISInstDataset 11 | 12 | 13 | __all__ = { 14 | 'DatasetTemplate': DatasetTemplate, 15 | 'ScanNetDataset': ScanNetDataset, 16 | 'ScanNetInstDataset': ScanNetInstDataset, 17 | 'S3DISDataset': S3DISDataset, 18 | 'S3DISInstDataset': S3DISInstDataset 19 | } 20 | 21 | 22 | class DistributedSampler(_DistributedSampler): 23 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 24 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 25 | self.shuffle = shuffle 26 | 27 | def __iter__(self): 28 | if self.shuffle: 29 | g = torch.Generator() 30 | g.manual_seed(self.epoch) 31 | indices = torch.randperm(len(self.dataset), 
generator=g).tolist() 32 | else: 33 | indices = torch.arange(len(self.dataset)).tolist() 34 | 35 | indices += indices[:(self.total_size - len(indices))] 36 | assert len(indices) == self.total_size 37 | 38 | indices = indices[self.rank:self.total_size:self.num_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | 43 | 44 | def build_dataloader(dataset_cfg, class_names, batch_size, dist, root_path=None, workers=4, seed=None, 45 | logger=None, training=True, merge_all_iters_to_one_epoch=False, total_epochs=0, 46 | multi_epoch_loader=False): 47 | 48 | dataset = __all__[dataset_cfg.DATASET]( 49 | dataset_cfg=dataset_cfg, 50 | class_names=class_names, 51 | root_path=root_path, 52 | training=training, 53 | logger=logger, 54 | ) 55 | 56 | if merge_all_iters_to_one_epoch: 57 | assert hasattr(dataset, 'merge_all_iters_to_one_epoch') 58 | dataset.merge_all_iters_to_one_epoch(merge=True, epochs=total_epochs) 59 | 60 | if dist: 61 | if training: 62 | sampler = torch.utils.data.distributed.DistributedSampler(dataset) 63 | else: 64 | rank, world_size = common_utils.get_dist_info() 65 | sampler = DistributedSampler(dataset, world_size, rank, shuffle=False) 66 | else: 67 | sampler = None 68 | 69 | if multi_epoch_loader: 70 | loader = MultiEpochsDataLoader 71 | else: 72 | loader = DataLoader 73 | 74 | dataloader = loader( 75 | dataset, batch_size=batch_size, pin_memory=True, num_workers=workers, 76 | shuffle=(sampler is None) and training, drop_last=training, sampler=sampler, 77 | collate_fn=getattr(dataset, dataset_cfg.COLLATE_FN), 78 | timeout=0, worker_init_fn=partial(common_utils.worker_init_fn, seed=seed) 79 | ) 80 | 81 | return dataset, dataloader, sampler 82 | 83 | 84 | class _RepeatSampler(object): 85 | """ Sampler that repeats forever. 
86 | Args: 87 | sampler (Sampler) 88 | """ 89 | 90 | def __init__(self, sampler): 91 | self.sampler = sampler 92 | 93 | def __iter__(self): 94 | while True: 95 | yield from iter(self.sampler) 96 | -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/__pycache__/indoor_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__init__.py -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/augmentor_utils.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/augmentor/__pycache__/data_augmentor.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/augmentor_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import scipy.ndimage 4 | import scipy.interpolate 5 | import scipy.stats 6 | 7 | 8 | def check_key(key): 9 | exist = key is not None 10 | if not exist: 11 | return False 12 | if isinstance(key, bool): 13 | enabled = key 14 | elif isinstance(key, dict): 15 | enabled = key.get('enabled', True) 16 | else: 17 | enabled = True 18 | return enabled 19 | 20 | 21 | def check_p(key): 22 | return (not isinstance(key, dict)) or ('p' not in key) or (np.random.rand() < key['p']) 23 | 24 | 25 | def elastic(x, gran, mag): 26 | blur0 = np.ones((3, 1, 
1)).astype('float32') / 3 27 | blur1 = np.ones((1, 3, 1)).astype('float32') / 3 28 | blur2 = np.ones((1, 1, 3)).astype('float32') / 3 29 | 30 | bb = np.abs(x).max(0).astype(np.int32) // gran + 3 31 | noise = [np.random.randn(bb[0], bb[1], bb[2]).astype('float32') for _ in range(3)] 32 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 33 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 34 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 35 | noise = [scipy.ndimage.filters.convolve(n, blur0, mode='constant', cval=0) for n in noise] 36 | noise = [scipy.ndimage.filters.convolve(n, blur1, mode='constant', cval=0) for n in noise] 37 | noise = [scipy.ndimage.filters.convolve(n, blur2, mode='constant', cval=0) for n in noise] 38 | ax = [np.linspace(-(b - 1) * gran, (b - 1) * gran, b) for b in bb] 39 | interp = [scipy.interpolate.RegularGridInterpolator(ax, n, bounds_error=0, fill_value=0) for n in noise] 40 | 41 | def g(x_): 42 | return np.hstack([i(x_)[:, None] for i in interp]) 43 | 44 | return x + g(x) * mag 45 | 46 | 47 | def scene_aug(aug, xyz, rgb=None): 48 | assert xyz.ndim == 2 49 | m = np.eye(3) 50 | if check_key(aug.jitter): 51 | m += np.random.randn(3, 3) * 0.1 52 | if check_key(aug.flip) and check_p(aug.flip): 53 | m[0][0] *= -1 # np.random.randint(0, 2) * 2 - 1 # flip x randomly 54 | if check_key(aug.rotation) and check_p(aug.rotation): 55 | theta_x = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[0] 56 | theta_y = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[1] 57 | theta_z = (np.random.rand() * 2 * math.pi - math.pi) * aug.rotation.value[2] 58 | Rx = np.array \ 59 | ([[1, 0, 0], [0, math.cos(theta_x), -math.sin(theta_x)], [0, math.sin(theta_x), math.cos(theta_x)]]) 60 | Ry = np.array \ 61 | ([[math.cos(theta_y), 0, math.sin(theta_y)], [0, 1, 0], [-math.sin(theta_y), 0, math.cos(theta_y)]]) 62 | Rz = np.array \ 63 | ([[math.cos(theta_z), math.sin(theta_z), 0], [-math.sin(theta_z), math.cos(theta_z), 0], [0, 0, 1]]) 64 | rot_mats = [Rx, Ry, Rz] 65 | if aug.rotation.get('shuffle', False): 66 | np.random.shuffle(rot_mats) 67 | m = np.matmul(m, rot_mats[0].dot(rot_mats[1]).dot(rot_mats[2])) 68 | xyz = np.matmul(xyz, m) 69 | if check_key(aug.random_jitter) and check_p(aug.random_jitter): 70 | if aug.random_jitter.accord_to_size: 71 | jitter_scale = (xyz.max(0) - xyz.min(0)).mean() * 0.1 72 | else: 73 | jitter_scale = aug.random_jitter.value 74 | random_noise = (np.random.rand(xyz.shape[0], xyz.shape[1]) - 0.5) * jitter_scale 75 | xyz += random_noise 76 | if check_key(aug.scaling_scene) and check_p(aug.scaling_scene): 77 | scaling_fac = np.random.rand() * (aug.scaling_scene.value[1] - aug.scaling_scene.value[0]) \ 78 | + aug.scaling_scene.value[0] 79 | xyz_center = (xyz.max(0) + xyz.min(0)) / 2.0 80 | xyz = (xyz - xyz_center) * scaling_fac + xyz_center 81 | 82 | if rgb is not None and check_key(aug.color_jitter): 83 | rgb += np.random.randn(3) * 0.1 84 | return xyz, rgb 85 | 86 | 87 | def crop(xyz, full_scale, max_npoint, step=32): 88 | xyz_offset = xyz.copy() 89 | valid_idxs = (xyz_offset.min(1) >= 0) 90 | assert valid_idxs.sum() == xyz.shape[0] 91 | full_scale = np.array([full_scale[1]] * 3) 92 | room_range = xyz.max(0) - xyz.min(0) 93 | 94 | while valid_idxs.sum() > max_npoint: 95 | step_temp = step 96 | if valid_idxs.sum() > 1e6: 97 | step_temp = step * 2 98 | offset = np.clip(full_scale - room_range + 0.001, None, 0) * 
np.random.rand(3) 99 | xyz_offset = xyz + offset 100 | valid_idxs = (xyz_offset.min(1) >= 0) * ((xyz_offset < full_scale).sum(1) == 3) 101 | full_scale[:2] -= step_temp 102 | 103 | return xyz_offset, valid_idxs 104 | 105 | -------------------------------------------------------------------------------- /pcseg/datasets/augmentor/data_augmentor.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import numpy as np 4 | 5 | from . import augmentor_utils 6 | 7 | 8 | class DataAugmentor(object): 9 | def __init__(self, dataset_cfg, **kwargs): 10 | self.data_augmentor_queue = [] 11 | self.aug_cfg = dataset_cfg.DATA_AUG 12 | self.kwargs = kwargs 13 | aug_config_list = self.aug_cfg.AUG_LIST 14 | 15 | self.data_augmentor_queue = [] 16 | for aug in aug_config_list: 17 | if aug not in self.aug_cfg: 18 | continue 19 | cur_augmentor = partial(getattr(self, aug), config=self.aug_cfg[aug]) 20 | self.data_augmentor_queue.append(cur_augmentor) 21 | 22 | def __getstate__(self): 23 | d = dict(self.__dict__) 24 | del d['logger'] 25 | return d 26 | 27 | def __setstate__(self, d): 28 | self.__dict__.update(d) 29 | 30 | def shuffle(self, data_dict=None, config=None): 31 | shuffle_idx = np.random.permutation(data_dict['points_xyz'].shape[0]) 32 | data_dict = self.update_data_dict(data_dict, shuffle_idx) 33 | return data_dict 34 | 35 | def crop(self, data_dict=None, config=None): 36 | data_dict['points_xyz_voxel_scale'], valid_idxs = augmentor_utils.crop( 37 | data_dict['points_xyz_voxel_scale'], self.kwargs['full_scale'], self.kwargs['max_npoint'], config.step, 38 | ) 39 | data_dict = self.update_data_dict(data_dict, valid_idxs) 40 | if data_dict['points_xyz'].shape[0] == 0: 41 | data_dict['valid'] = False 42 | return data_dict 43 | 44 | def forward(self, data_dict): 45 | """ 46 | Args: 47 | data_dict: 48 | points: (N, 3 + C_in) 49 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] 50 | gt_names: optional, (N), string 51 | ... 
52 | 53 | Returns: 54 | """ 55 | data_dict['valid'] = True 56 | for cur_augmentor in self.data_augmentor_queue: 57 | data_dict = cur_augmentor(data_dict=data_dict) 58 | return data_dict 59 | 60 | def scene_aug(self, data_dict=None, config=None): 61 | if self.check_func(config) and self.check_data(data_dict): 62 | data_dict['points_xyz'], data_dict['rgb'] = augmentor_utils.scene_aug( 63 | config, data_dict['points_xyz'], data_dict['rgb'] 64 | ) 65 | if data_dict['points_xyz'].shape[0] == 0: 66 | data_dict['valid'] = False 67 | return data_dict 68 | 69 | @staticmethod 70 | def update_data_dict(data_dict, idx): 71 | for key in data_dict: 72 | if key in ['points_xyz', 'points', 'points_xyz_voxel_scale', 'rgb', 'labels', 73 | 'inst_label', 'binary_labels', 'origin_idx']: 74 | if data_dict[key] is not None: 75 | data_dict[key] = data_dict[key][idx] 76 | return data_dict 77 | 78 | @staticmethod 79 | def check_func(key): 80 | return augmentor_utils.check_key(key) and augmentor_utils.check_p(key) 81 | 82 | def elastic(self, data_dict=None, config=None): 83 | data_dict['points_xyz_voxel_scale'] = data_dict['points_xyz'] * self.kwargs['voxel_scale'] 84 | if self.check_func(config) and self.check_data(data_dict): 85 | for (gran_fac, mag_fac) in config.value: 86 | data_dict['points_xyz_voxel_scale'] = augmentor_utils.elastic( 87 | data_dict['points_xyz_voxel_scale'], gran_fac * self.kwargs['voxel_scale'] // 50, 88 | mag_fac * self.kwargs['voxel_scale'] / 50 89 | ) 90 | if config.apply_to_feat: 91 | data_dict['points_xyz'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale'] 92 | 93 | # offset 94 | data_dict['points'] = data_dict['points_xyz_voxel_scale'] / self.kwargs['voxel_scale'] 95 | data_dict['points_xyz_voxel_scale'] -= data_dict['points_xyz_voxel_scale'].min(0) 96 | return data_dict 97 | 98 | @staticmethod 99 | def check_data(data_dict): 100 | return ('valid' not in data_dict) or data_dict['valid'] 101 | 102 | ################### 103 | # Used in outdoor # 104 | ################### 105 | @staticmethod 106 | def random_world_rotation(data_dict=None, config=None): 107 | points = data_dict['points'] 108 | rotate_rad = np.deg2rad(np.random.random() * 360) - np.pi 109 | c, s = np.cos(rotate_rad), np.sin(rotate_rad) 110 | j = np.matrix([[c, s], [-s, c]]) 111 | data_dict['points'][:, :2] = np.dot(points[:, :2], j) 112 | 113 | return data_dict 114 | 115 | @staticmethod 116 | def random_world_flip(data_dict=None, config=None): 117 | points = data_dict['points'] 118 | flip_type = np.random.choice(4, 1) 119 | 120 | if flip_type == 0: 121 | # flip x only 122 | points[:, 0] = -points[:, 0] 123 | elif flip_type == 1: 124 | # flip y only 125 | points[:, 1] = -points[:, 1] 126 | elif flip_type == 2: 127 | # flip x+y 128 | points[:, :2] = -points[:, :2] 129 | 130 | data_dict['points'] = points 131 | return data_dict 132 | 133 | @staticmethod 134 | def random_world_scaling(data_dict=None, config=None): 135 | points = data_dict['points'] 136 | noise_scale = np.random.uniform(config[0], config[1]) 137 | points[:, :2] = noise_scale * points[:, :2] 138 | 139 | data_dict['points'] = points 140 | return data_dict 141 | 142 | @staticmethod 143 | def random_world_translation(data_dict=None, config=None): 144 | points = data_dict['points'] 145 | noise_translate = np.array( 146 | [np.random.normal(0, config[0], 1), np.random.normal(0, config[1], 1), np.random.normal(0, config[2], 1)] 147 | ).T 148 | points[:, 0:3] += noise_translate 149 | 150 | data_dict['points'] = points 151 | return data_dict 152 | 
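# --------------------------------------------------------------------------------
# Illustrative usage sketch (an added example, not part of the original repository
# file). It exercises the augmentation utilities above on a random toy point cloud.
# The config keys mirror the ones read by augmentor_utils.scene_aug(); the numeric
# values (voxel scale 50, full_scale, max_npoint) are placeholders, not the values
# from the shipped configs. `easydict` is the same dependency used by pcseg/config.py.
# Run as a module so the relative import above resolves:
#   python -m pcseg.datasets.augmentor.data_augmentor
# --------------------------------------------------------------------------------
if __name__ == '__main__':
    from easydict import EasyDict

    toy_cfg = EasyDict({
        'jitter': True,                               # perturb the 3x3 transform with Gaussian noise
        'flip': {'p': 0.5},                           # flip the x axis with probability 0.5
        'rotation': {'p': 1.0, 'value': [0, 0, 1]},   # free rotation around the z axis only
        'random_jitter': None,                        # disabled: check_key(None) is False
        'scaling_scene': {'p': 0.5, 'value': [0.9, 1.1]},
        'color_jitter': True,
    })

    xyz = np.random.rand(1000, 3).astype(np.float32)  # toy scene inside a unit cube
    rgb = np.random.rand(1000, 3).astype(np.float32)
    xyz_aug, rgb_aug = augmentor_utils.scene_aug(toy_cfg, xyz, rgb)

    # crop() expects non-negative voxel-scale coordinates and returns shifted points
    # plus a boolean mask of the points that remain inside the spatial extent.
    xyz_voxel = (xyz_aug - xyz_aug.min(0)) * 50
    xyz_offset, valid = augmentor_utils.crop(xyz_voxel, full_scale=[128, 512], max_npoint=250000)
    print('augmented:', xyz_aug.shape, 'kept after crop:', int(valid.sum()))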
-------------------------------------------------------------------------------- /pcseg/datasets/processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__init__.py -------------------------------------------------------------------------------- /pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/processor/__pycache__/data_processor.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/s3dis/__pycache__/s3dis_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/all_data_label.txt: -------------------------------------------------------------------------------- 1 | Area_1_conferenceRoom_1.npy 2 | Area_1_conferenceRoom_2.npy 3 | Area_1_copyRoom_1.npy 4 | Area_1_hallway_1.npy 5 | Area_1_hallway_2.npy 6 | Area_1_hallway_3.npy 7 | Area_1_hallway_4.npy 8 | Area_1_hallway_5.npy 9 | Area_1_hallway_6.npy 10 | Area_1_hallway_7.npy 11 | Area_1_hallway_8.npy 12 | Area_1_office_10.npy 13 | Area_1_office_11.npy 14 | Area_1_office_12.npy 15 | Area_1_office_13.npy 16 | Area_1_office_14.npy 17 | Area_1_office_15.npy 18 | Area_1_office_16.npy 19 | Area_1_office_17.npy 20 | Area_1_office_18.npy 21 | Area_1_office_19.npy 22 | Area_1_office_1.npy 23 | Area_1_office_20.npy 24 | Area_1_office_21.npy 25 | Area_1_office_22.npy 26 | Area_1_office_23.npy 27 | Area_1_office_24.npy 28 | Area_1_office_25.npy 29 | Area_1_office_26.npy 30 | Area_1_office_27.npy 31 | Area_1_office_28.npy 32 | Area_1_office_29.npy 33 | Area_1_office_2.npy 34 | Area_1_office_30.npy 35 | Area_1_office_31.npy 36 | Area_1_office_3.npy 37 | Area_1_office_4.npy 38 | Area_1_office_5.npy 39 | Area_1_office_6.npy 40 | Area_1_office_7.npy 41 | Area_1_office_8.npy 42 | Area_1_office_9.npy 43 | Area_1_pantry_1.npy 44 | Area_1_WC_1.npy 45 | Area_2_auditorium_1.npy 46 | Area_2_auditorium_2.npy 47 | Area_2_conferenceRoom_1.npy 48 | Area_2_hallway_10.npy 49 | Area_2_hallway_11.npy 50 | Area_2_hallway_12.npy 51 | Area_2_hallway_1.npy 52 | Area_2_hallway_2.npy 53 | Area_2_hallway_3.npy 54 | Area_2_hallway_4.npy 55 | Area_2_hallway_5.npy 56 | Area_2_hallway_6.npy 57 | Area_2_hallway_7.npy 58 | Area_2_hallway_8.npy 59 | Area_2_hallway_9.npy 60 | Area_2_office_10.npy 61 | Area_2_office_11.npy 62 | Area_2_office_12.npy 63 | Area_2_office_13.npy 64 | Area_2_office_14.npy 65 | Area_2_office_1.npy 66 | Area_2_office_2.npy 67 | Area_2_office_3.npy 68 | Area_2_office_4.npy 69 | Area_2_office_5.npy 70 | Area_2_office_6.npy 71 | Area_2_office_7.npy 72 | Area_2_office_8.npy 73 | 
Area_2_office_9.npy 74 | Area_2_storage_1.npy 75 | Area_2_storage_2.npy 76 | Area_2_storage_3.npy 77 | Area_2_storage_4.npy 78 | Area_2_storage_5.npy 79 | Area_2_storage_6.npy 80 | Area_2_storage_7.npy 81 | Area_2_storage_8.npy 82 | Area_2_storage_9.npy 83 | Area_2_WC_1.npy 84 | Area_2_WC_2.npy 85 | Area_3_conferenceRoom_1.npy 86 | Area_3_hallway_1.npy 87 | Area_3_hallway_2.npy 88 | Area_3_hallway_3.npy 89 | Area_3_hallway_4.npy 90 | Area_3_hallway_5.npy 91 | Area_3_hallway_6.npy 92 | Area_3_lounge_1.npy 93 | Area_3_lounge_2.npy 94 | Area_3_office_10.npy 95 | Area_3_office_1.npy 96 | Area_3_office_2.npy 97 | Area_3_office_3.npy 98 | Area_3_office_4.npy 99 | Area_3_office_5.npy 100 | Area_3_office_6.npy 101 | Area_3_office_7.npy 102 | Area_3_office_8.npy 103 | Area_3_office_9.npy 104 | Area_3_storage_1.npy 105 | Area_3_storage_2.npy 106 | Area_3_WC_1.npy 107 | Area_3_WC_2.npy 108 | Area_4_conferenceRoom_1.npy 109 | Area_4_conferenceRoom_2.npy 110 | Area_4_conferenceRoom_3.npy 111 | Area_4_hallway_10.npy 112 | Area_4_hallway_11.npy 113 | Area_4_hallway_12.npy 114 | Area_4_hallway_13.npy 115 | Area_4_hallway_14.npy 116 | Area_4_hallway_1.npy 117 | Area_4_hallway_2.npy 118 | Area_4_hallway_3.npy 119 | Area_4_hallway_4.npy 120 | Area_4_hallway_5.npy 121 | Area_4_hallway_6.npy 122 | Area_4_hallway_7.npy 123 | Area_4_hallway_8.npy 124 | Area_4_hallway_9.npy 125 | Area_4_lobby_1.npy 126 | Area_4_lobby_2.npy 127 | Area_4_office_10.npy 128 | Area_4_office_11.npy 129 | Area_4_office_12.npy 130 | Area_4_office_13.npy 131 | Area_4_office_14.npy 132 | Area_4_office_15.npy 133 | Area_4_office_16.npy 134 | Area_4_office_17.npy 135 | Area_4_office_18.npy 136 | Area_4_office_19.npy 137 | Area_4_office_1.npy 138 | Area_4_office_20.npy 139 | Area_4_office_21.npy 140 | Area_4_office_22.npy 141 | Area_4_office_2.npy 142 | Area_4_office_3.npy 143 | Area_4_office_4.npy 144 | Area_4_office_5.npy 145 | Area_4_office_6.npy 146 | Area_4_office_7.npy 147 | Area_4_office_8.npy 148 | Area_4_office_9.npy 149 | Area_4_storage_1.npy 150 | Area_4_storage_2.npy 151 | Area_4_storage_3.npy 152 | Area_4_storage_4.npy 153 | Area_4_WC_1.npy 154 | Area_4_WC_2.npy 155 | Area_4_WC_3.npy 156 | Area_4_WC_4.npy 157 | Area_5_conferenceRoom_1.npy 158 | Area_5_conferenceRoom_2.npy 159 | Area_5_conferenceRoom_3.npy 160 | Area_5_hallway_10.npy 161 | Area_5_hallway_11.npy 162 | Area_5_hallway_12.npy 163 | Area_5_hallway_13.npy 164 | Area_5_hallway_14.npy 165 | Area_5_hallway_15.npy 166 | Area_5_hallway_1.npy 167 | Area_5_hallway_2.npy 168 | Area_5_hallway_3.npy 169 | Area_5_hallway_4.npy 170 | Area_5_hallway_5.npy 171 | Area_5_hallway_6.npy 172 | Area_5_hallway_7.npy 173 | Area_5_hallway_8.npy 174 | Area_5_hallway_9.npy 175 | Area_5_lobby_1.npy 176 | Area_5_office_10.npy 177 | Area_5_office_11.npy 178 | Area_5_office_12.npy 179 | Area_5_office_13.npy 180 | Area_5_office_14.npy 181 | Area_5_office_15.npy 182 | Area_5_office_16.npy 183 | Area_5_office_17.npy 184 | Area_5_office_18.npy 185 | Area_5_office_19.npy 186 | Area_5_office_1.npy 187 | Area_5_office_20.npy 188 | Area_5_office_21.npy 189 | Area_5_office_22.npy 190 | Area_5_office_23.npy 191 | Area_5_office_24.npy 192 | Area_5_office_25.npy 193 | Area_5_office_26.npy 194 | Area_5_office_27.npy 195 | Area_5_office_28.npy 196 | Area_5_office_29.npy 197 | Area_5_office_2.npy 198 | Area_5_office_30.npy 199 | Area_5_office_31.npy 200 | Area_5_office_32.npy 201 | Area_5_office_33.npy 202 | Area_5_office_34.npy 203 | Area_5_office_35.npy 204 | Area_5_office_36.npy 205 | Area_5_office_37.npy 206 
| Area_5_office_38.npy 207 | Area_5_office_39.npy 208 | Area_5_office_3.npy 209 | Area_5_office_40.npy 210 | Area_5_office_41.npy 211 | Area_5_office_42.npy 212 | Area_5_office_4.npy 213 | Area_5_office_5.npy 214 | Area_5_office_6.npy 215 | Area_5_office_7.npy 216 | Area_5_office_8.npy 217 | Area_5_office_9.npy 218 | Area_5_pantry_1.npy 219 | Area_5_storage_1.npy 220 | Area_5_storage_2.npy 221 | Area_5_storage_3.npy 222 | Area_5_storage_4.npy 223 | Area_5_WC_1.npy 224 | Area_5_WC_2.npy 225 | Area_6_conferenceRoom_1.npy 226 | Area_6_copyRoom_1.npy 227 | Area_6_hallway_1.npy 228 | Area_6_hallway_2.npy 229 | Area_6_hallway_3.npy 230 | Area_6_hallway_4.npy 231 | Area_6_hallway_5.npy 232 | Area_6_hallway_6.npy 233 | Area_6_lounge_1.npy 234 | Area_6_office_10.npy 235 | Area_6_office_11.npy 236 | Area_6_office_12.npy 237 | Area_6_office_13.npy 238 | Area_6_office_14.npy 239 | Area_6_office_15.npy 240 | Area_6_office_16.npy 241 | Area_6_office_17.npy 242 | Area_6_office_18.npy 243 | Area_6_office_19.npy 244 | Area_6_office_1.npy 245 | Area_6_office_20.npy 246 | Area_6_office_21.npy 247 | Area_6_office_22.npy 248 | Area_6_office_23.npy 249 | Area_6_office_24.npy 250 | Area_6_office_25.npy 251 | Area_6_office_26.npy 252 | Area_6_office_27.npy 253 | Area_6_office_28.npy 254 | Area_6_office_29.npy 255 | Area_6_office_2.npy 256 | Area_6_office_30.npy 257 | Area_6_office_31.npy 258 | Area_6_office_32.npy 259 | Area_6_office_33.npy 260 | Area_6_office_34.npy 261 | Area_6_office_35.npy 262 | Area_6_office_36.npy 263 | Area_6_office_37.npy 264 | Area_6_office_3.npy 265 | Area_6_office_4.npy 266 | Area_6_office_5.npy 267 | Area_6_office_6.npy 268 | Area_6_office_7.npy 269 | Area_6_office_8.npy 270 | Area_6_office_9.npy 271 | Area_6_openspace_1.npy 272 | Area_6_pantry_1.npy 273 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/area6_data_label.txt: -------------------------------------------------------------------------------- 1 | data/stanford_indoor3d/Area_6_conferenceRoom_1.npy 2 | data/stanford_indoor3d/Area_6_copyRoom_1.npy 3 | data/stanford_indoor3d/Area_6_hallway_1.npy 4 | data/stanford_indoor3d/Area_6_hallway_2.npy 5 | data/stanford_indoor3d/Area_6_hallway_3.npy 6 | data/stanford_indoor3d/Area_6_hallway_4.npy 7 | data/stanford_indoor3d/Area_6_hallway_5.npy 8 | data/stanford_indoor3d/Area_6_hallway_6.npy 9 | data/stanford_indoor3d/Area_6_lounge_1.npy 10 | data/stanford_indoor3d/Area_6_office_10.npy 11 | data/stanford_indoor3d/Area_6_office_11.npy 12 | data/stanford_indoor3d/Area_6_office_12.npy 13 | data/stanford_indoor3d/Area_6_office_13.npy 14 | data/stanford_indoor3d/Area_6_office_14.npy 15 | data/stanford_indoor3d/Area_6_office_15.npy 16 | data/stanford_indoor3d/Area_6_office_16.npy 17 | data/stanford_indoor3d/Area_6_office_17.npy 18 | data/stanford_indoor3d/Area_6_office_18.npy 19 | data/stanford_indoor3d/Area_6_office_19.npy 20 | data/stanford_indoor3d/Area_6_office_1.npy 21 | data/stanford_indoor3d/Area_6_office_20.npy 22 | data/stanford_indoor3d/Area_6_office_21.npy 23 | data/stanford_indoor3d/Area_6_office_22.npy 24 | data/stanford_indoor3d/Area_6_office_23.npy 25 | data/stanford_indoor3d/Area_6_office_24.npy 26 | data/stanford_indoor3d/Area_6_office_25.npy 27 | data/stanford_indoor3d/Area_6_office_26.npy 28 | data/stanford_indoor3d/Area_6_office_27.npy 29 | data/stanford_indoor3d/Area_6_office_28.npy 30 | data/stanford_indoor3d/Area_6_office_29.npy 31 | data/stanford_indoor3d/Area_6_office_2.npy 32 | 
data/stanford_indoor3d/Area_6_office_30.npy 33 | data/stanford_indoor3d/Area_6_office_31.npy 34 | data/stanford_indoor3d/Area_6_office_32.npy 35 | data/stanford_indoor3d/Area_6_office_33.npy 36 | data/stanford_indoor3d/Area_6_office_34.npy 37 | data/stanford_indoor3d/Area_6_office_35.npy 38 | data/stanford_indoor3d/Area_6_office_36.npy 39 | data/stanford_indoor3d/Area_6_office_37.npy 40 | data/stanford_indoor3d/Area_6_office_3.npy 41 | data/stanford_indoor3d/Area_6_office_4.npy 42 | data/stanford_indoor3d/Area_6_office_5.npy 43 | data/stanford_indoor3d/Area_6_office_6.npy 44 | data/stanford_indoor3d/Area_6_office_7.npy 45 | data/stanford_indoor3d/Area_6_office_8.npy 46 | data/stanford_indoor3d/Area_6_office_9.npy 47 | data/stanford_indoor3d/Area_6_openspace_1.npy 48 | data/stanford_indoor3d/Area_6_pantry_1.npy 49 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/meta/class_names.txt: -------------------------------------------------------------------------------- 1 | ceiling 2 | floor 3 | wall 4 | beam 5 | column 6 | window 7 | door 8 | table 9 | chair 10 | sofa 11 | bookcase 12 | board 13 | clutter 14 | -------------------------------------------------------------------------------- /pcseg/datasets/s3dis/preprocess.py: -------------------------------------------------------------------------------- 1 | # https://github.com/charlesq34/pointnet/blob/master/sem_seg/ 2 | 3 | import os 4 | import sys 5 | import glob 6 | import numpy as np 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | ROOT_DIR = os.path.dirname(BASE_DIR) 10 | sys.path.append(BASE_DIR) 11 | 12 | DATA_PATH = os.path.join('data', 's3dis/Stanford3dDataset_v1.2_Aligned_Version') 13 | g_classes = [x.rstrip() for x in open(os.path.join(BASE_DIR, 'meta/class_names.txt'))] 14 | g_class2label = {cls: i for i, cls in enumerate(g_classes)} 15 | g_class2color = {'ceiling': [0,255,0], 16 | 'floor': [0,0,255], 17 | 'wall': [0,255,255], 18 | 'beam': [255,255,0], 19 | 'column': [255,0,255], 20 | 'window': [100,100,255], 21 | 'door': [200,200,100], 22 | 'table': [170,120,200], 23 | 'chair': [255,0,0], 24 | 'sofa': [200,100,100], 25 | 'bookcase': [10,200,100], 26 | 'board': [200,200,200], 27 | 'clutter': [50,50,50]} 28 | g_easy_view_labels = [7,8,9,10,11,1] 29 | g_label2color = {g_classes.index(cls): g_class2color[cls] for cls in g_classes} 30 | 31 | 32 | def collect_point_label(anno_path, out_filename, file_format='txt'): 33 | """ Convert original dataset files to data_label file (each line is XYZRGBL). 34 | We aggregated all the points from each instance in the room. 35 | Args: 36 | anno_path: path to annotations. e.g. Area_1/office_2/Annotations/ 37 | out_filename: path to save collected points and labels (each line is XYZRGBL) 38 | file_format: txt or numpy, determines what file format to save. 39 | Returns: 40 | None 41 | Note: 42 | the points are shifted before save, the most negative point is now at origin. 43 | """ 44 | points_list = [] 45 | 46 | num_inst = 0 47 | for f in sorted(glob.glob(os.path.join(anno_path, '*.txt'))): 48 | cls = os.path.basename(f).split('_')[0] 49 | num_inst += 1 50 | if cls not in g_classes: # note: in some room there is 'staris' class.. 
51 | cls = 'clutter' 52 | points = np.loadtxt(f) 53 | labels = np.ones((points.shape[0], 1)) * g_class2label[cls] 54 | inst_labels = np.ones((points.shape[0], 1)) * num_inst 55 | points_list.append(np.concatenate([points, labels, inst_labels], 1)) # Nx8 56 | 57 | data_label = np.concatenate(points_list, 0) 58 | xyz_min = np.amin(data_label, axis=0)[0:3] 59 | data_label[:, 0:3] -= xyz_min 60 | 61 | if file_format == 'txt': 62 | fout = open(out_filename, 'w') 63 | for i in range(data_label.shape[0]): 64 | fout.write('%f %f %f %d %d %d %d %d\n' % \ 65 | (data_label[i, 0], data_label[i, 1], data_label[i, 2], 66 | data_label[i, 3], data_label[i, 4], data_label[i, 5], 67 | data_label[i, 6], data_label[i, 7])) 68 | fout.close() 69 | elif file_format == 'numpy': 70 | np.save(out_filename, data_label) 71 | else: 72 | print('ERROR!! Unknown file format: %s, please use txt or numpy.' % \ 73 | (file_format)) 74 | exit() 75 | 76 | 77 | def main(): 78 | anno_paths = [line.rstrip() for line in open(os.path.join(BASE_DIR, 'meta/anno_paths.txt'))] 79 | anno_paths = [os.path.join(DATA_PATH, p) for p in anno_paths] 80 | 81 | output_folder = './data/s3dis/stanford_indoor3d_inst' 82 | if not os.path.exists(output_folder): 83 | os.mkdir(output_folder) 84 | 85 | # Note: there is an extra character in the v1.2 data in Area_5/hallway_6. It's fixed manually. 86 | for anno_path in anno_paths: 87 | print(anno_path) 88 | # try: 89 | elements = anno_path.split('/') 90 | out_filename = elements[-3]+'_'+elements[-2]+'.npy' # Area_1_hallway_1.npy 91 | collect_point_label(anno_path, os.path.join(output_folder, out_filename), 'numpy') 92 | # except: 93 | # print(anno_path, 'ERROR!!') 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/datasets/scannet/__pycache__/scannet_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | setup( 6 | name='SOFTGROUP_OP', 7 | ext_modules=[ 8 | CUDAExtension( 9 | 'SOFTGROUP_OP', ['src/softgroup_api.cpp', 'src/softgroup_ops.cpp', 'src/cuda.cu'], 10 | extra_compile_args={ 11 | 'cxx': ['-g'], 12 | 'nvcc': ['-O2'] 13 | }) 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | 6 | Modified by Thang Vu - Remove semantic label in clustering 7 | */ 8 | 9 | #include "bfs_cluster.h" 10 | 11 | /* =================== ballquery_batch_p================================= */ 12 | // input xyz: (n, 3) float 13 | // input batch_idxs: (n) int 14 | // input batch_offsets: (B+1) int, batch_offsets[-1] 15 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n 16 | // output start_len: (n, 2), int 17 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, 18 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, 19 | at::Tensor start_len_tensor, int n, int meanActive, 20 | float radius) { 21 | const float *xyz = xyz_tensor.data_ptr(); 22 | const int *batch_idxs = batch_idxs_tensor.data_ptr(); 23 | const int *batch_offsets = batch_offsets_tensor.data_ptr(); 24 | int *idx = idx_tensor.data_ptr(); 25 | int *start_len = start_len_tensor.data_ptr(); 26 | 27 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 28 | int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs, 29 | batch_offsets, idx, start_len, stream); 30 | return cumsum; 31 | } 32 | 33 | ConnectedComponent find_cc(Int idx, Int *ball_query_idxs, int *start_len, 34 | int *visited) { 35 | ConnectedComponent cc; 36 | cc.addPoint(idx); 37 | visited[idx] = 1; 38 | 39 | std::queue Q; 40 | assert(Q.empty()); 41 | Q.push(idx); 42 | 43 | while (!Q.empty()) { 44 | Int cur = Q.front(); 45 | Q.pop(); 46 | int start = start_len[cur * 2]; 47 | int len = start_len[cur * 2 + 1]; 48 | for (Int i = start; i < start + len; i++) { 49 | Int idx_i = ball_query_idxs[i]; 50 | if (visited[idx_i] == 1) 51 | continue; 52 | cc.addPoint(idx_i); 53 | visited[idx_i] = 1; 54 | Q.push(idx_i); 55 | } 56 | } 57 | return cc; 58 | } 59 | 60 | int get_clusters(float *class_numpoint_mean, int *ball_query_idxs, 61 | int *start_len, const int nPoint, float threshold, 62 | ConnectedComponents &clusters, const int class_id) { 63 | int *visited = new int[nPoint]{0}; 64 | float _class_numpoint_mean, thr; 65 | int sumNPoint = 0; 66 | 67 | for (int i = 0; i < nPoint; i++) { 68 | if (visited[i] == 0) { 69 | ConnectedComponent CC = find_cc(i, ball_query_idxs, start_len, visited); 70 | _class_numpoint_mean = class_numpoint_mean[class_id]; 71 | 72 | // if _class_num_point_mean is not defined (-1) directly use threshold 73 | if (_class_numpoint_mean == -1) { 74 | thr = threshold; 75 | } else { 76 | thr = threshold * _class_numpoint_mean; 77 | } 78 | if ((int)CC.pt_idxs.size() >= thr) { 79 | clusters.push_back(CC); 80 | sumNPoint += (int)CC.pt_idxs.size(); 81 | } 82 | } 83 | } 84 | delete[] visited; 85 | return sumNPoint; 86 | } 87 | 88 | // convert from ConnectedComponents to (idxs, offsets) representation 89 | void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, 90 | int *cluster_offsets) { 91 | for (int i = 0; i < (int)CCs.size(); i++) { 92 | cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size(); 93 | for (int j = 0; j < (int)CCs[i].pt_idxs.size(); j++) { 94 | int idx = CCs[i].pt_idxs[j]; 95 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i; 96 | cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx; 97 | } 98 | } 99 | } 100 | 101 | // input: class_numpoint_mean_tensor 102 | // input: ball_query_idxs, int, (nActive) 103 | // input: start_len, int, (N, 2) 104 | // output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for 105 | // corresponding point idxs in N 106 | // output: cluster_offsets, int (nCluster + 1) 107 | void 
bfs_cluster(at::Tensor class_numpoint_mean_tensor, 108 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, 109 | at::Tensor cluster_idxs_tensor, 110 | at::Tensor cluster_offsets_tensor, const int N, 111 | float threshold, const int class_id) { 112 | float *class_numpoint_mean = class_numpoint_mean_tensor.data_ptr(); 113 | Int *ball_query_idxs = ball_query_idxs_tensor.data_ptr(); 114 | int *start_len = start_len_tensor.data_ptr(); 115 | ConnectedComponents CCs; 116 | int sumNPoint = get_clusters(class_numpoint_mean, ball_query_idxs, start_len, 117 | N, threshold, CCs, class_id); 118 | int nCluster = (int)CCs.size(); 119 | cluster_idxs_tensor.resize_({sumNPoint, 2}); 120 | cluster_offsets_tensor.resize_({nCluster + 1}); 121 | cluster_idxs_tensor.zero_(); 122 | cluster_offsets_tensor.zero_(); 123 | int *cluster_idxs = cluster_idxs_tensor.data_ptr(); 124 | int *cluster_offsets = cluster_offsets_tensor.data_ptr(); 125 | fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets); 126 | } 127 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | #include "../cuda_utils.h" 7 | #include "bfs_cluster.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /* ================================== ballquery_batch_p 14 | * ================================== */ 15 | __global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, 16 | const float *xyz, const int *batch_idxs, 17 | const int *batch_offsets, int *idx, 18 | int *start_len, int *cumsum) { 19 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 20 | if (pt_idx >= n) 21 | return; 22 | 23 | start_len += (pt_idx * 2); 24 | int idx_temp[1000]; 25 | 26 | float radius2 = radius * radius; 27 | float o_x = xyz[pt_idx * 3 + 0]; 28 | float o_y = xyz[pt_idx * 3 + 1]; 29 | float o_z = xyz[pt_idx * 3 + 2]; 30 | 31 | int batch_idx = batch_idxs[pt_idx]; 32 | int start = batch_offsets[batch_idx]; 33 | int end = batch_offsets[batch_idx + 1]; 34 | 35 | int cnt = 0; 36 | for (int k = start; k < end; k++) { 37 | float x = xyz[k * 3 + 0]; 38 | float y = xyz[k * 3 + 1]; 39 | float z = xyz[k * 3 + 2]; 40 | float d2 = 41 | (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z); 42 | if (d2 < radius2) { 43 | if (cnt < 1000) { 44 | idx_temp[cnt] = k; 45 | } else { 46 | break; 47 | } 48 | ++cnt; 49 | } 50 | } 51 | 52 | start_len[0] = atomicAdd(cumsum, cnt); 53 | start_len[1] = cnt; 54 | 55 | int thre = n * meanActive; 56 | if (start_len[0] >= thre) 57 | return; 58 | 59 | idx += start_len[0]; 60 | if (start_len[0] + cnt >= thre) 61 | cnt = thre - start_len[0]; 62 | 63 | for (int k = 0; k < cnt; k++) { 64 | idx[k] = idx_temp[k]; 65 | } 66 | } 67 | 68 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, 69 | const float *xyz, const int *batch_idxs, 70 | const int *batch_offsets, int *idx, int *start_len, 71 | cudaStream_t stream) { 72 | // param xyz: (n, 3) 73 | // param batch_idxs: (n) 74 | // param batch_offsets: (B + 1) 75 | // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in 76 | // n 77 | // output start_len: (n, 2), int 78 | 79 | cudaError_t err; 80 | 81 | dim3 blocks(DIVUP(n, MAX_THREADS_PER_BLOCK)); 82 | dim3 threads(MAX_THREADS_PER_BLOCK); 83 | 84 | int cumsum = 0; 85 | int *p_cumsum; 86 | cudaMalloc((void 
**)&p_cumsum, sizeof(int)); 87 | cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice); 88 | 89 | ballquery_batch_p_cuda_<<>>( 90 | n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, 91 | p_cumsum); 92 | 93 | err = cudaGetLastError(); 94 | if (cudaSuccess != err) { 95 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 96 | exit(-1); 97 | } 98 | 99 | cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost); 100 | return cumsum; 101 | } 102 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/bfs_cluster/bfs_cluster.h: -------------------------------------------------------------------------------- 1 | /* 2 | Ball Query with BatchIdx & Clustering Algorithm 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #ifndef BFS_CLUSTER_H 8 | #define BFS_CLUSTER_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, 15 | at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, 16 | at::Tensor start_len_tensor, int n, int meanActive, 17 | float radius); 18 | int ballquery_batch_p_cuda(int n, int meanActive, float radius, 19 | const float *xyz, const int *batch_idxs, 20 | const int *batch_offsets, int *idx, int *start_len, 21 | cudaStream_t stream); 22 | 23 | void bfs_cluster(at::Tensor class_numpoint_mean_tensor, 24 | at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor, 25 | at::Tensor cluster_idxs_tensor, 26 | at::Tensor cluster_offsets_tensor, const int N, 27 | float threshold, const int class_id); 28 | 29 | #endif // BFS_CLUSTER_H 30 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | */ 4 | 5 | #include "cal_iou_and_masklabel.h" 6 | 7 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor, 8 | at::Tensor proposals_offset_tensor, 9 | at::Tensor instance_labels_tensor, 10 | at::Tensor instance_pointnum_tensor, 11 | at::Tensor proposals_iou_tensor, int nInstance, 12 | int nProposal) { 13 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 14 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 15 | long *instance_labels = instance_labels_tensor.data_ptr(); 16 | int *instance_pointnum = instance_pointnum_tensor.data_ptr(); 17 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 18 | 19 | // input: nInstance (1,), int 20 | // input: nProposal (1,), int 21 | // input: proposals_idx (sumNPoint), int 22 | // input: proposals_offset (nProposal + 1), int 23 | // input: instance_labels (N), long, 0~total_nInst-1, -100 24 | // input: instance_pointnum (total_nInst), int 25 | // input: mask_scores_sigmoid (sumNPoint, 1), float 26 | // output: proposals_iou (nProposal, total_nInst), float 27 | // output: mask_label (sumNPoint, 1), float 28 | get_mask_iou_on_cluster_cuda(nInstance, nProposal, proposals_idx, 29 | proposals_offset, instance_labels, 30 | instance_pointnum, proposals_iou); 31 | } 32 | 33 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor, 34 | at::Tensor proposals_offset_tensor, 35 | at::Tensor instance_labels_tensor, 36 | at::Tensor instance_pointnum_tensor, 37 | at::Tensor proposals_iou_tensor, int nInstance, 38 | int nProposal, 39 | 
at::Tensor mask_scores_sigmoid_tensor) { 40 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 41 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 42 | long *instance_labels = instance_labels_tensor.data_ptr(); 43 | int *instance_pointnum = instance_pointnum_tensor.data_ptr(); 44 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 45 | float *mask_scores_sigmoid = mask_scores_sigmoid_tensor.data_ptr(); 46 | 47 | // input: nInstance (1,), int 48 | // input: nProposal (1,), int 49 | // input: proposals_idx (sumNPoint), int 50 | // input: proposals_offset (nProposal + 1), int 51 | // input: instance_labels (N), long, 0~total_nInst-1, -100 52 | // input: instance_pointnum (total_nInst), int 53 | // input: mask_scores_sigmoid (sumNPoint, 1), float 54 | // output: proposals_iou (nProposal, total_nInst), float 55 | // output: mask_label (sumNPoint, 1), float 56 | get_mask_iou_on_pred_cuda( 57 | nInstance, nProposal, proposals_idx, proposals_offset, instance_labels, 58 | instance_pointnum, proposals_iou, mask_scores_sigmoid); 59 | } 60 | 61 | void get_mask_label(at::Tensor proposals_idx_tensor, 62 | at::Tensor proposals_offset_tensor, 63 | at::Tensor instance_labels_tensor, 64 | at::Tensor instance_cls_tensor, 65 | at::Tensor proposals_iou_tensor, int nInstance, 66 | int nProposal, float iou_thr, 67 | at::Tensor mask_labels_tensor) { 68 | int *proposals_idx = proposals_idx_tensor.data_ptr(); 69 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 70 | long *instance_labels = instance_labels_tensor.data_ptr(); 71 | long *instance_cls = instance_cls_tensor.data_ptr(); 72 | float *proposals_iou = proposals_iou_tensor.data_ptr(); 73 | float *mask_label = mask_labels_tensor.data_ptr(); 74 | 75 | // input: nInstance (1,), int 76 | // input: nProposal (1,), int 77 | // input: proposals_idx (sumNPoint), int 78 | // input: proposals_offset (nProposal + 1), int 79 | // input: instance_labels (N), long, 0~total_nInst-1, -100 80 | // input: instance_pointnum (total_nInst), int 81 | // input: mask_scores_sigmoid (sumNPoint, 1), float 82 | // output: proposals_iou (nProposal, total_nInst), float 83 | // output: mask_label (sumNPoint, 1), float 84 | get_mask_label_cuda(nInstance, nProposal, iou_thr, proposals_idx, 85 | proposals_offset, instance_labels, instance_cls, 86 | proposals_iou, mask_label); 87 | } 88 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cal_iou_and_masklabel/cal_iou_and_masklabel.h: -------------------------------------------------------------------------------- 1 | /* 2 | Get the IoU between predictions and gt masks 3 | */ 4 | 5 | #ifndef CAL_IOU_AND_MASKLABEL_H 6 | #define CAL_IOU_AND_MASKLABEL_H 7 | #include 8 | #include 9 | 10 | #include "../datatype/datatype.h" 11 | 12 | void get_mask_iou_on_cluster_cuda(int nInstance, int nProposal, 13 | int *proposals_idx, int *proposals_offset, 14 | long *instance_labels, int *instance_pointnum, 15 | float *proposals_iou); 16 | 17 | void get_mask_iou_on_pred_cuda(int nInstance, int nProposal, int *proposals_idx, 18 | int *proposals_offset, long *instance_labels, 19 | int *instance_pointnum, float *proposals_iou, 20 | float *mask_scores_sigmoid); 21 | 22 | void get_mask_label_cuda(int nInstance, int nProposal, float iou_thr, 23 | int *proposals_idx, int *proposals_offset, 24 | long *instance_labels, long *instance_cls, 25 | float *proposals_iou, float *mask_label); 26 | 27 | void get_mask_iou_on_cluster(at::Tensor proposals_idx_tensor, 28 | 
at::Tensor proposals_offset_tensor, 29 | at::Tensor instance_labels_tensor, 30 | at::Tensor instance_pointnum_tensor, 31 | at::Tensor proposals_iou_tensor, int nInstance, 32 | int nProposal); 33 | 34 | void get_mask_iou_on_pred(at::Tensor proposals_idx_tensor, 35 | at::Tensor proposals_offset_tensor, 36 | at::Tensor instance_labels_tensor, 37 | at::Tensor instance_pointnum_tensor, 38 | at::Tensor proposals_iou_tensor, int nInstance, 39 | int nProposal, at::Tensor mask_scores_sigmoid_tensor); 40 | 41 | void get_mask_label(at::Tensor proposals_idx_tensor, 42 | at::Tensor proposals_offset_tensor, 43 | at::Tensor instance_labels_tensor, 44 | at::Tensor instance_cls_tensor, 45 | at::Tensor proposals_iou_tensor, int nInstance, 46 | int nProposal, float iou_thr, 47 | at::Tensor mask_labels_tensor); 48 | 49 | #endif // CAL_IOU_AND_MASKLABEL_H 50 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cuda.cu: -------------------------------------------------------------------------------- 1 | #include "datatype/datatype.h" 2 | #include 3 | 4 | #include "bfs_cluster/bfs_cluster.cu" 5 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cu" 6 | #include "roipool/roipool.cu" 7 | #include "sec_mean/sec_mean.cu" 8 | #include "voxelize/voxelize.cu" 9 | 10 | template void voxelize_fp_cuda(Int nOutputRows, Int maxActive, 11 | Int nPlanes, float *feats, 12 | float *output_feats, Int *rules, 13 | bool average); 14 | 15 | template void voxelize_bp_cuda(Int nOutputRows, Int maxActive, 16 | Int nPlanes, float *d_output_feats, 17 | float *d_feats, Int *rules, bool average); 18 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | 8 | #define MAX_THREADS_PER_BLOCK 512 9 | #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) 10 | 11 | inline int opt_n_threads(int work_size) { 12 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | 16 | inline dim3 opt_block_config(int x, int y) { 17 | const int x_threads = opt_n_threads(x); 18 | const int y_threads = 19 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 20 | dim3 block_config(x_threads, y_threads, 1); 21 | return block_config; 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.cpp: -------------------------------------------------------------------------------- 1 | #include "datatype.h" 2 | 3 | template SparseGrid::SparseGrid() : ctr(0) { 4 | // Sparsehash needs a key to be set aside and never used 5 | Point empty_key; 6 | for (Int i = 0; i < dimension; i++) { 7 | empty_key[i] = std::numeric_limits::min(); 8 | } 9 | mp.set_empty_key(empty_key); 10 | } 11 | 12 | ConnectedComponent::ConnectedComponent() {} 13 | 14 | void ConnectedComponent::addPoint(Int pt_idx) { pt_idxs.push_back(pt_idx); } 15 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/datatype/datatype.h: -------------------------------------------------------------------------------- 1 | #ifndef DATATYPE_H 2 | #define DATATYPE_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | 
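// Shared data types for the CUDA ops: SparseGrid wraps a google::dense_hash_map
// from voxel coordinates to voxel indices, RuleBook stores gather/scatter rules
// as nested vectors of point indices, and ConnectedComponent accumulates the
// point indices of a single cluster grown by the BFS clustering code.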
#include 8 | 9 | using Int = int32_t; 10 | 11 | template using Point = std::array; 12 | 13 | template struct IntArrayHash { 14 | std::size_t operator()(Point const &p) const { 15 | Int hash = 16777619; 16 | for (auto x : p) { 17 | hash *= 2166136261; 18 | hash ^= x; 19 | } 20 | return hash; 21 | } 22 | }; 23 | 24 | template 25 | using SparseGridMap = google::dense_hash_map< 26 | Point, Int, IntArrayHash, 27 | std::equal_to>>; // 28 | 29 | template class SparseGrid { 30 | public: 31 | Int ctr; 32 | SparseGridMap mp; 33 | SparseGrid(); 34 | }; 35 | 36 | template using SparseGrids = std::vector>; 37 | 38 | using RuleBook = std::vector>; 39 | 40 | class ConnectedComponent { 41 | public: 42 | std::vector pt_idxs; 43 | float accum_x = 0.; 44 | float accum_y = 0.; 45 | float accum_z = 0.; 46 | int cls_label = -100; 47 | int batch_idx = -1; 48 | // int npoint = 0; 49 | 50 | ConnectedComponent(); 51 | void addPoint(Int pt_idx); 52 | }; 53 | 54 | using ConnectedComponents = std::vector; 55 | 56 | #endif // DATATYPE_H 57 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "roipool.h" 8 | 9 | void global_avg_pool_fp(at::Tensor feats_tensor, 10 | at::Tensor proposals_offset_tensor, 11 | at::Tensor output_feats_tensor, int nProposal, int C) { 12 | float *feats = feats_tensor.data_ptr(); 13 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 14 | float *output_feats = output_feats_tensor.data_ptr(); 15 | 16 | global_avg_pool_fp_cuda(nProposal, C, feats, proposals_offset, output_feats); 17 | } 18 | 19 | void global_avg_pool_bp(at::Tensor d_feats_tensor, 20 | at::Tensor proposals_offset_tensor, 21 | at::Tensor d_output_feats_tensor, int nProposal, 22 | int C) { 23 | float *d_feats = d_feats_tensor.data_ptr(); 24 | int *proposals_offset = proposals_offset_tensor.data_ptr(); 25 | float *d_output_feats = d_output_feats_tensor.data_ptr(); 26 | 27 | global_avg_pool_bp_cuda(nProposal, C, d_feats, proposals_offset, 28 | d_output_feats); 29 | } 30 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.cu: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
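Note: despite the "ROI Max Pool" title, the kernels below implement global
average pooling over proposals. The forward kernel averages the features of the
points in [proposals_offset[p], proposals_offset[p + 1]) for each proposal p,
with thread blocks striding over proposals and threads over feature channels;
the backward kernel scatters the averaged gradients back to the contributing
points via atomicAdd.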
5 | */ 6 | 7 | #include "roipool.h" 8 | #include 9 | #include 10 | 11 | // fp 12 | __global__ void global_avg_pool_fp_cuda_(int nProposal, int C, float *feats, 13 | int *proposals_offset, 14 | float *output_feats) { 15 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) { 16 | int start = proposals_offset[pp_id]; 17 | int end = proposals_offset[pp_id + 1]; 18 | int n_points = end - start; 19 | 20 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 21 | // int argmax_idx = -1; 22 | // float max_val = -1e50; 23 | float val = 0; 24 | 25 | for (int i = start; i < end; i++) { 26 | val += feats[i * C + plane]; 27 | } 28 | // output_maxidx[pp_id * C + plane] = argmax_idx; 29 | output_feats[pp_id * C + plane] = val / (float)n_points; 30 | } 31 | } 32 | } 33 | 34 | // input: feats (sumNPoint, C) float 35 | // input: proposals_offset (nProposal + 1) int 36 | // output: output_feats (nProposal, C) float 37 | // output: output_maxidx (nProposal, C) int 38 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats, 39 | int *proposals_offset, float *output_feats) { 40 | global_avg_pool_fp_cuda_<<>>( 42 | nProposal, C, feats, proposals_offset, output_feats); 43 | } 44 | 45 | // bp 46 | __global__ void global_avg_pool_bp_cuda_(int nProposal, int C, float *d_feats, 47 | int *proposals_offset, 48 | float *d_output_feats) { 49 | for (int pp_id = blockIdx.x; pp_id < nProposal; pp_id += gridDim.x) { 50 | int start = proposals_offset[pp_id]; 51 | int end = proposals_offset[pp_id + 1]; 52 | int n_points = end - start; 53 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 54 | for (int i = start; i < end; i++) { 55 | atomicAdd(&d_feats[i * C + plane], 56 | d_output_feats[pp_id * C + plane] / (float)n_points); 57 | } 58 | } 59 | } 60 | } 61 | 62 | // input: d_output_feats (nProposal, C) float 63 | // input: output_maxidx (nProposal, C) int 64 | // input: proposals_offset (nProposal + 1) int 65 | // output: d_feats (sumNPoint, C) float 66 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats, 67 | int *proposals_offset, float *d_output_feats) { 68 | global_avg_pool_bp_cuda_<<>>( 70 | nProposal, C, d_feats, proposals_offset, d_output_feats); 71 | } 72 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/roipool/roipool.h: -------------------------------------------------------------------------------- 1 | /* 2 | ROI Max Pool 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #ifndef ROIPOOL_H 8 | #define ROIPOOL_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | void global_avg_pool_fp_cuda(int nProposal, int C, float *feats, 15 | int *proposals_offset, float *output_feats); 16 | 17 | void global_avg_pool_bp_cuda(int nProposal, int C, float *d_feats, 18 | int *proposals_offset, float *d_output_feats); 19 | 20 | void global_avg_pool_fp(at::Tensor feats_tensor, 21 | at::Tensor proposals_offset_tensor, 22 | at::Tensor output_feats_tensor, int nProposal, int C); 23 | 24 | void global_avg_pool_bp(at::Tensor d_feats_tensor, 25 | at::Tensor proposals_offset_tensor, 26 | at::Tensor d_output_feats_tensor, int nProposal, int C); 27 | 28 | #endif // ROIPOOL_H 29 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "sec_mean.h" 8 | 9 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, 10 | at::Tensor out_tensor, int nProposal, int C) { 11 | int *offsets = offsets_tensor.data_ptr(); 12 | float *inp = inp_tensor.data_ptr(); 13 | float *out = out_tensor.data_ptr(); 14 | 15 | sec_mean_cuda(nProposal, C, inp, offsets, out); 16 | } 17 | 18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, 19 | at::Tensor out_tensor, int nProposal, int C) { 20 | int *offsets = offsets_tensor.data_ptr(); 21 | float *inp = inp_tensor.data_ptr(); 22 | float *out = out_tensor.data_ptr(); 23 | 24 | sec_min_cuda(nProposal, C, inp, offsets, out); 25 | } 26 | 27 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, 28 | at::Tensor out_tensor, int nProposal, int C) { 29 | int *offsets = offsets_tensor.data_ptr(); 30 | float *inp = inp_tensor.data_ptr(); 31 | float *out = out_tensor.data_ptr(); 32 | 33 | sec_max_cuda(nProposal, C, inp, offsets, out); 34 | } 35 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) (no bp) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
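Thread blocks stride over proposals and threads over feature channels; each
kernel reduces (mean, min or max) the input features over the point range
[offsets[p], offsets[p + 1]) of its proposal. No backward pass is implemented.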
5 | */ 6 | 7 | #include "sec_mean.h" 8 | #include 9 | #include 10 | 11 | /* ================================== sec_mean 12 | * ================================== */ 13 | __global__ void sec_mean_cuda_(int nProposal, int C, float *inp, int *offsets, 14 | float *out) { 15 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 16 | int start = offsets[p_id]; 17 | int end = offsets[p_id + 1]; 18 | 19 | float count = (float)(end - start); 20 | 21 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 22 | float mean = 0; 23 | for (int i = start; i < end; i++) { 24 | mean += (inp[i * C + plane] / count); 25 | } 26 | out[p_id * C + plane] = mean; 27 | } 28 | } 29 | } 30 | 31 | // input: inp (N, C) float 32 | // input: offsets (nProposal + 1) int 33 | // output: out (nProposal, C) float 34 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 35 | sec_mean_cuda_<<>>( 36 | nProposal, C, inp, offsets, out); 37 | } 38 | 39 | /* ================================== sec_min ================================== 40 | */ 41 | __global__ void sec_min_cuda_(int nProposal, int C, float *inp, int *offsets, 42 | float *out) { 43 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 44 | int start = offsets[p_id]; 45 | int end = offsets[p_id + 1]; 46 | 47 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 48 | float min_val = 1e50; 49 | for (int i = start; i < end; i++) { 50 | if (inp[i * C + plane] < min_val) { 51 | min_val = inp[i * C + plane]; 52 | } 53 | } 54 | out[p_id * C + plane] = min_val; 55 | } 56 | } 57 | } 58 | 59 | // input: inp (N, C) float 60 | // input: offsets (nProposal + 1) int 61 | // output: out (nProposal, C) float 62 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 63 | sec_min_cuda_<<>>( 64 | nProposal, C, inp, offsets, out); 65 | } 66 | 67 | /* ================================== sec_max ================================== 68 | */ 69 | __global__ void sec_max_cuda_(int nProposal, int C, float *inp, int *offsets, 70 | float *out) { 71 | for (int p_id = blockIdx.x; p_id < nProposal; p_id += gridDim.x) { 72 | int start = offsets[p_id]; 73 | int end = offsets[p_id + 1]; 74 | 75 | for (int plane = threadIdx.x; plane < C; plane += blockDim.x) { 76 | float max_val = -1e50; 77 | for (int i = start; i < end; i++) { 78 | if (inp[i * C + plane] > max_val) { 79 | max_val = inp[i * C + plane]; 80 | } 81 | } 82 | out[p_id * C + plane] = max_val; 83 | } 84 | } 85 | } 86 | 87 | // input: inp (N, C) float 88 | // input: offsets (nProposal + 1) int 89 | // output: out (nProposal, C) float 90 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out) { 91 | sec_max_cuda_<<>>( 92 | nProposal, C, inp, offsets, out); 93 | } 94 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/sec_mean/sec_mean.h: -------------------------------------------------------------------------------- 1 | /* 2 | Segment Operations (mean, max, min) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
5 | */ 6 | 7 | #ifndef SEC_MEAN_H 8 | #define SEC_MEAN_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | void sec_mean(at::Tensor inp_tensor, at::Tensor offsets_tensor, 15 | at::Tensor out_tensor, int nProposal, int C); 16 | void sec_mean_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 17 | 18 | void sec_min(at::Tensor inp_tensor, at::Tensor offsets_tensor, 19 | at::Tensor out_tensor, int nProposal, int C); 20 | void sec_min_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 21 | 22 | void sec_max(at::Tensor inp_tensor, at::Tensor offsets_tensor, 23 | at::Tensor out_tensor, int nProposal, int C); 24 | void sec_max_cuda(int nProposal, int C, float *inp, int *offsets, float *out); 25 | 26 | #endif // SEC_MEAN_H 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "softgroup_ops.h" 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | 8 | m.def("get_mask_iou_on_cluster", &get_mask_iou_on_cluster, 9 | "get_mask_iou_on_cluster"); 10 | m.def("get_mask_iou_on_pred", &get_mask_iou_on_pred, "get_mask_iou_on_pred"); 11 | m.def("get_mask_label", &get_mask_label, "get_mask_label"); 12 | 13 | m.def("voxelize_idx", &voxelize_idx_3d, "voxelize_idx"); 14 | m.def("voxelize_fp", &voxelize_fp_feat, "voxelize_fp"); 15 | m.def("voxelize_bp", &voxelize_bp_feat, "voxelize_bp"); 16 | 17 | m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p"); 18 | m.def("bfs_cluster", &bfs_cluster, "bfs_cluster"); 19 | 20 | m.def("global_avg_pool_fp", &global_avg_pool_fp, "global_avg_pool_fp"); 21 | m.def("global_avg_pool_bp", &global_avg_pool_bp, "global_avg_pool_bp"); 22 | 23 | m.def("sec_mean", &sec_mean, "sec_mean"); 24 | m.def("sec_min", &sec_min, "sec_min"); 25 | m.def("sec_max", &sec_max, "sec_max"); 26 | } 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "bfs_cluster/bfs_cluster.cpp" 6 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.cpp" 7 | #include "datatype/datatype.cpp" 8 | #include "roipool/roipool.cpp" 9 | #include "sec_mean/sec_mean.cpp" 10 | #include "voxelize/voxelize.cpp" 11 | 12 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, 13 | /* long M*4 */ at::Tensor output_coords, 14 | /* Int N */ at::Tensor input_map, 15 | /* Int M*(maxActive+1) */ at::Tensor output_map, 16 | Int batchSize, Int mode) { 17 | voxelize_idx<3>(coords, output_coords, input_map, output_map, batchSize, 18 | mode); 19 | } 20 | 21 | void voxelize_fp_feat( 22 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 23 | /* cuda float M*C */ at::Tensor output_feats, 24 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 25 | Int maxActive, Int nPlane) { 26 | voxelize_fp(feats, output_feats, output_map, mode, nActive, maxActive, 27 | nPlane); 28 | } 29 | 30 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, 31 | /* cuda float N*C */ at::Tensor d_feats, 32 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 33 | Int mode, Int nActive, Int maxActive, Int nPlane) { 34 | voxelize_bp(d_output_feats, d_feats, output_map, mode, nActive, 35 | 
maxActive, nPlane); 36 | } 37 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/softgroup_ops.h: -------------------------------------------------------------------------------- 1 | #ifndef HAIS_H 2 | #define HAIS_H 3 | #include "bfs_cluster/bfs_cluster.h" 4 | #include "cal_iou_and_masklabel/cal_iou_and_masklabel.h" 5 | #include "datatype/datatype.h" 6 | #include "roipool/roipool.h" 7 | #include "sec_mean/sec_mean.h" 8 | 9 | void voxelize_idx_3d(/* long N*4 */ at::Tensor coords, 10 | /* long M*4 */ at::Tensor output_coords, 11 | /* Int N */ at::Tensor input_map, 12 | /* Int M*(maxActive+1) */ at::Tensor output_map, 13 | Int batchSize, Int mode); 14 | 15 | void voxelize_fp_feat( 16 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 17 | /* cuda float M*C */ at::Tensor output_feats, 18 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 19 | Int maxActive, Int nPlane); 20 | 21 | void voxelize_bp_feat(/* cuda float M*C */ at::Tensor d_output_feats, 22 | /* cuda float N*C */ at::Tensor d_feats, 23 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, 24 | Int mode, Int nActive, Int maxActive, Int nPlane); 25 | 26 | #endif // HAIS_H 27 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "voxelize.h" 8 | 9 | /* ================================== voxelize_idx 10 | * ================================== */ 11 | template 12 | void voxelize_idx(/* long N*4 */ at::Tensor coords, 13 | /* long M*4 */ at::Tensor output_coords, 14 | /* Int N */ at::Tensor input_map, 15 | /* Int M*(maxActive+1) */ at::Tensor output_map, 16 | Int batchSize, Int mode) { 17 | assert(coords.ndimension() == 2); 18 | assert(coords.size(1) >= dimension and coords.size(1) <= dimension + 1); 19 | 20 | RuleBook voxelizeRuleBook; // rule[1]: M voxels -> N points output_map 21 | SparseGrids inputSGs; // voxel_coords -> voxel_idx in M voxels 22 | // input_map: N points -> M voxels 23 | Int nActive = 0; 24 | 25 | Int maxActive = voxelize_inputmap( 26 | inputSGs, input_map.data_ptr(), voxelizeRuleBook, nActive, 27 | coords.data_ptr(), coords.size(0), coords.size(1), batchSize, mode); 28 | 29 | output_map.resize_({nActive, maxActive + 1}); 30 | output_map.zero_(); 31 | 32 | output_coords.resize_({nActive, coords.size(1)}); 33 | output_coords.zero_(); 34 | 35 | Int *oM = output_map.data_ptr(); 36 | long *oC = output_coords.data_ptr(); 37 | voxelize_outputmap(coords.data_ptr(), oC, oM, 38 | &voxelizeRuleBook[1][0], nActive, maxActive); 39 | } 40 | 41 | template 42 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map, 43 | Int *rule, Int nOutputRows, Int maxActive) { 44 | for (Int i = 0; i < nOutputRows; i++) { 45 | for (Int j = 0; j <= maxActive; j++) 46 | output_map[j] = rule[j]; 47 | Int inputIdx = rule[1]; 48 | rule += (1 + maxActive); 49 | output_map += (1 + maxActive); 50 | 51 | long *coord = coords + inputIdx * (dimension + 1); 52 | long *output_coord = output_coords + i * (dimension + 1); 53 | for (Int j = 0; j <= dimension; j++) { 54 | output_coord[j] = coord[j]; 55 | } 56 | } 57 | } 58 | 59 | // mode 0=guaranteed unique 1=last item(overwrite) 2=first item(keep) 
3=sum, 60 | // 4=mean 61 | // input: coords 62 | // output: SGs: one map for each batch: map from voxel_coord to voxel_idx(in M 63 | // voxels) 64 | // output: input_map: N, N points -> M voxels 65 | // output: rules 66 | // output: nActive 67 | // output: maxActive 68 | template 69 | Int voxelize_inputmap(SparseGrids &SGs, Int *input_map, 70 | RuleBook &rules, Int &nActive, long *coords, 71 | Int nInputRows, Int nInputColumns, Int batchSize, 72 | Int mode) { 73 | assert(nActive == 0); 74 | assert(rules.size() == 0); 75 | assert(SGs.size() == 0); 76 | 77 | SGs.resize(batchSize); 78 | Point p; 79 | 80 | std::vector> outputRows; 81 | if (nInputColumns == dimension) { 82 | SGs.resize(1); 83 | auto &sg = SGs[0]; 84 | for (Int i = 0; i < nInputRows; i++) { 85 | for (Int j = 0; j < dimension; j++) 86 | p[j] = coords[j]; 87 | coords += dimension; 88 | auto iter = sg.mp.find(p); 89 | if (iter == sg.mp.end()) { 90 | sg.mp[p] = nActive++; 91 | outputRows.resize(nActive); 92 | } 93 | outputRows[sg.mp[p]].push_back(i); 94 | 95 | input_map[i] = sg.mp[p]; 96 | } 97 | } else { // nInputColumns == dimension + 1 (1 in index 0 for batchidx) 98 | Int batchIdx; 99 | for (Int i = 0; i < nInputRows; i++) { 100 | batchIdx = coords[0]; 101 | for (Int j = 0; j < dimension; j++) 102 | p[j] = coords[j + 1]; 103 | coords += (dimension + 1); 104 | if (batchIdx + 1 >= (Int)SGs.size()) { 105 | SGs.resize(batchIdx + 1); 106 | } 107 | auto &sg = SGs[batchIdx]; 108 | auto iter = sg.mp.find(p); 109 | if (iter == sg.mp.end()) { 110 | sg.mp[p] = nActive++; 111 | outputRows.resize(nActive); 112 | } 113 | outputRows[sg.mp[p]].push_back(i); 114 | 115 | input_map[i] = sg.mp[p]; 116 | } 117 | } 118 | 119 | // Rulebook Format 120 | // rules[0][0] == mode 121 | // rules[0][1] == maxActive per spatial location (==1 for modes 0,1,2) 122 | // rules[0][2] == nInputRows 123 | // rules[0][3] == nOutputRows 124 | // rules[1] nOutputRows x (1+maxActive) 125 | rules.resize(2); 126 | rules[0].push_back(mode); 127 | rules[0].push_back(1); 128 | rules[0].push_back(nInputRows); 129 | rules[0].push_back(outputRows.size()); 130 | auto &rule = rules[1]; 131 | if (mode == 0) { 132 | assert(nInputRows == (Int)outputRows.size()); 133 | for (Int i = 0; i < nActive; i++) { 134 | rule.push_back(1); 135 | assert((Int)outputRows[i].size() == 1); 136 | rule.push_back(outputRows[i][0]); 137 | } 138 | } 139 | if (mode == 1) { 140 | for (Int i = 0; i < nActive; i++) { 141 | rule.push_back(1); 142 | rule.push_back(outputRows[i].front()); 143 | } 144 | } 145 | if (mode == 2) { 146 | for (Int i = 0; i < nActive; i++) { 147 | rule.push_back(1); 148 | rule.push_back(outputRows[i].back()); 149 | } 150 | } 151 | Int maxActive = 1; 152 | if (mode == 3 or mode == 4) { 153 | for (auto &row : outputRows) 154 | maxActive = std::max(maxActive, (Int)row.size()); 155 | rules[0][1] = maxActive; 156 | for (auto &row : outputRows) { 157 | rule.push_back(row.size()); 158 | for (auto &r : row) 159 | rule.push_back(r); 160 | rule.resize((rule.size() + maxActive) / (maxActive + 1) * 161 | (maxActive + 1)); 162 | } 163 | } 164 | return maxActive; 165 | } 166 | 167 | /* ================================== voxelize 168 | * ================================== */ 169 | template 170 | void voxelize_fp( 171 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 172 | /* cuda float M*C */ at::Tensor output_feats, 173 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 174 | Int maxActive, Int nPlane) { 175 | 176 | auto iF = feats.data_ptr(); 177 | auto 
oF = output_feats.data_ptr(); 178 | 179 | Int *rules = output_map.data_ptr(); 180 | 181 | voxelize_fp_cuda(nActive, maxActive, nPlane, iF, oF, rules, mode == 4); 182 | } 183 | 184 | template 185 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats, 186 | /* cuda float N*C */ at::Tensor d_feats, 187 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, 188 | Int nActive, Int maxActive, Int nPlane) { 189 | auto d_oF = d_output_feats.data_ptr(); 190 | auto d_iF = d_feats.data_ptr(); 191 | 192 | Int *rules = output_map.data_ptr(); 193 | 194 | voxelize_bp_cuda(nActive, maxActive, nPlane, d_oF, d_iF, rules, mode == 4); 195 | } 196 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 5 | */ 6 | 7 | #include "voxelize.h" 8 | 9 | template 10 | __global__ void voxelize_fp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, 11 | T *feats, T *output_feats, Int *rules, 12 | bool average) { 13 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) { 14 | T *out = output_feats + row * nPlanes; 15 | Int *r = rules + row * (maxActive + 1); 16 | Int nActive = r[0]; 17 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; 18 | for (int i = 1; i <= nActive; i++) { 19 | T *inp = feats + r[i] * nPlanes; 20 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) { 21 | atomicAdd(&out[plane], multiplier * inp[plane]); 22 | } 23 | } 24 | } 25 | } 26 | 27 | // input: feats N * C 28 | // input: rules M * (1 + maxActive) 29 | // output: output_feats M * C 30 | template 31 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, 32 | T *output_feats, Int *rules, bool average) { 33 | voxelize_fp_cuda_< 34 | T><<>>( 35 | nOutputRows, maxActive, nPlanes, feats, output_feats, rules, average); 36 | } 37 | 38 | template 39 | __global__ void voxelize_bp_cuda_(Int nOutputRows, Int maxActive, Int nPlanes, 40 | T *d_output_feats, T *d_feats, Int *rules, 41 | bool average) { 42 | for (int row = blockIdx.x; row < nOutputRows; row += gridDim.x) { 43 | T *out = d_output_feats + row * nPlanes; 44 | Int *r = rules + row * (maxActive + 1); 45 | Int nActive = r[0]; 46 | T multiplier = (average and nActive > 0) ? (T)1 / nActive : (T)1; 47 | for (int i = 1; i <= nActive; i++) { 48 | T *inp = d_feats + r[i] * nPlanes; 49 | for (int plane = threadIdx.x; plane < nPlanes; plane += blockDim.x) { 50 | atomicAdd(&inp[plane], multiplier * out[plane]); 51 | } 52 | } 53 | } 54 | } 55 | 56 | template 57 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, 58 | T *d_output_feats, T *d_feats, Int *rules, bool average) { 59 | voxelize_bp_cuda_< 60 | T><<>>( 61 | nOutputRows, maxActive, nPlanes, d_output_feats, d_feats, rules, average); 62 | } 63 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/ops/src/voxelize/voxelize.h: -------------------------------------------------------------------------------- 1 | /* 2 | Points to Voxels & Voxels to Points (Modified from SparseConv) 3 | Written by Li Jiang 4 | All Rights Reserved 2020. 
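voxelize_idx builds the point-to-voxel mapping plus an output_map whose rows
have the form [count, point indices ...] with stride (maxActive + 1); e.g. with
maxActive = 3, a voxel covering points {4, 9} gets the row [2, 4, 9, 0]. The
mode selects how points falling in the same voxel are combined (keep one of
them, sum, or average). voxelize_fp gathers point features into voxel features
on the GPU (dividing by the count when mode == 4, i.e. mean), and voxelize_bp
scatters voxel-feature gradients back to the contributing points.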
5 | */ 6 | 7 | #ifndef VOXELIZE_H 8 | #define VOXELIZE_H 9 | #include 10 | #include 11 | 12 | #include "../datatype/datatype.h" 13 | 14 | /* ================================== voxelize_idx 15 | * ================================== */ 16 | template 17 | void voxelize_idx(/* long N*4 */ at::Tensor coords, 18 | /* long M*4 */ at::Tensor output_coords, 19 | /* Int N */ at::Tensor input_map, 20 | /* Int M*(maxActive+1) */ at::Tensor output_map, 21 | Int batchSize, Int mode); 22 | 23 | template 24 | void voxelize_outputmap(long *coords, long *output_coords, Int *output_map, 25 | Int *rule, Int nOutputRows, Int maxActive); 26 | 27 | template 28 | Int voxelize_inputmap(SparseGrids &SGs, Int *input_map, 29 | RuleBook &rules, Int &nActive, long *coords, 30 | Int nInputRows, Int nInputColumns, Int batchSize, 31 | Int mode); 32 | 33 | /* ================================== voxelize 34 | * ================================== */ 35 | template 36 | void voxelize_fp( 37 | /* cuda float N*C */ at::Tensor feats, // N * 3 -> M * 3 (N >= M) 38 | /* cuda float M*C */ at::Tensor output_feats, 39 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, Int nActive, 40 | Int maxActive, Int nPlane); 41 | 42 | template 43 | void voxelize_fp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, T *feats, 44 | T *output_feats, Int *rules, bool average); 45 | 46 | // 47 | template 48 | void voxelize_bp(/* cuda float M*C */ at::Tensor d_output_feats, 49 | /* cuda float N*C */ at::Tensor d_feats, 50 | /* cuda Int M*(maxActive+1) */ at::Tensor output_map, Int mode, 51 | Int nActive, Int maxActive, Int nPlane); 52 | 53 | template 54 | void voxelize_bp_cuda(Int nOutputRows, Int maxActive, Int nPlanes, 55 | T *d_output_feats, T *d_feats, Int *rules, bool average); 56 | 57 | #endif // VOXELIZE_H 58 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_third_party = munch,numpy,pandas,plyfile,scannet_util,scipy,sklearn,spconv,tensorboardX,torch,tqdm,yaml 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY 8 | 9 | [yapf] 10 | BASED_ON_STYLE = pep8 11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 13 | COLUMN_LIMIT = 100 14 | -------------------------------------------------------------------------------- /pcseg/external_libs/softgroup_ops/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | if __name__ == '__main__': 6 | setup( 7 | name='softgroup', 8 | version='1.0', 9 | description='SoftGroup: SoftGroup for 3D Instance Segmentation [CVPR 2022]', 10 | author='Thang Vu', 11 | author_email='thangvubk@kaist.ac.kr', 12 | # packages=['softgroup'], 13 | package_data={'ops': ['*/*.so']}, 14 | ext_modules=[ 15 | CUDAExtension( 16 | name='softgroup_ops', 17 | sources=[ 18 | 'ops/src/softgroup_api.cpp', 'ops/src/softgroup_ops.cpp', 19 | 'ops/src/cuda.cu' 20 | ], 21 | extra_compile_args={ 22 | 'cxx': ['-g'], 23 | 'nvcc': ['-O2'] 24 | }, 25 | include_dirs=['/data/anaconda3/envs/pt18/include/']) 26 | ], 27 | cmdclass={'build_ext': BuildExtension}) 28 | -------------------------------------------------------------------------------- 
/pcseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from .vision_networks import build_model 7 | from .text_networks import build_text_model 8 | 9 | try: 10 | import kornia 11 | except: 12 | pass 13 | # print('Warning: kornia is not installed. This package is only required by CaDDN') 14 | 15 | 16 | def build_vision_network(model_cfg, num_class, dataset): 17 | model = build_model( 18 | model_cfg=model_cfg, num_class=num_class, dataset=dataset 19 | ) 20 | return model 21 | 22 | 23 | def build_text_network(model_cfg): 24 | text_encoder = build_text_model(model_cfg=model_cfg) 25 | return text_encoder 26 | 27 | 28 | def load_data_to_gpu(batch_dict): 29 | for key, val in batch_dict.items(): 30 | if isinstance(val, torch.Tensor): 31 | batch_dict[key] = batch_dict[key].cuda() 32 | elif not isinstance(val, np.ndarray) or key in ['calib', 'point_img_idx', 'point_img']: 33 | continue 34 | elif key in ['ids', 'metadata', 'scene_name']: 35 | continue 36 | elif key in ['points_xyz_voxel_scale', 'labels', 'inst_label', 'origin_idx', 'offsets', 'inst_cls']: 37 | batch_dict[key] = torch.from_numpy(val).long().cuda() 38 | elif key in ['inst_pointnum', 'batch_idxs']: 39 | batch_dict[key] = torch.from_numpy(val).int().cuda() 40 | else: 41 | batch_dict[key] = torch.from_numpy(val).float().cuda() 42 | -------------------------------------------------------------------------------- /pcseg/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/__init__.py: -------------------------------------------------------------------------------- 1 | from .vl_adapter import VLAdapter 2 | 3 | __all__ = { 4 | 'VLAdapter': VLAdapter 5 | } -------------------------------------------------------------------------------- /pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/adapter/__pycache__/vl_adapter.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/adapter/vl_adapter.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from ..model_utils import basic_block_1d 7 | 8 | 9 | class VLAdapter(nn.Module): 10 | def __init__(self, model_cfg, in_channel): 11 | super(VLAdapter, self).__init__() 12 | self.model_cfg = model_cfg 13 | self.in_feature_name = model_cfg.get('IN_FEAT_NAME', 'backbone_3d_feats') 14 | self.eval_only = model_cfg.get('EVAL_ONLY', None) 15 | self.text_channel = model_cfg.TEXT_DIM 16 | 17 | # vision adapter 18 | adapter_last_norm = 
self.model_cfg.get('LAST_NORM', True) 19 | self.adapter = self.build_vl_adapter(self.model_cfg.NUM_ADAPTER_LAYERS, in_channel, adapter_last_norm) 20 | 21 | def build_vl_adapter(self, num_adapter_layers, in_channel, last_norm): 22 | """build vision language adapter 23 | 24 | Args: 25 | num_adapter_layers (_type_): _description_ 26 | in_channel (_type_): _description_ 27 | 28 | Raises: 29 | NotImplementedError: _description_ 30 | 31 | Returns: 32 | _type_: _description_ 33 | """ 34 | if num_adapter_layers < 1 or self.eval_only: 35 | return None 36 | 37 | if num_adapter_layers == 1: 38 | mid_channel_list = [in_channel, self.text_channel] 39 | elif num_adapter_layers == 2: 40 | multiplier = int(np.log2(self.text_channel / in_channel)) 41 | mid_channel_list = [in_channel, in_channel * multiplier, self.text_channel] 42 | else: 43 | raise NotImplementedError 44 | 45 | adapter = basic_block_1d.MLP( 46 | mid_channel_list, 47 | norm_fn=functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1), 48 | num_layers=num_adapter_layers, last_norm_fn=last_norm 49 | ) 50 | return adapter 51 | 52 | def forward(self, batch_dict): 53 | if self.eval_only and self.training: 54 | return batch_dict 55 | 56 | backbone3d_feats = batch_dict[self.in_feature_name] 57 | 58 | # forward adapter 59 | if hasattr(self, 'adapter') and self.adapter is not None: 60 | adapter_feats = self.adapter(backbone3d_feats) 61 | else: 62 | adapter_feats = backbone3d_feats 63 | 64 | batch_dict['adapter_feats'] = adapter_feats 65 | return batch_dict 66 | -------------------------------------------------------------------------------- /pcseg/models/head/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_seg_head import TextSegHead 2 | from .binary_head import BinaryHead 3 | from .caption_head import CaptionHead 4 | from .linear_head import LinearHead 5 | from .inst_head import InstHead 6 | 7 | __all__ = { 8 | 'TextSegHead': TextSegHead, 9 | 'BinaryHead': BinaryHead, 10 | 'CaptionHead': CaptionHead, 11 | 'LinearHead': LinearHead, 12 | 'InstHead': InstHead 13 | } 14 | -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/binary_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/binary_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/caption_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/caption_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/inst_head.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/inst_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/linear_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/linear_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/head/__pycache__/text_seg_head.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/head/binary_head.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import torch 3 | import torch.nn as nn 4 | 5 | from pcseg.utils.spconv_utils import spconv 6 | from pcseg.models.model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlockDecoder 7 | from pcseg.utils import common_utils 8 | 9 | 10 | class BinaryHead(nn.Module): 11 | def __init__(self, model_cfg, ignore_label, in_channel, block_reps, block_residual): 12 | super().__init__() 13 | self.model_cfg = model_cfg 14 | self.binary_feat_input = [] 15 | self.binary_thresh = model_cfg.THRESH 16 | self.in_channel = in_channel 17 | self.ignore_label = ignore_label 18 | self.num_filters = model_cfg.get('NUM_FILTERS', None) 19 | 20 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1) 21 | if block_residual: 22 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False)) 23 | else: 24 | block = VGGBlock 25 | 26 | if self.num_filters is not None: 27 | block_channels = self.num_filters 28 | else: 29 | # assert self.num_blocks is not None 30 | block_channels = [in_channel, 2 * in_channel, 3 * in_channel, 4 * in_channel, 5 * in_channel, 6 * in_channel, 7 * in_channel] 31 | 32 | self.binary_encoder = UBlockDecoder( 33 | block_channels, norm_fn, block_reps, block, indice_key_id=1, detach=model_cfg.get('DETACH', True) 34 | ) 35 | 36 | self.binary_classifier = spconv.SparseSequential( 37 | norm_fn(in_channel), 38 | nn.ReLU(), 39 | nn.Linear(in_channel, 1) 40 | ) 41 | self.forward_ret_dict = {} 42 | self.binary_loss_func = nn.BCEWithLogitsLoss() 43 | 44 | self.apply(self.set_bn_init) 45 | 46 | @staticmethod 47 | def set_bn_init(m): 48 | classname = m.__class__.__name__ 49 | if classname.find('BatchNorm') != -1: 50 | m.weight.data.fill_(1.0) 51 | m.bias.data.fill_(0.0) 52 | 53 | def forward(self, batch_dict): 54 | self.forward_ret_dict = {} 55 | binary_scores = self.binary_encoder(self.binary_feat_input) 56 | binary_scores = self.binary_classifier(binary_scores).features 57 | 58 | if self.training and self.model_cfg.get('VOXEL_LOSS', None): 59 | pass 60 | else: 61 | binary_scores = binary_scores[batch_dict['v2p_map'].long()] 62 | 63 | if not self.training and batch_dict['test_x4_split']: 64 | binary_scores = common_utils.merge_4_parts(binary_scores) 65 | 66 | binary_preds = (torch.sigmoid(binary_scores) > self.binary_thresh).long() 67 | 68 | self.binary_feat_input = [] 69 | self.forward_ret_dict['binary_scores'] = binary_scores 70 | self.forward_ret_dict['binary_preds'] = 
binary_preds 71 | if self.training: 72 | self.forward_ret_dict['binary_labels'] = batch_dict['binary_labels'] 73 | 74 | batch_dict['binary_ret_dict'] = self.forward_ret_dict 75 | return batch_dict 76 | 77 | def register_hook_for_binary_head(self, backbone): 78 | def get_features(): 79 | def hook(model, input, output): 80 | self.binary_feat_input.append(output) 81 | return hook 82 | 83 | for module_name in self.model_cfg.HOOK_FEATURE_LIST: 84 | eval('backbone.' + module_name).register_forward_hook(get_features()) 85 | 86 | def get_loss(self): 87 | binary_scores = self.forward_ret_dict['binary_scores'] 88 | binary_labels = self.forward_ret_dict['binary_labels'] 89 | 90 | # filter unannotated categories 91 | mask = binary_labels != self.ignore_label 92 | binary_scores = binary_scores[mask] 93 | binary_labels = binary_labels[mask] 94 | 95 | binary_loss = self.binary_loss_func(binary_scores, binary_labels.reshape(-1, 1)) 96 | binary_loss = binary_loss * self.model_cfg.get('LOSS_WEIGHT', 1.0) 97 | 98 | tb_dict = {'binary_loss': binary_loss.item()} 99 | return binary_loss, tb_dict 100 | -------------------------------------------------------------------------------- /pcseg/models/head/linear_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from pcseg.config import cfg 5 | 6 | 7 | class LinearHead(nn.Module): 8 | def __init__(self, model_cfg, in_channel, ignore_label, num_class): 9 | super(LinearHead, self).__init__() 10 | self.model_cfg = model_cfg 11 | self.in_channel = in_channel 12 | self.ignore_label = ignore_label 13 | self.num_class = num_class 14 | 15 | self.cls_head = nn.Linear(self.in_channel, self.num_class) 16 | 17 | self.valid_class_idx = [i for i in range(self.num_class)] 18 | if hasattr(cfg.DATA_CONFIG, 'ignore_class_idx'): 19 | self.ignore_class_idx = cfg.DATA_CONFIG.ignore_class_idx 20 | for i in self.ignore_class_idx: 21 | self.valid_class_idx.remove(i) 22 | 23 | self.seg_loss_func = nn.CrossEntropyLoss(ignore_index=self.ignore_label).cuda() 24 | self.forward_ret_dict = {} 25 | 26 | def forward(self, batch_dict): 27 | self.forward_ret_dict = {} 28 | backbone3d_feats = batch_dict['backbone_3d_feats'] 29 | 30 | semantic_scores = self.cls_head(backbone3d_feats) 31 | if self.training and self.model_cfg.get('VOXEL_LOSS', None): 32 | pass 33 | else: 34 | semantic_scores = semantic_scores[batch_dict['v2p_map']] 35 | 36 | semantic_scores = semantic_scores[..., self.valid_class_idx] 37 | semantic_preds = semantic_scores.max(1)[1] 38 | 39 | self.forward_ret_dict['seg_scores'] = semantic_scores 40 | self.forward_ret_dict['seg_preds'] = semantic_preds 41 | 42 | # save gt label to forward_ret_dict 43 | self.forward_ret_dict['seg_labels'] = batch_dict['labels'] 44 | 45 | def get_loss(self): 46 | semantic_scores = self.forward_ret_dict['seg_scores'] 47 | semantic_labels = self.forward_ret_dict['seg_labels'] 48 | 49 | seg_loss = self.seg_loss_func(semantic_scores, semantic_labels) 50 | 51 | tb_dict = {'loss_seg': seg_loss.item()} 52 | return seg_loss, tb_dict 53 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | 5 | def load_best_metric(ckpt_save_dir): 6 | best_metric, best_epoch = 0.0, -1 7 | best_metric_record_list = glob.glob(str(ckpt_save_dir / '*.txt')) 8 | if len(best_metric_record_list) > 0: 9 | 
best_metric_record_name = os.path.basename(best_metric_record_list[0]) 10 | best_split_list = os.path.splitext(best_metric_record_name)[0].split('_') 11 | best_metric = float(best_split_list[2]) 12 | best_epoch = int(best_split_list[-1]) 13 | return best_metric, best_epoch 14 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/basic_block_1d.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/fp16.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/rle_utils.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/model_utils/__pycache__/unet_blocks.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/model_utils/basic_block_1d.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class MLP(nn.Sequential): 5 | def __init__(self, channels, norm_fn=None, num_layers=2, last_norm_fn=False, last_bias=True): 6 | assert len(channels) >= 2 7 | modules = [] 8 | for i in range(num_layers - 1): 9 | modules.append(nn.Linear(channels[i], channels[i + 1])) 10 | if norm_fn: 11 | modules.append(norm_fn(channels[i + 1])) 12 | modules.append(nn.ReLU()) 13 | modules.append(nn.Linear(channels[-2], channels[-1], bias=last_bias)) 14 | if last_norm_fn: 15 | modules.append(norm_fn(channels[-1])) 16 | modules.append(nn.ReLU()) 17 | return super().__init__(*modules) 18 | 19 | def init_weights(self): 20 | for m in self.modules(): 21 | if isinstance(m, nn.Linear): 22 | nn.init.xavier_uniform_(m.weight) 23 | nn.init.constant_(m.bias, 0) 24 | if isinstance(self[-1], nn.Linear): 25 | nn.init.normal_(self[-1].weight, 0, 0.01) 26 | nn.init.constant_(self[-1].bias, 0) 27 | 28 | 29 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm1d, **kwargs): 30 | if name == 'BasicBlock1D': 31 | block = [ 32 | nn.Linear(in_channels, out_channels), 33 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 34 | act_fn() 
35 | ] 36 | elif name == 'DeConv1dBlock': 37 | block = [ 38 | nn.ConvTranspose1d(in_channels, out_channels, **kwargs), 39 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 40 | act_fn() 41 | ] 42 | else: 43 | raise NotImplementedError 44 | 45 | return block 46 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/basic_block_2d.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class BasicBlock2D(nn.Module): 5 | def __init__(self, in_channels, out_channels, **kwargs): 6 | """ 7 | Initializes convolutional block 8 | Args: 9 | in_channels: int, Number of input channels 10 | out_channels: int, Number of output channels 11 | **kwargs: Dict, Extra arguments for nn.Conv2d 12 | """ 13 | super().__init__() 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | self.conv = nn.Conv2d(in_channels=in_channels, 17 | out_channels=out_channels, 18 | **kwargs) 19 | self.bn = nn.BatchNorm2d(out_channels) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, features): 23 | """ 24 | Applies convolutional block 25 | Args: 26 | features: (B, C_in, H, W), Input features 27 | Returns: 28 | x: (B, C_out, H, W), Output features 29 | """ 30 | x = self.conv(features) 31 | x = self.bn(x) 32 | x = self.relu(x) 33 | return x 34 | 35 | 36 | def build_block(name, in_channels, out_channels, act_fn=nn.ReLU, norm_layer=nn.BatchNorm2d, **kwargs): 37 | if name == 'BasicBlock2D': 38 | block = [ 39 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs), 40 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 41 | act_fn() 42 | ] 43 | elif name == 'DeConv2dBlock': 44 | block = [ 45 | nn.ConvTranspose2d(in_channels, out_channels, **kwargs), 46 | norm_layer(out_channels, eps=1e-3, momentum=0.01), 47 | act_fn() 48 | ] 49 | else: 50 | raise NotImplementedError 51 | 52 | return block 53 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/fp16.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/thangvubk/SoftGroup/blob/11dcbfd74b7660a2b82ac6473af107849c7d545f/softgroup/util/fp16.py 2 | import functools 3 | from collections import abc 4 | from inspect import getfullargspec 5 | 6 | import spconv.pytorch as spconv 7 | import torch 8 | 9 | 10 | def cast_tensor_type(inputs, src_type, dst_type): 11 | if isinstance(inputs, torch.Tensor): 12 | return inputs.to(dst_type) if inputs.dtype == src_type else inputs 13 | elif isinstance(inputs, spconv.SparseConvTensor): 14 | if inputs.features.dtype == src_type: 15 | features = inputs.features.to(dst_type) 16 | inputs = inputs.replace_feature(features) 17 | return inputs 18 | elif isinstance(inputs, abc.Mapping): 19 | return type(inputs)({k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()}) 20 | elif isinstance(inputs, abc.Iterable): 21 | return type(inputs)(cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | 25 | 26 | def force_fp32(apply_to=None, out_fp16=False): 27 | 28 | def force_fp32_wrapper(old_func): 29 | 30 | @functools.wraps(old_func) 31 | def new_func(*args, **kwargs): 32 | if not isinstance(args[0], torch.nn.Module): 33 | raise TypeError('@force_fp32 can only be used to decorate the ' 34 | 'method of nn.Module') 35 | # get the arg spec of the decorated method 36 | args_info = getfullargspec(old_func) 37 | # get the 
argument names to be casted 38 | args_to_cast = args_info.args if apply_to is None else apply_to 39 | # convert the args that need to be processed 40 | new_args = [] 41 | if args: 42 | arg_names = args_info.args[:len(args)] 43 | for i, arg_name in enumerate(arg_names): 44 | if arg_name in args_to_cast: 45 | new_args.append(cast_tensor_type(args[i], torch.half, torch.float)) 46 | else: 47 | new_args.append(args[i]) 48 | # convert the kwargs that need to be processed 49 | new_kwargs = dict() 50 | if kwargs: 51 | for arg_name, arg_value in kwargs.items(): 52 | if arg_name in args_to_cast: 53 | new_kwargs[arg_name] = cast_tensor_type(arg_value, torch.half, torch.float) 54 | else: 55 | new_kwargs[arg_name] = arg_value 56 | with torch.cuda.amp.autocast(enabled=False): 57 | output = old_func(*new_args, **new_kwargs) 58 | # cast the results back to fp32 if necessary 59 | if out_fp16: 60 | output = cast_tensor_type(output, torch.float, torch.half) 61 | return output 62 | 63 | return new_func 64 | 65 | return force_fp32_wrapper 66 | -------------------------------------------------------------------------------- /pcseg/models/model_utils/rle_utils.py: -------------------------------------------------------------------------------- 1 | # Modify from https://www.kaggle.com/paulorzp/run-length-encode-and-decode 2 | import numpy as np 3 | 4 | 5 | def rle_encode(mask): 6 | """Encode RLE (Run-length-encode) from 1D binary mask. 7 | 8 | Args: 9 | mask (np.ndarray): 1D binary mask 10 | Returns: 11 | rle (dict): encoded RLE 12 | """ 13 | length = mask.shape[0] 14 | mask = np.concatenate([[0], mask, [0]]) 15 | runs = np.where(mask[1:] != mask[:-1])[0] + 1 16 | runs[1::2] -= runs[::2] 17 | counts = ' '.join(str(x) for x in runs) 18 | rle = dict(length=length, counts=counts) 19 | return rle 20 | 21 | 22 | def rle_decode(rle): 23 | """Decode rle to get binary mask. 24 | 25 | Args: 26 | rle (dict): rle of encoded mask 27 | Returns: 28 | mask (np.ndarray): decoded mask 29 | """ 30 | length = rle['length'] 31 | counts = rle['counts'] 32 | s = counts.split() 33 | starts, nums = [np.asarray(x, dtype=np.int32) for x in (s[0:][::2], s[1:][::2])] 34 | starts -= 1 35 | ends = starts + nums 36 | mask = np.zeros(length, dtype=np.uint8) 37 | for lo, hi in zip(starts, ends): 38 | mask[lo:hi] = 1 39 | return mask 40 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import torch 4 | 5 | from . 
import text_models 6 | from .prompt_template import template_meta 7 | from ...config import cfg 8 | 9 | 10 | def build_text_model(model_cfg): 11 | tokenizer, text_encoder = getattr( 12 | text_models, f'get_{model_cfg.NAME.lower()}_model' 13 | )(model_cfg.BACKBONE) 14 | 15 | text_encoder.tokenizer = tokenizer 16 | return text_encoder 17 | 18 | 19 | def load_text_embedding_from_path(text_emb_cfg): 20 | text_emb_path = os.path.join(cfg.DATA_CONFIG.DATA_PATH, text_emb_cfg.PATH) 21 | text_embedding = torch.load(text_emb_path, map_location=torch.device('cpu')).detach() 22 | if text_emb_cfg.get('NORM', True): 23 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True) 24 | print("=> loaded text embedding from path '{}'".format(text_emb_path)) 25 | return text_embedding 26 | 27 | 28 | def is_bg_class(c): 29 | return (c.lower() == 'wall') or (c.lower() == 'floor') or (c.lower() == 'ceiling') or (c.lower() =='otherfurniture') 30 | 31 | 32 | def build_text_token_from_class_names(model_cfg, class_names): 33 | if model_cfg.TEMPLATE == 'lseg': # only instance classes are encoded with prompt 34 | return [template_meta[model_cfg.TEMPLATE][0].format(c) if not is_bg_class(c) else c for c in class_names] 35 | else: 36 | return [template_meta[model_cfg.TEMPLATE][0].format(c) for c in class_names] 37 | 38 | 39 | def load_text_embedding_from_encoder(model_cfg, text_encoder, logger=logging.getLogger()): 40 | text_encoder.cuda() 41 | class_names = cfg.TEXT_ENCODER.CATEGORY_NAMES 42 | text = build_text_token_from_class_names(model_cfg, class_names) 43 | 44 | if model_cfg.NAME == 'CLIP': 45 | text_tokens = text_encoder.tokenizer(text).cuda() 46 | text_embedding = text_encoder.encode_text(text_tokens) 47 | elif model_cfg.NAME == 'BERT': 48 | text_tokens = text_encoder.tokenizer(text, return_tensors="pt", padding=True).to('cuda') 49 | text_embedding = text_encoder(**text_tokens).pooler_output 50 | else: 51 | raise NotImplementedError 52 | 53 | if model_cfg.get('NORM', True): 54 | text_embedding /= text_embedding.norm(dim=-1, keepdim=True) 55 | logger.info("=> loaded text embedding from '{}'".format(model_cfg.NAME)) 56 | return text_embedding.detach().cpu() 57 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/prompt_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/text_networks/__pycache__/text_models.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/text_networks/prompt_template.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------- 2 | # MIT License 3 | # 4 | # Copyright (c) 2021 OpenAI 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 23 | # 24 | # Modified by Jiarui Xu 25 | # ------------------------------------------------------------------------- 26 | 27 | full_imagenet_templates = [ 28 | 'a bad photo of a {}.', 29 | 'a photo of many {}.', 30 | 'a sculpture of a {}.', 31 | 'a photo of the hard to see {}.', 32 | 'a low resolution photo of the {}.', 33 | 'a rendering of a {}.', 34 | 'graffiti of a {}.', 35 | 'a bad photo of the {}.', 36 | 'a cropped photo of the {}.', 37 | 'a tattoo of a {}.', 38 | 'the embroidered {}.', 39 | 'a photo of a hard to see {}.', 40 | 'a bright photo of a {}.', 41 | 'a photo of a clean {}.', 42 | 'a photo of a dirty {}.', 43 | 'a dark photo of the {}.', 44 | 'a drawing of a {}.', 45 | 'a photo of my {}.', 46 | 'the plastic {}.', 47 | 'a photo of the cool {}.', 48 | 'a close-up photo of a {}.', 49 | 'a black and white photo of the {}.', 50 | 'a painting of the {}.', 51 | 'a painting of a {}.', 52 | 'a pixelated photo of the {}.', 53 | 'a sculpture of the {}.', 54 | 'a bright photo of the {}.', 55 | 'a cropped photo of a {}.', 56 | 'a plastic {}.', 57 | 'a photo of the dirty {}.', 58 | 'a jpeg corrupted photo of a {}.', 59 | 'a blurry photo of the {}.', 60 | 'a photo of the {}.', 61 | 'a good photo of the {}.', 62 | 'a rendering of the {}.', 63 | 'a {} in a video game.', 64 | 'a photo of one {}.', 65 | 'a doodle of a {}.', 66 | 'a close-up photo of the {}.', 67 | 'a photo of a {}.', 68 | 'the origami {}.', 69 | 'the {} in a video game.', 70 | 'a sketch of a {}.', 71 | 'a doodle of the {}.', 72 | 'a origami {}.', 73 | 'a low resolution photo of a {}.', 74 | 'the toy {}.', 75 | 'a rendition of the {}.', 76 | 'a photo of the clean {}.', 77 | 'a photo of a large {}.', 78 | 'a rendition of a {}.', 79 | 'a photo of a nice {}.', 80 | 'a photo of a weird {}.', 81 | 'a blurry photo of a {}.', 82 | 'a cartoon {}.', 83 | 'art of a {}.', 84 | 'a sketch of the {}.', 85 | 'a embroidered {}.', 86 | 'a pixelated photo of a {}.', 87 | 'itap of the {}.', 88 | 'a jpeg corrupted photo of the {}.', 89 | 'a good photo of a {}.', 90 | 'a plushie {}.', 91 | 'a photo of the nice {}.', 92 | 'a photo of the small {}.', 93 | 'a photo of the weird {}.', 94 | 'the cartoon {}.', 95 | 'art of the {}.', 96 | 'a drawing of the {}.', 97 | 'a photo of the large {}.', 98 | 'a black 
and white photo of a {}.', 99 | 'the plushie {}.', 100 | 'a dark photo of a {}.', 101 | 'itap of a {}.', 102 | 'graffiti of the {}.', 103 | 'a toy {}.', 104 | 'itap of my {}.', 105 | 'a photo of a cool {}.', 106 | 'a photo of a small {}.', 107 | 'a tattoo of the {}.', 108 | ] 109 | 110 | sub_imagenet_template = [ 111 | 'itap of a {}.', 'a bad photo of a {}.', 'a origami {}.', 'a photo of the large {}.', 'a {} in a video game.', 112 | 'art of the {}.', 'a photo of the small {}.' 113 | ] 114 | 115 | simple_imagenet_template = [ 116 | 'a photo of a {}.', 117 | ] 118 | 119 | identity_template = [ 120 | '{}', 121 | ] 122 | 123 | lseg_template = [ 124 | 'a {} in a scene', 125 | ] 126 | 127 | template_meta = { 128 | 'full': full_imagenet_templates, 129 | 'subset': sub_imagenet_template, 130 | 'simple': simple_imagenet_template, 131 | 'identity': identity_template, 132 | 'lseg': lseg_template, 133 | } 134 | -------------------------------------------------------------------------------- /pcseg/models/text_networks/text_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from clip import clip 5 | 6 | from ...config import cfg 7 | from ...utils import commu_utils 8 | 9 | 10 | def get_clip_model(backbone_name): 11 | url = clip._MODELS[backbone_name] 12 | if cfg.LOCAL_RANK == 0: # only download once at master node 13 | model_path = clip._download(url, os.path.expanduser("~/.cache/clip")) 14 | else: 15 | model_path = _return_clip_path(url, os.path.expanduser("~/.cache/clip")) 16 | commu_utils.synchronize() 17 | 18 | try: 19 | # loading JIT archive 20 | model = torch.jit.load(model_path, map_location="cpu").eval() 21 | state_dict = model.state_dict() 22 | except RuntimeError: 23 | state_dict = torch.load(model_path, map_location="cpu") 24 | 25 | model = clip.build_model(state_dict) 26 | return clip.tokenize, model 27 | 28 | 29 | def get_bert_model(name): 30 | from transformers import AutoTokenizer, AutoModel 31 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 32 | tokenizer = AutoTokenizer.from_pretrained(name, local_files_only=True) 33 | model = AutoModel.from_pretrained(name, local_files_only=True) 34 | return tokenizer, model 35 | 36 | 37 | def _return_clip_path(url: str, root: str): 38 | filename = os.path.basename(url) 39 | download_target = os.path.join(root, filename) 40 | return download_target 41 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .spconv_unet_indoor import SparseUNetIndoor 2 | 3 | __all__ = { 4 | 'SparseUNetIndoor': SparseUNetIndoor 5 | } 6 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/__pycache__/spconv_unet_indoor.cpython-38.pyc 
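A minimal sketch of how a frozen CLIP category-embedding file, such as the ones referenced by TASK_HEAD.TEXT_EMBED.PATH, could be produced offline with the standard CLIP API. The class list, prompt template, and output filename below are illustrative only; tools/process_tools/generate_category_embedding.py is the repository's own script for this step.

import torch
import clip  # standard OpenAI CLIP package, installed via requirements.txt

class_names = ['wall', 'floor', 'chair', 'table']            # illustrative class list
model, _ = clip.load('ViT-B/16', device='cuda')              # frozen text encoder
tokens = clip.tokenize([f'a {c} in a scene' for c in class_names]).cuda()
with torch.no_grad():
    text_embed = model.encode_text(tokens).float()
text_embed = text_embed / text_embed.norm(dim=-1, keepdim=True)  # matches NORM: True in the configs
torch.save(text_embed.cpu(), 'example_clip-ViT-B16_embed.pth')   # hypothetical output name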
-------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/spconv_unet_indoor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import functools 3 | import torch.nn as nn 4 | 5 | from ...utils.spconv_utils import spconv 6 | from ..model_utils.unet_blocks import ResidualBlock, VGGBlock, UBlock 7 | from ...utils import common_utils 8 | 9 | 10 | class SparseUNetIndoor(nn.Module): 11 | def __init__(self, model_cfg): 12 | super(SparseUNetIndoor, self).__init__() 13 | norm_fn = functools.partial(nn.BatchNorm1d, eps=1e-4, momentum=0.1) 14 | 15 | self.model_cfg = model_cfg 16 | self.in_channel = model_cfg.IN_CHANNEL 17 | self.mid_channel = model_cfg.MID_CHANNEL 18 | self.block_reps = model_cfg.BLOCK_REPS 19 | self.block_residual = model_cfg.BLOCK_RESIDUAL 20 | self.num_blocks = model_cfg.get('NUM_BLOCKS', None) 21 | self.num_filters = model_cfg.get('NUM_FILTERS', None) 22 | 23 | if self.block_residual: 24 | block = functools.partial(ResidualBlock, custom_sp1x1=self.model_cfg.get('CUSTOM_SP1X1', False)) 25 | else: 26 | block = VGGBlock 27 | 28 | self.input_conv = spconv.SparseSequential( 29 | spconv.SubMConv3d( 30 | self.in_channel, self.mid_channel, kernel_size=3, padding=1, bias=False, indice_key='subm1' 31 | ) 32 | ) 33 | 34 | if self.num_filters is not None: 35 | block_channels = self.num_filters 36 | else: 37 | assert self.num_blocks is not None 38 | block_channels = [self.mid_channel * (i + 1) for i in range(self.num_blocks)] 39 | 40 | self.unet = UBlock(block_channels, norm_fn, self.block_reps, block, indice_key_id=1) 41 | self.output_layer = spconv.SparseSequential( 42 | norm_fn(self.mid_channel), nn.ReLU() 43 | ) 44 | 45 | # init parameters 46 | self.apply(self.set_bn_init) 47 | 48 | @staticmethod 49 | def set_bn_init(m): 50 | classname = m.__class__.__name__ 51 | if classname.find('BatchNorm') != -1: 52 | m.weight.data.fill_(1.0) 53 | m.bias.data.fill_(0.0) 54 | 55 | def forward(self, batch_dict): 56 | input_sp_tensor = spconv.SparseConvTensor( 57 | batch_dict['voxel_features'], batch_dict['voxel_coords'].int(), 58 | batch_dict['spatial_shape'], batch_dict['batch_size'] 59 | ) 60 | output = self.input_conv(input_sp_tensor) 61 | output = self.unet(output) 62 | output = self.output_layer(output) 63 | output_feats = output.features 64 | # if not self.training and batch_dict['test_x4_split']: 65 | # output_feats = common_utils.merge_4_parts(output_feats) 66 | 67 | batch_dict['backbone_3d_feats'] = output_feats 68 | return batch_dict 69 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__init__.py: -------------------------------------------------------------------------------- 1 | from .vfe_template import VFETemplate 2 | from .indoor_vfe import IndoorVFE 3 | 4 | 5 | __all__ = { 6 | 'VFETemplate': VFETemplate, 7 | 'IndoorVFE': IndoorVFE 8 | } 9 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/indoor_vfe.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_backbones_3d/vfe/__pycache__/vfe_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/indoor_vfe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .vfe_template import VFETemplate 4 | from ....external_libs.softgroup_ops.ops import functions as sg_ops 5 | 6 | 7 | class IndoorVFE(VFETemplate): 8 | def __init__(self, model_cfg, voxel_mode, **kwargs): 9 | super(IndoorVFE, self).__init__(model_cfg) 10 | self.use_xyz = model_cfg.get('USE_XYZ', False) 11 | self.voxel_mode = voxel_mode 12 | 13 | def forward(self, batch): 14 | batch_size = batch['batch_size'] 15 | # voxelization 16 | # current implementation cannot support cuda 17 | # TODO: modify the voxelization part 18 | voxel_coords, v2p_map, p2v_map = sg_ops.voxelization_idx( 19 | batch['points_xyz_voxel_scale'].cpu(), batch_size, self.voxel_mode 20 | ) 21 | voxel_coords, v2p_map, p2v_map = voxel_coords.cuda(), v2p_map.cuda(), p2v_map.cuda() 22 | 23 | feats = batch['feats'] # (N, C), float32, cuda 24 | 25 | voxel_feats = sg_ops.voxelization(feats, p2v_map, self.voxel_mode) 26 | 27 | batch.update({ 28 | 'voxel_features': voxel_feats, 29 | 'v2p_map': v2p_map.long(), 30 | 'voxel_coords': voxel_coords 31 | }) 32 | 33 | return batch 34 | -------------------------------------------------------------------------------- /pcseg/models/vision_backbones_3d/vfe/vfe_template.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class VFETemplate(nn.Module): 5 | def __init__(self, model_cfg, **kwargs): 6 | super().__init__() 7 | self.model_cfg = model_cfg 8 | 9 | def get_output_feature_dim(self): 10 | raise NotImplementedError 11 | 12 | def forward(self, **kwargs): 13 | """ 14 | Args: 15 | **kwargs: 16 | 17 | Returns: 18 | batch_dict: 19 | ... 
20 | vfe_features: (num_voxels, C) 21 | """ 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .network_template import ModelTemplate 2 | from .sparseunet_textseg import SparseUNetTextSeg 3 | 4 | __all__ = { 5 | 'ModelTemplate': ModelTemplate, 6 | 'SparseUNetTextSeg': SparseUNetTextSeg 7 | } 8 | 9 | 10 | def build_model(model_cfg, num_class, dataset): 11 | model = __all__[model_cfg.NAME]( 12 | model_cfg=model_cfg, num_class=num_class, dataset=dataset 13 | ) 14 | 15 | return model 16 | -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/network_template.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/__pycache__/sparseunet_textseg.cpython-38.pyc -------------------------------------------------------------------------------- /pcseg/models/vision_networks/sparseunet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/models/vision_networks/sparseunet.py -------------------------------------------------------------------------------- /pcseg/models/vision_networks/sparseunet_textseg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .network_template import ModelTemplate 3 | 4 | 5 | class SparseUNetTextSeg(ModelTemplate): 6 | def __init__(self, model_cfg, num_class, dataset): 7 | super().__init__(model_cfg, num_class, dataset) 8 | if model_cfg.get('BINARY_HEAD', None): 9 | self.binary_head.register_hook_for_binary_head(self.backbone_3d) 10 | 11 | def forward(self, batch_dict): 12 | batch_dict['test_x4_split'] = self.test_x4_split 13 | # Order: vfe, backbone_3d, binary_head, seg_head, caption_head 14 | for cur_module in self.module_list: 15 | batch_dict = cur_module(batch_dict) 16 | 17 | ret_dict = self.task_head.forward_ret_dict 18 | if self.training: 19 | loss, tb_dict, disp_dict = self.get_training_loss() 20 | 21 | ret_dict['loss'] = loss 22 | return ret_dict, tb_dict, disp_dict 23 | else: 24 | if hasattr(self, 'inst_head') and self.inst_head is not None: 25 | ret_dict.update(self.inst_head.forward_ret_dict) 26 | return ret_dict 27 | 28 | def get_training_loss(self): 29 | disp_dict = {} 30 | tb_dict = {} 31 | 32 | # for segmentation loss 33 | if not self.task_head.eval_only: 34 | seg_loss, tb_dict_seg = 
self.task_head.get_loss() 35 | tb_dict.update(tb_dict_seg) 36 | else: 37 | seg_loss = 0 38 | 39 | # for binary loss 40 | if self.binary_head is not None: 41 | binary_loss, tb_dict_binary = self.binary_head.get_loss() 42 | tb_dict.update(tb_dict_binary) 43 | else: 44 | binary_loss = 0 45 | 46 | # for caption loss 47 | if self.caption_head is not None: 48 | caption_loss, tb_dict_caption = self.caption_head.get_loss() 49 | tb_dict.update(tb_dict_caption) 50 | else: 51 | caption_loss = 0 52 | 53 | # for inst loss 54 | if self.inst_head is not None: 55 | inst_loss, tb_dict_inst = self.inst_head.get_loss() 56 | tb_dict.update(tb_dict_inst) 57 | else: 58 | inst_loss = 0 59 | 60 | loss = seg_loss + binary_loss + caption_loss + inst_loss 61 | tb_dict['loss'] = loss.item() 62 | disp_dict.update(tb_dict) 63 | 64 | return loss, tb_dict, disp_dict 65 | -------------------------------------------------------------------------------- /pcseg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/pcseg/utils/__init__.py -------------------------------------------------------------------------------- /pcseg/utils/arnold_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class ArnoldUtils(): 5 | def __init__(self, enabled, arnold_dir, logger) -> None: 6 | self.enabled = enabled 7 | self.logger = logger 8 | self.dir = arnold_dir 9 | 10 | def save_ckpt(self, ckpt_path, last_epoch=False): 11 | if self.enabled: 12 | ckpt_dir, file_name = os.path.split(ckpt_path) 13 | # import ipdb; ipdb.set_trace(context=10) 14 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:] 15 | os.system('hdfs dfs -mkdir -p hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir, _ckpt_dir)) 16 | if last_epoch: 17 | tgt_path = os.path.join(self.dir, _ckpt_dir, 'last_train.pth') 18 | else: 19 | tgt_path = os.path.join(self.dir, _ckpt_dir, file_name) 20 | os.system('hdfs dfs -put -f {} hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(ckpt_path, tgt_path)) 21 | self.logger.info('Put model to hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}'.format(tgt_path)) 22 | 23 | def load_ckpt(self, ckpt_dir): 24 | if self.enabled: 25 | try: 26 | _ckpt_dir = ckpt_dir[ckpt_dir.find('output'):][7:] 27 | os.system('hdfs dfs -get hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}/* {}'.format(self.dir, _ckpt_dir, ckpt_dir)) 28 | self.logger.info('Get model from hdfs://haruna/home/byte_arnold_hl_vc/user/ryding/{}/{}'.format(self.dir,_ckpt_dir)) 29 | except: 30 | pass 31 | -------------------------------------------------------------------------------- /pcseg/utils/caption_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | 6 | from . 
import commu_utils 7 | from ..config import cfg 8 | 9 | 10 | def get_caption_batch(caption_cfg, text_cfg, batch_dict, text_encoder): 11 | caption_infos = {} 12 | caption_data = batch_dict['caption_data'] 13 | 14 | num_captions = 0 15 | for key in caption_cfg: 16 | if key in caption_cfg['KEY'] and caption_cfg[key].ENABLED: 17 | caption, idx = caption_data[key.lower()]['caption'], caption_data[key.lower()]['idx'] 18 | num_captions += len(caption) 19 | 20 | # caption_embed: (K, 512), caption_idx: (N), (N > K) 21 | caption_embed, caption_idx = extract_caption_embed(caption, caption_cfg[key], text_cfg, text_encoder, cfg.LOCAL_RANK) 22 | normed_caption_embed = torch.nn.functional.normalize(caption_embed, dim=-1) 23 | 24 | caption_infos['caption_{}'.format(key.lower())] = { 25 | 'caption_embed': normed_caption_embed, 'caption_idx': caption_idx, 'select_image_corr': idx 26 | } 27 | 28 | batch_dict['caption_infos'] = caption_infos 29 | batch_dict['num_caption'] = num_captions / batch_dict['batch_size'] 30 | return batch_dict 31 | 32 | 33 | def extract_caption_embed(image_captions, caption_cfg, text_cfg, text_encoder, rank): 34 | # (B*K, 512) 35 | 36 | if caption_cfg.get('GATHER_CAPTION', True): 37 | image_captions_list = commu_utils.all_gather(image_captions) 38 | image_captions_all = [jj for ii in image_captions_list for jj in ii] 39 | num_caption_list = [len(ii) for ii in image_captions_list] 40 | else: 41 | image_captions_all = image_captions 42 | num_caption_list = [0] * 100 43 | num_caption_list[rank] = len(image_captions_all) 44 | caption_embed_all = forward_text_encoder(image_captions_all, text_encoder) 45 | 46 | # remove duplicate captions and re-index them 47 | if text_cfg.get('REMOVE_DUPLICATE_CAPTIONS', True): 48 | num_caption_list = torch.LongTensor([0] + num_caption_list).cuda() 49 | idx = torch.arange(num_caption_list[rank + 1]).long().cuda() + torch.cumsum(num_caption_list, 0)[rank] 50 | caption_embeds, unique_indices = torch.unique(caption_embed_all, dim=0, return_inverse=True) 51 | caption_idx = unique_indices[idx] 52 | else: 53 | caption_embeds = caption_embed_all 54 | caption_idx = torch.arange(caption_embed_all.shape[0]).long().cuda() 55 | 56 | return caption_embeds, caption_idx 57 | 58 | 59 | def forward_text_encoder(image_captions, text_encoder): 60 | with torch.no_grad(): 61 | if len(image_captions) > 0: 62 | if cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'CLIP': 63 | text_tokens = text_encoder.tokenizer(image_captions, truncate=True).cuda() 64 | text_embed = text_encoder.encode_text(text_tokens).float() 65 | elif cfg.MODEL.TASK_HEAD.TEXT_EMBED.NAME == 'Bert': 66 | text_tokens = text_encoder.tokenizer(image_captions, return_tensors="pt", padding=True).to('cuda') 67 | text_embed = text_encoder(**text_tokens).pooler_output 68 | else: 69 | raise NotImplementedError 70 | else: 71 | text_embed = torch.zeros((0, cfg.MODEL.TASK_HEAD.TEXT_EMBED.CHANNEL), dtype=torch.float32).cuda() 72 | return text_embed 73 | 74 | 75 | def select_images(caption_cfg, image_name, image_corr): 76 | """ 77 | TODO: put this part into dataset 78 | Select part of images for training 79 | """ 80 | batch_size = len(image_name) 81 | if caption_cfg.get('SAMPLE', 1) > 1: 82 | random_start = np.random.randint(caption_cfg.SAMPLE) 83 | image_name = [(np.array(image_name[i])[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)] 84 | image_corr = [(np.array(image_corr[i], dtype=object)[random_start::caption_cfg.SAMPLE]).tolist() for i in range(batch_size)] 85 | if caption_cfg.SELECT == 'ratio' and 
caption_cfg.RATIO == 1.0: 86 | return image_name, image_corr 87 | 88 | selected_image_name = [] 89 | selected_image_corr = [] 90 | 91 | for i in range(batch_size): 92 | if image_name[i] is None or len(image_name[i]) == 0: # lack 2d data 93 | selected_image_name.append([]) 94 | selected_image_corr.append([]) 95 | selected_idx = None 96 | elif caption_cfg.SELECT == 'fixed': 97 | # view-level caotion: random select fixed number 98 | num = int(caption_cfg.NUM) 99 | selected_idx = np.random.choice(len(image_name[i]), min(num, len(image_name[i])), replace=False) 100 | elif caption_cfg.SELECT == 'ratio': 101 | # sequence slicing 102 | ratio = caption_cfg.RATIO 103 | selected_idx = np.random.choice(len(image_name[i]), max(1, int(len(image_name[i]) * ratio)), replace=False) 104 | elif caption_cfg.SELECT == 'hybrid': 105 | num = max(int(caption_cfg.NUM), int(len(image_name[i]) * caption_cfg.RATIO)) 106 | selected_idx = np.random.choice(len(image_name[i]), min(max(1, num), len(image_name[i])), replace=False) 107 | else: 108 | raise NotImplementedError 109 | 110 | if selected_idx is not None: 111 | selected_image_name.append(np.array(image_name[i])[selected_idx].tolist()) 112 | selected_image_corr.append( 113 | np.array(image_corr[i], dtype=object)[selected_idx].tolist() 114 | ) 115 | 116 | return selected_image_name, selected_image_corr 117 | 118 | -------------------------------------------------------------------------------- /pcseg/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class CosineSimilarityLoss(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, input, target, mask): 11 | selected_input = input[mask] 12 | cos_similarity = nn.functional.cosine_similarity(selected_input, target).mean() 13 | return 1 - cos_similarity 14 | 15 | 16 | class BYOLLoss(nn.Module): 17 | def __init__(self) -> None: 18 | super().__init__() 19 | 20 | def forward(self, input, target): 21 | loss = 2 - 2 * (input * target).sum(dim=-1) 22 | return loss.mean() 23 | -------------------------------------------------------------------------------- /pcseg/utils/metric_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_open_vocab_metric(metric_class, base_class_idx, novel_class_idx): 5 | if isinstance(metric_class, list): 6 | metric_class = np.array(metric_class) 7 | metric_base = np.mean(metric_class[base_class_idx]) 8 | metric_novel = np.mean(metric_class[novel_class_idx]) 9 | h_metric = 2 * metric_base * metric_novel / (metric_base + metric_novel + 10e-10) 10 | m_metric = (metric_base * len(base_class_idx) + metric_novel * len(novel_class_idx)) / (len(base_class_idx) + len(novel_class_idx)) 11 | return h_metric, m_metric, metric_base, metric_novel 12 | 13 | 14 | def cal_ov_metrics(cfg, logger, class_names, iou_class, acc_class, binary_acc_class): 15 | base_class_idx = cfg.DATA_CONFIG.base_class_idx 16 | novel_class_idx = cfg.DATA_CONFIG.novel_class_idx 17 | if cfg.DATA_CONFIG.get('trainonly_class_idx', None): 18 | trainonly_class_idx = cfg.DATA_CONFIG.trainonly_class_idx 19 | base_class_idx = [idx for idx in base_class_idx if idx not in trainonly_class_idx] 20 | novel_class_idx = [idx for idx in novel_class_idx if idx not in trainonly_class_idx] 21 | 22 | logger.info('----------- base class -----------') 23 | for i in base_class_idx: 24 | logger.info('Class 
{} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format( 25 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i]) 26 | ) 27 | logger.info('----------- novel class -----------') 28 | for i in novel_class_idx: 29 | logger.info('Class {} : iou/acc/b_acc {:.4f}/{:.4f}/{:.4f}.'.format( 30 | class_names[i], iou_class[i], acc_class[i], binary_acc_class[i]) 31 | ) 32 | hiou, miou, iou_base, iou_novel = get_open_vocab_metric( 33 | iou_class, base_class_idx, novel_class_idx 34 | ) 35 | hacc, macc, acc_base, acc_novel = get_open_vocab_metric( 36 | acc_class, base_class_idx, novel_class_idx 37 | ) 38 | return hiou, miou, iou_base, iou_novel, hacc, macc, acc_base, acc_novel 39 | -------------------------------------------------------------------------------- /pcseg/utils/spconv_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Set 2 | 3 | try: 4 | import spconv.pytorch as spconv 5 | except: 6 | import spconv as spconv 7 | 8 | import torch.nn as nn 9 | 10 | 11 | def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]: 12 | """ 13 | Finds all spconv keys that need to have weight's transposed 14 | """ 15 | found_keys: Set[str] = set() 16 | for name, child in model.named_children(): 17 | new_prefix = f"{prefix}.{name}" if prefix != "" else name 18 | 19 | if isinstance(child, spconv.conv.SparseConvolution): 20 | new_prefix = f"{new_prefix}.weight" 21 | found_keys.add(new_prefix) 22 | 23 | found_keys.update(find_all_spconv_keys(child, prefix=new_prefix)) 24 | 25 | return found_keys 26 | 27 | 28 | def replace_feature(out, new_features): 29 | if "replace_feature" in out.__dir__(): 30 | # spconv 2.x behaviour 31 | return out.replace_feature(new_features) 32 | else: 33 | out.features = new_features 34 | return out 35 | -------------------------------------------------------------------------------- /pcseg/utils/voxelize_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxelization manner from Xin Lai 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def ravel_hash_vec(arr): 9 | """ 10 | Ravel the coordinates after subtracting the min coordinates. 
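Each shifted integer coordinate row is folded into a single scalar key (in the spirit of np.ravel_multi_index), so duplicate voxels can later be grouped with np.unique on the keys.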
11 | """ 12 | assert arr.ndim == 2 13 | arr = arr.copy() 14 | arr -= arr.min(0) 15 | arr = arr.astype(np.uint64, copy=False) 16 | arr_max = arr.max(0).astype(np.uint64) + 1 17 | 18 | keys = np.zeros(arr.shape[0], dtype=np.uint64) 19 | # Fortran style indexing 20 | for j in range(arr.shape[1] - 1): 21 | keys += arr[:, j] 22 | keys *= arr_max[j + 1] 23 | keys += arr[:, -1] 24 | return keys 25 | 26 | 27 | def fnv_hash_vec(arr): 28 | """ 29 | FNV64-1A 30 | """ 31 | assert arr.ndim == 2 32 | # Floor first for negative coordinates 33 | arr = arr.copy() 34 | arr = arr.astype(np.uint64, copy=False) 35 | hashed_arr = np.uint64(14695981039346656037) * np.ones(arr.shape[0], dtype=np.uint64) 36 | for j in range(arr.shape[1]): 37 | hashed_arr *= np.uint64(1099511628211) 38 | hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) 39 | return hashed_arr 40 | 41 | 42 | def voxelize_with_rec_idx(coord, voxel_size=0.05, hash_type='fnv', training=True): 43 | discrete_coord = np.floor(coord / np.array(voxel_size)) 44 | if hash_type == 'ravel': 45 | key = ravel_hash_vec(discrete_coord) 46 | else: 47 | key = fnv_hash_vec(discrete_coord) 48 | 49 | idx_sort = np.argsort(key) 50 | key_sort = key[idx_sort] 51 | _, count = np.unique(key_sort, return_counts=True) 52 | if training: 53 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + np.random.randint(0, count.max(), count.size) % count 54 | else: 55 | idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) 56 | 57 | idx_unique = idx_sort[idx_select] 58 | sorted_idx = np.zeros(key.shape[0]).astype(np.int) 59 | sorted_idx[idx_select] = 1 60 | sorted_idx = np.cumsum(sorted_idx) - 1 61 | idx_recon = np.zeros(key.shape[0]).astype(np.int) 62 | idx_recon[idx_sort] = sorted_idx 63 | return idx_unique, idx_recon 64 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | numpy 3 | torch==1.8.1+cu111 4 | torchvision==0.9.1+cu111 5 | tensorboardX 6 | easydict 7 | pyyaml 8 | tqdm 9 | SharedArray 10 | scipy 11 | opencv-python 12 | plyfile 13 | matplotlib 14 | scikit-learn 15 | scikit-image 16 | pandas 17 | transformers 18 | clip @ git+https://github.com/openai/CLIP.git 19 | spconv-cu111 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from setuptools import find_packages, setup 5 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 6 | 7 | 8 | def get_git_commit_number(): 9 | if not os.path.exists('.git'): 10 | return '0000000' 11 | 12 | cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE) 13 | git_commit_number = cmd_out.stdout.decode('utf-8')[:7] 14 | return git_commit_number 15 | 16 | 17 | def make_cuda_ext(name, module, sources): 18 | cuda_ext = CUDAExtension( 19 | name='%s.%s' % (module, name), 20 | sources=[os.path.join(*module.split('.'), src) for src in sources] 21 | ) 22 | return cuda_ext 23 | 24 | 25 | def write_version_to_file(version, target_file): 26 | with open(target_file, 'w') as f: 27 | print('__version__ = "%s"' % version, file=f) 28 | 29 | 30 | if __name__ == '__main__': 31 | version = '0.1.0+%s' % get_git_commit_number() 32 | write_version_to_file(version, 'pcseg/version.py') 33 | 34 | setup( 35 | name='pcseg', 36 | version=version, 37 | description='PCSeg', 38 | install_requires=[ 39 | 'numpy', 40 | 
'tensorboardX', 41 | 'easydict', 42 | 'pyyaml', 43 | 'tqdm', 44 | 'SharedArray', 45 | # 'spconv', # spconv has different names depending on the cuda version 46 | ], 47 | 48 | author='Jihan Yang', 49 | author_email='jihanyang13@gmail.com', 50 | license='Apache License 2.0', 51 | packages=find_packages(exclude=['tools', 'data', 'output']), 52 | cmdclass={ 53 | 'build_ext': BuildExtension, 54 | }, 55 | ext_modules=[], 56 | ) 57 | -------------------------------------------------------------------------------- /tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../') -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/s3dis_dataset.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/s3dis 2 | DATASET: S3DISDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .npy 11 | test_area: 5 12 | 13 | IGNORE_LABEL: -100 14 | 15 | DATA_AUG: 16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 17 | scene_aug: 18 | scaling_scene: 19 | enabled: False 20 | p: 1.0 21 | value: [0.9, 1.1] 22 | 23 | rotation: 24 | p: 1.0 25 | value: [0.0, 0.0, 1.0] 26 | 27 | jitter: True 28 | color_jitter: True 29 | 30 | flip: 31 | p: 0.5 32 | 33 | random_jitter: 34 | enabled: False 35 | value: 0.01 36 | accord_to_size: False 37 | p: 1.0 38 | 39 | elastic: 40 | enabled: True 41 | value: [[6, 40], [20, 160]] 42 | apply_to_feat: False 43 | p: 1.0 44 | 45 | crop: 46 | step: 64 47 | 48 | shuffle: True 49 | 50 | DATA_PROCESSOR: 51 | repeat: 20 52 | rgb_norm: True 53 | point_range: 200000000 54 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 55 | cache: True 56 | max_npoint: 250000 57 | full_scale: [128, 512] 58 | voxel_mode: 4 59 | xyz_norm: False 60 | x4_split: True 61 | downsampling_scale: 4 62 | xyz_as_feat: True 63 | rgb_as_feat: True 64 | 65 | PROCESS_LIST: [] 66 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/s3dis_dataset_image.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/s3dis 2 | DATASET: S3DISDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .npy 11 | test_area: 5 12 | 13 | IGNORE_LABEL: -100 14 | 15 | DATA_AUG: 16 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 17 | scene_aug: 18 | scaling_scene: 19 | enabled: False 20 | p: 1.0 21 | value: [0.9, 1.1] 22 | 23 | rotation: 24 | p: 1.0 25 | value: [0.0, 0.0, 1.0] 26 | 27 | jitter: True 28 | color_jitter: True 29 | 30 | flip: 31 | p: 0.5 32 | 33 | random_jitter: 34 | enabled: False 35 | value: 0.01 36 | accord_to_size: False 37 | p: 1.0 38 | 39 | elastic: 40 | enabled: True 41 | value: [[6, 40], [20, 160]] 42 | apply_to_feat: False 43 | p: 1.0 44 | 45 | crop: 46 | step: 64 47 | 48 | shuffle: True 49 | 50 | DATA_PROCESSOR: 51 | repeat: 20 52 | rgb_norm: True 53 | point_range: 200000000 54 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 55 | cache: False 56 | max_npoint: 250000 57 | full_scale: [128, 512] 58 | voxel_mode: 4 59 | xyz_norm: False 60 | x4_split: True 61 | downsampling_scale: 4 62 | xyz_as_feat: True 63 | rgb_as_feat: True 64 | 65 | PROCESS_LIST: [] 66 | 67 | 68 | IMAGE_PATH: s3dis_2d 69 | DEPTH_IMAGE_SCALE: [1080, 1080] 70 | 
LOAD_IMAGE: True 71 | MERGE_IDX: True 72 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/scannet_dataset.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/scannetv2 2 | DATASET: ScanNetDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .pth 11 | 12 | IGNORE_LABEL: -100 13 | 14 | DATA_AUG: 15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 16 | scene_aug: 17 | scaling_scene: 18 | enabled: False 19 | p: 1.0 20 | value: [0.9, 1.1] 21 | 22 | rotation: 23 | p: 1.0 24 | value: [0.0, 0.0, 1.0] 25 | 26 | jitter: True 27 | color_jitter: True 28 | 29 | flip: 30 | p: 0.5 31 | 32 | random_jitter: 33 | enabled: False 34 | value: 0.01 35 | accord_to_size: False 36 | p: 1.0 37 | 38 | elastic: 39 | enabled: True 40 | value: [[6, 40], [20, 160]] 41 | apply_to_feat: False 42 | p: 1.0 43 | 44 | crop: 45 | step: 32 46 | 47 | shuffle: True 48 | 49 | DATA_PROCESSOR: 50 | repeat: 4 51 | rgb_norm: True 52 | point_range: 200000000 53 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 54 | cache: True 55 | max_npoint: 250000 56 | full_scale: [128, 512] 57 | voxel_mode: 4 58 | xyz_norm: False 59 | xyz_as_feat: True 60 | rgb_as_feat: True 61 | 62 | PROCESS_LIST: [] 63 | -------------------------------------------------------------------------------- /tools/cfgs/dataset_configs/scannet_dataset_image.yaml: -------------------------------------------------------------------------------- 1 | DATA_PATH: ../data/scannetv2 2 | DATASET: ScanNetDataset 3 | 4 | COLLATE_FN: collate_batch_indoor 5 | MIN_SPATIAL_SCALE: 128 6 | 7 | DATA_SPLIT: 8 | train: train 9 | test: val 10 | data_suffix: .pth 11 | 12 | IGNORE_LABEL: -100 13 | 14 | DATA_AUG: 15 | AUG_LIST: [ scene_aug, elastic, crop, shuffle ] 16 | scene_aug: 17 | scaling_scene: 18 | enabled: False 19 | p: 1.0 20 | value: [0.9, 1.1] 21 | 22 | rotation: 23 | p: 1.0 24 | value: [0.0, 0.0, 1.0] 25 | 26 | jitter: True 27 | color_jitter: True 28 | 29 | flip: 30 | p: 0.5 31 | 32 | random_jitter: 33 | enabled: False 34 | value: 0.01 35 | accord_to_size: False 36 | p: 1.0 37 | 38 | elastic: 39 | enabled: True 40 | value: [[6, 40], [20, 160]] 41 | apply_to_feat: False 42 | p: 1.0 43 | 44 | crop: 45 | step: 32 46 | 47 | shuffle: True 48 | 49 | DATA_PROCESSOR: 50 | repeat: 4 51 | rgb_norm: True 52 | point_range: 200000000 53 | voxel_scale: 50 # voxel_size = 1 / scale, scale 25(0.02m) 54 | cache: True 55 | max_npoint: 250000 56 | full_scale: [128, 512] 57 | voxel_mode: 4 58 | xyz_norm: False 59 | xyz_as_feat: True 60 | rgb_as_feat: True 61 | 62 | PROCESS_LIST: [] 63 | 64 | 65 | IMAGE_PATH: scannet_frames_25k 66 | DEPTH_IMAGE_SCALE: [480, 640] 67 | LOAD_IMAGE: True 68 | MERGE_IDX: True -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter] 2 | 3 | DATA_CONFIG: 4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml 5 | DATASET: S3DISInstDataset 6 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] 7 | sem2ins_classes: [ 0, 1 ] 8 | inst_label_shift: 0 9 | 10 | ignore_class_idx: [12] 11 | 12 | MODEL: 13 | NAME: SparseUNetTextSeg 14 | REMAP_FROM_3DLANG: False 15 | 16 | VFE: 17 | 
NAME: IndoorVFE 18 | USE_XYZ: True 19 | 20 | BACKBONE_3D: 21 | NAME: SparseUNetIndoor 22 | IN_CHANNEL: 6 23 | MID_CHANNEL: 16 24 | BLOCK_RESIDUAL: True 25 | BLOCK_REPS: 2 26 | NUM_BLOCKS: 7 27 | CUSTOM_SP1X1: True 28 | 29 | ADAPTER: 30 | NAME: VLAdapter 31 | EVAL_ONLY: False 32 | NUM_ADAPTER_LAYERS: 2 33 | TEXT_DIM: -1 34 | LAST_NORM: True 35 | 36 | TASK_HEAD: 37 | NAME: TextSegHead 38 | FEAT_NORM: False 39 | 40 | LAST_NORM: True 41 | TEXT_EMBED: 42 | NAME: CLIP 43 | NORM: True 44 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth 45 | FEAT_NORM: False 46 | LOGIT_SCALE: 47 | value: 1.0 48 | learnable: False 49 | 50 | INST_HEAD: 51 | NAME: InstHead 52 | 53 | BLOCK_RESIDUAL: True 54 | CUSTOM_SP1X1: True 55 | 56 | CLUSTERING: 57 | PREPARE_EPOCH: 20 58 | GROUPING_CFG: 59 | SCORE_THR: 0.2 60 | RADIUS: 0.04 61 | MEAN_ACTIVE: 300 62 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 63 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.] 64 | NPOINT_THR: 500 # absolute if class_numpoint == -1, relative if class_numpoint != -1 65 | IGNORE_CLASSES: [0, 1] 66 | INST_VOXEL_CFG: 67 | SCALE: 50 68 | SPATIAL_SHAPE: 20 69 | LOSS_CFG: 70 | MAX_PROPOSAL_NUM: 200 71 | POS_IOU_THR: 0.5 72 | TEST_CFG: 73 | # x4_split: False 74 | CLS_SCORE_THR: 0.001 75 | MASK_SCORE_THR: -0.5 76 | MIN_NPOINT: 100 77 | 78 | FIXED_MODULES: [] 79 | SEMANTIC_ONLY: False 80 | 81 | 82 | TEXT_ENCODER: 83 | NAME: CLIP 84 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 85 | TEMPLATE: identity 86 | EXTRACT_EMBED: False # Online extract text embeding from class or not 87 | # BERT: 88 | # BACKBONE: bert-base-uncased 89 | 90 | OPTIMIZATION: 91 | TEST_BATCH_SIZE_PER_GPU: 1 92 | BATCH_SIZE_PER_GPU: 4 93 | NUM_EPOCHS: 64 94 | LR: 0.004 # 4e-3 95 | SCHEDULER: cos_after_step 96 | OPTIMIZER: adamw 97 | WEIGHT_DECAY: 0.0001 98 | MOMENTUM: 0.9 99 | STEP_EPOCH: 40 100 | MULTIPLIER: 0.1 101 | CLIP_GRAD: False 102 | PCT_START: 0.52 103 | DIV_FACTOR: 2 104 | MOMS: [0.95, 0.85] 105 | LR_CLIP: 0.000001 106 | 107 | OTHERS: 108 | PRINT_FREQ: 20 109 | SYNC_BN: False 110 | USE_AMP: True 111 | EVAL_FREQ: 5 112 | FIND_UNUSED_PARAMETERS: True 113 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_base6_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [0, 2, 3, 4, 8, 9] 5 | novel_class_idx: [1, 5, 6, 7, 10, 11] 6 | ignore_class_idx: [12] 7 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/inst/softgroup_clip_base8_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/inst/softgroup_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | inst_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ] 6 | novel_class_idx: [ 5, 7, 9, 10 ] 7 | ignore_class_idx: [ 12 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | SCENE: 13 | ENABLED: False 14 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json 15 | GATHER_CAPTION: True 16 | 17 | VIEW: 18 | ENABLED: True 19 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json 20 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx 21 | SELECT: ratio 22 | 
NUM: 1 23 | RATIO: 0.2 24 | GATHER_CAPTION: True 25 | 26 | ENTITY: 27 | ENABLED: True 28 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json 29 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx 30 | SELECT: ratio 31 | NUM: 1 32 | RATIO: 1.0 33 | GATHER_CAPTION: True 34 | 35 | CAPTION_CORR_PATH_IN_ONE_FILE: False 36 | 37 | 38 | MODEL: 39 | 40 | BINARY_HEAD: 41 | NAME: BinaryHead 42 | DETACH: True 43 | THRESH: 0.5 44 | CUSTOM_SP1X1: True 45 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 46 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 47 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.u.u.blocks.block1' ] 49 | 50 | TASK_HEAD: 51 | NAME: TextSegHead 52 | CORRECT_SEG_PRED_BINARY: True 53 | 54 | CAPTION_HEAD: 55 | NAME: CaptionHead 56 | FEAT_NORM: True 57 | LOGIT_SCALE: 58 | value: 100.0 59 | learnable: True 60 | LOSS_WEIGHT: 61 | SCENE: 0.0 62 | VIEW: 0.08 63 | ENTITY: 0.02 64 | 65 | INST_HEAD: 66 | CORRECT_SEG_PRED_BINARY: Tru -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [ceiling, floor, wall, beam, column, window, door, table, chair, sofa, bookcase, board, clutter] 2 | 3 | DATA_CONFIG: 4 | _BASE_CONFIG_: cfgs/dataset_configs/s3dis_dataset.yaml 5 | ignore_class_idx: [12] 6 | 7 | MODEL: 8 | NAME: SparseUNetTextSeg 9 | REMAP_FROM_3DLANG: False 10 | REMAP_FROM_NOADAPTER: False 11 | 12 | VFE: 13 | NAME: IndoorVFE 14 | USE_XYZ: True 15 | 16 | BACKBONE_3D: 17 | NAME: SparseUNetIndoor 18 | IN_CHANNEL: 6 19 | MID_CHANNEL: 16 20 | BLOCK_RESIDUAL: True 21 | BLOCK_REPS: 2 22 | NUM_BLOCKS: 7 23 | CUSTOM_SP1X1: True 24 | 25 | ADAPTER: 26 | NAME: VLAdapter 27 | EVAL_ONLY: False 28 | NUM_ADAPTER_LAYERS: 2 29 | TEXT_DIM: -1 30 | LAST_NORM: True 31 | FEAT_NORM: False 32 | 33 | TASK_HEAD: 34 | NAME: TextSegHead 35 | 36 | TEXT_EMBED: 37 | NAME: CLIP 38 | NORM: True 39 | PATH: text_embed/s3dis_clip-ViT-B16_id.pth 40 | 41 | LOGIT_SCALE: 42 | value: 1.0 43 | learnable: False 44 | 45 | TEXT_ENCODER: 46 | NAME: CLIP 47 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 48 | TEMPLATE: identity 49 | EXTRACT_EMBED: False # Online extract text embeding from class or not 50 | # BERT: 51 | # BACKBONE: bert-base-uncased 52 | 53 | OPTIMIZATION: 54 | TEST_BATCH_SIZE_PER_GPU: 1 55 | BATCH_SIZE_PER_GPU: 4 56 | NUM_EPOCHS: 32 57 | LR: 0.004 # 4e-3 58 | SCHEDULER: cos_after_step 59 | OPTIMIZER: adamw 60 | WEIGHT_DECAY: 0.0001 61 | MOMENTUM: 0.9 62 | STEP_EPOCH: 20 63 | MULTIPLIER: 0.1 64 | CLIP_GRAD: False 65 | PCT_START: 0.39 66 | DIV_FACTOR: 1 67 | MOMS: [0.95, 0.85] 68 | LR_CLIP: 0.000001 69 | 70 | OTHERS: 71 | PRINT_FREQ: 20 72 | EVAL_FREQ: 5 73 | SYNC_BN: False 74 | USE_AMP: True -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_base6_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [ 0, 2, 3, 4, 8, 9 ] 5 | novel_class_idx: [ 1, 5, 6, 7, 10, 11 ] 6 | ignore_class_idx: [ 12 ] 7 | -------------------------------------------------------------------------------- /tools/cfgs/s3dis_models/spconv_clip_base8_caption_adamw.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/s3dis_models/spconv_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 8, 11 ] 5 | novel_class_idx: [ 5, 7, 9, 10 ] 6 | ignore_class_idx: [ 12 ] 7 | 8 | CAPTION_INFO: 9 | 10 | KEY: [SCENE, VIEW, ENTITY] 11 | SCENE: 12 | ENABLED: False 13 | CAPTION_PATH: text_embed/caption_scene_s3dis_vit-gpt2-image-captioning_max50.json 14 | GATHER_CAPTION: True 15 | 16 | VIEW: 17 | ENABLED: True 18 | CAPTION_PATH: text_embed/caption_view_s3dis_vit-gpt2-image-captioning_max50.json 19 | IMAGE_CORR_PATH: caption_idx/s3dis_view_vit-gpt2_matching_idx 20 | SELECT: ratio 21 | NUM: 1 22 | RATIO: 0.2 23 | GATHER_CAPTION: True 24 | 25 | ENTITY: 26 | ENABLED: True 27 | CAPTION_PATH: text_embed/caption_entity_s3dis_vit-gpt2-image-captioning_max50.json 28 | IMAGE_CORR_PATH: caption_idx/s3dis_entity_vit-gpt2_matching_idx 29 | SELECT: ratio 30 | NUM: 1 31 | RATIO: 1.0 32 | GATHER_CAPTION: True 33 | 34 | CAPTION_CORR_PATH_IN_ONE_FILE: False 35 | 36 | 37 | MODEL: 38 | 39 | BINARY_HEAD: 40 | NAME: BinaryHead 41 | DETACH: True 42 | THRESH: 0.5 43 | CUSTOM_SP1X1: True 44 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 45 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 46 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 47 | 'unet.u.u.u.u.u.u.blocks.block1' ] 48 | 49 | TASK_HEAD: 50 | NAME: TextSegHead 51 | CORRECT_SEG_PRED_BINARY: True 52 | 53 | 54 | CAPTION_HEAD: 55 | NAME: CaptionHead 56 | FEAT_NORM: True 57 | LOGIT_SCALE: 58 | value: 100.0 59 | learnable: True 60 | LOSS_WEIGHT: 61 | SCENE: 0.0 62 | VIEW: 0.08 63 | ENTITY: 0.02 64 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, desk, 2 | curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture] 3 | 4 | DATA_CONFIG: 5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml 6 | DATASET: ScanNetInstDataset 7 | inst_class_idx: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 8 | sem2ins_classes: [] 9 | inst_label_shift: 2 10 | 11 | ignore_class_idx: [19] 12 | 13 | MODEL: 14 | NAME: SparseUNetTextSeg 15 | REMAP_FROM_3DLANG: False 16 | 17 | VFE: 18 | NAME: IndoorVFE 19 | USE_XYZ: True 20 | 21 | BACKBONE_3D: 22 | NAME: SparseUNetIndoor 23 | IN_CHANNEL: 6 24 | MID_CHANNEL: 16 25 | BLOCK_RESIDUAL: True 26 | BLOCK_REPS: 2 27 | NUM_BLOCKS: 7 28 | CUSTOM_SP1X1: True 29 | 30 | ADAPTER: 31 | NAME: VLAdapter 32 | EVAL_ONLY: False 33 | NUM_ADAPTER_LAYERS: 2 34 | TEXT_DIM: -1 35 | LAST_NORM: True 36 | 37 | TASK_HEAD: 38 | NAME: TextSegHead 39 | FEAT_NORM: False 40 | 41 | LAST_NORM: True 42 | TEXT_EMBED: 43 | NAME: CLIP 44 | NORM: True 45 | PATH: text_embed/scannet_clip-ViT-B16_id.pth 46 | FEAT_NORM: False 47 | LOGIT_SCALE: 48 | value: 1.0 49 | learnable: False 50 | 51 | INST_HEAD: 52 | NAME: InstHead 53 | 54 | BLOCK_RESIDUAL: True 55 | CUSTOM_SP1X1: True 56 | 57 | CLUSTERING: 58 | PREPARE_EPOCH: 32 59 | GROUPING_CFG: 60 | SCORE_THR: 0.2 61 | RADIUS: 0.04 62 | MEAN_ACTIVE: 300 63 | CLASS_NUMPOINT_MEAN: [-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 64 | -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.] 
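# Note: with every CLASS_NUMPOINT_MEAN entry set to -1, all classes fall back to the absolute
# NPOINT_THR below; IGNORE_CLASSES [0, 1] below correspond to wall/floor in CLASS_NAMES above,
# which are excluded from instance grouping.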
65 | NPOINT_THR: 50 # absolute if class_numpoint == -1, relative if class_numpoint != -1 66 | IGNORE_CLASSES: [0, 1] 67 | INST_VOXEL_CFG: 68 | SCALE: 50 69 | SPATIAL_SHAPE: 20 70 | LOSS_CFG: 71 | MAX_PROPOSAL_NUM: 200 72 | POS_IOU_THR: 0.5 73 | TEST_CFG: 74 | # x4_split: False 75 | CLS_SCORE_THR: 0.001 76 | MASK_SCORE_THR: -0.5 77 | MIN_NPOINT: 100 78 | 79 | FIXED_MODULES: [] 80 | SEMANTIC_ONLY: False 81 | 82 | 83 | TEXT_ENCODER: 84 | NAME: CLIP 85 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 86 | TEMPLATE: identity 87 | EXTRACT_EMBED: False # Whether to extract text embeddings online from the class names 88 | # BERT: 89 | # BACKBONE: bert-base-uncased 90 | 91 | OPTIMIZATION: 92 | TEST_BATCH_SIZE_PER_GPU: 1 93 | BATCH_SIZE_PER_GPU: 4 94 | NUM_EPOCHS: 150 95 | LR: 0.004 # 4e-3 96 | SCHEDULER: cos_after_step 97 | OPTIMIZER: adamw 98 | WEIGHT_DECAY: 0.0001 99 | MOMENTUM: 0.9 100 | STEP_EPOCH: 82 101 | MULTIPLIER: 0.1 102 | CLIP_GRAD: False 103 | PCT_START: 0.52 104 | DIV_FACTOR: 2 105 | MOMS: [0.95, 0.85] 106 | LR_CLIP: 0.000001 107 | 108 | OTHERS: 109 | PRINT_FREQ: 20 110 | SYNC_BN: False 111 | USE_AMP: True 112 | EVAL_FREQ: 10 113 | FIND_UNUSED_PARAMETERS: True 114 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base10_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all. 5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 6 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ] 7 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ] 6 | novel_class_idx: [ 5, 9, 12, 16 ] 7 | ignore_class_idx: [ 19 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | 13 | SCENE: 14 | ENABLED: False 15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json 16 | GATHER_CAPTION: True 17 | 18 | VIEW: 19 | ENABLED: True 20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json 21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle 22 | SELECT: ratio 23 | NUM: 1 24 | RATIO: 0.5 25 | GATHER_CAPTION: True 26 | 27 | ENTITY: 28 | ENABLED: True 29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json 30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle 31 | SELECT: ratio 32 | NUM: 1 33 | RATIO: 1.0 34 | GATHER_CAPTION: True 35 | 36 | CAPTION_CORR_PATH_IN_ONE_FILE: True 37 | 38 | 39 | MODEL: 40 | 41 | BINARY_HEAD: 42 | NAME: BinaryHead 43 | DETACH: True 44 | THRESH: 0.5 45 | CUSTOM_SP1X1: True 46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 49 |
'unet.u.u.u.u.u.u.blocks.block1' ] 50 | 51 | TASK_HEAD: 52 | NAME: TextSegHead 53 | CORRECT_SEG_PRED_BINARY: True 54 | 55 | CAPTION_HEAD: 56 | NAME: CaptionHead 57 | FEAT_NORM: True 58 | LOGIT_SCALE: 59 | value: 100.0 60 | learnable: True 61 | LOSS_WEIGHT: 62 | SCENE: 0.0 63 | VIEW: 0.05 64 | ENTITY: 0.05 65 | 66 | INST_HEAD: 67 | CORRECT_SEG_PRED_BINARY: True 68 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_base8_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all. 5 | inst_class_idx: [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ] 6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ] 7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/inst/softgroup_clip_openvocab_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/inst/softgroup_clip_base13_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: split the input categories into base/novel/ignore. 5 | # Note that if you have ground-truth annotations for the test samples, 6 | # you need to carefully set those parameters to evaluate the performance quantitatively. 7 | # If you just want to evaluate it qualitatively, you can just put all the categories into base_class_idx. 8 | base_class_idx: [ 0, 1, 2, 3, 4] 9 | novel_class_idx: [] 10 | ignore_class_idx: [ ] 11 | 12 | # TODO: split the categories into inst_base/inst_novel 13 | inst_class_idx: [2, 3] 14 | base_inst_class_idx: [0, 1] # the base category indices for instance categories.
The length of this list should be the same as or smaller than the length of inst_class_idx 15 | novel_inst_class_idx: [] 16 | 17 | MODEL: 18 | TASK_HEAD: 19 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 20 | 21 | INST_HEAD: 22 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 23 | CLUSTERING: 24 | PREPARE_EPOCH: -1 25 | 26 | TEXT_ENCODER: 27 | EXTRACT_EMBED: True 28 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_adamw.yaml: -------------------------------------------------------------------------------- 1 | CLASS_NAMES: [wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, 2 | desk, curtain, refrigerator, showercurtain, toilet, sink, bathtub, otherfurniture] 3 | 4 | DATA_CONFIG: 5 | _BASE_CONFIG_: cfgs/dataset_configs/scannet_dataset.yaml 6 | ignore_class_idx: [19] 7 | 8 | MODEL: 9 | NAME: SparseUNetTextSeg 10 | REMAP_FROM_3DLANG: False 11 | REMAP_FROM_NOADAPTER: False 12 | 13 | VFE: 14 | NAME: IndoorVFE 15 | USE_XYZ: True 16 | 17 | BACKBONE_3D: 18 | NAME: SparseUNetIndoor 19 | IN_CHANNEL: 6 20 | MID_CHANNEL: 16 21 | BLOCK_RESIDUAL: True 22 | BLOCK_REPS: 2 23 | NUM_BLOCKS: 7 24 | CUSTOM_SP1X1: True 25 | 26 | ADAPTER: 27 | NAME: VLAdapter 28 | EVAL_ONLY: False 29 | NUM_ADAPTER_LAYERS: 2 30 | TEXT_DIM: -1 31 | LAST_NORM: True 32 | 33 | TASK_HEAD: 34 | NAME: TextSegHead 35 | FEAT_NORM: False 36 | 37 | TEXT_EMBED: 38 | NAME: CLIP 39 | NORM: True 40 | PATH: text_embed/scannet_clip-ViT-B16_id.pth 41 | 42 | LOGIT_SCALE: 43 | value: 1.0 44 | learnable: False 45 | 46 | TEXT_ENCODER: 47 | NAME: CLIP 48 | BACKBONE: ViT-B/16 # ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14'] 49 | TEMPLATE: identity 50 | EXTRACT_EMBED: False # Whether to extract text embeddings online from the class names 51 | # BERT: 52 | # BACKBONE: bert-base-uncased 53 | 54 | 55 | OPTIMIZATION: 56 | BATCH_SIZE_PER_GPU: 4 57 | NUM_EPOCHS: 128 58 | LR: 0.004 # 4e-3 59 | SCHEDULER: cos_after_step 60 | OPTIMIZER: adamw 61 | WEIGHT_DECAY: 0.0001 62 | MOMENTUM: 0.9 63 | STEP_EPOCH: 50 64 | MULTIPLIER: 0.1 65 | CLIP_GRAD: False 66 | PCT_START: 0.39 67 | DIV_FACTOR: 1 68 | MOMS: [0.95, 0.85] 69 | LR_CLIP: 0.000001 70 | 71 | OTHERS: 72 | PRINT_FREQ: 20 73 | SYNC_BN: False 74 | USE_AMP: True -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base10_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | 4 | DATA_CONFIG: 5 | # TODO: make base + novel = all. 6 | base_class_idx: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 ] 7 | novel_class_idx: [ 9, 10, 11, 12, 14, 15, 16, 17, 18 ] 8 | ignore_class_idx: [ 19 ] 9 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base12_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: make base + novel = all.
5 | base_class_idx: [ 0, 1, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16 ] 6 | novel_class_idx: [ 3, 4, 6, 9, 10, 17, 18 ] 7 | ignore_class_idx: [ 19 ] 8 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_adamw.yaml 2 | 3 | 4 | DATA_CONFIG: 5 | base_class_idx: [ 0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 17, 18 ] 6 | novel_class_idx: [ 5, 9, 12, 16 ] 7 | ignore_class_idx: [ 19 ] 8 | 9 | CAPTION_INFO: 10 | 11 | KEY: [SCENE, VIEW, ENTITY] 12 | 13 | SCENE: 14 | ENABLED: False 15 | CAPTION_PATH: text_embed/caption_scene_scannet_vit-gpt2-image-captioning_25k.json 16 | GATHER_CAPTION: True 17 | 18 | VIEW: 19 | ENABLED: True 20 | CAPTION_PATH: text_embed/caption_view_scannet_vit-gpt2-image-captioning_25k.json 21 | IMAGE_CORR_PATH: caption_idx/scannetv2_view_vit-gpt2_matching_idx.pickle 22 | SELECT: ratio 23 | NUM: 1 24 | RATIO: 0.5 25 | GATHER_CAPTION: True 26 | 27 | ENTITY: 28 | ENABLED: True 29 | CAPTION_PATH: text_embed/caption_entity_scannet_vit-gpt2-image-captioning_25k.json 30 | IMAGE_CORR_PATH: caption_idx/scannetv2_entity_vit-gpt2_matching_idx.pickle 31 | SELECT: ratio 32 | NUM: 1 33 | RATIO: 1.0 34 | GATHER_CAPTION: True 35 | 36 | CAPTION_CORR_PATH_IN_ONE_FILE: True 37 | 38 | 39 | MODEL: 40 | 41 | BINARY_HEAD: 42 | NAME: BinaryHead 43 | DETACH: True 44 | THRESH: 0.5 45 | CUSTOM_SP1X1: True 46 | HOOK_FEATURE_LIST: ['unet.blocks.block1', 'unet.u.blocks.block1', 47 | 'unet.u.u.blocks.block1', 'unet.u.u.u.blocks.block1', 48 | 'unet.u.u.u.u.blocks.block1', 'unet.u.u.u.u.u.blocks.block1', 49 | 'unet.u.u.u.u.u.u.blocks.block1' ] 50 | 51 | TASK_HEAD: 52 | NAME: TextSegHead 53 | CORRECT_SEG_PRED_BINARY: True 54 | 55 | CAPTION_HEAD: 56 | NAME: CaptionHead 57 | FEAT_NORM: True 58 | LOGIT_SCALE: 59 | value: 100.0 60 | learnable: True 61 | LOSS_FUNC: CrossEntropy 62 | LOSS_WEIGHT: 63 | SCENE: 0.0 64 | VIEW: 0.05 65 | ENTITY: 0.05 66 | -------------------------------------------------------------------------------- /tools/cfgs/scannet_models/spconv_clip_openvocab_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_CONFIG_: cfgs/scannet_models/spconv_clip_base15_caption_adamw.yaml 2 | 3 | DATA_CONFIG: 4 | # TODO: split the input categories into base/novel/ignore. 5 | # Note that if you have ground-truth annotations for the test samples, 6 | # you need to carefully set those parameters to evaluate the performance quantitatively. 7 | # If you just want to evaluate it qualitatively, you can just put all the categories into base_class_idx.
8 | base_class_idx: [ 0, 1, 2, 3, 4] 9 | novel_class_idx: [] 10 | ignore_class_idx: [ ] 11 | 12 | MODEL: 13 | TASK_HEAD: 14 | CORRECT_SEG_PRED_BINARY: True # TODO: For out-of-domain data, setting this to False may lead to better performance 15 | 16 | TEXT_ENCODER: 17 | EXTRACT_EMBED: True 18 | CATEGORY_NAMES: [door, window, desk, keyboard, others] # TODO: input your custom categories -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/eval_utils/inst_eval/__init__.py -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/instance_eval_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import numpy as np 5 | from plyfile import PlyData 6 | 7 | 8 | # matrix: 4x4 np array 9 | # points Nx3 np array 10 | def transform_points(matrix, points): 11 | assert len(points.shape) == 2 and points.shape[1] == 3 12 | num_points = points.shape[0] 13 | p = np.concatenate([points, np.ones((num_points, 1))], axis=1) 14 | p = np.matmul(matrix, np.transpose(p)) 15 | p = np.transpose(p) 16 | p[:, :3] /= p[:, 3, None] 17 | return p[:, :3] 18 | 19 | 20 | def export_ids(filename, ids): 21 | with open(filename, 'w') as f: 22 | for id in ids: 23 | f.write('%d\n' % id) 24 | 25 | 26 | def load_ids(filename): 27 | ids = open(filename).read().splitlines() 28 | ids = np.array(ids, dtype=np.int64) 29 | return ids 30 | 31 | 32 | def read_mesh_vertices(filename): 33 | assert os.path.isfile(filename) 34 | with open(filename, 'rb') as f: 35 | plydata = PlyData.read(f) 36 | num_verts = plydata['vertex'].count 37 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 38 | vertices[:, 0] = plydata['vertex'].data['x'] 39 | vertices[:, 1] = plydata['vertex'].data['y'] 40 | vertices[:, 2] = plydata['vertex'].data['z'] 41 | return vertices 42 | 43 | 44 | # export 3d instance labels for instance evaluation 45 | def export_instance_ids_for_eval(filename, label_ids, instance_ids): 46 | assert label_ids.shape[0] == instance_ids.shape[0] 47 | output_mask_path_relative = 'pred_mask' 48 | name = os.path.splitext(os.path.basename(filename))[0] 49 | output_mask_path = os.path.join(os.path.dirname(filename), output_mask_path_relative) 50 | if not os.path.isdir(output_mask_path): 51 | os.mkdir(output_mask_path) 52 | insts = np.unique(instance_ids) 53 | zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32) 54 | with open(filename, 'w') as f: 55 | for idx, inst_id in enumerate(insts): 56 | if inst_id == 0: # 0 -> no instance for this vertex 57 | continue 58 | output_mask_file = os.path.join(output_mask_path_relative, 59 | name + '_' + str(idx) + '.txt') 60 | loc = np.where(instance_ids == inst_id) 61 | label_id = label_ids[loc[0][0]] 62 | f.write('%s %d %f\n' % (output_mask_file, label_id, 1.0)) 63 | # write mask 64 | mask = np.copy(zero_mask) 65 | mask[loc[0]] = 1 66 | export_ids(output_mask_file, mask) 67 | 68 | 69 | # ------------ Instance Utils ------------ # 70 | 71 | 72 | class Instance(object): 73 | instance_id = 0 74 | label_id = 0 75 | vert_count = 0 76 | med_dist = -1 77 | dist_conf = 0.0 78 | 79 | def __init__(self, mesh_vert_instances, instance_id): 80 | if (instance_id == -1): 81 | return 82 | self.instance_id = int(instance_id) 83 |
self.label_id = int(self.get_label_id(instance_id)) 84 | self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id)) 85 | 86 | def get_label_id(self, instance_id): 87 | return int(instance_id // 1000) 88 | 89 | def get_instance_verts(self, mesh_vert_instances, instance_id): 90 | return (mesh_vert_instances == instance_id).sum() 91 | 92 | def to_json(self): 93 | return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) 94 | 95 | def to_dict(self): 96 | dict = {} 97 | dict['instance_id'] = self.instance_id 98 | dict['label_id'] = self.label_id 99 | dict['vert_count'] = self.vert_count 100 | dict['med_dist'] = self.med_dist 101 | dict['dist_conf'] = self.dist_conf 102 | return dict 103 | 104 | def from_json(self, data): 105 | self.instance_id = int(data['instance_id']) 106 | self.label_id = int(data['label_id']) 107 | self.vert_count = int(data['vert_count']) 108 | if ('med_dist' in data): 109 | self.med_dist = float(data['med_dist']) 110 | self.dist_conf = float(data['dist_conf']) 111 | 112 | def __str__(self): 113 | return '(' + str(self.instance_id) + ')' 114 | 115 | 116 | def read_instance_prediction_file(filename, pred_path): 117 | lines = open(filename).read().splitlines() 118 | instance_info = {} 119 | abs_pred_path = os.path.abspath(pred_path) 120 | for line in lines: 121 | parts = line.split(' ') 122 | if len(parts) != 3: 123 | print('invalid instance prediction file. Expected (per line): \ 124 | [rel path prediction] [label id prediction] \ 125 | [confidence prediction]') 126 | if os.path.isabs(parts[0]): 127 | print('invalid instance prediction file. \ 128 | First entry in line must be a relative path') 129 | mask_file = os.path.join(os.path.dirname(filename), parts[0]) 130 | mask_file = os.path.abspath(mask_file) 131 | # check that mask_file lives inside prediction path 132 | if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path: 133 | print(('predicted mask {} in prediction text file {}' + 134 | 'points outside of prediction path.').format(mask_file, filename)) 135 | 136 | info = {} 137 | info['label_id'] = int(float(parts[1])) 138 | info['conf'] = float(parts[2]) 139 | instance_info[mask_file] = info 140 | return instance_info 141 | 142 | 143 | def get_instances(ids, class_ids, class_labels, id2label): 144 | instances = {} 145 | for label in class_labels: 146 | instances[label] = [] 147 | instance_ids = np.unique(ids) 148 | for id in instance_ids: 149 | if id == 0: 150 | continue 151 | inst = Instance(ids, id) 152 | if inst.label_id in class_ids: 153 | instances[id2label[inst.label_id]].append(inst.to_dict()) 154 | return instances 155 | -------------------------------------------------------------------------------- /tools/eval_utils/inst_eval/pointwise_eval_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=None): 5 | gt = np.concatenate(gt_list, axis=0) 6 | pred = np.concatenate(pred_list, axis=0) 7 | assert gt.shape == pred.shape 8 | correct = (gt[gt != ignore_label] == pred[gt != ignore_label]).sum() 9 | whole = (gt != ignore_label).sum() 10 | acc = correct.astype(float) / whole * 100 11 | logger.info(f'Acc: {acc:.1f}') 12 | return acc 13 | 14 | 15 | def evaluate_semantic_miou(n_classes, pred_list, gt_list, ignore_label=-100, logger=None): 16 | gt = np.concatenate(gt_list, axis=0) 17 | pred = np.concatenate(pred_list, axis=0) 18 | pos_inds = gt != ignore_label 19 | gt = 
gt[pos_inds] 20 | pred = pred[pos_inds] 21 | assert gt.shape == pred.shape 22 | iou_list = [] 23 | for _index in range(n_classes): 24 | if _index != ignore_label: 25 | intersection = ((gt == _index) & (pred == _index)).sum() 26 | union = ((gt == _index) | (pred == _index)).sum() 27 | iou = intersection.astype(float) / (union + 1e-10) * 100 28 | iou_list.append(iou) 29 | miou = np.nanmean(iou_list) 30 | logger.info('Class-wise mIoU: ' + ' '.join(f'{x:.1f}' for x in iou_list)) 31 | logger.info(f'mIoU: {miou:.1f}') 32 | return miou, iou_list 33 | 34 | 35 | def evaluate_offset_mae(pred_list, gt_list, gt_instance_list, ignore_label=-100, logger=None): 36 | gt = np.concatenate(gt_list, axis=0) 37 | pred = np.concatenate(pred_list, axis=0) 38 | gt_instance = np.concatenate(gt_instance_list, axis=0) 39 | pos_inds = gt_instance != ignore_label 40 | gt = gt[pos_inds] 41 | pred = pred[pos_inds] 42 | mae = np.abs(gt - pred).sum() / pos_inds.sum() 43 | logger.info(f'Offset MAE: {mae:.3f}') 44 | return mae -------------------------------------------------------------------------------- /tools/eval_utils/save_utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | from pcseg.models.model_utils.rle_utils import rle_decode 6 | 7 | 8 | def save_npy(root, name, scan_ids, arrs): 9 | root = osp.join(root, name) 10 | os.makedirs(root, exist_ok=True) 11 | paths = [osp.join(root, f'{i}.npy') for i in scan_ids] 12 | pool = mp.Pool() 13 | pool.starmap(np.save, zip(paths, arrs)) 14 | pool.close() 15 | pool.join() 16 | 17 | 18 | def save_single_instance(root, scan_id, insts, nyu_id=None): 19 | f = open(osp.join(root, f'{scan_id}.txt'), 'w') 20 | os.makedirs(osp.join(root, 'predicted_masks'), exist_ok=True) 21 | for i, inst in enumerate(insts): 22 | # assert scan_id == inst['scan_id'] 23 | label_id = inst['label_id'] 24 | # scannet dataset use nyu_id for evaluation 25 | if nyu_id is not None: 26 | label_id = nyu_id[label_id - 1] 27 | conf = inst['conf'] 28 | f.write(f'predicted_masks/{scan_id}_{i:03d}.txt {label_id} {conf:.4f}\n') 29 | mask_path = osp.join(root, 'predicted_masks', f'{scan_id}_{i:03d}.txt') 30 | mask = rle_decode(inst['pred_mask']) 31 | np.savetxt(mask_path, mask, fmt='%d') 32 | f.close() 33 | 34 | 35 | def save_pred_instances(root, name, scan_ids, pred_insts, nyu_id=None): 36 | root = osp.join(root, name) 37 | os.makedirs(root, exist_ok=True) 38 | roots = [root] * len(scan_ids) 39 | nyu_ids = [nyu_id] * len(scan_ids) 40 | pool = mp.Pool() 41 | pool.starmap(save_single_instance, zip(roots, scan_ids, pred_insts, nyu_ids)) 42 | pool.close() 43 | pool.join() 44 | 45 | 46 | def save_gt_instance(path, gt_inst, nyu_id=None): 47 | if nyu_id is not None: 48 | sem = gt_inst // 1000 49 | ignore = sem == 0 50 | ins = gt_inst % 1000 51 | nyu_id = np.array(nyu_id) 52 | sem = nyu_id[sem - 1] 53 | sem[ignore] = 0 54 | gt_inst = sem * 1000 + ins 55 | np.savetxt(path, gt_inst, fmt='%d') 56 | 57 | 58 | def save_gt_instances(root, name, scan_ids, gt_insts, nyu_id=None): 59 | root = osp.join(root, name) 60 | os.makedirs(root, exist_ok=True) 61 | paths = [osp.join(root, f'{i}.txt') for i in scan_ids] 62 | pool = mp.Pool() 63 | nyu_ids = [nyu_id] * len(scan_ids) 64 | pool.starmap(save_gt_instance, zip(paths, gt_insts, nyu_ids)) 65 | pool.close() 66 | pool.join() 67 | -------------------------------------------------------------------------------- 
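A minimal, hypothetical usage sketch for the point-wise evaluation helpers above (`evaluate_semantic_acc` / `evaluate_semantic_miou` from `pointwise_eval_utils.py`); the import path, the logger setup, and the 3-class toy arrays are assumptions for illustration and not part of the repository:

```python
import logging

import numpy as np

# Assumed import path; adjust to how the tools/ directory is on your PYTHONPATH.
from eval_utils.inst_eval.pointwise_eval_utils import evaluate_semantic_acc, evaluate_semantic_miou

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('toy_eval')

# Two "scans" worth of per-point labels; -100 marks ignored points.
gt_list = [np.array([0, 1, 2, -100]), np.array([2, 2, 1, 0])]
pred_list = [np.array([0, 1, 1, 0]), np.array([2, 0, 1, 0])]

acc = evaluate_semantic_acc(pred_list, gt_list, ignore_label=-100, logger=logger)
miou, iou_list = evaluate_semantic_miou(3, pred_list, gt_list, ignore_label=-100, logger=logger)
```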
/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption.cpython-38.pyc -------------------------------------------------------------------------------- /tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-310.pyc -------------------------------------------------------------------------------- /tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVMI-Lab/PLA/3a7103a4211f6eb1f6d5c518f6cc870c26b96c52/tools/process_tools/__pycache__/generate_caption_idx.cpython-38.pyc -------------------------------------------------------------------------------- /tools/process_tools/combine_multiple_caption_files.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import tqdm 4 | import pickle 5 | 6 | 7 | def write_caption_to_file(data, path): 8 | with open(path, 'w') as f: 9 | json.dump(data, f) 10 | 11 | print(f'The caption is dump to {path}') 12 | 13 | 14 | def replace_dict_keys_with_new_keys(origin_dict, new_key_list): 15 | curr_key_list = list(origin_dict.keys()) 16 | new_dict = {} 17 | for i, key in enumerate(curr_key_list): 18 | new_dict[new_key_list[i]] = origin_dict[key] 19 | 20 | return new_dict 21 | 22 | 23 | def merge_captions_with_path_list(caption_path_list, caption_save_path): 24 | new_caption = {} 25 | scene_caption_num = {} 26 | 27 | for caption_path in caption_path_list: 28 | current_caption = json.load(open(caption_path, 'r')) 29 | for scene_name, curr_scene_caption in tqdm.tqdm(current_caption.items(), total=len(current_caption)): 30 | counter = scene_caption_num[scene_name] if scene_name in scene_caption_num else 0 31 | 32 | image_name_list = [f'{counter + i}' for i in range(len(curr_scene_caption))] 33 | new_scene_caption = replace_dict_keys_with_new_keys(curr_scene_caption, image_name_list) 34 | if scene_name in new_caption: 35 | new_caption[scene_name].update(new_scene_caption) 36 | else: 37 | new_caption[scene_name] = new_scene_caption 38 | 39 | counter += len(curr_scene_caption) 40 | scene_caption_num[scene_name] = counter 41 | 42 | write_caption_to_file(new_caption, caption_save_path) 43 | 44 | 45 | def merge_caption_idx_with_path_list(caption_idx_path_list, caption_idx_save_path): 46 | new_caption_idx = [] 47 | caption_idx_list = [] 48 | for caption_idx_path in caption_idx_path_list: 49 | caption_idx = pickle.load(open(caption_idx_path, 'rb')) 50 | caption_idx_list.append(caption_idx) 51 | 52 | for i in tqdm.tqdm(range(len(caption_idx_list[0]))): 53 | scene_caption = {} 54 | scene_caption_infos = {} 55 | counter = 0 56 | for _, caption_idx in enumerate(caption_idx_list): 57 | if 'scene_name' not in scene_caption: 58 | scene_caption['scene_name'] = caption_idx[i]['scene_name'] 59 | 60 | new_image_name_list = [f'{counter + i}' for i in range(len(caption_idx[i]['infos']))] 61 | new_scene_caption_idx = replace_dict_keys_with_new_keys(caption_idx[i]['infos'], new_image_name_list) 62 | 63 | 
scene_caption_infos.update(new_scene_caption_idx) 64 | counter += len(caption_idx[i]['infos']) 65 | 66 | scene_caption['infos'] = scene_caption_infos 67 | new_caption_idx.append(scene_caption) 68 | 69 | with open(caption_idx_save_path, 'wb') as f: 70 | pickle.dump(new_caption_idx, f) 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser('') 75 | parser.add_argument('--caption_path_list', 76 | default=['data/nuscenes/text_embed/caption_basic_crop_nuscenes_v1.0-mini_vit-gpt2-image' 77 | '-captioning_w400-500_overlap0.3.json', 78 | 'data/nuscenes/text_embed/caption_detic_crop_cap_nuscenes_v1.0-mini_vit-gpt2-image' 79 | '-captioning_.json'], 80 | type=list, help='') 81 | parser.add_argument('--caption_idx_path_list', 82 | default=['data/nuscenes/v1.0-mini/nuscenes_caption_idx_basic_crop.pkl', 83 | 'data/nuscenes/v1.0-mini/nuscenes_caption_idx_detic_crop_cap.pkl'], 84 | type=list, help='') 85 | parser.add_argument('--caption_save_path', required=True, type=str, help='') 86 | parser.add_argument('--caption_idx_save_path', required=True, type=str, help='') 87 | 88 | args = parser.parse_args() 89 | 90 | print('Start to merge captions ........') 91 | merge_captions_with_path_list(args.caption_path_list, args.caption_save_path) 92 | print('Finish merging captions ........') 93 | 94 | print('Start to merge captions idx file ........') 95 | merge_caption_idx_with_path_list(args.caption_idx_path_list, args.caption_idx_save_path) 96 | print('Finish merging captions idx file ........') 97 | 98 | -------------------------------------------------------------------------------- /tools/process_tools/filter_caption_without_points.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | import json 4 | import tqdm 5 | 6 | 7 | def write_caption_to_file(data, path): 8 | with open(path, 'w') as f: 9 | json.dump(data, f) 10 | 11 | print(f'The caption is dump to {path}') 12 | 13 | 14 | def filter_captions_without_points(caption_info, caption_idx_info): 15 | for idx, scene_caption_idx_info in tqdm.tqdm(enumerate(caption_idx_info), total=len(caption_idx_info)): 16 | scene_name = scene_caption_idx_info['scene_name'] 17 | scene_caption_idx = scene_caption_idx_info['infos'] 18 | scene_captions = caption_info[scene_name] 19 | 20 | image_name_list = list(scene_caption_idx.keys()) 21 | for image_name in image_name_list: 22 | image_caption_idx = scene_caption_idx[image_name] 23 | if image_caption_idx.shape[0] == 0: 24 | scene_caption_idx.pop(image_name) 25 | scene_captions.pop(image_name) 26 | 27 | write_caption_to_file(caption_info, args.save_caption_info_path) 28 | with open(args.save_caption_idx_info_path, 'wb') as f: 29 | pickle.dump(caption_idx_info, f) 30 | 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser('') 34 | parser.add_argument('--caption_info_path', type=str, help='') 35 | parser.add_argument('--caption_idx_info_path', type=str, help='') 36 | 37 | parser.add_argument('--save_caption_info_path', type=str, help='') 38 | parser.add_argument('--save_caption_idx_info_path', type=str, help='') 39 | 40 | global args 41 | args = parser.parse_args() 42 | 43 | caption_info = json.load(open(args.caption_info_path, 'r')) 44 | caption_idx_info = pickle.load(open(args.caption_idx_info_path, 'rb')) 45 | 46 | filter_captions_without_points(caption_info, caption_idx_info) 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- 
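For reference, a hypothetical illustration (not in the repo) of the data layout the caption tools above operate on, inferred from how `combine_multiple_caption_files.py` and `filter_caption_without_points.py` index their inputs; the scene name, image keys, and point indices below are made up:

```python
import numpy as np

# Caption JSON: {scene_name: {image_name: caption string}}.
caption_info = {
    'scene0000_00': {
        '0': 'a room with a table and chairs',
        '1': 'a bed next to a window',
    }
}

# Caption index pickle: one entry per scene, where 'infos' maps each image name
# to the indices of the points associated with that image's caption.
caption_idx_info = [
    {
        'scene_name': 'scene0000_00',
        'infos': {
            '0': np.array([12, 57, 301]),
            '1': np.array([], dtype=np.int64),  # no matched points -> removed by the filter script
        },
    }
]
```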
/tools/process_tools/generate_category_embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | import clip 3 | import torch 4 | from transformers import AutoTokenizer, AutoModel 5 | 6 | from pcseg.models.text_networks.text_models import get_clip_model 7 | 8 | 9 | class_names = { 10 | 'scannet': ['wall', 'floor', 'cabinet', 'bed', 'chair', 11 | 'sofa', 'table', 'door', 'window', 'bookshelf', 12 | 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 13 | 'showercurtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'], 14 | 's3dis': ['ceiling', 'floor', 'wall', 'beam', 'column', 15 | 'window', 'door', 'table', 'chair', 'sofa', 16 | 'bookcase', 'board', 'clutter'] 17 | } 18 | 19 | 20 | def construct_input_from_class_name(input, tokenizer): 21 | inputs = tokenizer(input, return_tensors="pt", padding=True) 22 | return inputs 23 | 24 | 25 | def get_embedding(args): 26 | if args.model.startswith('clip'): 27 | backbone_name = args.model[5:] 28 | input = class_names[args.dataset] 29 | _, model = get_clip_model(backbone_name) 30 | model = model.cuda() 31 | text = clip.tokenize(input).cuda() 32 | output = model.encode_text(text) 33 | print(output.shape) 34 | else: 35 | tokenizer = AutoTokenizer.from_pretrained(args.model) 36 | model = AutoModel.from_pretrained(args.model) 37 | 38 | inputs = construct_input_from_class_name(class_names[args.dataset], tokenizer) 39 | outputs = model(**inputs) 40 | output = outputs.pooler_output 41 | print(outputs.pooler_output.shape) 42 | 43 | return output 44 | 45 | 46 | if __name__ == '__main__': 47 | import argparse 48 | 49 | parser = argparse.ArgumentParser('language model') 50 | parser.add_argument('--model', default='clip-ViT-B/16', type=str, help='language model name') 51 | parser.add_argument('--dataset_path', default='../data/scannetv2', type=str, help='language model name') 52 | parser.add_argument('--dataset', default='scannet', type=str, help='dataset name') 53 | args = parser.parse_args() 54 | 55 | category_embedding = get_embedding(args) 56 | 57 | file_name = '{}_{}_{}_text_embed.pth'.format( 58 | args.dataset, len(class_names[args.dataset]), args.model.replace('/', '') 59 | ) 60 | save_dir = os.path.join(args.dataset_path, 'text_embed') 61 | os.makedirs(save_dir, exist_ok=True) 62 | save_path = os.path.join(save_dir, file_name) 63 | 64 | torch.save(category_embedding, save_path) 65 | print("Saving category embedding into: ", save_path) 66 | -------------------------------------------------------------------------------- /tools/scripts/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | NGPUS=$1 5 | PY_ARGS=${@:2} 6 | 7 | while true 8 | do 9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 11 | if [ "${status}" != "0" ]; then 12 | break; 13 | fi 14 | done 15 | echo $PORT 16 | 17 | ulimit -n 64000 18 | 19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} test.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS} 20 | 21 | -------------------------------------------------------------------------------- /tools/scripts/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | NGPUS=$1 5 | PY_ARGS=${@:2} 6 | 7 | while true 8 | do 9 | PORT=$(( ((RANDOM<<15)|RANDOM) % 49152 + 10000 )) 10 | status="$(nc -z 127.0.0.1 $PORT < /dev/null &>/dev/null; echo $?)" 
11 | if [ "${status}" != "0" ]; then 12 | break; 13 | fi 14 | done 15 | echo $PORT 16 | 17 | ulimit -n 64000 18 | 19 | python3 -m torch.distributed.launch --nproc_per_node=${NGPUS} train.py --launcher pytorch --tcp_port ${PORT} ${PY_ARGS} 20 | 21 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_sched 6 | import numpy as np 7 | 8 | from .fastai_optim import OptimWrapper 9 | from .learning_schedules_fastai import CosineWarmupLR, OneCycle 10 | 11 | 12 | def build_optimizer(model, optim_cfg): 13 | if optim_cfg.OPTIMIZER == 'adam': 14 | optimizer = optim.Adam(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY) 15 | elif optim_cfg.OPTIMIZER == 'sgd': 16 | optimizer = optim.SGD( 17 | model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY, 18 | momentum=optim_cfg.MOMENTUM 19 | ) 20 | elif optim_cfg.OPTIMIZER == 'adamw': 21 | optimizer = optim.AdamW(model.parameters(), lr=optim_cfg.LR, weight_decay=optim_cfg.WEIGHT_DECAY) 22 | elif optim_cfg.OPTIMIZER == 'adam_onecycle': 23 | def children(m: nn.Module): 24 | return list(m.children()) 25 | 26 | def num_children(m: nn.Module) -> int: 27 | return len(children(m)) 28 | 29 | flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m] 30 | get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))] 31 | 32 | optimizer_func = partial(optim.Adam, betas=(0.9, 0.99)) 33 | optimizer = OptimWrapper.create( 34 | optimizer_func, 3e-3, get_layer_groups(model), wd=optim_cfg.WEIGHT_DECAY, true_wd=True, bn_wd=True 35 | ) 36 | else: 37 | raise NotImplementedError 38 | 39 | return optimizer 40 | 41 | 42 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg): 43 | total_steps = total_iters_each_epoch * total_epochs 44 | 45 | if optim_cfg.SCHEDULER == 'poly': 46 | lr_scheduler = PolyLR(optimizer, max_iter=total_steps, power=optim_cfg.POWER) 47 | elif optim_cfg.OPTIMIZER == 'adam_onecycle': 48 | lr_scheduler = OneCycle( 49 | optimizer, total_steps, optim_cfg.LR, list(optim_cfg.MOMS), optim_cfg.DIV_FACTOR, optim_cfg.PCT_START 50 | ) 51 | else: 52 | lr_scheduler = None 53 | 54 | return lr_scheduler 55 | 56 | 57 | class LambdaStepLR(lr_sched.LambdaLR): 58 | def __init__(self, optimizer, lr_lambda, last_step=-1): 59 | super(LambdaStepLR, self).__init__(optimizer, lr_lambda, last_step) 60 | 61 | @property 62 | def last_step(self): 63 | """Use last_epoch for the step counter""" 64 | return self.last_epoch 65 | 66 | @last_step.setter 67 | def last_step(self, v): 68 | self.last_epoch = v 69 | 70 | 71 | class PolyLR(LambdaStepLR): 72 | """DeepLab learning rate policy""" 73 | def __init__(self, optimizer, max_iter, power=0.9, last_step=-1): 74 | super(PolyLR, self).__init__(optimizer, lambda s: (1 - s / (max_iter + 1))**power, last_step) 75 | 76 | 77 | class CosLR(LambdaStepLR): 78 | """Runyu's LR policy""" 79 | def __init__(self, optimizer, cos_lambda_func, last_step=-1): 80 | super(CosLR, self).__init__(optimizer, cos_lambda_func, last_step) 81 | 82 | 83 | def cosine_lr_after_step(optimizer, base_lr, epoch, step_epoch, total_epochs, clip=1e-6): 84 | if epoch < step_epoch: 85 | lr = base_lr 86 | else: 87 | lr = clip + 0.5 * (base_lr - clip) * \ 88 | (1 + np.cos(np.pi * 
((epoch - step_epoch) / (total_epochs - step_epoch)))) 89 | 90 | for param_group in optimizer.param_groups: 91 | param_group['lr'] = lr 92 | 93 | 94 | def adjust_lr(optim_cfg, optimizer, scheduler, total_epochs, total_iters_per_epoch, epoch, iter, accumulated_iter, no_step=False): 95 | # adjust learning rate 96 | if optim_cfg.SCHEDULER == 'cos': 97 | max_iter = total_iters_per_epoch * total_epochs 98 | cos_learning_rate( 99 | optimizer, optim_cfg.LR, epoch * total_iters_per_epoch + iter + 1, max_iter, 0, 0) 100 | elif optim_cfg.SCHEDULER == 'cos_after_step': 101 | cosine_lr_after_step(optimizer, optim_cfg.LR, epoch, optim_cfg.STEP_EPOCH, total_epochs) 102 | elif optim_cfg.SCHEDULER in ['adam_onecycle', 'poly']: 103 | assert scheduler is not None 104 | if not no_step: 105 | scheduler.step(accumulated_iter) 106 | elif optim_cfg.SCHEDULER in ['multistep']: 107 | pass 108 | else: 109 | raise NotImplementedError 110 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/learning_schedules_fastai.py: -------------------------------------------------------------------------------- 1 | # This file is modified from https://github.com/traveller59/second.pytorch 2 | 3 | import math 4 | from functools import partial 5 | 6 | import numpy as np 7 | import torch.optim.lr_scheduler as lr_sched 8 | 9 | from .fastai_optim import OptimWrapper 10 | 11 | 12 | class LRSchedulerStep(object): 13 | def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases, 14 | mom_phases): 15 | # if not isinstance(fai_optimizer, OptimWrapper): 16 | # raise TypeError('{} is not a fastai OptimWrapper'.format( 17 | # type(fai_optimizer).__name__)) 18 | self.optimizer = fai_optimizer 19 | self.total_step = total_step 20 | self.lr_phases = [] 21 | 22 | for i, (start, lambda_func) in enumerate(lr_phases): 23 | if len(self.lr_phases) != 0: 24 | assert self.lr_phases[-1][0] < start 25 | if isinstance(lambda_func, str): 26 | lambda_func = eval(lambda_func) 27 | if i < len(lr_phases) - 1: 28 | self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func)) 29 | else: 30 | self.lr_phases.append((int(start * total_step), total_step, lambda_func)) 31 | assert self.lr_phases[0][0] == 0 32 | self.mom_phases = [] 33 | for i, (start, lambda_func) in enumerate(mom_phases): 34 | if len(self.mom_phases) != 0: 35 | assert self.mom_phases[-1][0] < start 36 | if isinstance(lambda_func, str): 37 | lambda_func = eval(lambda_func) 38 | if i < len(mom_phases) - 1: 39 | self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func)) 40 | else: 41 | self.mom_phases.append((int(start * total_step), total_step, lambda_func)) 42 | assert self.mom_phases[0][0] == 0 43 | 44 | def step(self, step): 45 | for start, end, func in self.lr_phases: 46 | if step >= start: 47 | self.optimizer.lr = func((step - start) / (end - start)) 48 | for start, end, func in self.mom_phases: 49 | if step >= start: 50 | self.optimizer.mom = func((step - start) / (end - start)) 51 | 52 | 53 | def annealing_cos(start, end, pct): 54 | # print(pct, start, end) 55 | "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." 
56 | cos_out = np.cos(np.pi * pct) + 1 57 | return end + (start - end) / 2 * cos_out 58 | 59 | 60 | class OneCycle(LRSchedulerStep): 61 | def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor, 62 | pct_start): 63 | self.lr_max = lr_max 64 | self.moms = moms 65 | self.div_factor = div_factor 66 | self.pct_start = pct_start 67 | a1 = int(total_step * self.pct_start) 68 | a2 = total_step - a1 69 | low_lr = self.lr_max / self.div_factor 70 | lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)), 71 | (self.pct_start, 72 | partial(annealing_cos, self.lr_max, low_lr / 1e4))) 73 | mom_phases = ((0, partial(annealing_cos, *self.moms)), 74 | (self.pct_start, partial(annealing_cos, 75 | *self.moms[::-1]))) 76 | fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0] 77 | super().__init__(fai_optimizer, total_step, lr_phases, mom_phases) 78 | 79 | 80 | class CosineWarmupLR(lr_sched._LRScheduler): 81 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 82 | self.T_max = T_max 83 | self.eta_min = eta_min 84 | super(CosineWarmupLR, self).__init__(optimizer, last_epoch) 85 | 86 | def get_lr(self): 87 | return [self.eta_min + (base_lr - self.eta_min) * 88 | (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2 89 | for base_lr in self.base_lrs] 90 | 91 | 92 | class FakeOptim: 93 | def __init__(self): 94 | self.lr = 0 95 | self.mom = 0 96 | 97 | 98 | if __name__ == "__main__": 99 | import matplotlib.pyplot as plt 100 | 101 | opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10 102 | schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1) 103 | 104 | lrs = [] 105 | moms = [] 106 | for i in range(100): 107 | schd.step(i) 108 | lrs.append(opt.lr) 109 | moms.append(opt.mom) 110 | plt.plot(lrs) 111 | # plt.plot(moms) 112 | plt.show() 113 | plt.plot(moms) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /tools/visual_utils/indoor_utils/ply_utils.py: -------------------------------------------------------------------------------- 1 | from plyfile import PlyData 2 | import numpy as np 3 | 4 | 5 | def read_ply(path): 6 | plydata = PlyData.read(path) 7 | num_verts = plydata['vertex'].count 8 | 9 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 10 | vertices[:, 0] = plydata['vertex']['x'] 11 | vertices[:, 1] = plydata['vertex']['y'] 12 | vertices[:, 2] = plydata['vertex']['z'] 13 | 14 | rgb = np.zeros(shape=[num_verts, 3], dtype=np.float32) 15 | rgb[:, 0] = plydata['vertex']['red'] 16 | rgb[:, 1] = plydata['vertex']['green'] 17 | rgb[:, 2] = plydata['vertex']['blue'] 18 | alpha = np.array(plydata['vertex']['alpha']) 19 | 20 | face_indices = plydata['face']['vertex_indices'] 21 | 22 | return vertices, rgb, alpha, face_indices 23 | 24 | 25 | def write_ply(output_file, data_dict): 26 | verts, colors = data_dict['xyz'], data_dict['rgb'] 27 | if 'indices' not in data_dict: 28 | data_dict['indices'] = [] 29 | 30 | file = open(output_file, 'w') 31 | file.write('ply \n') 32 | file.write('format ascii 1.0\n') 33 | file.write('element vertex {:d}\n'.format(len(verts))) 34 | file.write('property float x\n') 35 | file.write('property float y\n') 36 | file.write('property float z\n') 37 | file.write('property uchar red\n') 38 | file.write('property uchar green\n') 39 | file.write('property uchar blue\n') 40 | if 'alpha' in data_dict: 41 | file.write('property uchar alpha\n') 42 | file.write('element face {:d}\n'.format(len(data_dict['indices']))) 43 | file.write('property list uchar uint vertex_indices\n') 44 | 
file.write('end_header\n') 45 | 46 | if 'alpha' in data_dict: 47 | for vert, color, a in zip(verts, colors, data_dict['alpha']): 48 | file.write('{:f} {:f} {:f} {:d} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2], 49 | int(color[0]), 50 | int(color[1]), 51 | int(color[2]), 52 | int(a))) 53 | else: 54 | for vert, color in zip(verts, colors): 55 | file.write('{:f} {:f} {:f} {:d} {:d} {:d}\n'.format(vert[0], vert[1], vert[2], 56 | int(color[0]), 57 | int(color[1]), 58 | int(color[2]))) 59 | for ind in data_dict['indices']: 60 | file.write('3 {:d} {:d} {:d}\n'.format(ind[0], ind[1], ind[2])) 61 | file.close() 62 | --------------------------------------------------------------------------------
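Finally, a small hypothetical usage sketch for `write_ply` above (not part of the repo); the import path, output filename, toy vertices, colors, and the single triangle face are assumptions for illustration:

```python
import numpy as np

# Assumed import path; adjust to how the tools/ directory is on your PYTHONPATH.
from visual_utils.indoor_utils.ply_utils import write_ply

# Three vertices with per-vertex RGB colors and one triangle face.
points = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
colors = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])

write_ply('toy_scene.ply', {'xyz': points, 'rgb': colors, 'indices': [[0, 1, 2]]})

# Note: read_ply in the same module expects an 'alpha' vertex property,
# which this file does not include, so it would not round-trip this output.
```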