├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── configs ├── unidet3d_1xb8_scannet.py └── unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py ├── data ├── 3rscan │ ├── README.md │ ├── meta_data │ │ ├── 3RScan.json │ │ ├── 3RScan.v2_Semantic-Classes-Mapping.csv │ │ ├── bbox_size.pkl │ │ ├── camera_pose.pkl │ │ ├── nyu40_labels.csv │ │ ├── reference_axis_align_matrix.pkl │ │ ├── scans.txt │ │ ├── split │ │ │ ├── 3rscan_test.txt │ │ │ ├── 3rscan_train.txt │ │ │ ├── 3rscan_val.txt │ │ │ ├── test.txt │ │ │ ├── train.txt │ │ │ └── val.txt │ │ ├── test.txt │ │ ├── train.txt │ │ └── val.txt │ ├── prepare_bins_pkls.py │ ├── preprocess_raw_data.py │ └── utils.py ├── arkitscenes │ ├── README.md │ ├── arkitscenes_data_utils.py │ ├── data_prepare_offline.py │ ├── misc.py │ └── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── pc_utils.py │ │ ├── rotation.py │ │ ├── taxonomy.py │ │ └── tenFpsDataLoader.py ├── multiscan │ ├── README.md │ └── prepare_bins_pkls.py ├── s3dis │ ├── README.md │ └── remap_superpoints.py ├── scannet │ ├── README.md │ ├── batch_load_scannet_data.py │ ├── load_scannet_data.py │ ├── meta_data │ │ ├── scannet_means.npz │ │ ├── scannet_train.txt │ │ ├── scannetv2-labels.combined.tsv │ │ ├── scannetv2_test.txt │ │ ├── scannetv2_train.txt │ │ └── scannetv2_val.txt │ └── scannet_utils.py └── scannetpp │ ├── README.md │ ├── prepare_bins_pkls.py │ └── preprocess_raw_data.py ├── tools ├── create_data.py ├── indoor_converter.py ├── scannet_data_utils.py ├── test.py ├── train.py └── update_infos_to_v2.py └── unidet3d ├── __init__.py ├── arkitscenes_dataset.py ├── axis_aligned_iou_loss.py ├── concat_dataset.py ├── criterion.py ├── data_preprocessor.py ├── encoder.py ├── formatting.py ├── image_vis.py ├── indoor_eval.py ├── indoor_metric.py ├── loading.py ├── multiscan_dataset.py ├── rotated_iou_loss.py ├── rscan_dataset.py ├── s3dis_dataset.py ├── scannet_dataset.py ├── scannetpp_dataset.py ├── show_results.py ├── spconv_unet.py ├── structures.py ├── transforms_3d.py └── unidet3d.py /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | work_dirs 3 | .vscode 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.ipynb 8 | *ipynb_checkpoints -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel 2 | 3 | # Install base apt packages 4 | RUN apt-get update \ 5 | && DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libopenblas-dev 6 | 7 | # Install MinkowskiEngine 8 | RUN TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \ 9 | pip install git+https://github.com/daizhirui/MinkowskiEngine.git@ce930eeb403a8e3f99693662ec5ce329a0ab3528 -v --no-deps \ 10 | --global-option="--blas=openblas" \ 11 | --global-option="--force_cuda" 12 | 13 | # Install OpenMMLab projects 14 | RUN pip install --no-deps \ 15 | mmengine==0.9.0 \ 16 | mmdet==3.3.0 \ 17 | mmsegmentation==1.2.0 \ 18 | mmdet3d==1.4.0 \ 19 | mmpretrain==1.2.0 20 | 21 | # Install mmcv 22 | RUN git clone https://github.com/open-mmlab/mmcv.git \ 23 | && cd mmcv \ 24 | && git reset --hard 780ffed9f3736fedadf18b51266ecbf521e64cf6 \ 25 | && sed -i "s/'-std=c++14'] if cuda_args else/'-std=c++14', '-arch=sm_90'] if cuda_args else/g" setup.py \ 26 | && TORCH_CUDA_ARCH_LIST="6.1 7.0 8.6 9.0" \ 27 | && pip install -v -e . 
--no-deps \ 28 | && cd .. 29 | 30 | # Install torch-scatter 31 | RUN pip install torch-scatter==2.1.2 -f https://data.pyg.org/whl/torch-2.1.0+cu121.html --no-deps 32 | 33 | # Install ScanNet superpoint segmentator 34 | RUN git clone https://github.com/Karbo123/segmentator.git \ 35 | && cd segmentator/csrc \ 36 | && git reset --hard 76efe46d03dd27afa78df972b17d07f2c6cfb696 \ 37 | && sed -i "s/set(CMAKE_CXX_STANDARD 14)/set(CMAKE_CXX_STANDARD 17)/g" CMakeLists.txt \ 38 | && mkdir build \ 39 | && cd build \ 40 | && cmake .. \ 41 | -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \ 42 | -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") \ 43 | -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") \ 44 | -DCMAKE_INSTALL_PREFIX=`python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())'` \ 45 | && make \ 46 | && make install \ 47 | && cd ../../.. 48 | 49 | # Install remaining python packages 50 | RUN pip install --no-deps \ 51 | spconv-cu120==2.3.6 \ 52 | addict==2.4.0 \ 53 | yapf==0.33.0 \ 54 | termcolor==2.3.0 \ 55 | packaging==23.1 \ 56 | numpy==1.24.1 \ 57 | rich==13.3.5 \ 58 | opencv-python==4.7.0.72 \ 59 | pycocotools==2.0.6 \ 60 | Shapely==1.8.5 \ 61 | scipy==1.10.1 \ 62 | terminaltables==3.1.10 \ 63 | numba==0.57.0 \ 64 | llvmlite==0.40.0 \ 65 | pccm==0.4.7 \ 66 | ccimport==0.4.2 \ 67 | pybind11==2.10.4 \ 68 | ninja==1.11.1 \ 69 | lark==1.1.5 \ 70 | cumm-cu120==0.5.1 \ 71 | pyquaternion==0.9.9 \ 72 | lyft-dataset-sdk==0.0.8 \ 73 | pandas==2.0.1 \ 74 | python-dateutil==2.8.2 \ 75 | matplotlib==3.5.2 \ 76 | pyparsing==3.0.9 \ 77 | cycler==0.11.0 \ 78 | kiwisolver==1.4.4 \ 79 | scikit-learn==1.2.2 \ 80 | joblib==1.2.0 \ 81 | threadpoolctl==3.1.0 \ 82 | cachetools==5.3.0 \ 83 | nuscenes-devkit==1.1.10 \ 84 | trimesh==3.21.6 \ 85 | open3d==0.17.0 \ 86 | plotly==5.18.0 \ 87 | dash==2.14.2 \ 88 | plyfile==1.0.2 \ 89 | flask==3.0.0 \ 90 | werkzeug==3.0.1 \ 91 | click==8.1.7 \ 92 | blinker==1.7.0 \ 93 | itsdangerous==2.1.2 \ 94 | importlib_metadata==2.1.2 \ 95 | zipp==3.17.0 \ 96 | natsort==8.4.0 \ 97 | timm==0.9.16 \ 98 | imageio==2.34.0 \ 99 | portalocker==2.8.2 \ 100 | ftfy==6.2.0 \ 101 | regex==2024.4.16 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## UniDet3D: Multi-dataset Indoor 3D Object Detection 2 | 3 | **News**: 4 | * :fire: December, 2024. UniDet3D is now accepted at AAAI 2025. 5 | * :fire: September, 2024. UniDet3D is state-of-the-art in 6 indoor benchmarks:
6 | ScanNet [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-scannetv2)](https://paperswithcode.com/sota/3d-object-detection-on-scannetv2?p=unidet3d-multi-dataset-indoor-3d-object)
7 | ARKitScenes [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-arkitscenes)](https://paperswithcode.com/sota/3d-object-detection-on-arkitscenes?p=unidet3d-multi-dataset-indoor-3d-object)
8 | S3DIS [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-s3dis)](https://paperswithcode.com/sota/3d-object-detection-on-s3dis?p=unidet3d-multi-dataset-indoor-3d-object)
9 | MultiScan [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-multiscan)](https://paperswithcode.com/sota/3d-object-detection-on-multiscan?p=unidet3d-multi-dataset-indoor-3d-object)
10 | 3RScan [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-3rscan)](https://paperswithcode.com/sota/3d-object-detection-on-3rscan?p=unidet3d-multi-dataset-indoor-3d-object)
11 | ScanNet++ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/unidet3d-multi-dataset-indoor-3d-object/3d-object-detection-on-scannet-1)](https://paperswithcode.com/sota/3d-object-detection-on-scannet-1?p=unidet3d-multi-dataset-indoor-3d-object). 12 | 13 | This repository contains an implementation of UniDet3D, a multi-dataset indoor 3D object detection method introduced in our paper: 14 | 15 | > **UniDet3D: Multi-dataset Indoor 3D Object Detection**
16 | > [Maksim Kolodiazhnyi](https://github.com/col14m), 17 | > [Anna Vorontsova](https://github.com/highrut), 18 | > [Matvey Skripkin](https://scholar.google.com/citations?user=hAlwb4wAAAAJ), 19 | > [Danila Rukhovich](https://github.com/filaPro), 20 | > [Anton Konushin](https://scholar.google.com/citations?user=ZT_k-wMAAAAJ) 21 | >
22 | > Artificial Intelligence Research Institute
23 | https://arxiv.org/abs/2409.04234 24 | 25 | ### Installation 26 | 27 | For convenience, we provide a [Dockerfile](Dockerfile). 28 | This implementation is based on the [mmdetection3d](https://github.com/open-mmlab/mmdetection3d) framework `v1.1.0`. If not using Docker, please follow [getting_started.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/get_started.md) for the installation instructions. 29 | 30 | 31 | ### Getting Started 32 | 33 | Please see [test_train.md](https://github.com/open-mmlab/mmdetection3d/blob/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/docs/en/user_guides/train_test.md) for some basic usage examples. 34 | 35 | #### Data Preprocessing 36 | 37 | UniDet3D is trained and tested on 6 datasets: [ScanNet](data/scannet), [ARKitScenes](data/arkitscenes), [S3DIS](data/s3dis), [MultiScan](data/multiscan), [3RScan](data/3rscan), and [ScanNet++](data/scannetpp). 38 | Preprocessed data can be found at our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D). Download each archive, unpack it, and move it into the corresponding directory in [data](data). Please comply with the license agreement before downloading the data. 39 | 40 | Alternatively, you can preprocess the data yourself. 41 | Training data for 3D object detection methods that do not require superpoints, e.g., [TR3D](https://github.com/SamsungLabs/tr3d) or [FCAF3D](https://github.com/SamsungLabs/fcaf3d), can be prepared according to the [instructions](data). 42 | 43 | Superpoints for ScanNet and MultiScan are provided as part of the original annotation. For the remaining datasets, you can either download pre-computed superpoints from our [Hugging Face](https://huggingface.co/datasets/maksimko123/UniDet3D) or compute them using [superpoint_transformer](https://github.com/drprojects/superpoint_transformer). 44 | 45 | #### Training 46 | 47 | Before training, please download the backbone [checkpoint](https://github.com/filapro/oneformer3d/releases/download/v1.0/oneformer3d_1xb4_scannet.pth) and save it under `work_dirs/tmp`. 48 | 49 | To train UniDet3D on all 6 datasets jointly, simply run the [training](tools/train.py) script: 50 | 51 | ```bash 52 | python tools/train.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py 53 | ``` 54 | 55 | UniDet3D can also be trained on individual datasets, e.g., we provide a [config](configs/unidet3d_1xb8_scannet.py) for training on ScanNet only. 56 | 57 | 58 | #### Testing 59 | 60 | To test a trained model, you can run the [testing](tools/test.py) script: 61 | 62 | ```bash 63 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \ 64 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/epoch_1024.pth 65 | ``` 66 | 67 | UniDet3D can also be tested on individual datasets. To this end, simply remove the unwanted datasets from `val_dataloader.dataset.datasets` in the config file.
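For example, the following sketch is one possible way to do this with `mmengine` (it is not part of the repository; it assumes the concatenated validation dataset exposes the per-benchmark entries via the `datasets` list mentioned above, that the first entry is ScanNet, that the config also defines a `test_dataloader`, and the output file name is hypothetical):

```python
from mmengine.config import Config

# Load the joint 6-dataset config shipped with the repository.
cfg = Config.fromfile(
    'configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py')

# Keep only the datasets you want to evaluate on, e.g. the first entry
# (assumed to be ScanNet), and drop the rest.
cfg.val_dataloader.dataset.datasets = cfg.val_dataloader.dataset.datasets[:1]
# Mirror the change for testing (assumes test_dataloader mirrors val_dataloader).
cfg.test_dataloader = cfg.val_dataloader

# Note: val_evaluator / test_evaluator may need the same trimming if they are
# configured per dataset; check the config before running.

# Save the trimmed config under a new (hypothetical) name.
cfg.dump('configs/unidet3d_1xb8_scannet_only_eval.py')
```

The dumped file can then be passed to `tools/test.py` in place of the joint config, together with the same checkpoint.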
68 | 69 | #### Visualization 70 | 71 | To visualize ground truth and predicted boxes, run the [testing](tools/test.py) script with additional arguments: 72 | 73 | ```bash 74 | python tools/test.py configs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes.py \ 75 | work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes/latest.pth --show \ 76 | --show-dir work_dirs/unidet3d_1xb8_scannet_s3dis_multiscan_3rscan_scannetpp_arkitscenes 77 | ``` 78 | You can also set `score_thr` in configs to `0.3` for better visualizations. 79 | 80 | ### Trained Model 81 | 82 | Please refer to the UniDet3D [checkpoint](https://github.com/filapro/unidet3d/releases/download/v1.0/unidet3d.pth) and [log file](https://github.com/filapro/unidet3d/releases/download/v1.0/log.txt). The corresponding metrics are given below (they might slightly deviate from the values reported in the paper due to the randomized training/testing procedure). 83 | 84 | | Dataset | mAP25 | mAP50 | 85 | |:-----------:|:-----------------:|:-----------------:| 86 | | ScanNet | 77.0 | 65.9 | 87 | | ARKitScenes | 60.1 | 47.2 | 88 | | S3DIS | 76.7 | 65.3 | 89 | | MultiScan | 62.6 | 52.3 | 90 | | 3RScan | 63.6 | 44.9 | 91 | | ScanNet++ | 24.0 | 16.8 | 92 | 93 | ### Predictions Example 94 | 95 |

96 | *(figure: UniDet3D predictions)* 97 |

98 | 99 | ### Citation 100 | 101 | If you find this work useful for your research, please cite our paper: 102 | 103 | ``` 104 | @inproceedings{kolodiazhnyi2025unidet3d, 105 | title={Unidet3d: Multi-dataset indoor 3d object detection}, 106 | author={Kolodiazhnyi, Maksim and Vorontsova, Anna and Skripkin, Matvey and Rukhovich, Danila and Konushin, Anton}, 107 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, 108 | volume={39}, 109 | number={4}, 110 | pages={4365--4373}, 111 | year={2025} 112 | } 113 | ``` 114 | -------------------------------------------------------------------------------- /data/3rscan/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare 3RScan Data for Indoor 3D Detection 2 | 3 | 1. Download data from the official [3RScan](https://waldjohannau.github.io/RIO/). 4 | 5 | 2. Preprocess raw data by running: 6 | 7 | ```bash 8 | python preprocess_raw_data.py --dataset_root path_to_dataset --output_root path_to_save_preprocessed_raw_data 9 | ``` 10 | 11 | 3. Generate bins and pkls data by running: 12 | 13 | ```bash 14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins 15 | ``` 16 | 17 | Overall you achieve the following file structure in `bins` directory: 18 | ``` 19 | bins 20 | ├── bboxs 21 | │ ├── xxxxx_xx.npy 22 | ├── instance_mask 23 | │ ├── xxxxx_xx.bin 24 | ├── points 25 | │ ├── xxxxx_xx.bin 26 | ├── semantic_mask 27 | │ ├── xxxxx_xx.bin 28 | ├── superpoints 29 | │ ├── xxxxx_xx.bin 30 | ├── 3rscan_infos_train.pkl 31 | ├── 3rscan_infos_val.pkl 32 | ├── 3rscan_infos_test.pkl 33 | ``` 34 | -------------------------------------------------------------------------------- /data/3rscan/meta_data/bbox_size.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/bbox_size.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/camera_pose.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/camera_pose.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/nyu40_labels.csv: -------------------------------------------------------------------------------- 1 | nyu40id,nyu40class,mappedId,mappedIdConsecutive,weight 2 | 1,wall,(ignore),19,0.0 3 | 2,floor,(ignore),19,0.0 4 | 3,cabinet,3,1,3.9644974086960434 5 | 4,bed,4,2,5.459494152836571 6 | 5,chair,5,3,2.241522691584157 7 | 6,sofa,6,4,4.820655512680854 8 | 7,table,7,5,3.565918577548873 9 | 8,door,8,6,3.538498341919445 10 | 9,window,9,7,4.636521236560596 11 | 10,bookshelf,10,8,5.445050937449535 12 | 11,picture,11,9,5.079250281008131 13 | 12,counter,12,10,6.2030429647735845 14 | 13,blinds,(ignore),19,0.0 15 | 14,desk,14,11,4.622662494840168 16 | 15,shelves,(ignore),19,0.0 17 | 16,curtain,16,12,5.956294301248057 18 | 17,dresser,(ignore),19,0.0 19 | 18,pillow,(ignore),19,0.0 20 | 19,mirror,(ignore),19,0.0 21 | 20,floor_mat,(ignore),19,0.0 22 | 21,clothes,(ignore),19,0.0 23 | 22,ceiling,(ignore),19,0.0 24 | 23,books,(ignore),19,0.0 25 | 24,refridgerator,24,13,5.459141107819665 26 | 25,television,(ignore),19,0.0 27 | 26,paper,(ignore),19,0.0 28 | 27,towel,(ignore),19,0.0 29 | 
28,shower_curtain,28,14,6.724871661883906 30 | 29,box,(ignore),19,0.0 31 | 30,whiteboard,(ignore),19,0.0 32 | 31,person,(ignore),19,0.0 33 | 32,night_stand,(ignore),19,0.0 34 | 33,toilet,33,15,5.832442848923174 35 | 34,sink,34,16,5.064773947290611 36 | 35,lamp,(ignore),19,0.0 37 | 36,bathtub,36,17,6.738988357113375 38 | 37,bag,(ignore),19,0.0 39 | 38,otherstructure,(ignore),19,0.0 40 | 39,otherfurniture,39,18,3.375217918833916 41 | 40,otherprop,(ignore),19,0.0 -------------------------------------------------------------------------------- /data/3rscan/meta_data/reference_axis_align_matrix.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/3rscan/meta_data/reference_axis_align_matrix.pkl -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/3rscan_test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/3rscan_val.txt: -------------------------------------------------------------------------------- 1 | 095821f7-e2c2-2de1-9568-b9ce59920e29 2 | 2e369567-e133-204c-909a-c5da44bb58df 3 | 095821f9-e2c2-2de1-9707-8f735cd1c148 4 | 095821fb-e2c2-2de1-94df-20f2cb423bcb 5 | 0988ea72-eb32-2e61-8344-99e2283c2728 6 | 9766cbe5-6321-2e2f-8040-4e5b7a5d8ba1 7 | 9766cbf5-6321-2e2f-8131-78c4e204635d 8 | 9766cbf7-6321-2e2f-81e1-2b46533c64dd 9 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 10 | 
0cac7532-8d6f-2d13-8cea-1e70d5ae4856 11 | 0cac7534-8d6f-2d13-8de7-8a915ed90050 12 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 13 | 0cac7582-8d6f-2d13-8d4b-e4041cb166c4 14 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 15 | 0cac75de-8d6f-2d13-8e1a-b574569c3885 16 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 17 | 7272e17c-a01b-20f6-8b2f-e659331ae41a 18 | 10b17940-3938-2467-8a7a-958300ba83d3 19 | c2d9933f-1947-2fbf-807f-c44bc1aed269 20 | c2d9933d-1947-2fbf-81fa-c8a7f9625eea 21 | 5630cfe7-12bf-2860-8710-52729dc36cc6 22 | c2d99349-1947-2fbf-837e-a0bd5e027c52 23 | c2d99347-1947-2fbf-834b-f95790c125dd 24 | c2d99345-1947-2fbf-818d-90ea82acef29 25 | c2d99343-1947-2fbf-808f-92dbb7d47aa5 26 | c2d99341-1947-2fbf-817a-5aa9b44f724f 27 | 6e67e55f-1209-2cd0-8194-8c6278434c80 28 | 137a8158-1db5-2cc0-8003-31c12610471e 29 | 5630cfd3-12bf-2860-8749-9dacb499fb14 30 | c92fb5b7-f771-2064-87a9-31c819832405 31 | f2c76ff1-2239-29d0-87f5-8a0346584384 32 | f2c76fed-2239-29d0-8598-9ed42cec9dc5 33 | f2c76feb-2239-29d0-8418-72b6051fc144 34 | f2c76fe9-2239-29d0-87ec-f2c7ced812c1 35 | f2c76fe7-2239-29d0-84f5-144c30fd7451 36 | f2c76fe5-2239-29d0-8593-1a2555125595 37 | d04eb40f-1d53-27ea-8a41-47892bde7017 38 | 6e67e550-1209-2cd0-8294-7cc2564cf82c 39 | 10b1792a-3938-2467-8b4e-a93da27a0985 40 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 41 | 1d233fe6-e280-2b1a-8caf-eb0d13a59ad6 42 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 43 | 1d234026-e280-2b1a-8fe1-713e28269f4d 44 | 1d234024-e280-2b1a-8c28-2743fefed020 45 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 46 | 20c993b3-698f-29c5-859c-dca8ddecf220 47 | 280d8ebb-6cc6-2788-9153-98959a2da801 48 | 4731976c-f9f7-2a1a-95cc-31c4d1751d0b 49 | 1d2f850c-d757-207c-8fba-60b90a7d4691 50 | ea318260-0a4c-2749-9389-4c16c782c4b1 51 | 10b17957-3938-2467-88a5-9e9254930dad 52 | 321c8680-a5a8-2a84-85c2-816a26d59516 53 | 321c867e-a5a8-2a84-851a-818df115be05 54 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 55 | 38770ca3-86d7-27b8-85a7-7d840ffdec6a 56 | 38770ca5-86d7-27b8-871c-57fdbfe87905 57 | 4138582f-a238-2435-8332-6902542c2823 58 | 5341b7a5-8a66-2cdd-8751-70b98263cb8d 59 | 8eabc445-5af7-2f32-85ae-90deb8eb1b0b 60 | 422885b3-192d-25fc-84c9-9b80eea1752d 61 | 422885c5-192d-25fc-85e6-12a3d65c8e7b 62 | 4238490c-60a7-271e-9f38-3c651e3b3912 63 | 4238490a-60a7-271e-9c04-3846221dc354 64 | 42384908-60a7-271e-9c46-01e562c8974c 65 | 10b17963-3938-2467-8a48-0d4af350ce92 66 | 43b8cae1-6678-2e38-9865-c19c07c25015 67 | 43b8cadf-6678-2e38-9920-064144c99406 68 | 43b8cae3-6678-2e38-9b67-5905de29f6d7 69 | 4a9a43d2-7736-2874-874d-d0fad0570e19 70 | 4a9a43d4-7736-2874-87a6-0c3089281af8 71 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 72 | 4d3d82ae-8cf4-2e04-80de-20f96c814d9c 73 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 74 | 4fbad32f-465b-2a5d-8408-146ab1d72808 75 | 4fbad331-465b-2a5d-8488-852fcda9513c 76 | 9af05c68-5794-2e19-8c5a-979f448da545 77 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 78 | 7272e16c-a01b-20f6-8961-a0927b4a7629 79 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 80 | 4138585b-a238-2435-810b-9728fc989b2f 81 | 5630cfcf-12bf-2860-8784-83d28a611a83 82 | 5630cfd1-12bf-2860-86b2-e7a96bc32c19 83 | bf9a3d9e-45a5-2e80-83c6-4e427c5586a2 84 | 10b1792e-3938-2467-8bb3-172148ae5a67 85 | 10b17944-3938-2467-8bac-5552375e4467 86 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 87 | 6bde607b-9162-246f-8e65-76e3ef265504 88 | 6bde607d-9162-246f-8f84-98cf7ac2374c 89 | 6bde6083-9162-246f-8c9c-e170212059b2 90 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 91 | 6bde60cd-9162-246f-8fad-fca80b4d6ad8 92 | 6bde60cf-9162-246f-8f98-6355d75494c2 93 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 94 | 
68bae75f-3567-2f7c-829d-7422117729f3 95 | 742e8f17-be0a-294e-9dd3-52492d308e2b 96 | 742e8f19-be0a-294e-9eb6-50dc474b110e 97 | 75c25975-9ca2-2844-9769-84677f46d4cf 98 | 8eabc455-5af7-2f32-8606-a0bdbe6c537d 99 | 7747a50c-9431-24e8-877d-e60c3a341cc2 100 | 7747a4ec-9431-24e8-848f-897279a1e9fe 101 | 7747a510-9431-24e8-8705-907ee78be2a2 102 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 103 | 20c993bd-698f-29c5-8494-5556ba7d3fe9 104 | 20c993bf-698f-29c5-8549-a69fd169c1e1 105 | 8eabc45f-5af7-2f32-8528-640861d2a135 106 | 75c25989-9ca2-2844-97b4-31b81f7554b8 107 | 41385849-a238-2435-81d0-ceb0eba4541a 108 | 5341b7b3-8a66-2cdd-856d-9d70e194568b 109 | 8eabc461-5af7-2f32-8663-ce5a10fd97b3 110 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 111 | a0905fdb-66f7-2272-9fc5-7c0008d5e87b 112 | a0905fdd-66f7-2272-9cdb-89360888ea67 113 | ab835fae-54c6-29a1-995e-b06cfc555786 114 | ab835faa-54c6-29a1-9b55-1a5217fcba19 115 | d7dc987e-a34a-2794-85c8-f75389b27532 116 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 117 | 0cac75e8-8d6f-2d13-8fc4-acdbf00437c8 118 | 0cac75ea-8d6f-2d13-8e50-c5faf0159e32 119 | c7895f63-339c-2d13-81a3-0b07b1eb23b4 120 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 121 | 0cac761b-8d6f-2d13-8f16-23a7d73c54fe 122 | 0cac7619-8d6f-2d13-8f36-ac562ec9a4de 123 | b1f23308-d255-2761-94da-981d962c6bf8 124 | 77361fd4-d054-2a22-88c4-b5b404f904ca 125 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 126 | ba6fdaac-a4c1-2dca-8380-f16765679fd7 127 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 128 | bcb0fe04-4f39-2c70-9f03-d0eec592de24 129 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 130 | cdcaf5bd-ddd8-2ed6-97c3-489e105e4dde 131 | 10b1794e-3938-2467-89a7-ebc89e84cf88 132 | bf9a3de9-45a5-2e80-8022-277108d67404 133 | bf9a3ddf-45a5-2e80-8007-8e9e7f323e52 134 | bf9a3ddd-45a5-2e80-80bc-647365c7ca08 135 | bf9a3dd9-45a5-2e80-817c-f918e193231b 136 | c7895f27-339c-2d13-836b-c12dca280261 137 | c7895f2b-339c-2d13-8248-b0507e050314 138 | c7895f29-339c-2d13-83e9-90dbe61fa8be 139 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 140 | c7895f7a-339c-2d13-82ac-09ef1c9001ba 141 | c7895f78-339c-2d13-82bb-cc990cbbc90f 142 | c92fb5b5-f771-2064-8570-dbe16cb33764 143 | 5630cfde-12bf-2860-8563-d68bdd98fab0 144 | 10b1793e-3938-2467-8b92-f56541e7ef9e 145 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 146 | d7d40d4c-7a5d-2b36-95c1-5f6c9147caf0 147 | d7d40d50-7a5d-2b36-9446-7d636174329f 148 | ddc73797-765b-241a-9e2c-097c5989baf6 149 | 2451c048-fae8-24f6-9043-f1604dbada2c 150 | ddc7379b-765b-241a-9f45-c37e41608726 151 | ddc73799-765b-241a-9c30-f75dcb7627d4 152 | 0cac75b7-8d6f-2d13-8cb2-0b4e06913140 153 | c7895f07-339c-2d13-8176-7418b6e8d7ce 154 | e61b0e04-bada-2f31-82d6-72831a602ba7 155 | e61b0e02-bada-2f31-82d0-80fc5c70bd6f 156 | fcf66d7b-622d-291c-86b8-7db96aebcee3 157 | 787ed58c-9d98-2c97-83b9-b48a609ace15 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 
38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/split/val.txt: -------------------------------------------------------------------------------- 1 | 0988ea72-eb32-2e61-8344-99e2283c2728 2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 6 | 10b17940-3938-2467-8a7a-958300ba83d3 7 | 137a8158-1db5-2cc0-8003-31c12610471e 8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 11 | 280d8ebb-6cc6-2788-9153-98959a2da801 12 | 321c8680-a5a8-2a84-85c2-816a26d59516 13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 14 | 422885b3-192d-25fc-84c9-9b80eea1752d 15 | 4238490c-60a7-271e-9f38-3c651e3b3912 16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 19 | 5630cfcf-12bf-2860-8784-83d28a611a83 20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 24 | 75c25975-9ca2-2844-9769-84677f46d4cf 25 | 7747a50c-9431-24e8-877d-e60c3a341cc2 26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 27 | 8eabc45f-5af7-2f32-8528-640861d2a135 28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 29 | ab835fae-54c6-29a1-995e-b06cfc555786 30 | b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 31 | b1f23308-d255-2761-94da-981d962c6bf8 32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 35 | bf9a3de9-45a5-2e80-8022-277108d67404 36 | c7895f27-339c-2d13-836b-c12dca280261 37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 38 | c92fb5b5-f771-2064-8570-dbe16cb33764 39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 40 | ddc73797-765b-241a-9e2c-097c5989baf6 41 | fcf66d7b-622d-291c-86b8-7db96aebcee3 42 | 4138582f-a238-2435-8332-6902542c2823 43 | 43b8cae1-6678-2e38-9865-c19c07c25015 44 | 4a9a43d2-7736-2874-874d-d0fad0570e19 45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 46 | 095821f7-e2c2-2de1-9568-b9ce59920e29 47 | e61b0e04-bada-2f31-82d6-72831a602ba7 
-------------------------------------------------------------------------------- /data/3rscan/meta_data/test.txt: -------------------------------------------------------------------------------- 1 | 00d42bed-778d-2ac6-86a7-0e0e5f5f5660 2 | 0988ea78-eb32-2e61-80ee-e4a44170bce9 3 | 0cac7547-8d6f-2d13-8c9c-08cad6632f46 4 | 0cac758d-8d6f-2d13-8d21-9133668b4d7d 5 | 0cac75e4-8d6f-2d13-8f7c-fb5b5747d1ba 6 | 0cac7633-8d6f-2d13-8e91-8b46e2b70f34 7 | 0cac7678-8d6f-2d13-8da3-dba8636cef51 8 | 10b17942-3938-2467-8933-5d40ada6d445 9 | 13af338e-7397-2e54-84fc-fa003f91ac0c 10 | 1d233ff0-e280-2b1a-8c10-2af29c344ee3 11 | 1d23402c-e280-2b1a-8e31-9a88c9c1fb91 12 | 20c993bb-698f-29c5-8569-0db38352364f 13 | 283ccfeb-107c-24d5-8bbf-05519a3c7c47 14 | 352e9c30-69fb-27a7-8b19-c703f0e190da 15 | 38770cb0-86d7-27b8-8466-1782505891fd 16 | 422885b9-192d-25fc-84f4-6f7c1afd29af 17 | 4fbad320-465b-2a5d-85d7-dc83aa09a9fa 18 | 5341b7af-8a66-2cdd-8431-8c73b0c10878 19 | 5341b7e7-8a66-2cdd-87ce-86596253cfa4 20 | 5630cfdc-12bf-2860-87b7-c7eab95718be 21 | 634b2183-f5d0-2fb7-87ef-c536720918dc 22 | 634d11cd-6833-255d-8c5c-2b0cf661494a 23 | 6a360555-fa53-2915-9721-bff473f98125 24 | 6bde608b-9162-246f-8d16-901b429b2563 25 | 6bde60d2-9162-246f-8dbf-dbd911924def 26 | 751a5598-fe61-2c3b-8cf2-1c23632af9b4 27 | 75c259a1-9ca2-2844-973c-adc28f935d5d 28 | 7747a514-9431-24e8-8505-5979f3f20906 29 | 8eabc410-5af7-2f32-844d-34726ad2dc7e 30 | 8eabc463-5af7-2f32-8537-22977a89efdd 31 | a0905fe1-66f7-2272-9d4a-a6f6579f6751 32 | a644cb8e-0ee5-2f66-9c91-1db77b5a8f96 33 | ad408c83-84db-2095-8aa4-924f966af2dc 34 | b05fdda4-fca0-2d4f-8beb-6172b0772f91 35 | b1f2330c-d255-2761-965e-d203c6e253c3 36 | ba6fdaae-a4c1-2dca-8240-6fbe1cb25eb4 37 | bcb0fe0a-4f39-2c70-9d5f-a7263a0180ca 38 | bf9a3da4-45a5-2e80-8082-be634b241693 39 | bf9a3df5-45a5-2e80-82c1-b108dc9a3fbe 40 | c7895f2f-339c-2d13-8388-28e95af958de 41 | c7895f80-339c-2d13-8253-64b490296e49 42 | c9fb7aa1-2a5b-2cf7-9222-6f111cb28b2b 43 | d7d40d54-7a5d-2b36-94d4-7cf59473177f 44 | ddc7379d-765b-241a-9f0b-50b72d6cd829 45 | ebc42041-82a4-2113-8583-cc8c1be818b3 46 | fcf66d8e-622d-291c-86ef-f8f2b3db7f74 -------------------------------------------------------------------------------- /data/3rscan/meta_data/val.txt: -------------------------------------------------------------------------------- 1 | 0988ea72-eb32-2e61-8344-99e2283c2728 2 | 0cac7540-8d6f-2d13-8eee-36ba2a428e3f 3 | 0cac7584-8d6f-2d13-8df8-c05e4307b418 4 | 0cac75dc-8d6f-2d13-8d08-9c497bd6acdc 5 | 0cac7676-8d6f-2d13-8f3a-d7bf7f03e721 6 | 10b17940-3938-2467-8a7a-958300ba83d3 7 | 137a8158-1db5-2cc0-8003-31c12610471e 8 | 1d233fe8-e280-2b1a-8fac-c3646a1cd64a 9 | 1d234022-e280-2b1a-8cec-e9787bb0d7b2 10 | 20c993b7-698f-29c5-847d-c8cb8a685f5a 11 | 280d8ebb-6cc6-2788-9153-98959a2da801 12 | 321c8680-a5a8-2a84-85c2-816a26d59516 13 | 38770ca1-86d7-27b8-8619-ab66f67d9adf 14 | 422885b3-192d-25fc-84c9-9b80eea1752d 15 | 4238490c-60a7-271e-9f38-3c651e3b3912 16 | 4fbad31e-465b-2a5d-84b7-c0ddea978db4 17 | 5341b79f-8a66-2cdd-84d5-6dba6525dd75 18 | 5341b7e3-8a66-2cdd-8709-66a2159f0017 19 | 5630cfcf-12bf-2860-8784-83d28a611a83 20 | 4d3d82b0-8cf4-2e04-80a8-c955ea964c2f 21 | 6bde6081-9162-246f-8c4e-ffaf709d17b1 22 | 6bde60cb-9162-246f-8cf5-d04f7426e56f 23 | 742e8f15-be0a-294e-9ebb-6c72dbcb9662 24 | 75c25975-9ca2-2844-9769-84677f46d4cf 25 | 7747a50c-9431-24e8-877d-e60c3a341cc2 26 | 8e0f1c28-9e28-2339-8584-ff06ff93c341 27 | 8eabc45f-5af7-2f32-8528-640861d2a135 28 | a0905fd9-66f7-2272-9dfb-0483fdcc54c7 29 | ab835fae-54c6-29a1-995e-b06cfc555786 30 | 
b05fdd96-fca0-2d4f-88c3-d9dfda85c00e 31 | b1f23308-d255-2761-94da-981d962c6bf8 32 | ba6fdaaa-a4c1-2dca-8163-a52b18bf6b64 33 | bcb0fe06-4f39-2c70-9c24-a8dd7496c2f8 34 | bf9a3da2-45a5-2e80-8219-1f0a216399fe 35 | bf9a3de9-45a5-2e80-8022-277108d67404 36 | c7895f27-339c-2d13-836b-c12dca280261 37 | c7895f7c-339c-2d13-819f-3bb0b26c91f6 38 | c92fb5b5-f771-2064-8570-dbe16cb33764 39 | d7d40d4e-7a5d-2b36-97e7-34324c52ac42 40 | ddc73797-765b-241a-9e2c-097c5989baf6 41 | fcf66d7b-622d-291c-86b8-7db96aebcee3 42 | 4138582f-a238-2435-8332-6902542c2823 43 | 43b8cae1-6678-2e38-9865-c19c07c25015 44 | 4a9a43d2-7736-2874-874d-d0fad0570e19 45 | b05fdd8a-fca0-2d4f-8ac5-f6ae697787f5 46 | 095821f7-e2c2-2de1-9568-b9ce59920e29 47 | e61b0e04-bada-2f31-82d6-72831a602ba7 -------------------------------------------------------------------------------- /data/3rscan/prepare_bins_pkls.py: -------------------------------------------------------------------------------- 1 | import mmengine 2 | import os 3 | from tqdm.auto import tqdm 4 | import numpy as np 5 | import argparse 6 | 7 | COLOR_TO_LABEL = { 8 | (0, 0, 0): 'unknown', 9 | (174, 199, 232): 'wall', 10 | (152, 223, 138): 'floor', 11 | (31, 119, 180): 'cabinet', 12 | (255, 187, 120): 'bed', 13 | (188, 189, 34): 'chair', 14 | (140, 86, 75): 'sofa', 15 | (255, 152, 150): 'table', 16 | (214, 39, 40): 'door', 17 | (197, 176, 213): 'window', 18 | (148, 103, 189): 'bookshelf', 19 | (196, 156, 148): 'picture', 20 | (23, 190, 207): 'counter', 21 | (178, 76, 76): 'blinds', 22 | (247, 182, 210): 'desk', 23 | (66, 188, 102): 'shelves', 24 | (219, 219, 141): 'curtain', 25 | (140, 57, 197): 'dresser', 26 | (202, 185, 52): 'pillow', 27 | (51, 176, 203): 'mirror', 28 | (200, 54, 131): 'floor mat', 29 | (92, 193, 61): 'clothes', 30 | (78, 71, 183): 'ceiling', 31 | (172, 114, 82): 'books', 32 | (255, 127, 14): 'fridge', 33 | (91, 163, 138): 'television', 34 | (153, 98, 156): 'paper', 35 | (140, 153, 101): 'towel', 36 | (158, 218, 229): 'shower curtain', 37 | (100, 125, 154): 'box', 38 | (178, 127, 135): 'whiteboard', 39 | (120, 185, 128): 'person', 40 | (146, 111, 194): 'night stand', 41 | (44, 160, 44): 'toilet', 42 | (112, 128, 144): 'sink', 43 | (96, 207, 209): 'lamp', 44 | (227, 119, 194): 'bathtub', 45 | (213, 92, 176): 'bag', 46 | (94, 106, 211): 'structure', 47 | (82, 84, 163): 'furniture', 48 | (100, 85, 144): 'prop' 49 | } 50 | 51 | OBJ2SEM = {v: idx for idx, (k, v) in enumerate(COLOR_TO_LABEL.items())} 52 | OBJ2SEM['unknown'] = -1 53 | REMAIN_BB_LABELS = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39] 54 | 55 | def create_dir(path): 56 | if not os.path.exists(path): 57 | os.mkdir(path) 58 | 59 | 60 | def _filter_bb(bb): 61 | final = [] 62 | for i in bb: 63 | if i[-1] in REMAIN_BB_LABELS: 64 | final.append(i) 65 | 66 | if len(final) == 0: 67 | return np.zeros((0,7)) 68 | 69 | return np.stack(final) 70 | 71 | def create_dirs(path): 72 | points = os.path.join(path, 'points') 73 | create_dir(points) 74 | 75 | semantic_mask = os.path.join(path, 'semantic_mask') 76 | create_dir(semantic_mask) 77 | 78 | instance_mask = os.path.join(path, 'instance_mask') 79 | create_dir(instance_mask) 80 | 81 | bboxs = os.path.join(path, 'bboxs') 82 | create_dir(bboxs) 83 | 84 | superpoints = os.path.join(path, 'superpoints') 85 | create_dir(superpoints) 86 | return { 87 | 'points': points, 88 | 'semantic_mask': semantic_mask, 89 | 'instance_mask': instance_mask, 90 | 'bboxs': bboxs, 91 | 'superpoints': superpoints 92 | } 93 | 94 | 95 | 96 | def rearrange_sup(sup): 97 | sup = 
sup.copy() 98 | unique_super = np.unique(sup) 99 | 100 | for idx, un in enumerate(unique_super): 101 | ind = np.where(sup == un)[0] 102 | sup[ind] = idx 103 | 104 | return sup 105 | 106 | 107 | def create_metainfo(): 108 | 109 | return { 110 | 'categories': OBJ2SEM, 111 | 'dataset': '3RScan', 112 | 'info_version': '1.0' 113 | } 114 | 115 | def create_data_list(split, splits, bins_path): 116 | scenes = splits[split] 117 | final_list = [] 118 | for scene in tqdm(scenes): 119 | 120 | lidar_points = { 121 | 'num_pts_feats': 6, 122 | 'lidar_path': f'{scene}.bin' 123 | } 124 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy')) 125 | instances = [] 126 | for rb in raw_bboxs: 127 | if len(rb) == 0: 128 | instances = [] 129 | 130 | else: 131 | instances.append({ 132 | 'bbox_3d': rb[:6].tolist(), 133 | 'bbox_label_3d': int(rb[-1]) 134 | }) 135 | 136 | final_list.append({ 137 | 'lidar_points': lidar_points, 138 | 'instances': instances, 139 | 'pts_semantic_mask_path': f'{scene}.bin', 140 | 'pts_instance_mask_path': f'{scene}.bin', 141 | 'axis_align_matrix': np.eye(4) 142 | }) 143 | 144 | return final_list 145 | 146 | def create_pkl_file(path_to_save, split, splits, bins_path, pkl_prefix = '3rscan'): 147 | metainfo = create_metainfo() 148 | data_list = create_data_list(split, splits, bins_path) 149 | anno = { 150 | 'metainfo': metainfo, 151 | 'data_list': data_list 152 | } 153 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl') 154 | mmengine.dump(anno, filename, 'pkl') 155 | 156 | 157 | if __name__ == '__main__': 158 | parser = argparse.ArgumentParser() 159 | parser.add_argument( 160 | '--path_to_data', 161 | required=True, 162 | help='Path to preprocessed raw data', 163 | type=str, 164 | ) 165 | 166 | parser.add_argument( 167 | '--path_to_save_bins', 168 | required=True, 169 | help='Enter here the path where to save bins and pkls', 170 | type=str, 171 | ) 172 | 173 | parser.add_argument( 174 | '--path_to_splits', 175 | default='meta_data/split/', 176 | help='Path to train/val/test splits', 177 | type=str, 178 | ) 179 | 180 | args = parser.parse_args() 181 | print(args) 182 | 183 | path_to_splits = args.path_to_splits 184 | path_to_raw_data = args.path_to_data 185 | 186 | path_to_save_data = args.path_to_save_bins 187 | create_dir(path_to_save_data) 188 | bins_path = create_dirs(path_to_save_data) 189 | 190 | with open(path_to_splits + '/train.txt') as train_file: 191 | train_scenes = train_file.read().splitlines() 192 | with open(path_to_splits + '/val.txt') as val_file: 193 | val_scenes = val_file.read().splitlines() 194 | with open(path_to_splits + '/test.txt') as test_file: 195 | test_scenes = test_file.read().splitlines() 196 | 197 | splits = { 198 | 'train': train_scenes, 199 | 'val': val_scenes, 200 | 'test': test_scenes 201 | } 202 | 203 | scene_ids = os.listdir(path_to_raw_data) 204 | 205 | for si in tqdm(scene_ids): 206 | temp_path = os.path.join(path_to_raw_data, si) 207 | point_cloud = np.load(temp_path + f'/{si}_aligned_vert.npy') 208 | sem_label = np.load(temp_path + f'/{si}_sem_label.npy')[:, 0] 209 | ins_label = np.load(temp_path + f'/{si}_ins_label.npy')[:, 0] 210 | bboxs = np.load(temp_path + f'/{si}_aligned_bbox.npy') 211 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 212 | superpoints = rearrange_sup(superpoints) 213 | bboxs = _filter_bb(bboxs) 214 | 215 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 216 | superpoints = rearrange_sup(superpoints) 217 | 218 | 
point_cloud.astype(np.float32).tofile(os.path.join(bins_path['points'], 219 | f'{si}.bin')) 220 | sem_label.astype(np.int64).tofile(os.path.join(bins_path['semantic_mask'], 221 | f'{si}.bin')) 222 | ins_label.astype(np.int64).tofile(os.path.join(bins_path['instance_mask'], 223 | f'{si}.bin')) 224 | superpoints.astype(np.int64).tofile(os.path.join(bins_path['superpoints'], 225 | f'{si}.bin')) 226 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs) 227 | 228 | 229 | create_pkl_file(path_to_save_data, 'train', splits, bins_path) 230 | create_pkl_file(path_to_save_data, 'val', splits, bins_path) 231 | create_pkl_file(path_to_save_data, 'test', splits, bins_path) 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | -------------------------------------------------------------------------------- /data/3rscan/preprocess_raw_data.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.filterwarnings("ignore", category=DeprecationWarning) 3 | 4 | import pickle 5 | import sys 6 | import os 7 | import argparse 8 | import glob 9 | import json 10 | import numpy as np 11 | import pandas as pd 12 | from concurrent.futures import ProcessPoolExecutor 13 | from itertools import repeat 14 | 15 | from utils import read_objmesh, point_indices_from_group 16 | 17 | #CLOUD_FILE_PFIX = 'mesh.refined.v2.color' 18 | CLOUD_FILE_PFIX = 'mesh.refined.v2' 19 | AGGREGATIONS_FILE_PFIX = 'semseg.v2.json' 20 | SEGMENTS_FILE_PFIX = 'mesh.refined.0.010000.segs.v2.json' 21 | 22 | 23 | def create_dir(path): 24 | if not os.path.exists(path): 25 | os.mkdir(path) 26 | 27 | def read_transform_matrix(Scan3RJson_PATH): 28 | rescan2ref = {} 29 | with open(Scan3RJson_PATH , "r") as read_file: 30 | data = json.load(read_file) 31 | for scene in data: 32 | for scans in scene["scans"]: 33 | if "transform" in scans: 34 | rescan2ref[scans["reference"]] = \ 35 | np.array(scans["transform"]).reshape(4,4).T 36 | return rescan2ref 37 | 38 | def get_reference_dic(Scan3RJson_PATH): 39 | meta_data = json.load(open(Scan3RJson_PATH)) 40 | reference_dic = {} 41 | for record in meta_data: 42 | reference = record['reference'] 43 | reference_dic[reference] = reference 44 | if 'scans' not in record: 45 | continue 46 | for scan in record['scans']: 47 | reference_dic[scan['reference']] = reference 48 | return reference_dic 49 | 50 | def handle_process(scene_path, output_path, labels_pd, 51 | train_scenes, val_scenes, test_scenes): 52 | scene_id = scene_path.split('/')[-1] 53 | obj_path = os.path.join(scene_path, f'{CLOUD_FILE_PFIX}.obj') 54 | aggregations_file = os.path.join(scene_path, f'{AGGREGATIONS_FILE_PFIX}') 55 | segments_file = os.path.join(scene_path, f'{SEGMENTS_FILE_PFIX}') 56 | # Rotating the mesh to axis aligned 57 | rot_matrix = rescan2ref.get(scene_id, np.identity(4)) 58 | 59 | ref_scene_id = reference_dic[scene_id] 60 | ref_rot_matrix = reference_axis_align_matrix_dic[ref_scene_id] 61 | 62 | if scene_id in train_scenes: 63 | split_name = 'train' 64 | elif scene_id in val_scenes: 65 | split_name = 'val' 66 | elif scene_id in test_scenes: 67 | split_name = 'test' 68 | else: 69 | print('*', scene_id, 70 | 'does not exist in [train, val, test] that have seg files') 71 | return 72 | 73 | print('Processing: ', scene_id, 'in', split_name) 74 | 75 | pointcloud, faces_array = read_objmesh(obj_path) 76 | points = pointcloud[:, :3] 77 | colors = pointcloud[:, 3:6] 78 | 79 | # Rotate 
PC to axis aligned 80 | r_points = pointcloud[:, :3].transpose() 81 | r_points = np.append(r_points, np.ones((1, 82 | r_points.shape[1])), axis=0) 83 | # reference align 84 | r_points = np.dot(rot_matrix, r_points) 85 | # reference axis align 86 | r_points = np.dot(ref_rot_matrix, r_points) 87 | ##### ! 88 | aligned_pointcloud = np.append(r_points.transpose()[:, :3], 89 | pointcloud[:, 3:], axis=1) 90 | 91 | # Generate new labels 92 | labelled_pc = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated 93 | instance_ids = np.zeros((pointcloud.shape[0], 1)) - 1 # -1: unannotated 94 | 95 | if os.path.isfile(aggregations_file): 96 | # Load segments file 97 | with open(segments_file) as f: 98 | segments = json.load(f) 99 | seg_indices = np.array(segments['segIndices']) 100 | # Load Aggregations file 101 | with open(aggregations_file) as f: 102 | aggregation = json.load(f) 103 | seg_groups = np.array(aggregation['segGroups']) 104 | 105 | num_instances = len(seg_groups) 106 | instance_bboxes = np.zeros((num_instances, 7)) 107 | aligned_instance_bboxes = np.zeros((num_instances, 7)) 108 | 109 | for obj_idx, group in enumerate(seg_groups): 110 | segment_points, aligned_segment_points, p_inds, label_id = \ 111 | point_indices_from_group(pointcloud, aligned_pointcloud, 112 | seg_indices, group, labels_pd) 113 | labelled_pc[p_inds] = label_id 114 | 115 | if len(segment_points) == 0: continue 116 | 117 | xmin = np.min(segment_points[:,0]) 118 | ymin = np.min(segment_points[:,1]) 119 | zmin = np.min(segment_points[:,2]) 120 | xmax = np.max(segment_points[:,0]) 121 | ymax = np.max(segment_points[:,1]) 122 | zmax = np.max(segment_points[:,2]) 123 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, 124 | (zmin+zmax)/2, xmax-xmin, 125 | ymax-ymin, zmax-zmin, label_id]) # also include object id 126 | instance_bboxes[obj_idx,:] = bbox 127 | 128 | if len(aligned_segment_points) == 0: continue 129 | 130 | instance_ids[p_inds] = obj_idx 131 | xmin = np.min(aligned_segment_points[:,0]) 132 | ymin = np.min(aligned_segment_points[:,1]) 133 | zmin = np.min(aligned_segment_points[:,2]) 134 | xmax = np.max(aligned_segment_points[:,0]) 135 | ymax = np.max(aligned_segment_points[:,1]) 136 | zmax = np.max(aligned_segment_points[:,2]) 137 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, 138 | (zmin+zmax)/2, xmax-xmin, ymax-ymin, 139 | zmax-zmin, label_id]) # also include object id 140 | aligned_instance_bboxes[obj_idx,:] = bbox 141 | else: 142 | # use zero as placeholders for the test scene 143 | #print("use placeholders") 144 | instance_bboxes = np.zeros((1, 7)) 145 | aligned_instance_bboxes = np.zeros((1, 7)) 146 | 147 | labelled_pc = labelled_pc.astype(int) 148 | instance_ids = instance_ids.astype(int) 149 | assert np.all(instance_ids[np.where(labelled_pc == -1)[0]] == -1) 150 | if -1 in np.unique(instance_ids): 151 | assert len(instance_bboxes) == len(np.unique(instance_ids)[1:]) 152 | 153 | else: 154 | assert len(instance_bboxes) == len(np.unique(instance_ids)) 155 | 156 | if (np.any(np.isnan(pointcloud)) or not np.all(np.isfinite(pointcloud))): 157 | raise ValueError('nan') 158 | 159 | output_path = os.path.join(output_path, f'{scene_id}') 160 | create_dir(os.path.join(output_path)) 161 | output_prefix = os.path.join(output_path, f'{scene_id}') 162 | np.save(output_prefix+'_aligned_vert.npy', aligned_pointcloud[:, :6]) 163 | np.save(output_prefix+'_sem_label.npy', labelled_pc) 164 | np.save(output_prefix+'_ins_label.npy', instance_ids) 165 | np.save(output_prefix+'_aligned_bbox.npy', aligned_instance_bboxes) 166 | 
np.save(output_prefix+'_superpoints.npy', seg_indices) 167 | 168 | 169 | if __name__ == '__main__': 170 | parser = argparse.ArgumentParser() 171 | parser.add_argument('--dataset_root', default='../data/3rscan/', 172 | help='Path to the 3RScan dataset containing scene folders') 173 | parser.add_argument('--output_root', default='preprocessed_raw_data', 174 | help='Output path where processed data will be located') 175 | parser.add_argument('--label_map_file', 176 | default='meta_data/3RScan.v2_Semantic-Classes-Mapping.csv', 177 | help='path to scannetv2-labels.combined.tsv') 178 | parser.add_argument('--num_workers', default=12, 179 | type=int, help='The number of parallel workers') 180 | parser.add_argument('--splits_path', default='meta_data/split', 181 | help='Where the txt files with the train/val splits live') 182 | config = parser.parse_args() 183 | 184 | # Load label map 185 | labels_pd = pd.read_csv(config.label_map_file, sep=',', header=1) 186 | 187 | # Load train/val splits 188 | with open(config.splits_path + '/train.txt') as train_file: 189 | train_scenes = train_file.read().splitlines() 190 | with open(config.splits_path + '/val.txt') as val_file: 191 | val_scenes = val_file.read().splitlines() 192 | with open(config.splits_path + '/test.txt') as test_file: 193 | test_scenes = test_file.read().splitlines() 194 | 195 | META_FILE = 'meta_data/3RScan.json' 196 | rescan2ref = read_transform_matrix(META_FILE) 197 | reference_dic = get_reference_dic(META_FILE) 198 | 199 | with open('./meta_data/reference_axis_align_matrix.pkl', 'rb') as f: 200 | reference_axis_align_matrix_dic = pickle.load(f) 201 | 202 | os.makedirs(config.output_root, exist_ok=True) 203 | 204 | # Load scene paths 205 | scene_paths = sorted(glob.glob(config.dataset_root + '/*')) 206 | 207 | # Preprocess data. 208 | pool = ProcessPoolExecutor(max_workers=config.num_workers) 209 | print('Processing scenes...') 210 | _ = list(pool.map(handle_process, scene_paths, 211 | repeat(config.output_root), repeat(labels_pd), 212 | repeat(train_scenes), repeat(val_scenes), 213 | repeat(test_scenes))) 214 | -------------------------------------------------------------------------------- /data/arkitscenes/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare ARKitScenes Data for Indoor 3D Detection 2 | 3 | For now we only support offline benchmark with a single reconstructed point clound for each scene. Online benchmark for single RGB-D frame detection can be supported in the future. The `utils` directory is used unchanged from [ARKitScenes](https://github.com/apple/ARKitScenes/tree/main/threedod/benchmark_scripts/utils), except fixing a single [issue](https://github.com/apple/ARKitScenes/issues/53). 4 | 5 | 1. Download data from the official [ARKitScenes](https://github.com/apple/ARKitScenes). From their repo you may run: 6 | ``` 7 | python download_data.py 3dod --video-id-csv threedod/3dod_train_val_splits.csv 8 | ``` 9 | 10 | After this step you have the following file structure here: 11 | ``` 12 | 3dod 13 | ├── metadata.csv 14 | ├── Training 15 | │ ├── xxxxxxxx 16 | │ │ ├── xxxxxxxx_3dod_annotation.json 17 | │ │ ├── xxxxxxxx_3dod_mesh.ply 18 | │ │ ├── xxxxxxxx_frames 19 | ├── Validation 20 | │ ├── xxxxxxxx 21 | │ │ ├── xxxxxxxx_3dod_annotation.json 22 | │ │ ├── xxxxxxxx_3dod_mesh.ply 23 | │ │ ├── xxxxxxxx_frames 24 | ``` 25 | 26 | 2. 
Preprocess data for offline benchmark with our adapted script: 27 | ``` 28 | python data_prepare_offline.py 29 | ``` 30 | After this step you have the following file structure here: 31 | ``` 32 | offline_prepared_data 33 | ├── xxxxxxxx_point.npy 34 | ├── xxxxxxxx_bbox.npy 35 | ├── xxxxxxxx_label.npy 36 | ``` 37 | 38 | 3. Enter the project root directory, generate training and validation data by running: 39 | ``` 40 | python tools/create_data.py arkitscenes --root-path ./data/arkitscenes --out-dir ./data/arkitscenes --extra-tag arkitscenes-offline 41 | ``` 42 | Overall you achieve the following file structure in `data` directory: 43 | ``` 44 | arkitscenes 45 | ├── offline_prepared_data 46 | │ ├── xxxxxxxx_point.bin 47 | ├── arkitscenes_offline_train_infos.pkl 48 | ├── arkitscenes_offline_val_infos.pkl 49 | 50 | ``` 51 | -------------------------------------------------------------------------------- /data/arkitscenes/arkitscenes_data_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from concurrent import futures as futures 3 | from os import path as osp 4 | import mmengine 5 | import numpy as np 6 | from typing import List, Optional 7 | 8 | 9 | class ARKitScenesOfflineData: 10 | """ARKitScenesOfflineData 11 | Generate arkitscenes infos (offline benchmark) for indoor_converter. 12 | 13 | Args: 14 | root_path (str): Root path of the raw data. 15 | split (str): Stplit type 'train' or 'val'. 16 | """ 17 | def __init__(self, root_path: str, split: str): 18 | self.split = split 19 | raw_path = os.path.join(root_path, '3dod') 20 | self.data_path = os.path.join(root_path, 'offline_prepared_data') 21 | assert split in ['train', 'val'] 22 | class_names = [ 23 | 'cabinet', 'refrigerator', 'shelf', 'stove', 'bed', 24 | 'sink', 'washer', 'toilet', 'bathtub', 'oven', 25 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table', 26 | 'tv_monitor', 'sofa' 27 | ] 28 | self.name2class = { 29 | name: i 30 | for i, name in enumerate(class_names) 31 | } 32 | all_id_list = set( 33 | map(lambda x: x.split('_')[0], 34 | os.listdir(self.data_path))) 35 | split_dir = 'Training' if split == 'train' else 'Validation' 36 | split_id_list = set(os.listdir(osp.join(raw_path, split_dir))) 37 | self.sample_id_list = all_id_list & split_id_list 38 | print(f'{split}, raw ids: {len(split_id_list)}, ' 39 | f'processed ids: {len(self.sample_id_list)}') 40 | 41 | def __len__(self) -> int: 42 | """Length of the dataset.""" 43 | return len(self.sample_id_list) 44 | 45 | def get_infos(self, 46 | num_workers: int = 4, 47 | has_label: bool = True, 48 | sample_id_list: Optional[List[str]] = None) -> dict: 49 | """Get data infos. 50 | This method gets information from the raw data. 51 | 52 | Args: 53 | num_workers (int, optional): Number of threads to be used. 54 | Default: 4. 55 | has_label (bool, optional): Whether the data has label. 56 | Default: True. 57 | sample_id_list (list[str], optional): Index list of the sample. 58 | Default: None. 59 | 60 | Returns: 61 | dict: Information of the raw data. 
62 | """ 63 | def process_single_scene(sample_idx): 64 | print(f'{self.split} sample_idx: {sample_idx}', end='\r') 65 | info = { 66 | 'lidar_points': { 67 | 'num_pts_feats': 6, 68 | 'lidar_path': f'{sample_idx}_point.bin' 69 | } 70 | } 71 | boxes = np.load( 72 | osp.join(self.data_path, f'{sample_idx}_bbox.npy')) 73 | labels = np.load( 74 | osp.join(self.data_path, f'{sample_idx}_label.npy')) 75 | instances = [] 76 | for box, label in zip(boxes, labels): 77 | # follow heading angle of DepthInstance3DBoxes 78 | box[-1] = -box[-1] 79 | instances.append({ 80 | 'bbox_3d': box.tolist(), 81 | 'bbox_label_3d': self.name2class[label] 82 | }) 83 | info['instances'] = instances 84 | return info 85 | 86 | sample_id_list = sample_id_list if sample_id_list is not None \ 87 | else self.sample_id_list 88 | with futures.ThreadPoolExecutor(num_workers) as executor: 89 | infos = executor.map(process_single_scene, list(sample_id_list)) 90 | 91 | infos = { 92 | 'metainfo': { 93 | 'categories': self.name2class, 94 | 'dataset': 'arkitscenes_offline', 95 | 'info_version': '1.0' 96 | }, 97 | 'data_list': list(infos) 98 | } 99 | return infos 100 | 101 | 102 | # do not want to add create_annotations.py to projects 103 | if __name__ == '__main__': 104 | root_path = '/opt/project/data/arkitscenes' 105 | out_path = '/opt/project/work_dirs/tmp' 106 | infos_train = ARKitScenesOfflineData( 107 | root_path=root_path, split='train').get_infos() 108 | train_filename = osp.join(out_path, 'arkitscenes_offline_infos_train.pkl') 109 | mmengine.dump(infos_train, train_filename, 'pkl') 110 | infos_val = ARKitScenesOfflineData( 111 | root_path=root_path, split='val').get_infos() 112 | val_filename = osp.join(out_path, 'arkitscenes_offline_infos_val.pkl') 113 | mmengine.dump(infos_val, val_filename, 'pkl') 114 | -------------------------------------------------------------------------------- /data/arkitscenes/data_prepare_offline.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/apple/ARKitScenes/blob/main/threedod/benchmark_scripts/data_prepare_offline.py 2 | import argparse 3 | import numpy as np 4 | import os 5 | import pandas as pd 6 | from functools import partial 7 | from tqdm.contrib.concurrent import process_map 8 | 9 | import utils.box_utils as box_utils 10 | import utils.pc_utils as pc_utils 11 | import utils.taxonomy as taxonomy 12 | from utils.tenFpsDataLoader import TenFpsDataLoader, extract_gt 13 | 14 | # we keep this rough grid_size=0.05 from the original benchmark, 15 | # however accuracy might be better with smaller grid_size 16 | def accumulate_wrapper(loader, grid_size=0.05): 17 | """ 18 | Args: 19 | loader: TenFpsDataLoader 20 | Returns: 21 | world_pc: (N, 3) 22 | xyz in world coordinate system 23 | world_sem: (N, d) 24 | semantic for each point 25 | grid_size: float 26 | keep only one point in each (g_size, g_size, g_size) grid 27 | """ 28 | world_pc, world_rgb, poses = np.zeros((0, 3)), np.zeros((0, 3)), [] 29 | for i in range(len(loader)): 30 | frame = loader[i] 31 | image_path = frame["image_path"] 32 | pcd = frame["pcd"] # in world coordinate 33 | pose = frame["pose"] 34 | rgb = frame["color"] 35 | 36 | world_pc = np.concatenate((world_pc, pcd), axis=0) 37 | world_rgb = np.concatenate((world_rgb, rgb), axis=0) 38 | 39 | choices = pc_utils.down_sample(world_pc, grid_size) 40 | world_pc = world_pc[choices] 41 | world_rgb = world_rgb[choices] 42 | 43 | return world_pc, world_rgb, poses 44 | 45 | 46 | def main(scene_id, split, data_root, 
output_dir): 47 | # step 0.0: output folder, make dir 48 | os.makedirs(output_dir, exist_ok=True) 49 | point_output_path = os.path.join(output_dir, f"{scene_id}_point.npy") 50 | bbox_output_path = os.path.join(output_dir, f"{scene_id}_bbox.npy") 51 | label_output_path = os.path.join(output_dir, f"{scene_id}_label.npy") 52 | # skip already processed scenes 53 | if os.path.exists(point_output_path) \ 54 | and os.path.exists(bbox_output_path) \ 55 | and os.path.exists(label_output_path): 56 | return 57 | 58 | # step 0.1: get annotation first, 59 | # if skipped or no gt boxes, we will not bother calling further steps 60 | gt_path = os.path.join(data_root, split, scene_id, f"{scene_id}_3dod_annotation.json") 61 | skipped, boxes_corners, _, _, labels, _ = extract_gt(gt_path) 62 | if skipped or boxes_corners.shape[0] == 0: 63 | return 64 | 65 | # step 0.2: data 66 | data_path = os.path.join(data_root, split, scene_id, f"{scene_id}_frames") 67 | loader = TenFpsDataLoader( 68 | dataset_cfg=None, 69 | class_names=taxonomy.class_names, 70 | root_path=data_path) 71 | 72 | # step 1: accumulate points and save points 73 | world_pc, world_rgb, _ = accumulate_wrapper(loader) 74 | # despite original benchmark script ignores rgb here, we save it 75 | # to allow user to use or skip it for trainig / testing / visualization 76 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32) 77 | points.tofile(point_output_path) 78 | 79 | # step 2: save labels and boxes 80 | # not sure if we need uids, but keep them followinig original benchmark 81 | boxes = box_utils.corners_to_boxes(boxes_corners) 82 | np.save(bbox_output_path, boxes) 83 | np.save(label_output_path, labels) 84 | 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument( 89 | "--data-root", 90 | default="./3dod", 91 | help="input folder with ./Training/{scene_id}, ./Validation/{scene_id}" 92 | "and metadata.json" 93 | ) 94 | parser.add_argument( 95 | "--output-dir", 96 | default="./offline_prepared_data", 97 | help="directory to save the data and annoation" 98 | ) 99 | parser.add_argument( 100 | "--max-workers", 101 | default=1, 102 | type=int, 103 | help="number of parallel processes" 104 | ) 105 | 106 | args = parser.parse_args() 107 | df = pd.read_csv(os.path.join(args.data_root, "metadata.csv")) 108 | scene_ids = list(map(str, df["video_id"].to_list())) 109 | splits = list(map(str, df["fold"].to_list())) 110 | process_map( 111 | partial(main, data_root=args.data_root, output_dir=args.output_dir), 112 | scene_ids, splits, max_workers=args.max_workers) 113 | -------------------------------------------------------------------------------- /data/arkitscenes/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from collections import defaultdict 5 | from plyfile import PlyData 6 | from tqdm import tqdm 7 | 8 | import mmengine 9 | from mmdet3d.structures import DepthInstance3DBoxes 10 | from mmdet3d.apis import inference_detector, init_model 11 | from projects.TR3D.tr3d.local_visualizer import TR3DLocalVisualizer 12 | from utils.box_utils import boxes_to_corners_3d 13 | from utils.pc_utils import down_sample 14 | 15 | 16 | def verify_corners(): 17 | a = np.random.rand(100, 7) 18 | mmdet3d_corners = DepthInstance3DBoxes(a, origin=(.5, .5, .5)).corners.numpy() 19 | a[:, -1] = -a[:, -1] 20 | arkiscenes_corners = boxes_to_corners_3d(a)[:, [2, 6, 7, 3, 1, 5, 4, 0]] 21 | assert 
np.abs(arkiscenes_corners - mmdet3d_corners).max() < 1e-5 22 | 23 | 24 | def print_object_statistics(path): 25 | print(path) 26 | infos = mmengine.load(path) 27 | categories = infos['metainfo']['categories'] 28 | inverse_categories = {v: k for k, v in categories.items()} 29 | data = {c: defaultdict(list) for c in categories} 30 | for d in infos['data_list']: 31 | for instance in d['instances']: 32 | category_data = data[inverse_categories[instance['bbox_label_3d']]] 33 | box = instance['bbox_3d'] 34 | category_data['xy_min'].append(min(box[3], box[4])) 35 | category_data['xy_max'].append(max(box[3], box[4])) 36 | category_data['z'].append(box[5]) 37 | 38 | quantiles = (0, .75, 1) 39 | columns = ['category', 'N'] 40 | df_data = [] 41 | for key in category_data.keys(): 42 | for q in quantiles: 43 | columns.append(f'{key}.{q}') 44 | for category, category_data in data.items(): 45 | table_row = [category, len(category_data['z'])] 46 | for key in category_data.keys(): 47 | for q in quantiles: 48 | value = np.quantile(category_data[key], q) 49 | table_row.append(value) # f'{value:.4f}' 50 | df_data.append(table_row) 51 | df = pd.DataFrame(data=df_data, columns=columns) 52 | pd.set_option('display.precision', 3) 53 | target = df[['xy_max.0.75', 'z.0.75']].to_numpy().max(axis=1) 54 | target = target > np.median(target) 55 | df['target'] = target 56 | print(df) 57 | print('target:', target.astype(int).tolist()) 58 | 59 | 60 | def aggregate_multiple_ply(path, grid_size=0.05): 61 | world_pc, world_rgb = np.zeros((0, 3)), np.zeros((0, 3)) 62 | for file_name in tqdm(os.listdir(path)): 63 | data = PlyData.read(os.path.join(path, file_name)) 64 | pc = np.stack(( 65 | data['vertex']['x'], 66 | data['vertex']['y'], 67 | data['vertex']['z']), axis=1) 68 | rgb = np.stack(( 69 | data['vertex']['red'], 70 | data['vertex']['green'], 71 | data['vertex']['blue']), axis=1) 72 | world_pc = np.concatenate((world_pc, pc)) 73 | world_rgb = np.concatenate((world_rgb, rgb)) 74 | choices = down_sample(world_pc, grid_size) 75 | world_pc = world_pc[choices] 76 | world_rgb = world_rgb[choices] 77 | points = np.concatenate((world_pc, world_rgb), axis=1).astype(np.float32) 78 | file_name = f'{os.path.basename(os.path.dirname(path))}.bin' 79 | points.tofile(os.path.join('work_dirs/tmp/tmp', file_name)) 80 | 81 | 82 | def predict(pcd_path, config_path, checkpoint_path): 83 | model = init_model(config_path, checkpoint_path, device='cuda:0', 84 | cfg_options=dict(test_dataloader=dict(dataset=dict(box_type_3d='depth')))) 85 | points = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, 6) 86 | points = np.concatenate((points[:, :3], points[:, 3:] / 255), axis=1) 87 | result = inference_detector(model, points) 88 | TR3DLocalVisualizer().add_datasample( 89 | name='', 90 | data_input=dict(points=points), 91 | data_sample=result[0], 92 | draw_gt=False, 93 | out_file=pcd_path, 94 | vis_task='lidar_det') 95 | 96 | if __name__ == '__main__': 97 | # verify_corners() 98 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_train.pkl') 99 | # print_object_statistics('/opt/project/data/arkitscenes/arkitscenes_offline_infos_val.pkl') 100 | aggregate_multiple_ply('data/tmp/230621_sr_room_samples/Jun18at10-18PM-poly/pcds') 101 | predict( 102 | 'work_dirs/tmp/tmp/Jun18at10-18PM-poly.bin', 103 | 'projects/arkitscenes/configs/tr3d_1xb16_arkitscenes-offline-3d-4class.py', 104 | 'work_dirs/tmp/tr3d_arkitscenes_epoch10.pth') 105 | -------------------------------------------------------------------------------- 
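Note on the prepared point files: despite the `.npy` extension used in `offline_prepared_data`, `data_prepare_offline.py` saves the points with `tofile`, so both the `*_point.npy` and the `*_point.bin` files are raw float32 dumps with six values per point (xyz followed by rgb). A minimal loading sketch (the scene id is illustrative; the division by 255 mirrors `predict()` in `misc.py` and assumes colors are stored in 0-255):

```python
import numpy as np

# raw float32 dump: N x (x, y, z, r, g, b)
points = np.fromfile('data/arkitscenes/offline_prepared_data/41048190_point.npy',
                     dtype=np.float32).reshape(-1, 6)
xyz = points[:, :3]
rgb = points[:, 3:] / 255.0  # assumes colors are stored as 0-255 values
```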
/data/arkitscenes/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/arkitscenes/utils/__init__.py -------------------------------------------------------------------------------- /data/arkitscenes/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | # TODO: Explain 8 corners logic at the top and use it consistently 2 | # Add comments of explanation 3 | 4 | import numpy as np 5 | import scipy.spatial 6 | 7 | from .rotation import rotate_points_along_z 8 | 9 | 10 | def get_size(box): 11 | """ 12 | Args: 13 | box: 8x3 14 | Returns: 15 | size: [dx, dy, dz] 16 | """ 17 | distance = scipy.spatial.distance.cdist(box[0:1, :], box[1:5, :]) 18 | l = distance[0, 2] 19 | w = distance[0, 0] 20 | h = distance[0, 3] 21 | return [l, w, h] 22 | 23 | 24 | def get_heading_angle(box): 25 | """ 26 | Args: 27 | box: (8, 3) 28 | Returns: 29 | heading_angle: float 30 | """ 31 | a = box[0, 0] - box[1, 0] 32 | b = box[0, 1] - box[1, 1] 33 | 34 | heading_angle = np.arctan2(a, b) 35 | return heading_angle 36 | 37 | 38 | def compute_box_3d(size, center, rotmat): 39 | """Compute corners of a single box from rotation matrix 40 | Args: 41 | size: list of float [dx, dy, dz] 42 | center: np.array [x, y, z] 43 | rotmat: np.array (3, 3) 44 | Returns: 45 | corners: (8, 3) 46 | """ 47 | l, h, w = [i / 2 for i in size] 48 | center = np.reshape(center, (-1, 3)) 49 | center = center.reshape(3) 50 | x_corners = [l, l, -l, -l, l, l, -l, -l] 51 | y_corners = [h, -h, -h, h, h, -h, -h, h] 52 | z_corners = [w, w, w, w, -w, -w, -w, -w] 53 | corners_3d = np.dot( 54 | np.transpose(rotmat), np.vstack([x_corners, y_corners, z_corners]) 55 | ) 56 | corners_3d[0, :] += center[0] 57 | corners_3d[1, :] += center[1] 58 | corners_3d[2, :] += center[2] 59 | return np.transpose(corners_3d) 60 | 61 | 62 | def corners_to_boxes(corners3d): 63 | """ 64 | 7 -------- 4 65 | /| /| 66 | 6 -------- 5 . 67 | | | | | 68 | . 3 -------- 0 69 | |/ |/ 70 | 2 -------- 1 71 | Args: 72 | corners: (N, 8, 3), vertex order shown in figure above 73 | 74 | Returns: 75 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading] 76 | with (x, y, z) is the box center 77 | (dx, dy, dz) as the box size 78 | and heading as the clockwise rotation angle 79 | """ 80 | 81 | boxes3d = np.zeros((corners3d.shape[0], 7)) 82 | for i in range(corners3d.shape[0]): 83 | boxes3d[i, :3] = np.mean(corners3d[i, :, :], axis=0) 84 | boxes3d[i, 3:6] = get_size(corners3d[i, :, :]) 85 | boxes3d[i, 6] = get_heading_angle(corners3d[i, :, :]) 86 | 87 | return boxes3d 88 | 89 | 90 | def boxes_to_corners_3d(boxes3d): 91 | """ 92 | 7 -------- 4 93 | /| /| 94 | 6 -------- 5 . 95 | | | | | 96 | . 3 -------- 0 97 | |/ |/ 98 | 2 -------- 1 99 | Args: 100 | boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading], 101 | (x, y, z) is the box center 102 | 103 | Returns: 104 | corners: (N, 8, 3) 105 | """ 106 | template = np.array([[1, 1, -1], 107 | [1, -1, -1], 108 | [-1, -1, -1], 109 | [-1, 1, -1], 110 | [1, 1, 1], 111 | [1, -1, 1], 112 | [-1, -1, 1], 113 | [-1, 1, 1]] 114 | ) / 2. 
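    # The template rows follow the corner numbering in the figure above:
    # rows 0-3 form the bottom face (z = -dz/2) and rows 4-7 the top face
    # (z = +dz/2); scaling by (dx, dy, dz), rotating about z by the heading
    # and adding the center below yields the 8 box corners.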
115 | 116 | # corners3d: of shape (N, 3, 8) 117 | corners3d = np.tile(boxes3d[:, None, 3:6], (1, 8, 1)) * template[None, :, :] 118 | 119 | corners3d = rotate_points_along_z(corners3d.reshape(-1, 8, 3), boxes3d[:, 6]).reshape( 120 | -1, 8, 3 121 | ) 122 | corners3d += boxes3d[:, None, 0:3] 123 | 124 | return corners3d 125 | 126 | 127 | def points_in_boxes(points, boxes): 128 | """ 129 | Args: 130 | pc: np.array (n, 3+d) 131 | boxes: np.array (m, 8, 3) 132 | Returns: 133 | mask: np.array (n, m) of type bool 134 | """ 135 | if len(boxes) == 0: 136 | return np.zeros([points.shape[0], 1], dtype=np.bool) 137 | points = points[:, :3] # get xyz 138 | # u = p6 - p5 139 | u = boxes[:, 6, :] - boxes[:, 5, :] # (m, 3) 140 | # v = p6 - p7 141 | v = boxes[:, 6, :] - boxes[:, 7, :] # (m, 3) 142 | # w = p6 - p2 143 | w = boxes[:, 6, :] - boxes[:, 2, :] # (m, 3) 144 | 145 | # ux, vx, wx 146 | ux = np.matmul(points, u.T) # (n, m) 147 | vx = np.matmul(points, v.T) 148 | wx = np.matmul(points, w.T) 149 | 150 | # up6, up5, vp6, vp7, wp6, wp2 151 | up6 = np.sum(u * boxes[:, 6, :], axis=1) 152 | up5 = np.sum(u * boxes[:, 5, :], axis=1) 153 | vp6 = np.sum(v * boxes[:, 6, :], axis=1) 154 | vp7 = np.sum(v * boxes[:, 7, :], axis=1) 155 | wp6 = np.sum(w * boxes[:, 6, :], axis=1) 156 | wp2 = np.sum(w * boxes[:, 2, :], axis=1) 157 | 158 | mask_u = np.logical_and(ux <= up6, ux >= up5) # (1024, n) 159 | mask_v = np.logical_and(vx <= vp6, vx >= vp7) 160 | mask_w = np.logical_and(wx <= wp6, wx >= wp2) 161 | 162 | mask = mask_u & mask_v & mask_w # (10240, n) 163 | 164 | return mask 165 | 166 | 167 | def poly_area(x,y): 168 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 169 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 170 | 171 | 172 | def polygon_clip(subjectPolygon, clipPolygon): 173 | """ Clip a polygon with another polygon. 174 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 175 | Args: 176 | subjectPolygon: a list of (x,y) 2d points, any polygon. 177 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 178 | Note: 179 | **points have to be counter-clockwise ordered** 180 | Return: 181 | a list of (x,y) vertex point for the intersection polygon. 182 | """ 183 | 184 | def inside(p): 185 | return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0]) 186 | 187 | def computeIntersection(): 188 | dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]] 189 | dp = [s[0] - e[0], s[1] - e[1]] 190 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 191 | n2 = s[0] * e[1] - s[1] * e[0] 192 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 193 | return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3] 194 | 195 | outputList = subjectPolygon 196 | cp1 = clipPolygon[-1] 197 | 198 | for clipVertex in clipPolygon: 199 | cp2 = clipVertex 200 | inputList = outputList 201 | outputList = [] 202 | s = inputList[-1] 203 | 204 | for subjectVertex in inputList: 205 | e = subjectVertex 206 | if inside(e): 207 | if not inside(s): 208 | outputList.append(computeIntersection()) 209 | outputList.append(e) 210 | elif inside(s): 211 | outputList.append(computeIntersection()) 212 | s = e 213 | cp1 = cp2 214 | if len(outputList) == 0: 215 | return None 216 | return (outputList) 217 | 218 | 219 | def convex_hull_intersection(p1, p2): 220 | """ Compute area of two convex hull's intersection area. 221 | p1,p2 are a list of (x,y) tuples of hull vertices. 
222 | return a list of (x,y) for the intersection and its volume 223 | """ 224 | inter_p = polygon_clip(p1,p2) 225 | if inter_p is not None: 226 | hull_inter = scipy.spatial.ConvexHull(inter_p) 227 | return inter_p, hull_inter.volume 228 | else: 229 | return None, 0.0 230 | 231 | 232 | def box3d_vol(corners): 233 | ''' corners: (8,3) no assumption on axis direction ''' 234 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2)) 235 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2)) 236 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2)) 237 | return a*b*c 238 | 239 | 240 | def box3d_iou(corners1, corners2): 241 | ''' Compute 3D bounding box IoU. 242 | 243 | Input: 244 | corners1: numpy array (8,3), assume up direction is negative Y 245 | corners2: numpy array (8,3), assume up direction is negative Y 246 | Output: 247 | iou: 3D bounding box IoU 248 | iou_2d: bird's eye view 2D bounding box IoU 249 | 250 | ''' 251 | # corner points are in counter clockwise order 252 | rect1 = [(corners1[i,0], corners1[i,1]) for i in range(3,-1,-1)] 253 | rect2 = [(corners2[i,0], corners2[i,1]) for i in range(3,-1,-1)] 254 | area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1]) 255 | area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1]) 256 | inter, inter_area = convex_hull_intersection(rect1, rect2) 257 | iou_2d = inter_area/(area1+area2-inter_area) 258 | ymax = min(corners1[:,2].max(), corners2[:,2].max()) 259 | ymin = max(corners1[:,2].min(), corners2[:,2].min()) 260 | inter_vol = inter_area * max(0.0, ymax-ymin) 261 | vol1 = box3d_vol(corners1) 262 | vol2 = box3d_vol(corners2) 263 | iou = inter_vol / (vol1 + vol2 - inter_vol) 264 | return iou -------------------------------------------------------------------------------- /data/arkitscenes/utils/pc_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def down_sample(point_cloud, voxel_sz): 5 | """Quantize point cloud by voxel_size 6 | Returns kept indices 7 | 8 | Args: 9 | all_points: np.array (n, 3) float 10 | voxel_sz: float 11 | Returns: 12 | indices: (m, ) int 13 | """ 14 | coordinates = np.round(point_cloud / voxel_sz).astype(np.int32) 15 | _, indices = np.unique(coordinates, axis=0, return_index=True) 16 | return indices -------------------------------------------------------------------------------- /data/arkitscenes/utils/rotation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy as np 4 | 5 | 6 | def eulerAnglesToRotationMatrix(theta): 7 | """Euler rotation matrix with clockwise logic. 
8 | Rotation 9 | 10 | Args: 11 | theta: list of float 12 | [theta_x, theta_y, theta_z] 13 | Returns: 14 | R: np.array (3, 3) 15 | rotation matrix of Rz*Ry*Rx 16 | """ 17 | R_x = np.array( 18 | [ 19 | [1, 0, 0], 20 | [0, math.cos(theta[0]), -math.sin(theta[0])], 21 | [0, math.sin(theta[0]), math.cos(theta[0])], 22 | ] 23 | ) 24 | 25 | R_y = np.array( 26 | [ 27 | [math.cos(theta[1]), 0, math.sin(theta[1])], 28 | [0, 1, 0], 29 | [-math.sin(theta[1]), 0, math.cos(theta[1])], 30 | ] 31 | ) 32 | 33 | R_z = np.array( 34 | [ 35 | [math.cos(theta[2]), -math.sin(theta[2]), 0], 36 | [math.sin(theta[2]), math.cos(theta[2]), 0], 37 | [0, 0, 1], 38 | ] 39 | ) 40 | 41 | R = np.dot(R_z, np.dot(R_y, R_x)) 42 | return R 43 | 44 | 45 | def upright_camera_relative_transform(pose): 46 | """Generate pose matrix with z-dim as height 47 | 48 | Args: 49 | pose: np.array (4, 4) 50 | Returns: 51 | urc: (4, 4) 52 | urc_inv: (4, 4) 53 | """ 54 | 55 | # take viewing direction in camera local coordiantes (which is simply unit vector along +z) 56 | view_dir_camera = np.asarray([0, 0, 1]) 57 | R = pose[0:3, 0:3] 58 | t = pose[0:3, 3] 59 | 60 | # convert to world coordinates 61 | view_dir_world = np.dot(R, view_dir_camera) 62 | 63 | # compute heading 64 | view_dir_xy = view_dir_world[0:2] 65 | heading = math.atan2(view_dir_xy[1], view_dir_xy[0]) 66 | 67 | # compute rotation around Z to align heading with +Y 68 | zRot = -heading + math.pi / 2 69 | 70 | # translation first, back to camera point 71 | urc_t = np.identity(4) 72 | urc_t[0:2, 3] = -1 * t[0:2] 73 | 74 | # compute rotation matrix 75 | urc_r = np.identity(4) 76 | urc_r[0:3, 0:3] = eulerAnglesToRotationMatrix([0, 0, zRot]) 77 | 78 | urc = np.dot(urc_r, urc_t) 79 | urc_inv = np.linalg.inv(urc) 80 | 81 | return urc, urc_inv 82 | 83 | 84 | def rotate_pc(pc, rotmat): 85 | """Rotation points w.r.t. rotmat 86 | Args: 87 | pc: np.array (n, 3) 88 | rotmat: np.array (4, 4) 89 | Returns: 90 | pc: (n, 3) 91 | """ 92 | pc_4 = np.ones([pc.shape[0], 4]) 93 | pc_4[:, 0:3] = pc 94 | pc_4 = np.dot(pc_4, np.transpose(rotmat)) 95 | 96 | return pc_4[:, 0:3] 97 | 98 | 99 | def rotate_points_along_z(points, angle): 100 | """Rotation clockwise 101 | Args: 102 | points: np.array of np.array (B, N, 3 + C) or 103 | (N, 3 + C) for single batch 104 | angle: np.array of np.array (B, ) 105 | or (, ) for single batch 106 | angle along z-axis, angle increases x ==> y 107 | Returns: 108 | points_rot: (B, N, 3 + C) or (N, 3 + C) 109 | 110 | """ 111 | single_batch = len(points.shape) == 2 112 | if single_batch: 113 | points = np.expand_dims(points, axis=0) 114 | angle = np.expand_dims(angle, axis=0) 115 | cosa = np.expand_dims(np.cos(angle), axis=1) 116 | sina = np.expand_dims(np.sin(angle), axis=1) 117 | zeros = np.zeros_like(cosa) # angle.new_zeros(points.shape[0]) 118 | ones = np.ones_like(sina) # angle.new_ones(points.shape[0]) 119 | 120 | rot_matrix = ( 121 | np.concatenate((cosa, -sina, zeros, sina, cosa, zeros, zeros, zeros, ones), axis=1) 122 | .reshape(-1, 3, 3) 123 | ) 124 | 125 | # print(rot_matrix.view(3, 3)) 126 | points_rot = np.matmul(points[:, :, :3], rot_matrix) 127 | points_rot = np.concatenate((points_rot, points[:, :, 3:]), axis=-1) 128 | 129 | if single_batch: 130 | points_rot = points_rot.squeeze(0) 131 | 132 | return points_rot 133 | 134 | 135 | def convert_angle_axis_to_matrix3(angle_axis): 136 | """Return a Matrix3 for the angle axis. 137 | Arguments: 138 | angle_axis {Point3} -- a rotation in angle axis form. 
139 | """ 140 | matrix, jacobian = cv2.Rodrigues(angle_axis) 141 | return matrix -------------------------------------------------------------------------------- /data/arkitscenes/utils/taxonomy.py: -------------------------------------------------------------------------------- 1 | #TODO: no original categories 2 | # shortened version only 3 | 4 | import copy 5 | import numpy as np 6 | 7 | 8 | # After merging, our label-id to class (string); 9 | class_names = [ 10 | "cabinet", "refrigerator", "shelf", "stove", "bed", # 0..5 11 | "sink", "washer", "toilet", "bathtub", "oven", # 5..10 12 | "dishwasher", "fireplace", "stool", "chair", "table", # 10..15 13 | "tv_monitor", "sofa", # 15..17 14 | ] 15 | 16 | # 3D Anchor-sizes of merged categories (dx, dy, dz) 17 | ''' 18 | Anchor box sizes are computed based on box corner order below: 19 | 6 -------- 7 20 | /| /| 21 | 5 -------- 4 . 22 | | | | | 23 | . 2 -------- 3 24 | |/ |/ 25 | 1 -------- 0 26 | ''' 27 | 28 | 29 | class ARKitDatasetConfig(object): 30 | def __init__(self): 31 | """ 32 | init will set values for: 33 | self.class_names 34 | self.cls2label (after mapping) 35 | self.label2cls (after mapping) 36 | self.num_class 37 | 38 | Args: 39 | """ 40 | # final training/val categories 41 | self.class_names = class_names 42 | self.label2cls = {} 43 | self.cls2label = {} 44 | for i, cls_ in enumerate(class_names): 45 | self.label2cls[i] = cls_ 46 | self.cls2label[cls_] = i 47 | 48 | self.num_class = len(self.class_names) 49 | -------------------------------------------------------------------------------- /data/multiscan/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare MultiScan Data for Indoor 3D Detection 2 | 3 | 1. Download and unzip data from the official [MultiScan](https://github.com/smartscenes/multiscan?tab=readme-ov-file). 4 | 5 | 2. Generate bins and pkls data by running: 6 | 7 | ```bash 8 | python prepare_bins_pkls.py --path_to_pths path_to_unzipped_folder --path_to_save_bins path_to_save_bins 9 | ``` 10 | 11 | Overall you achieve the following file structure in `bins` directory: 12 | ``` 13 | bins 14 | ├── bboxs 15 | │ ├── xxxxx_xx.npy 16 | ├── instance_mask 17 | │ ├── xxxxx_xx.bin 18 | ├── points 19 | │ ├── xxxxx_xx.bin 20 | ├── semantic_mask 21 | │ ├── xxxxx_xx.bin 22 | ├── super_points 23 | │ ├── xxxxx_xx.bin 24 | ├── multiscan_infos_train.pkl 25 | ├── multiscan_infos_val.pkl 26 | ├── multiscan_infos_test.pkl 27 | ``` 28 | -------------------------------------------------------------------------------- /data/s3dis/README.md: -------------------------------------------------------------------------------- 1 | ### Preparation of S3DIS Data for Indoor Detection 2 | 3 | Please follow original mmdetection3d [instruction](https://github.com/open-mmlab/mmdetection3d/tree/22aaa47fdb53ce1870ff92cb7e3f96ae38d17f61/data/s3dis). However, to match the order of points in each scene with our superpoints it will be needed to run `remap_superpoints.py` script. 
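A possible invocation, where `path_to_superpoints_data` is a placeholder for the folder holding the reference `points/` and `super_points/` bins and `./` stands for the mmdetection3d-prepared S3DIS directory:

```bash
python remap_superpoints.py --src path_to_superpoints_data --dst ./
```

For every scene present in both folders the script matches each destination point to its nearest source point with a KD-tree and writes the transferred superpoint ids into `super_points/` next to the destination `points/` folder.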
4 | -------------------------------------------------------------------------------- /data/s3dis/remap_superpoints.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | from sklearn.neighbors import KDTree 4 | import argparse 5 | from tqdm import tqdm 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Remap superpoints from source to destination point cloud data') 9 | parser.add_argument('--src', type=str, required=True, help='Path to source data') 10 | parser.add_argument('--dst', type=str, required=True, help='Path to destination data') 11 | return parser.parse_args() 12 | 13 | args = parse_args() 14 | 15 | src_folder = Path(args.src) 16 | dst_folder = Path(args.dst) 17 | 18 | for src_file in tqdm(src_folder.glob('points/*.bin'), desc="Processing files"): 19 | pcds_src = np.fromfile(src_file, dtype=np.float32).reshape(-1, 6)[:, :3] 20 | sp_src = np.fromfile(src_file.parent.parent / 'super_points' / src_file.name, dtype=np.int64) 21 | 22 | dst_file = dst_folder / 'points' / src_file.name 23 | if dst_file.exists(): 24 | pcds_dst = np.fromfile(dst_file, dtype=np.float32).reshape(-1, 6)[:, :3] 25 | 26 | tree = KDTree(pcds_src) 27 | _, indices = tree.query(pcds_dst, k=1) 28 | sp_dst = sp_src[indices.flatten()] 29 | 30 | dst_sp_file = dst_file.parent.parent / 'super_points' / dst_file.name 31 | dst_sp_file.parent.mkdir(parents=True, exist_ok=True) 32 | sp_dst.astype(np.int64).tofile(dst_sp_file) 33 | else: 34 | print(f"Corresponding file not found in destination folder: {dst_file}") -------------------------------------------------------------------------------- /data/scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data for Indoor Detection or Segmentation Task 2 | 3 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/). 4 | 5 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Link or move the 'scans' folder to this level of directory. If you are performing segmentation tasks and want to upload the results to its official [benchmark](http://kaldir.vc.in.tum.de/scannet_benchmark/), please also link or move the 'scans_test' folder to this directory. 6 | 7 | 2. In this directory, extract point clouds and annotations by running `python batch_load_scannet_data.py`. Add the `--scannet200` flag if you want to get markup for the ScanNet200 dataset. 8 | 9 | 3. Enter the project root directory, generate training data by running 10 | 11 | ```bash 12 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 13 | ``` 14 |         or for ScanNet200: 15 | 16 | ```bash 17 | mkdir data/scannet200 18 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200 19 | ``` 20 | 21 | The overall process for ScanNet could be achieved through the following script 22 | 23 | ```bash 24 | python batch_load_scannet_data.py 25 | cd ../.. 26 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet 27 | ``` 28 | 29 | Or for ScanNet200: 30 | 31 | ```bash 32 | python batch_load_scannet_data.py --scannet200 33 | cd ../.. 
34 | mkdir data/scannet200 35 | python tools/create_data.py scannet200 --root-path ./data/scannet --out-dir ./data/scannet200 --extra-tag scannet200 36 | ``` 37 | 38 | The directory structure after pre-processing should be as below 39 | 40 | ``` 41 | scannet 42 | ├── meta_data 43 | ├── batch_load_scannet_data.py 44 | ├── load_scannet_data.py 45 | ├── scannet_utils.py 46 | ├── README.md 47 | ├── scans 48 | ├── scans_test 49 | ├── scannet_instance_data 50 | ├── points 51 | │ ├── xxxxx.bin 52 | ├── instance_mask 53 | │ ├── xxxxx.bin 54 | ├── semantic_mask 55 | │ ├── xxxxx.bin 56 | ├── super_points 57 | │ ├── xxxxx.bin 58 | ├── seg_info 59 | │ ├── train_label_weight.npy 60 | │ ├── train_resampled_scene_idxs.npy 61 | │ ├── val_label_weight.npy 62 | │ ├── val_resampled_scene_idxs.npy 63 | ├── scannet_infos_train.pkl 64 | ├── scannet_infos_val.pkl 65 | ├── scannet_infos_test.pkl 66 | 67 | ``` 68 | -------------------------------------------------------------------------------- /data/scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/batch_load_scannet_data.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """Batch mode in loading Scannet scenes with vertices and ground truth labels 8 | for semantic and instance segmentations. 9 | 10 | Usage example: python ./batch_load_scannet_data.py 11 | """ 12 | import argparse 13 | import datetime 14 | import os 15 | from os import path as osp 16 | 17 | import torch 18 | import segmentator 19 | import open3d as o3d 20 | import numpy as np 21 | from load_scannet_data import export 22 | 23 | DONOTCARE_CLASS_IDS = np.array([]) 24 | 25 | SCANNET_OBJ_CLASS_IDS = np.array( 26 | [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]) 27 | 28 | SCANNET200_OBJ_CLASS_IDS = np.array([2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 29 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154, 30 | 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 342, 356, 370, 392, 395, 399, 408, 417, 31 | 488, 540, 562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191]) 32 | 33 | 34 | 35 | def export_one_scan(scan_name, 36 | output_filename_prefix, 37 | max_num_point, 38 | label_map_file, 39 | scannet_dir, 40 | test_mode=False, 41 | scannet200=False): 42 | mesh_file = osp.join(scannet_dir, scan_name, scan_name + '_vh_clean_2.ply') 43 | agg_file = osp.join(scannet_dir, scan_name, 44 | scan_name + '.aggregation.json') 45 | seg_file = osp.join(scannet_dir, scan_name, 46 | scan_name + '_vh_clean_2.0.010000.segs.json') 47 | # includes axisAlignment info for the train set scans. 
48 | meta_file = osp.join(scannet_dir, scan_name, f'{scan_name}.txt') 49 | mesh_vertices, semantic_labels, instance_labels, unaligned_bboxes, \ 50 | aligned_bboxes, instance2semantic, axis_align_matrix = export( 51 | mesh_file, agg_file, seg_file, meta_file, label_map_file, None, 52 | test_mode, scannet200) 53 | 54 | if not test_mode: 55 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 56 | mesh_vertices = mesh_vertices[mask, :] 57 | semantic_labels = semantic_labels[mask] 58 | instance_labels = instance_labels[mask] 59 | 60 | num_instances = len(np.unique(instance_labels)) 61 | print(f'Num of instances: {num_instances}') 62 | if scannet200: 63 | OBJ_CLASS_IDS = SCANNET200_OBJ_CLASS_IDS 64 | else: 65 | OBJ_CLASS_IDS = SCANNET_OBJ_CLASS_IDS 66 | 67 | bbox_mask = np.in1d(unaligned_bboxes[:, -1], OBJ_CLASS_IDS) 68 | unaligned_bboxes = unaligned_bboxes[bbox_mask, :] 69 | bbox_mask = np.in1d(aligned_bboxes[:, -1], OBJ_CLASS_IDS) 70 | aligned_bboxes = aligned_bboxes[bbox_mask, :] 71 | assert unaligned_bboxes.shape[0] == aligned_bboxes.shape[0] 72 | print(f'Num of care instances: {unaligned_bboxes.shape[0]}') 73 | 74 | if max_num_point is not None: 75 | max_num_point = int(max_num_point) 76 | N = mesh_vertices.shape[0] 77 | if N > max_num_point: 78 | choices = np.random.choice(N, max_num_point, replace=False) 79 | mesh_vertices = mesh_vertices[choices, :] 80 | if not test_mode: 81 | semantic_labels = semantic_labels[choices] 82 | instance_labels = instance_labels[choices] 83 | 84 | mesh = o3d.io.read_triangle_mesh(mesh_file) 85 | vertices = torch.from_numpy(np.array(mesh.vertices).astype(np.float32)) 86 | faces = torch.from_numpy(np.array(mesh.triangles).astype(np.int64)) 87 | superpoints = segmentator.segment_mesh(vertices, faces).numpy() 88 | 89 | np.save(f'{output_filename_prefix}_sp_label.npy', superpoints) 90 | np.save(f'{output_filename_prefix}_vert.npy', mesh_vertices) 91 | 92 | if not test_mode: 93 | assert superpoints.shape == semantic_labels.shape 94 | np.save(f'{output_filename_prefix}_sem_label.npy', semantic_labels) 95 | np.save(f'{output_filename_prefix}_ins_label.npy', instance_labels) 96 | np.save(f'{output_filename_prefix}_unaligned_bbox.npy', 97 | unaligned_bboxes) 98 | np.save(f'{output_filename_prefix}_aligned_bbox.npy', aligned_bboxes) 99 | np.save(f'{output_filename_prefix}_axis_align_matrix.npy', 100 | axis_align_matrix) 101 | 102 | 103 | def batch_export(max_num_point, 104 | output_folder, 105 | scan_names_file, 106 | label_map_file, 107 | scannet_dir, 108 | test_mode=False, 109 | scannet200=False): 110 | if test_mode and not os.path.exists(scannet_dir): 111 | # test data preparation is optional 112 | return 113 | if not os.path.exists(output_folder): 114 | print(f'Creating new data folder: {output_folder}') 115 | os.mkdir(output_folder) 116 | 117 | scan_names = [line.rstrip() for line in open(scan_names_file)] 118 | for scan_name in scan_names: 119 | print('-' * 20 + 'begin') 120 | print(datetime.datetime.now()) 121 | print(scan_name) 122 | output_filename_prefix = osp.join(output_folder, scan_name) 123 | if osp.isfile(f'{output_filename_prefix}_vert.npy'): 124 | print('File already exists. 
skipping.') 125 | print('-' * 20 + 'done') 126 | continue 127 | try: 128 | export_one_scan(scan_name, output_filename_prefix, max_num_point, 129 | label_map_file, scannet_dir, test_mode, scannet200) 130 | except Exception: 131 | print(f'Failed export scan: {scan_name}') 132 | print('-' * 20 + 'done') 133 | 134 | 135 | def main(): 136 | parser = argparse.ArgumentParser() 137 | parser.add_argument( 138 | '--max_num_point', 139 | default=None, 140 | help='The maximum number of the points.') 141 | parser.add_argument( 142 | '--output_folder', 143 | default='./scannet_instance_data', 144 | help='output folder of the result.') 145 | parser.add_argument( 146 | '--train_scannet_dir', default='scans', help='scannet data directory.') 147 | parser.add_argument( 148 | '--test_scannet_dir', 149 | default='scans_test', 150 | help='scannet data directory.') 151 | parser.add_argument( 152 | '--label_map_file', 153 | default='meta_data/scannetv2-labels.combined.tsv', 154 | help='The path of label map file.') 155 | parser.add_argument( 156 | '--train_scan_names_file', 157 | default='meta_data/scannet_train.txt', 158 | help='The path of the file that stores the scan names.') 159 | parser.add_argument( 160 | '--test_scan_names_file', 161 | default='meta_data/scannetv2_test.txt', 162 | help='The path of the file that stores the scan names.') 163 | parser.add_argument( 164 | '--scannet200', 165 | action='store_true', 166 | help='Use it for scannet200 mapping') 167 | args = parser.parse_args() 168 | batch_export( 169 | args.max_num_point, 170 | args.output_folder, 171 | args.train_scan_names_file, 172 | args.label_map_file, 173 | args.train_scannet_dir, 174 | test_mode=False, 175 | scannet200=args.scannet200) 176 | batch_export( 177 | args.max_num_point, 178 | args.output_folder, 179 | args.test_scan_names_file, 180 | args.label_map_file, 181 | args.test_scannet_dir, 182 | test_mode=True, 183 | scannet200=args.scannet200) 184 | 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /data/scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | """Load Scannet scenes with vertices and ground truth labels for semantic and 8 | instance segmentations.""" 9 | import argparse 10 | import inspect 11 | import json 12 | import os 13 | 14 | import numpy as np 15 | import scannet_utils 16 | 17 | currentdir = os.path.dirname( 18 | os.path.abspath(inspect.getfile(inspect.currentframe()))) 19 | 20 | 21 | def read_aggregation(filename): 22 | assert os.path.isfile(filename) 23 | object_id_to_segs = {} 24 | label_to_segs = {} 25 | with open(filename) as f: 26 | data = json.load(f) 27 | num_objects = len(data['segGroups']) 28 | for i in range(num_objects): 29 | object_id = data['segGroups'][i][ 30 | 'objectId'] + 1 # instance ids should be 1-indexed 31 | label = data['segGroups'][i]['label'] 32 | segs = data['segGroups'][i]['segments'] 33 | object_id_to_segs[object_id] = segs 34 | if label in label_to_segs: 35 | label_to_segs[label].extend(segs) 36 | else: 37 | label_to_segs[label] = segs 38 | return object_id_to_segs, label_to_segs 39 | 40 | 41 | def read_segmentation(filename): 42 | assert os.path.isfile(filename) 43 | seg_to_verts = {} 44 | with open(filename) as f: 45 | data = json.load(f) 46 | num_verts = len(data['segIndices']) 47 | for i in range(num_verts): 48 | seg_id = data['segIndices'][i] 49 | if seg_id in seg_to_verts: 50 | seg_to_verts[seg_id].append(i) 51 | else: 52 | seg_to_verts[seg_id] = [i] 53 | return seg_to_verts, num_verts 54 | 55 | 56 | def extract_bbox(mesh_vertices, object_id_to_segs, object_id_to_label_id, 57 | instance_ids): 58 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 59 | instance_bboxes = np.zeros((num_instances, 7)) 60 | for obj_id in object_id_to_segs: 61 | label_id = object_id_to_label_id[obj_id] 62 | obj_pc = mesh_vertices[instance_ids == obj_id, 0:3] 63 | if len(obj_pc) == 0: 64 | continue 65 | xyz_min = np.min(obj_pc, axis=0) 66 | xyz_max = np.max(obj_pc, axis=0) 67 | bbox = np.concatenate([(xyz_min + xyz_max) / 2.0, xyz_max - xyz_min, 68 | np.array([label_id])]) 69 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 70 | instance_bboxes[obj_id - 1, :] = bbox 71 | return instance_bboxes 72 | 73 | 74 | def export(mesh_file, 75 | agg_file, 76 | seg_file, 77 | meta_file, 78 | label_map_file, 79 | output_file=None, 80 | test_mode=False, 81 | scannet200=False): 82 | """Export original files to vert, ins_label, sem_label and bbox file. 83 | 84 | Args: 85 | mesh_file (str): Path of the mesh_file. 86 | agg_file (str): Path of the agg_file. 87 | seg_file (str): Path of the seg_file. 88 | meta_file (str): Path of the meta_file. 89 | label_map_file (str): Path of the label_map_file. 90 | output_file (str): Path of the output folder. 91 | Default: None. 92 | test_mode (bool): Whether is generating test data without labels. 93 | Default: False. 94 | 95 | It returns a tuple, which contains the the following things: 96 | np.ndarray: Vertices of points data. 97 | np.ndarray: Indexes of label. 98 | np.ndarray: Indexes of instance. 99 | np.ndarray: Instance bboxes. 100 | dict: Map from object_id to label_id. 
101 | """ 102 | if scannet200: 103 | label_map = scannet_utils.read_label_mapping( 104 | label_map_file, label_from='raw_category', label_to='id') 105 | else: 106 | label_map = scannet_utils.read_label_mapping( 107 | label_map_file, label_from='raw_category', label_to='nyu40id') 108 | 109 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 110 | 111 | # Load scene axis alignment matrix 112 | lines = open(meta_file).readlines() 113 | # test set data doesn't have align_matrix 114 | axis_align_matrix = np.eye(4) 115 | for line in lines: 116 | if 'axisAlignment' in line: 117 | axis_align_matrix = [ 118 | float(x) 119 | for x in line.rstrip().strip('axisAlignment = ').split(' ') 120 | ] 121 | break 122 | axis_align_matrix = np.array(axis_align_matrix).reshape((4, 4)) 123 | 124 | # perform global alignment of mesh vertices 125 | pts = np.ones((mesh_vertices.shape[0], 4)) 126 | pts[:, 0:3] = mesh_vertices[:, 0:3] 127 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 128 | aligned_mesh_vertices = np.concatenate([pts[:, 0:3], mesh_vertices[:, 3:]], 129 | axis=1) 130 | 131 | # Load semantic and instance labels 132 | if not test_mode: 133 | object_id_to_segs, label_to_segs = read_aggregation(agg_file) 134 | seg_to_verts, num_verts = read_segmentation(seg_file) 135 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) 136 | object_id_to_label_id = {} 137 | for label, segs in label_to_segs.items(): 138 | label_id = label_map[label] 139 | for seg in segs: 140 | verts = seg_to_verts[seg] 141 | label_ids[verts] = label_id 142 | instance_ids = np.zeros( 143 | shape=(num_verts), dtype=np.uint32) # 0: unannotated 144 | for object_id, segs in object_id_to_segs.items(): 145 | for seg in segs: 146 | verts = seg_to_verts[seg] 147 | instance_ids[verts] = object_id 148 | if object_id not in object_id_to_label_id: 149 | object_id_to_label_id[object_id] = label_ids[verts][0] 150 | unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs, 151 | object_id_to_label_id, instance_ids) 152 | aligned_bboxes = extract_bbox(aligned_mesh_vertices, object_id_to_segs, 153 | object_id_to_label_id, instance_ids) 154 | else: 155 | label_ids = None 156 | instance_ids = None 157 | unaligned_bboxes = None 158 | aligned_bboxes = None 159 | object_id_to_label_id = None 160 | 161 | if output_file is not None: 162 | np.save(output_file + '_vert.npy', mesh_vertices) 163 | if not test_mode: 164 | np.save(output_file + '_sem_label.npy', label_ids) 165 | np.save(output_file + '_ins_label.npy', instance_ids) 166 | np.save(output_file + '_unaligned_bbox.npy', unaligned_bboxes) 167 | np.save(output_file + '_aligned_bbox.npy', aligned_bboxes) 168 | np.save(output_file + '_axis_align_matrix.npy', axis_align_matrix) 169 | 170 | return mesh_vertices, label_ids, instance_ids, unaligned_bboxes, \ 171 | aligned_bboxes, object_id_to_label_id, axis_align_matrix 172 | 173 | 174 | def main(): 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument( 177 | '--scan_path', 178 | required=True, 179 | help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 180 | parser.add_argument('--output_file', required=True, help='output file') 181 | parser.add_argument( 182 | '--label_map_file', 183 | required=True, 184 | help='path to scannetv2-labels.combined.tsv') 185 | parser.add_argument( 186 | '--scannet200', 187 | action='store_true', 188 | help='Use it for scannet200 mapping') 189 | 190 | opt = parser.parse_args() 191 | 192 | scan_name = os.path.split(opt.scan_path)[-1] 193 | mesh_file = 
os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 194 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 195 | seg_file = os.path.join(opt.scan_path, 196 | scan_name + '_vh_clean_2.0.010000.segs.json') 197 | meta_file = os.path.join( 198 | opt.scan_path, scan_name + 199 | '.txt') # includes axisAlignment info for the train set scans. 200 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, 201 | opt.output_file, scannet200=opt.scannet200) 202 | 203 | 204 | if __name__ == '__main__': 205 | main() 206 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filaPro/unidet3d/940a730a09711b0bf266fd972504da29a83b91f6/data/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /data/scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | 
scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 
| scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /data/scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/scannet_utils.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts 8 | """ 9 | 10 | import csv 11 | import os 12 | 13 | import numpy as np 14 | from plyfile import PlyData 15 | 16 | 17 | def represents_int(s): 18 | """Judge whether string s represents an int. 19 | 20 | Args: 21 | s(str): The input string to be judged. 22 | 23 | Returns: 24 | bool: Whether s represents int or not. 25 | """ 26 | try: 27 | int(s) 28 | return True 29 | except ValueError: 30 | return False 31 | 32 | 33 | def read_label_mapping(filename, 34 | label_from='raw_category', 35 | label_to='nyu40id'): 36 | assert os.path.isfile(filename) 37 | mapping = dict() 38 | with open(filename) as csvfile: 39 | reader = csv.DictReader(csvfile, delimiter='\t') 40 | for row in reader: 41 | mapping[row[label_from]] = int(row[label_to]) 42 | if represents_int(list(mapping.keys())[0]): 43 | mapping = {int(k): v for k, v in mapping.items()} 44 | return mapping 45 | 46 | 47 | def read_mesh_vertices(filename): 48 | """Read XYZ for each vertex. 49 | 50 | Args: 51 | filename(str): The name of the mesh vertices file. 52 | 53 | Returns: 54 | ndarray: Vertices. 
55 | """ 56 | assert os.path.isfile(filename) 57 | with open(filename, 'rb') as f: 58 | plydata = PlyData.read(f) 59 | num_verts = plydata['vertex'].count 60 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 61 | vertices[:, 0] = plydata['vertex'].data['x'] 62 | vertices[:, 1] = plydata['vertex'].data['y'] 63 | vertices[:, 2] = plydata['vertex'].data['z'] 64 | return vertices 65 | 66 | 67 | def read_mesh_vertices_rgb(filename): 68 | """Read XYZ and RGB for each vertex. 69 | 70 | Args: 71 | filename(str): The name of the mesh vertices file. 72 | 73 | Returns: 74 | Vertices. Note that RGB values are in 0-255. 75 | """ 76 | assert os.path.isfile(filename) 77 | with open(filename, 'rb') as f: 78 | plydata = PlyData.read(f) 79 | num_verts = plydata['vertex'].count 80 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 81 | vertices[:, 0] = plydata['vertex'].data['x'] 82 | vertices[:, 1] = plydata['vertex'].data['y'] 83 | vertices[:, 2] = plydata['vertex'].data['z'] 84 | vertices[:, 3] = plydata['vertex'].data['red'] 85 | vertices[:, 4] = plydata['vertex'].data['green'] 86 | vertices[:, 5] = plydata['vertex'].data['blue'] 87 | return vertices 88 | -------------------------------------------------------------------------------- /data/scannetpp/README.md: -------------------------------------------------------------------------------- 1 | ## Prepare ScanNet++ Data for Indoor 3D Detection 2 | 3 | 1. Download data from the official [ScanNet++](https://github.com/scannetpp/scannetpp). 4 | 5 | 2. Preprocess raw data by running: 6 | 7 | ```bash 8 | python preprocess_raw_data.py --path_to_data path_to_dataset --output_dir path_to_save_preprocessed_raw_data 9 | ``` 10 | 11 | 3. Generate bins and pkls data by running: 12 | 13 | ```bash 14 | python prepare_bins_pkls.py --path_to_data path_to_preprocessed_raw_data --path_to_save_bins path_to_save_bins 15 | ``` 16 | 17 | Overall you achieve the following file structure in `bins` directory: 18 | ``` 19 | bins 20 | ├── bboxs 21 | │ ├── xxxxx_xx.npy 22 | ├── instance_mask 23 | │ ├── xxxxx_xx.bin 24 | ├── points 25 | │ ├── xxxxx_xx.bin 26 | ├── semantic_mask 27 | │ ├── xxxxx_xx.bin 28 | ├── superpoints 29 | │ ├── xxxxx_xx.bin 30 | ├── scannetpp_infos_train.pkl 31 | ├── scannetpp_infos_val.pkl 32 | ├── scannetpp_infos_test.pkl 33 | ``` 34 | -------------------------------------------------------------------------------- /data/scannetpp/prepare_bins_pkls.py: -------------------------------------------------------------------------------- 1 | import mmengine 2 | import os 3 | from tqdm.auto import tqdm 4 | import numpy as np 5 | import argparse 6 | 7 | OBJ2SEM = {'wall': 0, 8 | 'ceiling': 1, 9 | 'floor': 2, 10 | 'table': 3, 11 | 'door': 4, 12 | 'ceiling lamp': 5, 13 | 'cabinet': 6, 14 | 'blinds': 7, 15 | 'curtain': 8, 16 | 'chair': 9, 17 | 'storage cabinet': 10, 18 | 'office chair': 11, 19 | 'bookshelf': 12, 20 | 'whiteboard': 13, 21 | 'window': 14, 22 | 'box': 15, 23 | 'window frame': 16, 24 | 'monitor': 17, 25 | 'shelf': 18, 26 | 'doorframe': 19, 27 | 'pipe': 20, 28 | 'heater': 21, 29 | 'kitchen cabinet': 22, 30 | 'sofa': 23, 31 | 'windowsill': 24, 32 | 'bed': 25, 33 | 'shower wall': 26, 34 | 'trash can': 27, 35 | 'book': 28, 36 | 'plant': 29, 37 | 'blanket': 30, 38 | 'tv': 31, 39 | 'computer tower': 32, 40 | 'kitchen counter': 33, 41 | 'refrigerator': 34, 42 | 'jacket': 35, 43 | 'electrical duct': 36, 44 | 'sink': 37, 45 | 'bag': 38, 46 | 'picture': 39, 47 | 'pillow': 40, 48 | 'towel': 41, 49 | 'suitcase': 42, 50 | 'backpack': 43, 51 | 
'crate': 44, 52 | 'keyboard': 45, 53 | 'rack': 46, 54 | 'toilet': 47, 55 | 'paper': 48, 56 | 'printer': 49, 57 | 'poster': 50, 58 | 'painting': 51, 59 | 'microwave': 52, 60 | 'board': 53, 61 | 'shoes': 54, 62 | 'socket': 55, 63 | 'bottle': 56, 64 | 'bucket': 57, 65 | 'cushion': 58, 66 | 'basket': 59, 67 | 'shoe rack': 60, 68 | 'telephone': 61, 69 | 'file folder': 62, 70 | 'cloth': 63, 71 | 'blind rail': 64, 72 | 'laptop': 65, 73 | 'plant pot': 66, 74 | 'exhaust fan': 67, 75 | 'cup': 68, 76 | 'coat hanger': 69, 77 | 'light switch': 70, 78 | 'speaker': 71, 79 | 'table lamp': 72, 80 | 'air vent': 73, 81 | 'clothes hanger': 74, 82 | 'kettle': 75, 83 | 'smoke detector': 76, 84 | 'container': 77, 85 | 'power strip': 78, 86 | 'slippers': 79, 87 | 'paper bag': 80, 88 | 'mouse': 81, 89 | 'cutting board': 82, 90 | 'toilet paper': 83, 91 | 'paper towel': 84, 92 | 'pot': 85, 93 | 'clock': 86, 94 | 'pan': 87, 95 | 'tap': 88, 96 | 'jar': 89, 97 | 'soap dispenser': 90, 98 | 'binder': 91, 99 | 'bowl': 92, 100 | 'tissue box': 93, 101 | 'whiteboard eraser': 94, 102 | 'toilet brush': 95, 103 | 'spray bottle': 96, 104 | 'headphones': 97, 105 | 'stapler': 98, 106 | 'marker': 99} 107 | 108 | def create_dir(path): 109 | if not os.path.exists(path): 110 | os.mkdir(path) 111 | 112 | def load_txt(path): 113 | res = [] 114 | 115 | with open(path) as f: 116 | for line in tqdm(f): 117 | res.append(line.strip()) 118 | 119 | return res 120 | 121 | def create_dirs(path): 122 | points = os.path.join(path, 'points') 123 | create_dir(points) 124 | 125 | semantic_mask = os.path.join(path, 'semantic_mask') 126 | create_dir(semantic_mask) 127 | 128 | instance_mask = os.path.join(path, 'instance_mask') 129 | create_dir(instance_mask) 130 | 131 | bboxs = os.path.join(path, 'bboxs') 132 | create_dir(bboxs) 133 | 134 | superpoints = os.path.join(path, 'superpoints') 135 | create_dir(superpoints) 136 | return { 137 | 'points': points, 138 | 'semantic_mask': semantic_mask, 139 | 'instance_mask': instance_mask, 140 | 'bboxs': bboxs, 141 | 'superpoints': superpoints 142 | } 143 | 144 | def create_metainfo(): 145 | 146 | return { 147 | 'categories': OBJ2SEM, 148 | 'dataset': 'scannetpp', 149 | 'info_version': '1.0' 150 | } 151 | 152 | def create_data_list(split, splits, bins_path): 153 | 154 | scenes = splits[split] 155 | final_list = [] 156 | for scene in tqdm(scenes): 157 | lidar_points = { 158 | 'num_pts_feats': 6, 159 | 'lidar_path': f'{scene}.bin' 160 | } 161 | raw_bboxs = np.load(os.path.join(bins_path['bboxs'], f'{scene}.npy')) 162 | instances = [] 163 | for rb in raw_bboxs: 164 | if len(rb) == 0: 165 | instances = [] 166 | else: 167 | instances.append({ 168 | 'bbox_3d': rb[:6].tolist(), 169 | 'bbox_label_3d': int(rb[-1]) 170 | }) 171 | final_list.append({ 172 | 'lidar_points': lidar_points, 173 | 'instances': instances, 174 | 'pts_semantic_mask_path': f'{scene}.bin', 175 | 'pts_instance_mask_path': f'{scene}.bin', 176 | 'axis_align_matrix': np.eye(4) 177 | }) 178 | 179 | return final_list 180 | 181 | def create_pkl_file(path_to_save, split, splits, 182 | bins_path, pkl_prefix = 'scannetpp'): 183 | metainfo = create_metainfo() 184 | data_list = create_data_list(split, splits, bins_path) 185 | anno = { 186 | 'metainfo': metainfo, 187 | 'data_list': data_list 188 | } 189 | filename = os.path.join(path_to_save, f'{pkl_prefix}_infos_{split}.pkl') 190 | mmengine.dump(anno, filename, 'pkl') 191 | 192 | if __name__ == '__main__': 193 | parser = argparse.ArgumentParser() 194 | parser.add_argument( 195 | '--path_to_data', 196 | 
required=True, 197 | help='Path to preprocessed raw data', 198 | type=str, 199 | ) 200 | 201 | parser.add_argument( 202 | '--path_to_save_bins', 203 | required=True, 204 | help='Enter here the path where to save bins and pkls', 205 | type=str, 206 | ) 207 | 208 | args = parser.parse_args() 209 | print(args) 210 | 211 | path_to_raw_data = args.path_to_data 212 | path_to_save_data = args.path_to_save_bins 213 | create_dir(path_to_save_data) 214 | bins_path = create_dirs(path_to_save_data) 215 | 216 | path_to_train_ids = os.path.join(path_to_raw_data, 'nvs_sem_train.txt') 217 | train_scenes = load_txt(path_to_train_ids) 218 | path_to_val_ids = os.path.join(path_to_raw_data, 'nvs_sem_val.txt') 219 | val_scenes = load_txt(path_to_val_ids) 220 | path_to_sem_test_ids = os.path.join(path_to_raw_data, 'sem_test.txt') 221 | test_scenes = load_txt(path_to_sem_test_ids) 222 | 223 | splits = { 224 | 'train': train_scenes, 225 | 'val': val_scenes, 226 | 'test': test_scenes 227 | } 228 | 229 | path_to_raw_data = os.path.join(path_to_raw_data, 'data') 230 | scene_ids = os.listdir(path_to_raw_data) 231 | 232 | for si in tqdm(scene_ids): 233 | temp_path = os.path.join(path_to_raw_data, si) 234 | point_cloud = np.load(temp_path + f'/{si}_point_cloud.npy') 235 | sem_label = np.load(temp_path + f'/{si}_semantic.npy') 236 | ins_label = np.load(temp_path + f'/{si}_instance.npy') 237 | bboxs = np.load(temp_path + f'/{si}_bboxs.npy') 238 | superpoints = np.load(temp_path + f'/{si}_superpoints.npy') 239 | 240 | point_cloud.astype(np.float32).tofile( 241 | os.path.join(bins_path['points'], f'{si}.bin')) 242 | sem_label.astype(np.int64).tofile( 243 | os.path.join(bins_path['semantic_mask'], f'{si}.bin')) 244 | ins_label.astype(np.int64).tofile( 245 | os.path.join(bins_path['instance_mask'], f'{si}.bin')) 246 | superpoints.astype(np.int64).tofile( 247 | os.path.join(bins_path['superpoints'], f'{si}.bin')) 248 | np.save(os.path.join(bins_path['bboxs'], f'{si}.npy'), bboxs) 249 | 250 | create_pkl_file(path_to_save_data, 'train', splits, bins_path) 251 | create_pkl_file(path_to_save_data, 'val', splits, bins_path) 252 | create_pkl_file(path_to_save_data, 'test', splits, bins_path) 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /data/scannetpp/preprocess_raw_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | import numpy as np 5 | from plyfile import PlyData 6 | import pandas as pd 7 | from tqdm.auto import tqdm 8 | from concurrent.futures import ProcessPoolExecutor 9 | import shutil 10 | import segmentator 11 | import torch 12 | import trimesh 13 | 14 | POINT_CLOUD_PFX = "mesh_aligned_0.05.ply" 15 | SEGMENTS_ANNO_PFX = "segments_anno.json" 16 | 17 | def _handle_id(scene_id): 18 | print(f'Processing: {scene_id}') 19 | if not os.path.isdir(os.path.join(PATH_TO_IDS, scene_id, 'scans')): 20 | return 21 | 22 | point_cloud, _ = read_plymesh(os.path.join(PATH_TO_IDS, scene_id, 23 | 'scans', POINT_CLOUD_PFX)) 24 | 25 | mesh = trimesh.load_mesh(os.path.join(PATH_TO_IDS, scene_id, 26 | 'scans', POINT_CLOUD_PFX)) 27 | vertices = mesh.vertices 28 | faces = mesh.faces 29 | 30 | vertices = torch.from_numpy(vertices.astype(np.float32)) 31 | faces = torch.from_numpy(faces.astype(np.int64)) 32 | super_points = segmentator.segment_mesh(vertices, faces).numpy() 33 | 
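    # Note: segment_mesh() assigns a superpoint id per *mesh vertex*, while the
    # rows of `point_cloud` read from the same ply may be ordered differently.
    # The dictionary built next re-attaches each point to its superpoint id by
    # exact (x, y, z) lookup, which relies on the point coordinates matching the
    # mesh vertices bit-for-bit.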
34 | mapping_superpoints = {tuple(i.tolist()): 35 | super_points[idx] for idx, i in enumerate(vertices)} 36 | super_points = np.array([mapping_superpoints[tuple(i.tolist())] 37 | for i in point_cloud[:, :3]]) 38 | 39 | assert point_cloud.shape[1] == 6 40 | assert point_cloud.shape[0] == super_points.shape[0] 41 | 42 | semantic = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated 43 | instance = np.zeros((point_cloud.shape[0], 1)) - 1 # -1: unannotated 44 | if scene_id in TRAIN_IDS or scene_id in VAL_IDS: 45 | seg_anno = load_json(os.path.join(PATH_TO_IDS, scene_id, 46 | 'scans', SEGMENTS_ANNO_PFX)) 47 | seg_groups = seg_anno['segGroups'] 48 | obj_idx = 0 49 | bboxs = [] 50 | for idx, group in enumerate(seg_groups): 51 | label = group['label'] 52 | segments = np.array(group['segments']) 53 | 54 | if label in TOP100SEM2ID: 55 | new_label = label 56 | 57 | elif label in SEMANTIC_MAP_TO and label not in TOP100SEM2ID: 58 | if SEMANTIC_MAP_TO[label] in TOP100SEM2ID: 59 | new_label = SEMANTIC_MAP_TO[label] 60 | else: 61 | continue 62 | else: 63 | continue 64 | 65 | label_id = TOP100SEM2ID[new_label] 66 | 67 | point_segments = point_cloud[segments] 68 | instance[segments] = obj_idx 69 | semantic[segments] = label_id 70 | xmin = np.min(point_segments[:,0]) 71 | ymin = np.min(point_segments[:,1]) 72 | zmin = np.min(point_segments[:,2]) 73 | xmax = np.max(point_segments[:,0]) 74 | ymax = np.max(point_segments[:,1]) 75 | zmax = np.max(point_segments[:,2]) 76 | 77 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 78 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 79 | 80 | bboxs.append(bbox) 81 | obj_idx += 1 82 | 83 | bboxs = np.stack(bboxs) 84 | data = { 85 | 'point_cloud': point_cloud, 86 | 'semantic': semantic[:, 0].astype(int), 87 | 'instance': instance[:, 0].astype(int), 88 | 'bboxs': bboxs, 89 | 'super_points': super_points 90 | } 91 | 92 | elif scene_id in SEM_TEST_IDS: 93 | 94 | data = { 95 | 'point_cloud': point_cloud, 96 | 'semantic': semantic[:, 0].astype(int), 97 | 'instance': instance[:, 0].astype(int), 98 | 'bboxs': np.zeros((0,7)), 99 | 'super_points': super_points 100 | } 101 | 102 | output_path = os.path.join(OUTPUT_DIR_DATA, f'{scene_id}') 103 | create_dir(os.path.join(output_path)) 104 | output_prefix = os.path.join(output_path, f'{scene_id}') 105 | np.save(output_prefix+'_point_cloud.npy', data['point_cloud']) 106 | np.save(output_prefix+'_semantic.npy', data['semantic']) 107 | np.save(output_prefix+'_instance.npy', data['instance']) 108 | np.save(output_prefix+'_bboxs.npy', data['bboxs']) 109 | np.save(output_prefix+'_superpoints.npy', data['super_points']) 110 | 111 | def create_dir(path): 112 | if not os.path.exists(path): 113 | os.mkdir(path) 114 | 115 | def load_json(path): 116 | with open(path) as jd: 117 | return json.load(jd) 118 | 119 | def load_txt(path): 120 | res = [] 121 | 122 | with open(path) as f: 123 | for line in tqdm(f): 124 | res.append(line.strip()) 125 | 126 | return res 127 | 128 | def read_plymesh(filepath): 129 | """Read ply file and return it as numpy array. 
Returns None if emtpy.""" 130 | with open(filepath, 'rb') as f: 131 | plydata = PlyData.read(f) 132 | if plydata.elements: 133 | vertices = pd.DataFrame(plydata['vertex'].data).values 134 | faces = np.array([f[0] for f in plydata["face"].data]) 135 | return vertices, faces 136 | 137 | 138 | if __name__ == '__main__': 139 | parser = argparse.ArgumentParser() 140 | parser.add_argument( 141 | '--path_to_data', 142 | required=True, 143 | help='Path to raw data', 144 | type=str, 145 | ) 146 | 147 | parser.add_argument( 148 | '--output_dir', 149 | required=True, 150 | help='Path to save preprocessed raw data', 151 | type=str, 152 | ) 153 | 154 | parser.add_argument('--num_workers', default=20, type=int, 155 | help='The number of parallel workers') 156 | 157 | args = parser.parse_args() 158 | print(args) 159 | PATH_TO_DATA = args.path_to_data 160 | PATH_TO_IDS = os.path.join(PATH_TO_DATA, 'data') 161 | OUTPUT_DIR = args.output_dir 162 | create_dir(OUTPUT_DIR) 163 | 164 | OUTPUT_DIR_DATA = os.path.join(OUTPUT_DIR, 'data') 165 | create_dir(OUTPUT_DIR_DATA) 166 | 167 | TOP100SEM2ID = {} 168 | with open(os.path.join(PATH_TO_DATA , 169 | 'metadata/semantic_benchmark/top100.txt')) as f: 170 | # check = f.read() 171 | for idx, line in enumerate(f): 172 | line = line.strip() 173 | TOP100SEM2ID[line] = idx 174 | 175 | TOPINST2ID = {} 176 | with open(os.path.join(PATH_TO_DATA, 177 | 'metadata/semantic_benchmark/top100_instance.txt')) as f: 178 | for idx, line in enumerate(f): 179 | line = line.strip() 180 | TOPINST2ID[line] = TOP100SEM2ID[line] 181 | 182 | MAPPING_BENCH = pd.read_csv(os.path.join(PATH_TO_DATA, 183 | 'metadata/semantic_benchmark/map_benchmark.csv')) 184 | SEMANTIC_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['semantic_map_to'].isna()] 185 | INSTANCE_MAP_TO = MAPPING_BENCH[~MAPPING_BENCH['instance_map_to'].isna()] 186 | 187 | SEMANTIC_MAP_TO = SEMANTIC_MAP_TO[['class','semantic_map_to']].values 188 | SEMANTIC_MAP_TO = dict(zip(SEMANTIC_MAP_TO[:, 0], SEMANTIC_MAP_TO[:, 1])) 189 | print(len(SEMANTIC_MAP_TO)) 190 | 191 | INSTANCE_MAP_TO = INSTANCE_MAP_TO[['class','instance_map_to']].values 192 | INSTANCE_MAP_TO = dict(zip(INSTANCE_MAP_TO[:, 0], INSTANCE_MAP_TO[:, 1])) 193 | print(len(INSTANCE_MAP_TO)) 194 | 195 | SCENE_IDS = os.listdir(os.path.join(PATH_TO_DATA, 'data')) 196 | SCENE_IDS.remove('.ipynb_checkpoints') 197 | 198 | assert len(SCENE_IDS) == 380 199 | 200 | path_to_train_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_train.txt') 201 | TRAIN_IDS = load_txt(path_to_train_ids) 202 | path_to_val_ids = os.path.join(PATH_TO_DATA, 'splits', 'nvs_sem_val.txt') 203 | VAL_IDS = load_txt(path_to_val_ids) 204 | path_to_sem_test_ids = os.path.join(PATH_TO_DATA, 'splits', 'sem_test.txt') 205 | SEM_TEST_IDS = load_txt(path_to_sem_test_ids) 206 | 207 | shutil.copytree(os.path.join(PATH_TO_DATA, 'splits'), 208 | OUTPUT_DIR, dirs_exist_ok=True) 209 | 210 | pool = ProcessPoolExecutor(max_workers=args.num_workers) 211 | print('Processing scenes...') 212 | _ = list(pool.map(_handle_id, SCENE_IDS)) 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /tools/create_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
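# Example invocation for ScanNet (paths and tag below are placeholders; adjust
# them to your local data layout):
#   python tools/create_data.py scannet --root-path ./data/scannet \
#       --out-dir ./data/scannet --extra-tag scannet --workers 4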
2 | import argparse 3 | from os import path as osp 4 | 5 | from indoor_converter import create_indoor_info_file 6 | from update_infos_to_v2 import update_pkl_infos 7 | 8 | 9 | def scannet_data_prep(root_path, info_prefix, out_dir, workers): 10 | """Prepare the info file for scannet dataset. 11 | 12 | Args: 13 | root_path (str): Path of dataset root. 14 | info_prefix (str): The prefix of info filenames. 15 | out_dir (str): Output directory of the generated info file. 16 | workers (int): Number of threads to be used. 17 | """ 18 | create_indoor_info_file( 19 | root_path, info_prefix, out_dir, workers=workers) 20 | info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') 21 | info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') 22 | info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl') 23 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_train_path) 24 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_val_path) 25 | update_pkl_infos(info_prefix, out_dir=out_dir, pkl_path=info_test_path) 26 | 27 | 28 | parser = argparse.ArgumentParser(description='Data converter arg parser') 29 | parser.add_argument('dataset', metavar='kitti', help='name of the dataset') 30 | parser.add_argument( 31 | '--root-path', 32 | type=str, 33 | default='./data/kitti', 34 | help='specify the root path of dataset') 35 | parser.add_argument( 36 | '--out-dir', 37 | type=str, 38 | default='./data/kitti', 39 | required=False, 40 | help='name of info pkl') 41 | parser.add_argument('--extra-tag', type=str, default='kitti') 42 | parser.add_argument( 43 | '--workers', type=int, default=4, help='number of threads to be used') 44 | args = parser.parse_args() 45 | 46 | if __name__ == '__main__': 47 | from mmdet3d.utils import register_all_modules 48 | register_all_modules() 49 | 50 | if args.dataset in ('scannet', 'scannet200'): 51 | scannet_data_prep( 52 | root_path=args.root_path, 53 | info_prefix=args.extra_tag, 54 | out_dir=args.out_dir, 55 | workers=args.workers) 56 | else: 57 | raise NotImplementedError(f'Don\'t support {args.dataset} dataset.') 58 | -------------------------------------------------------------------------------- /tools/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Modified from mmdetection3d/tools/dataset_converters/indoor_converter.py 2 | # We just support ScanNet 200. 3 | import os 4 | 5 | import mmengine 6 | 7 | from scannet_data_utils import ScanNetData 8 | 9 | 10 | def create_indoor_info_file(data_path, 11 | pkl_prefix='sunrgbd', 12 | save_path=None, 13 | use_v1=False, 14 | workers=4): 15 | """Create indoor information file. 16 | 17 | Get information of the raw data and save it to the pkl file. 18 | 19 | Args: 20 | data_path (str): Path of the data. 21 | pkl_prefix (str, optional): Prefix of the pkl to be saved. 22 | Default: 'sunrgbd'. 23 | save_path (str, optional): Path of the pkl to be saved. Default: None. 24 | use_v1 (bool, optional): Whether to use v1. Default: False. 25 | workers (int, optional): Number of threads to be used. Default: 4. 
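
    Example (paths are placeholders):
        >>> create_indoor_info_file('./data/scannet', pkl_prefix='scannet',
        ...                         save_path='./data/scannet', workers=4)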
26 | """ 27 | assert os.path.exists(data_path) 28 | assert pkl_prefix in ['scannet', 'scannet200'], \ 29 | f'unsupported indoor dataset {pkl_prefix}' 30 | save_path = data_path if save_path is None else save_path 31 | assert os.path.exists(save_path) 32 | 33 | # generate infos for both detection and segmentation task 34 | train_filename = os.path.join( 35 | save_path, f'{pkl_prefix}_infos_train.pkl') 36 | val_filename = os.path.join( 37 | save_path, f'{pkl_prefix}_infos_val.pkl') 38 | test_filename = os.path.join( 39 | save_path, f'{pkl_prefix}_infos_test.pkl') 40 | if pkl_prefix == 'scannet': 41 | # ScanNet has a train-val-test split 42 | train_dataset = ScanNetData(root_path=data_path, split='train') 43 | val_dataset = ScanNetData(root_path=data_path, split='val') 44 | test_dataset = ScanNetData(root_path=data_path, split='test') 45 | else: # ScanNet200 46 | # ScanNet has a train-val-test split 47 | train_dataset = ScanNetData(root_path=data_path, split='train', 48 | scannet200=True, save_path=save_path) 49 | val_dataset = ScanNetData(root_path=data_path, split='val', 50 | scannet200=True, save_path=save_path) 51 | test_dataset = ScanNetData(root_path=data_path, split='test', 52 | scannet200=True, save_path=save_path) 53 | 54 | infos_train = train_dataset.get_infos( 55 | num_workers=workers, has_label=True) 56 | mmengine.dump(infos_train, train_filename, 'pkl') 57 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 58 | 59 | infos_val = val_dataset.get_infos( 60 | num_workers=workers, has_label=True) 61 | mmengine.dump(infos_val, val_filename, 'pkl') 62 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 63 | 64 | infos_test = test_dataset.get_infos( 65 | num_workers=workers, has_label=False) 66 | mmengine.dump(infos_test, test_filename, 'pkl') 67 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 68 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # This is an exact copy of tools/test.py from open-mmlab/mmdetection3d. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | 6 | from mmengine.config import Config, ConfigDict, DictAction 7 | from mmengine.registry import RUNNERS 8 | from mmengine.runner import Runner 9 | 10 | from mmdet3d.utils import replace_ceph_backend 11 | 12 | 13 | # TODO: support fuse_conv_bn and format_only 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='MMDet3D test (and eval) a model') 17 | parser.add_argument('config', help='test config file path') 18 | parser.add_argument('checkpoint', help='checkpoint file') 19 | parser.add_argument( 20 | '--work-dir', 21 | help='the directory to save the file containing evaluation metrics') 22 | parser.add_argument( 23 | '--ceph', action='store_true', help='Use ceph as data storage backend') 24 | parser.add_argument( 25 | '--show', action='store_true', help='show prediction results') 26 | parser.add_argument( 27 | '--show-dir', 28 | help='directory where painted images will be saved. 
' 29 | 'If specified, it will be automatically saved ' 30 | 'to the work_dir/timestamp/show_dir') 31 | parser.add_argument( 32 | '--score-thr', type=float, default=0.1, help='bbox score threshold') 33 | parser.add_argument( 34 | '--task', 35 | type=str, 36 | choices=[ 37 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg', 38 | 'multi-modality_det' 39 | ], 40 | help='Determine the visualization method depending on the task.') 41 | parser.add_argument( 42 | '--wait-time', type=float, default=2, help='the interval of show (s)') 43 | parser.add_argument( 44 | '--cfg-options', 45 | nargs='+', 46 | action=DictAction, 47 | help='override some settings in the used config, the key-value pair ' 48 | 'in xxx=yyy format will be merged into config file. If the value to ' 49 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 50 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 51 | 'Note that the quotation marks are necessary and that no white space ' 52 | 'is allowed.') 53 | parser.add_argument( 54 | '--launcher', 55 | choices=['none', 'pytorch', 'slurm', 'mpi'], 56 | default='none', 57 | help='job launcher') 58 | parser.add_argument( 59 | '--tta', action='store_true', help='Test time augmentation') 60 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` 61 | # will pass the `--local-rank` parameter to `tools/test.py` instead 62 | # of `--local_rank`. 63 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0) 64 | args = parser.parse_args() 65 | if 'LOCAL_RANK' not in os.environ: 66 | os.environ['LOCAL_RANK'] = str(args.local_rank) 67 | return args 68 | 69 | 70 | def trigger_visualization_hook(cfg, args): 71 | default_hooks = cfg.default_hooks 72 | if 'visualization' in default_hooks: 73 | visualization_hook = default_hooks['visualization'] 74 | # Turn on visualization 75 | visualization_hook['draw'] = True 76 | if args.show: 77 | visualization_hook['show'] = True 78 | visualization_hook['wait_time'] = args.wait_time 79 | if args.show_dir: 80 | visualization_hook['test_out_dir'] = args.show_dir 81 | all_task_choices = [ 82 | 'mono_det', 'multi-view_det', 'lidar_det', 'lidar_seg', 83 | 'multi-modality_det' 84 | ] 85 | assert args.task in all_task_choices, 'You must set '\ 86 | f"'--task' in {all_task_choices} in the command " \ 87 | 'if you want to use visualization hook' 88 | visualization_hook['vis_task'] = args.task 89 | visualization_hook['score_thr'] = args.score_thr 90 | else: 91 | raise RuntimeError( 92 | 'VisualizationHook must be included in default_hooks.' 
93 | 'refer to usage ' 94 | '"visualization=dict(type=\'VisualizationHook\')"') 95 | 96 | return cfg 97 | 98 | 99 | def main(): 100 | args = parse_args() 101 | 102 | # load config 103 | cfg = Config.fromfile(args.config) 104 | 105 | # TODO: We will unify the ceph support approach with other OpenMMLab repos 106 | if args.ceph: 107 | cfg = replace_ceph_backend(cfg) 108 | 109 | cfg.launcher = args.launcher 110 | if args.cfg_options is not None: 111 | cfg.merge_from_dict(args.cfg_options) 112 | 113 | # work_dir is determined in this priority: CLI > segment in file > filename 114 | if args.work_dir is not None: 115 | # update configs according to CLI args if args.work_dir is not None 116 | cfg.work_dir = args.work_dir 117 | elif cfg.get('work_dir', None) is None: 118 | # use config filename as default work_dir if cfg.work_dir is None 119 | cfg.work_dir = osp.join('./work_dirs', 120 | osp.splitext(osp.basename(args.config))[0]) 121 | 122 | cfg.load_from = args.checkpoint 123 | 124 | if args.show or args.show_dir: 125 | # cfg = trigger_visualization_hook(cfg, args) 126 | cfg.test_evaluator['vis_dir'] = args.show_dir 127 | 128 | if args.tta: 129 | # Currently, we only support tta for 3D segmentation 130 | # TODO: Support tta for 3D detection 131 | assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.' 132 | assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` in config.' 133 | cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline 134 | cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model) 135 | 136 | # build the runner from config 137 | if 'runner_type' not in cfg: 138 | # build the default runner 139 | runner = Runner.from_cfg(cfg) 140 | else: 141 | # build customized runner from the registry 142 | # if 'runner_type' is set in the cfg 143 | runner = RUNNERS.build(cfg) 144 | 145 | # start testing 146 | runner.test() 147 | 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # This is an exact copy of tools/train.py from open-mmlab/mmdetection3d. 
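# Typical launch patterns (config and work-dir paths are placeholders; the
# distributed form follows the usual torchrun + mmengine convention):
#   python tools/train.py path/to/config.py --work-dir work_dirs/my_exp
#   torchrun --nproc_per_node=4 tools/train.py path/to/config.py --launcher pytorch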
2 | import argparse 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | from mmengine.config import Config, DictAction 8 | from mmengine.logging import print_log 9 | from mmengine.registry import RUNNERS 10 | from mmengine.runner import Runner 11 | 12 | from mmdet3d.utils import replace_ceph_backend 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a 3D detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument('--work-dir', help='the dir to save logs and models') 19 | parser.add_argument( 20 | '--amp', 21 | action='store_true', 22 | default=False, 23 | help='enable automatic-mixed-precision training') 24 | parser.add_argument( 25 | '--auto-scale-lr', 26 | action='store_true', 27 | help='enable automatically scaling LR.') 28 | parser.add_argument( 29 | '--resume', 30 | nargs='?', 31 | type=str, 32 | const='auto', 33 | help='If specify checkpoint path, resume from it, while if not ' 34 | 'specify, try to auto resume from the latest checkpoint ' 35 | 'in the work directory.') 36 | parser.add_argument( 37 | '--ceph', action='store_true', help='Use ceph as data storage backend') 38 | parser.add_argument( 39 | '--cfg-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='override some settings in the used config, the key-value pair ' 43 | 'in xxx=yyy format will be merged into config file. If the value to ' 44 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 45 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 46 | 'Note that the quotation marks are necessary and that no white space ' 47 | 'is allowed.') 48 | parser.add_argument( 49 | '--launcher', 50 | choices=['none', 'pytorch', 'slurm', 'mpi'], 51 | default='none', 52 | help='job launcher') 53 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` 54 | # will pass the `--local-rank` parameter to `tools/train.py` instead 55 | # of `--local_rank`. 
56 | parser.add_argument('--local_rank', '--local-rank', type=int, default=0) 57 | args = parser.parse_args() 58 | if 'LOCAL_RANK' not in os.environ: 59 | os.environ['LOCAL_RANK'] = str(args.local_rank) 60 | return args 61 | 62 | 63 | def main(): 64 | args = parse_args() 65 | 66 | # load config 67 | cfg = Config.fromfile(args.config) 68 | 69 | # TODO: We will unify the ceph support approach with other OpenMMLab repos 70 | if args.ceph: 71 | cfg = replace_ceph_backend(cfg) 72 | 73 | cfg.launcher = args.launcher 74 | if args.cfg_options is not None: 75 | cfg.merge_from_dict(args.cfg_options) 76 | 77 | # work_dir is determined in this priority: CLI > segment in file > filename 78 | if args.work_dir is not None: 79 | # update configs according to CLI args if args.work_dir is not None 80 | cfg.work_dir = args.work_dir 81 | elif cfg.get('work_dir', None) is None: 82 | # use config filename as default work_dir if cfg.work_dir is None 83 | cfg.work_dir = osp.join('./work_dirs', 84 | osp.splitext(osp.basename(args.config))[0]) 85 | 86 | # enable automatic-mixed-precision training 87 | if args.amp is True: 88 | optim_wrapper = cfg.optim_wrapper.type 89 | if optim_wrapper == 'AmpOptimWrapper': 90 | print_log( 91 | 'AMP training is already enabled in your config.', 92 | logger='current', 93 | level=logging.WARNING) 94 | else: 95 | assert optim_wrapper == 'OptimWrapper', ( 96 | '`--amp` is only supported when the optimizer wrapper type is ' 97 | f'`OptimWrapper` but got {optim_wrapper}.') 98 | cfg.optim_wrapper.type = 'AmpOptimWrapper' 99 | cfg.optim_wrapper.loss_scale = 'dynamic' 100 | 101 | # enable automatically scaling LR 102 | if args.auto_scale_lr: 103 | if 'auto_scale_lr' in cfg and \ 104 | 'enable' in cfg.auto_scale_lr and \ 105 | 'base_batch_size' in cfg.auto_scale_lr: 106 | cfg.auto_scale_lr.enable = True 107 | else: 108 | raise RuntimeError('Can not find "auto_scale_lr" or ' 109 | '"auto_scale_lr.enable" or ' 110 | '"auto_scale_lr.base_batch_size" in your' 111 | ' configuration file.') 112 | 113 | # resume is determined in this priority: resume from > auto_resume 114 | if args.resume == 'auto': 115 | cfg.resume = True 116 | cfg.load_from = None 117 | elif args.resume is not None: 118 | cfg.resume = True 119 | cfg.load_from = args.resume 120 | 121 | # build the runner from config 122 | if 'runner_type' not in cfg: 123 | # build the default runner 124 | runner = Runner.from_cfg(cfg) 125 | else: 126 | # build customized runner from the registry 127 | # if 'runner_type' is set in the cfg 128 | runner = RUNNERS.build(cfg) 129 | 130 | # start training 131 | runner.train() 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /unidet3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .unidet3d import UniDet3D 2 | from .spconv_unet import SpConvUNet 3 | from .encoder import UniDet3DEncoder 4 | from .criterion import UniDet3DCriterion 5 | from .loading import LoadAnnotations3D_, NormalizePointsColor_, DenormalizePointsColor 6 | from .formatting import Pack3DDetInputs_ 7 | from .transforms_3d import PointDetClassMappingScanNet 8 | from .data_preprocessor import Det3DDataPreprocessor_ 9 | from .scannet_dataset import ScanNetSegDataset_, ScanNetDetDataset 10 | from .s3dis_dataset import S3DISSegDetDataset 11 | from .arkitscenes_dataset import ARKitScenesOfflineDataset 12 | from .multiscan_dataset import MultiScan_ 13 | from .rscan_dataset import ThreeRScan_ 14 
| from .scannetpp_dataset import Scannetpp_ 15 | from .structures import InstanceData_ 16 | from .axis_aligned_iou_loss import UniDet3DAxisAlignedIoULoss 17 | from .rotated_iou_loss import UniDet3DRotatedIoU3DLoss 18 | from .indoor_metric import IndoorMetric_ 19 | from .concat_dataset import ConcatDataset_ -------------------------------------------------------------------------------- /unidet3d/arkitscenes_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | 4 | from mmdet3d.registry import DATASETS 5 | from mmdet3d.datasets import Det3DDataset 6 | from mmdet3d.structures import DepthInstance3DBoxes 7 | from mmengine.logging import print_log 8 | import logging 9 | import numpy as np 10 | 11 | @DATASETS.register_module() 12 | class ARKitScenesOfflineDataset(Det3DDataset): 13 | r"""ARKitScenes dataset (offline benchmark). 14 | 15 | Args: 16 | partition(float): Defaults to 1, the part of 17 | the dataset that will be used. 18 | data_prefix (dict): Prefix for data. Defaults to 19 | dict(pts='offline_prepared_data'). 20 | box_type_3d (str): Type of 3D box of this dataset. 21 | Based on the `box_type_3d`, the dataset will encapsulate the box 22 | to its original format then converted them to `box_type_3d`. 23 | Defaults to 'Depth'. 24 | """ 25 | METAINFO = { 26 | 'classes': ('cabinet', 'refrigerator', 'shelf', 'stove', 'bed', 27 | 'sink', 'washer', 'toilet', 'bathtub', 'oven', 28 | 'dishwasher', 'fireplace', 'stool', 'chair', 'table', 29 | 'tv_monitor', 'sofa') 30 | } 31 | 32 | def __init__(self, 33 | partition: float = 1, 34 | data_prefix: dict = dict(pts='offline_prepared_data'), 35 | box_type_3d: str = 'Depth', 36 | **kwargs) -> None: 37 | self.partition = partition 38 | super().__init__( 39 | data_prefix=data_prefix, 40 | box_type_3d=box_type_3d, 41 | **kwargs) 42 | 43 | def parse_ann_info(self, info: dict) -> dict: 44 | """Process the `instances` in data info to `ann_info`. 45 | 46 | Args: 47 | info (dict): Info dict. 48 | 49 | Returns: 50 | dict: Processed `ann_info` 51 | """ 52 | ann_info = super().parse_ann_info(info) 53 | # empty gt 54 | if ann_info is None: 55 | ann_info = dict() 56 | ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32) 57 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 58 | 59 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 60 | ann_info['gt_bboxes_3d'], 61 | origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) 62 | 63 | return ann_info 64 | 65 | def parse_data_info(self, info: dict) -> dict: 66 | """Process the raw data info. 67 | 68 | Args: 69 | info (dict): Raw info dict. 70 | 71 | Returns: 72 | dict: Has `ann_info` in training stage. And 73 | all path has been converted to absolute path. 74 | """ 75 | info['super_pts_path'] = osp.join( 76 | self.data_prefix.get('sp_pts_mask', ''), 77 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 78 | 79 | info = super().parse_data_info(info) 80 | 81 | return info 82 | 83 | def __getitem__(self, idx: int) -> dict: 84 | """Get the idx-th image and data information of dataset after 85 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 86 | not been fully initialized. 87 | 88 | During training phase, if ``self.pipeline`` get ``None``, 89 | ``self._rand_another`` will be called until a valid image is fetched or 90 | the maximum limit of refetech is reached. 91 | 92 | Args: 93 | idx (int): The index of self.data_list. 
94 | 95 | Returns: 96 | dict: The idx-th image and data information of dataset after 97 | ``self.pipeline``. 98 | """ 99 | # Performing full initialization by calling `__getitem__` will consume 100 | # extra memory. If a dataset is not fully initialized by setting 101 | # `lazy_init=True` and then fed into the dataloader. Different workers 102 | # will simultaneously read and parse the annotation. It will cost more 103 | # time and memory, although this may work. Therefore, it is recommended 104 | # to manually call `full_init` before dataset fed into dataloader to 105 | # ensure all workers use shared RAM from master process. 106 | 107 | if not self.test_mode: 108 | if self.serialize_data: 109 | dataset_len = len(self.data_address) 110 | else: 111 | dataset_len = len(self.data_list) 112 | idx = np.random.randint(0, dataset_len) 113 | if not self._fully_initialized: 114 | print_log( 115 | 'Please call `full_init()` method manually to accelerate ' 116 | 'the speed.', 117 | logger='current', 118 | level=logging.WARNING) 119 | self.full_init() 120 | 121 | if self.test_mode: 122 | data = self.prepare_data(idx) 123 | if data is None: 124 | raise Exception('Test time pipline should not get `None` ' 125 | 'data_sample') 126 | return data 127 | 128 | for _ in range(self.max_refetch + 1): 129 | data = self.prepare_data(idx) 130 | # Broken images or random augmentations may cause the returned data 131 | # to be None 132 | if data is None: 133 | idx = self._rand_another() 134 | continue 135 | return data 136 | 137 | def __len__(self) -> int: 138 | """Get the length of filtered dataset and automatically call 139 | ``full_init`` if the dataset has not been fully init. 140 | 141 | Returns: 142 | int: The length of filtered dataset. 143 | """ 144 | 145 | if self.serialize_data: 146 | dataset_len = len(self.data_address) 147 | else: 148 | dataset_len = len(self.data_list) 149 | if not self.test_mode: 150 | return int(self.partition * dataset_len) 151 | else: 152 | return dataset_len -------------------------------------------------------------------------------- /unidet3d/axis_aligned_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Optional 3 | 4 | import torch 5 | from mmdet.models.losses.utils import weighted_loss 6 | from torch import Tensor 7 | from torch import nn as nn 8 | 9 | from mmdet3d.models import axis_aligned_iou_loss 10 | from mmdet3d.registry import MODELS 11 | from mmdet3d.structures import AxisAlignedBboxOverlaps3D 12 | 13 | 14 | @weighted_loss 15 | def axis_aligned_diou_loss(pred: Tensor, target: Tensor) -> Tensor: 16 | """Calculate the DIoU loss (1-DIoU) of two sets of axis aligned bounding 17 | boxes. Note that predictions and targets are one-to-one corresponded. 18 | 19 | Args: 20 | pred (torch.Tensor): Bbox predictions with shape [..., 6] 21 | (x1, y1, z1, x2, y2, z2). 22 | target (torch.Tensor): Bbox targets (gt) with shape [..., 6] 23 | (x1, y1, z1, x2, y2, z2). 24 | 25 | Returns: 26 | torch.Tensor: DIoU loss between predictions and targets. 
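
    The distance term computed below is the squared distance between box
    centers normalized by the squared diagonal of the smallest axis-aligned
    box enclosing both boxes, added to the plain 1 - IoU term.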
27 | """ 28 | axis_aligned_iou = AxisAlignedBboxOverlaps3D()( 29 | pred, target, is_aligned=True) 30 | iou_loss = 1 - axis_aligned_iou 31 | 32 | xp1, yp1, zp1, xp2, yp2, zp2 = pred.split(1, dim=-1) 33 | xt1, yt1, zt1, xt2, yt2, zt2 = target.split(1, dim=-1) 34 | 35 | xpc = (xp1 + xp2) / 2 36 | ypc = (yp1 + yp2) / 2 37 | zpc = (zp1 + zp2) / 2 38 | xtc = (xt1 + xt2) / 2 39 | ytc = (yt1 + yt2) / 2 40 | ztc = (zt1 + zt2) / 2 41 | r2 = (xpc - xtc)**2 + (ypc - ytc)**2 + (zpc - ztc)**2 42 | 43 | x_min = torch.minimum(xp1, xt1) 44 | x_max = torch.maximum(xp2, xt2) 45 | y_min = torch.minimum(yp1, yt1) 46 | y_max = torch.maximum(yp2, yt2) 47 | z_min = torch.minimum(zp1, zt1) 48 | z_max = torch.maximum(zp2, zt2) 49 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2 50 | 51 | diou_loss = iou_loss + (r2 / c2)[:, 0] 52 | 53 | return diou_loss 54 | 55 | 56 | @MODELS.register_module() 57 | class UniDet3DAxisAlignedIoULoss(nn.Module): 58 | """Calculate the IoU loss (1-IoU) of axis aligned bounding boxes. The only 59 | difference with original AxisAlignedIoULoss is the addition of DIoU mode. 60 | These classes should be merged in the future. 61 | 62 | Args: 63 | mode (str): 'iou' for intersection over union or 'diou' for 64 | distance-iou loss. Defaults to 'iou'. 65 | reduction (str): Method to reduce losses. 66 | The valid reduction method are 'none', 'sum' or 'mean'. 67 | Defaults to 'mean'. 68 | loss_weight (float): Weight of loss. Defaults to 1.0. 69 | """ 70 | 71 | def __init__(self, 72 | mode: str = 'iou', 73 | reduction: str = 'mean', 74 | loss_weight: float = 1.0) -> None: 75 | super(UniDet3DAxisAlignedIoULoss, self).__init__() 76 | assert mode in ['iou', 'diou'] 77 | self.loss = axis_aligned_iou_loss if mode == 'iou' \ 78 | else axis_aligned_diou_loss 79 | assert reduction in ['none', 'sum', 'mean'] 80 | self.reduction = reduction 81 | self.loss_weight = loss_weight 82 | 83 | def forward(self, 84 | pred: Tensor, 85 | target: Tensor, 86 | weight: Optional[Tensor] = None, 87 | avg_factor: Optional[float] = None, 88 | reduction_override: Optional[str] = None, 89 | **kwargs) -> Tensor: 90 | """Forward function of loss calculation. 91 | 92 | Args: 93 | pred (Tensor): Bbox predictions with shape [..., 3]. 94 | target (Tensor): Bbox targets (gt) with shape [..., 3]. 95 | weight (Tensor, optional): Weight of loss. 96 | Defaults to None. 97 | avg_factor (float, optional): Average factor that is used to 98 | average the loss. Defaults to None. 99 | reduction_override (str, optional): Method to reduce losses. 100 | The valid reduction method are 'none', 'sum' or 'mean'. 101 | Defaults to None. 102 | 103 | Returns: 104 | Tensor: IoU loss between predictions and targets. 
105 | """ 106 | assert reduction_override in (None, 'none', 'mean', 'sum') 107 | reduction = ( 108 | reduction_override if reduction_override else self.reduction) 109 | if (weight is not None) and (not torch.any(weight > 0)) and ( 110 | reduction != 'none'): 111 | return (pred * weight).sum() 112 | return self.loss( 113 | pred, 114 | target, 115 | weight=weight, 116 | avg_factor=avg_factor, 117 | reduction=reduction) * self.loss_weight -------------------------------------------------------------------------------- /unidet3d/concat_dataset.py: -------------------------------------------------------------------------------- 1 | from mmengine.dataset.dataset_wrapper import ConcatDataset 2 | from mmengine.dataset.base_dataset import BaseDataset 3 | from mmdet3d.registry import DATASETS 4 | 5 | 6 | @DATASETS.register_module() 7 | class ConcatDataset_(ConcatDataset): 8 | """A wrapper of concatenated dataset. 9 | 10 | Args: 11 | datasets (Sequence[BaseDataset] or Sequence[dict]): A list of datasets 12 | which will be concatenated. 13 | lazy_init (bool, optional): Whether to load annotation during 14 | instantiation. Defaults to False. 15 | ignore_keys (List[str] or str): Ignore the keys that can be 16 | unequal in `dataset.metainfo`. Defaults to None. 17 | `New in version 0.3.0.` 18 | """ 19 | 20 | def __init__(self, 21 | datasets, 22 | lazy_init=False, 23 | ignore_keys=None): 24 | self.datasets = [] 25 | for i, dataset in enumerate(datasets): 26 | if isinstance(dataset, dict): 27 | self.datasets.append(DATASETS.build(dataset)) 28 | elif isinstance(dataset, BaseDataset): 29 | self.datasets.append(dataset) 30 | else: 31 | raise TypeError( 32 | 'elements in datasets sequence should be config or ' 33 | f'`BaseDataset` instance, but got {type(dataset)}') 34 | if ignore_keys is None: 35 | self.ignore_keys = [] 36 | elif isinstance(ignore_keys, str): 37 | self.ignore_keys = [ignore_keys] 38 | elif isinstance(ignore_keys, list): 39 | self.ignore_keys = ignore_keys 40 | else: 41 | raise TypeError('ignore_keys should be a list or str, ' 42 | f'but got {type(ignore_keys)}') 43 | 44 | meta_keys: set = set() 45 | for dataset in self.datasets: 46 | meta_keys |= dataset.metainfo.keys() 47 | # Only use metainfo of first dataset. 48 | self._metainfo = self.datasets[0].metainfo 49 | 50 | self._fully_initialized = False 51 | if not lazy_init: 52 | self.full_init() 53 | -------------------------------------------------------------------------------- /unidet3d/data_preprocessor.py: -------------------------------------------------------------------------------- 1 | # Copied from mmdet3d/models/data_preprocessors/data_preprocessor.py 2 | from mmdet3d.models.data_preprocessors.data_preprocessor import \ 3 | Det3DDataPreprocessor 4 | from mmdet3d.registry import MODELS 5 | 6 | 7 | @MODELS.register_module() 8 | class Det3DDataPreprocessor_(Det3DDataPreprocessor): 9 | """ 10 | We add only this 2 lines: 11 | if 'elastic_coords' in inputs: 12 | batch_inputs['elastic_coords'] = inputs['elastic_coords'] 13 | """ 14 | def simple_process(self, data, training=False): 15 | """Perform normalization, padding and bgr2rgb conversion for img data 16 | based on ``BaseDataPreprocessor``, and voxelize point cloud if `voxel` 17 | is set to be True. 18 | 19 | Args: 20 | data (dict): Data sampled from dataloader. 21 | training (bool): Whether to enable training time augmentation. 22 | Defaults to False. 23 | 24 | Returns: 25 | dict: Data in the same format as the model input. 
26 | """ 27 | if 'img' in data['inputs']: 28 | batch_pad_shape = self._get_pad_shape(data) 29 | 30 | data = self.collate_data(data) 31 | inputs, data_samples = data['inputs'], data['data_samples'] 32 | batch_inputs = dict() 33 | 34 | if 'points' in inputs: 35 | batch_inputs['points'] = inputs['points'] 36 | 37 | if self.voxel: 38 | voxel_dict = self.voxelize(inputs['points'], data_samples) 39 | batch_inputs['voxels'] = voxel_dict 40 | 41 | if 'elastic_coords' in inputs: 42 | batch_inputs['elastic_coords'] = inputs['elastic_coords'] 43 | 44 | if 'imgs' in inputs: 45 | imgs = inputs['imgs'] 46 | 47 | if data_samples is not None: 48 | # NOTE the batched image size information may be useful, e.g. 49 | # in DETR, this is needed for the construction of masks, which 50 | # is then used for the transformer_head. 51 | batch_input_shape = tuple(imgs[0].size()[-2:]) 52 | for data_sample, pad_shape in zip(data_samples, 53 | batch_pad_shape): 54 | data_sample.set_metainfo({ 55 | 'batch_input_shape': batch_input_shape, 56 | 'pad_shape': pad_shape 57 | }) 58 | 59 | if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor: 60 | from mmdet.models.utils.misc import \ 61 | samplelist_boxtype2tensor 62 | samplelist_boxtype2tensor(data_samples) 63 | elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor: 64 | from mmdet.models.utils.misc import \ 65 | samplelist_boxlist2tensor 66 | samplelist_boxlist2tensor(data_samples) 67 | if self.pad_mask: 68 | self.pad_gt_masks(data_samples) 69 | 70 | if self.pad_seg: 71 | self.pad_gt_sem_seg(data_samples) 72 | 73 | if training and self.batch_augments is not None: 74 | for batch_aug in self.batch_augments: 75 | imgs, data_samples = batch_aug(imgs, data_samples) 76 | batch_inputs['imgs'] = imgs 77 | 78 | return {'inputs': batch_inputs, 'data_samples': data_samples} 79 | -------------------------------------------------------------------------------- /unidet3d/formatting.py: -------------------------------------------------------------------------------- 1 | # Adapted from mmdet3d/datasets/transforms/formating.py 2 | import numpy as np 3 | from .structures import InstanceData_ 4 | from mmdet3d.datasets.transforms import Pack3DDetInputs 5 | from mmdet3d.datasets.transforms.formating import to_tensor 6 | from mmdet3d.registry import TRANSFORMS 7 | from mmdet3d.structures import BaseInstance3DBoxes, Det3DDataSample, PointData 8 | from mmdet3d.structures.points import BasePoints 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class Pack3DDetInputs_(Pack3DDetInputs): 13 | """Just add elastic_coords, sp_pts_mask, and gt_sp_masks. 14 | """ 15 | INPUTS_KEYS = ['points', 'img', 'elastic_coords'] 16 | SEG_KEYS = [ 17 | 'gt_seg_map', 18 | 'pts_instance_mask', 19 | 'pts_semantic_mask', 20 | 'gt_semantic_seg', 21 | 'sp_pts_mask', 22 | ] 23 | INSTANCEDATA_3D_KEYS = [ 24 | 'gt_bboxes_3d', 'gt_labels_3d', 'attr_labels', 'depths', 'centers_2d', 25 | 'gt_sp_masks' 26 | ] 27 | 28 | def pack_single_results(self, results: dict) -> dict: 29 | """Method to pack the single input data. when the value in this dict is 30 | a list, it usually is in Augmentations Testing. 31 | 32 | Args: 33 | results (dict): Result dict from the data pipeline. 34 | 35 | Returns: 36 | dict: A dict contains 37 | 38 | - 'inputs' (dict): The forward data of models. It usually contains 39 | following keys: 40 | 41 | - points 42 | - img 43 | 44 | - 'data_samples' (:obj:`Det3DDataSample`): The annotation info 45 | of the sample. 
46 | """ 47 | # Format 3D data 48 | if 'points' in results: 49 | if isinstance(results['points'], BasePoints): 50 | results['points'] = results['points'].tensor 51 | 52 | if 'img' in results: 53 | if isinstance(results['img'], list): 54 | # process multiple imgs in single frame 55 | imgs = np.stack(results['img'], axis=0) 56 | if imgs.flags.c_contiguous: 57 | imgs = to_tensor(imgs).permute(0, 3, 1, 2).contiguous() 58 | else: 59 | imgs = to_tensor( 60 | np.ascontiguousarray(imgs.transpose(0, 3, 1, 2))) 61 | results['img'] = imgs 62 | else: 63 | img = results['img'] 64 | if len(img.shape) < 3: 65 | img = np.expand_dims(img, -1) 66 | # To improve the computational speed by by 3-5 times, apply: 67 | # `torch.permute()` rather than `np.transpose()`. 68 | # Refer to https://github.com/open-mmlab/mmdetection/pull/9533 69 | # for more details 70 | if img.flags.c_contiguous: 71 | img = to_tensor(img).permute(2, 0, 1).contiguous() 72 | else: 73 | img = to_tensor( 74 | np.ascontiguousarray(img.transpose(2, 0, 1))) 75 | results['img'] = img 76 | 77 | for key in [ 78 | 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels', 79 | 'gt_bboxes_labels', 'attr_labels', 'pts_instance_mask', 80 | 'pts_semantic_mask', 'sp_pts_mask', 'gt_sp_masks', 81 | 'elastic_coords', 'centers_2d', 'depths', 'gt_labels_3d' 82 | ]: 83 | if key not in results: 84 | continue 85 | if isinstance(results[key], list): 86 | results[key] = [to_tensor(res) for res in results[key]] 87 | else: 88 | results[key] = to_tensor(results[key]) 89 | if 'gt_bboxes_3d' in results: 90 | if not isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes): 91 | results['gt_bboxes_3d'] = to_tensor(results['gt_bboxes_3d']) 92 | 93 | if 'gt_semantic_seg' in results: 94 | results['gt_semantic_seg'] = to_tensor( 95 | results['gt_semantic_seg'][None]) 96 | if 'gt_seg_map' in results: 97 | results['gt_seg_map'] = results['gt_seg_map'][None, ...] 
98 | 99 | data_sample = Det3DDataSample() 100 | gt_instances_3d = InstanceData_() 101 | gt_instances = InstanceData_() 102 | gt_pts_seg = PointData() 103 | 104 | img_metas = {} 105 | for key in self.meta_keys: 106 | if key in results: 107 | img_metas[key] = results[key] 108 | data_sample.set_metainfo(img_metas) 109 | 110 | inputs = {} 111 | for key in self.keys: 112 | if key in results: 113 | if key in self.INPUTS_KEYS: 114 | inputs[key] = results[key] 115 | elif key in self.INSTANCEDATA_3D_KEYS: 116 | gt_instances_3d[self._remove_prefix(key)] = results[key] 117 | elif key in self.INSTANCEDATA_2D_KEYS: 118 | if key == 'gt_bboxes_labels': 119 | gt_instances['labels'] = results[key] 120 | else: 121 | gt_instances[self._remove_prefix(key)] = results[key] 122 | elif key in self.SEG_KEYS: 123 | gt_pts_seg[self._remove_prefix(key)] = results[key] 124 | else: 125 | raise NotImplementedError(f'Please modified ' 126 | f'`Pack3DDetInputs` ' 127 | f'to put {key} to ' 128 | f'corresponding field') 129 | 130 | data_sample.gt_instances_3d = gt_instances_3d 131 | data_sample.gt_instances = gt_instances 132 | data_sample.gt_pts_seg = gt_pts_seg 133 | if 'eval_ann_info' in results: 134 | data_sample.eval_ann_info = results['eval_ann_info'] 135 | else: 136 | data_sample.eval_ann_info = None 137 | 138 | packed_results = dict() 139 | packed_results['data_samples'] = data_sample 140 | packed_results['inputs'] = inputs 141 | 142 | return packed_results 143 | -------------------------------------------------------------------------------- /unidet3d/image_vis.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import copy 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | def project_pts_on_img(points, 11 | raw_img, 12 | lidar2img_rt, 13 | max_distance=70, 14 | thickness=-1): 15 | """Project the 3D points cloud on 2D image. 16 | 17 | Args: 18 | points (numpy.array): 3D points cloud (x, y, z) to visualize. 19 | raw_img (numpy.array): The numpy array of image. 20 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix 21 | according to the camera intrinsic parameters. 22 | max_distance (float, optional): the max distance of the points cloud. 23 | Default: 70. 24 | thickness (int, optional): The thickness of 2D points. Default: -1. 
25 | """ 26 | img = raw_img.copy() 27 | num_points = points.shape[0] 28 | pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1) 29 | pts_2d = pts_4d @ lidar2img_rt.T 30 | 31 | # cam_points is Tensor of Nx4 whose last column is 1 32 | # transform camera coordinate to image coordinate 33 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999) 34 | pts_2d[:, 0] /= pts_2d[:, 2] 35 | pts_2d[:, 1] /= pts_2d[:, 2] 36 | 37 | fov_inds = ((pts_2d[:, 0] < img.shape[1]) 38 | & (pts_2d[:, 0] >= 0) 39 | & (pts_2d[:, 1] < img.shape[0]) 40 | & (pts_2d[:, 1] >= 0)) 41 | 42 | imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d 43 | 44 | cmap = plt.cm.get_cmap('hsv', 256) 45 | cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255 46 | for i in range(imgfov_pts_2d.shape[0]): 47 | depth = imgfov_pts_2d[i, 2] 48 | color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :] 49 | cv2.circle( 50 | img, 51 | center=(int(np.round(imgfov_pts_2d[i, 0])), 52 | int(np.round(imgfov_pts_2d[i, 1]))), 53 | radius=1, 54 | color=tuple(color), 55 | thickness=thickness, 56 | ) 57 | cv2.imshow('project_pts_img', img.astype(np.uint8)) 58 | cv2.waitKey(100) 59 | 60 | 61 | def plot_rect3d_on_img(img, 62 | num_rects, 63 | rect_corners, 64 | color=(0, 255, 0), 65 | thickness=1): 66 | """Plot the boundary lines of 3D rectangular on 2D images. 67 | 68 | Args: 69 | img (numpy.array): The numpy array of image. 70 | num_rects (int): Number of 3D rectangulars. 71 | rect_corners (numpy.array): Coordinates of the corners of 3D 72 | rectangulars. Should be in the shape of [num_rect, 8, 2]. 73 | color (tuple[int], optional): The color to draw bboxes. 74 | Default: (0, 255, 0). 75 | thickness (int, optional): The thickness of bboxes. Default: 1. 76 | """ 77 | line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7), 78 | (4, 5), (4, 7), (2, 6), (5, 6), (6, 7)) 79 | for i in range(num_rects): 80 | corners = rect_corners[i].astype(np.int) 81 | for start, end in line_indices: 82 | cv2.line(img, (corners[start, 0], corners[start, 1]), 83 | (corners[end, 0], corners[end, 1]), color, thickness, 84 | cv2.LINE_AA) 85 | 86 | return img.astype(np.uint8) 87 | 88 | 89 | def draw_lidar_bbox3d_on_img(bboxes3d, 90 | raw_img, 91 | lidar2img_rt, 92 | img_metas, 93 | color=(0, 255, 0), 94 | thickness=1): 95 | """Project the 3D bbox on 2D plane and draw on input image. 96 | 97 | Args: 98 | bboxes3d (:obj:`LiDARInstance3DBoxes`): 99 | 3d bbox in lidar coordinate system to visualize. 100 | raw_img (numpy.array): The numpy array of image. 101 | lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix 102 | according to the camera intrinsic parameters. 103 | img_metas (dict): Useless here. 104 | color (tuple[int], optional): The color to draw bboxes. 105 | Default: (0, 255, 0). 106 | thickness (int, optional): The thickness of bboxes. Default: 1. 
107 | """ 108 | img = raw_img.copy() 109 | corners_3d = bboxes3d.corners 110 | num_bbox = corners_3d.shape[0] 111 | pts_4d = np.concatenate( 112 | [corners_3d.reshape(-1, 3), 113 | np.ones((num_bbox * 8, 1))], axis=-1) 114 | lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4) 115 | if isinstance(lidar2img_rt, torch.Tensor): 116 | lidar2img_rt = lidar2img_rt.cpu().numpy() 117 | pts_2d = pts_4d @ lidar2img_rt.T 118 | 119 | pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5) 120 | pts_2d[:, 0] /= pts_2d[:, 2] 121 | pts_2d[:, 1] /= pts_2d[:, 2] 122 | imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2) 123 | 124 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) 125 | 126 | 127 | # TODO: remove third parameter in all functions here in favour of img_metas 128 | def draw_depth_bbox3d_on_img(bboxes3d, 129 | raw_img, 130 | calibs, 131 | img_metas, 132 | color=(0, 255, 0), 133 | thickness=1): 134 | """Project the 3D bbox on 2D plane and draw on input image. 135 | 136 | Args: 137 | bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]): 138 | 3d bbox in depth coordinate system to visualize. 139 | raw_img (numpy.array): The numpy array of image. 140 | calibs (dict): Camera calibration information, Rt and K. 141 | img_metas (dict): Used in coordinates transformation. 142 | color (tuple[int], optional): The color to draw bboxes. 143 | Default: (0, 255, 0). 144 | thickness (int, optional): The thickness of bboxes. Default: 1. 145 | """ 146 | from mmdet3d.core.bbox import points_cam2img 147 | from mmdet3d.models import apply_3d_transformation 148 | 149 | img = raw_img.copy() 150 | img_metas = copy.deepcopy(img_metas) 151 | corners_3d = bboxes3d.corners 152 | num_bbox = corners_3d.shape[0] 153 | points_3d = corners_3d.reshape(-1, 3) 154 | 155 | # first reverse the data transformations 156 | xyz_depth = apply_3d_transformation( 157 | points_3d, 'DEPTH', img_metas, reverse=True) 158 | 159 | # project to 2d to get image coords (uv) 160 | uv_origin = points_cam2img(xyz_depth, 161 | xyz_depth.new_tensor(img_metas['depth2img'])) 162 | uv_origin = (uv_origin - 1).round() 163 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy() 164 | 165 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) 166 | 167 | 168 | def draw_camera_bbox3d_on_img(bboxes3d, 169 | raw_img, 170 | cam2img, 171 | img_metas, 172 | color=(0, 255, 0), 173 | thickness=1): 174 | """Project the 3D bbox on 2D plane and draw on input image. 175 | 176 | Args: 177 | bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]): 178 | 3d bbox in camera coordinate system to visualize. 179 | raw_img (numpy.array): The numpy array of image. 180 | cam2img (dict): Camera intrinsic matrix, 181 | denoted as `K` in depth bbox coordinate system. 182 | img_metas (dict): Useless here. 183 | color (tuple[int], optional): The color to draw bboxes. 184 | Default: (0, 255, 0). 185 | thickness (int, optional): The thickness of bboxes. Default: 1. 
186 | """ 187 | from mmdet3d.core.bbox import points_cam2img 188 | 189 | img = raw_img.copy() 190 | cam2img = copy.deepcopy(cam2img) 191 | corners_3d = bboxes3d.corners 192 | num_bbox = corners_3d.shape[0] 193 | points_3d = corners_3d.reshape(-1, 3) 194 | if not isinstance(cam2img, torch.Tensor): 195 | cam2img = torch.from_numpy(np.array(cam2img)) 196 | 197 | assert (cam2img.shape == torch.Size([3, 3]) 198 | or cam2img.shape == torch.Size([4, 4])) 199 | cam2img = cam2img.float().cpu() 200 | 201 | # project to 2d to get image coords (uv) 202 | uv_origin = points_cam2img(points_3d, cam2img) 203 | uv_origin = (uv_origin - 1).round() 204 | imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy() 205 | 206 | return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness) -------------------------------------------------------------------------------- /unidet3d/indoor_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Dict, List, Optional, Sequence 3 | 4 | from mmengine.evaluator import BaseMetric 5 | from mmengine.logging import MMLogger 6 | 7 | from .indoor_eval import indoor_eval 8 | from mmdet3d.registry import METRICS 9 | from mmdet3d.structures import get_box_type 10 | from .show_results import show_result_v2 11 | from pathlib import Path 12 | 13 | @METRICS.register_module() 14 | class IndoorMetric_(BaseMetric): 15 | """Indoor scene evaluation metric. 16 | 17 | Args: 18 | iou_thr (float or List[float]): List of iou threshold when calculate 19 | the metric. Defaults to [0.25, 0.5]. 20 | collect_device (str): Device name used for collecting results from 21 | different ranks during distributed training. Must be 'cpu' or 22 | 'gpu'. Defaults to 'cpu'. 23 | prefix (str, optional): The prefix that will be added in the metric 24 | names to disambiguate homonymous metrics of different evaluators. 25 | If prefix is not provided in the argument, self.default_prefix will 26 | be used instead. Defaults to None. 27 | """ 28 | 29 | def __init__(self, 30 | datasets, 31 | datasets_classes, 32 | vis_dir: str = None, 33 | iou_thr: List[float] = [0.25, 0.5], 34 | collect_device: str = 'cpu', 35 | prefix: Optional[str] = None) -> None: 36 | super(IndoorMetric_, self).__init__( 37 | prefix=prefix, collect_device=collect_device) 38 | self.iou_thr = [iou_thr] if isinstance(iou_thr, float) else iou_thr 39 | self.datasets = datasets 40 | self.datasets_classes = datasets_classes 41 | self.vis_dir = vis_dir 42 | 43 | def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: 44 | """Process one batch of data samples and predictions. 45 | 46 | The processed results should be stored in ``self.results``, which will 47 | be used to compute the metrics when all batches have been processed. 48 | 49 | Args: 50 | data_batch (dict): A batch of data from the dataloader. 51 | data_samples (Sequence[dict]): A batch of outputs from the model. 
52 | """ 53 | for data_sample in data_samples: 54 | pred_3d = data_sample['pred_instances_3d'] 55 | pred_3d['dataset'] = self.get_dataset(data_sample['lidar_path']) 56 | eval_ann_info = data_sample['eval_ann_info'] 57 | cpu_pred_3d = dict() 58 | for k, v in pred_3d.items(): 59 | if hasattr(v, 'to'): 60 | cpu_pred_3d[k] = v.to('cpu') 61 | else: 62 | cpu_pred_3d[k] = v 63 | self.results.append((eval_ann_info, cpu_pred_3d)) 64 | 65 | def compute_metrics(self, results: list) -> Dict[str, float]: 66 | """Compute the metrics from processed results. 67 | 68 | Args: 69 | results (list): The processed results of each batch. 70 | 71 | Returns: 72 | Dict[str, float]: The computed metrics. The keys are the names of 73 | the metrics, and the values are corresponding results. 74 | """ 75 | logger: MMLogger = MMLogger.get_current_instance() 76 | ann_infos = [[] for _ in self.datasets] 77 | pred_results = [[] for _ in self.datasets] 78 | 79 | for eval_ann, sinlge_pred_results in results: 80 | idx = self.datasets.index(sinlge_pred_results['dataset']) 81 | ann_infos[idx].append(eval_ann) 82 | pred_results[idx].append(sinlge_pred_results) 83 | if self.vis_dir is not None: 84 | self.vis_results(eval_ann, sinlge_pred_results) 85 | 86 | # some checkpoints may not record the key "box_type_3d" 87 | box_type_3d, box_mode_3d = get_box_type( 88 | self.dataset_meta.get('box_type_3d', 'depth')) 89 | 90 | ret_dict = {} 91 | for i in range(len(self.datasets)): 92 | ret_dict[self.datasets[i]] = indoor_eval( 93 | ann_infos[i], 94 | pred_results[i], 95 | self.iou_thr, 96 | self.datasets_classes[i], 97 | logger=logger, 98 | box_mode_3d=box_mode_3d) 99 | 100 | return ret_dict 101 | 102 | def get_dataset(self, lidar_path): 103 | for dataset in self.datasets: 104 | if dataset in lidar_path.split('/'): 105 | return dataset 106 | 107 | def vis_results(self, eval_ann, sinlge_pred_results): 108 | pts = sinlge_pred_results['points'].numpy() 109 | pts[:, 3:] *= 127.5 110 | pts[:, 3:] += 127.5 111 | show_result_v2(pts, eval_ann['gt_bboxes_3d'].corners, 112 | eval_ann['gt_labels_3d'], 113 | sinlge_pred_results['bboxes_3d'].corners, 114 | sinlge_pred_results['labels_3d'], 115 | Path(self.vis_dir) / sinlge_pred_results['dataset'], 116 | eval_ann['lidar_idx']) -------------------------------------------------------------------------------- /unidet3d/loading.py: -------------------------------------------------------------------------------- 1 | # Adapted from mmdet3d/datasets/transforms/loading.py 2 | import mmengine 3 | import numpy as np 4 | 5 | from mmdet3d.datasets.transforms import LoadAnnotations3D 6 | from mmdet3d.datasets.transforms.loading import get 7 | from mmdet3d.datasets.transforms.loading import NormalizePointsColor 8 | from mmdet3d.registry import TRANSFORMS 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class LoadAnnotations3D_(LoadAnnotations3D): 13 | """Just add super point mask loading. 14 | 15 | Args: 16 | with_sp_mask_3d (bool): Whether to load super point maks. 17 | """ 18 | 19 | def __init__(self, with_sp_mask_3d, **kwargs): 20 | self.with_sp_mask_3d = with_sp_mask_3d 21 | super().__init__(**kwargs) 22 | 23 | def _load_sp_pts_3d(self, results): 24 | """Private function to load 3D superpoints mask annotations. 25 | 26 | Args: 27 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`. 28 | 29 | Returns: 30 | dict: The dict containing loaded 3D mask annotations. 
31 | """ 32 | sp_pts_mask_path = results['super_pts_path'] 33 | 34 | try: 35 | mask_bytes = get( 36 | sp_pts_mask_path, backend_args=self.backend_args) 37 | # add .copy() to fix read-only bug 38 | sp_pts_mask = np.frombuffer( 39 | mask_bytes, dtype=np.int64).copy() 40 | except ConnectionError: 41 | mmengine.check_file_exist(sp_pts_mask_path) 42 | sp_pts_mask = np.fromfile( 43 | sp_pts_mask_path, dtype=np.int64) 44 | 45 | results['sp_pts_mask'] = sp_pts_mask 46 | 47 | # 'eval_ann_info' will be passed to evaluator 48 | if 'eval_ann_info' in results: 49 | results['eval_ann_info']['sp_pts_mask'] = sp_pts_mask 50 | results['eval_ann_info']['lidar_idx'] = \ 51 | sp_pts_mask_path.split("/")[-1][:-4] 52 | return results 53 | 54 | def transform(self, results: dict) -> dict: 55 | """Function to load multiple types annotations. 56 | 57 | Args: 58 | results (dict): Result dict from :obj:`mmdet3d.CustomDataset`. 59 | 60 | Returns: 61 | dict: The dict containing loaded 3D bounding box, label, mask and 62 | semantic segmentation annotations. 63 | """ 64 | results = super().transform(results) 65 | if self.with_sp_mask_3d: 66 | results = self._load_sp_pts_3d(results) 67 | return results 68 | 69 | 70 | @TRANSFORMS.register_module() 71 | class NormalizePointsColor_(NormalizePointsColor): 72 | """Just add color_std parameter. 73 | 74 | Args: 75 | color_mean (list[float]): Mean color of the point cloud. 76 | color_std (list[float]): Std color of the point cloud. 77 | Default value is from SPFormer preprocessing. 78 | """ 79 | 80 | def __init__(self, color_mean, color_std=127.5): 81 | self.color_mean = color_mean 82 | self.color_std = color_std 83 | 84 | def transform(self, input_dict): 85 | """Call function to normalize color of points. 86 | 87 | Args: 88 | results (dict): Result dict containing point clouds data. 89 | 90 | Returns: 91 | dict: The result dict containing the normalized points. 92 | Updated key and value are described below. 93 | - points (:obj:`BasePoints`): Points after color normalization. 94 | """ 95 | points = input_dict['points'] 96 | assert points.attribute_dims is not None and \ 97 | 'color' in points.attribute_dims.keys(), \ 98 | 'Expect points have color attribute' 99 | if self.color_mean is not None: 100 | points.color = points.color - \ 101 | points.color.new_tensor(self.color_mean) 102 | if self.color_std is not None: 103 | points.color = points.color / \ 104 | points.color.new_tensor(self.color_std) 105 | input_dict['points'] = points 106 | return input_dict 107 | 108 | 109 | @TRANSFORMS.register_module() 110 | class DenormalizePointsColor(NormalizePointsColor): 111 | """Denormalize points colors. 112 | 113 | Args: 114 | color_mean (list[float]): Mean color of the point cloud. 115 | color_std (list[float]): Std color of the point cloud. 116 | Default value is from SPFormer preprocessing. 117 | """ 118 | 119 | def __init__(self, color_mean, color_std): 120 | self.color_mean = color_mean 121 | self.color_std = color_std 122 | 123 | def transform(self, input_dict): 124 | """Call function to normalize color of points. 125 | 126 | Args: 127 | results (dict): Result dict containing point clouds data. 128 | 129 | Returns: 130 | dict: The result dict containing the normalized points. 131 | Updated key and value are described below. 132 | - points (:obj:`BasePoints`): Points after color normalization. 
133 | """ 134 | points = input_dict['points'] 135 | assert points.attribute_dims is not None and \ 136 | 'color' in points.attribute_dims.keys(), \ 137 | 'Expect points have color attribute' 138 | if self.color_std is not None: 139 | points.color = points.color * \ 140 | points.color.new_tensor(self.color_std) 141 | if self.color_mean is not None: 142 | points.color = points.color + \ 143 | points.color.new_tensor(self.color_mean) 144 | 145 | input_dict['points'] = points 146 | return input_dict -------------------------------------------------------------------------------- /unidet3d/multiscan_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from mmdet3d.datasets import Det3DDataset 3 | from mmdet3d.registry import DATASETS 4 | from mmdet3d.structures import DepthInstance3DBoxes 5 | import os.path as osp 6 | from mmengine.logging import print_log 7 | import logging 8 | import numpy as np 9 | 10 | @DATASETS.register_module() 11 | class MultiScan(Det3DDataset): 12 | """MultiScan dataset. 13 | 14 | Args: 15 | data_prefix (dict): Prefix for data. Defaults to 16 | dict(pts='points', pts_instance_mask='instance_mask', 17 | pts_semantic_mask='semantic_mask'). 18 | box_type_3d (str): Type of 3D box of this dataset. 19 | Based on the `box_type_3d`, the dataset will encapsulate the box 20 | to its original format then converted them to `box_type_3d`. 21 | Defaults to 'Depth'. 22 | """ 23 | METAINFO = { 24 | 'classes': 25 | # ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 26 | # 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 27 | # 'refrigerator', 'toilet', 'no_target') 28 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 29 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 30 | 'refrigerator', 'toilet') 31 | } 32 | 33 | def __init__(self, 34 | data_prefix=dict( 35 | pts='points', 36 | pts_instance_mask='instance_mask', 37 | pts_semantic_mask='semantic_mask'), 38 | box_type_3d='Depth', 39 | **kwargs): 40 | super().__init__( 41 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs) 42 | 43 | def parse_ann_info(self, info): 44 | """Process the `instances` in data info to `ann_info`. 45 | 46 | Args: 47 | info (dict): Info dict. 48 | 49 | Returns: 50 | dict: Processed `ann_info` 51 | """ 52 | ann_info = super().parse_ann_info(info) 53 | if ann_info is None: 54 | ann_info = dict() 55 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 56 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 57 | 58 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 59 | ann_info['gt_bboxes_3d'], 60 | origin=(0.5, 0.5, 0.5), box_dim=6, 61 | with_yaw=False).convert_to(self.box_mode_3d) 62 | 63 | return ann_info 64 | 65 | @DATASETS.register_module() 66 | class MultiScan_(MultiScan): 67 | """MultiScan dataset with partition. 68 | 69 | Args: 70 | partition(float): Defaults to 1, the part of 71 | the dataset that will be used. 
72 | """ 73 | METAINFO = { 74 | 'classes': 75 | ('door', 'table', 'chair', 'cabinet', 'window', 'sofa', 'microwave', 'pillow', 76 | 'tv_monitor', 'curtain', 'trash_can', 'suitcase', 'sink', 'backpack', 'bed', 77 | 'refrigerator', 'toilet'), 78 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) 79 | } 80 | 81 | def __init__(self, 82 | partition: float = 1, 83 | **kwargs) -> None: 84 | self.partition = partition 85 | super().__init__(**kwargs) 86 | 87 | def parse_ann_info(self, info: dict) -> Union[dict, None]: 88 | """Process the `instances` in data info to `ann_info`. 89 | 90 | In `Custom3DDataset`, we simply concatenate all the field 91 | in `instances` to `np.ndarray`, you can do the specific 92 | process in subclass. You have to convert `gt_bboxes_3d` 93 | to different coordinates according to the task. 94 | 95 | Args: 96 | info (dict): Info dict. 97 | 98 | Returns: 99 | dict or None: Processed `ann_info`. 100 | """ 101 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])} 102 | instances = [] 103 | for instance in info['instances']: 104 | if instance['bbox_label_3d'] in ids: 105 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']] 106 | instances.append(instance) 107 | info['instances'] = instances 108 | return super().parse_ann_info(info) 109 | 110 | def __getitem__(self, idx: int) -> dict: 111 | """Get the idx-th image and data information of dataset after 112 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 113 | not been fully initialized. 114 | 115 | During training phase, if ``self.pipeline`` get ``None``, 116 | ``self._rand_another`` will be called until a valid image is fetched or 117 | the maximum limit of refetech is reached. 118 | 119 | Args: 120 | idx (int): The index of self.data_list. 121 | 122 | Returns: 123 | dict: The idx-th image and data information of dataset after 124 | ``self.pipeline``. 125 | """ 126 | # Performing full initialization by calling `__getitem__` will consume 127 | # extra memory. If a dataset is not fully initialized by setting 128 | # `lazy_init=True` and then fed into the dataloader. Different workers 129 | # will simultaneously read and parse the annotation. It will cost more 130 | # time and memory, although this may work. Therefore, it is recommended 131 | # to manually call `full_init` before dataset fed into dataloader to 132 | # ensure all workers use shared RAM from master process. 133 | 134 | if not self.test_mode: 135 | if self.serialize_data: 136 | dataset_len = len(self.data_address) 137 | else: 138 | dataset_len = len(self.data_list) 139 | idx = np.random.randint(0, dataset_len) 140 | if not self._fully_initialized: 141 | print_log( 142 | 'Please call `full_init()` method manually to accelerate ' 143 | 'the speed.', 144 | logger='current', 145 | level=logging.WARNING) 146 | self.full_init() 147 | 148 | if self.test_mode: 149 | data = self.prepare_data(idx) 150 | if data is None: 151 | raise Exception('Test time pipline should not get `None` ' 152 | 'data_sample') 153 | return data 154 | 155 | for _ in range(self.max_refetch + 1): 156 | data = self.prepare_data(idx) 157 | # Broken images or random augmentations may cause the returned data 158 | # to be None 159 | if data is None: 160 | idx = self._rand_another() 161 | continue 162 | return data 163 | 164 | def __len__(self) -> int: 165 | """Get the length of filtered dataset and automatically call 166 | ``full_init`` if the dataset has not been fully init. 
167 | 168 | Returns: 169 | int: The length of filtered dataset. 170 | """ 171 | 172 | if self.serialize_data: 173 | dataset_len = len(self.data_address) 174 | else: 175 | dataset_len = len(self.data_list) 176 | if not self.test_mode: 177 | return int(self.partition * dataset_len) 178 | else: 179 | return dataset_len 180 | 181 | def parse_data_info(self, info: dict) -> dict: 182 | """Process the raw data info. 183 | 184 | Args: 185 | info (dict): Raw info dict. 186 | 187 | Returns: 188 | dict: Has `ann_info` in training stage. And 189 | all path has been converted to absolute path. 190 | """ 191 | info['super_pts_path'] = osp.join( 192 | self.data_prefix.get('sp_pts_mask', ''), 193 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 194 | 195 | info = super().parse_data_info(info) 196 | 197 | return info -------------------------------------------------------------------------------- /unidet3d/rotated_iou_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Optional 3 | 4 | import torch 5 | from mmcv.ops.diff_iou_rotated import box2corners, oriented_box_intersection_2d 6 | from mmdet.models.losses.utils import weighted_loss 7 | from torch import Tensor 8 | from torch import nn as nn 9 | 10 | from mmdet3d.models import rotated_iou_3d_loss 11 | from mmdet3d.registry import MODELS 12 | 13 | 14 | def diff_diou_rotated_3d(box3d1: Tensor, box3d2: Tensor) -> Tensor: 15 | """Calculate differentiable DIoU of rotated 3d boxes. 16 | 17 | Args: 18 | box3d1 (Tensor): (B, N, 3+3+1) First box (x,y,z,w,h,l,alpha). 19 | box3d2 (Tensor): (B, N, 3+3+1) Second box (x,y,z,w,h,l,alpha). 20 | Returns: 21 | Tensor: (B, N) DIoU. 22 | """ 23 | box1 = box3d1[..., [0, 1, 3, 4, 6]] 24 | box2 = box3d2[..., [0, 1, 3, 4, 6]] 25 | corners1 = box2corners(box1) 26 | corners2 = box2corners(box2) 27 | intersection, _ = oriented_box_intersection_2d(corners1, corners2) 28 | zmax1 = box3d1[..., 2] + box3d1[..., 5] * 0.5 29 | zmin1 = box3d1[..., 2] - box3d1[..., 5] * 0.5 30 | zmax2 = box3d2[..., 2] + box3d2[..., 5] * 0.5 31 | zmin2 = box3d2[..., 2] - box3d2[..., 5] * 0.5 32 | z_overlap = (torch.min(zmax1, zmax2) - 33 | torch.max(zmin1, zmin2)).clamp_(min=0.) 34 | intersection_3d = intersection * z_overlap 35 | volume1 = box3d1[..., 3] * box3d1[..., 4] * box3d1[..., 5] 36 | volume2 = box3d2[..., 3] * box3d2[..., 4] * box3d2[..., 5] 37 | union_3d = volume1 + volume2 - intersection_3d 38 | 39 | x1_max = torch.max(corners1[..., 0], dim=2)[0] 40 | x1_min = torch.min(corners1[..., 0], dim=2)[0] 41 | y1_max = torch.max(corners1[..., 1], dim=2)[0] 42 | y1_min = torch.min(corners1[..., 1], dim=2)[0] 43 | 44 | x2_max = torch.max(corners2[..., 0], dim=2)[0] 45 | x2_min = torch.min(corners2[..., 0], dim=2)[0] 46 | y2_max = torch.max(corners2[..., 1], dim=2)[0] 47 | y2_min = torch.min(corners2[..., 1], dim=2)[0] 48 | 49 | x_max = torch.max(x1_max, x2_max) 50 | x_min = torch.min(x1_min, x2_min) 51 | y_max = torch.max(y1_max, y2_max) 52 | y_min = torch.min(y1_min, y2_min) 53 | 54 | z_max = torch.max(zmax1, zmax2) 55 | z_min = torch.min(zmin1, zmin2) 56 | 57 | r2 = ((box1[..., :3] - box2[..., :3])**2).sum(dim=-1) 58 | c2 = (x_min - x_max)**2 + (y_min - y_max)**2 + (z_min - z_max)**2 59 | 60 | return intersection_3d / union_3d - r2 / c2 61 | 62 | 63 | @weighted_loss 64 | def rotated_diou_3d_loss(pred: Tensor, target: Tensor) -> Tensor: 65 | """Calculate the DIoU loss (1-DIoU) of two sets of rotated bounding boxes. 
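Compared to plain IoU, DIoU adds a center-distance penalty: DIoU = IoU - d^2 / c^2, where d is the distance between the two box centers and c is the diagonal of the smallest axis-aligned box enclosing both boxes.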
66 | Note that predictions and targets are one-to-one corresponded. 67 | 68 | Args: 69 | pred (torch.Tensor): Bbox predictions with shape [N, 7] 70 | (x, y, z, w, l, h, alpha). 71 | target (torch.Tensor): Bbox targets (gt) with shape [N, 7] 72 | (x, y, z, w, l, h, alpha). 73 | 74 | Returns: 75 | torch.Tensor: IoU loss between predictions and targets. 76 | """ 77 | if len(pred.shape) == 2: 78 | pred, target = pred.unsqueeze(0), target.unsqueeze(0) 79 | diou_loss = 1 - diff_diou_rotated_3d(pred, target)[0] 80 | else: 81 | diou_loss = 1 - diff_diou_rotated_3d(pred, target) 82 | return diou_loss 83 | 84 | 85 | @MODELS.register_module() 86 | class UniDet3DRotatedIoU3DLoss(nn.Module): 87 | """Calculate the IoU loss (1-IoU) of rotated bounding boxes. The only 88 | difference with original RotatedIoU3DLoss is the addition of DIoU mode. 89 | These classes should be merged in the future. 90 | 91 | Args: 92 | mode (str): 'iou' for intersection over union or 'diou' for 93 | distance-iou loss. Defaults to 'iou'. 94 | reduction (str): Method to reduce losses. 95 | The valid reduction method are 'none', 'sum' or 'mean'. 96 | Defaults to 'mean'. 97 | loss_weight (float): Weight of loss. Defaults to 1.0. 98 | """ 99 | 100 | def __init__(self, 101 | mode: str = 'iou', 102 | reduction: str = 'mean', 103 | loss_weight: float = 1.0) -> None: 104 | super(UniDet3DRotatedIoU3DLoss, self).__init__() 105 | assert mode in ['iou', 'diou'] 106 | self.loss = rotated_iou_3d_loss if mode == 'iou' \ 107 | else rotated_diou_3d_loss 108 | assert reduction in ['none', 'sum', 'mean'] 109 | self.reduction = reduction 110 | self.loss_weight = loss_weight 111 | 112 | def forward(self, 113 | pred: Tensor, 114 | target: Tensor, 115 | weight: Optional[Tensor] = None, 116 | avg_factor: Optional[float] = None, 117 | reduction_override: Optional[str] = None, 118 | **kwargs) -> Tensor: 119 | """Forward function of loss calculation. 120 | 121 | Args: 122 | pred (Tensor): Bbox predictions with shape [..., 7] 123 | (x, y, z, w, l, h, alpha). 124 | target (Tensor): Bbox targets (gt) with shape [..., 7] 125 | (x, y, z, w, l, h, alpha). 126 | weight (Tensor, optional): Weight of loss. 127 | Defaults to None. 128 | avg_factor (float, optional): Average factor that is used to 129 | average the loss. Defaults to None. 130 | reduction_override (str, optional): Method to reduce losses. 131 | The valid reduction method are 'none', 'sum' or 'mean'. 132 | Defaults to None. 133 | 134 | Returns: 135 | Tensor: IoU loss between predictions and targets. 
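When ``weight`` is provided but contains no positive entries, a differentiable zero (``pred.sum() * weight.sum()``) is returned so that the computation graph stays connected.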
136 | """ 137 | if weight is not None and not torch.any(weight > 0): 138 | return pred.sum() * weight.sum() # 0 139 | assert reduction_override in (None, 'none', 'mean', 'sum') 140 | reduction = ( 141 | reduction_override if reduction_override else self.reduction) 142 | if weight is not None and weight.dim() > 1: 143 | weight = weight.mean(-1) 144 | loss = self.loss_weight * self.loss( 145 | pred, 146 | target, 147 | weight, 148 | reduction=reduction, 149 | avg_factor=avg_factor, 150 | **kwargs) 151 | 152 | return loss -------------------------------------------------------------------------------- /unidet3d/rscan_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import numpy as np 3 | from mmdet3d.datasets import Det3DDataset 4 | from mmdet3d.registry import DATASETS 5 | from mmdet3d.structures import DepthInstance3DBoxes 6 | import os.path as osp 7 | from mmengine.logging import print_log 8 | import logging 9 | import numpy as np 10 | 11 | @DATASETS.register_module() 12 | class RScan(Det3DDataset): 13 | """RScan dataset. 14 | 15 | Args: 16 | data_prefix (dict): Prefix for data. Defaults to 17 | dict(pts='points', pts_instance_mask='instance_mask', 18 | pts_semantic_mask='semantic_mask'). 19 | box_type_3d (str): Type of 3D box of this dataset. 20 | Based on the `box_type_3d`, the dataset will encapsulate the box 21 | to its original format then converted them to `box_type_3d`. 22 | Defaults to 'Depth'. 23 | """ 24 | METAINFO = { 25 | 'classes': 26 | ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 27 | 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor mat', 'clothes', 28 | 'ceiling', 'books', 'fridge', 'television', 'paper', 'towel', 'shower curtain', 'box', 'whiteboard', 'person', 29 | 'night stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag', 'structure', 'furniture', 'prop') 30 | } 31 | 32 | def __init__(self, 33 | data_prefix=dict( 34 | pts='points', 35 | pts_instance_mask='instance_mask', 36 | pts_semantic_mask='semantic_mask'), 37 | box_type_3d='Depth', 38 | **kwargs): 39 | super().__init__( 40 | data_prefix=data_prefix, box_type_3d=box_type_3d, **kwargs) 41 | 42 | def parse_ann_info(self, info): 43 | """Process the `instances` in data info to `ann_info`. 44 | 45 | Args: 46 | info (dict): Info dict. 47 | 48 | Returns: 49 | dict: Processed `ann_info` 50 | """ 51 | ann_info = super().parse_ann_info(info) 52 | if ann_info is None: 53 | ann_info = dict() 54 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 55 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 56 | 57 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 58 | ann_info['gt_bboxes_3d'], 59 | origin=(0.5, 0.5, 0.5), box_dim=6, 60 | with_yaw=False).convert_to(self.box_mode_3d) 61 | 62 | return ann_info 63 | 64 | @DATASETS.register_module() 65 | class ThreeRScan_(RScan): 66 | """3RScan dataset with partition. 67 | 68 | Args: 69 | partition(float): Defaults to 1, the part of 70 | the dataset that will be used. 
71 | """ 72 | METAINFO = { 73 | 'classes': 74 | ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 75 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', 76 | 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture'), 77 | 'valid_class_ids': (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39) 78 | } 79 | def __init__(self, 80 | partition: float = 1, 81 | **kwargs) -> None: 82 | self.partition = partition 83 | super().__init__(**kwargs) 84 | 85 | def parse_ann_info(self, info: dict) -> Union[dict, None]: 86 | """Process the `instances` in data info to `ann_info`. 87 | 88 | In `Custom3DDataset`, we simply concatenate all the field 89 | in `instances` to `np.ndarray`, you can do the specific 90 | process in subclass. You have to convert `gt_bboxes_3d` 91 | to different coordinates according to the task. 92 | 93 | Args: 94 | info (dict): Info dict. 95 | 96 | Returns: 97 | dict or None: Processed `ann_info`. 98 | """ 99 | ids = {c: i for i, c in enumerate(self.metainfo['valid_class_ids'])} 100 | instances = [] 101 | for instance in info['instances']: 102 | if instance['bbox_label_3d'] in ids: 103 | instance['bbox_label_3d'] = ids[instance['bbox_label_3d']] 104 | instances.append(instance) 105 | info['instances'] = instances 106 | return super().parse_ann_info(info) 107 | 108 | def parse_data_info(self, info: dict) -> dict: 109 | """Process the raw data info. 110 | 111 | Args: 112 | info (dict): Raw info dict. 113 | 114 | Returns: 115 | dict: Has `ann_info` in training stage. And 116 | all path has been converted to absolute path. 117 | """ 118 | info['super_pts_path'] = osp.join( 119 | self.data_prefix.get('sp_pts_mask', ''), 120 | info['lidar_points']['lidar_path']) #info['super_pts_path'] 121 | 122 | info = super().parse_data_info(info) 123 | 124 | return info 125 | 126 | def __getitem__(self, idx: int) -> dict: 127 | """Get the idx-th image and data information of dataset after 128 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 129 | not been fully initialized. 130 | 131 | During training phase, if ``self.pipeline`` get ``None``, 132 | ``self._rand_another`` will be called until a valid image is fetched or 133 | the maximum limit of refetech is reached. 134 | 135 | Args: 136 | idx (int): The index of self.data_list. 137 | 138 | Returns: 139 | dict: The idx-th image and data information of dataset after 140 | ``self.pipeline``. 141 | """ 142 | # Performing full initialization by calling `__getitem__` will consume 143 | # extra memory. If a dataset is not fully initialized by setting 144 | # `lazy_init=True` and then fed into the dataloader. Different workers 145 | # will simultaneously read and parse the annotation. It will cost more 146 | # time and memory, although this may work. Therefore, it is recommended 147 | # to manually call `full_init` before dataset fed into dataloader to 148 | # ensure all workers use shared RAM from master process. 
149 | 150 | if not self.test_mode: 151 | if self.serialize_data: 152 | dataset_len = len(self.data_address) 153 | else: 154 | dataset_len = len(self.data_list) 155 | idx = np.random.randint(0, dataset_len) 156 | 157 | if not self._fully_initialized: 158 | print_log( 159 | 'Please call `full_init()` method manually to accelerate ' 160 | 'the speed.', 161 | logger='current', 162 | level=logging.WARNING) 163 | self.full_init() 164 | 165 | if self.test_mode: 166 | data = self.prepare_data(idx) 167 | if data is None: 168 | raise Exception('Test time pipline should not get `None` ' 169 | 'data_sample') 170 | return data 171 | 172 | for _ in range(self.max_refetch + 1): 173 | data = self.prepare_data(idx) 174 | # Broken images or random augmentations may cause the returned data 175 | # to be None 176 | if data is None: 177 | idx = self._rand_another() 178 | continue 179 | return data 180 | 181 | def __len__(self) -> int: 182 | """Get the length of filtered dataset and automatically call 183 | ``full_init`` if the dataset has not been fully init. 184 | 185 | Returns: 186 | int: The length of filtered dataset. 187 | """ 188 | 189 | if self.serialize_data: 190 | dataset_len = len(self.data_address) 191 | else: 192 | dataset_len = len(self.data_list) 193 | if not self.test_mode: 194 | return int(self.partition * dataset_len) 195 | else: 196 | return dataset_len 197 | -------------------------------------------------------------------------------- /unidet3d/s3dis_dataset.py: -------------------------------------------------------------------------------- 1 | from mmdet3d.registry import DATASETS 2 | from mmdet3d.datasets.s3dis_dataset import S3DISDataset 3 | import os.path as osp 4 | from mmengine.logging import print_log 5 | import logging 6 | import numpy as np 7 | 8 | @DATASETS.register_module() 9 | class S3DISSegDetDataset(S3DISDataset): 10 | """S3DISSegDetDataset dataset. 11 | 12 | Args: 13 | partition(float): Defaults to 1, the part of 14 | the dataset that will be used. 15 | """ 16 | def __init__(self, 17 | partition: float = 1, 18 | **kwargs) -> None: 19 | self.partition = partition 20 | super().__init__(**kwargs) 21 | 22 | def parse_data_info(self, info: dict) -> dict: 23 | """Process the raw data info. 24 | 25 | Args: 26 | info (dict): Raw info dict. 27 | 28 | Returns: 29 | dict: Has `ann_info` in training stage. And 30 | all path has been converted to absolute path. 31 | """ 32 | info['super_pts_path'] = osp.join( 33 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 34 | 35 | info = super().parse_data_info(info) 36 | 37 | return info 38 | 39 | def __getitem__(self, idx: int) -> dict: 40 | """Get the idx-th image and data information of dataset after 41 | ``self.pipeline``, and ``full_init`` will be called if the dataset has 42 | not been fully initialized. 43 | 44 | During training phase, if ``self.pipeline`` get ``None``, 45 | ``self._rand_another`` will be called until a valid image is fetched or 46 | the maximum limit of refetech is reached. 47 | 48 | Args: 49 | idx (int): The index of self.data_list. 50 | 51 | Returns: 52 | dict: The idx-th image and data information of dataset after 53 | ``self.pipeline``. 54 | """ 55 | # Performing full initialization by calling `__getitem__` will consume 56 | # extra memory. If a dataset is not fully initialized by setting 57 | # `lazy_init=True` and then fed into the dataloader. Different workers 58 | # will simultaneously read and parse the annotation. It will cost more 59 | # time and memory, although this may work. 
Therefore, it is recommended 60 | # to manually call `full_init` before dataset fed into dataloader to 61 | # ensure all workers use shared RAM from master process. 62 | 63 | if not self.test_mode: 64 | if self.serialize_data: 65 | dataset_len = len(self.data_address) 66 | else: 67 | dataset_len = len(self.data_list) 68 | idx = np.random.randint(0, dataset_len) 69 | if not self._fully_initialized: 70 | print_log( 71 | 'Please call `full_init()` method manually to accelerate ' 72 | 'the speed.', 73 | logger='current', 74 | level=logging.WARNING) 75 | self.full_init() 76 | 77 | if self.test_mode: 78 | data = self.prepare_data(idx) 79 | if data is None: 80 | raise Exception('Test time pipline should not get `None` ' 81 | 'data_sample') 82 | return data 83 | 84 | for _ in range(self.max_refetch + 1): 85 | data = self.prepare_data(idx) 86 | # Broken images or random augmentations may cause the returned data 87 | # to be None 88 | if data is None: 89 | idx = self._rand_another() 90 | continue 91 | return data 92 | 93 | def __len__(self) -> int: 94 | """Get the length of filtered dataset and automatically call 95 | ``full_init`` if the dataset has not been fully init. 96 | 97 | Returns: 98 | int: The length of filtered dataset. 99 | """ 100 | 101 | if self.serialize_data: 102 | dataset_len = len(self.data_address) 103 | else: 104 | dataset_len = len(self.data_list) 105 | if not self.test_mode: 106 | return int(self.partition * dataset_len) 107 | else: 108 | return dataset_len 109 | -------------------------------------------------------------------------------- /unidet3d/scannet_dataset.py: -------------------------------------------------------------------------------- 1 | from os import path as osp 2 | import numpy as np 3 | import warnings 4 | 5 | from mmdet3d.datasets.scannet_dataset import ScanNetSegDataset 6 | from mmdet3d.structures import DepthInstance3DBoxes 7 | from mmdet3d.registry import DATASETS 8 | 9 | 10 | @DATASETS.register_module() 11 | class ScanNetSegDataset_(ScanNetSegDataset): 12 | """We just add super_pts_path.""" 13 | 14 | def get_scene_idxs(self, *args, **kwargs): 15 | """Compute scene_idxs for data sampling.""" 16 | return np.arange(len(self)).astype(np.int32) 17 | 18 | def parse_data_info(self, info: dict) -> dict: 19 | """Process the raw data info. 20 | 21 | Args: 22 | info (dict): Raw info dict. 23 | 24 | Returns: 25 | dict: Has `ann_info` in training stage. And 26 | all path has been converted to absolute path. 27 | """ 28 | info['super_pts_path'] = osp.join( 29 | self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 30 | 31 | info = super().parse_data_info(info) 32 | 33 | return info 34 | 35 | @DATASETS.register_module() 36 | class ScanNetDetDataset(ScanNetSegDataset_): 37 | """Dataset with loading gt_bboxes_3d, gt_labels_3d and 38 | axis-align matrix for evaluating SPFormer/OneFormer with 39 | IndoorMetric. We just copy some functions from Det3DDataset 40 | and comment some lines in them. 41 | """ 42 | @staticmethod 43 | def _get_axis_align_matrix(info: dict) -> np.ndarray: 44 | """Get axis_align_matrix from info. If not exist, return identity mat. 45 | 46 | Args: 47 | info (dict): Info of a single sample data. 48 | 49 | Returns: 50 | np.ndarray: 4x4 transformation matrix. 
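Falls back to the identity matrix (and emits a warning) when ``axis_align_matrix`` is missing from the info dict.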
51 | """ 52 | if 'axis_align_matrix' in info: 53 | return np.array(info['axis_align_matrix']) 54 | else: 55 | warnings.warn( 56 | 'axis_align_matrix is not found in ScanNet data info, please ' 57 | 'use new pre-process scripts to re-generate ScanNet data') 58 | return np.eye(4).astype(np.float32) 59 | 60 | def parse_data_info(self, info: dict) -> dict: 61 | """Process the raw data info. 62 | 63 | The only difference with it in `Det3DDataset` 64 | is the specific process for `axis_align_matrix'. 65 | 66 | Args: 67 | info (dict): Raw info dict. 68 | 69 | Returns: 70 | dict: Has `ann_info` in training stage. And 71 | all path has been converted to absolute path. 72 | """ 73 | 74 | info['axis_align_matrix'] = self._get_axis_align_matrix(info) 75 | # info['super_pts_path'] = osp.join( 76 | # self.data_prefix.get('sp_pts_mask', ''), info['super_pts_path']) 77 | 78 | info = super().parse_data_info(info) 79 | 80 | if not self.test_mode: 81 | # used in training 82 | info['ann_info'] = self.parse_ann_info(info) 83 | if self.test_mode and self.load_eval_anns: 84 | info['eval_ann_info'] = self.parse_ann_info(info) 85 | 86 | return info 87 | 88 | def _det3d_parse_ann_info(self, info): 89 | """Process the `instances` in data info to `ann_info`. 90 | 91 | In `Custom3DDataset`, we simply concatenate all the field 92 | in `instances` to `np.ndarray`, you can do the specific 93 | process in subclass. You have to convert `gt_bboxes_3d` 94 | to different coordinates according to the task. 95 | 96 | Args: 97 | info (dict): Info dict. 98 | 99 | Returns: 100 | dict or None: Processed `ann_info`. 101 | """ 102 | # add s or gt prefix for most keys after concat 103 | # we only process 3d annotations here, the corresponding 104 | # 2d annotation process is in the `LoadAnnotations3D` 105 | # in `transforms` 106 | name_mapping = { 107 | 'bbox_label_3d': 'gt_labels_3d', 108 | 'bbox_label': 'gt_bboxes_labels', 109 | 'bbox': 'gt_bboxes', 110 | 'bbox_3d': 'gt_bboxes_3d', 111 | 'depth': 'depths', 112 | 'center_2d': 'centers_2d', 113 | 'attr_label': 'attr_labels', 114 | 'velocity': 'velocities', 115 | } 116 | instances = info['instances'] 117 | # empty gt 118 | if len(instances) == 0: 119 | return None 120 | else: 121 | keys = list(instances[0].keys()) 122 | ann_info = dict() 123 | for ann_name in keys: 124 | temp_anns = [item[ann_name] for item in instances] 125 | # map the original dataset label to training label 126 | # if 'label' in ann_name and ann_name != 'attr_label': 127 | # temp_anns = [ 128 | # self.label_mapping[item] for item in temp_anns 129 | # ] 130 | if ann_name in name_mapping: 131 | mapped_ann_name = name_mapping[ann_name] 132 | else: 133 | mapped_ann_name = ann_name 134 | 135 | if 'label' in ann_name: 136 | temp_anns = np.array(temp_anns).astype(np.int64) 137 | elif ann_name in name_mapping: 138 | temp_anns = np.array(temp_anns).astype(np.float32) 139 | else: 140 | temp_anns = np.array(temp_anns) 141 | 142 | ann_info[mapped_ann_name] = temp_anns 143 | ann_info['instances'] = info['instances'] 144 | 145 | # for label in ann_info['gt_labels_3d']: 146 | # if label != -1: 147 | # cat_name = self.metainfo['classes'][label] 148 | # self.num_ins_per_cat[cat_name] += 1 149 | 150 | return ann_info 151 | 152 | def parse_ann_info(self, info: dict) -> dict: 153 | """Process the `instances` in data info to `ann_info`. 154 | 155 | Args: 156 | info (dict): Info dict. 157 | 158 | Returns: 159 | dict: Processed `ann_info`. 
160 | """ 161 | ann_info = self._det3d_parse_ann_info(info) 162 | # empty gt 163 | if ann_info is None: 164 | ann_info = dict() 165 | ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32) 166 | ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64) 167 | # to target box structure 168 | 169 | ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes( 170 | ann_info['gt_bboxes_3d'], 171 | box_dim=ann_info['gt_bboxes_3d'].shape[-1], 172 | with_yaw=False, 173 | origin=(0.5, 0.5, 0.5)) # .convert_to(self.box_mode_3d) 174 | 175 | return ann_info -------------------------------------------------------------------------------- /unidet3d/structures.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sized 2 | from mmengine.structures import InstanceData 3 | 4 | 5 | class InstanceData_(InstanceData): 6 | """We only remove a single assert from __setattr__.""" 7 | 8 | def __setattr__(self, name: str, value: Sized): 9 | """setattr is only used to set data. 10 | 11 | The value must have the attribute of `__len__` and have the same length 12 | of `InstanceData`. 13 | """ 14 | if name in ('_metainfo_fields', '_data_fields'): 15 | if not hasattr(self, name): 16 | super(InstanceData, self).__setattr__(name, value) 17 | else: 18 | raise AttributeError(f'{name} has been used as a ' 19 | 'private attribute, which is immutable.') 20 | 21 | else: 22 | assert isinstance(value, 23 | Sized), 'value must contain `__len__` attribute' 24 | 25 | super(InstanceData, self).__setattr__(name, value) 26 | --------------------------------------------------------------------------------